mirror of
https://codeberg.org/scip/leo.git
synced 2025-12-16 12:11:04 +01:00
fix lang spec and parsing
This commit is contained in:
10
Changelog
10
Changelog
@@ -1,3 +1,13 @@
|
||||
1.41:
|
||||
|
||||
Generalized lang parsing and passing to dict.leo.org, which
|
||||
also adds support for new languages like ru, ch or pl. Thanks
|
||||
to J.A.Eichler.
|
||||
|
||||
1.40:
|
||||
|
||||
not logged, sorry.
|
||||
|
||||
1.39:
|
||||
|
||||
fixed rt.cpan.org#91464: disable caching if DB_File is not found.
|
||||
|
||||
227
Org.pm
227
Org.pm
@@ -7,7 +7,7 @@
|
||||
# or service marks of their respective holders.
|
||||
|
||||
package WWW::Dict::Leo::Org;
|
||||
$WWW::Dict::Leo::Org::VERSION = "1.40";
|
||||
$WWW::Dict::Leo::Org::VERSION = "1.41";
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
@@ -63,34 +63,31 @@ sub translate {
|
||||
my @match = ();
|
||||
|
||||
#
|
||||
# form var transitions for searchLoc
|
||||
#
|
||||
my %lang = (
|
||||
de2en => 1,
|
||||
en2de => -1,
|
||||
de2fr => 1,
|
||||
fr2de => -1,
|
||||
es2de => -1,
|
||||
de2es => 1,
|
||||
es => 0,
|
||||
en => 0,
|
||||
fr => 0,
|
||||
speak => "ende"
|
||||
);
|
||||
# form var transitions for searchLoc(=translation direction) and lp(=language)
|
||||
my %lang = ( speak => "ende" );
|
||||
|
||||
my @langs = qw(en es ru pt fr pl ch it);
|
||||
if ($this->{"-Language"}) {
|
||||
if ($this->{"-Language"} =~ /fr/) {
|
||||
# used for francaise translation
|
||||
$lang{speak} = "frde";
|
||||
# en | fr | ru2en | de2pl etc
|
||||
# de2, 2de, de are not part of lang spec
|
||||
if (! grep { $this->{"-Language"} =~ /$_/ } @langs) {
|
||||
croak "Unsupported language: " . $this->{"-Language"};
|
||||
}
|
||||
elsif ($this->{"-Language"} =~ /es/) {
|
||||
$lang{speak} = "esde";
|
||||
my $spec = $this->{"-Language"};
|
||||
my $l;
|
||||
if ($spec =~ /(..)2de/) {
|
||||
$l = $1;
|
||||
$this->{"-Language"} = -1;
|
||||
$lang{speak} = "${l}de";
|
||||
}
|
||||
if (exists $lang{$this->{"-Language"}}) {
|
||||
$this->{"-Language"} = $lang{$this->{"-Language"}};
|
||||
elsif ($spec =~ /de2(..)/) {
|
||||
$l = $1;
|
||||
$this->{"-Language"} = 1;
|
||||
$lang{speak} = "${l}de";
|
||||
}
|
||||
else {
|
||||
croak "Unsupported language: " . $this->{"-Language"};
|
||||
$lang{speak} = $this->{"-Language"} . 'de';
|
||||
$this->{"-Language"} = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,11 +95,11 @@ sub translate {
|
||||
# cut invalid values for parameters or set defaults if unspecified
|
||||
#
|
||||
my %form = (
|
||||
spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" },
|
||||
deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" },
|
||||
cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" },
|
||||
searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" },
|
||||
);
|
||||
spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" },
|
||||
deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" },
|
||||
cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" },
|
||||
searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" },
|
||||
);
|
||||
my @form;
|
||||
foreach my $var (keys %form) {
|
||||
if (grep { $form{$var}->{val} eq $_ } @{$form{$var}->{mask}}) {
|
||||
@@ -166,10 +163,10 @@ sub translate {
|
||||
}
|
||||
|
||||
my $conn = new IO::Socket::INET(
|
||||
Proto => "tcp",
|
||||
PeerAddr => $ip,
|
||||
PeerPort => $port,
|
||||
) or die "Unable to connect to $ip:$port: $!\n";
|
||||
Proto => "tcp",
|
||||
PeerAddr => $ip,
|
||||
PeerPort => $port,
|
||||
) or die "Unable to connect to $ip:$port: $!\n";
|
||||
$conn->autoflush(1);
|
||||
|
||||
$this->debug( "GET $url?$form HTTP/1.0");
|
||||
@@ -208,41 +205,41 @@ Accept-Language: en_US, en\r\n);
|
||||
# got HTTP error
|
||||
my $err = $2;
|
||||
if ($err == 407) {
|
||||
croak "proxy auth required or access denied!\n";
|
||||
croak "proxy auth required or access denied!\n";
|
||||
}
|
||||
else {
|
||||
if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ ||
|
||||
$site =~ /found no matches for your search/
|
||||
) {
|
||||
return ();
|
||||
}
|
||||
else {
|
||||
croak "got HTTP error $err!\n";
|
||||
}
|
||||
if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ ||
|
||||
$site =~ /found no matches for your search/
|
||||
) {
|
||||
return ();
|
||||
}
|
||||
else {
|
||||
croak "got HTTP error $err!\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
my @request = (
|
||||
{
|
||||
id => 2,
|
||||
row => sub { $this->row(@_); },
|
||||
hdr => sub { $this->hdr(@_); }
|
||||
},
|
||||
{
|
||||
id => 3,
|
||||
hdr => sub { $this->hdr(@_); },
|
||||
row => sub { $this->row(@_); }
|
||||
},
|
||||
{
|
||||
id => 4,
|
||||
hdr => sub { $this->hdr(@_); },
|
||||
row => sub { $this->row(@_); }
|
||||
}
|
||||
);
|
||||
{
|
||||
id => 2,
|
||||
row => sub { $this->row(@_); },
|
||||
hdr => sub { $this->hdr(@_); }
|
||||
},
|
||||
{
|
||||
id => 3,
|
||||
hdr => sub { $this->hdr(@_); },
|
||||
row => sub { $this->row(@_); }
|
||||
},
|
||||
{
|
||||
id => 4,
|
||||
hdr => sub { $this->hdr(@_); },
|
||||
row => sub { $this->row(@_); }
|
||||
}
|
||||
);
|
||||
$this->{Linecount} = 0;
|
||||
my $p = HTML::TableParser->new( \@request,
|
||||
{ Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } );
|
||||
{ Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } );
|
||||
$site=~s/ /\ \;/g;
|
||||
$p->parse($site);
|
||||
|
||||
@@ -418,35 +415,31 @@ Possible values: fuzzy, exact, relaxed.
|
||||
=item I<-Language>
|
||||
|
||||
Translation direction. Please note that dict.leo.org always translates
|
||||
either to or from german. The following values can be used:
|
||||
either to or from german.
|
||||
|
||||
=over
|
||||
The following languages are supported: english, polish, spanish, portugese
|
||||
russian and chinese.
|
||||
|
||||
=item de
|
||||
You can specify only the country code, or append B<de2> in order to
|
||||
force translation to german, or preprend B<de2> in order to translate
|
||||
to the other language.
|
||||
|
||||
Alias for B<de2en> - german to english.
|
||||
Valid examples:
|
||||
|
||||
=item fr
|
||||
ru to or from russian
|
||||
de2pl to polish
|
||||
es2de spanish to german
|
||||
|
||||
Alias for B<de2fr> - german to french.
|
||||
Valid country codes:
|
||||
|
||||
=item es
|
||||
en english
|
||||
es spanish
|
||||
ru russian
|
||||
pt portugese
|
||||
pl polish
|
||||
ch chinese
|
||||
|
||||
Alias for B<de2es> - german to espaniol.
|
||||
|
||||
=item en2de
|
||||
|
||||
english to german.
|
||||
|
||||
=item fr2de
|
||||
|
||||
french to german.
|
||||
|
||||
=item es2de
|
||||
|
||||
espaniol to german.
|
||||
|
||||
=back
|
||||
Default: B<en>.
|
||||
|
||||
=back
|
||||
|
||||
@@ -458,48 +451,48 @@ Use this method after initialization to connect to dict.leo.org
|
||||
and translate the given term. It returns an array of hashes containing
|
||||
the actual results.
|
||||
|
||||
use WWW::Dict::Leo::Org;
|
||||
use Data::Dumper;
|
||||
my $leo = new WWW::Dict::Leo::Org();
|
||||
my @matches = $leo->translate("test");
|
||||
print Dumper(\@matches);
|
||||
use WWW::Dict::Leo::Org;
|
||||
use Data::Dumper;
|
||||
my $leo = new WWW::Dict::Leo::Org();
|
||||
my @matches = $leo->translate("test");
|
||||
print Dumper(\@matches);
|
||||
|
||||
which prints:
|
||||
|
||||
$VAR1 = [
|
||||
{
|
||||
'data' => [
|
||||
{
|
||||
'left' => 'check',
|
||||
'right' => 'der Test'
|
||||
},
|
||||
{
|
||||
'left' => 'quiz (Amer.)',
|
||||
'right' => 'der Test <20><> [Schule]'
|
||||
],
|
||||
'title' => 'Unmittelbare Treffer'
|
||||
},
|
||||
{
|
||||
'data' => [
|
||||
{
|
||||
'left' => 'check',
|
||||
'right' => 'der Test'
|
||||
},
|
||||
{
|
||||
'left' => 'quiz (Amer.)',
|
||||
'right' => 'der Test <20><> [Schule]'
|
||||
],
|
||||
'title' => 'Unmittelbare Treffer'
|
||||
'data' => [
|
||||
{
|
||||
'left' => 'to fail a test',
|
||||
'right' => 'einen Test nicht bestehen'
|
||||
},
|
||||
{
|
||||
'left' => 'to test',
|
||||
'right' => 'Tests macheneinen Test machen'
|
||||
}
|
||||
],
|
||||
'title' => 'Verben und Verbzusammensetzungen'
|
||||
},
|
||||
{
|
||||
'data' => [
|
||||
{
|
||||
'left' => 'to fail a test',
|
||||
'right' => 'einen Test nicht bestehen'
|
||||
},
|
||||
{
|
||||
'left' => 'to test',
|
||||
'right' => 'Tests macheneinen Test machen'
|
||||
}
|
||||
],
|
||||
'title' => 'Verben und Verbzusammensetzungen'
|
||||
},
|
||||
'data' => [
|
||||
{
|
||||
'left' => 'testing <20>adj.',
|
||||
'right' => 'im Test'
|
||||
}
|
||||
],
|
||||
'title' => 'Wendungen und Ausdr<64>cke'
|
||||
}
|
||||
'data' => [
|
||||
{
|
||||
'left' => 'testing <20>adj.',
|
||||
'right' => 'im Test'
|
||||
}
|
||||
],
|
||||
'title' => 'Wendungen und Ausdr<64>cke'
|
||||
}
|
||||
];
|
||||
|
||||
|
||||
@@ -541,6 +534,6 @@ Please don't forget to add debugging output!
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
1.40
|
||||
1.41
|
||||
|
||||
=cut
|
||||
|
||||
159
leo
159
leo
@@ -8,8 +8,6 @@
|
||||
# the term to be translated. It will then return the results in
|
||||
# an unformatted form.
|
||||
#
|
||||
# $Id: leo,v 1.33 2008/04/22 22:23:39 scip Exp $
|
||||
#
|
||||
# Copyleft (l) 2000-2016 by Thomas v.D. <tlinden@cpan.org>. leo may be
|
||||
# used and distributed under the terms of the GNU General Public License.
|
||||
# All other brand and product names are trademarks, registered trademarks
|
||||
@@ -37,7 +35,7 @@ my $default_c = "\033[0m"; # reset default terminal color
|
||||
my $bold_c = "\033[0;34m"; # blue color
|
||||
my $copy_c = "\033[0;35m"; # copyright message color (green)
|
||||
|
||||
my $version = "1.39";
|
||||
my $version = "1.41";
|
||||
my $config = $ENV{HOME} . "/.leo";
|
||||
my $cache = $ENV{HOME} . "/.leo-CACHE.db";
|
||||
|
||||
@@ -45,10 +43,10 @@ my $debugging = 0;
|
||||
|
||||
#defaults for config
|
||||
my %conf = (
|
||||
use_cache => "no",
|
||||
use_color => "yes",
|
||||
use_latin => "yes"
|
||||
);
|
||||
use_cache => "no",
|
||||
use_color => "yes",
|
||||
use_latin => "yes"
|
||||
);
|
||||
|
||||
my %validopts = qw(use_cache 0 use_color 0 user_agent 0 use_latin 0);
|
||||
my %line = %validopts;
|
||||
@@ -68,17 +66,17 @@ isatty(1) && eval q{ use open OUT => ':locale'};
|
||||
#
|
||||
Getopt::Long::Configure( qw(no_ignore_case));
|
||||
if (! GetOptions (
|
||||
"spelltolerance|s=s" => \$o_s,
|
||||
"morphology|m=s" => \$o_m,
|
||||
"chartolerance|c=s" => \$o_c,
|
||||
"language|l=s" => \$o_l,
|
||||
"force|f" => \$o_f,
|
||||
"version|v" => \$o_v,
|
||||
"help|h" => \$o_h,
|
||||
"debug|d" => \$o_d,
|
||||
"noescapechars|n" => \$o_n,
|
||||
"user|u=s" => \$o_u,
|
||||
"passwd|p=s" => \$o_p
|
||||
"spelltolerance|s=s" => \$o_s,
|
||||
"morphology|m=s" => \$o_m,
|
||||
"chartolerance|c=s" => \$o_c,
|
||||
"language|l=s" => \$o_l,
|
||||
"force|f" => \$o_f,
|
||||
"version|v" => \$o_v,
|
||||
"help|h" => \$o_h,
|
||||
"debug|d" => \$o_d,
|
||||
"noescapechars|n" => \$o_n,
|
||||
"user|u=s" => \$o_u,
|
||||
"passwd|p=s" => \$o_p
|
||||
) ) {
|
||||
&usage;
|
||||
}
|
||||
@@ -137,7 +135,8 @@ if (-e $config) {
|
||||
foreach my $opt (keys %conf) {
|
||||
if (!exists $validopts{$opt}) {
|
||||
print "<$opt>\n";
|
||||
print STDERR "Error in config $config line: " . $line{$opt} . ". Unsupported option \"$opt\"!\n";
|
||||
print STDERR "Error in config $config line: " .
|
||||
$line{$opt} . ". Unsupported option \"$opt\"!\n";
|
||||
exit;
|
||||
}
|
||||
}
|
||||
@@ -205,28 +204,28 @@ if($o_c) {
|
||||
}
|
||||
|
||||
if (exists $ENV{http_proxy} and $o_u) {
|
||||
# authenticate
|
||||
if (! $o_p) {
|
||||
# ask for it
|
||||
my $proxy_pass;
|
||||
local $| = 1;
|
||||
print "password: ";
|
||||
eval {
|
||||
local($|) = 1;
|
||||
local(*TTY);
|
||||
open(TTY,"/dev/tty") or die "No /dev/tty!";
|
||||
system ("stty -echo </dev/tty") and die "stty failed!";
|
||||
chomp($proxy_pass = <TTY>);
|
||||
print STDERR "\r\n";
|
||||
system ("stty echo </dev/tty") and die "stty failed!";
|
||||
close(TTY);
|
||||
};
|
||||
if ($@) {
|
||||
$proxy_pass = <>;
|
||||
}
|
||||
chomp $proxy_pass;
|
||||
$PARAM{"-ProxyPass"} = $proxy_pass;
|
||||
# authenticate
|
||||
if (! $o_p) {
|
||||
# ask for it
|
||||
my $proxy_pass;
|
||||
local $| = 1;
|
||||
print "password: ";
|
||||
eval {
|
||||
local($|) = 1;
|
||||
local(*TTY);
|
||||
open(TTY,"/dev/tty") or die "No /dev/tty!";
|
||||
system ("stty -echo </dev/tty") and die "stty failed!";
|
||||
chomp($proxy_pass = <TTY>);
|
||||
print STDERR "\r\n";
|
||||
system ("stty echo </dev/tty") and die "stty failed!";
|
||||
close(TTY);
|
||||
};
|
||||
if ($@) {
|
||||
$proxy_pass = <>;
|
||||
}
|
||||
chomp $proxy_pass;
|
||||
$PARAM{"-ProxyPass"} = $proxy_pass;
|
||||
}
|
||||
}
|
||||
|
||||
my (@match, $lines, $maxsize);
|
||||
@@ -246,7 +245,7 @@ if(exists $CACHE{$cache_key} && $conf{use_cache} eq "yes") {
|
||||
}
|
||||
else {
|
||||
my $leo = new WWW::Dict::Leo::Org(%PARAM) or
|
||||
die "Could not initialize WWW::Dict::Leo::Org: $!\n";
|
||||
die "Could not initialize WWW::Dict::Leo::Org: $!\n";
|
||||
@match = $leo->translate($string);
|
||||
$lines = $leo->lines();
|
||||
$maxsize = $leo->maxsize();
|
||||
@@ -328,19 +327,36 @@ Translate a term from german to english or vice versa.
|
||||
-s, --spelltolerance=on|off allow spelling errors
|
||||
-m, --morphology=none|forcedAll provide morphology information
|
||||
-c, --chartolerance=fuzzy|exact allow umlaut alternatives
|
||||
-l, --language=en|fr|de2(en|fr)|(en|fr)2de translation direction
|
||||
-n, --noescapechars dont use escapes for highlighting
|
||||
-f, --force don't use the query cache
|
||||
-u, --user=username user for proxy authentication
|
||||
-p, --passwd=password cleartext passphrase for proxy authentication
|
||||
-h, --help display this help and exit
|
||||
-d, --debug enable debugging output
|
||||
-v, --version output version information and exit
|
||||
-l, --language=[de2]<countrycode>[2de] translation direction
|
||||
-n, --noescapechars dont use escapes for highlighting
|
||||
-f, --force don't use the query cache
|
||||
-u, --user=username user for proxy authentication
|
||||
-p, --passwd=password cleartext passphrase for proxy authentication
|
||||
-h, --help display this help and exit
|
||||
-d, --debug enable debugging output
|
||||
-v, --version output version information and exit
|
||||
|
||||
<term> is the string you are asking to be translated. It will
|
||||
be requested from STDIN if not specified on the commandline.
|
||||
|
||||
Report bugs to <tom\@daemon.de>.
|
||||
Supported <countrycode>s are:
|
||||
|
||||
en english
|
||||
es spanish
|
||||
ru russian
|
||||
pt portugese
|
||||
pl polish
|
||||
ch chinese
|
||||
|
||||
You can specify only the country code, or append B<de2> in order to
|
||||
force translation to german, or preprend B<de2> in order to translate
|
||||
to the other language. Valid examples:
|
||||
|
||||
ru to or from russian
|
||||
de2pl to polish
|
||||
es2de spanish to german
|
||||
|
||||
Report bugs to <tlinden\@cpan.org>.
|
||||
);
|
||||
|
||||
exit 1;
|
||||
@@ -412,11 +428,30 @@ Default: B<relaxed>.
|
||||
|
||||
=item I<-l --language>
|
||||
|
||||
Translation direction.
|
||||
Translation direction. Please note that dict.leo.org always translates
|
||||
either to or from german.
|
||||
|
||||
Possible values: B<en>, B<fr>, B<de2en>, B<en2de>, B<de2fr> or B<fr2de>.
|
||||
The following languages are supported: english, polish, spanish, portugese
|
||||
russian and chinese.
|
||||
|
||||
B<en> and B<fr> do select the translation direction automatically.
|
||||
You can specify only the country code, or append B<de2> in order to
|
||||
force translation to german, or preprend B<de2> in order to translate
|
||||
to the other language.
|
||||
|
||||
Valid examples:
|
||||
|
||||
ru to or from russian
|
||||
de2pl to polish
|
||||
es2de spanish to german
|
||||
|
||||
Valid country codes:
|
||||
|
||||
en english
|
||||
es spanish
|
||||
ru russian
|
||||
pt portugese
|
||||
pl polish
|
||||
ch chinese
|
||||
|
||||
Default: B<en>.
|
||||
|
||||
@@ -521,7 +556,7 @@ The config option has higher precedence.
|
||||
You may modify the user agent as B<leo> identifies itself
|
||||
on the target site. The default is:
|
||||
|
||||
User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11)
|
||||
User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11)
|
||||
|
||||
=back
|
||||
|
||||
@@ -534,11 +569,11 @@ translation option settings.
|
||||
|
||||
If you, for example, execute once the following query:
|
||||
|
||||
% leo langnase
|
||||
% leo langnase
|
||||
|
||||
and somewhere later:
|
||||
|
||||
% leo -c exact
|
||||
% leo -c exact
|
||||
|
||||
then B<leo> will treat the latter query as a different
|
||||
one than the previous one, because I<dict.leo.org>
|
||||
@@ -552,7 +587,7 @@ B<leo> can be used with a HTTP proxy service. For this to
|
||||
work, you only have to set the environment variable
|
||||
B<http_proxy>. It has the following format:
|
||||
|
||||
PROTO://[USER:PASSWD@]SERVER[:PORT]
|
||||
PROTO://[USER:PASSWD@]SERVER[:PORT]
|
||||
|
||||
The only supported protocol is B<http>. If your proxy works without
|
||||
authentication, you can omit the B<user:passwd> part. If no
|
||||
@@ -560,11 +595,11 @@ port is specified, B<80> will be used.
|
||||
|
||||
Here is an example (for bash):
|
||||
|
||||
export http_proxy=http://172.16.120.120:3128
|
||||
export http_proxy=http://172.16.120.120:3128
|
||||
|
||||
and an example with authentication credentials:
|
||||
|
||||
export http_proxy=http://max:34dwe2@172.16.120.120:3128
|
||||
export http_proxy=http://max:34dwe2@172.16.120.120:3128
|
||||
|
||||
As security is always important, I have to warn you, that
|
||||
other users on the same machine can read your environment
|
||||
@@ -583,8 +618,8 @@ the password will be read from STDIN.
|
||||
|
||||
=head1 FILES
|
||||
|
||||
~/.leo the config file for leo. Not required.
|
||||
~/.leo-CACHE.db* the cache file.
|
||||
~/.leo the config file for leo. Not required.
|
||||
~/.leo-CACHE.db* the cache file.
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
@@ -632,6 +667,6 @@ browser, what is not prohibited. At least not today.
|
||||
|
||||
=head1 VERSION
|
||||
|
||||
This is the manpage for B<leo> version B<1.39>.
|
||||
This is the manpage for B<leo> version B<1.41>.
|
||||
|
||||
=cut
|
||||
|
||||
Reference in New Issue
Block a user