diff --git a/Changelog b/Changelog index a357b40..195f8ef 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,13 @@ +1.41: + +Generalized lang parsing and passing to dict.leo.org, which +also adds support for new languages like ru, ch or pl. Thanks +to J.A.Eichler. + +1.40: + +not logged, sorry. + 1.39: fixed rt.cpan.org#91464: disable caching if DB_File is not found. diff --git a/Org.pm b/Org.pm index 2e78901..48a6427 100644 --- a/Org.pm +++ b/Org.pm @@ -7,7 +7,7 @@ # or service marks of their respective holders. package WWW::Dict::Leo::Org; -$WWW::Dict::Leo::Org::VERSION = "1.40"; +$WWW::Dict::Leo::Org::VERSION = "1.41"; use strict; use warnings; @@ -63,34 +63,31 @@ sub translate { my @match = (); # - # form var transitions for searchLoc - # - my %lang = ( - de2en => 1, - en2de => -1, - de2fr => 1, - fr2de => -1, - es2de => -1, - de2es => 1, - es => 0, - en => 0, - fr => 0, - speak => "ende" - ); + # form var transitions for searchLoc(=translation direction) and lp(=language) + my %lang = ( speak => "ende" ); + my @langs = qw(en es ru pt fr pl ch it); if ($this->{"-Language"}) { - if ($this->{"-Language"} =~ /fr/) { - # used for francaise translation - $lang{speak} = "frde"; + # en | fr | ru2en | de2pl etc + # de2, 2de, de are not part of lang spec + if (! grep { $this->{"-Language"} =~ /$_/ } @langs) { + croak "Unsupported language: " . $this->{"-Language"}; } - elsif ($this->{"-Language"} =~ /es/) { - $lang{speak} = "esde"; + my $spec = $this->{"-Language"}; + my $l; + if ($spec =~ /(..)2de/) { + $l = $1; + $this->{"-Language"} = -1; + $lang{speak} = "${l}de"; } - if (exists $lang{$this->{"-Language"}}) { - $this->{"-Language"} = $lang{$this->{"-Language"}}; + elsif ($spec =~ /de2(..)/) { + $l = $1; + $this->{"-Language"} = 1; + $lang{speak} = "${l}de"; } else { - croak "Unsupported language: " . $this->{"-Language"}; + $lang{speak} = $this->{"-Language"} . 'de'; + $this->{"-Language"} = 0; } } @@ -98,11 +95,11 @@ sub translate { # cut invalid values for parameters or set defaults if unspecified # my %form = ( - spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" }, - deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" }, - cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" }, - searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" }, - ); + spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" }, + deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" }, + cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" }, + searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" }, + ); my @form; foreach my $var (keys %form) { if (grep { $form{$var}->{val} eq $_ } @{$form{$var}->{mask}}) { @@ -166,10 +163,10 @@ sub translate { } my $conn = new IO::Socket::INET( - Proto => "tcp", - PeerAddr => $ip, - PeerPort => $port, - ) or die "Unable to connect to $ip:$port: $!\n"; + Proto => "tcp", + PeerAddr => $ip, + PeerPort => $port, + ) or die "Unable to connect to $ip:$port: $!\n"; $conn->autoflush(1); $this->debug( "GET $url?$form HTTP/1.0"); @@ -208,41 +205,41 @@ Accept-Language: en_US, en\r\n); # got HTTP error my $err = $2; if ($err == 407) { - croak "proxy auth required or access denied!\n"; + croak "proxy auth required or access denied!\n"; } else { - if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ || - $site =~ /found no matches for your search/ - ) { - return (); - } - else { - croak "got HTTP error $err!\n"; - } + if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ || + $site =~ /found no matches for your search/ + ) { + return (); + } + else { + croak "got HTTP error $err!\n"; + } } } } my @request = ( - { - id => 2, - row => sub { $this->row(@_); }, - hdr => sub { $this->hdr(@_); } - }, - { - id => 3, - hdr => sub { $this->hdr(@_); }, - row => sub { $this->row(@_); } - }, - { - id => 4, - hdr => sub { $this->hdr(@_); }, - row => sub { $this->row(@_); } - } - ); + { + id => 2, + row => sub { $this->row(@_); }, + hdr => sub { $this->hdr(@_); } + }, + { + id => 3, + hdr => sub { $this->hdr(@_); }, + row => sub { $this->row(@_); } + }, + { + id => 4, + hdr => sub { $this->hdr(@_); }, + row => sub { $this->row(@_); } + } + ); $this->{Linecount} = 0; my $p = HTML::TableParser->new( \@request, - { Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } ); + { Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } ); $site=~s/ /\ \;/g; $p->parse($site); @@ -418,35 +415,31 @@ Possible values: fuzzy, exact, relaxed. =item I<-Language> Translation direction. Please note that dict.leo.org always translates -either to or from german. The following values can be used: +either to or from german. -=over +The following languages are supported: english, polish, spanish, portugese +russian and chinese. -=item de +You can specify only the country code, or append B in order to +force translation to german, or preprend B in order to translate +to the other language. -Alias for B - german to english. +Valid examples: -=item fr + ru to or from russian + de2pl to polish + es2de spanish to german -Alias for B - german to french. +Valid country codes: -=item es + en english + es spanish + ru russian + pt portugese + pl polish + ch chinese -Alias for B - german to espaniol. - -=item en2de - -english to german. - -=item fr2de - -french to german. - -=item es2de - -espaniol to german. - -=back +Default: B. =back @@ -458,48 +451,48 @@ Use this method after initialization to connect to dict.leo.org and translate the given term. It returns an array of hashes containing the actual results. - use WWW::Dict::Leo::Org; - use Data::Dumper; - my $leo = new WWW::Dict::Leo::Org(); - my @matches = $leo->translate("test"); - print Dumper(\@matches); +use WWW::Dict::Leo::Org; +use Data::Dumper; +my $leo = new WWW::Dict::Leo::Org(); +my @matches = $leo->translate("test"); +print Dumper(\@matches); which prints: $VAR1 = [ + { + 'data' => [ + { + 'left' => 'check', + 'right' => 'der Test' + }, + { + 'left' => 'quiz (Amer.)', + 'right' => 'der Test    [Schule]' + ], + 'title' => 'Unmittelbare Treffer' + }, { - 'data' => [ - { - 'left' => 'check', - 'right' => 'der Test' - }, - { - 'left' => 'quiz (Amer.)', - 'right' => 'der Test    [Schule]' - ], - 'title' => 'Unmittelbare Treffer' + 'data' => [ + { + 'left' => 'to fail a test', + 'right' => 'einen Test nicht bestehen' + }, + { + 'left' => 'to test', + 'right' => 'Tests macheneinen Test machen' + } + ], + 'title' => 'Verben und Verbzusammensetzungen' }, - { - 'data' => [ - { - 'left' => 'to fail a test', - 'right' => 'einen Test nicht bestehen' - }, - { - 'left' => 'to test', - 'right' => 'Tests macheneinen Test machen' - } - ], - 'title' => 'Verben und Verbzusammensetzungen' - }, - 'data' => [ - { - 'left' => 'testing  adj.', - 'right' => 'im Test' - } - ], - 'title' => 'Wendungen und Ausdrücke' - } + 'data' => [ + { + 'left' => 'testing  adj.', + 'right' => 'im Test' + } + ], + 'title' => 'Wendungen und Ausdrücke' + } ]; @@ -541,6 +534,6 @@ Please don't forget to add debugging output! =head1 VERSION -1.40 +1.41 =cut diff --git a/leo b/leo index 33792cd..e232592 100755 --- a/leo +++ b/leo @@ -8,8 +8,6 @@ # the term to be translated. It will then return the results in # an unformatted form. # -# $Id: leo,v 1.33 2008/04/22 22:23:39 scip Exp $ -# # Copyleft (l) 2000-2016 by Thomas v.D. . leo may be # used and distributed under the terms of the GNU General Public License. # All other brand and product names are trademarks, registered trademarks @@ -37,7 +35,7 @@ my $default_c = "\033[0m"; # reset default terminal color my $bold_c = "\033[0;34m"; # blue color my $copy_c = "\033[0;35m"; # copyright message color (green) -my $version = "1.39"; +my $version = "1.41"; my $config = $ENV{HOME} . "/.leo"; my $cache = $ENV{HOME} . "/.leo-CACHE.db"; @@ -45,10 +43,10 @@ my $debugging = 0; #defaults for config my %conf = ( - use_cache => "no", - use_color => "yes", - use_latin => "yes" - ); + use_cache => "no", + use_color => "yes", + use_latin => "yes" + ); my %validopts = qw(use_cache 0 use_color 0 user_agent 0 use_latin 0); my %line = %validopts; @@ -68,17 +66,17 @@ isatty(1) && eval q{ use open OUT => ':locale'}; # Getopt::Long::Configure( qw(no_ignore_case)); if (! GetOptions ( - "spelltolerance|s=s" => \$o_s, - "morphology|m=s" => \$o_m, - "chartolerance|c=s" => \$o_c, - "language|l=s" => \$o_l, - "force|f" => \$o_f, - "version|v" => \$o_v, - "help|h" => \$o_h, - "debug|d" => \$o_d, - "noescapechars|n" => \$o_n, - "user|u=s" => \$o_u, - "passwd|p=s" => \$o_p + "spelltolerance|s=s" => \$o_s, + "morphology|m=s" => \$o_m, + "chartolerance|c=s" => \$o_c, + "language|l=s" => \$o_l, + "force|f" => \$o_f, + "version|v" => \$o_v, + "help|h" => \$o_h, + "debug|d" => \$o_d, + "noescapechars|n" => \$o_n, + "user|u=s" => \$o_u, + "passwd|p=s" => \$o_p ) ) { &usage; } @@ -137,7 +135,8 @@ if (-e $config) { foreach my $opt (keys %conf) { if (!exists $validopts{$opt}) { print "<$opt>\n"; - print STDERR "Error in config $config line: " . $line{$opt} . ". Unsupported option \"$opt\"!\n"; + print STDERR "Error in config $config line: " . + $line{$opt} . ". Unsupported option \"$opt\"!\n"; exit; } } @@ -205,28 +204,28 @@ if($o_c) { } if (exists $ENV{http_proxy} and $o_u) { - # authenticate - if (! $o_p) { - # ask for it - my $proxy_pass; - local $| = 1; - print "password: "; - eval { - local($|) = 1; - local(*TTY); - open(TTY,"/dev/tty") or die "No /dev/tty!"; - system ("stty -echo ); - print STDERR "\r\n"; - system ("stty echo ; - } - chomp $proxy_pass; - $PARAM{"-ProxyPass"} = $proxy_pass; + # authenticate + if (! $o_p) { + # ask for it + my $proxy_pass; + local $| = 1; + print "password: "; + eval { + local($|) = 1; + local(*TTY); + open(TTY,"/dev/tty") or die "No /dev/tty!"; + system ("stty -echo ); + print STDERR "\r\n"; + system ("stty echo ; } + chomp $proxy_pass; + $PARAM{"-ProxyPass"} = $proxy_pass; + } } my (@match, $lines, $maxsize); @@ -246,7 +245,7 @@ if(exists $CACHE{$cache_key} && $conf{use_cache} eq "yes") { } else { my $leo = new WWW::Dict::Leo::Org(%PARAM) or - die "Could not initialize WWW::Dict::Leo::Org: $!\n"; + die "Could not initialize WWW::Dict::Leo::Org: $!\n"; @match = $leo->translate($string); $lines = $leo->lines(); $maxsize = $leo->maxsize(); @@ -328,19 +327,36 @@ Translate a term from german to english or vice versa. -s, --spelltolerance=on|off allow spelling errors -m, --morphology=none|forcedAll provide morphology information -c, --chartolerance=fuzzy|exact allow umlaut alternatives - -l, --language=en|fr|de2(en|fr)|(en|fr)2de translation direction - -n, --noescapechars dont use escapes for highlighting - -f, --force don't use the query cache - -u, --user=username user for proxy authentication - -p, --passwd=password cleartext passphrase for proxy authentication - -h, --help display this help and exit - -d, --debug enable debugging output - -v, --version output version information and exit + -l, --language=[de2][2de] translation direction +-n, --noescapechars dont use escapes for highlighting +-f, --force don't use the query cache +-u, --user=username user for proxy authentication +-p, --passwd=password cleartext passphrase for proxy authentication +-h, --help display this help and exit +-d, --debug enable debugging output +-v, --version output version information and exit is the string you are asking to be translated. It will be requested from STDIN if not specified on the commandline. -Report bugs to . +Supported s are: + +en english +es spanish +ru russian +pt portugese +pl polish +ch chinese + +You can specify only the country code, or append B in order to +force translation to german, or preprend B in order to translate +to the other language. Valid examples: + +ru to or from russian +de2pl to polish +es2de spanish to german + +Report bugs to . ); exit 1; @@ -412,11 +428,30 @@ Default: B. =item I<-l --language> -Translation direction. +Translation direction. Please note that dict.leo.org always translates +either to or from german. -Possible values: B, B, B, B, B or B. +The following languages are supported: english, polish, spanish, portugese +russian and chinese. -B and B do select the translation direction automatically. +You can specify only the country code, or append B in order to +force translation to german, or preprend B in order to translate +to the other language. + +Valid examples: + +ru to or from russian +de2pl to polish +es2de spanish to german + +Valid country codes: + +en english +es spanish +ru russian +pt portugese +pl polish +ch chinese Default: B. @@ -521,7 +556,7 @@ The config option has higher precedence. You may modify the user agent as B identifies itself on the target site. The default is: - User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11) +User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11) =back @@ -534,11 +569,11 @@ translation option settings. If you, for example, execute once the following query: - % leo langnase +% leo langnase and somewhere later: - % leo -c exact +% leo -c exact then B will treat the latter query as a different one than the previous one, because I @@ -552,7 +587,7 @@ B can be used with a HTTP proxy service. For this to work, you only have to set the environment variable B. It has the following format: - PROTO://[USER:PASSWD@]SERVER[:PORT] +PROTO://[USER:PASSWD@]SERVER[:PORT] The only supported protocol is B. If your proxy works without authentication, you can omit the B part. If no @@ -560,11 +595,11 @@ port is specified, B<80> will be used. Here is an example (for bash): - export http_proxy=http://172.16.120.120:3128 +export http_proxy=http://172.16.120.120:3128 and an example with authentication credentials: - export http_proxy=http://max:34dwe2@172.16.120.120:3128 +export http_proxy=http://max:34dwe2@172.16.120.120:3128 As security is always important, I have to warn you, that other users on the same machine can read your environment @@ -583,8 +618,8 @@ the password will be read from STDIN. =head1 FILES - ~/.leo the config file for leo. Not required. - ~/.leo-CACHE.db* the cache file. +~/.leo the config file for leo. Not required. +~/.leo-CACHE.db* the cache file. =head1 AUTHOR @@ -632,6 +667,6 @@ browser, what is not prohibited. At least not today. =head1 VERSION -This is the manpage for B version B<1.39>. +This is the manpage for B version B<1.41>. =cut