fix lang spec and parsing

This commit is contained in:
TLINDEN
2016-10-08 12:36:25 +02:00
parent 50ac4d65ed
commit a865f75f0e
3 changed files with 217 additions and 179 deletions

View File

@@ -1,3 +1,13 @@
1.41:
Generalized lang parsing and passing to dict.leo.org, which
also adds support for new languages like ru, ch or pl. Thanks
to J.A.Eichler.
1.40:
not logged, sorry.
1.39: 1.39:
fixed rt.cpan.org#91464: disable caching if DB_File is not found. fixed rt.cpan.org#91464: disable caching if DB_File is not found.

227
Org.pm
View File

@@ -7,7 +7,7 @@
# or service marks of their respective holders. # or service marks of their respective holders.
package WWW::Dict::Leo::Org; package WWW::Dict::Leo::Org;
$WWW::Dict::Leo::Org::VERSION = "1.40"; $WWW::Dict::Leo::Org::VERSION = "1.41";
use strict; use strict;
use warnings; use warnings;
@@ -63,34 +63,31 @@ sub translate {
my @match = (); my @match = ();
# #
# form var transitions for searchLoc # form var transitions for searchLoc(=translation direction) and lp(=language)
# my %lang = ( speak => "ende" );
my %lang = (
de2en => 1,
en2de => -1,
de2fr => 1,
fr2de => -1,
es2de => -1,
de2es => 1,
es => 0,
en => 0,
fr => 0,
speak => "ende"
);
my @langs = qw(en es ru pt fr pl ch it);
if ($this->{"-Language"}) { if ($this->{"-Language"}) {
if ($this->{"-Language"} =~ /fr/) { # en | fr | ru2en | de2pl etc
# used for francaise translation # de2, 2de, de are not part of lang spec
$lang{speak} = "frde"; if (! grep { $this->{"-Language"} =~ /$_/ } @langs) {
croak "Unsupported language: " . $this->{"-Language"};
} }
elsif ($this->{"-Language"} =~ /es/) { my $spec = $this->{"-Language"};
$lang{speak} = "esde"; my $l;
if ($spec =~ /(..)2de/) {
$l = $1;
$this->{"-Language"} = -1;
$lang{speak} = "${l}de";
} }
if (exists $lang{$this->{"-Language"}}) { elsif ($spec =~ /de2(..)/) {
$this->{"-Language"} = $lang{$this->{"-Language"}}; $l = $1;
$this->{"-Language"} = 1;
$lang{speak} = "${l}de";
} }
else { else {
croak "Unsupported language: " . $this->{"-Language"}; $lang{speak} = $this->{"-Language"} . 'de';
$this->{"-Language"} = 0;
} }
} }
@@ -98,11 +95,11 @@ sub translate {
# cut invalid values for parameters or set defaults if unspecified # cut invalid values for parameters or set defaults if unspecified
# #
my %form = ( my %form = (
spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" }, spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" },
deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" }, deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" },
cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" }, cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" },
searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" }, searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" },
); );
my @form; my @form;
foreach my $var (keys %form) { foreach my $var (keys %form) {
if (grep { $form{$var}->{val} eq $_ } @{$form{$var}->{mask}}) { if (grep { $form{$var}->{val} eq $_ } @{$form{$var}->{mask}}) {
@@ -166,10 +163,10 @@ sub translate {
} }
my $conn = new IO::Socket::INET( my $conn = new IO::Socket::INET(
Proto => "tcp", Proto => "tcp",
PeerAddr => $ip, PeerAddr => $ip,
PeerPort => $port, PeerPort => $port,
) or die "Unable to connect to $ip:$port: $!\n"; ) or die "Unable to connect to $ip:$port: $!\n";
$conn->autoflush(1); $conn->autoflush(1);
$this->debug( "GET $url?$form HTTP/1.0"); $this->debug( "GET $url?$form HTTP/1.0");
@@ -208,41 +205,41 @@ Accept-Language: en_US, en\r\n);
# got HTTP error # got HTTP error
my $err = $2; my $err = $2;
if ($err == 407) { if ($err == 407) {
croak "proxy auth required or access denied!\n"; croak "proxy auth required or access denied!\n";
} }
else { else {
if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ || if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ ||
$site =~ /found no matches for your search/ $site =~ /found no matches for your search/
) { ) {
return (); return ();
} }
else { else {
croak "got HTTP error $err!\n"; croak "got HTTP error $err!\n";
} }
} }
} }
} }
my @request = ( my @request = (
{ {
id => 2, id => 2,
row => sub { $this->row(@_); }, row => sub { $this->row(@_); },
hdr => sub { $this->hdr(@_); } hdr => sub { $this->hdr(@_); }
}, },
{ {
id => 3, id => 3,
hdr => sub { $this->hdr(@_); }, hdr => sub { $this->hdr(@_); },
row => sub { $this->row(@_); } row => sub { $this->row(@_); }
}, },
{ {
id => 4, id => 4,
hdr => sub { $this->hdr(@_); }, hdr => sub { $this->hdr(@_); },
row => sub { $this->row(@_); } row => sub { $this->row(@_); }
} }
); );
$this->{Linecount} = 0; $this->{Linecount} = 0;
my $p = HTML::TableParser->new( \@request, my $p = HTML::TableParser->new( \@request,
{ Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } ); { Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } );
$site=~s/ /\&nbsp\;/g; $site=~s/ /\&nbsp\;/g;
$p->parse($site); $p->parse($site);
@@ -418,35 +415,31 @@ Possible values: fuzzy, exact, relaxed.
=item I<-Language> =item I<-Language>
Translation direction. Please note that dict.leo.org always translates Translation direction. Please note that dict.leo.org always translates
either to or from german. The following values can be used: either to or from german.
=over The following languages are supported: english, polish, spanish, portugese
russian and chinese.
=item de You can specify only the country code, or append B<de2> in order to
force translation to german, or preprend B<de2> in order to translate
to the other language.
Alias for B<de2en> - german to english. Valid examples:
=item fr ru to or from russian
de2pl to polish
es2de spanish to german
Alias for B<de2fr> - german to french. Valid country codes:
=item es en english
es spanish
ru russian
pt portugese
pl polish
ch chinese
Alias for B<de2es> - german to espaniol. Default: B<en>.
=item en2de
english to german.
=item fr2de
french to german.
=item es2de
espaniol to german.
=back
=back =back
@@ -458,48 +451,48 @@ Use this method after initialization to connect to dict.leo.org
and translate the given term. It returns an array of hashes containing and translate the given term. It returns an array of hashes containing
the actual results. the actual results.
use WWW::Dict::Leo::Org; use WWW::Dict::Leo::Org;
use Data::Dumper; use Data::Dumper;
my $leo = new WWW::Dict::Leo::Org(); my $leo = new WWW::Dict::Leo::Org();
my @matches = $leo->translate("test"); my @matches = $leo->translate("test");
print Dumper(\@matches); print Dumper(\@matches);
which prints: which prints:
$VAR1 = [ $VAR1 = [
{
'data' => [
{
'left' => 'check',
'right' => 'der Test'
},
{
'left' => 'quiz (Amer.)',
'right' => 'der Test <20><> [Schule]'
],
'title' => 'Unmittelbare Treffer'
},
{ {
'data' => [ 'data' => [
{ {
'left' => 'check', 'left' => 'to fail a test',
'right' => 'der Test' 'right' => 'einen Test nicht bestehen'
}, },
{ {
'left' => 'quiz (Amer.)', 'left' => 'to test',
'right' => 'der Test <20><> [Schule]' 'right' => 'Tests macheneinen Test machen'
], }
'title' => 'Unmittelbare Treffer' ],
'title' => 'Verben und Verbzusammensetzungen'
}, },
{ 'data' => [
'data' => [ {
{ 'left' => 'testing <20>adj.',
'left' => 'to fail a test', 'right' => 'im Test'
'right' => 'einen Test nicht bestehen' }
}, ],
{ 'title' => 'Wendungen und Ausdr<64>cke'
'left' => 'to test', }
'right' => 'Tests macheneinen Test machen'
}
],
'title' => 'Verben und Verbzusammensetzungen'
},
'data' => [
{
'left' => 'testing <20>adj.',
'right' => 'im Test'
}
],
'title' => 'Wendungen und Ausdr<64>cke'
}
]; ];
@@ -541,6 +534,6 @@ Please don't forget to add debugging output!
=head1 VERSION =head1 VERSION
1.40 1.41
=cut =cut

159
leo
View File

@@ -8,8 +8,6 @@
# the term to be translated. It will then return the results in # the term to be translated. It will then return the results in
# an unformatted form. # an unformatted form.
# #
# $Id: leo,v 1.33 2008/04/22 22:23:39 scip Exp $
#
# Copyleft (l) 2000-2016 by Thomas v.D. <tlinden@cpan.org>. leo may be # Copyleft (l) 2000-2016 by Thomas v.D. <tlinden@cpan.org>. leo may be
# used and distributed under the terms of the GNU General Public License. # used and distributed under the terms of the GNU General Public License.
# All other brand and product names are trademarks, registered trademarks # All other brand and product names are trademarks, registered trademarks
@@ -37,7 +35,7 @@ my $default_c = "\033[0m"; # reset default terminal color
my $bold_c = "\033[0;34m"; # blue color my $bold_c = "\033[0;34m"; # blue color
my $copy_c = "\033[0;35m"; # copyright message color (green) my $copy_c = "\033[0;35m"; # copyright message color (green)
my $version = "1.39"; my $version = "1.41";
my $config = $ENV{HOME} . "/.leo"; my $config = $ENV{HOME} . "/.leo";
my $cache = $ENV{HOME} . "/.leo-CACHE.db"; my $cache = $ENV{HOME} . "/.leo-CACHE.db";
@@ -45,10 +43,10 @@ my $debugging = 0;
#defaults for config #defaults for config
my %conf = ( my %conf = (
use_cache => "no", use_cache => "no",
use_color => "yes", use_color => "yes",
use_latin => "yes" use_latin => "yes"
); );
my %validopts = qw(use_cache 0 use_color 0 user_agent 0 use_latin 0); my %validopts = qw(use_cache 0 use_color 0 user_agent 0 use_latin 0);
my %line = %validopts; my %line = %validopts;
@@ -68,17 +66,17 @@ isatty(1) && eval q{ use open OUT => ':locale'};
# #
Getopt::Long::Configure( qw(no_ignore_case)); Getopt::Long::Configure( qw(no_ignore_case));
if (! GetOptions ( if (! GetOptions (
"spelltolerance|s=s" => \$o_s, "spelltolerance|s=s" => \$o_s,
"morphology|m=s" => \$o_m, "morphology|m=s" => \$o_m,
"chartolerance|c=s" => \$o_c, "chartolerance|c=s" => \$o_c,
"language|l=s" => \$o_l, "language|l=s" => \$o_l,
"force|f" => \$o_f, "force|f" => \$o_f,
"version|v" => \$o_v, "version|v" => \$o_v,
"help|h" => \$o_h, "help|h" => \$o_h,
"debug|d" => \$o_d, "debug|d" => \$o_d,
"noescapechars|n" => \$o_n, "noescapechars|n" => \$o_n,
"user|u=s" => \$o_u, "user|u=s" => \$o_u,
"passwd|p=s" => \$o_p "passwd|p=s" => \$o_p
) ) { ) ) {
&usage; &usage;
} }
@@ -137,7 +135,8 @@ if (-e $config) {
foreach my $opt (keys %conf) { foreach my $opt (keys %conf) {
if (!exists $validopts{$opt}) { if (!exists $validopts{$opt}) {
print "<$opt>\n"; print "<$opt>\n";
print STDERR "Error in config $config line: " . $line{$opt} . ". Unsupported option \"$opt\"!\n"; print STDERR "Error in config $config line: " .
$line{$opt} . ". Unsupported option \"$opt\"!\n";
exit; exit;
} }
} }
@@ -205,28 +204,28 @@ if($o_c) {
} }
if (exists $ENV{http_proxy} and $o_u) { if (exists $ENV{http_proxy} and $o_u) {
# authenticate # authenticate
if (! $o_p) { if (! $o_p) {
# ask for it # ask for it
my $proxy_pass; my $proxy_pass;
local $| = 1; local $| = 1;
print "password: "; print "password: ";
eval { eval {
local($|) = 1; local($|) = 1;
local(*TTY); local(*TTY);
open(TTY,"/dev/tty") or die "No /dev/tty!"; open(TTY,"/dev/tty") or die "No /dev/tty!";
system ("stty -echo </dev/tty") and die "stty failed!"; system ("stty -echo </dev/tty") and die "stty failed!";
chomp($proxy_pass = <TTY>); chomp($proxy_pass = <TTY>);
print STDERR "\r\n"; print STDERR "\r\n";
system ("stty echo </dev/tty") and die "stty failed!"; system ("stty echo </dev/tty") and die "stty failed!";
close(TTY); close(TTY);
}; };
if ($@) { if ($@) {
$proxy_pass = <>; $proxy_pass = <>;
}
chomp $proxy_pass;
$PARAM{"-ProxyPass"} = $proxy_pass;
} }
chomp $proxy_pass;
$PARAM{"-ProxyPass"} = $proxy_pass;
}
} }
my (@match, $lines, $maxsize); my (@match, $lines, $maxsize);
@@ -246,7 +245,7 @@ if(exists $CACHE{$cache_key} && $conf{use_cache} eq "yes") {
} }
else { else {
my $leo = new WWW::Dict::Leo::Org(%PARAM) or my $leo = new WWW::Dict::Leo::Org(%PARAM) or
die "Could not initialize WWW::Dict::Leo::Org: $!\n"; die "Could not initialize WWW::Dict::Leo::Org: $!\n";
@match = $leo->translate($string); @match = $leo->translate($string);
$lines = $leo->lines(); $lines = $leo->lines();
$maxsize = $leo->maxsize(); $maxsize = $leo->maxsize();
@@ -328,19 +327,36 @@ Translate a term from german to english or vice versa.
-s, --spelltolerance=on|off allow spelling errors -s, --spelltolerance=on|off allow spelling errors
-m, --morphology=none|forcedAll provide morphology information -m, --morphology=none|forcedAll provide morphology information
-c, --chartolerance=fuzzy|exact allow umlaut alternatives -c, --chartolerance=fuzzy|exact allow umlaut alternatives
-l, --language=en|fr|de2(en|fr)|(en|fr)2de translation direction -l, --language=[de2]<countrycode>[2de] translation direction
-n, --noescapechars dont use escapes for highlighting -n, --noescapechars dont use escapes for highlighting
-f, --force don't use the query cache -f, --force don't use the query cache
-u, --user=username user for proxy authentication -u, --user=username user for proxy authentication
-p, --passwd=password cleartext passphrase for proxy authentication -p, --passwd=password cleartext passphrase for proxy authentication
-h, --help display this help and exit -h, --help display this help and exit
-d, --debug enable debugging output -d, --debug enable debugging output
-v, --version output version information and exit -v, --version output version information and exit
<term> is the string you are asking to be translated. It will <term> is the string you are asking to be translated. It will
be requested from STDIN if not specified on the commandline. be requested from STDIN if not specified on the commandline.
Report bugs to <tom\@daemon.de>. Supported <countrycode>s are:
en english
es spanish
ru russian
pt portugese
pl polish
ch chinese
You can specify only the country code, or append B<de2> in order to
force translation to german, or preprend B<de2> in order to translate
to the other language. Valid examples:
ru to or from russian
de2pl to polish
es2de spanish to german
Report bugs to <tlinden\@cpan.org>.
); );
exit 1; exit 1;
@@ -412,11 +428,30 @@ Default: B<relaxed>.
=item I<-l --language> =item I<-l --language>
Translation direction. Translation direction. Please note that dict.leo.org always translates
either to or from german.
Possible values: B<en>, B<fr>, B<de2en>, B<en2de>, B<de2fr> or B<fr2de>. The following languages are supported: english, polish, spanish, portugese
russian and chinese.
B<en> and B<fr> do select the translation direction automatically. You can specify only the country code, or append B<de2> in order to
force translation to german, or preprend B<de2> in order to translate
to the other language.
Valid examples:
ru to or from russian
de2pl to polish
es2de spanish to german
Valid country codes:
en english
es spanish
ru russian
pt portugese
pl polish
ch chinese
Default: B<en>. Default: B<en>.
@@ -521,7 +556,7 @@ The config option has higher precedence.
You may modify the user agent as B<leo> identifies itself You may modify the user agent as B<leo> identifies itself
on the target site. The default is: on the target site. The default is:
User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11) User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11)
=back =back
@@ -534,11 +569,11 @@ translation option settings.
If you, for example, execute once the following query: If you, for example, execute once the following query:
% leo langnase % leo langnase
and somewhere later: and somewhere later:
% leo -c exact % leo -c exact
then B<leo> will treat the latter query as a different then B<leo> will treat the latter query as a different
one than the previous one, because I<dict.leo.org> one than the previous one, because I<dict.leo.org>
@@ -552,7 +587,7 @@ B<leo> can be used with a HTTP proxy service. For this to
work, you only have to set the environment variable work, you only have to set the environment variable
B<http_proxy>. It has the following format: B<http_proxy>. It has the following format:
PROTO://[USER:PASSWD@]SERVER[:PORT] PROTO://[USER:PASSWD@]SERVER[:PORT]
The only supported protocol is B<http>. If your proxy works without The only supported protocol is B<http>. If your proxy works without
authentication, you can omit the B<user:passwd> part. If no authentication, you can omit the B<user:passwd> part. If no
@@ -560,11 +595,11 @@ port is specified, B<80> will be used.
Here is an example (for bash): Here is an example (for bash):
export http_proxy=http://172.16.120.120:3128 export http_proxy=http://172.16.120.120:3128
and an example with authentication credentials: and an example with authentication credentials:
export http_proxy=http://max:34dwe2@172.16.120.120:3128 export http_proxy=http://max:34dwe2@172.16.120.120:3128
As security is always important, I have to warn you, that As security is always important, I have to warn you, that
other users on the same machine can read your environment other users on the same machine can read your environment
@@ -583,8 +618,8 @@ the password will be read from STDIN.
=head1 FILES =head1 FILES
~/.leo the config file for leo. Not required. ~/.leo the config file for leo. Not required.
~/.leo-CACHE.db* the cache file. ~/.leo-CACHE.db* the cache file.
=head1 AUTHOR =head1 AUTHOR
@@ -632,6 +667,6 @@ browser, what is not prohibited. At least not today.
=head1 VERSION =head1 VERSION
This is the manpage for B<leo> version B<1.39>. This is the manpage for B<leo> version B<1.41>.
=cut =cut