fix lang spec and parsing

This commit is contained in:
TLINDEN
2016-10-08 12:36:25 +02:00
parent 50ac4d65ed
commit a865f75f0e
3 changed files with 217 additions and 179 deletions

View File

@@ -1,3 +1,13 @@
1.41:
Generalized lang parsing and passing to dict.leo.org, which
also adds support for new languages like ru, ch or pl. Thanks
to J.A.Eichler.
1.40:
not logged, sorry.
1.39:
fixed rt.cpan.org#91464: disable caching if DB_File is not found.

227
Org.pm
View File

@@ -7,7 +7,7 @@
# or service marks of their respective holders.
package WWW::Dict::Leo::Org;
$WWW::Dict::Leo::Org::VERSION = "1.40";
$WWW::Dict::Leo::Org::VERSION = "1.41";
use strict;
use warnings;
@@ -63,34 +63,31 @@ sub translate {
my @match = ();
#
# form var transitions for searchLoc
#
my %lang = (
de2en => 1,
en2de => -1,
de2fr => 1,
fr2de => -1,
es2de => -1,
de2es => 1,
es => 0,
en => 0,
fr => 0,
speak => "ende"
);
# form var transitions for searchLoc(=translation direction) and lp(=language)
my %lang = ( speak => "ende" );
my @langs = qw(en es ru pt fr pl ch it);
if ($this->{"-Language"}) {
if ($this->{"-Language"} =~ /fr/) {
# used for francaise translation
$lang{speak} = "frde";
# en | fr | ru2en | de2pl etc
# de2, 2de, de are not part of lang spec
if (! grep { $this->{"-Language"} =~ /$_/ } @langs) {
croak "Unsupported language: " . $this->{"-Language"};
}
elsif ($this->{"-Language"} =~ /es/) {
$lang{speak} = "esde";
my $spec = $this->{"-Language"};
my $l;
if ($spec =~ /(..)2de/) {
$l = $1;
$this->{"-Language"} = -1;
$lang{speak} = "${l}de";
}
if (exists $lang{$this->{"-Language"}}) {
$this->{"-Language"} = $lang{$this->{"-Language"}};
elsif ($spec =~ /de2(..)/) {
$l = $1;
$this->{"-Language"} = 1;
$lang{speak} = "${l}de";
}
else {
croak "Unsupported language: " . $this->{"-Language"};
$lang{speak} = $this->{"-Language"} . 'de';
$this->{"-Language"} = 0;
}
}
@@ -98,11 +95,11 @@ sub translate {
# cut invalid values for parameters or set defaults if unspecified
#
my %form = (
spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" },
deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" },
cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" },
searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" },
);
spellToler => { mask => [ qw(standard on off) ], val => $this->{"-SpellTolerance"} || "standard" },
deStem => { mask => [ qw(standard none forcedAll) ], val => $this->{"-Morphology"} || "standard" },
cmpType => { mask => [ qw(fuzzy exact relaxed) ], val => $this->{"-CharTolerance"} || "relaxed" },
searchLoc => { mask => [ qw(-1 0 1) ], val => $this->{"-Language"} || "0" },
);
my @form;
foreach my $var (keys %form) {
if (grep { $form{$var}->{val} eq $_ } @{$form{$var}->{mask}}) {
@@ -166,10 +163,10 @@ sub translate {
}
my $conn = new IO::Socket::INET(
Proto => "tcp",
PeerAddr => $ip,
PeerPort => $port,
) or die "Unable to connect to $ip:$port: $!\n";
Proto => "tcp",
PeerAddr => $ip,
PeerPort => $port,
) or die "Unable to connect to $ip:$port: $!\n";
$conn->autoflush(1);
$this->debug( "GET $url?$form HTTP/1.0");
@@ -208,41 +205,41 @@ Accept-Language: en_US, en\r\n);
# got HTTP error
my $err = $2;
if ($err == 407) {
croak "proxy auth required or access denied!\n";
croak "proxy auth required or access denied!\n";
}
else {
if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ ||
$site =~ /found no matches for your search/
) {
return ();
}
else {
croak "got HTTP error $err!\n";
}
if ($site =~ /Leider konnten wir zu Ihrem Suchbegriff/ ||
$site =~ /found no matches for your search/
) {
return ();
}
else {
croak "got HTTP error $err!\n";
}
}
}
}
my @request = (
{
id => 2,
row => sub { $this->row(@_); },
hdr => sub { $this->hdr(@_); }
},
{
id => 3,
hdr => sub { $this->hdr(@_); },
row => sub { $this->row(@_); }
},
{
id => 4,
hdr => sub { $this->hdr(@_); },
row => sub { $this->row(@_); }
}
);
{
id => 2,
row => sub { $this->row(@_); },
hdr => sub { $this->hdr(@_); }
},
{
id => 3,
hdr => sub { $this->hdr(@_); },
row => sub { $this->row(@_); }
},
{
id => 4,
hdr => sub { $this->hdr(@_); },
row => sub { $this->row(@_); }
}
);
$this->{Linecount} = 0;
my $p = HTML::TableParser->new( \@request,
{ Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } );
{ Decode => 1, Trim => 1, Chomp => 1, DecodeNBSP => 1 } );
$site=~s/ /\&nbsp\;/g;
$p->parse($site);
@@ -418,35 +415,31 @@ Possible values: fuzzy, exact, relaxed.
=item I<-Language>
Translation direction. Please note that dict.leo.org always translates
either to or from german. The following values can be used:
either to or from german.
=over
The following languages are supported: english, polish, spanish, portugese
russian and chinese.
=item de
You can specify only the country code, or append B<de2> in order to
force translation to german, or preprend B<de2> in order to translate
to the other language.
Alias for B<de2en> - german to english.
Valid examples:
=item fr
ru to or from russian
de2pl to polish
es2de spanish to german
Alias for B<de2fr> - german to french.
Valid country codes:
=item es
en english
es spanish
ru russian
pt portugese
pl polish
ch chinese
Alias for B<de2es> - german to espaniol.
=item en2de
english to german.
=item fr2de
french to german.
=item es2de
espaniol to german.
=back
Default: B<en>.
=back
@@ -458,48 +451,48 @@ Use this method after initialization to connect to dict.leo.org
and translate the given term. It returns an array of hashes containing
the actual results.
use WWW::Dict::Leo::Org;
use Data::Dumper;
my $leo = new WWW::Dict::Leo::Org();
my @matches = $leo->translate("test");
print Dumper(\@matches);
use WWW::Dict::Leo::Org;
use Data::Dumper;
my $leo = new WWW::Dict::Leo::Org();
my @matches = $leo->translate("test");
print Dumper(\@matches);
which prints:
$VAR1 = [
{
'data' => [
{
'left' => 'check',
'right' => 'der Test'
},
{
'left' => 'quiz (Amer.)',
'right' => 'der Test <20><> [Schule]'
],
'title' => 'Unmittelbare Treffer'
},
{
'data' => [
{
'left' => 'check',
'right' => 'der Test'
},
{
'left' => 'quiz (Amer.)',
'right' => 'der Test <20><> [Schule]'
],
'title' => 'Unmittelbare Treffer'
'data' => [
{
'left' => 'to fail a test',
'right' => 'einen Test nicht bestehen'
},
{
'left' => 'to test',
'right' => 'Tests macheneinen Test machen'
}
],
'title' => 'Verben und Verbzusammensetzungen'
},
{
'data' => [
{
'left' => 'to fail a test',
'right' => 'einen Test nicht bestehen'
},
{
'left' => 'to test',
'right' => 'Tests macheneinen Test machen'
}
],
'title' => 'Verben und Verbzusammensetzungen'
},
'data' => [
{
'left' => 'testing <20>adj.',
'right' => 'im Test'
}
],
'title' => 'Wendungen und Ausdr<64>cke'
}
'data' => [
{
'left' => 'testing <20>adj.',
'right' => 'im Test'
}
],
'title' => 'Wendungen und Ausdr<64>cke'
}
];
@@ -541,6 +534,6 @@ Please don't forget to add debugging output!
=head1 VERSION
1.40
1.41
=cut

159
leo
View File

@@ -8,8 +8,6 @@
# the term to be translated. It will then return the results in
# an unformatted form.
#
# $Id: leo,v 1.33 2008/04/22 22:23:39 scip Exp $
#
# Copyleft (l) 2000-2016 by Thomas v.D. <tlinden@cpan.org>. leo may be
# used and distributed under the terms of the GNU General Public License.
# All other brand and product names are trademarks, registered trademarks
@@ -37,7 +35,7 @@ my $default_c = "\033[0m"; # reset default terminal color
my $bold_c = "\033[0;34m"; # blue color
my $copy_c = "\033[0;35m"; # copyright message color (green)
my $version = "1.39";
my $version = "1.41";
my $config = $ENV{HOME} . "/.leo";
my $cache = $ENV{HOME} . "/.leo-CACHE.db";
@@ -45,10 +43,10 @@ my $debugging = 0;
#defaults for config
my %conf = (
use_cache => "no",
use_color => "yes",
use_latin => "yes"
);
use_cache => "no",
use_color => "yes",
use_latin => "yes"
);
my %validopts = qw(use_cache 0 use_color 0 user_agent 0 use_latin 0);
my %line = %validopts;
@@ -68,17 +66,17 @@ isatty(1) && eval q{ use open OUT => ':locale'};
#
Getopt::Long::Configure( qw(no_ignore_case));
if (! GetOptions (
"spelltolerance|s=s" => \$o_s,
"morphology|m=s" => \$o_m,
"chartolerance|c=s" => \$o_c,
"language|l=s" => \$o_l,
"force|f" => \$o_f,
"version|v" => \$o_v,
"help|h" => \$o_h,
"debug|d" => \$o_d,
"noescapechars|n" => \$o_n,
"user|u=s" => \$o_u,
"passwd|p=s" => \$o_p
"spelltolerance|s=s" => \$o_s,
"morphology|m=s" => \$o_m,
"chartolerance|c=s" => \$o_c,
"language|l=s" => \$o_l,
"force|f" => \$o_f,
"version|v" => \$o_v,
"help|h" => \$o_h,
"debug|d" => \$o_d,
"noescapechars|n" => \$o_n,
"user|u=s" => \$o_u,
"passwd|p=s" => \$o_p
) ) {
&usage;
}
@@ -137,7 +135,8 @@ if (-e $config) {
foreach my $opt (keys %conf) {
if (!exists $validopts{$opt}) {
print "<$opt>\n";
print STDERR "Error in config $config line: " . $line{$opt} . ". Unsupported option \"$opt\"!\n";
print STDERR "Error in config $config line: " .
$line{$opt} . ". Unsupported option \"$opt\"!\n";
exit;
}
}
@@ -205,28 +204,28 @@ if($o_c) {
}
if (exists $ENV{http_proxy} and $o_u) {
# authenticate
if (! $o_p) {
# ask for it
my $proxy_pass;
local $| = 1;
print "password: ";
eval {
local($|) = 1;
local(*TTY);
open(TTY,"/dev/tty") or die "No /dev/tty!";
system ("stty -echo </dev/tty") and die "stty failed!";
chomp($proxy_pass = <TTY>);
print STDERR "\r\n";
system ("stty echo </dev/tty") and die "stty failed!";
close(TTY);
};
if ($@) {
$proxy_pass = <>;
}
chomp $proxy_pass;
$PARAM{"-ProxyPass"} = $proxy_pass;
# authenticate
if (! $o_p) {
# ask for it
my $proxy_pass;
local $| = 1;
print "password: ";
eval {
local($|) = 1;
local(*TTY);
open(TTY,"/dev/tty") or die "No /dev/tty!";
system ("stty -echo </dev/tty") and die "stty failed!";
chomp($proxy_pass = <TTY>);
print STDERR "\r\n";
system ("stty echo </dev/tty") and die "stty failed!";
close(TTY);
};
if ($@) {
$proxy_pass = <>;
}
chomp $proxy_pass;
$PARAM{"-ProxyPass"} = $proxy_pass;
}
}
my (@match, $lines, $maxsize);
@@ -246,7 +245,7 @@ if(exists $CACHE{$cache_key} && $conf{use_cache} eq "yes") {
}
else {
my $leo = new WWW::Dict::Leo::Org(%PARAM) or
die "Could not initialize WWW::Dict::Leo::Org: $!\n";
die "Could not initialize WWW::Dict::Leo::Org: $!\n";
@match = $leo->translate($string);
$lines = $leo->lines();
$maxsize = $leo->maxsize();
@@ -328,19 +327,36 @@ Translate a term from german to english or vice versa.
-s, --spelltolerance=on|off allow spelling errors
-m, --morphology=none|forcedAll provide morphology information
-c, --chartolerance=fuzzy|exact allow umlaut alternatives
-l, --language=en|fr|de2(en|fr)|(en|fr)2de translation direction
-n, --noescapechars dont use escapes for highlighting
-f, --force don't use the query cache
-u, --user=username user for proxy authentication
-p, --passwd=password cleartext passphrase for proxy authentication
-h, --help display this help and exit
-d, --debug enable debugging output
-v, --version output version information and exit
-l, --language=[de2]<countrycode>[2de] translation direction
-n, --noescapechars dont use escapes for highlighting
-f, --force don't use the query cache
-u, --user=username user for proxy authentication
-p, --passwd=password cleartext passphrase for proxy authentication
-h, --help display this help and exit
-d, --debug enable debugging output
-v, --version output version information and exit
<term> is the string you are asking to be translated. It will
be requested from STDIN if not specified on the commandline.
Report bugs to <tom\@daemon.de>.
Supported <countrycode>s are:
en english
es spanish
ru russian
pt portugese
pl polish
ch chinese
You can specify only the country code, or append B<de2> in order to
force translation to german, or preprend B<de2> in order to translate
to the other language. Valid examples:
ru to or from russian
de2pl to polish
es2de spanish to german
Report bugs to <tlinden\@cpan.org>.
);
exit 1;
@@ -412,11 +428,30 @@ Default: B<relaxed>.
=item I<-l --language>
Translation direction.
Translation direction. Please note that dict.leo.org always translates
either to or from german.
Possible values: B<en>, B<fr>, B<de2en>, B<en2de>, B<de2fr> or B<fr2de>.
The following languages are supported: english, polish, spanish, portugese
russian and chinese.
B<en> and B<fr> do select the translation direction automatically.
You can specify only the country code, or append B<de2> in order to
force translation to german, or preprend B<de2> in order to translate
to the other language.
Valid examples:
ru to or from russian
de2pl to polish
es2de spanish to german
Valid country codes:
en english
es spanish
ru russian
pt portugese
pl polish
ch chinese
Default: B<en>.
@@ -521,7 +556,7 @@ The config option has higher precedence.
You may modify the user agent as B<leo> identifies itself
on the target site. The default is:
User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11)
User-Agent: Mozilla/5.0 (compatible; Konqueror/3.3.1; X11)
=back
@@ -534,11 +569,11 @@ translation option settings.
If you, for example, execute once the following query:
% leo langnase
% leo langnase
and somewhere later:
% leo -c exact
% leo -c exact
then B<leo> will treat the latter query as a different
one than the previous one, because I<dict.leo.org>
@@ -552,7 +587,7 @@ B<leo> can be used with a HTTP proxy service. For this to
work, you only have to set the environment variable
B<http_proxy>. It has the following format:
PROTO://[USER:PASSWD@]SERVER[:PORT]
PROTO://[USER:PASSWD@]SERVER[:PORT]
The only supported protocol is B<http>. If your proxy works without
authentication, you can omit the B<user:passwd> part. If no
@@ -560,11 +595,11 @@ port is specified, B<80> will be used.
Here is an example (for bash):
export http_proxy=http://172.16.120.120:3128
export http_proxy=http://172.16.120.120:3128
and an example with authentication credentials:
export http_proxy=http://max:34dwe2@172.16.120.120:3128
export http_proxy=http://max:34dwe2@172.16.120.120:3128
As security is always important, I have to warn you, that
other users on the same machine can read your environment
@@ -583,8 +618,8 @@ the password will be read from STDIN.
=head1 FILES
~/.leo the config file for leo. Not required.
~/.leo-CACHE.db* the cache file.
~/.leo the config file for leo. Not required.
~/.leo-CACHE.db* the cache file.
=head1 AUTHOR
@@ -632,6 +667,6 @@ browser, what is not prohibited. At least not today.
=head1 VERSION
This is the manpage for B<leo> version B<1.39>.
This is the manpage for B<leo> version B<1.41>.
=cut