2014-02-06 09:23:53 +01:00
|
|
|
|
#
|
2016-01-27 00:09:16 +01:00
|
|
|
|
# Copyleft (l) 2000-2016 Thomas v.D. <tlinden@cpan.org>.
|
|
|
|
|
|
#
|
2016-01-26 23:53:42 +01:00
|
|
|
|
# leo may be
|
2014-02-06 09:23:53 +01:00
|
|
|
|
# used and distributed under the terms of the GNU General Public License.
|
|
|
|
|
|
# All other brand and product names are trademarks, registered trademarks
|
|
|
|
|
|
# or service marks of their respective holders.
|
|
|
|
|
|
|
|
|
|
|
|
package WWW::Dict::Leo::Org;
|
2016-10-24 11:31:38 +02:00
|
|
|
|
$WWW::Dict::Leo::Org::VERSION = "1.45";
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
|
|
use warnings;
|
|
|
|
|
|
use English '-no_match_vars';
|
|
|
|
|
|
use Carp::Heavy;
|
|
|
|
|
|
use Carp;
|
|
|
|
|
|
use IO::Socket;
|
|
|
|
|
|
use MIME::Base64;
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
use XML::Simple;
|
|
|
|
|
|
use Encode;
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
sub debug;
|
|
|
|
|
|
|
|
|
|
|
|
sub new {
|
|
|
|
|
|
my ($class, %param) = @_;
|
|
|
|
|
|
my $type = ref( $class ) || $class;
|
|
|
|
|
|
|
2016-10-09 11:17:14 +02:00
|
|
|
|
my %settings = (
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
"-Host" => "dict.leo.org",
|
2016-10-24 11:31:38 +02:00
|
|
|
|
"-Port" => 80,
|
|
|
|
|
|
"-UserAgent" => "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
|
|
|
|
|
|
"-Proxy" => "",
|
|
|
|
|
|
"-ProxyUser" => "",
|
|
|
|
|
|
"-ProxyPass" => "",
|
|
|
|
|
|
"-Debug" => 0,
|
|
|
|
|
|
"-SpellTolerance" => "standard", # on, off
|
|
|
|
|
|
"-Morphology" => "standard", # none, forcedAll
|
|
|
|
|
|
"-CharTolerance" => "relaxed", # fuzzy, exact
|
|
|
|
|
|
"-Language" => "en", # en2de, de2fr, fr2de, de2es, es2de
|
|
|
|
|
|
"data" => {}, # the results
|
|
|
|
|
|
"section" => [],
|
|
|
|
|
|
"title" => "",
|
|
|
|
|
|
"segments" => [],
|
|
|
|
|
|
"Maxsize" => 0,
|
|
|
|
|
|
"Linecount" => 0,
|
|
|
|
|
|
);
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
foreach my $key (keys %param) {
|
|
|
|
|
|
$settings{$key} = $param{$key}; # override defaults
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
my $self = \%settings;
|
|
|
|
|
|
bless $self, $type;
|
|
|
|
|
|
|
|
|
|
|
|
return $self;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sub translate {
|
|
|
|
|
|
my($this, $term) = @_;
|
|
|
|
|
|
|
2016-10-09 11:17:14 +02:00
|
|
|
|
if (! $term) {
|
|
|
|
|
|
croak "No term to translate given!";
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2014-02-06 09:23:53 +01:00
|
|
|
|
my $linecount = 0;
|
|
|
|
|
|
my $maxsize = 0;
|
|
|
|
|
|
my @match = ();
|
|
|
|
|
|
|
|
|
|
|
|
#
|
2016-10-08 12:36:25 +02:00
|
|
|
|
# form var transitions for searchLoc(=translation direction) and lp(=language)
|
|
|
|
|
|
my %lang = ( speak => "ende" );
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-08 12:36:25 +02:00
|
|
|
|
my @langs = qw(en es ru pt fr pl ch it);
|
2014-02-06 09:23:53 +01:00
|
|
|
|
if ($this->{"-Language"}) {
|
2016-10-08 12:36:25 +02:00
|
|
|
|
# en | fr | ru2en | de2pl etc
|
|
|
|
|
|
# de2, 2de, de are not part of lang spec
|
|
|
|
|
|
if (! grep { $this->{"-Language"} =~ /$_/ } @langs) {
|
|
|
|
|
|
croak "Unsupported language: " . $this->{"-Language"};
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
2016-10-08 12:36:25 +02:00
|
|
|
|
my $spec = $this->{"-Language"};
|
|
|
|
|
|
my $l;
|
|
|
|
|
|
if ($spec =~ /(..)2de/) {
|
|
|
|
|
|
$l = $1;
|
|
|
|
|
|
$this->{"-Language"} = -1;
|
|
|
|
|
|
$lang{speak} = "${l}de";
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
2016-10-08 12:36:25 +02:00
|
|
|
|
elsif ($spec =~ /de2(..)/) {
|
|
|
|
|
|
$l = $1;
|
|
|
|
|
|
$this->{"-Language"} = 1;
|
|
|
|
|
|
$lang{speak} = "${l}de";
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
|
|
|
|
|
else {
|
2016-10-08 12:36:25 +02:00
|
|
|
|
$lang{speak} = $this->{"-Language"} . 'de';
|
|
|
|
|
|
$this->{"-Language"} = 0;
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# add language
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
my @form;
|
2014-02-06 09:23:53 +01:00
|
|
|
|
push @form, "lp=$lang{speak}";
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
# process whitespaces
|
|
|
|
|
|
#
|
|
|
|
|
|
my $query = $term;
|
|
|
|
|
|
$query =~ s/\s\s*/ /g;
|
|
|
|
|
|
$query =~ s/\s/\+/g;
|
|
|
|
|
|
push @form, "search=$query";
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
# make the query cgi'ish
|
|
|
|
|
|
#
|
|
|
|
|
|
my $form = join "&", @form;
|
|
|
|
|
|
|
|
|
|
|
|
# store for result caching
|
|
|
|
|
|
$this->{Form} = $form;
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
# check for proxy settings and use it if exists
|
|
|
|
|
|
# otherwise use direct connection
|
|
|
|
|
|
#
|
|
|
|
|
|
my ($url, $site);
|
|
|
|
|
|
my $ip = $this->{"-Host"};
|
|
|
|
|
|
my $port = $this->{"-Port"};
|
|
|
|
|
|
my $proxy_user = $this->{"-ProxyUser"};
|
|
|
|
|
|
my $proxy_pass = $this->{"-ProxyPass"};
|
|
|
|
|
|
|
|
|
|
|
|
if ($this->{"-Proxy"}) {
|
|
|
|
|
|
my $proxy = $this->{"-Proxy"};
|
|
|
|
|
|
$proxy =~ s/^http:\/\///i;
|
|
|
|
|
|
if ($proxy =~ /^(.+):(.+)\@(.*)$/) {
|
|
|
|
|
|
# proxy user account
|
|
|
|
|
|
$proxy_user = $1;
|
|
|
|
|
|
$proxy_pass = $2;
|
|
|
|
|
|
$proxy = $3;
|
|
|
|
|
|
$this->debug( "proxy_user: $proxy_user");
|
|
|
|
|
|
}
|
|
|
|
|
|
my($host, $pport) = split /:/, $proxy;
|
|
|
|
|
|
if ($pport) {
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
$url = "http://$ip:$port/dictQuery/m-vocab/$lang{speak}/query.xml";
|
2014-02-06 09:23:53 +01:00
|
|
|
|
$port = $pport;
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
$port = 80;
|
|
|
|
|
|
}
|
|
|
|
|
|
$ip = $host;
|
|
|
|
|
|
$this->debug( "connecting to proxy:", $ip, $port);
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
$this->debug( "connecting to site:", $ip, "port", $port);
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
$url = "/dictQuery/m-vocab/$lang{speak}/query.xml";
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
my $conn = new IO::Socket::INET(
|
2016-10-08 12:36:25 +02:00
|
|
|
|
Proto => "tcp",
|
|
|
|
|
|
PeerAddr => $ip,
|
|
|
|
|
|
PeerPort => $port,
|
|
|
|
|
|
) or die "Unable to connect to $ip:$port: $!\n";
|
2014-02-06 09:23:53 +01:00
|
|
|
|
$conn->autoflush(1);
|
|
|
|
|
|
|
|
|
|
|
|
$this->debug( "GET $url?$form HTTP/1.0");
|
|
|
|
|
|
print $conn "GET $url?$form HTTP/1.0\r\n";
|
|
|
|
|
|
|
|
|
|
|
|
# be nice, simulate Konqueror.
|
|
|
|
|
|
print $conn
|
|
|
|
|
|
qq($this->{"-UserAgent"}
|
|
|
|
|
|
Host: $this->{"-Host"}:$this->{"-Port"}
|
|
|
|
|
|
Accept: text/*;q=1.0, image/png;q=1.0, image/jpeg;q=1.0, image/gif;q=1.0, image/*;q=0.8, */*;q=0.5
|
|
|
|
|
|
Accept-Charset: iso-8859-1;q=1.0, *;q=0.9, utf-8;q=0.8
|
|
|
|
|
|
Accept-Language: en_US, en\r\n);
|
|
|
|
|
|
|
|
|
|
|
|
if ($this->{"-Proxy"} and $proxy_user) {
|
|
|
|
|
|
# authenticate
|
|
|
|
|
|
# construct the auth header
|
|
|
|
|
|
my $coded = encode_base64("$proxy_user:$proxy_pass");
|
|
|
|
|
|
$this->debug( "Proxy-Authorization: Basic $coded");
|
|
|
|
|
|
print $conn "Proxy-Authorization: Basic $coded\r\n";
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# finish the request
|
|
|
|
|
|
print $conn "\r\n";
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
# parse dict.leo.org output
|
|
|
|
|
|
#
|
2017-03-11 05:45:44 +01:00
|
|
|
|
$site = "";
|
|
|
|
|
|
my $got_headers = 0;
|
|
|
|
|
|
while (<$conn>) {
|
|
|
|
|
|
if ($got_headers) {
|
|
|
|
|
|
$site .= $_;
|
|
|
|
|
|
}
|
|
|
|
|
|
elsif (/^\r?$/) {
|
|
|
|
|
|
$got_headers = 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
elsif ($_ !~ /HTTP\/1\.(0|1) 200 OK/i) {
|
|
|
|
|
|
if (/HTTP\/1\.(0|1) (\d+) /i) {
|
|
|
|
|
|
# got HTTP error
|
|
|
|
|
|
my $err = $2;
|
|
|
|
|
|
if ($err == 407) {
|
|
|
|
|
|
croak "proxy auth required or access denied!\n";
|
|
|
|
|
|
close $conn;
|
2016-10-08 12:36:25 +02:00
|
|
|
|
return ();
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
croak "got HTTP error $err!\n";
|
2017-03-11 05:45:44 +01:00
|
|
|
|
close $conn;
|
|
|
|
|
|
return ();
|
2016-10-08 12:36:25 +02:00
|
|
|
|
}
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2017-03-11 05:45:44 +01:00
|
|
|
|
close $conn or die "Connection failed: $!\n";
|
|
|
|
|
|
$this->debug( "connection: done");
|
|
|
|
|
|
|
2016-01-26 23:53:42 +01:00
|
|
|
|
$this->{Linecount} = 0;
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
$this->{Maxsize} = 0;
|
|
|
|
|
|
|
|
|
|
|
|
# parse the XML
|
|
|
|
|
|
my $xml = new XML::Simple;
|
|
|
|
|
|
my $data = $xml->XMLin($site,
|
|
|
|
|
|
ForceArray => [ 'section', 'entry' ],
|
|
|
|
|
|
ForceContent => 1,
|
|
|
|
|
|
KeyAttr => { side => 'lang' }
|
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
|
|
my (@matches, $from_lang, $to_lang);
|
|
|
|
|
|
$from_lang = substr $lang{speak}, 0, 2;
|
|
|
|
|
|
$to_lang = substr $lang{speak}, 2, 2;
|
|
|
|
|
|
|
|
|
|
|
|
# parse all the <word>s and build a string
|
|
|
|
|
|
sub parse_word($) {
|
|
|
|
|
|
my $word = shift;
|
|
|
|
|
|
if (ref $word eq "HASH") {
|
|
|
|
|
|
if ($word->{content}) {
|
|
|
|
|
|
return encode('UTF-8', $word->{content});
|
|
|
|
|
|
}
|
|
|
|
|
|
elsif ($word->{cc}) {
|
|
|
|
|
|
# chinese simplified, traditional and pinyin
|
|
|
|
|
|
return encode('UTF-8', $word->{cc}->{cs}->{content} . "[" .
|
|
|
|
|
|
$word->{cc}->{ct}->{content} . "] " .
|
|
|
|
|
|
$word->{cc}->{pa}->{content});
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
elsif (ref $word eq "ARRAY") {
|
|
|
|
|
|
return encode('UTF-8', @{$word}[-1]->{content});
|
|
|
|
|
|
}
|
|
|
|
|
|
else {
|
|
|
|
|
|
return encode('UTF-8', $word);
|
|
|
|
|
|
}
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
foreach my $section (@{$data->{sectionlist}->{section}}) {
|
|
|
|
|
|
my @entries;
|
|
|
|
|
|
foreach my $entry (@{$section->{entry}}) {
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
my $left = parse_word $entry->{side}->{$from_lang}->{words}->{word};
|
|
|
|
|
|
my $right = parse_word $entry->{side}->{$to_lang}->{words}->{word};
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
push @entries, { left => $left, right => $right };
|
|
|
|
|
|
if ($this->{Maxsize} < length($left)) {
|
|
|
|
|
|
$this->{Maxsize} = length($left);
|
|
|
|
|
|
}
|
|
|
|
|
|
$this->{Linecount}++;
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
push @matches, {
|
|
|
|
|
|
title => encode('UTF-8', $section->{sctTitle}),
|
|
|
|
|
|
data => \@entries
|
|
|
|
|
|
};
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
2016-01-26 23:53:42 +01:00
|
|
|
|
|
implement new XML API, using XML::Simple
Unfortunately, we cannot yet parse the additional hints (plural forms,
cases, parts of speech, etc., everything inside <repr> tags) because the
XML::Simple API does not retain the correct ordering of CDATA content
mixed with subtags... :-/
This also removes the options -SpellTolerance, -Morphology, and
-CharTolerance, which are no longer supported by the new API (as long as
I can see.)
Note that although the XML contains UTF-8 data, XML::Simple decodes
it to latin1, so we have to re-encode it to get good results.
2017-03-11 06:10:24 +01:00
|
|
|
|
return @matches;
|
2014-02-06 09:23:53 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
2016-10-08 21:09:16 +02:00
|
|
|
|
sub grapheme_length {
|
|
|
|
|
|
my($this, $str) = @_;
|
|
|
|
|
|
my $count = 0;
|
|
|
|
|
|
while ($str =~ /\X/g) { $count++ };
|
|
|
|
|
|
return $count;
|
|
|
|
|
|
}
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
sub maxsize {
|
|
|
|
|
|
my($this) = @_;
|
|
|
|
|
|
return $this->{Maxsize};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sub lines {
|
|
|
|
|
|
my($this) = @_;
|
|
|
|
|
|
return $this->{Linecount};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sub form {
|
|
|
|
|
|
my($this) = @_;
|
|
|
|
|
|
return $this->{Form};
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
sub debug {
|
|
|
|
|
|
my($this, $msg) = @_;
|
|
|
|
|
|
if ($this->{"-Debug"}) {
|
|
|
|
|
|
print STDERR "%DEBUG: $msg\n";
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1;
|
|
|
|
|
|
|
2016-01-26 23:53:42 +01:00
|
|
|
|
=encoding ISO8859-1
|
|
|
|
|
|
|
2014-02-06 09:23:53 +01:00
|
|
|
|
=head1 NAME
|
|
|
|
|
|
|
|
|
|
|
|
WWW::Dict::Leo::Org - Interface module to dictionary dict.leo.org
|
|
|
|
|
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
|
|
|
|
|
|
|
|
use WWW::Dict::Leo::Org;
|
|
|
|
|
|
my $leo = new WWW::Dict::Leo::Org();
|
|
|
|
|
|
my @matches = $leo->translate($term);
|
|
|
|
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
|
|
|
|
|
|
|
B<WWW::Dict::Leo::Org> is a module which connects to the website
|
|
|
|
|
|
B<dict.leo.org> and translates the given term. It returns an array
|
|
|
|
|
|
of hashes. Each hash contains a left side and a right side of the
|
|
|
|
|
|
result entry.
|
|
|
|
|
|
|
|
|
|
|
|
=head1 OPTIONS
|
|
|
|
|
|
|
|
|
|
|
|
B<new()> has several parameters, which can be supplied as a hash.
|
|
|
|
|
|
|
|
|
|
|
|
All parameters are optional.
|
|
|
|
|
|
|
|
|
|
|
|
=over
|
|
|
|
|
|
|
|
|
|
|
|
=item I<-Host>
|
|
|
|
|
|
|
|
|
|
|
|
The hostname of the dict website to use. For the moment only dict.leo.org
|
|
|
|
|
|
is supported, which is also the default - therefore changing the
|
|
|
|
|
|
hostname would not make much sense.
|
|
|
|
|
|
|
|
|
|
|
|
=item I<-Port>
|
|
|
|
|
|
|
|
|
|
|
|
The tcp port to use for connecting, the default is 80, you shouldn't
|
|
|
|
|
|
change it.
|
|
|
|
|
|
|
|
|
|
|
|
=item I<-UserAgent>
|
|
|
|
|
|
|
|
|
|
|
|
The user-agent to send to dict.leo.org site. Currently this is the
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
|
|
|
|
Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9
|
|
|
|
|
|
|
|
|
|
|
|
=item I<-Proxy>
|
|
|
|
|
|
|
|
|
|
|
|
Fully qualified proxy server. Specify as you would do in the well
|
|
|
|
|
|
known environment variable B<http_proxy>, example:
|
|
|
|
|
|
|
|
|
|
|
|
-Proxy => "http://192.168.1.1:3128"
|
|
|
|
|
|
|
|
|
|
|
|
=item I<-ProxyUser> I<-ProxyPass>
|
|
|
|
|
|
|
|
|
|
|
|
If your proxy requires authentication, use these parameters
|
|
|
|
|
|
to specify the credentials.
|
|
|
|
|
|
|
|
|
|
|
|
=item I<-Debug>
|
|
|
|
|
|
|
|
|
|
|
|
If enabled (set to 1), prints a lot of debug information to
|
|
|
|
|
|
stderr, normally only required for developers or to
|
|
|
|
|
|
report bugs (see below).
|
|
|
|
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
|
|
|
|
Parameters to control behavior of dict.leo.org:
|
|
|
|
|
|
|
|
|
|
|
|
=over
|
|
|
|
|
|
|
|
|
|
|
|
=item I<-Language>
|
|
|
|
|
|
|
|
|
|
|
|
Translation direction. Please note that dict.leo.org always translates
|
2016-10-08 12:36:25 +02:00
|
|
|
|
either to or from german.
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-24 11:31:38 +02:00
|
|
|
|
The following languages are supported: english, polish, spanish, portuguese
|
2016-10-08 12:36:25 +02:00
|
|
|
|
russian and chinese.
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-08 12:36:25 +02:00
|
|
|
|
You can specify only the country code, or append B<de2> in order to
|
|
|
|
|
|
force translation to german, or preprend B<de2> in order to translate
|
|
|
|
|
|
to the other language.
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-08 12:36:25 +02:00
|
|
|
|
Valid examples:
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-08 12:36:25 +02:00
|
|
|
|
ru to or from russian
|
|
|
|
|
|
de2pl to polish
|
|
|
|
|
|
es2de spanish to german
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-08 12:36:25 +02:00
|
|
|
|
Valid country codes:
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-08 12:36:25 +02:00
|
|
|
|
en english
|
|
|
|
|
|
es spanish
|
|
|
|
|
|
ru russian
|
2016-10-24 11:31:38 +02:00
|
|
|
|
pt portuguese
|
2016-10-08 12:36:25 +02:00
|
|
|
|
pl polish
|
|
|
|
|
|
ch chinese
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
2016-10-08 12:36:25 +02:00
|
|
|
|
Default: B<en>.
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
|
|
|
|
=head1 METHODS
|
|
|
|
|
|
|
|
|
|
|
|
=head2 translate($term)
|
|
|
|
|
|
|
|
|
|
|
|
Use this method after initialization to connect to dict.leo.org
|
|
|
|
|
|
and translate the given term. It returns an array of hashes containing
|
|
|
|
|
|
the actual results.
|
|
|
|
|
|
|
2016-10-09 11:17:14 +02:00
|
|
|
|
use WWW::Dict::Leo::Org;
|
|
|
|
|
|
use Data::Dumper;
|
|
|
|
|
|
my $leo = new WWW::Dict::Leo::Org();
|
|
|
|
|
|
my @matches = $leo->translate("test");
|
|
|
|
|
|
print Dumper(\@matches);
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
which prints:
|
|
|
|
|
|
|
2016-10-09 11:17:14 +02:00
|
|
|
|
$VAR1 = [
|
|
|
|
|
|
{
|
|
|
|
|
|
'data' => [
|
|
|
|
|
|
{
|
|
|
|
|
|
'left' => 'check',
|
|
|
|
|
|
'right' => 'der Test'
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
'left' => 'quiz (Amer.)',
|
|
|
|
|
|
'right' => 'der Test <20><> [Schule]'
|
|
|
|
|
|
],
|
|
|
|
|
|
'title' => 'Unmittelbare Treffer'
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
'data' => [
|
|
|
|
|
|
{
|
|
|
|
|
|
'left' => 'to fail a test',
|
|
|
|
|
|
'right' => 'einen Test nicht bestehen'
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
'left' => 'to test',
|
|
|
|
|
|
'right' => 'Tests macheneinen Test machen'
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
'title' => 'Verben und Verbzusammensetzungen'
|
|
|
|
|
|
},
|
|
|
|
|
|
'data' => [
|
|
|
|
|
|
{
|
|
|
|
|
|
'left' => 'testing <20>adj.',
|
|
|
|
|
|
'right' => 'im Test'
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
'title' => 'Wendungen und Ausdr<64>cke'
|
|
|
|
|
|
}
|
|
|
|
|
|
];
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
You might take a look at the B<leo> script how to process
|
|
|
|
|
|
this data.
|
|
|
|
|
|
|
|
|
|
|
|
=head2 maxsize()
|
|
|
|
|
|
|
|
|
|
|
|
Returns the size of the largest returned term (left side).
|
|
|
|
|
|
|
|
|
|
|
|
=head2 lines()
|
|
|
|
|
|
|
|
|
|
|
|
Returns the number of translation results.
|
|
|
|
|
|
|
|
|
|
|
|
=head2 form()
|
|
|
|
|
|
|
|
|
|
|
|
Returns the submitted form uri.
|
|
|
|
|
|
|
|
|
|
|
|
=head1 SEE ALSO
|
|
|
|
|
|
|
|
|
|
|
|
L<leo>
|
|
|
|
|
|
|
|
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
|
|
|
2016-01-27 00:09:16 +01:00
|
|
|
|
WWW::Dict::Leo::Org - Copyright (c) 2007-2016 by Thomas v.D.
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
L<http://dict.leo.org/> -
|
2016-01-27 00:09:16 +01:00
|
|
|
|
Copyright (c) 1995-2016 LEO Dictionary Team.
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
=head1 AUTHOR
|
|
|
|
|
|
|
2016-01-27 00:09:16 +01:00
|
|
|
|
Thomas v.D. <tlinden@cpan.org>
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
=head1 HOW TO REPORT BUGS
|
|
|
|
|
|
|
|
|
|
|
|
Use L<rt.cpan.org> to report bugs, select the queue for B<WWW::Dict::Leo::Org>.
|
|
|
|
|
|
|
|
|
|
|
|
Please don't forget to add debugging output!
|
|
|
|
|
|
|
|
|
|
|
|
=head1 VERSION
|
|
|
|
|
|
|
2016-10-24 11:31:38 +02:00
|
|
|
|
1.45
|
2014-02-06 09:23:53 +01:00
|
|
|
|
|
|
|
|
|
|
=cut
|