# file KLEIDER/src/localization/GoogleTranslate.pm # Übersetzung durch GoogleTranslate # 2015-08-09 Herbert Schiemann # GPL Version 2 oder neuer package Herbaer::Translate::GoogleTranslate ; use parent Herbaer::Translate::Base ; use utf8 ; use Herbaer::Replace ; use IO::Handle ; use JSON ; use LWP ; use Time::HiRes qw ( usleep ); use URI::Escape ; # "\" wird durch "\\", # ":" wird durch "\c", # Zeilenende durch "\n" ersetzt sub _encode { my $t = shift; $t =~ s/\\/\\\\/g; $t =~ s/\n/\\n/g; $t =~ s/:/\\c/g; return $t; } # _encode # macht _encode rückgängig sub _decode { my $t = shift; $t =~ s/\\(.)/$1 eq "n" ? "\n" : $1 eq "c" ? ":" : $1/ge; return $t; } # _decode sub new { my ($class, $secrets, $debug) = @_; print STDERR "new Herbaer::Translate::GoogleTranslate\n" if $debug; my $hnd; if (! open ($hnd, "<:encoding(utf-8)", $secrets)) { print STDERR "Kann Datei $secrets nicht lesen:$!\n" if $debug; return undef; } my $line; my $key; my $url; my $post_url; my $wait = 0; while (defined ($line = <$hnd>)) { $line =~ s/\s*$//; if ($line =~ /^\s*google\.translate\.apikey\s*=\s*(.*)/) { $key = $1; $url = "https://www.googleapis.com/language/translate/v2?key=${key}" . "\&format=text\&q=\$\{text\}\&source=\$\{srl\}\&target=\$\{tgl\}" ; $post_url = "https://www.googleapis.com/language/translate/v2?key=${key}" . "\&format=text\&source=\$\{srl\}\&target=\$\{tgl\}" ; } if ($line =~ /^\s*google\.translate\.wait\s*=\s*([1-9][0-9]+)/) { $wait = $1 + 0; } } close $hnd; my $self = { "dbg" => $debug, "ua" => LWP::UserAgent -> new (), "u" => $url, "p_url" => $post_url, "p_cont" => "q=\${text}", "wait" => $wait, "pid" => undef, }; my $ua = $self -> {"ua"}; $ua -> agent ($ua -> _agent . " (+http://kleider.herbaer.de)"); $self = bless ($self, $class); # Kindprozess starten if ($wait) { my ($cw, $cr, $pw, $pr); pipe ($cr, $pw); pipe ($pr, $cw); my $pid = fork (); if (!$pid) { close ($pr); close ($pw); binmode ($cr, ":encoding(utf-8)"); binmode ($cw, ":encoding(utf-8)"); $cw -> autoflush (); my $line; my $st; # source text my $sl; # source language my $tl; # target language my $tt; # translated text while (defined ($line = <$cr>)) { if ($line =~ /^tr:([^:]+):([^:]+):([^:]+)$/) { $st = $1; $sl = $2; $tl = $3; $tt = $self -> _translate (_decode ($st), $sl, $tl) || ""; print $cw _encode ($tt) . "\n"; usleep ($self -> {"wait"}); } } close ($cr); close ($cw); exit 0; } else { close ($cr); close ($cw); binmode ($pr, ":encoding(utf-8)"); binmode ($pw, ":encoding(utf-8)"); $pw -> autoflush (); $self -> {"pid"} = $pid; $self -> {"r"} = $pr; $self -> {"w"} = $pw; } } return bless ($self, $class); } # new sub translate { my ($self, $text, $srl, $tgl) = @_ ; my $dbg = $self -> {"dbg"}; if ($dbg) { print STDERR "Herbaer::Translate::GoogleTranslate::translate\n"; } $srl =~ s/-.+//; $srl = lc ($srl); $tgl =~ s/-.+//; $tgl = lc ($tgl); $tgl = "zh-CN" if $tgl eq "zh"; my $tt; if ($self -> {"pid"}) { my $read = $self -> {"r"}; my $write = $self -> {"w"}; print $write join (":", "tr", _encode ($text), $srl, $tgl), "\n"; $tt = <$read> || ""; $tt =~ s/\n//; $tt = _decode ($tt); } else { $tt = $self -> _translate ($text, $srl, $tgl); } $tt =~ s/(\s)\x{200b}+/$1/g; # Null-Zwischenraum nach Leerraum $tt =~ s/\x{200b}+(\s)/$1/g; # Null-Zwischenraum vor Leerraum $tt =~ s/\x{200b}\x{200b}+/\x{200b}/g; # Folgen von Null-Zwischenraum # Null-Zwischenraum vor öffnender Klammer (chinesisch, japanisch) $tt =~ s/([a-zäöüßA-ZÄÖÜ])([(][a-zäöüßA-ZÄÖÜ])/${1}\x{200b}${2}/g; # Null-Zwischenraum nach schließender Klammer, Komma oder Punkt (zh, ja) $tt =~ s/([a-zäöüßA-ZÄÖÜ][)、。,​])([a-zäöüßA-ZÄÖÜ])/${1}\x{200b}${2}/g; $tt; } # translate sub _translate { require bytes ; my ($self, $text, $srl, $tgl) = @_ ; my $input = { "text" => uri_escape_utf8 ($text), "srl" => $srl, "tgl" => $tgl, }; my $dbg = $self -> {"dbg"}; if ($dbg) { print STDERR "Herbaer::Translate::GoogleTranslate::_translate\n"; } my $ua = $self -> {"ua"}; my $resp; my $data; my $url = replace ($self -> {"u"}, $input); if (bytes::length ($url) < 2000) { print STDERR "URL $url\n" if $dbg; $resp = $ua -> get ($url); } else { $url = replace ( $self -> {"p_url"}, $input); $cont = replace ( $self -> {"p_cont"}, $input); my $req = HTTP::Request -> new ( "POST" => $url ); $req -> header ( "X-HTTP-Method-Override" => "GET" ); $req -> content_type ("application/x-www-form-urlencoded"); $req -> content ($cont); $resp = $ua -> request ($req); } print STDERR $resp -> as_string(), "\n" if $dbg; if ( ! $resp -> is_success () ) { print STDERR "Kann URL $url nicht laden\n" if $dbg; } else { $data = decode_json ($resp -> decoded_content ()); } $tt = $data -> {"data"} -> {"translations"} [0] -> {"translatedText"} || ""; } # _translate sub languages { my ($self, $srl) = @_ ; my $dbg = $self -> {"dbg"}; if ($dbg) { print STDERR "Herbaer::Translate::GoogleTranslate::languages\n"; } my $url = $self -> {"u"}; $url =~ s/\/v2\?/\/v2\/languages\?/; $url =~ s/&.*/&source=/; $srl =~ s/-.*//; $url .= $srl; my $ua = $self -> {"ua"}; print STDERR "URL $url\n" if $dbg; my $resp = $resp = $ua -> get ($url); my $data; if ( ! $resp -> is_success () ) { print STDERR "Kann URL $url nicht laden\n" if $dbg; } else { $data = decode_json ($resp -> decoded_content ()); } my $l; my $ret = []; for $l (@{$data -> {"data"} -> {"languages"}}) { push @$ret, $l -> {"language"}; print STDERR $l -> {"language"}, "\n" if $dbg; } $ret; } # languages # Name des Uebersetzers sub translator_name { "google"; } # translator_name sub DESTROY { my $self = shift; my $dbg = $self -> {"dbg"}; if ($dbg) { print STDERR "Herbaer::Translate::GoogleTranslate::DESTROY\n"; } if ($self -> {"pid"}) { close ($self -> {"w"}); close ($self -> {"r"}); wait; } 1; } # DESTROY 1; # end of file KLEIDER/src/localization/GoogleTranslate.pm