# This is some code to fetch an entire web page and translate its text. Just
# add "require 'webpage.pl';" near the start of your program.
# Then you can &webpage::TranslatePage($lang, 'http://www.somepage.edu');
#
# By Patrick Kellogg (after David Mankin)
package webpage;
use LWP::UserAgent;
require "translation.pl";
require "tif.pl";
# Package-level configuration (these are globals of package webpage).
#
# HTTP method name. Nothing visible in this file reads it -- presumably kept
# for callers or left over from earlier code; TODO confirm before removing.
$DEFAULT_CGI_METHOD="GET";
# Proxy URL that TranslatePage registers with the user agent for the 'http'
# scheme. TranslatePage skips the proxy setup when this value is false, so
# an empty string makes requests go out directly.
$DEFAULT_PROXY="http://cacheflow.cs.colorado.edu:3128";
#$DEFAULT_PROXY="http://cache.cs.colorado.edu:3128";
# Token that TranslatePage prepends to the user agent's default agent string.
$AGENT_NAME="FIT/0.1";
##
# TranslatePage(lang, url);
##
# Fetch a web page over HTTP and return its source with the text translated.
#
# Parameters:
#   $TransLang - translation direction, handed unchanged to FixContent().
#   $InputURL  - address of the page. Surrounding whitespace is ignored and
#                "http://" is prepended when the address starts with "www".
#
# Returns the translated page source with a newline added after every ">".
# Returns the empty string when no URL is given or the request does not
# succeed.
#
# The request uses $AGENT_NAME as a prefix of the agent string and goes
# through $DEFAULT_PROXY when that variable is set.
sub TranslatePage {
    my $TransLang = shift;
    my $InputURL  = shift;

    # Nothing to fetch: give the same empty result a failed request yields.
    return "" unless defined $InputURL;

    # Trim on the lexical copy. The global $_ is deliberately left alone so
    # that a caller iterating with $_ does not have its data rewritten.
    $InputURL =~ s/^\s+//;
    $InputURL =~ s/\s+$//;
    return "" if $InputURL eq "";

    # Allow scheme-less addresses such as "www.example.com".
    if (substr($InputURL, 0, 3) eq "www") {
        $InputURL = "http://" . $InputURL;
    }

    my $ua = LWP::UserAgent->new;
    $ua->agent($AGENT_NAME . " " . $ua->agent);
    $ua->proxy('http', $DEFAULT_PROXY) if $DEFAULT_PROXY;

    my $req = HTTP::Request->new('GET' => $InputURL);
    $req->header('Accept' => 'text/html');

    my $res = $ua->request($req);
    return "" unless $res->is_success;

    my $NewPage = FixContent($TransLang, $res->content);
    $NewPage = "" unless defined $NewPage;

    # Do some final adjustments to NewPage: one tag per line.
    $NewPage =~ s/>/>\n/g;

    # NOTE(review): this removes every opening <head> tag and leaves two
    # newlines in its place, while the closing tag stays. The replacement
    # text looks as if it once contained markup that has been lost --
    # confirm the intended replacement before relying on the <head> section.
    $NewPage =~ s/<[Hh][Ee][Aa][Dd]>/\n\n/g;

    return $NewPage;
}
##
# FixContent(lang, html);
##
# Translate the text of an HTML page while leaving the markup untouched.
#
# Parameters:
#   $NewLang - translation direction. "fr_en" is sent to tif::fit(); any
#              other value is passed on to translation::translate_lang().
#   $PageIn  - the page source as a string.
#
# Returns the page with every text run replaced by its translation, or the
# empty string when $PageIn is undefined.
#
# How it works:
#   * The page is scanned one character at a time. Everything up to and
#     including a ">" is markup; everything from there to the next "<" is a
#     text run. Text in front of the very first ">" therefore counts as
#     markup, and the contents of every element (whatever the tag name) count
#     as text.
#   * Text runs have their whitespace collapsed and trimmed. Runs that are
#     empty afterwards are dropped and the markup on both sides is merged.
#   * All text runs are joined with " . xxx . " and translated in a single
#     call; the result is split on that marker and interleaved with the
#     markup again.
#
# Assumes the translator keeps the "xxx" markers recognisable; if one is
# dropped or altered, the translated pieces shift relative to the markup.
sub FixContent {
    my $NewLang = shift;
    my $PageIn  = shift;

    return "" unless defined $PageIn;

    # All working state is lexical, so nothing left over from one page can
    # leak into the next call.
    my @tag_chunks;         # markup that precedes the text run of same index
    my @sentences;          # text runs to translate
    my $tag_buffer  = "";
    my $text_buffer = "";
    my $in_text     = 0;    # false: collecting markup, true: collecting text

    for my $pos (0 .. length($PageIn) - 1) {
        my $char = substr($PageIn, $pos, 1);

        if (!$in_text) {
            $tag_buffer .= $char;
            if ($char eq ">") {
                # Only the start can carry whitespace; the end is ">".
                $tag_buffer =~ s/^\s+//;
                $text_buffer = "";
                $in_text     = 1;
            }
            next;
        }

        if ($char ne "<") {
            $text_buffer .= $char;
            next;
        }

        # A "<" ends the current text run.
        my $sentence = _SqueezeText($text_buffer);
        if ($sentence ne "") {
            push @sentences,  $sentence;
            push @tag_chunks, $tag_buffer;
            $tag_buffer = "";
        }
        # With an empty run the markup keeps accumulating in the same chunk.
        $tag_buffer .= "<";
        $in_text = 0;
    }

    # Push the last chunk of markup ...
    push @tag_chunks, $tag_buffer;

    # ... and any text that follows the final ">" so it is not lost.
    if ($in_text) {
        my $tail = _SqueezeText($text_buffer);
        push @sentences, $tail if $tail ne "";
    }

    # Without text there is nothing to send to the translator.
    return join("", @tag_chunks) unless @sentences;

    # Construct the text to translate and do the call.
    my $to_translate = join " . xxx . ", @sentences;
    my $translated;
    if ($NewLang eq "fr_en") {
        $translated = tif::fit($to_translate);
    }
    else {
        $translated = translation::translate_lang($NewLang, $to_translate);
    }
    $translated = "" unless defined $translated;

    # Split the returned string on the marker; the periods and spaces around
    # it are optional because the translator may not reproduce them exactly.
    my @pieces = split /\s*\.?\s*xxx\s*\.?\s*/, $translated;

    # Put the strings back into the page: markup, text, markup, text, ...
    my $output = "";
    while (@tag_chunks || @pieces) {
        my $chunk = shift @tag_chunks;
        my $piece = shift @pieces;
        $output .= $chunk if defined $chunk;
        $output .= $piece if defined $piece;
    }
    return $output;
}

# Collapse every run of tabs, line breaks, form feeds and spaces into one
# space and strip whitespace from both ends of the given text.
sub _SqueezeText {
    my $text = shift;
    $text =~ s/[\t\n\r\f ]+/ /g;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}
1;