# Fetch an entire web page and translate the text between its tags.
#
# Add "require 'webpage.pl';" near the start of your program, then call:
#
#     my $html = webpage::TranslatePage($lang, 'http://www.somepage.edu');
#
# By Patrick Kellogg (after David Mankin)

package webpage;

use strict;
use warnings;

use LWP::UserAgent;
use HTTP::Request;

require "translation.pl";
require "tif.pl";

# Package-level settings. They stay package globals (not lexicals) so that
# code outside this file can still read or override them as $webpage::NAME.
our $DEFAULT_CGI_METHOD = "GET";    # NOTE(review): not referenced in this file
our $DEFAULT_PROXY      = "http://cacheflow.cs.colorado.edu:3128";
#our $DEFAULT_PROXY     = "http://cache.cs.colorado.edu:3128";
our $AGENT_NAME         = "FIT/0.1";

##
# _Trim(string)
#
# Returns a copy of the string without leading or trailing whitespace.
# Two anchored substitutions are used instead of one /^\s*(.*?)\s*$/ pattern
# because "." does not match a newline, so the single pattern silently fails
# on buffers that contain an embedded line break.
##
sub _Trim {
    my ($text) = @_;
    return "" unless defined $text;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}

##
# _NormalizeText(string)
#
# Prepares a run of page text for translation: tabs, newlines, carriage
# returns and form feeds become spaces, runs of spaces collapse to a single
# space, and the result is trimmed. Returns "" for whitespace-only input.
##
sub _NormalizeText {
    my ($text) = @_;
    return "" unless defined $text;
    $text =~ s/\t|\n|\r|\f/ /g;
    $text =~ s/ +/ /g;
    return _Trim($text);
}

##
# TranslatePage(language, url)
#
# Downloads the page at the given URL and returns its HTML with the text
# between tags translated by FixContent().
#
#   language - translation selector, passed through to FixContent()
#   url      - address to fetch; surrounding whitespace is removed and a
#              leading "www" gets "http://" put in front of it
#
# Returns the translated page with a newline inserted after every ">" and
# each "<head>" tag (any letter case) replaced by a newline. Returns "" when
# no URL is given or the request does not succeed.
##
sub TranslatePage {
    my ($TransLang, $InputURL) = @_;

    return "" unless defined $InputURL;

    # Work on the lexical copy; the caller's $_ is left untouched.
    $InputURL = _Trim($InputURL);
    if (substr($InputURL, 0, 3) eq "www") {
        $InputURL = "http://" . $InputURL;
    }

    my $ua = LWP::UserAgent->new;
    $ua->agent($AGENT_NAME . " " . $ua->agent);
    $ua->proxy('http', $DEFAULT_PROXY) if $DEFAULT_PROXY;

    my $req = HTTP::Request->new('GET' => $InputURL);
    $req->header('Accept' => 'text/html');

    my $res = $ua->request($req);
    return "" unless $res->is_success;

    my $NewPage = FixContent($TransLang, $res->content);

    # Do some final adjustments to the page.
    $NewPage =~ s/>/>\n/g;
    $NewPage =~ s/<[Hh][Ee][Aa][Dd]>/\n/g;

    return $NewPage;
}

##
# FixContent(language, html)
#
# Splits the page into alternating runs of markup and text, sends all text
# runs to the translator in a single call, and stitches the translated runs
# back between the original markup.
#
#   language - "fr_en" selects tif::fit(); any other value is handed to
#              translation::translate_lang() together with the text
#   html     - page source to process
#
# Text runs are joined with " . xxx . " before translation and the result is
# split on "xxx" (with optional surrounding dots and whitespace) afterwards.
# NOTE(review): page text that itself contains "xxx", or a translator that
# alters the marker, will shift the text runs relative to the markup.
#
# Whitespace-only text between two tags is dropped and the tags are kept
# together as one markup run. Returns the reassembled page.
##
sub FixContent {
    my ($NewLang, $PageIn) = @_;
    $PageIn = "" unless defined $PageIn;

    my @TagArray      = ();
    my @SentenceArray = ();

    # All scanner state is lexical so nothing carries over from one call to
    # the next.
    my $TagBuffer       = "";
    my $TransBuffer     = "";
    my $TranslatingFlag = 0;    # 0 = inside markup, 1 = inside text

    # Walk the page one character at a time, collecting markup in TagBuffer
    # and translatable text in TransBuffer.
    for my $i (0 .. length($PageIn) - 1) {
        my $NextChar = substr($PageIn, $i, 1);

        if (!$TranslatingFlag) {
            if ($NextChar eq ">") {
                # End of a tag: whatever follows is text until the next "<".
                $TagBuffer       = _Trim($TagBuffer . ">");
                $TransBuffer     = "";
                $TranslatingFlag = 1;
            }
            else {
                $TagBuffer .= $NextChar;
            }
            next;
        }

        if ($NextChar ne "<") {
            $TransBuffer .= $NextChar;
            next;
        }

        # A "<" ends the current text run.
        $TransBuffer = _NormalizeText($TransBuffer);
        if ($TransBuffer eq "") {
            # Nothing to translate: keep growing the same markup run.
            $TagBuffer .= "<";
        }
        else {
            push @SentenceArray, $TransBuffer;
            push @TagArray,      $TagBuffer;
            $TagBuffer = "<";
        }
        $TranslatingFlag = 0;
    }

    # Text after the final ">" would otherwise be lost; queue it as well.
    if ($TranslatingFlag) {
        my $Tail = _NormalizeText($TransBuffer);
        if ($Tail ne "") {
            push @SentenceArray, $Tail;
            push @TagArray,      $TagBuffer;
            $TagBuffer = "";
        }
    }

    # Push the last markup run.
    push @TagArray, $TagBuffer;

    # No text at all: skip the translator instead of sending it "".
    return join("", @TagArray) unless @SentenceArray;

    # Construct the text to translate and make the call.
    my $ToTranslate = join " . xxx . ", @SentenceArray;
    my $MyResult;
    if (defined $NewLang && $NewLang eq "fr_en") {
        $MyResult = tif::fit($ToTranslate);
    }
    else {
        $MyResult = translation::translate_lang($NewLang, $ToTranslate);
    }
    $MyResult = "" unless defined $MyResult;

    # Split the returned string back into individual text runs.
    my @MySplit = split /\s*\.?\s*xxx\s*\.?\s*/, $MyResult;

    # Interleave markup and translated text. The two lists may differ in
    # length, so exhausted lists contribute nothing rather than undef.
    my $OutputPage = "";
    while (@TagArray || @MySplit) {
        my $Tag  = shift @TagArray;
        my $Text = shift @MySplit;
        $OutputPage .= $Tag  if defined $Tag;
        $OutputPage .= $Text if defined $Text;
    }

    return $OutputPage;
}

1;