# This is some code to fetch an entire web page and run its text through
# the (currently disabled) translation hook.
#
# Add "require 'webpage.pl';" near the start of your program.
# Then you can call:
#
#     my $page = webpage::TranslatePage($lang, 'http://www.somepage.edu');
#
# By Patrick Kellogg (after David Mankin)

package webpage;

use strict;
use warnings;

use LWP::UserAgent;
require "translation.pl";

# Package-level configuration.  These stay package variables (not lexicals)
# so that callers can still override them as $webpage::DEFAULT_PROXY etc.
# $DEFAULT_CGI_METHOD is not referenced by the code below.
our $DEFAULT_CGI_METHOD = "GET";
our $DEFAULT_PROXY      = "http://cacheflow.cs.colorado.edu:3128";
our $AGENT_NAME         = "FIT/0.1";

##
# TranslatePage(string, string);
#
# Fetches a web page over HTTP and returns its processed HTML.
#
# Parameters:
#   $TransLang - language identifier, handed straight to FixContent
#   $InputURL  - address of the page; surrounding white space is ignored,
#                a bare host gets a trailing "/", and an address starting
#                with "www" gets "http://" prepended
#
# Returns the page after FixContent, with a newline inserted after every
# ">" and every "<head>" tag (any letter case) replaced by a newline.
# If the request fails, "Error: <code> <message>" is printed to the
# currently selected output handle and the empty string is returned.
##
sub TranslatePage {
    my ($TransLang, $InputURL) = @_;

    my $url = _NormalizeURL($InputURL);

    my $ua = LWP::UserAgent->new;
    $ua->agent($AGENT_NAME . " " . $ua->agent);
    $ua->proxy('http', $DEFAULT_PROXY) if $DEFAULT_PROXY;

    my $req = HTTP::Request->new('GET' => $url);
    $req->header('Accept' => 'text/html');
    my $res = $ua->request($req);

    my $NewPage;
    if ($res->is_success) {
        $NewPage = FixContent($TransLang, $res->content);
    }
    else {
        print "Error: " . $res->code . " " . $res->message;
        $NewPage = "";
    }

    # Do some final adjustments to NewPage.  The order matters: the first
    # substitution has already put a newline after each "<head>" tag by
    # the time the second one replaces the tag itself.
    $NewPage =~ s/>/>\n/g;
    $NewPage =~ s/<[Hh][Ee][Aa][Dd]>/\n/g;

    return $NewPage;
}

##
# _NormalizeURL(string);
#
# Private helper: tidies a user-supplied address for TranslatePage.
# Works on a copy, so neither the caller's argument nor $_ is modified.
##
sub _NormalizeURL {
    my ($url) = @_;
    $url = '' unless defined $url;

    # Trim the white space
    $url =~ s/\A\s+//;
    $url =~ s/\s+\z//;

    # Only a URL with no path, query or fragment gets a trailing slash.
    # Appending one unconditionally would turn "http://host/page.html"
    # into "http://host/page.html/", which names a different resource.
    if ($url =~ m{\A(?:[A-Za-z][A-Za-z0-9+.\-]*://)?[^/?\#]*\z}) {
        $url .= "/";
    }

    if (substr($url, 0, 3) eq "www") {
        $url = "http://" . $url;
    }

    return $url;
}

##
# FixContent(string, string);
#
# Walks through a page, copying markup through untouched and cleaning up
# the text that sits between a ">" and the next "<".
#
# Parameters:
#   $NewLang - language identifier; only used by the translation call,
#              which is currently commented out
#   $PageIn  - the page source
#
# Returns the rewritten page ('' when $PageIn is undefined).  Everything
# up to and including the first ">" is treated as markup, so input that
# contains no ">" at all is returned unchanged.  Each call starts from an
# empty result; nothing is carried over between calls.
##
sub FixContent {
    my ($NewLang, $PageIn) = @_;
    return '' unless defined $PageIn;

    my $OutputPage = '';
    my $FileLen    = length $PageIn;
    my $pos        = 0;

    while ($pos < $FileLen) {

        # Not translating: copy everything through the next '>'
        my $gt = index($PageIn, '>', $pos);
        if ($gt < 0) {
            $OutputPage .= substr($PageIn, $pos);
            last;
        }
        $OutputPage .= substr($PageIn, $pos, $gt - $pos + 1);
        $pos = $gt + 1;

        # Translating: collect the text up to the next '<'
        my $lt = index($PageIn, '<', $pos);
        if ($lt < 0) {
            # No further tag: flush the trailing text instead of losing it
            $OutputPage .= _CleanText(substr($PageIn, $pos));
            last;
        }

        my $text = _CleanText(substr($PageIn, $pos, $lt - $pos));

        #Don't do a translation
        $OutputPage .= $text;

        #Do the translation
        #print $NewLang . " >" . $text . "< ", "\n";
        #$NewTrans = &translation::translate_lang($NewLang,$text);
        #print "NewTrans >" . $NewTrans . "<", "\n";
        #$OutputPage = $OutputPage . $NewTrans;

        $OutputPage .= "<";
        $pos = $lt + 1;
    }

    return $OutputPage;
}

##
# _CleanText(string);
#
# Private helper: normalises one run of text found between two tags.
# Returns '' for a run that is empty or consists only of white space.
##
sub _CleanText {
    my ($text) = @_;

    # See if anything was caught
    return '' if $text =~ /\A\s*\z/;

    # Remove tabs, newlines, carriage returns and formfeeds.
    # NOTE: these are deleted, not replaced by a space, so words that are
    # separated only by a line break end up joined together.
    $text =~ tr/\t\n\r\f//d;

    # Collapse any run of two or more spaces into a single space
    $text =~ s/ {2,}/ /g;

    # Remove any spaces at the start or end
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;

    return $text;
}

1;