########################################################################### # # WgetDownload.pm -- Download base module that handles calling Wget # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 2006 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### package WgetDownload; eval {require bytes}; # suppress the annoying "subroutine redefined" warning that various # plugins cause under perl 5.6 $SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)}; use BaseDownload; use strict; use IPC::Open2; use Cwd; sub BEGIN { @WgetDownload::ISA = ('BaseDownload'); } my $arguments = [ { 'name' => "proxy_on", 'desc' => "{WgetDownload.proxy_on}", 'type' => "flag", 'reqd' => "no", 'hiddengli' => "yes"}, { 'name' => "proxy_host", 'desc' => "{WgetDownload.proxy_host}", 'type' => "string", 'reqd' => "no", 'hiddengli' => "yes"}, { 'name' => "proxy_port", 'desc' => "{WgetDownload.proxy_port}", 'type' => "string", 'reqd' => "no", 'hiddengli' => "yes"}, { 'name' => "user_name", 'desc' => "{WgetDownload.user_name}", 'type' => "string", 'reqd' => "no", 'hiddengli' => "yes"}, { 'name' => "user_password", 'desc' => "{WgetDownload.user_password}", 'type' => "string", 'reqd' => "no", 'hiddengli' => "yes"}]; my $options = { 'name' => "WgetDownload", 'desc' => "{WgetDownload.desc}", 'abstract' => "yes", 'inherits' => "yes", 'args' => $arguments }; # Declaring variables related to the wget child process so that the termination # signal handler for SIGTERM can close the streams and tidy up before ending # the child process. my $childpid; my($chld_out, $chld_in); # Handler called when this process is killed or abruptly ends due to receiving # one of the terminating signals that this handler is registered to deal with. sub abrupt_end_handler { my $termination_signal = shift (@_); { if(defined $childpid) { close($chld_out); close($chld_in); # Send TERM signal to child process to terminate it. Sending the INT signal didn't work # See http://perldoc.perl.org/perlipc.html#Signals # Warning on using kill at http://perldoc.perl.org/perlfork.html kill("TERM", $childpid); } } exit(0); } # Registering a handler for when termination signals SIGINT and SIGTERM are received to stop # the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill # signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows). # Note that SIGKILL can't be handled as the handler won't get called for it. More information: # http://affy.blogspot.com/p5be/ch13.htm # http://perldoc.perl.org/perlipc.html#Signals $SIG{'INT'} = \&abrupt_end_handler; $SIG{'TERM'} = \&abrupt_end_handler; sub new { my ($class) = shift (@_); my ($getlist,$inputargs,$hashArgOptLists) = @_; push(@$getlist, $class); push(@{$hashArgOptLists->{"ArgList"}},@{$arguments}); push(@{$hashArgOptLists->{"OptList"}},$options); my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists); return bless $self, $class; } sub checkWgetSetup { my ($self,$blnGliCall) = @_; #TODO: proxy detection?? if((!$blnGliCall) && $self->{'proxy_on'}) { &checkProxySetup($self); } &checkURL($self); } sub getWgetOptions { my ($self) = @_; my $strOptions = ""; if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'}) { $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} "; if ($self->{'user_name'} && $self->{'user_password'}) { $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}"; } } if ($self->{'proxy_on'}) { $strOptions .= " --proxy "; } return $strOptions; } # Checking for proxy setup: proxy server, proxy port, proxy username and password. sub checkProxySetup { my ($self) = @_; ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?"); # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'} # Test if the connection is succeful. If the connection wasn't succeful then ask user to supply username and password. } sub useWget { my ($self, $cmdWget,$blnShow, $working_dir) = @_; my ($strReadIn,$strLine,$command); $strReadIn = "" unless defined $strReadIn; my $current_dir = cwd(); my $changed_dir = 0; if (defined $working_dir && -e $working_dir) { chdir "$working_dir"; $changed_dir = 1; } my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); #$command = "\"$wget_file_path\" $cmdWget |"; #open(*WIN,$command) || die "wget request failed: $!\n"; #open(*WIN,$command) || die "wget request failed: $!\n"; $command = "\"$wget_file_path\" $cmdWget"; $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n"; while (defined($strLine=<$chld_out>)) # we're reading in from child process' stdout { if($blnShow) { print STDERR "$strReadIn\n"; } $strReadIn .= $strLine; } close($chld_in); close($chld_out); # Program terminates only when the following line is included # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary # it prevents the child from turning into a "zombie process". # While the wget process terminates without it, this perl script does not: # the DOS prompt is not returned without it. waitpid $childpid, 0; if ($changed_dir) { chdir $current_dir; } return $strReadIn; } sub useWgetMonitored { my ($self, $cmdWget,$blnShow, $working_dir) = @_; my $current_dir = cwd(); my $changed_dir = 0; if (defined $working_dir && -e $working_dir) { chdir "$working_dir"; $changed_dir = 1; } my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n"; ### print STDERR "**** wget cmd = $command\n"; #open(*WIN,$command) || die "wget request failed: $!\n"; my $command = "\"$wget_file_path\" $cmdWget"; $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n"; my $full_text = ""; my $error_text = ""; my @follow_list = (); my $line; while (defined($line=<$chld_out>)) # we're reading in from child process' stdout { if((defined $blnShow) && $blnShow) { print STDERR "$line"; } if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) { my $follow_url = $1; push(@follow_list,$follow_url); } if ($line =~ m/ERROR\s+\d+/) { $error_text .= $line; } $full_text .= $line; } close($chld_in); close($chld_out); # Program terminates only when the following line is included # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary # it prevents the child from turning into a "zombie process". # While the wget process terminates without it, this perl script does not: # the DOS prompt is not returned without it. waitpid $childpid, 0; my $command_status = $?; if ($command_status != 0) { $error_text .= "Exit error: $command_status"; } if ($changed_dir) { chdir $current_dir; } my $final_follow = pop(@follow_list); # might be undefined, but that's OK return ($full_text,$error_text,$final_follow); } # TODO: Check if the URL is valid?? Not sure what should be in this function yet!! sub checkURL { my ($self) = @_; if ($self->{'url'} eq "") { &error("checkURL","no URL is specified!! Please specifies the URL for downloading."); } } sub error { my ($strFunctionName,$strError) = @_; { print "Error occoured in WgetDownload.pm\n". "In Function:".$strFunctionName."\n". "Error Message:".$strError."\n"; exit(-1); } } 1;