[14657] | 1 | ###########################################################################
|
---|
| 2 | #
|
---|
| 3 | # WgetDownload.pm -- Download base module that handles calling Wget
|
---|
| 4 | # A component of the Greenstone digital library software
|
---|
| 5 | # from the New Zealand Digital Library Project at the
|
---|
| 6 | # University of Waikato, New Zealand.
|
---|
| 7 | #
|
---|
| 8 | # Copyright (C) 2006 New Zealand Digital Library Project
|
---|
| 9 | #
|
---|
| 10 | # This program is free software; you can redistribute it and/or modify
|
---|
| 11 | # it under the terms of the GNU General Public License as published by
|
---|
| 12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
| 13 | # (at your option) any later version.
|
---|
| 14 | #
|
---|
| 15 | # This program is distributed in the hope that it will be useful,
|
---|
| 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 18 | # GNU General Public License for more details.
|
---|
| 19 | #
|
---|
| 20 | # You should have received a copy of the GNU General Public License
|
---|
| 21 | # along with this program; if not, write to the Free Software
|
---|
| 22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 23 | #
|
---|
| 24 | ###########################################################################
|
---|
| 25 |
|
---|
| 26 | package WgetDownload;
|
---|
| 27 |
|
---|
| 28 | eval {require bytes};
|
---|
| 29 |
|
---|
| 30 | # suppress the annoying "subroutine redefined" warning that various
|
---|
| 31 | # plugins cause under perl 5.6
|
---|
| 32 | $SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
|
---|
| 33 |
|
---|
[17207] | 34 | use BaseDownload;
|
---|
[14657] | 35 | use strict;
|
---|
| 36 | use IPC::Open2;
|
---|
| 37 | use Cwd;
|
---|
| 38 |
|
---|
| 39 | sub BEGIN {
|
---|
[17207] | 40 | @WgetDownload::ISA = ('BaseDownload');
|
---|
[14657] | 41 | }
|
---|
| 42 |
|
---|
| 43 | my $arguments =
|
---|
| 44 | [ { 'name' => "proxy_on",
|
---|
| 45 | 'desc' => "{WgetDownload.proxy_on}",
|
---|
| 46 | 'type' => "flag",
|
---|
| 47 | 'reqd' => "no",
|
---|
| 48 | 'hiddengli' => "yes"},
|
---|
| 49 | { 'name' => "proxy_host",
|
---|
| 50 | 'desc' => "{WgetDownload.proxy_host}",
|
---|
| 51 | 'type' => "string",
|
---|
| 52 | 'reqd' => "no",
|
---|
| 53 | 'hiddengli' => "yes"},
|
---|
| 54 | { 'name' => "proxy_port",
|
---|
| 55 | 'desc' => "{WgetDownload.proxy_port}",
|
---|
| 56 | 'type' => "string",
|
---|
| 57 | 'reqd' => "no",
|
---|
| 58 | 'hiddengli' => "yes"},
|
---|
| 59 | { 'name' => "user_name",
|
---|
| 60 | 'desc' => "{WgetDownload.user_name}",
|
---|
| 61 | 'type' => "string",
|
---|
| 62 | 'reqd' => "no",
|
---|
| 63 | 'hiddengli' => "yes"},
|
---|
| 64 | { 'name' => "user_password",
|
---|
| 65 | 'desc' => "{WgetDownload.user_password}",
|
---|
| 66 | 'type' => "string",
|
---|
| 67 | 'reqd' => "no",
|
---|
| 68 | 'hiddengli' => "yes"}];
|
---|
| 69 |
|
---|
| 70 | my $options = { 'name' => "WgetDownload",
|
---|
| 71 | 'desc' => "{WgetDownload.desc}",
|
---|
| 72 | 'abstract' => "yes",
|
---|
| 73 | 'inherits' => "yes",
|
---|
| 74 | 'args' => $arguments };
|
---|
| 75 |
|
---|
| 76 |
|
---|
[17354] | 77 | # Declaring variables related to the wget child process so that the termination
|
---|
| 78 | # signal handler for SIGTERM can close the streams and tidy up before ending
|
---|
| 79 | # the child process.
|
---|
| 80 | my $childpid;
|
---|
| 81 | my($chld_out, $chld_in);
|
---|
| 82 |
|
---|
| 83 | # Handler called when this process is killed or abruptly ends due to receiving
|
---|
| 84 | # one of the terminating signals that this handler is registered to deal with.
|
---|
| 85 | sub abrupt_end_handler {
|
---|
| 86 | my $termination_signal = shift (@_);
|
---|
| 87 | {
|
---|
| 88 | if(defined $childpid) {
|
---|
| 89 | close($chld_out);
|
---|
| 90 | close($chld_in);
|
---|
| 91 |
|
---|
| 92 | # Send TERM signal to child process to terminate it. Sending the INT signal didn't work
|
---|
| 93 | # See http://perldoc.perl.org/perlipc.html#Signals
|
---|
| 94 | # Warning on using kill at http://perldoc.perl.org/perlfork.html
|
---|
| 95 | kill("TERM", $childpid);
|
---|
| 96 | }
|
---|
| 97 | }
|
---|
| 98 | exit(0);
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | # Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
|
---|
| 102 | # the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
|
---|
| 103 | # signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
|
---|
| 104 | # Note that SIGKILL can't be handled as the handler won't get called for it. More information:
|
---|
| 105 | # http://affy.blogspot.com/p5be/ch13.htm
|
---|
| 106 | # http://perldoc.perl.org/perlipc.html#Signals
|
---|
| 107 | $SIG{'INT'} = \&abrupt_end_handler;
|
---|
| 108 | $SIG{'TERM'} = \&abrupt_end_handler;
|
---|
| 109 |
|
---|
[14657] | 110 | sub new {
|
---|
| 111 | my ($class) = shift (@_);
|
---|
| 112 | my ($getlist,$inputargs,$hashArgOptLists) = @_;
|
---|
| 113 | push(@$getlist, $class);
|
---|
| 114 |
|
---|
[17207] | 115 | push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
|
---|
| 116 | push(@{$hashArgOptLists->{"OptList"}},$options);
|
---|
[14657] | 117 |
|
---|
[17207] | 118 | my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
|
---|
[14657] | 119 |
|
---|
| 120 | return bless $self, $class;
|
---|
| 121 | }
|
---|
| 122 |
|
---|
| 123 | sub checkWgetSetup
|
---|
| 124 | {
|
---|
| 125 | my ($self,$blnGliCall) = @_;
|
---|
| 126 | #TODO: proxy detection??
|
---|
| 127 |
|
---|
| 128 | if((!$blnGliCall) && $self->{'proxy_on'})
|
---|
| 129 | {
|
---|
| 130 | &checkProxySetup($self);
|
---|
| 131 | }
|
---|
| 132 | &checkURL($self);
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | sub getWgetOptions
|
---|
| 136 | {
|
---|
| 137 | my ($self) = @_;
|
---|
| 138 | my $strOptions = "";
|
---|
| 139 |
|
---|
[16791] | 140 | if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'})
|
---|
[14657] | 141 | {
|
---|
[14918] | 142 |
|
---|
| 143 | $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
|
---|
| 144 |
|
---|
| 145 | if ($self->{'user_name'} && $self->{'user_password'})
|
---|
| 146 | {
|
---|
| 147 | $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
|
---|
| 148 | }
|
---|
[14657] | 149 | }
|
---|
| 150 |
|
---|
[16791] | 151 | if ($self->{'proxy_on'}) {
|
---|
| 152 | $strOptions .= " --proxy ";
|
---|
| 153 | }
|
---|
[14918] | 154 |
|
---|
[14657] | 155 | return $strOptions;
|
---|
| 156 | }
|
---|
| 157 |
|
---|
| 158 | # Checking for proxy setup: proxy server, proxy port, proxy username and password.
|
---|
| 159 | sub checkProxySetup
|
---|
| 160 | {
|
---|
| 161 | my ($self) = @_;
|
---|
| 162 | ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
|
---|
| 163 | # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
|
---|
| 164 | # Test if the connection is succeful. If the connection wasn't succeful then ask user to supply username and password.
|
---|
| 165 |
|
---|
| 166 | }
|
---|
| 167 |
|
---|
| 168 | sub useWget
|
---|
| 169 | {
|
---|
| 170 | my ($self, $cmdWget,$blnShow, $working_dir) = @_;
|
---|
| 171 |
|
---|
| 172 | my ($strReadIn,$strLine,$command);
|
---|
| 173 | $strReadIn = "" unless defined $strReadIn;
|
---|
| 174 |
|
---|
| 175 | my $current_dir = cwd();
|
---|
| 176 | my $changed_dir = 0;
|
---|
| 177 | if (defined $working_dir && -e $working_dir) {
|
---|
| 178 | chdir "$working_dir";
|
---|
| 179 | $changed_dir = 1;
|
---|
| 180 | }
|
---|
| 181 | my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
|
---|
[17354] | 182 | #$command = "\"$wget_file_path\" $cmdWget |"; #open(*WIN,$command) || die "wget request failed: $!\n";
|
---|
| 183 | #open(*WIN,$command) || die "wget request failed: $!\n";
|
---|
[14657] | 184 |
|
---|
| 185 |
|
---|
[17354] | 186 | $command = "\"$wget_file_path\" $cmdWget";
|
---|
| 187 | $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
|
---|
| 188 |
|
---|
| 189 | while (defined($strLine=<$chld_out>)) # we're reading in from child process' stdout
|
---|
[14657] | 190 | {
|
---|
| 191 | if($blnShow)
|
---|
| 192 | {
|
---|
| 193 | print STDERR "$strReadIn\n";
|
---|
| 194 | }
|
---|
| 195 |
|
---|
| 196 | $strReadIn .= $strLine;
|
---|
| 197 | }
|
---|
[17354] | 198 |
|
---|
| 199 | close($chld_in);
|
---|
| 200 | close($chld_out);
|
---|
| 201 |
|
---|
| 202 | # Program terminates only when the following line is included
|
---|
| 203 | # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
|
---|
| 204 | # it prevents the child from turning into a "zombie process".
|
---|
| 205 | # While the wget process terminates without it, this perl script does not:
|
---|
| 206 | # the DOS prompt is not returned without it.
|
---|
| 207 | waitpid $childpid, 0;
|
---|
[14657] | 208 |
|
---|
| 209 | if ($changed_dir) {
|
---|
| 210 | chdir $current_dir;
|
---|
| 211 | }
|
---|
| 212 |
|
---|
| 213 | return $strReadIn;
|
---|
| 214 | }
|
---|
| 215 |
|
---|
[16791] | 216 |
|
---|
| 217 | sub useWgetMonitored
|
---|
| 218 | {
|
---|
| 219 | my ($self, $cmdWget,$blnShow, $working_dir) = @_;
|
---|
| 220 |
|
---|
| 221 |
|
---|
| 222 | my $current_dir = cwd();
|
---|
| 223 | my $changed_dir = 0;
|
---|
| 224 | if (defined $working_dir && -e $working_dir) {
|
---|
| 225 | chdir "$working_dir";
|
---|
| 226 | $changed_dir = 1;
|
---|
| 227 | }
|
---|
| 228 | my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
|
---|
[17354] | 229 | #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n";
|
---|
[16791] | 230 | ### print STDERR "**** wget cmd = $command\n";
|
---|
[17354] | 231 | #open(*WIN,$command) || die "wget request failed: $!\n";
|
---|
[16791] | 232 |
|
---|
[17354] | 233 | my $command = "\"$wget_file_path\" $cmdWget";
|
---|
| 234 | $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
|
---|
[16791] | 235 |
|
---|
| 236 | my $full_text = "";
|
---|
| 237 | my $error_text = "";
|
---|
| 238 | my @follow_list = ();
|
---|
| 239 | my $line;
|
---|
| 240 |
|
---|
[17354] | 241 | while (defined($line=<$chld_out>)) # we're reading in from child process' stdout
|
---|
[16791] | 242 | {
|
---|
| 243 | if((defined $blnShow) && $blnShow)
|
---|
| 244 | {
|
---|
| 245 | print STDERR "$line";
|
---|
| 246 | }
|
---|
| 247 |
|
---|
| 248 | if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
|
---|
| 249 | my $follow_url = $1;
|
---|
| 250 | push(@follow_list,$follow_url);
|
---|
| 251 | }
|
---|
| 252 |
|
---|
| 253 | if ($line =~ m/ERROR\s+\d+/) {
|
---|
| 254 | $error_text .= $line;
|
---|
| 255 | }
|
---|
| 256 |
|
---|
| 257 | $full_text .= $line;
|
---|
| 258 | }
|
---|
| 259 |
|
---|
[17354] | 260 | close($chld_in);
|
---|
| 261 | close($chld_out);
|
---|
[16791] | 262 |
|
---|
[17354] | 263 | # Program terminates only when the following line is included
|
---|
| 264 | # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
|
---|
| 265 | # it prevents the child from turning into a "zombie process".
|
---|
| 266 | # While the wget process terminates without it, this perl script does not:
|
---|
| 267 | # the DOS prompt is not returned without it.
|
---|
| 268 | waitpid $childpid, 0;
|
---|
| 269 |
|
---|
[16791] | 270 | my $command_status = $?;
|
---|
| 271 | if ($command_status != 0) {
|
---|
| 272 | $error_text .= "Exit error: $command_status";
|
---|
| 273 | }
|
---|
| 274 |
|
---|
| 275 | if ($changed_dir) {
|
---|
| 276 | chdir $current_dir;
|
---|
| 277 | }
|
---|
| 278 |
|
---|
| 279 | my $final_follow = pop(@follow_list); # might be undefined, but that's OK
|
---|
| 280 |
|
---|
| 281 | return ($full_text,$error_text,$final_follow);
|
---|
| 282 | }
|
---|
| 283 |
|
---|
| 284 |
|
---|
[14657] | 285 | # TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
|
---|
| 286 | sub checkURL
|
---|
| 287 | {
|
---|
| 288 | my ($self) = @_;
|
---|
| 289 | if ($self->{'url'} eq "")
|
---|
| 290 | {
|
---|
| 291 | &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
|
---|
| 292 | }
|
---|
| 293 | }
|
---|
| 294 |
|
---|
| 295 | sub error
|
---|
| 296 | {
|
---|
| 297 | my ($strFunctionName,$strError) = @_;
|
---|
| 298 | {
|
---|
| 299 | print "Error occoured in WgetDownload.pm\n".
|
---|
| 300 | "In Function:".$strFunctionName."\n".
|
---|
| 301 | "Error Message:".$strError."\n";
|
---|
| 302 | exit(-1);
|
---|
| 303 | }
|
---|
| 304 | }
|
---|
| 305 |
|
---|
| 306 | 1;
|
---|