source: gsdl/trunk/perllib/downloaders/WgetDownload.pm@ 17537

Last change on this file since 17537 was 17537, checked in by ak19, 16 years ago

Subroutine useWgetMonitored updated to include the modifications made recently to subroutine useWget: uses a serverSocket to monitor any signals from GLI indicating that wget should be prematurely terminated.

  • Property svn:keywords set to Author Date Id Revision
File size: 14.6 KB
RevLine 
[14657]1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
[17207]34use BaseDownload;
[14657]35use strict;
[17529]36use Cwd;
[14657]37use IPC::Open2;
[17529]38use IO::Select;
39use IO::Socket;
[14657]40
[17529]41
[14657]42sub BEGIN {
[17207]43 @WgetDownload::ISA = ('BaseDownload');
[14657]44}
45
46my $arguments =
47 [ { 'name' => "proxy_on",
48 'desc' => "{WgetDownload.proxy_on}",
49 'type' => "flag",
50 'reqd' => "no",
51 'hiddengli' => "yes"},
52 { 'name' => "proxy_host",
53 'desc' => "{WgetDownload.proxy_host}",
54 'type' => "string",
55 'reqd' => "no",
56 'hiddengli' => "yes"},
57 { 'name' => "proxy_port",
58 'desc' => "{WgetDownload.proxy_port}",
59 'type' => "string",
60 'reqd' => "no",
61 'hiddengli' => "yes"},
62 { 'name' => "user_name",
63 'desc' => "{WgetDownload.user_name}",
64 'type' => "string",
65 'reqd' => "no",
66 'hiddengli' => "yes"},
67 { 'name' => "user_password",
68 'desc' => "{WgetDownload.user_password}",
69 'type' => "string",
70 'reqd' => "no",
71 'hiddengli' => "yes"}];
72
73my $options = { 'name' => "WgetDownload",
74 'desc' => "{WgetDownload.desc}",
75 'abstract' => "yes",
76 'inherits' => "yes",
77 'args' => $arguments };
78
79
[17529]80# Declaring file global variables related to the wget child process so that
81# the termination signal handler for SIGTERM can close the streams and tidy
82# up before ending the child process.
[17354]83my $childpid;
[17531]84my ($chld_out, $chld_in);
[17529]85my ($serverSocket, $read_set);
[17354]86
[17531]87# The port this script's server socket will be listening on, to handle
88# incoming signals from GLI to terminate wget. This is also file global,
89# since OAIDownload.pm will make several calls on wget using the same
90# instance of this script and we want to reuse whatever port GLI gave us.
91my $port;
92
[17529]93# When this script is called from the command line, this handler will be called
94# if this process is killed or abruptly ends due to receiving one of the
95# terminating signals that this handler is registered to deal with.
[17354]96sub abrupt_end_handler {
97 my $termination_signal = shift (@_);
[17529]98 if(defined $childpid) {
99 close($chld_out);
100 close($chld_in);
[17354]101
[17529]102 #print STDOUT "Received termination signal: $termination_signal\n";
103
104 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
105 # See http://perldoc.perl.org/perlipc.html#Signals
106 # Warning on using kill at http://perldoc.perl.org/perlfork.html
107 kill("TERM", $childpid);
108
109 # If the SIGTERM sent on Linux calls this handler, we want to make
110 # sure any socket connection is closed.
111 # Otherwise sockets are only used when this script is run from GLI
112 # in which case the handlers don't really get called.
113 if(defined $serverSocket) {
114 $read_set->remove($serverSocket) if defined $read_set;
115 close($serverSocket);
[17354]116 }
117 }
[17529]118
[17354]119 exit(0);
120}
121
122# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
123# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
124# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
125# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
126# http://affy.blogspot.com/p5be/ch13.htm
127# http://perldoc.perl.org/perlipc.html#Signals
128$SIG{'INT'} = \&abrupt_end_handler;
129$SIG{'TERM'} = \&abrupt_end_handler;
130
[14657]131sub new {
132 my ($class) = shift (@_);
133 my ($getlist,$inputargs,$hashArgOptLists) = @_;
134 push(@$getlist, $class);
135
[17207]136 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
137 push(@{$hashArgOptLists->{"OptList"}},$options);
[14657]138
[17207]139 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
[14657]140
141 return bless $self, $class;
142}
143
144sub checkWgetSetup
145{
146 my ($self,$blnGliCall) = @_;
147 #TODO: proxy detection??
148
149 if((!$blnGliCall) && $self->{'proxy_on'})
150 {
151 &checkProxySetup($self);
152 }
153 &checkURL($self);
154}
155
156sub getWgetOptions
157{
158 my ($self) = @_;
159 my $strOptions = "";
160
[16791]161 if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'})
[14657]162 {
[14918]163
164 $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
165
166 if ($self->{'user_name'} && $self->{'user_password'})
167 {
168 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
169 }
[14657]170 }
171
[16791]172 if ($self->{'proxy_on'}) {
173 $strOptions .= " --proxy ";
174 }
[14918]175
[14657]176 return $strOptions;
177}
178
179# Checking for proxy setup: proxy server, proxy port, proxy username and password.
180sub checkProxySetup
181{
182 my ($self) = @_;
183 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
184 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
[17529]185 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
[14657]186
187}
188
189sub useWget
190{
[17529]191 #local $| = 1; # autoflush stdout buffer
192 #print STDOUT "*** Start of subroutine useWget in $0\n";
193
[14657]194 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
195
196 my ($strReadIn,$strLine,$command);
197 $strReadIn = "" unless defined $strReadIn;
198
199 my $current_dir = cwd();
200 my $changed_dir = 0;
201 if (defined $working_dir && -e $working_dir) {
202 chdir "$working_dir";
203 $changed_dir = 1;
204 }
205
[17529]206 # When we are running this script through GLI, the SIGTERM signal handler
207 # won't get called on Windows when wget is to be prematurely terminated.
208 # Instead, when wget has to be terminated in the middle of execution, GLI will
209 # connect to a serverSocket here to communicate when it's time to stop wget.
[17531]210 if(defined $self->{'gli'} && $self->{'gli'} && !defined $port) {
[14657]211
[17529]212 $port = <STDIN>; # gets a port on localhost that's not yet in use
213 chomp($port);
214
215 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
216 LocalPort => $port,
217 Listen => 1,
218 Reuse => 1);
219
220 die "can't setup server" unless $serverSocket;
221 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
222
223 $read_set = new IO::Select(); # create handle set for reading
224 $read_set->add($serverSocket); # add the main socket to the set
225 }
226
227 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
228 $command = "\"$wget_file_path\" $cmdWget 2>&1";
229 # print STDERR "Command is: $command\n";
[17354]230 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
231
[17529]232 my $loop = 1;
233 while($loop)
[14657]234 {
[17529]235 if (defined(my $strLine=<$chld_out>)) { # we're reading in from child process' stdout
236 if($blnShow) {
237 print STDERR "$strLine\n";
238 }
239 $strReadIn .= $strLine;
240 }
241 else { # wget finished, terminate naturally
[17531]242 #print STDERR "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
[17529]243 close($chld_in);
244 close($chld_out);
245 waitpid $childpid, 0;
246 $loop = 0;
247
248 $childpid = undef;
249 if(defined $port) {
250 $read_set->remove($serverSocket);
251 close($serverSocket);
252 }
[14657]253 }
254
[17529]255 # if we run this script from the command-line (as opposed to from GLI),
256 # then we're not working with sockets and can therefore can skip the next bits
257 next unless(defined $port);
258
259 # http://www.perlfect.com/articles/select.shtml
260 # "multiplex between several filehandles within a single thread of control,
261 # thus creating the effect of parallelism in the handling of I/O."
262 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
263
264 # take all readable handles in turn
265 foreach my $rh (@rh_set) {
266 if($rh == $serverSocket) {
267 my $client = $rh->accept();
268 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
269 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
270
271 # Read from the client (getting rid of trailing newline)
272 # Has the client sent the <<STOP>> signal?
273 my $signal = <$client>;
274 chomp($signal);
275 if($signal eq "<<STOP>>") {
276 print $client "Perl received STOP signal (on port $port): stopping wget\n";
277
278 $loop = 0;
279 close($chld_in);
280 close($chld_out);
281 kill("TERM", $childpid);
282
283 $childpid = undef;
284
285 # Stop monitoring the read_handle
286 # close the serverSocket (the Java end will close the client socket that Java opened)
287 $read_set->remove($rh); #$read_set->remove($serverSocket);
288 close($rh); #close($serverSocket);
289 #print $client "Perl is about to exit\n";
290 last;
291 }
292 }
293 }
[14657]294 }
295
296 if ($changed_dir) {
297 chdir $current_dir;
298 }
299
300 return $strReadIn;
301}
302
[16791]303
304sub useWgetMonitored
305{
[17537]306 #local $| = 1; # autoflush stdout buffer
307 #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
308
[16791]309 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
310
311
312 my $current_dir = cwd();
313 my $changed_dir = 0;
314 if (defined $working_dir && -e $working_dir) {
315 chdir "$working_dir";
316 $changed_dir = 1;
317 }
[17537]318
319 # When we are running this script through GLI, the SIGTERM signal handler
320 # won't get called on Windows when wget is to be prematurely terminated.
321 # Instead, when wget has to be terminated in the middle of execution, GLI will
322 # connect to a serverSocket here to communicate when it's time to stop wget.
323 if(defined $self->{'gli'} && $self->{'gli'} && !defined $port) {
324
325 $port = <STDIN>; # gets a port on localhost that's not yet in use
326 chomp($port);
327
328 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
329 LocalPort => $port,
330 Listen => 1,
331 Reuse => 1);
332
333 die "can't setup server" unless $serverSocket;
334 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
335
336 $read_set = new IO::Select(); # create handle set for reading
337 $read_set->add($serverSocket); # add the main socket to the set
338 }
339
[16791]340 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
[17354]341 #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n";
[17537]342 my $command = "\"$wget_file_path\" $cmdWget 2>&1";
343 # print STDERR "Command is: $command\n";
[17354]344 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
[16791]345
346 my $full_text = "";
347 my $error_text = "";
348 my @follow_list = ();
349 my $line;
350
[17537]351 my $loop = 1;
352 while($loop)
[16791]353 {
[17537]354 if (defined($line=<$chld_out>)) { # we're reading in from child process' stdout
355 if((defined $blnShow) && $blnShow)
356 {
357 print STDERR "$line";
358 }
359
360 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
361 my $follow_url = $1;
362 push(@follow_list,$follow_url);
363 }
364
365 if ($line =~ m/ERROR\s+\d+/) {
366 $error_text .= $line;
367 }
368
369 $full_text .= $line;
370 }
371 else { # wget finished, terminate naturally
372 #print STDERR "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
373 close($chld_in);
374 close($chld_out);
375 # Program terminates only when the following line is included
376 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
377 # it prevents the child from turning into a "zombie process".
378 # While the wget process terminates without it, this perl script does not:
379 # the DOS prompt is not returned without it.
380 waitpid $childpid, 0;
381 $loop = 0;
382
383 $childpid = undef;
384 if(defined $port) {
385 $read_set->remove($serverSocket);
386 close($serverSocket);
387 }
[16791]388 }
389
[17537]390 # if we run this script from the command-line (as opposed to from GLI),
391 # then we're not working with sockets and can therefore can skip the next bits
392 next unless(defined $port);
[16791]393
[17537]394 # http://www.perlfect.com/articles/select.shtml
395 # "multiplex between several filehandles within a single thread of control,
396 # thus creating the effect of parallelism in the handling of I/O."
397 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
398
399 # take all readable handles in turn
400 foreach my $rh (@rh_set) {
401 if($rh == $serverSocket) {
402 my $client = $rh->accept();
403 #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
404 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
405
406 # Read from the client (getting rid of trailing newline)
407 # Has the client sent the <<STOP>> signal?
408 my $signal = <$client>;
409 chomp($signal);
410 if($signal eq "<<STOP>>") {
411 print $client "Perl received STOP signal (on port $port): stopping wget\n";
412
413 $loop = 0;
414 close($chld_in);
415 close($chld_out);
416 kill("TERM", $childpid);
417
418 $childpid = undef;
419
420 # Stop monitoring the read_handle
421 # close the serverSocket (the Java end will close the client socket that Java opened)
422 $read_set->remove($rh); #$read_set->remove($serverSocket);
423 close($rh); #close($serverSocket);
424 #print $client "Perl is about to exit\n";
425 last;
426 }
427 }
[16791]428 }
429 }
430
431 my $command_status = $?;
432 if ($command_status != 0) {
433 $error_text .= "Exit error: $command_status";
434 }
435
436 if ($changed_dir) {
437 chdir $current_dir;
438 }
439
440 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
441
442 return ($full_text,$error_text,$final_follow);
443}
444
445
[14657]446# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
447sub checkURL
448{
449 my ($self) = @_;
450 if ($self->{'url'} eq "")
451 {
452 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
453 }
454}
455
456sub error
457{
458 my ($strFunctionName,$strError) = @_;
459 {
460 print "Error occoured in WgetDownload.pm\n".
461 "In Function:".$strFunctionName."\n".
462 "Error Message:".$strError."\n";
463 exit(-1);
464 }
465}
466
4671;
Note: See TracBrowser for help on using the repository browser.