source: gsdl/trunk/perllib/downloaders/WgetDownload.pm@ 17785

Last change on this file since 17785 was 17785, checked in by ak19, 15 years ago

Bugfix 1 of 2. On Linux, when a currently running Download Job is closed using the Close button in the download pannel, suddenly the GLI app window seemed to close as well (without the program actually terminating in most cases, since the download thread is still running, waiting for downloads). This only occurred on Windows. Now this has been fixed in the perl code by exiting the for loop that waits to kill the wget child process--for some reason, forgetting to break out of the for loop did not pose a similar problem on Windows thus far.

  • Property svn:keywords set to Author Date Id Revision
File size: 16.9 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use IPC::Open2;
38use IO::Select;
39use IO::Socket;
40
41
42sub BEGIN {
43 @WgetDownload::ISA = ('BaseDownload');
44}
45
46my $arguments =
47 [ { 'name' => "proxy_on",
48 'desc' => "{WgetDownload.proxy_on}",
49 'type' => "flag",
50 'reqd' => "no",
51 'hiddengli' => "yes"},
52 { 'name' => "proxy_host",
53 'desc' => "{WgetDownload.proxy_host}",
54 'type' => "string",
55 'reqd' => "no",
56 'hiddengli' => "yes"},
57 { 'name' => "proxy_port",
58 'desc' => "{WgetDownload.proxy_port}",
59 'type' => "string",
60 'reqd' => "no",
61 'hiddengli' => "yes"},
62 { 'name' => "user_name",
63 'desc' => "{WgetDownload.user_name}",
64 'type' => "string",
65 'reqd' => "no",
66 'hiddengli' => "yes"},
67 { 'name' => "user_password",
68 'desc' => "{WgetDownload.user_password}",
69 'type' => "string",
70 'reqd' => "no",
71 'hiddengli' => "yes"}];
72
73my $options = { 'name' => "WgetDownload",
74 'desc' => "{WgetDownload.desc}",
75 'abstract' => "yes",
76 'inherits' => "yes",
77 'args' => $arguments };
78
79
80# Declaring file global variables related to the wget child process so that
81# the termination signal handler for SIGTERM can close the streams and tidy
82# up before ending the child process.
83my $childpid;
84my ($chld_out, $chld_in);
85my ($serverSocket, $read_set);
86
87# The port this script's server socket will be listening on, to handle
88# incoming signals from GLI to terminate wget. This is also file global,
89# since OAIDownload.pm will make several calls on wget using the same
90# instance of this script and we want to reuse whatever port GLI gave us.
91my $port;
92
93# When this script is called from the command line, this handler will be called
94# if this process is killed or abruptly ends due to receiving one of the
95# terminating signals that this handler is registered to deal with.
96sub abrupt_end_handler {
97 my $termination_signal = shift (@_);
98
99 if(defined $childpid) {
100 close($chld_out);
101 close($chld_in);
102
103 print STDOUT "Received termination signal: $termination_signal\n";
104
105 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
106 # See http://perldoc.perl.org/perlipc.html#Signals
107 # Warning on using kill at http://perldoc.perl.org/perlfork.html
108 kill("TERM", $childpid);
109
110 # If the SIGTERM sent on Linux calls this handler, we want to make
111 # sure any socket connection is closed.
112 # Otherwise sockets are only used when this script is run from GLI
113 # in which case the handlers don't really get called.
114 if(defined $serverSocket) {
115 $read_set->remove($serverSocket) if defined $read_set;
116 close($serverSocket);
117 }
118 }
119
120 exit(0);
121}
122
123# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
124# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
125# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
126# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
127# http://affy.blogspot.com/p5be/ch13.htm
128# http://perldoc.perl.org/perlipc.html#Signals
129$SIG{'INT'} = \&abrupt_end_handler;
130$SIG{'TERM'} = \&abrupt_end_handler;
131
132sub new {
133 my ($class) = shift (@_);
134 my ($getlist,$inputargs,$hashArgOptLists) = @_;
135 push(@$getlist, $class);
136
137 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
138 push(@{$hashArgOptLists->{"OptList"}},$options);
139
140 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
141
142 return bless $self, $class;
143}
144
145sub checkWgetSetup
146{
147 my ($self,$blnGliCall) = @_;
148 #TODO: proxy detection??
149
150 if((!$blnGliCall) && $self->{'proxy_on'})
151 {
152 &checkProxySetup($self);
153 }
154 &checkURL($self);
155}
156
157sub getWgetOptions
158{
159 my ($self) = @_;
160 my $strOptions = "";
161
162 if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'})
163 {
164
165 $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
166
167 if ($self->{'user_name'} && $self->{'user_password'})
168 {
169 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
170 }
171 }
172
173 if ($self->{'proxy_on'}) {
174 $strOptions .= " --proxy ";
175 }
176
177 return $strOptions;
178}
179
180# Checking for proxy setup: proxy server, proxy port, proxy username and password.
181sub checkProxySetup
182{
183 my ($self) = @_;
184 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
185 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
186 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
187
188}
189
190# Returns true if the wget status needs to be monitored through sockets
191# (if a socket is used to communicate with the Java program on when to
192# terminate wget). True if we are running gli, or if the particular type
193# of WgetDownload is *not* OAIDownload (in that case, the original way of
194# terminating the perl script from Java terminated wget as well).
195sub dealingWithSockets() {
196 my ($self) = @_;
197 return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
198 # use ref($self) to find the classname of an object
199}
200
201sub useWget
202{
203 #local $| = 1; # autoflush stdout buffer
204 #print STDOUT "*** Start of subroutine useWget in $0\n";
205
206 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
207
208 my ($strReadIn,$strLine,$command);
209 $strReadIn = "" unless defined $strReadIn;
210
211 my $current_dir = cwd();
212 my $changed_dir = 0;
213 if (defined $working_dir && -e $working_dir) {
214 chdir "$working_dir";
215 $changed_dir = 1;
216 }
217
218 # When we are running this script through GLI, the SIGTERM signal handler
219 # won't get called on Windows when wget is to be prematurely terminated.
220 # Instead, when wget has to be terminated in the middle of execution, GLI will
221 # connect to a serverSocket here to communicate when it's time to stop wget.
222 if($self->dealingWithSockets()) {
223
224 $port = <STDIN>; # gets a port on localhost that's not yet in use
225 chomp($port);
226
227 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
228 LocalPort => $port,
229 Listen => 1,
230 Reuse => 1);
231
232 die "can't setup server" unless $serverSocket;
233 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
234
235 $read_set = new IO::Select(); # create handle set for reading
236 $read_set->add($serverSocket); # add the main socket to the set
237 }
238
239 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
240 $command = "\"$wget_file_path\" $cmdWget 2>&1";
241 # print STDERR "Command is: $command\n";
242 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
243
244 my $loop = 1;
245 while($loop)
246 {
247 if (defined(my $strLine=<$chld_out>)) { # we're reading in from child process' stdout
248 if($blnShow) {
249 print STDERR "$strLine\n";
250 }
251 $strReadIn .= $strLine;
252 }
253 else { # wget finished, terminate naturally
254 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
255 close($chld_in);
256 close($chld_out);
257 waitpid $childpid, 0;
258 $loop = 0;
259
260 $childpid = undef;
261 if(defined $port) {
262 $read_set->remove($serverSocket);
263 close($serverSocket);
264 }
265 }
266
267 # if we run this script from the command-line (as opposed to from GLI),
268 # then we're not working with sockets and can therefore can skip the next bits
269 next unless(defined $port);
270
271 # http://www.perlfect.com/articles/select.shtml
272 # "multiplex between several filehandles within a single thread of control,
273 # thus creating the effect of parallelism in the handling of I/O."
274 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
275
276 # take all readable handles in turn
277 foreach my $rh (@rh_set) {
278 if($rh == $serverSocket) {
279 my $client = $rh->accept();
280 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
281 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
282
283 # Read from the client (getting rid of the trailing newline)
284 # Has the client sent the <<STOP>> signal?
285 my $signal = <$client>;
286 chomp($signal);
287 if($signal eq "<<STOP>>") {
288 print $client "Perl received STOP signal (on port $port): stopping wget\n";
289 $loop = 0; # out of outer while loop
290 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
291
292 # Sometimes the wget process takes some time to start up. If the STOP signal
293 # was sent, don't try to terminate the process until we know it is running.
294 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
295 # for it to start up, checking for whether it is running in order to kill it.
296 for(my $seconds = 1; $seconds <= 5; $seconds++) {
297 if(kill(0, $childpid)) {
298 # If kill(0, $childpid) returns true, then the process is running
299 # and we need to kill it.
300 close($chld_in);
301 close($chld_out);
302 kill("TERM", $childpid);
303
304 $childpid = undef;
305
306 # Stop monitoring the read_handle and close the serverSocket
307 # (the Java end will close the client socket that Java opened)
308 $read_set->remove($rh); #$read_set->remove($serverSocket);
309 close($rh); #close($serverSocket);
310 print $client "Perl terminated wget and is about to exit\n";
311 last; # out of inner for loop
312 }
313 else { # the process may just be starting up, wait
314 sleep(1);
315 }
316 }
317 last; # out of foreach loop
318 }
319 }
320 }
321 }
322
323 if ($changed_dir) {
324 chdir $current_dir;
325 }
326
327 return $strReadIn;
328}
329
330
331sub useWgetMonitored
332{
333 #local $| = 1; # autoflush stdout buffer
334 #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
335
336 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
337
338
339 my $current_dir = cwd();
340 my $changed_dir = 0;
341 if (defined $working_dir && -e $working_dir) {
342 chdir "$working_dir";
343 $changed_dir = 1;
344 }
345
346 # When we are running this script through GLI, the SIGTERM signal handler
347 # won't get called on Windows when wget is to be prematurely terminated.
348 # Instead, when wget has to be terminated in the middle of execution, GLI will
349 # connect to a serverSocket here to communicate when it's time to stop wget.
350 if($self->dealingWithSockets()) {
351
352 $port = <STDIN>; # gets a port on localhost that's not yet in use
353 chomp($port);
354
355 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
356 LocalPort => $port,
357 Listen => 1,
358 Reuse => 1);
359
360 die "can't setup server" unless $serverSocket;
361 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
362
363 $read_set = new IO::Select(); # create handle set for reading
364 $read_set->add($serverSocket); # add the main socket to the set
365 }
366
367 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
368 #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n";
369 my $command = "\"$wget_file_path\" $cmdWget 2>&1";
370 # print STDERR "Command is: $command\n";
371 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
372
373 my $full_text = "";
374 my $error_text = "";
375 my @follow_list = ();
376 my $line;
377
378 my $loop = 1;
379 while($loop)
380 {
381 if (defined($line=<$chld_out>)) { # we're reading in from child process' stdout
382 if((defined $blnShow) && $blnShow)
383 {
384 print STDERR "$line";
385 }
386
387 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
388 my $follow_url = $1;
389 push(@follow_list,$follow_url);
390 }
391
392 if ($line =~ m/ERROR\s+\d+/) {
393 $error_text .= $line;
394 }
395
396 $full_text .= $line;
397 }
398 else { # wget finished, terminate naturally
399 #print STDERR "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
400 close($chld_in);
401 close($chld_out);
402 # Program terminates only when the following line is included
403 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
404 # it prevents the child from turning into a "zombie process".
405 # While the wget process terminates without it, this perl script does not:
406 # the DOS prompt is not returned without it.
407 waitpid $childpid, 0;
408 $loop = 0;
409
410 $childpid = undef;
411 if(defined $port) {
412 $read_set->remove($serverSocket);
413 close($serverSocket);
414 }
415 }
416
417 # if we run this script from the command-line (as opposed to from GLI),
418 # then we're not working with sockets and can therefore can skip the next bits
419 next unless(defined $port);
420
421 # http://www.perlfect.com/articles/select.shtml
422 # "multiplex between several filehandles within a single thread of control,
423 # thus creating the effect of parallelism in the handling of I/O."
424 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
425
426 # take all readable handles in turn
427 foreach my $rh (@rh_set) {
428 if($rh == $serverSocket) {
429 my $client = $rh->accept();
430 #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
431 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
432
433 # Read from the client (getting rid of trailing newline)
434 # Has the client sent the <<STOP>> signal?
435 my $signal = <$client>;
436 chomp($signal);
437 if($signal eq "<<STOP>>") {
438 print $client "Perl received STOP signal (on port $port): stopping wget\n";
439 $loop = 0; # out of outer while loop
440 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
441
442 # Sometimes the wget process takes some time to start up. If the STOP signal
443 # was sent, don't try to terminate the process until we know it is running.
444 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
445 # for it to start up, checking for whether it is running in order to kill it.
446 for(my $seconds = 1; $seconds <= 5; $seconds++) {
447 if(kill(0, $childpid)) {
448 # If kill(0, $childpid) returns true, then the process is running
449 # and we need to kill it.
450 close($chld_in);
451 close($chld_out);
452 kill("TERM", $childpid);
453
454 $childpid = undef;
455
456 # Stop monitoring the read_handle and close the serverSocket
457 # (the Java end will close the client socket that Java opened)
458 $read_set->remove($rh); #$read_set->remove($serverSocket);
459 close($rh); #close($serverSocket);
460 print $client "Perl terminated wget and is about to exit\n";
461 last; # out of inner for loop
462 }
463 else { # the process may just be starting up, wait
464 sleep(1);
465 }
466 }
467 last; # out of foreach loop
468 }
469 }
470 }
471 }
472
473 my $command_status = $?;
474 if ($command_status != 0) {
475 $error_text .= "Exit error: $command_status";
476 }
477
478 if ($changed_dir) {
479 chdir $current_dir;
480 }
481
482 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
483
484 return ($full_text,$error_text,$final_follow);
485}
486
487
488# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
489sub checkURL
490{
491 my ($self) = @_;
492 if ($self->{'url'} eq "")
493 {
494 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
495 }
496}
497
498sub error
499{
500 my ($strFunctionName,$strError) = @_;
501 {
502 print "Error occoured in WgetDownload.pm\n".
503 "In Function:".$strFunctionName."\n".
504 "Error Message:".$strError."\n";
505 exit(-1);
506 }
507}
508
5091;
Note: See TracBrowser for help on using the repository browser.