source: main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm@ 31920

Last change on this file since 31920 was 31920, checked in by ak19, 7 years ago

Untested on Windows as yet. 1. Major overhaul to WgetDownload's useWget() and useWgetMonitored() subroutines. Their use of open3 was wrong and would cause blocking if proxy set wrong or if https_proxy not set/set wrong and the url entered was http but resolves to https. The problem was more fundamental than the symptoms indicated the open3() calls were used wrong and resulted in blocking. The blocking could be indefinite. To generally avoid blocking, needed to use IO::select() to loop to check any child streams that are ready. To avoid possibly indefinite blocking, needed to use IO::select() with a timeout on the can_read() method. The need for all these and their use is indicated in the links added to the committed version of this module. 2. After the use of select() worked in principle, there was still the large problem that terminating unnaturally did not stop a second wget that had been launched. This unexpectedly had to do with doublequotes around wget's path that attempted to preserve any spaces in the path, but which behaved differently with open3(): any double quotes launched a subshell to run the command passed to open3(). And the wget cmd launched by the subshell cmd wasn't actually a child process, so it could not be terminated via the parentpid used as a processgrouppid when doing the kill TERM -processgroupid. The solution lay with the unexpected cause of the problem, which was the double quotes. Now the command passed to open3() is an array of parameters and no double quotes. The array is meant to preserve spaces in any filepaths. 3. Removed the 2 tries parameter passed to wget, since we now loop a certain number of times trying to read from the child process' streams each time this times out. If it times out n times, then we give up and assume that the URL could not be read.

  • Property svn:keywords set to Author Date Id Revision
File size: 29.1 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use util;
38use IPC::Open3;
39use IO::Select;
40use IO::Socket;
41
42
43sub BEGIN {
44 @WgetDownload::ISA = ('BaseDownload');
45}
46
47my $arguments =
48 [ { 'name' => "proxy_on",
49 'desc' => "{WgetDownload.proxy_on}",
50 'type' => "flag",
51 'reqd' => "no",
52 'hiddengli' => "yes"},
53 { 'name' => "http_proxy_host",
54 'desc' => "{WgetDownload.http_proxy_host}",
55 'type' => "string",
56 'reqd' => "no",
57 'hiddengli' => "yes"},
58 { 'name' => "http_proxy_port",
59 'desc' => "{WgetDownload.http_proxy_port}",
60 'type' => "string",
61 'reqd' => "no",
62 'hiddengli' => "yes"},
63 { 'name' => "https_proxy_host",
64 'desc' => "{WgetDownload.https_proxy_host}",
65 'type' => "string",
66 'reqd' => "no",
67 'hiddengli' => "yes"},
68 { 'name' => "https_proxy_port",
69 'desc' => "{WgetDownload.https_proxy_port}",
70 'type' => "string",
71 'reqd' => "no",
72 'hiddengli' => "yes"},
73 { 'name' => "ftp_proxy_host",
74 'desc' => "{WgetDownload.ftp_proxy_host}",
75 'type' => "string",
76 'reqd' => "no",
77 'hiddengli' => "yes"},
78 { 'name' => "ftp_proxy_port",
79 'desc' => "{WgetDownload.ftp_proxy_port}",
80 'type' => "string",
81 'reqd' => "no",
82 'hiddengli' => "yes"},
83 { 'name' => "user_name",
84 'desc' => "{WgetDownload.user_name}",
85 'type' => "string",
86 'reqd' => "no",
87 'hiddengli' => "yes"},
88 { 'name' => "user_password",
89 'desc' => "{WgetDownload.user_password}",
90 'type' => "string",
91 'reqd' => "no",
92 'hiddengli' => "yes"},
93 { 'name' => "no_check_certificate",
94 'desc' => "{WgetDownload.no_check_certificate}",
95 'type' => "flag",
96 'reqd' => "no",
97 'hiddengli' => "yes"}
98 ];
99
100my $options = { 'name' => "WgetDownload",
101 'desc' => "{WgetDownload.desc}",
102 'abstract' => "yes",
103 'inherits' => "yes",
104 'args' => $arguments };
105
106
107# Declaring file global variables related to the wget child process so that
108# the termination signal handler for SIGTERM can close the streams and tidy
109# up before ending the child process.
110my $childpid;
111my ($chld_out, $chld_in);
112my ($serverSocket, $read_set);
113
114my $TIMEOUT = 1; # seconds
115my $NUM_TRIES = 10;
116
117# The port this script's server socket will be listening on, to handle
118# incoming signals from GLI to terminate wget. This is also file global,
119# since OAIDownload.pm will make several calls on wget using the same
120# instance of this script and we want to reuse whatever port GLI gave us.
121my $port;
122
123# When this script is called from the command line, this handler will be called
124# if this process is killed or abruptly ends due to receiving one of the
125# terminating signals that this handler is registered to deal with.
126sub abrupt_end_handler {
127 my $termination_signal = shift (@_);
128
129 if(defined $childpid) {
130 close($chld_out);
131 close($chld_in);
132
133 print STDOUT "Received termination signal: $termination_signal\n";
134
135 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
136 # See http://perldoc.perl.org/perlipc.html#Signals
137 # Warning on using kill at http://perldoc.perl.org/perlfork.html
138 kill("TERM", $childpid); # prefix - to signal to kill process group
139
140 # If the SIGTERM sent on Linux calls this handler, we want to make
141 # sure any socket connection is closed.
142 # Otherwise sockets are only used when this script is run from GLI
143 # in which case the handlers don't really get called.
144 if(defined $serverSocket) {
145 $read_set->remove($serverSocket) if defined $read_set;
146 close($serverSocket);
147 }
148 }
149
150 exit(0);
151}
152
153# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
154# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
155# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
156# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
157# http://affy.blogspot.com/p5be/ch13.htm
158# http://perldoc.perl.org/perlipc.html#Signals
159$SIG{'INT'} = \&abrupt_end_handler;
160$SIG{'TERM'} = \&abrupt_end_handler;
161
162sub new {
163 my ($class) = shift (@_);
164 my ($getlist,$inputargs,$hashArgOptLists) = @_;
165 push(@$getlist, $class);
166
167 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
168 push(@{$hashArgOptLists->{"OptList"}},$options);
169
170 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
171
172 # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
173 &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
174
175 return bless $self, $class;
176}
177
178sub checkWgetSetup
179{
180 my ($self,$blnGliCall) = @_;
181 #TODO: proxy detection??
182
183 if((!$blnGliCall) && $self->{'proxy_on'})
184 {
185 &checkProxySetup($self);
186 }
187 &checkURL($self);
188}
189
190# Not using this. On Windows, we used to pass proxying settings as flags to wget. But, as that can be
191# seen with Task Manager, we now have the proxy settings set in the environment and are no longer passing it
192sub addProxySettingsAsWgetFlags
193{
194 my ($self) = @_;
195 my $strOptions = "";
196
197 if($self->{'http_proxy_host'} && $self->{'http_proxy_port'}) {
198 $strOptions .= " -e http_proxy=$self->{'http_proxy_host'}:$self->{'http_proxy_port'} ";
199 }
200 if($self->{'https_proxy_host'} && $self->{'https_proxy_port'}) {
201 $strOptions .= " -e https_proxy=$self->{'https_proxy_host'}:$self->{'https_proxy_port'} ";
202 }
203 if($self->{'ftp_proxy_host'} && $self->{'ftp_proxy_port'}) {
204 $strOptions .= " -e ftp_proxy=$self->{'ftp_proxy_host'}:$self->{'ftp_proxy_port'} ";
205 }
206
207 # For wget, there is only one set pair of proxy-user and proxy-passwd, so wget seems to assume
208 # that all 3 proxy protocols (http|https|ftp) will use the same username and pwd combination?
209 # Note that this only matters when passing the proxying details as flags to wget, not when
210 # the proxies are setup as environment variables.
211 if ($self->{'user_name'} && $self->{'user_password'})
212 {
213 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
214 # how is "--proxy-passwd" instead of "--proxy-password" even working????
215 # see https://www.gnu.org/software/wget/manual/html_node/Proxies.html
216 # and https://www.gnu.org/software/wget/manual/wget.html
217 # Not touching this, in case the manual is simply wrong. Since our code works in
218 # practice (when we were still using wget proxy username/pwd flags for windows).
219 }
220
221 return $strOptions;
222}
223
224sub getWgetOptions
225{
226 my ($self) = @_;
227 my $strOptions = "";
228
229 # If proxy settings are set up in the environment, wget is ready to use them. More secure.
230 # But if proxy settings are not set up in the environment, pass them as flags to wget
231 # This is less secure, as pwd etc visible in task manager, but it was the original way in
232 # which wget was run on windows.
233 # Truth in Perl: https://home.ubalt.edu/abento/452/perl/perltruth.html
234 # http://www.perlmonks.org/?node=what%20is%20true%20and%20false%20in%20Perl%3F
235
236 if ($self->{'proxy_on'}) {
237 if(!$ENV{'http_proxy'} && !$ENV{'https_proxy'} && !$ENV{'ftp_proxy'}) {
238 $strOptions .= $self->addProxySettingsAsWgetFlags();
239 } # else wget will use proxy settings in environment, assume enough settings have been provided
240 # either way, we're using the proxy
241 $strOptions .= " --proxy ";
242 }
243
244 if($self->{'no_check_certificate'}) { # URL may be http that gets redirected to https, so if no_check_certificate is on, turn it on even if URL is http
245
246 $strOptions .= " --no-check-certificate ";
247 }
248
249 return $strOptions;
250}
251
252# Checking for proxy setup: proxy server, proxy port, proxy username and password.
253sub checkProxySetup
254{
255 my ($self) = @_;
256 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
257 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
258 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
259
260}
261
262# Returns true if the wget status needs to be monitored through sockets
263# (if a socket is used to communicate with the Java program on when to
264# terminate wget). True if we are running gli, or if the particular type
265# of WgetDownload is *not* OAIDownload (in that case, the original way of
266# terminating the perl script from Java would terminate wget as well).
267sub dealingWithSockets() {
268 my ($self) = @_;
269 return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
270 # use ref($self) to find the classname of an object
271}
272
273# Shouldn't use double quotes around wget path after all? See final comment at
274# http://www.perlmonks.org/?node_id=394709
275# http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
276sub useWget
277{
278 #local $| = 1; # autoflush stdout buffer
279 #print STDOUT "*** Start of subroutine useWget in $0\n";
280
281 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
282
283 my ($strReadIn,$strLine,$command);
284 $strReadIn = "" unless defined $strReadIn;
285
286 my $current_dir = cwd();
287 my $changed_dir = 0;
288 if (defined $working_dir && -e $working_dir) {
289 chdir "$working_dir";
290 $changed_dir = 1;
291 }
292
293 # When we are running this script through GLI, the SIGTERM signal handler
294 # won't get called on Windows when wget is to be prematurely terminated.
295 # Instead, when wget has to be terminated in the middle of execution, GLI will
296 # connect to a serverSocket here to communicate when it's time to stop wget.
297 if($self->dealingWithSockets()) {
298
299 $port = <STDIN>; # gets a port on localhost that's not yet in use
300 chomp($port);
301
302 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
303 LocalPort => $port,
304 Listen => 1,
305 Reuse => 1);
306
307 die "can't setup server" unless $serverSocket;
308 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
309
310 $read_set = new IO::Select(); # create handle set for reading
311 $read_set->add($serverSocket); # add the main socket to the set
312 }
313
314 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
315 # compose the command as an array rather than as a string, to preserve spaces in the filepath
316 # because single/double quotes using open3 seem to launch a shell, see final comment at
317 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget
318 # as 2 processes then got launched which don't have parent-child pid relationship.
319 my @commandargs = split(' ', $cmdWget);
320 unshift(@commandargs, $wget_file_path);
321 $command = "$wget_file_path $cmdWget";
322# print STDOUT "Command is: $command\n"; # displayed in GLI output
323# print STDERR "Command is: $command\n"; # goes into ServerInfoDialog
324
325 # Wget's output needs to be monitored to find out when it has naturally terminated.
326 # Wget's output is sent to its STDERR so we can't use open2 without doing 2>&1.
327 # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing
328 # the childpid does not kill wget on Linux but the subshell that launched it instead.
329 # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?],
330 # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!)
331 # However that may be, it works with open3. But to avoid the confusion of managing and
332 # closing an extra unused handle, a single handle is used instead for both the child's
333 # stderr and stdout.
334 # See http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
335 # for why this is the right thing to do.
336
337 # Both open2 and open3 don't return on failure, but raise an exception. The handling
338 # of the exception is described on p.568 of the Perl Cookbook
339 eval {
340 #$childpid = open3($chld_in, $chld_out, $chld_out, $command);
341 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
342 };
343 if ($@) {
344 if($@ =~ m/^open3/) {
345 die "open3 failed in $0: $!\n$@\n";
346 }
347 die "Tried to launch open3 in $0, got unexpected exception: $@";
348 }
349
350 # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking
351 # if defined(my $strLine=<$chld_out>)
352 # Google: perl open3 read timeout
353 # Google: perl open3 select() example
354 # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked
355 # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl
356 # https://gist.github.com/shalk/6988937
357 # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung
358 # http://perldoc.perl.org/IO/Select.html
359 # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3
360 # http://www.perlmonks.org/?node_id=951554
361 # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm
362 # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1
363
364 # create the select object and add our streamhandle(s)
365 my $sel = new IO::Select;
366 $sel->add($chld_out);
367
368 my $num_consecutive_timedouts = 0;
369 my $error = 0;
370 my $loop = 1;
371
372 while($loop)
373 {
374 # assume we're going to timeout trying to read from child process
375 $num_consecutive_timedouts++;
376
377
378 # block until data is available on the registered filehandles or until the timeout specified
379 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
380
381 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
382 # since we're in this if statement
383
384 # now there's a list of registered filehandles we can read from to loop through reading from.
385 # though we've registered only one, chld_out
386 foreach my $fh (@readyhandles) {
387 my $strLine;
388 #sleep 3;
389
390 # read up to 4096 bytes from this filehandle fh.
391 # if there is less than 4096 bytes, we'll only get
392 # those available bytes and won't block. If there
393 # is more than 4096 bytes, we'll only read 4096 and
394 # wait for the next iteration through the loop to
395 # read the rest.
396 my $len = sysread($fh, $strLine, 4096);
397
398 if($len) { # read something
399 if($blnShow) {
400 print STDERR "$strLine\n";
401 }
402 $strReadIn .= $strLine;
403 }
404 else { # error or EOF: (!defined $len || $len == 0)
405
406 if(!defined $len) { # error reading
407 print STDERR "WgetDownload: Error reading from child stream: $!\n";
408 # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing
409 $error = 1;
410 }
411 elsif ($len == 0) { # EOF
412 # Finished reading from this filehand $fh because we read 0 bytes.
413 # wget finished, terminate naturally
414 print STDERR "WgetDownload: wget finished\n";
415 #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n";
416 }
417
418 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
419
420 # last; # if we have more than one filehandle registered with IO::Select
421
422 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
423
424 } # end else error or EOF
425
426 } # end foreach on readyhandles
427 } # end if on can_read
428
429 if($num_consecutive_timedouts >= $NUM_TRIES) {
430 $error = 1;
431 $loop = 0; # to break out of outer while loop
432
433 $num_consecutive_timedouts = 0;
434
435 print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
436 print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
437 #print STDERR "\tConsider cancelling this download?\n";
438 }
439
440 if($loop == 0) { # error or EOF, either way, clean up
441 if($error) {
442 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
443
444 if(kill(0, $childpid)) {
445 # If kill(0, $childpid) returns true, then the process is running
446 # and we need to kill it.
447 close($chld_in);
448 close($chld_out);
449 kill('TERM', $childpid); # kill the process group by prefixing - to signal
450
451 # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
452 # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
453 print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";
454
455 #die "Perl terminated wget after timing out repeatedly and is about to exit\n";
456 }
457 }
458 else { # wget finished (no errors), terminate naturally
459 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
460 close($chld_in);
461 close($chld_out);
462 waitpid $childpid, 0;
463 }
464
465 # error or not
466 $childpid = undef;
467 # Stop monitoring the read_handle and close the serverSocket
468 # (the Java end will close the client socket that Java opened)
469 if(defined $port) {
470 $read_set->remove($serverSocket);
471 close($serverSocket);
472 }
473 }
474
475 # If we've already terminated, we can get out of the loop
476 #next if($loop == 0);
477
478 # if we run this script from the command-line (as opposed to from GLI),
479 # then we're not working with sockets and can therefore skip the next bits
480 next unless(defined $port);
481
482 # http://www.perlfect.com/articles/select.shtml
483 # "multiplex between several filehandles within a single thread of control,
484 # thus creating the effect of parallelism in the handling of I/O."
485 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
486
487 # take all readable handles in turn
488 foreach my $rh (@rh_set) {
489 if($rh == $serverSocket) {
490 my $client = $rh->accept();
491 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
492 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
493
494 # Read from the client (getting rid of the trailing newline)
495 # Has the client sent the <<STOP>> signal?
496 my $signal = <$client>;
497 chomp($signal);
498 if($signal eq "<<STOP>>") {
499 print $client "Perl received STOP signal (on port $port): stopping wget\n";
500 $loop = 0; # out of outer while loop
501 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
502
503 # Sometimes the wget process takes some time to start up. If the STOP signal
504 # was sent, don't try to terminate the process until we know it is running.
505 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
506 # for it to start up, checking for whether it is running in order to kill it.
507 for(my $seconds = 1; $seconds <= 5; $seconds++) {
508 if(kill(0, $childpid)) {
509 # If kill(0, $childpid) returns true, then the process is running
510 # and we need to kill it.
511 close($chld_in);
512 close($chld_out);
513 kill("TERM", $childpid); # prefix - to signal to kill process group
514
515 $childpid = undef;
516
517 # Stop monitoring the read_handle and close the serverSocket
518 # (the Java end will close the client socket that Java opened)
519 $read_set->remove($rh); #$read_set->remove($serverSocket);
520 close($rh); #close($serverSocket);
521 print $client "Perl terminated wget and is about to exit\n";
522 last; # out of inner for loop
523 }
524 else { # the process may just be starting up, wait
525 sleep(1);
526 }
527 }
528 last; # out of foreach loop
529 }
530 }
531 }
532 }
533
534 if ($changed_dir) {
535 chdir $current_dir;
536 }
537
538 return $strReadIn;
539}
540
541
542sub useWgetMonitored
543{
544 #local $| = 1; # autoflush stdout buffer
545 #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
546
547 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
548
549
550 my $current_dir = cwd();
551 my $changed_dir = 0;
552 if (defined $working_dir && -e $working_dir) {
553 chdir "$working_dir";
554 $changed_dir = 1;
555 }
556
557 # When we are running this script through GLI, the SIGTERM signal handler
558 # won't get called on Windows when wget is to be prematurely terminated.
559 # Instead, when wget has to be terminated in the middle of execution, GLI will
560 # connect to a serverSocket here to communicate when it's time to stop wget.
561 if($self->dealingWithSockets()) {
562
563 $port = <STDIN>; # gets a port on localhost that's not yet in use
564 chomp($port);
565
566 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
567 LocalPort => $port,
568 Listen => 1,
569 Reuse => 1);
570
571 die "can't setup server" unless $serverSocket;
572 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
573
574 $read_set = new IO::Select(); # create handle set for reading
575 $read_set->add($serverSocket); # add the main socket to the set
576 }
577
578 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
579 # compose the command as an array for open3, to preserve spaces in any filepath
580 my @commandargs = split(' ', $cmdWget);
581 unshift(@commandargs, $wget_file_path);
582 my $command = "$wget_file_path $cmdWget";
583 #print STDOUT "Command is: $command\n";
584
585 eval { # see p.568 of Perl Cookbook
586 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
587 };
588 if ($@) {
589 if($@ =~ m/^open3/) {
590 die "open3 failed in $0: $!\n$@\n";
591 }
592 die "Tried to launch open3 in $0, got unexpected exception: $@";
593 }
594
595 my $full_text = "";
596 my $error_text = "";
597 my @follow_list = ();
598 my $line;
599
600 # create the select object and add our streamhandle(s)
601 my $sel = new IO::Select;
602 $sel->add($chld_out);
603
604 my $num_consecutive_timedouts = 0;
605 my $error = 0;
606 my $loop = 1;
607 while($loop)
608 {
609 # assume we're going to timeout trying to read from child process
610 $num_consecutive_timedouts++;
611
612 # block until data is available on the registered filehandles or until the timeout specified
613 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
614 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
615 # since we're in this if statement
616
617 foreach my $fh (@readyhandles) {
618 my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle
619 if($len) { # read something
620
621
622 if((defined $blnShow) && $blnShow)
623 {
624 print STDERR "$line";
625 }
626
627 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
628 my $follow_url = $1;
629 push(@follow_list,$follow_url);
630 }
631
632 if ($line =~ m/ERROR\s+\d+/) {
633 $error_text .= $line;
634 }
635
636 $full_text .= $line;
637 } else { # error or EOF
638 if(!defined $len) { # error reading
639 #print STDERR "WgetDownload: Error reading from child stream: $!\n";
640 $error = 1;
641 }
642 elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read
643 #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally
644 }
645
646 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
647
648 # last; # if we have more than one filehandle registered with IO::Select
649
650 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
651 } # end else error or EOF
652
653 } # end foreach on readyhandles
654 } # end if on can_read
655
656 if($num_consecutive_timedouts >= $NUM_TRIES) {
657 $error = 1;
658 $loop = 0; # to break out of outer while loop
659
660 $num_consecutive_timedouts = 0;
661
662 #print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
663 #print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
664 }
665
666 if($loop == 0) { # error or EOF, either way, clean up
667
668 if($error) {
669 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
670
671 if(kill(0, $childpid)) {
672 # If kill(0, $childpid) returns true, then the process is running
673 # and we need to kill it.
674 close($chld_in);
675 close($chld_out);
676 kill("TERM", $childpid); # prefix - to signal to kill process group
677
678 #die "Perl terminated wget after timing out repeatedly and is about to exit\n";
679 }
680 }
681 else { # wget finished, terminate naturally
682 close($chld_in);
683 close($chld_out);
684 # Program terminates only when the following line is included
685 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
686 # it prevents the child from turning into a "zombie process".
687 # While the wget process terminates without it, this perl script does not:
688 # the DOS prompt is not returned without it.
689 waitpid $childpid, 0;
690 }
691
692 # error or not:
693 $childpid = undef;
694 if(defined $port) {
695 $read_set->remove($serverSocket);
696 close($serverSocket);
697 }
698 }
699
700 # if we run this script from the command-line (as opposed to from GLI),
701 # then we're not working with sockets and can therefore skip the next bits
702 next unless(defined $port);
703
704 # http://www.perlfect.com/articles/select.shtml
705 # "multiplex between several filehandles within a single thread of control,
706 # thus creating the effect of parallelism in the handling of I/O."
707 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
708
709 # take all readable handles in turn
710 foreach my $rh (@rh_set) {
711 if($rh == $serverSocket) {
712 my $client = $rh->accept();
713 #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
714 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
715
716 # Read from the client (getting rid of trailing newline)
717 # Has the client sent the <<STOP>> signal?
718 my $signal = <$client>;
719 chomp($signal);
720 if($signal eq "<<STOP>>") {
721 print $client "Perl received STOP signal (on port $port): stopping wget\n";
722 $loop = 0; # out of outer while loop
723 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
724
725 # Sometimes the wget process takes some time to start up. If the STOP signal
726 # was sent, don't try to terminate the process until we know it is running.
727 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
728 # for it to start up, checking for whether it is running in order to kill it.
729 for(my $seconds = 1; $seconds <= 5; $seconds++) {
730 if(kill(0, $childpid)) {
731 # If kill(0, $childpid) returns true, then the process is running
732 # and we need to kill it.
733 close($chld_in);
734 close($chld_out);
735 kill("TERM", $childpid); # prefix - to signal to kill process group
736
737 $childpid = undef;
738
739 # Stop monitoring the read_handle and close the serverSocket
740 # (the Java end will close the client socket that Java opened)
741 $read_set->remove($rh); #$read_set->remove($serverSocket);
742 close($rh); #close($serverSocket);
743 print $client "Perl terminated wget and is about to exit\n";
744 last; # out of inner for loop
745 }
746 else { # the process may just be starting up, wait
747 sleep(1);
748 }
749 }
750 last; # out of foreach loop
751 }
752 }
753 }
754 }
755
756 my $command_status = $?;
757 if ($command_status != 0) {
758 $error_text .= "Exit error: $command_status";
759 }
760
761 if ($changed_dir) {
762 chdir $current_dir;
763 }
764
765 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
766
767 return ($full_text,$error_text,$final_follow);
768}
769
770
771# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
772sub checkURL
773{
774 my ($self) = @_;
775 if ($self->{'url'} eq "")
776 {
777 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
778 }
779}
780
781sub error
782{
783 my ($strFunctionName,$strError) = @_;
784 {
785 print "Error occoured in WgetDownload.pm\n".
786 "In Function:".$strFunctionName."\n".
787 "Error Message:".$strError."\n";
788 exit(-1);
789 }
790}
791
7921;
793
Note: See TracBrowser for help on using the repository browser.