source: debug-testing/readWgetUsingAlarm/WgetDownload.pm

Last change on this file was 31925, checked in by ak19, 7 years ago

Part 2. Committing code. Since perl IO::Select only works on Windows with Sockets and not pipes to process streams, tried to use perl alarm() to emulate the can_read with timeout that IO::Select provides. Unfortunately, alarm() does not work out on windows when a system call blocks. At that point the alarm doesn't happen.

File size: 30.8 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use util;
38use IPC::Open3;
39use IO::Select;
40use IO::Socket;
41
42
43sub BEGIN {
44 @WgetDownload::ISA = ('BaseDownload');
45}
46
47my $arguments =
48 [ { 'name' => "proxy_on",
49 'desc' => "{WgetDownload.proxy_on}",
50 'type' => "flag",
51 'reqd' => "no",
52 'hiddengli' => "yes"},
53 { 'name' => "http_proxy_host",
54 'desc' => "{WgetDownload.http_proxy_host}",
55 'type' => "string",
56 'reqd' => "no",
57 'hiddengli' => "yes"},
58 { 'name' => "http_proxy_port",
59 'desc' => "{WgetDownload.http_proxy_port}",
60 'type' => "string",
61 'reqd' => "no",
62 'hiddengli' => "yes"},
63 { 'name' => "https_proxy_host",
64 'desc' => "{WgetDownload.https_proxy_host}",
65 'type' => "string",
66 'reqd' => "no",
67 'hiddengli' => "yes"},
68 { 'name' => "https_proxy_port",
69 'desc' => "{WgetDownload.https_proxy_port}",
70 'type' => "string",
71 'reqd' => "no",
72 'hiddengli' => "yes"},
73 { 'name' => "ftp_proxy_host",
74 'desc' => "{WgetDownload.ftp_proxy_host}",
75 'type' => "string",
76 'reqd' => "no",
77 'hiddengli' => "yes"},
78 { 'name' => "ftp_proxy_port",
79 'desc' => "{WgetDownload.ftp_proxy_port}",
80 'type' => "string",
81 'reqd' => "no",
82 'hiddengli' => "yes"},
83 { 'name' => "user_name",
84 'desc' => "{WgetDownload.user_name}",
85 'type' => "string",
86 'reqd' => "no",
87 'hiddengli' => "yes"},
88 { 'name' => "user_password",
89 'desc' => "{WgetDownload.user_password}",
90 'type' => "string",
91 'reqd' => "no",
92 'hiddengli' => "yes"},
93 { 'name' => "no_check_certificate",
94 'desc' => "{WgetDownload.no_check_certificate}",
95 'type' => "flag",
96 'reqd' => "no",
97 'hiddengli' => "yes"}
98 ];
99
100my $options = { 'name' => "WgetDownload",
101 'desc' => "{WgetDownload.desc}",
102 'abstract' => "yes",
103 'inherits' => "yes",
104 'args' => $arguments };
105
106
107# Declaring file global variables related to the wget child process so that
108# the termination signal handler for SIGTERM can close the streams and tidy
109# up before ending the child process.
110my $childpid;
111my ($chld_out, $chld_in);
112my ($serverSocket, $read_set);
113
114my $TIMEOUT = 1; # seconds
115my $NUM_TRIES = 10;
116
117# The port this script's server socket will be listening on, to handle
118# incoming signals from GLI to terminate wget. This is also file global,
119# since OAIDownload.pm will make several calls on wget using the same
120# instance of this script and we want to reuse whatever port GLI gave us.
121my $port;
122
123# When this script is called from the command line, this handler will be called
124# if this process is killed or abruptly ends due to receiving one of the
125# terminating signals that this handler is registered to deal with.
126sub abrupt_end_handler {
127 my $termination_signal = shift (@_);
128
129 if(defined $childpid) {
130 close($chld_out);
131 close($chld_in);
132
133 print STDOUT "Received termination signal: $termination_signal\n";
134
135 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
136 # See http://perldoc.perl.org/perlipc.html#Signals
137 # Warning on using kill at http://perldoc.perl.org/perlfork.html
138 kill("TERM", $childpid); # prefix - to signal to kill process group
139
140 # If the SIGTERM sent on Linux calls this handler, we want to make
141 # sure any socket connection is closed.
142 # Otherwise sockets are only used when this script is run from GLI
143 # in which case the handlers don't really get called.
144 if(defined $serverSocket) {
145 $read_set->remove($serverSocket) if defined $read_set;
146 close($serverSocket);
147 }
148 }
149
150 exit(0);
151}
152
153# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
154# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
155# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
156# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
157# http://affy.blogspot.com/p5be/ch13.htm
158# http://perldoc.perl.org/perlipc.html#Signals
159$SIG{'INT'} = \&abrupt_end_handler;
160$SIG{'TERM'} = \&abrupt_end_handler;
161
162sub new {
163 my ($class) = shift (@_);
164 my ($getlist,$inputargs,$hashArgOptLists) = @_;
165 push(@$getlist, $class);
166
167 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
168 push(@{$hashArgOptLists->{"OptList"}},$options);
169
170 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
171
172 # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
173 &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
174
175 return bless $self, $class;
176}
177
178sub checkWgetSetup
179{
180 my ($self,$blnGliCall) = @_;
181 #TODO: proxy detection??
182
183 if((!$blnGliCall) && $self->{'proxy_on'})
184 {
185 &checkProxySetup($self);
186 }
187 &checkURL($self);
188}
189
190# Not using this. On Windows, we used to pass proxying settings as flags to wget. But, as that can be
191# seen with Task Manager, we now have the proxy settings set in the environment and are no longer passing it
192sub addProxySettingsAsWgetFlags
193{
194 my ($self) = @_;
195 my $strOptions = "";
196
197 if($self->{'http_proxy_host'} && $self->{'http_proxy_port'}) {
198 $strOptions .= " -e http_proxy=$self->{'http_proxy_host'}:$self->{'http_proxy_port'} ";
199 }
200 if($self->{'https_proxy_host'} && $self->{'https_proxy_port'}) {
201 $strOptions .= " -e https_proxy=$self->{'https_proxy_host'}:$self->{'https_proxy_port'} ";
202 }
203 if($self->{'ftp_proxy_host'} && $self->{'ftp_proxy_port'}) {
204 $strOptions .= " -e ftp_proxy=$self->{'ftp_proxy_host'}:$self->{'ftp_proxy_port'} ";
205 }
206
207 # For wget, there is only one set pair of proxy-user and proxy-passwd, so wget seems to assume
208 # that all 3 proxy protocols (http|https|ftp) will use the same username and pwd combination?
209 # Note that this only matters when passing the proxying details as flags to wget, not when
210 # the proxies are setup as environment variables.
211 if ($self->{'user_name'} && $self->{'user_password'})
212 {
213 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
214 # how is "--proxy-passwd" instead of "--proxy-password" even working????
215 # see https://www.gnu.org/software/wget/manual/html_node/Proxies.html
216 # and https://www.gnu.org/software/wget/manual/wget.html
217 # Not touching this, in case the manual is simply wrong. Since our code works in
218 # practice (when we were still using wget proxy username/pwd flags for windows).
219 }
220
221 return $strOptions;
222}
223
224sub getWgetOptions
225{
226 my ($self) = @_;
227 my $strOptions = "";
228
229 # If proxy settings are set up in the environment, wget is ready to use them. More secure.
230 # But if proxy settings are not set up in the environment, pass them as flags to wget
231 # This is less secure, as pwd etc visible in task manager, but it was the original way in
232 # which wget was run on windows.
233 # Truth in Perl: https://home.ubalt.edu/abento/452/perl/perltruth.html
234 # http://www.perlmonks.org/?node=what%20is%20true%20and%20false%20in%20Perl%3F
235
236 if ($self->{'proxy_on'}) {
237 if(!$ENV{'http_proxy'} && !$ENV{'https_proxy'} && !$ENV{'ftp_proxy'}) {
238 $strOptions .= $self->addProxySettingsAsWgetFlags();
239 } # else wget will use proxy settings in environment, assume enough settings have been provided
240 # either way, we're using the proxy
241 $strOptions .= " --proxy ";
242 }
243
244 if($self->{'no_check_certificate'}) { # URL may be http that gets redirected to https, so if no_check_certificate is on, turn it on even if URL is http
245
246 $strOptions .= " --no-check-certificate ";
247 }
248
249 return $strOptions;
250}
251
252# Checking for proxy setup: proxy server, proxy port, proxy username and password.
253sub checkProxySetup
254{
255 my ($self) = @_;
256 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
257 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
258 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
259
260}
261
262# Returns true if the wget status needs to be monitored through sockets
263# (if a socket is used to communicate with the Java program on when to
264# terminate wget). True if we are running gli, or if the particular type
265# of WgetDownload is *not* OAIDownload (in that case, the original way of
266# terminating the perl script from Java would terminate wget as well).
267sub dealingWithSockets() {
268 my ($self) = @_;
269 return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
270 # use ref($self) to find the classname of an object
271}
272
273# Shouldn't use double quotes around wget path after all? See final comment at
274# http://www.perlmonks.org/?node_id=394709
275# http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
276#
277# IO::Select only works on Windows if using sockets, not if using pipes to child process streams
278# http://www.perlmonks.org/?node_id=869704
279# https://stackoverflow.com/questions/22339761/select-alternative-for-windows-for-both-console-input-and-sockets
280# https://stackoverflow.com/questions/36622189/clarification-regarding-ioselect
281#
282# Attempting to emulate IO::Select's can_read() timeout with alarm():
283# https://stackoverflow.com/questions/4517034/perl-set-read-timeout-in-client-socket
284# It looks like alarm() is not properly supported on Windows either.
285# See footnote of https://perldoc.perl.org/functions/alarm.html
286# http://www.perlmonks.org/bare/?node_id=405469
287# http://www.perlmonks.org/?node_id=881987
288# http://www.perlmonks.org/?node_id=1127570
289#
290# Support for alarm() exists in Strawberry Perl v 5.20 and ActivateState Perl v 5.8.something onwards,
291# however it is limited. The limitation is related to the entry about porting in the perldoc on alarm,
292# see https://perldoc.perl.org/perlport.html#alarm
293# "alarm(Win32) Emulated using timers that must be explicitly polled whenever Perl wants to dispatch "safe signals" and therefore cannot interrupt blocking system calls."
294# http://www.perlmonks.org/?node_id=1127475
295# http://www.perlmonks.org/?node_id=1127570
296# http://www.perlmonks.org/?node=552515
297#
298# Threads don't appear too safe
299# https://perldoc.perl.org/threads.html
300# https://perldoc.perl.org/perlpolicy.html#discouraged
301#
302# HOWEVER, could use Sockets instead of Pipes to process streams to communicate with child proc, so then can use IO::Sockets:
303# http://www.perlmonks.org/?node_id=869942
304# http://www.perlmonks.org/?node_id=811650
305# Though IPC::Run considered ideal, it has limitations on Windows:
306# http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm -> http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS
307#
308sub useWget
309{
310 #local $| = 1; # autoflush stdout buffer
311 #print STDOUT "*** Start of subroutine useWget in $0\n";
312
313 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
314
315 my ($strReadIn,$strLine,$command);
316 $strReadIn = "" unless defined $strReadIn;
317
318 my $current_dir = cwd();
319 my $changed_dir = 0;
320 if (defined $working_dir && -e $working_dir) {
321 chdir "$working_dir";
322 $changed_dir = 1;
323 }
324
325 # When we are running this script through GLI, the SIGTERM signal handler
326 # won't get called on Windows when wget is to be prematurely terminated.
327 # Instead, when wget has to be terminated in the middle of execution, GLI will
328 # connect to a serverSocket here to communicate when it's time to stop wget.
329 if($self->dealingWithSockets()) {
330
331 $port = <STDIN>; # gets a port on localhost that's not yet in use
332 chomp($port);
333
334 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
335 LocalPort => $port,
336 Listen => 1,
337 Reuse => 1);
338
339 die "can't setup server" unless $serverSocket;
340 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
341
342 $read_set = new IO::Select(); # create handle set for reading
343 $read_set->add($serverSocket); # add the main socket to the set
344 }
345
346 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
347 # compose the command as an array rather than as a string, to preserve spaces in the filepath
348 # because single/double quotes using open3 seem to launch a shell, see final comment at
349 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget
350 # as 2 processes then got launched which don't have parent-child pid relationship.
351 my @commandargs = split(' ', $cmdWget);
352 unshift(@commandargs, $wget_file_path);
353 $command = "$wget_file_path $cmdWget";
354 print STDOUT "Command is: $command\n"; # displayed in GLI output
355 print STDERR "Command is: $command\n"; # goes into ServerInfoDialog
356
357 # Wget's output needs to be monitored to find out when it has naturally terminated.
358 # Wget's output is sent to its STDERR so we can't use open2 without doing 2>&1.
359 # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing
360 # the childpid does not kill wget on Linux but the subshell that launched it instead.
361 # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?],
362 # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!)
363 # However that may be, it works with open3. But to avoid the confusion of managing and
364 # closing an extra unused handle, a single handle is used instead for both the child's
365 # stderr and stdout.
366 # See http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
367 # for why this is the right thing to do.
368
369 # Both open2 and open3 don't return on failure, but raise an exception. The handling
370 # of the exception is described on p.568 of the Perl Cookbook
371 eval {
372 #$childpid = open3($chld_in, $chld_out, $chld_out, $command);
373 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
374 };
375 if ($@) {
376 if($@ =~ m/^open3/) {
377 die "open3 failed in $0: $!\n$@\n";
378 }
379 die "Tried to launch open3 in $0, got unexpected exception: $@";
380 }
381
382 # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking
383 # if defined(my $strLine=<$chld_out>)
384 # Google: perl open3 read timeout
385 # Google: perl open3 select() example
386 # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked
387 # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl
388 # https://gist.github.com/shalk/6988937
389 # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung
390 # http://perldoc.perl.org/IO/Select.html
391 # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3
392 # http://www.perlmonks.org/?node_id=951554
393 # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm
394 # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1
395
396 my $num_consecutive_timedouts = 0;
397 my $error = 0;
398 my $loop = 1;
399 my $size = 4096;
400 my $ALARM_EXCEPTION = "alarm wget timed out";
401
402 while($loop)
403 {
404 my $strLine;
405 my $len = undef;
406
407 # https://stackoverflow.com/questions/4517034/perl-set-read-timeout-in-client-socket
408 eval {
409 local $SIG{ALRM} = sub { die "$ALARM_EXCEPTION\n" }; # NB: \n required
410 alarm $TIMEOUT;
411 print STDERR "#### BEFORE sysread\n";
412 $len = sysread($chld_out, $strLine, $size);
413 print STDERR "#### AFTER sysread\n";
414 alarm 0;
415 };
416 if ($@ && $@ =~ m/$ALARM_EXCEPTION/) { #if ($@) { # if ($@ && $@ =~ m/$ALARM_EXCEPTION/) # http://perldoc.perl.org/perlipc.html#Signals
417 print STDERR "@@@@ GOT ALARM: timed out reading from child proc\n";
418 $num_consecutive_timedouts++;
419 }
420 else { # didn't timeout, so success reading from child process
421
422 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
423 # since we're in this else statement
424
425 if($len) { # read something
426 if($blnShow) {
427 print STDERR "$strLine\n";
428 }
429 $strReadIn .= $strLine;
430 }
431 else { # error or EOF: (!defined $len || $len == 0)
432
433 if(!defined $len) { # error reading
434 print STDERR "WgetDownload: Error reading from child stream: $!\n";
435 # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing
436 $error = 1;
437 }
438 elsif ($len == 0) { # EOF
439 # Finished reading from this filehand $fh because we read 0 bytes.
440 # wget finished, terminate naturally
441 print STDERR "WgetDownload: wget finished\n";
442 #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n";
443 }
444
445 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
446
447 } # end else error or EOF
448
449
450 } # else on sysread succeeded vs alarm timeout
451
452
453 if($num_consecutive_timedouts >= $NUM_TRIES) {
454 $error = 1;
455 $loop = 0; # to break out of outer while loop
456
457 $num_consecutive_timedouts = 0;
458
459 print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
460 print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
461 #print STDERR "\tConsider cancelling this download?\n";
462 }
463
464 if($loop == 0) { # error or EOF, either way, clean up
465 if($error) {
466 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
467
468 if(kill(0, $childpid)) {
469 # If kill(0, $childpid) returns true, then the process is running
470 # and we need to kill it.
471 close($chld_in);
472 close($chld_out);
473 kill('TERM', $childpid); # kill the process group by prefixing - to signal
474
475 # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
476 # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
477 print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";
478
479 #die "Perl terminated wget after timing out repeatedly and is about to exit\n";
480 }
481 }
482 else { # wget finished (no errors), terminate naturally
483 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
484 close($chld_in);
485 close($chld_out);
486 waitpid $childpid, 0;
487 }
488
489 # error or not, loop out to be 0 now
490 $childpid = undef;
491 # Stop monitoring the read_handle and close the serverSocket
492 # (the Java end will close the client socket that Java opened)
493 if(defined $port) {
494 $read_set->remove($serverSocket);
495 close($serverSocket);
496 }
497 }
498
499 # If we've already terminated, we can get out of the loop
500 next if($loop == 0);
501
502 # Otherwise check for whether Java GLI attempted to connect to this perl script via socket
503
504 # if we run this script from the command-line (as opposed to from GLI),
505 # then we're not working with sockets and can therefore skip the next bits
506 next unless(defined $port);
507
508# print STDERR "@@@ Now checking to see if GLI is connecting to perl socket...\n";
509
510 # http://www.perlfect.com/articles/select.shtml
511 # "multiplex between several filehandles within a single thread of control,
512 # thus creating the effect of parallelism in the handling of I/O."
513 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
514
515 # take all readable handles in turn
516 foreach my $rh (@rh_set) {
517 if($rh == $serverSocket) {
518 my $client = $rh->accept();
519 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
520 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
521
522 # Read from the client (getting rid of the trailing newline)
523 # Has the client sent the <<STOP>> signal?
524 my $signal = <$client>;
525 chomp($signal);
526 if($signal eq "<<STOP>>") {
527 print $client "Perl received STOP signal (on port $port): stopping wget\n";
528 $loop = 0; # out of outer while loop
529 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
530
531 # Sometimes the wget process takes some time to start up. If the STOP signal
532 # was sent, don't try to terminate the process until we know it is running.
533 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
534 # for it to start up, checking for whether it is running in order to kill it.
535 for(my $seconds = 1; $seconds <= 5; $seconds++) {
536 if(kill(0, $childpid)) {
537 # If kill(0, $childpid) returns true, then the process is running
538 # and we need to kill it.
539 close($chld_in);
540 close($chld_out);
541 kill("TERM", $childpid); # prefix - to signal to kill process group
542
543 $childpid = undef;
544
545 # Stop monitoring the read_handle and close the serverSocket
546 # (the Java end will close the client socket that Java opened)
547 $read_set->remove($rh); #$read_set->remove($serverSocket);
548 close($rh); #close($serverSocket);
549 print $client "Perl terminated wget and is about to exit\n";
550 last; # out of inner for loop
551 }
552 else { # the process may just be starting up, wait
553 sleep(1);
554 }
555 }
556 last; # out of inner for loop
557 }
558 }
559 }
560 } # end while($loop)
561
562
563 if ($changed_dir) {
564 chdir $current_dir;
565 }
566
567 return $strReadIn;
568}
569
570
571sub useWgetMonitored
572{
573 #local $| = 1; # autoflush stdout buffer
574 #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
575
576 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
577
578
579 my $current_dir = cwd();
580 my $changed_dir = 0;
581 if (defined $working_dir && -e $working_dir) {
582 chdir "$working_dir";
583 $changed_dir = 1;
584 }
585
586 # When we are running this script through GLI, the SIGTERM signal handler
587 # won't get called on Windows when wget is to be prematurely terminated.
588 # Instead, when wget has to be terminated in the middle of execution, GLI will
589 # connect to a serverSocket here to communicate when it's time to stop wget.
590 if($self->dealingWithSockets()) {
591
592 $port = <STDIN>; # gets a port on localhost that's not yet in use
593 chomp($port);
594
595 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
596 LocalPort => $port,
597 Listen => 1,
598 Reuse => 1);
599
600 die "can't setup server" unless $serverSocket;
601 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
602
603 $read_set = new IO::Select(); # create handle set for reading
604 $read_set->add($serverSocket); # add the main socket to the set
605 }
606
607 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
608 # compose the command as an array for open3, to preserve spaces in any filepath
609 my @commandargs = split(' ', $cmdWget);
610 unshift(@commandargs, $wget_file_path);
611 my $command = "$wget_file_path $cmdWget";
612 #print STDOUT "Command is: $command\n";
613
614 eval { # see p.568 of Perl Cookbook
615 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
616 };
617 if ($@) {
618 if($@ =~ m/^open3/) {
619 die "open3 failed in $0: $!\n$@\n";
620 }
621 die "Tried to launch open3 in $0, got unexpected exception: $@";
622 }
623
624 my $full_text = "";
625 my $error_text = "";
626 my @follow_list = ();
627 my $line;
628
629 # create the select object and add our streamhandle(s)
630 my $sel = new IO::Select;
631 $sel->add($chld_out);
632
633 my $num_consecutive_timedouts = 0;
634 my $error = 0;
635 my $loop = 1;
636 while($loop)
637 {
638 # assume we're going to timeout trying to read from child process
639 $num_consecutive_timedouts++;
640
641 # block until data is available on the registered filehandles or until the timeout specified
642 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
643 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
644 # since we're in this if statement
645
646 foreach my $fh (@readyhandles) {
647 my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle
648 if($len) { # read something
649
650
651 if((defined $blnShow) && $blnShow)
652 {
653 print STDERR "$line";
654 }
655
656 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
657 my $follow_url = $1;
658 push(@follow_list,$follow_url);
659 }
660
661 if ($line =~ m/ERROR\s+\d+/) {
662 $error_text .= $line;
663 }
664
665 $full_text .= $line;
666 } else { # error or EOF
667 if(!defined $len) { # error reading
668 #print STDERR "WgetDownload: Error reading from child stream: $!\n";
669 $error = 1;
670 }
671 elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read
672 #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally
673 }
674
675 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
676
677 # last; # if we have more than one filehandle registered with IO::Select
678
679 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
680 } # end else error or EOF
681
682 } # end foreach on readyhandles
683 } # end if on can_read
684
685 if($num_consecutive_timedouts >= $NUM_TRIES) {
686 $error = 1;
687 $loop = 0; # to break out of outer while loop
688
689 $num_consecutive_timedouts = 0;
690
691 #print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
692 #print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
693 }
694
695 if($loop == 0) { # error or EOF, either way, clean up
696
697 if($error) {
698 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
699
700 if(kill(0, $childpid)) {
701 # If kill(0, $childpid) returns true, then the process is running
702 # and we need to kill it.
703 close($chld_in);
704 close($chld_out);
705 kill("TERM", $childpid); # prefix - to signal to kill process group
706
707 #die "Perl terminated wget after timing out repeatedly and is about to exit\n";
708 }
709 }
710 else { # wget finished, terminate naturally
711 close($chld_in);
712 close($chld_out);
713 # Program terminates only when the following line is included
714 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
715 # it prevents the child from turning into a "zombie process".
716 # While the wget process terminates without it, this perl script does not:
717 # the DOS prompt is not returned without it.
718 waitpid $childpid, 0;
719 }
720
721 # error or not:
722 $childpid = undef;
723 if(defined $port) {
724 $read_set->remove($serverSocket);
725 close($serverSocket);
726 }
727 }
728
729 # if we run this script from the command-line (as opposed to from GLI),
730 # then we're not working with sockets and can therefore skip the next bits
731 next unless(defined $port);
732
733 # http://www.perlfect.com/articles/select.shtml
734 # "multiplex between several filehandles within a single thread of control,
735 # thus creating the effect of parallelism in the handling of I/O."
736 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
737
738 # take all readable handles in turn
739 foreach my $rh (@rh_set) {
740 if($rh == $serverSocket) {
741 my $client = $rh->accept();
742 #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
743 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
744
745 # Read from the client (getting rid of trailing newline)
746 # Has the client sent the <<STOP>> signal?
747 my $signal = <$client>;
748 chomp($signal);
749 if($signal eq "<<STOP>>") {
750 print $client "Perl received STOP signal (on port $port): stopping wget\n";
751 $loop = 0; # out of outer while loop
752 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
753
754 # Sometimes the wget process takes some time to start up. If the STOP signal
755 # was sent, don't try to terminate the process until we know it is running.
756 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
757 # for it to start up, checking for whether it is running in order to kill it.
758 for(my $seconds = 1; $seconds <= 5; $seconds++) {
759 if(kill(0, $childpid)) {
760 # If kill(0, $childpid) returns true, then the process is running
761 # and we need to kill it.
762 close($chld_in);
763 close($chld_out);
764 kill("TERM", $childpid); # prefix - to signal to kill process group
765
766 $childpid = undef;
767
768 # Stop monitoring the read_handle and close the serverSocket
769 # (the Java end will close the client socket that Java opened)
770 $read_set->remove($rh); #$read_set->remove($serverSocket);
771 close($rh); #close($serverSocket);
772 print $client "Perl terminated wget and is about to exit\n";
773 last; # out of inner for loop
774 }
775 else { # the process may just be starting up, wait
776 sleep(1);
777 }
778 }
779 last; # out of foreach loop
780 }
781 }
782 }
783 }
784
785 my $command_status = $?;
786 if ($command_status != 0) {
787 $error_text .= "Exit error: $command_status";
788 }
789
790 if ($changed_dir) {
791 chdir $current_dir;
792 }
793
794 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
795
796 return ($full_text,$error_text,$final_follow);
797}
798
799
800# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
801sub checkURL
802{
803 my ($self) = @_;
804 if ($self->{'url'} eq "")
805 {
806 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
807 }
808}
809
810sub error
811{
812 my ($strFunctionName,$strError) = @_;
813 {
814 print "Error occoured in WgetDownload.pm\n".
815 "In Function:".$strFunctionName."\n".
816 "Error Message:".$strError."\n";
817 exit(-1);
818 }
819}
820
8211;
822
Note: See TracBrowser for help on using the repository browser.