root/main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm @ 31929

Revision 31929, 32.5 KB (checked in by ak19, 12 months ago)

The recent overhaul of perl running wget and allowing proper termination on timeout and allowing wget to be cancelled on blocking (accomplished using timeouts), didn't work on Windows, since IO::Select's can_read() method only works on Windows with Sockets not other types of file handles because of lack of kernel level Win support, unlike on Linux where can_read() works with all types of file handles. The solution was not using alarm() to emulate read with timeouts in place of IO::Select's can_read(timeout) . (See the debug_testing area of trac for a commit containing the alarm() that worked on Linux but again not Windows.) The solution was to turn the filehandles to the wget child process' iostreams into Sockets, and then use IO::Select's can_read as before. Works on the usually problematic Windows. Still to test on linux.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use util;
38use IPC::Open3;
39use IO::Select;
40use IO::Socket;
41
42#use IO::Select qw( );
43#use IPC::Open3 qw( open3 );
44use Socket     qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html
45
46
47sub BEGIN {
48    @WgetDownload::ISA = ('BaseDownload');
49}
50
51my $arguments =
52     [ { 'name' => "proxy_on",
53    'desc' => "{WgetDownload.proxy_on}",
54    'type' => "flag",
55    'reqd' => "no",
56    'hiddengli' => "yes"},
57      { 'name' => "http_proxy_host",
58    'desc' => "{WgetDownload.http_proxy_host}",
59    'type' => "string",
60    'reqd' => "no",
61    'hiddengli' => "yes"},
62      { 'name' => "http_proxy_port",
63    'desc' => "{WgetDownload.http_proxy_port}",
64    'type' => "string",
65    'reqd' => "no",
66    'hiddengli' => "yes"},
67      { 'name' => "https_proxy_host", 
68    'desc' => "{WgetDownload.https_proxy_host}",         
69    'type' => "string",
70    'reqd' => "no",
71    'hiddengli' => "yes"},
72      { 'name' => "https_proxy_port",
73    'desc' => "{WgetDownload.https_proxy_port}",         
74    'type' => "string",
75    'reqd' => "no",
76    'hiddengli' => "yes"},
77      { 'name' => "ftp_proxy_host", 
78    'desc' => "{WgetDownload.ftp_proxy_host}",         
79    'type' => "string",
80    'reqd' => "no",
81    'hiddengli' => "yes"},
82      { 'name' => "ftp_proxy_port",
83    'desc' => "{WgetDownload.ftp_proxy_port}",         
84    'type' => "string",
85    'reqd' => "no",
86    'hiddengli' => "yes"},
87      { 'name' => "user_name", 
88    'desc' => "{WgetDownload.user_name}",
89    'type' => "string",
90    'reqd' => "no",
91    'hiddengli' => "yes"},
92      { 'name' => "user_password",
93    'desc' => "{WgetDownload.user_password}",
94    'type' => "string",
95    'reqd' => "no",
96    'hiddengli' => "yes"},
97      { 'name' => "no_check_certificate",
98    'desc' => "{WgetDownload.no_check_certificate}",
99    'type' => "flag",
100    'reqd' => "no",
101    'hiddengli' => "yes"}
102     ];
103
104my $options = { 'name'     => "WgetDownload",
105        'desc'     => "{WgetDownload.desc}",
106        'abstract' => "yes",
107        'inherits' => "yes",
108        'args'     => $arguments };
109
110
111# Declaring file global variables related to the wget child process so that
112# the termination signal handler for SIGTERM can close the streams and tidy
113# up before ending the child process.
114my $childpid;
115my ($chld_out, $chld_in);
116my ($serverSocket, $read_set);
117
118my $TIMEOUT = 1; # seconds
119my $NUM_TRIES = 10;
120
121# The port this script's server socket will be listening on, to handle
122# incoming signals from GLI to terminate wget. This is also file global,
123# since OAIDownload.pm will make several calls on wget using the same
124# instance of this script and we want to reuse whatever port GLI gave us.
125my $port;
126
127# When this script is called from the command line, this handler will be called
128# if this process is killed or abruptly ends due to receiving one of the
129# terminating signals that this handler is registered to deal with.
130sub abrupt_end_handler {
131    my $termination_signal = shift (@_);
132
133    if(defined $childpid) {
134    close($chld_out);
135    close($chld_in);
136   
137    print STDOUT "Received termination signal: $termination_signal\n";
138
139    # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
140    # See http://perldoc.perl.org/perlipc.html#Signals
141    # Warning on using kill at http://perldoc.perl.org/perlfork.html
142    kill("TERM", $childpid); # prefix - to signal to kill process group
143
144    # If the SIGTERM sent on Linux calls this handler, we want to make
145    # sure any socket connection is closed.
146    # Otherwise sockets are only used when this script is run from GLI
147    # in which case the handlers don't really get called.
148    if(defined $serverSocket) {
149        $read_set->remove($serverSocket) if defined $read_set;
150        close($serverSocket);
151    }
152    }
153
154    exit(0);
155}
156
157# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
158# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
159# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
160# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
161# http://affy.blogspot.com/p5be/ch13.htm
162# http://perldoc.perl.org/perlipc.html#Signals
163$SIG{'INT'} = \&abrupt_end_handler;
164$SIG{'TERM'} = \&abrupt_end_handler;
165
166sub new {
167    my ($class) = shift (@_);
168    my ($getlist,$inputargs,$hashArgOptLists) = @_;
169    push(@$getlist, $class);
170
171    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
172    push(@{$hashArgOptLists->{"OptList"}},$options);
173
174    my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
175
176    # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
177    &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
178
179    return bless $self, $class;
180}
181
182sub checkWgetSetup
183{
184    my ($self,$blnGliCall) = @_;
185    #TODO: proxy detection??
186   
187    if((!$blnGliCall) && $self->{'proxy_on'})
188    {
189    &checkProxySetup($self);
190    }
191    &checkURL($self);
192}
193
194# Not using this. On Windows, we used to pass proxying settings as flags to wget. But, as that can be
195# seen with Task Manager, we now have the proxy settings set in the environment and are no longer passing it
196sub addProxySettingsAsWgetFlags
197{
198    my ($self) = @_;
199    my $strOptions = "";
200
201    if($self->{'http_proxy_host'} && $self->{'http_proxy_port'}) {
202    $strOptions .= " -e http_proxy=$self->{'http_proxy_host'}:$self->{'http_proxy_port'} ";
203    }
204    if($self->{'https_proxy_host'} && $self->{'https_proxy_port'}) {
205    $strOptions .= " -e https_proxy=$self->{'https_proxy_host'}:$self->{'https_proxy_port'} ";
206    }
207    if($self->{'ftp_proxy_host'} && $self->{'ftp_proxy_port'}) {
208    $strOptions .= " -e ftp_proxy=$self->{'ftp_proxy_host'}:$self->{'ftp_proxy_port'} ";
209    }
210   
211    # For wget, there is only one set pair of proxy-user and proxy-passwd, so wget seems to assume
212    # that all 3 proxy protocols (http|https|ftp) will use the same username and pwd combination?
213    # Note that this only matters when passing the proxying details as flags to wget, not when
214    # the proxies are setup as environment variables.
215    if ($self->{'user_name'} && $self->{'user_password'})
216    {
217    $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";     
218    # how is "--proxy-passwd" instead of "--proxy-password" even working????       
219    # see https://www.gnu.org/software/wget/manual/html_node/Proxies.html
220    # and https://www.gnu.org/software/wget/manual/wget.html
221    # Not touching this, in case the manual is simply wrong. Since our code works in
222    # practice (when we were still using wget proxy username/pwd flags for windows).
223    }   
224   
225    return $strOptions;
226}
227
228sub getWgetOptions
229{
230    my ($self) = @_;
231    my $strOptions = "";
232   
233    # If proxy settings are set up in the environment, wget is ready to use them. More secure.
234    # But if proxy settings are not set up in the environment, pass them as flags to wget
235    # This is less secure, as pwd etc visible in task manager, but it was the original way in
236    # which wget was run on windows.
237    # Truth in Perl: https://home.ubalt.edu/abento/452/perl/perltruth.html
238    # http://www.perlmonks.org/?node=what%20is%20true%20and%20false%20in%20Perl%3F
239   
240    if ($self->{'proxy_on'}) {
241    if(!$ENV{'http_proxy'} && !$ENV{'https_proxy'} && !$ENV{'ftp_proxy'}) {
242        $strOptions .= $self->addProxySettingsAsWgetFlags();
243    } # else wget will use proxy settings in environment, assume enough settings have been provided
244    # either way, we're using the proxy
245    $strOptions .= " --proxy ";
246    }
247   
248    if($self->{'no_check_certificate'}) { # URL may be http that gets redirected to https, so if no_check_certificate is on, turn it on even if URL is http
249   
250    $strOptions .= " --no-check-certificate ";
251    }
252   
253    return $strOptions;
254}
255
256# Checking for proxy setup: proxy server, proxy port, proxy username and password.
257sub checkProxySetup
258{
259    my ($self) = @_;
260    ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
261    # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
262    # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
263
264}
265
266# Returns true if the wget status needs to be monitored through sockets
267# (if a socket is used to communicate with the Java program on when to
268# terminate wget). True if we are running gli, or if the particular type
269# of WgetDownload is *not* OAIDownload (in that case, the original way of
270# terminating the perl script from Java would terminate wget as well).
271sub dealingWithSockets() {
272    my ($self) = @_;
273    return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
274                       # use ref($self) to find the classname of an object
275}
276
277# On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams
278# However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read()
279# not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from
280# http://www.perlmonks.org/?node_id=869942
281# http://www.perlmonks.org/?node_id=811650
282# It was suggested that IPC::Run will take care of all this or circumvent the need for all this,
283# but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS
284
285# Create a unidirectional pipe to an iostream of a process that is actually a socket
286sub _pipe {
287    socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC)
288        or return undef;
289    shutdown($_[0], 1);  # No more writing for reader. See http://www.perlmonks.org/?node=108244
290    shutdown($_[1], 0);  # No more reading for writer
291    return 1;
292}
293
294sub _open3 {
295    local (*TO_CHLD_R,     *TO_CHLD_W);
296    local (*FR_CHLD_R,     *FR_CHLD_W);
297    local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W);
298
299    if ($^O =~ /Win32/) {
300        _pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $^E;
301        _pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $^E;
302        #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E;
303    } else {
304        pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $!;
305        pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $!;
306        #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!;
307    }
308
309    #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_);
310    my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc,
311                                                                      # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
312   
313    return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R );
314}
315
316# useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem
317# useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration:
318# - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there
319# - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running.
320# Then the loop of these two things repeats.
321sub useWget
322{
323    #local $| = 1; # autoflush stdout buffer
324    #print STDOUT "*** Start of subroutine useWget in $0\n";
325
326    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
327
328    my ($strReadIn,$strLine,$command);
329    $strReadIn = "" unless defined $strReadIn;
330
331    my $current_dir = cwd();
332    my $changed_dir = 0;
333    if (defined $working_dir && -e $working_dir) {
334    chdir "$working_dir";
335    $changed_dir = 1;
336    }
337
338    # When we are running this script through GLI, the SIGTERM signal handler
339    # won't get called on Windows when wget is to be prematurely terminated.
340    # Instead, when wget has to be terminated in the middle of execution, GLI will
341    # connect to a serverSocket here to communicate when it's time to stop wget.
342    if($self->dealingWithSockets()) {
343
344    $port = <STDIN>; # gets a port on localhost that's not yet in use
345    chomp($port);
346   
347    $serverSocket = IO::Socket::INET->new( Proto     => 'tcp',
348                           LocalPort => $port,
349                           Listen    => 1,
350                           Reuse     => 1);
351   
352    die "can't setup server" unless $serverSocket;
353    #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
354
355    $read_set = new IO::Select();         # create handle set for reading
356    $read_set->add($serverSocket);        # add the main socket to the set
357    }
358
359    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
360   
361    # Shouldn't use double quotes around wget path after all? See final comment at
362    # http://www.perlmonks.org/?node_id=394709
363    # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
364    # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath
365    # because single/double quotes using open3 seem to launch a subshell, see also final comment at
366    # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes
367    # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other).
368    my @commandargs = split(' ', $cmdWget);
369    unshift(@commandargs, $wget_file_path);
370    $command = "$wget_file_path $cmdWget";
371#    print STDOUT "Command is: $command\n"; # displayed in GLI output
372#    print STDERR "Command is: $command\n"; # goes into ServerInfoDialog
373   
374    # Wget's output needs to be monitored to find out when it has naturally terminated.
375    # Wget's output is sent to its STDERR so we can't use open2 without doing 2>&1.
376    # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing
377    # the childpid does not kill wget on Linux but the subshell that launched it instead.
378    # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?],
379    # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!)
380    # However that may be, it works with open3. But to avoid the confusion of managing and
381    # closing an extra unused handle, a single handle is used instead for both the child's
382    # stderr and stdout.
383    # See http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
384    # for why this is the right thing to do.
385
386    # Both open2 and open3 don't return on failure, but raise an exception. The handling
387    # of the exception is described on p.568 of the Perl Cookbook
388    eval {
389    #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below
390    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
391   
392    # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to
393    # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows
394    ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
395    };
396    if ($@) {
397    if($@ =~ m/^open3/) {
398        die "open3 failed in $0: $!\n$@\n";     
399    }
400    die "Tried to launch open3 in $0, got unexpected exception: $@";
401    }
402
403    # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking
404    #     if defined(my $strLine=<$chld_out>)
405    # Google: perl open3 read timeout
406    # Google: perl open3 select() example
407    # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked
408    # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl
409    # https://gist.github.com/shalk/6988937
410    # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung
411    # http://perldoc.perl.org/IO/Select.html
412    # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3
413    # http://www.perlmonks.org/?node_id=951554
414    # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm
415    # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1
416
417    # create the select object and add our streamhandle(s)
418    my $sel = new IO::Select;
419    $sel->add($chld_out);
420
421    my $num_consecutive_timedouts = 0;
422    my $error = 0;
423    my $loop = 1;
424   
425    while($loop)
426    {
427    # assume we're going to timeout trying to read from child process
428    $num_consecutive_timedouts++;
429
430   
431    # block until data is available on the registered filehandles or until the timeout specified   
432    if(my @readyhandles = $sel->can_read($TIMEOUT)) {
433
434        $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
435        # since we're in this if statement
436       
437        # now there's a list of registered filehandles we can read from to loop through reading from.
438        # though we've registered only one, chld_out
439        foreach my $fh (@readyhandles) {
440        my $strLine;
441        #sleep 3;
442       
443        # read up to 4096 bytes from this filehandle fh.
444        # if there is less than 4096 bytes, we'll only get
445        # those available bytes and won't block.  If there
446        # is more than 4096 bytes, we'll only read 4096 and
447        # wait for the next iteration through the loop to
448        # read the rest.
449        my $len = sysread($fh, $strLine, 4096);
450       
451        if($len) { # read something
452            if($blnShow) {
453            print STDERR "$strLine\n";
454            }
455            $strReadIn .= $strLine;
456        }
457        else { # error or EOF: (!defined $len || $len == 0)         
458           
459            if(!defined $len) { # error reading         
460            print STDERR "WgetDownload: Error reading from child stream: $!\n";
461            # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing
462            $error = 1;
463            }
464            elsif ($len == 0) { # EOF           
465            # Finished reading from this filehand $fh because we read 0 bytes.
466            # wget finished, terminate naturally
467            print STDERR "WgetDownload: wget finished\n";
468            #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n";
469            }
470
471            $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
472           
473            # last; # if we have more than one filehandle registered with IO::Select
474           
475            $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error         
476           
477        } # end else error or EOF
478       
479        } # end foreach on readyhandles
480    } # end if on can_read
481   
482    if($num_consecutive_timedouts >= $NUM_TRIES) {
483        $error = 1;
484        $loop = 0;                          # to break out of outer while loop
485
486        $num_consecutive_timedouts = 0;
487
488        print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
489        print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
490    }
491
492    if($loop == 0) { # error or EOF, either way, clean up
493        if($error) {
494        $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
495       
496        if(kill(0, $childpid)) {
497            # If kill(0, $childpid) returns true, then the process is running
498            # and we need to kill it.
499            close($chld_in);
500            close($chld_out);
501            kill('TERM', $childpid); # kill the process group by prefixing - to signal
502
503            # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
504            # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
505            #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";         
506            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
507        }
508        }
509        else { # wget finished (no errors), terminate naturally
510        #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
511        close($chld_in);
512        close($chld_out);
513        waitpid $childpid, 0;       
514        }
515
516        # error or not
517        $childpid = undef;
518        # Stop monitoring the read_handle and close the serverSocket
519        # (the Java end will close the client socket that Java opened)
520        if(defined $port) {
521        $read_set->remove($serverSocket);
522        close($serverSocket);
523        }
524    }
525
526    # If we've already terminated, either naturally or on error, we can get out of the while loop
527    next if($loop == 0);
528   
529    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
530   
531    # if we run this script from the command-line (as opposed to from GLI),
532    # then we're not working with sockets and can therefore skip the next bits
533    next unless(defined $port);
534   
535    # http://www.perlfect.com/articles/select.shtml
536    # "multiplex between several filehandles within a single thread of control,
537    # thus creating the effect of parallelism in the handling of I/O."
538    my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
539
540    # take all readable handles in turn
541    foreach my $rh (@rh_set) {
542        if($rh == $serverSocket) {
543        my $client = $rh->accept();
544        #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
545        print $client "Talked to ServerSocket (port $port). Connection accepted\n";
546       
547        # Read from the client (getting rid of the trailing newline)
548        # Has the client sent the <<STOP>> signal?
549        my $signal = <$client>;
550        chomp($signal);
551        if($signal eq "<<STOP>>") {
552            print $client "Perl received STOP signal (on port $port): stopping wget\n";
553            $loop = 0;                          # out of outer while loop
554            $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
555           
556            # Sometimes the wget process takes some time to start up. If the STOP signal
557            # was sent, don't try to terminate the process until we know it is running.
558            # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
559            # for it to start up, checking for whether it is running in order to kill it.
560            for(my $seconds = 1; $seconds <= 5; $seconds++) {
561            if(kill(0, $childpid)) {
562                # If kill(0, $childpid) returns true, then the process is running
563                # and we need to kill it.
564                close($chld_in);
565                close($chld_out);
566                kill("TERM", $childpid); # prefix - to signal to kill process group
567               
568                $childpid = undef;
569               
570                # Stop monitoring the read_handle and close the serverSocket
571                # (the Java end will close the client socket that Java opened)
572                $read_set->remove($rh);     #$read_set->remove($serverSocket);
573                close($rh);             #close($serverSocket);
574                print $client "Perl terminated wget and is about to exit\n";
575                last;                           # out of inner for loop
576            }
577            else { # the process may just be starting up, wait
578                sleep(1);
579            }
580            }
581            last;                               # out of foreach loop
582        }
583        }
584    }
585    }
586
587    if ($changed_dir) {
588    chdir $current_dir;
589    }
590   
591    return $strReadIn;
592}
593
594
595sub useWgetMonitored
596{
597    #local $| = 1; # autoflush stdout buffer
598    #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
599
600    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
601
602
603    my $current_dir = cwd();
604    my $changed_dir = 0;
605    if (defined $working_dir && -e $working_dir) {
606    chdir "$working_dir";
607    $changed_dir = 1;
608    }
609
610    # When we are running this script through GLI, the SIGTERM signal handler
611    # won't get called on Windows when wget is to be prematurely terminated.
612    # Instead, when wget has to be terminated in the middle of execution, GLI will
613    # connect to a serverSocket here to communicate when it's time to stop wget.
614    if($self->dealingWithSockets()) {
615
616    $port = <STDIN>; # gets a port on localhost that's not yet in use
617    chomp($port);
618   
619    $serverSocket = IO::Socket::INET->new( Proto     => 'tcp',
620                           LocalPort => $port,
621                           Listen    => 1,
622                           Reuse     => 1);
623   
624    die "can't setup server" unless $serverSocket;
625    #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
626
627    $read_set = new IO::Select();         # create handle set for reading
628    $read_set->add($serverSocket);        # add the main socket to the set
629    }
630
631    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
632    # compose the command as an array for open3, to preserve spaces in any filepath
633    my @commandargs = split(' ', $cmdWget);
634    unshift(@commandargs, $wget_file_path);
635    my $command = "$wget_file_path $cmdWget";
636    #print STDOUT "Command is: $command\n";
637
638    eval {     # see p.568 of Perl Cookbook
639    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
640    ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
641    };
642    if ($@) {
643    if($@ =~ m/^open3/) {
644        die "open3 failed in $0: $!\n$@\n";     
645    }
646    die "Tried to launch open3 in $0, got unexpected exception: $@";
647    }
648
649    my $full_text = "";
650    my $error_text = "";
651    my @follow_list = ();
652    my $line;
653
654    # create the select object and add our streamhandle(s)
655    my $sel = new IO::Select;
656    $sel->add($chld_out);
657   
658    my $num_consecutive_timedouts = 0;
659    my $error = 0;
660    my $loop = 1;
661    while($loop)
662    {
663    # assume we're going to timeout trying to read from child process
664    $num_consecutive_timedouts++;
665
666    # block until data is available on the registered filehandles or until the timeout specified   
667    if(my @readyhandles = $sel->can_read($TIMEOUT)) {
668        $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
669        # since we're in this if statement
670       
671        foreach my $fh (@readyhandles) {
672        my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle
673        if($len) { # read something
674       
675           
676            if((defined $blnShow) && $blnShow)
677            {
678            print STDERR "$line";
679            }
680           
681            if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
682            my $follow_url = $1;
683            push(@follow_list,$follow_url);
684            }
685           
686            if ($line =~ m/ERROR\s+\d+/) {
687            $error_text .= $line;
688            }
689           
690            $full_text .= $line;
691        } else { # error or EOF
692            if(!defined $len) { # error reading
693            #print STDERR "WgetDownload: Error reading from child stream: $!\n";
694            $error = 1;
695            }
696            elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read
697            #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally
698            }
699
700            $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
701           
702            # last; # if we have more than one filehandle registered with IO::Select
703           
704            $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error         
705        } # end else error or EOF
706       
707        } # end foreach on readyhandles
708    }  # end if on can_read
709
710    if($num_consecutive_timedouts >= $NUM_TRIES) {
711        $error = 1;
712        $loop = 0;                          # to break out of outer while loop
713
714        $num_consecutive_timedouts = 0;
715
716        #print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
717        #print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
718    }
719
720    if($loop == 0) { # error or EOF, either way, clean up
721       
722        if($error) {
723        $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
724       
725        if(kill(0, $childpid)) {
726            # If kill(0, $childpid) returns true, then the process is running
727            # and we need to kill it.
728            close($chld_in);
729            close($chld_out);
730            kill("TERM", $childpid); # prefix - to signal to kill process group
731           
732            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
733        }
734        }
735        else { # wget finished, terminate naturally
736        close($chld_in);
737        close($chld_out);
738        # Program terminates only when the following line is included
739        # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
740        # it prevents the child from turning into a "zombie process".
741        # While the wget process terminates without it, this perl script does not:
742        # the DOS prompt is not returned without it.
743        waitpid $childpid, 0;
744        }
745       
746        # error or not:
747        $childpid = undef;     
748        if(defined $port) {
749        $read_set->remove($serverSocket);
750        close($serverSocket);
751        }
752    }
753   
754    # If we've already terminated, either naturally or on error, we can get out of the while loop
755    next if($loop == 0);
756
757    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
758   
759    # if we run this script from the command-line (as opposed to from GLI),
760    # then we're not working with sockets and can therefore skip the next bits
761    next unless(defined $port);
762
763    # http://www.perlfect.com/articles/select.shtml
764    # "multiplex between several filehandles within a single thread of control,
765    # thus creating the effect of parallelism in the handling of I/O."
766    my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
767
768    # take all readable handles in turn
769    foreach my $rh (@rh_set) {
770        if($rh == $serverSocket) {
771        my $client = $rh->accept();
772        #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
773        print $client "Talked to ServerSocket (port $port). Connection accepted\n";
774       
775        # Read from the client (getting rid of trailing newline)
776        # Has the client sent the <<STOP>> signal?
777        my $signal = <$client>;
778        chomp($signal);
779        if($signal eq "<<STOP>>") {
780            print $client "Perl received STOP signal (on port $port): stopping wget\n";
781            $loop = 0;                          # out of outer while loop
782            $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
783           
784            # Sometimes the wget process takes some time to start up. If the STOP signal
785            # was sent, don't try to terminate the process until we know it is running.
786            # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
787            # for it to start up, checking for whether it is running in order to kill it.
788            for(my $seconds = 1; $seconds <= 5; $seconds++) {
789            if(kill(0, $childpid)) {
790                # If kill(0, $childpid) returns true, then the process is running
791                # and we need to kill it.
792                close($chld_in);
793                close($chld_out);
794                kill("TERM", $childpid); # prefix - to signal to kill process group
795               
796                $childpid = undef;
797               
798                # Stop monitoring the read_handle and close the serverSocket
799                # (the Java end will close the client socket that Java opened)
800                $read_set->remove($rh);     #$read_set->remove($serverSocket);
801                close($rh);             #close($serverSocket);
802                print $client "Perl terminated wget and is about to exit\n";
803                last;                           # out of inner for loop
804            }
805            else { # the process may just be starting up, wait
806                sleep(1);
807            }
808            }
809            last;                               # out of foreach loop
810        }
811        }
812    }
813    }
814
815    my $command_status = $?;
816    if ($command_status != 0) {
817    $error_text .= "Exit error: $command_status";
818    }
819
820    if ($changed_dir) {
821    chdir $current_dir;
822    }
823   
824    my $final_follow = pop(@follow_list); # might be undefined, but that's OK
825   
826    return ($full_text,$error_text,$final_follow);
827}
828
829
830# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
831sub checkURL
832{
833    my ($self) = @_;
834    if ($self->{'url'} eq "")
835    {
836    &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
837    }
838}
839
840sub error
841{
842    my ($strFunctionName,$strError) = @_;
843    {
844    print "Error occoured in WgetDownload.pm\n".
845        "In Function:".$strFunctionName."\n".
846        "Error Message:".$strError."\n";
847    exit(-1);
848    }
849}
850
8511;
852
Note: See TracBrowser for help on using the browser.