root/main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm @ 31956

Revision 31956, 33.8 KB (checked in by ak19, 15 months ago)

Dr Bainbridge read up on why the Sockets to our wget child process' iostreams were being forcibly closed on Windows when we've finished successfully downloading, resulting unexpectedly in the $len bytes that we sysread() being undefined (usually denoting an error) rather than 0. It turns out that by using Sockets on Windows as filehandles to a child process' iostreams is merely implemented in this manner on success.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use util;
38use IPC::Open3;
39use IO::Select;
40use IO::Socket;
41
42#use IO::Select qw( );
43#use IPC::Open3 qw( open3 );
44use Socket     qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html
45
46
47sub BEGIN {
48    @WgetDownload::ISA = ('BaseDownload');
49}
50
51my $arguments =
52     [ { 'name' => "proxy_on",
53    'desc' => "{WgetDownload.proxy_on}",
54    'type' => "flag",
55    'reqd' => "no",
56    'hiddengli' => "yes"},
57      { 'name' => "http_proxy_host",
58    'desc' => "{WgetDownload.http_proxy_host}",
59    'type' => "string",
60    'reqd' => "no",
61    'hiddengli' => "yes"},
62      { 'name' => "http_proxy_port",
63    'desc' => "{WgetDownload.http_proxy_port}",
64    'type' => "string",
65    'reqd' => "no",
66    'hiddengli' => "yes"},
67      { 'name' => "https_proxy_host", 
68    'desc' => "{WgetDownload.https_proxy_host}",         
69    'type' => "string",
70    'reqd' => "no",
71    'hiddengli' => "yes"},
72      { 'name' => "https_proxy_port",
73    'desc' => "{WgetDownload.https_proxy_port}",         
74    'type' => "string",
75    'reqd' => "no",
76    'hiddengli' => "yes"},
77      { 'name' => "ftp_proxy_host", 
78    'desc' => "{WgetDownload.ftp_proxy_host}",         
79    'type' => "string",
80    'reqd' => "no",
81    'hiddengli' => "yes"},
82      { 'name' => "ftp_proxy_port",
83    'desc' => "{WgetDownload.ftp_proxy_port}",         
84    'type' => "string",
85    'reqd' => "no",
86    'hiddengli' => "yes"},
87      { 'name' => "user_name", 
88    'desc' => "{WgetDownload.user_name}",
89    'type' => "string",
90    'reqd' => "no",
91    'hiddengli' => "yes"},
92      { 'name' => "user_password",
93    'desc' => "{WgetDownload.user_password}",
94    'type' => "string",
95    'reqd' => "no",
96    'hiddengli' => "yes"},
97      { 'name' => "no_check_certificate",
98    'desc' => "{WgetDownload.no_check_certificate}",
99    'type' => "flag",
100    'reqd' => "no",
101    'hiddengli' => "yes"}
102     ];
103
104my $options = { 'name'     => "WgetDownload",
105        'desc'     => "{WgetDownload.desc}",
106        'abstract' => "yes",
107        'inherits' => "yes",
108        'args'     => $arguments };
109
110
111# Declaring file global variables related to the wget child process so that
112# the termination signal handler for SIGTERM can close the streams and tidy
113# up before ending the child process.
114my $childpid;
115my ($chld_out, $chld_in);
116my ($serverSocket, $read_set);
117
118my $TIMEOUT = 1; # seconds
119my $NUM_TRIES = 10;
120
121# The port this script's server socket will be listening on, to handle
122# incoming signals from GLI to terminate wget. This is also file global,
123# since OAIDownload.pm will make several calls on wget using the same
124# instance of this script and we want to reuse whatever port GLI gave us.
125my $port;
126
127# When this script is called from the command line, this handler will be called
128# if this process is killed or abruptly ends due to receiving one of the
129# terminating signals that this handler is registered to deal with.
130sub abrupt_end_handler {
131    my $termination_signal = shift (@_);
132
133    if(defined $childpid) {
134    close($chld_out);
135    close($chld_in);
136   
137    print STDOUT "Received termination signal: $termination_signal\n";
138
139    # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
140    # See http://perldoc.perl.org/perlipc.html#Signals
141    # Warning on using kill at http://perldoc.perl.org/perlfork.html
142    kill("TERM", $childpid); # prefix - to signal to kill process group
143
144    # If the SIGTERM sent on Linux calls this handler, we want to make
145    # sure any socket connection is closed.
146    # Otherwise sockets are only used when this script is run from GLI
147    # in which case the handlers don't really get called.
148    if(defined $serverSocket) {
149        $read_set->remove($serverSocket) if defined $read_set;
150        close($serverSocket);
151    }
152    }
153
154    exit(0);
155}
156
157# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
158# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
159# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
160# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
161# http://affy.blogspot.com/p5be/ch13.htm
162# http://perldoc.perl.org/perlipc.html#Signals
163$SIG{'INT'} = \&abrupt_end_handler;
164$SIG{'TERM'} = \&abrupt_end_handler;
165
166sub new {
167    my ($class) = shift (@_);
168    my ($getlist,$inputargs,$hashArgOptLists) = @_;
169    push(@$getlist, $class);
170
171    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
172    push(@{$hashArgOptLists->{"OptList"}},$options);
173
174    my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
175
176    # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
177    &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
178
179    return bless $self, $class;
180}
181
182sub checkWgetSetup
183{
184    my ($self,$blnGliCall) = @_;
185    #TODO: proxy detection??
186   
187    if((!$blnGliCall) && $self->{'proxy_on'})
188    {
189    &checkProxySetup($self);
190    }
191    &checkURL($self);
192}
193
194# Not using this. On Windows, we used to pass proxying settings as flags to wget. But, as that can be
195# seen with Task Manager, we now have the proxy settings set in the environment and are no longer passing it
196sub addProxySettingsAsWgetFlags
197{
198    my ($self) = @_;
199    my $strOptions = "";
200
201    if($self->{'http_proxy_host'} && $self->{'http_proxy_port'}) {
202    $strOptions .= " -e http_proxy=$self->{'http_proxy_host'}:$self->{'http_proxy_port'} ";
203    }
204    if($self->{'https_proxy_host'} && $self->{'https_proxy_port'}) {
205    $strOptions .= " -e https_proxy=$self->{'https_proxy_host'}:$self->{'https_proxy_port'} ";
206    }
207    if($self->{'ftp_proxy_host'} && $self->{'ftp_proxy_port'}) {
208    $strOptions .= " -e ftp_proxy=$self->{'ftp_proxy_host'}:$self->{'ftp_proxy_port'} ";
209    }
210   
211    # For wget, there is only one set pair of proxy-user and proxy-passwd, so wget seems to assume
212    # that all 3 proxy protocols (http|https|ftp) will use the same username and pwd combination?
213    # Note that this only matters when passing the proxying details as flags to wget, not when
214    # the proxies are setup as environment variables.
215    if ($self->{'user_name'} && $self->{'user_password'})
216    {
217    $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";     
218    # how is "--proxy-passwd" instead of "--proxy-password" even working????       
219    # see https://www.gnu.org/software/wget/manual/html_node/Proxies.html
220    # and https://www.gnu.org/software/wget/manual/wget.html
221    # Not touching this, in case the manual is simply wrong. Since our code works in
222    # practice (when we were still using wget proxy username/pwd flags for windows).
223    }   
224   
225    return $strOptions;
226}
227
228sub getWgetOptions
229{
230    my ($self) = @_;
231    my $strOptions = "";
232   
233    # If proxy settings are set up in the environment, wget is ready to use them. More secure.
234    # But if proxy settings are not set up in the environment, pass them as flags to wget
235    # This is less secure, as pwd etc visible in task manager, but it was the original way in
236    # which wget was run on windows.
237    # Truth in Perl: https://home.ubalt.edu/abento/452/perl/perltruth.html
238    # http://www.perlmonks.org/?node=what%20is%20true%20and%20false%20in%20Perl%3F
239   
240    if ($self->{'proxy_on'}) {
241    if(!$ENV{'http_proxy'} && !$ENV{'https_proxy'} && !$ENV{'ftp_proxy'}) {
242        $strOptions .= $self->addProxySettingsAsWgetFlags();
243    } # else wget will use proxy settings in environment, assume enough settings have been provided
244    # either way, we're using the proxy
245    $strOptions .= " --proxy ";
246    }
247   
248    if($self->{'no_check_certificate'}) { # URL may be http that gets redirected to https, so if no_check_certificate is on, turn it on even if URL is http
249   
250    $strOptions .= " --no-check-certificate ";
251    }
252   
253    return $strOptions;
254}
255
256# Checking for proxy setup: proxy server, proxy port, proxy username and password.
257sub checkProxySetup
258{
259    my ($self) = @_;
260    ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
261    # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
262    # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
263
264}
265
266# Returns true if the wget status needs to be monitored through sockets
267# (if a socket is used to communicate with the Java program on when to
268# terminate wget). True if we are running gli, or if the particular type
269# of WgetDownload is *not* OAIDownload (in that case, the original way of
270# terminating the perl script from Java would terminate wget as well).
271sub dealingWithSockets() {
272    my ($self) = @_;
273    return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
274                       # use ref($self) to find the classname of an object
275}
276
277# On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams
278# However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read()
279# not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from
280# http://www.perlmonks.org/?node_id=869942
281# http://www.perlmonks.org/?node_id=811650
282# It was suggested that IPC::Run will take care of all this or circumvent the need for all this,
283# but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS
284
285# Create a unidirectional pipe to an iostream of a process that is actually a socket
286sub _pipe {
287    socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC)
288        or return undef;
289    shutdown($_[0], 1);  # No more writing for reader. See http://www.perlmonks.org/?node=108244
290    shutdown($_[1], 0);  # No more reading for writer
291    return 1;
292}
293
294sub _open3 {
295    local (*TO_CHLD_R,     *TO_CHLD_W);
296    local (*FR_CHLD_R,     *FR_CHLD_W);
297    #local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W);
298
299    if ($^O =~ /Win32/) {
300        _pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $^E;
301        _pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $^E;
302        #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E;
303    } else {
304        pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $!;
305        pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $!;
306        #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!;
307    }
308
309    #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_);
310    my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc,
311                                                                      # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
312   
313    #return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R );
314    return ( $pid, *TO_CHLD_W, *FR_CHLD_R);
315}
316
317# useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem
318# useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration:
319# - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there
320# - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running.
321# Then the loop of these two things repeats.
322sub useWget
323{
324    #local $| = 1; # autoflush stdout buffer
325    #print STDOUT "*** Start of subroutine useWget in $0\n";
326
327    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
328
329    my ($strReadIn,$strLine,$command);
330    $strReadIn = "" unless defined $strReadIn;
331
332    my $current_dir = cwd();
333    my $changed_dir = 0;
334    if (defined $working_dir && -e $working_dir) {
335    chdir "$working_dir";
336    $changed_dir = 1;
337    }
338
339    # When we are running this script through GLI, the SIGTERM signal handler
340    # won't get called on Windows when wget is to be prematurely terminated.
341    # Instead, when wget has to be terminated in the middle of execution, GLI will
342    # connect to a serverSocket here to communicate when it's time to stop wget.
343    if($self->dealingWithSockets()) {
344
345    $port = <STDIN>; # gets a port on localhost that's not yet in use
346    chomp($port);
347   
348    $serverSocket = IO::Socket::INET->new( Proto     => 'tcp',
349                           LocalPort => $port,
350                           Listen    => 1,
351                           Reuse     => 1);
352   
353    die "can't setup server" unless $serverSocket;
354    #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
355
356    $read_set = new IO::Select();         # create handle set for reading
357    $read_set->add($serverSocket);        # add the main socket to the set
358    }
359
360    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
361   
362    # Shouldn't use double quotes around wget path after all? See final comment at
363    # http://www.perlmonks.org/?node_id=394709
364    # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
365    # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath
366    # because single/double quotes using open3 seem to launch a subshell, see also final comment at
367    # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes
368    # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other).
369    my @commandargs = split(' ', $cmdWget);
370    unshift(@commandargs, $wget_file_path);
371    $command = "$wget_file_path $cmdWget";
372#    print STDOUT "Command is: $command\n"; # displayed in GLI output
373#    print STDERR "Command is: $command\n"; # goes into ServerInfoDialog
374   
375    # Wget's output needs to be monitored to find out when it has naturally terminated.
376    # Wget's output is sent to its STDERR so we can't use open2 without doing 2>&1.
377    # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing
378    # the childpid does not kill wget on Linux but the subshell that launched it instead.
379    # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?],
380    # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!)
381    # However that may be, it works with open3. But to avoid the confusion of managing and
382    # closing an extra unused handle, a single handle is used instead for both the child's
383    # stderr and stdout.
384    # See http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
385    # for why this is the right thing to do.
386
387    # Both open2 and open3 don't return on failure, but raise an exception. The handling
388    # of the exception is described on p.568 of the Perl Cookbook
389    eval {
390    #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below
391    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
392   
393    # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to
394    # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows
395    ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
396    };
397    if ($@) {
398    if($@ =~ m/^open3/) {
399        die "open3 failed in $0: $!\n$@\n";     
400    }
401    die "Tried to launch open3 in $0, got unexpected exception: $@";
402    }
403
404    # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking
405    #     if defined(my $strLine=<$chld_out>)
406    # Google: perl open3 read timeout
407    # Google: perl open3 select() example
408    # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked
409    # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl
410    # https://gist.github.com/shalk/6988937
411    # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung
412    # http://perldoc.perl.org/IO/Select.html
413    # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3
414    # http://www.perlmonks.org/?node_id=951554
415    # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm
416    # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1
417
418    # create the select object and add our streamhandle(s)
419    my $sel = new IO::Select;
420    $sel->add($chld_out);
421
422    my $num_consecutive_timedouts = 0;
423    my $error = 0;
424    my $loop = 1;
425   
426    while($loop)
427    {
428    # assume we're going to timeout trying to read from child process
429    $num_consecutive_timedouts++;
430
431   
432    # block until data is available on the registered filehandles or until the timeout specified   
433    if(my @readyhandles = $sel->can_read($TIMEOUT)) {
434
435        $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
436        # since we're in this if statement
437       
438        # now there's a list of registered filehandles we can read from to loop through reading from.
439        # though we've registered only one, chld_out
440        foreach my $fh (@readyhandles) {
441        my $strLine;
442        #sleep 3;
443       
444        # read up to 4096 bytes from this filehandle fh.
445        # if there is less than 4096 bytes, we'll only get
446        # those available bytes and won't block.  If there
447        # is more than 4096 bytes, we'll only read 4096 and
448        # wait for the next iteration through the loop to
449        # read the rest.
450        my $len = sysread($fh, $strLine, 4096);
451       
452        if($len) { # read something
453            if($blnShow) {
454            print STDERR "$strLine\n";
455            }
456            $strReadIn .= $strLine;
457        }
458        else { # error or EOF: (!defined $len || $len == 0)         
459           
460            if(!defined $len) { # could be an error reading
461              # On Windows, the socket ends up forcibly closed on the "other" side. It's just the way it's implemented
462              # on Windows when using sockets to our child process' iostreams. So $len not being defined is not an error in that case. Refer to
463              # https://stackoverflow.com/questions/16675950/perl-select-returning-undef-on-sysread-when-using-windows-ipcopen3-and-ios/16676271
464              if(!$!{ECONNRESET}) { # anything other ECONNRESET error means it's a real case of undefined $len being an error
465                print STDERR "WgetDownload: Error reading from child stream: $!\n";
466                # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing
467                $error = 1;
468              } else { # $! contains message "An existing connection was forcibly closed by remote host" where "remote" is a reference to the sockets to our wget child process,
469                    # NOT to the remote web server we're downloading from. In such a case, the error code is ECONNRESET, and it's not an error, despite $len being undefined.
470                print STDERR "WgetDownload: wget finished\n";
471              }
472            }
473            elsif ($len == 0) { # EOF
474            # Finished reading from this filehandle $fh because we read 0 bytes.
475            # wget finished, terminate naturally
476            print STDERR "WgetDownload: wget finished\n";
477            #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n";
478            }
479
480            $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
481           
482            # last; # if we have more than one filehandle registered with IO::Select
483           
484            $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error         
485           
486        } # end else error or EOF
487       
488        } # end foreach on readyhandles
489    } # end if on can_read
490   
491    if($num_consecutive_timedouts >= $NUM_TRIES) {
492        $error = 1;
493        $loop = 0;                          # to break out of outer while loop
494
495        $num_consecutive_timedouts = 0;
496
497        print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
498        print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
499    }
500
501    if($loop == 0) { # error or EOF, either way, clean up
502        if($error) {
503        $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
504       
505        if(kill(0, $childpid)) {
506            # If kill(0, $childpid) returns true, then the process is running
507            # and we need to kill it.
508            close($chld_in);
509            close($chld_out);
510            kill('TERM', $childpid); # kill the process group by prefixing - to signal
511
512            # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
513            # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
514            #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";         
515            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
516        }
517        }
518        else { # wget finished (no errors), terminate naturally
519        #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
520        close($chld_in);
521        close($chld_out);
522        waitpid $childpid, 0;       
523        }
524
525        # error or not
526        $childpid = undef;
527        # Stop monitoring the read_handle and close the serverSocket
528        # (the Java end will close the client socket that Java opened)
529        if(defined $port) {
530        $read_set->remove($serverSocket);
531        close($serverSocket);
532        }
533    }
534
535    # If we've already terminated, either naturally or on error, we can get out of the while loop
536    next if($loop == 0);
537   
538    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
539   
540    # if we run this script from the command-line (as opposed to from GLI),
541    # then we're not working with sockets and can therefore skip the next bits
542    next unless(defined $port);
543   
544    # http://www.perlfect.com/articles/select.shtml
545    # "multiplex between several filehandles within a single thread of control,
546    # thus creating the effect of parallelism in the handling of I/O."
547    my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
548
549    # take all readable handles in turn
550    foreach my $rh (@rh_set) {
551        if($rh == $serverSocket) {
552        my $client = $rh->accept();
553        #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
554        print $client "Talked to ServerSocket (port $port). Connection accepted\n";
555       
556        # Read from the client (getting rid of the trailing newline)
557        # Has the client sent the <<STOP>> signal?
558        my $signal = <$client>;
559        chomp($signal);
560        if($signal eq "<<STOP>>") {
561            print $client "Perl received STOP signal (on port $port): stopping wget\n";
562            $loop = 0;                          # out of outer while loop
563            $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
564           
565            # Sometimes the wget process takes some time to start up. If the STOP signal
566            # was sent, don't try to terminate the process until we know it is running.
567            # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
568            # for it to start up, checking for whether it is running in order to kill it.
569            for(my $seconds = 1; $seconds <= 5; $seconds++) {
570            if(kill(0, $childpid)) {
571                # If kill(0, $childpid) returns true, then the process is running
572                # and we need to kill it.
573                close($chld_in);
574                close($chld_out);
575                kill("TERM", $childpid); # prefix - to signal to kill process group
576               
577                $childpid = undef;
578               
579                # Stop monitoring the read_handle and close the serverSocket
580                # (the Java end will close the client socket that Java opened)
581                $read_set->remove($rh);     #$read_set->remove($serverSocket);
582                close($rh);             #close($serverSocket);
583                print $client "Perl terminated wget and is about to exit\n";
584                last;                           # out of inner for loop
585            }
586            else { # the process may just be starting up, wait
587                sleep(1);
588            }
589            }
590            last;                               # out of foreach loop
591        }
592        }
593    }
594    }
595
596    if ($changed_dir) {
597    chdir $current_dir;
598    }
599   
600    return $strReadIn;
601}
602
603
604sub useWgetMonitored
605{
606    #local $| = 1; # autoflush stdout buffer
607    #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
608
609    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
610
611
612    my $current_dir = cwd();
613    my $changed_dir = 0;
614    if (defined $working_dir && -e $working_dir) {
615    chdir "$working_dir";
616    $changed_dir = 1;
617    }
618
619    # When we are running this script through GLI, the SIGTERM signal handler
620    # won't get called on Windows when wget is to be prematurely terminated.
621    # Instead, when wget has to be terminated in the middle of execution, GLI will
622    # connect to a serverSocket here to communicate when it's time to stop wget.
623    if($self->dealingWithSockets()) {
624
625    $port = <STDIN>; # gets a port on localhost that's not yet in use
626    chomp($port);
627   
628    $serverSocket = IO::Socket::INET->new( Proto     => 'tcp',
629                           LocalPort => $port,
630                           Listen    => 1,
631                           Reuse     => 1);
632   
633    die "can't setup server" unless $serverSocket;
634    #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
635
636    $read_set = new IO::Select();         # create handle set for reading
637    $read_set->add($serverSocket);        # add the main socket to the set
638    }
639
640    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
641    # compose the command as an array for open3, to preserve spaces in any filepath
642    my @commandargs = split(' ', $cmdWget);
643    unshift(@commandargs, $wget_file_path);
644    my $command = "$wget_file_path $cmdWget";
645    #print STDOUT "Command is: $command\n";
646
647    eval {     # see p.568 of Perl Cookbook
648    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
649    ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
650    };
651    if ($@) {
652    if($@ =~ m/^open3/) {
653        die "open3 failed in $0: $!\n$@\n";     
654    }
655    die "Tried to launch open3 in $0, got unexpected exception: $@";
656    }
657
658    my $full_text = "";
659    my $error_text = "";
660    my @follow_list = ();
661    my $line;
662
663    # create the select object and add our streamhandle(s)
664    my $sel = new IO::Select;
665    $sel->add($chld_out);
666   
667    my $num_consecutive_timedouts = 0;
668    my $error = 0;
669    my $loop = 1;
670    while($loop)
671    {
672    # assume we're going to timeout trying to read from child process
673    $num_consecutive_timedouts++;
674
675    # block until data is available on the registered filehandles or until the timeout specified   
676    if(my @readyhandles = $sel->can_read($TIMEOUT)) {
677        $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
678        # since we're in this if statement
679       
680        foreach my $fh (@readyhandles) {
681        my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle
682        if($len) { # read something
683       
684           
685            if((defined $blnShow) && $blnShow)
686            {
687            print STDERR "$line";
688            }
689           
690            if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
691            my $follow_url = $1;
692            push(@follow_list,$follow_url);
693            }
694           
695            if ($line =~ m/ERROR\s+\d+/) {
696            $error_text .= $line;
697            }
698           
699            $full_text .= $line;
700        } else { # error or EOF
701            if(!defined $len) { # error reading
702            #print STDERR "WgetDownload: Error reading from child stream: $!\n";
703            $error = 1;
704            }
705             if(!defined $len) {
706              if(!$!{ECONNRESET}) { # anything other ECONNRESET error means it's a real case of undefined $len being an error
707                #print STDERR "WgetDownload: Error reading from child stream: $!\n";               
708                $error = 1;
709              } else { # the error code is ECONNRESET, and it's not an error, despite $len being undefined.
710                       # Happens on Windows when using sockets to a child process' iostreams
711                #print STDERR "WgetDownload: wget finished\n";
712              }
713            }
714            elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read
715            #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally
716            }
717
718            $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
719           
720            # last; # if we have more than one filehandle registered with IO::Select
721           
722            $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error         
723        } # end else error or EOF
724       
725        } # end foreach on readyhandles
726    }  # end if on can_read
727
728    if($num_consecutive_timedouts >= $NUM_TRIES) {
729        $error = 1;
730        $loop = 0;                          # to break out of outer while loop
731
732        $num_consecutive_timedouts = 0;
733
734        #print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
735        #print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
736    }
737
738    if($loop == 0) { # error or EOF, either way, clean up
739       
740        if($error) {
741        $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
742       
743        if(kill(0, $childpid)) {
744            # If kill(0, $childpid) returns true, then the process is running
745            # and we need to kill it.
746            close($chld_in);
747            close($chld_out);
748            kill("TERM", $childpid); # prefix - to signal to kill process group
749           
750            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
751        }
752        }
753        else { # wget finished, terminate naturally
754        close($chld_in);
755        close($chld_out);
756        # Program terminates only when the following line is included
757        # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
758        # it prevents the child from turning into a "zombie process".
759        # While the wget process terminates without it, this perl script does not:
760        # the DOS prompt is not returned without it.
761        waitpid $childpid, 0;
762        }
763       
764        # error or not:
765        $childpid = undef;     
766        if(defined $port) {
767        $read_set->remove($serverSocket);
768        close($serverSocket);
769        }
770    }
771   
772    # If we've already terminated, either naturally or on error, we can get out of the while loop
773    next if($loop == 0);
774
775    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
776   
777    # if we run this script from the command-line (as opposed to from GLI),
778    # then we're not working with sockets and can therefore skip the next bits
779    next unless(defined $port);
780
781    # http://www.perlfect.com/articles/select.shtml
782    # "multiplex between several filehandles within a single thread of control,
783    # thus creating the effect of parallelism in the handling of I/O."
784    my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
785
786    # take all readable handles in turn
787    foreach my $rh (@rh_set) {
788        if($rh == $serverSocket) {
789        my $client = $rh->accept();
790        #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
791        print $client "Talked to ServerSocket (port $port). Connection accepted\n";
792       
793        # Read from the client (getting rid of trailing newline)
794        # Has the client sent the <<STOP>> signal?
795        my $signal = <$client>;
796        chomp($signal);
797        if($signal eq "<<STOP>>") {
798            print $client "Perl received STOP signal (on port $port): stopping wget\n";
799            $loop = 0;                          # out of outer while loop
800            $self->{'forced_quit'} = 1;         # subclasses need to know we're quitting
801           
802            # Sometimes the wget process takes some time to start up. If the STOP signal
803            # was sent, don't try to terminate the process until we know it is running.
804            # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
805            # for it to start up, checking for whether it is running in order to kill it.
806            for(my $seconds = 1; $seconds <= 5; $seconds++) {
807            if(kill(0, $childpid)) {
808                # If kill(0, $childpid) returns true, then the process is running
809                # and we need to kill it.
810                close($chld_in);
811                close($chld_out);
812                kill("TERM", $childpid); # prefix - to signal to kill process group
813               
814                $childpid = undef;
815               
816                # Stop monitoring the read_handle and close the serverSocket
817                # (the Java end will close the client socket that Java opened)
818                $read_set->remove($rh);     #$read_set->remove($serverSocket);
819                close($rh);             #close($serverSocket);
820                print $client "Perl terminated wget and is about to exit\n";
821                last;                           # out of inner for loop
822            }
823            else { # the process may just be starting up, wait
824                sleep(1);
825            }
826            }
827            last;                               # out of foreach loop
828        }
829        }
830    }
831    }
832
833    my $command_status = $?;
834    if ($command_status != 0) {
835    $error_text .= "Exit error: $command_status";
836    }
837
838    if ($changed_dir) {
839    chdir $current_dir;
840    }
841   
842    my $final_follow = pop(@follow_list); # might be undefined, but that's OK
843   
844    return ($full_text,$error_text,$final_follow);
845}
846
847
848# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
849sub checkURL
850{
851    my ($self) = @_;
852    if ($self->{'url'} eq "")
853    {
854    &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
855    }
856}
857
858sub error
859{
860    my ($strFunctionName,$strError) = @_;
861    {
862    print "Error occoured in WgetDownload.pm\n".
863        "In Function:".$strFunctionName."\n".
864        "Error Message:".$strError."\n";
865    exit(-1);
866    }
867}
868
8691;
870
Note: See TracBrowser for help on using the browser.