Changeset 31929

Show
Ignore:
Timestamp:
01.09.2017 20:50:34 (3 weeks ago)
Author:
ak19
Message:

The recent overhaul of perl running wget and allowing proper termination on timeout and allowing wget to be cancelled on blocking (accomplished using timeouts), didn't work on Windows, since IO::Select's can_read() method only works on Windows with Sockets not other types of file handles because of lack of kernel level Win support, unlike on Linux where can_read() works with all types of file handles. The solution was not using alarm() to emulate read with timeouts in place of IO::Select's can_read(timeout) . (See the debug_testing area of trac for a commit containing the alarm() that worked on Linux but again not Windows.) The solution was to turn the filehandles to the wget child process' iostreams into Sockets, and then use IO::Select's can_read as before. Works on the usually problematic Windows. Still to test on linux.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm

    r31920 r31929  
    4040use IO::Socket; 
    4141 
     42#use IO::Select qw( ); 
     43#use IPC::Open3 qw( open3 ); 
     44use Socket     qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html 
     45 
    4246 
    4347sub BEGIN { 
     
    271275} 
    272276 
    273 # Shouldn't use double quotes around wget path after all? See final comment at 
    274 # http://www.perlmonks.org/?node_id=394709 
    275 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63 
     277# On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams 
     278# However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read() 
     279# not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from 
     280# http://www.perlmonks.org/?node_id=869942 
     281# http://www.perlmonks.org/?node_id=811650 
     282# It was suggested that IPC::Run will take care of all this or circumvent the need for all this, 
     283# but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS 
     284 
     285# Create a unidirectional pipe to an iostream of a process that is actually a socket 
     286sub _pipe { 
     287    socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC) 
     288        or return undef; 
     289    shutdown($_[0], 1);  # No more writing for reader. See http://www.perlmonks.org/?node=108244 
     290    shutdown($_[1], 0);  # No more reading for writer 
     291    return 1; 
     292} 
     293 
     294sub _open3 { 
     295    local (*TO_CHLD_R,     *TO_CHLD_W); 
     296    local (*FR_CHLD_R,     *FR_CHLD_W); 
     297    local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W); 
     298 
     299    if ($^O =~ /Win32/) { 
     300        _pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $^E; 
     301        _pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $^E; 
     302        #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E; 
     303    } else { 
     304        pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $!; 
     305        pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $!; 
     306        #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!; 
     307    } 
     308 
     309    #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_); 
     310    my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc, 
     311                                                                      # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html 
     312     
     313    return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R ); 
     314} 
     315 
     316# useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem 
     317# useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration: 
     318# - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there 
     319# - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running. 
     320# Then the loop of these two things repeats. 
    276321sub useWget 
    277322{ 
     
    313358 
    314359    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 
    315     # compose the command as an array rather than as a string, to preserve spaces in the filepath 
    316     # because single/double quotes using open3 seem to launch a shell, see final comment at  
    317     # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget 
    318     # as 2 processes then got launched which don't have parent-child pid relationship. 
     360     
     361    # Shouldn't use double quotes around wget path after all? See final comment at 
     362    # http://www.perlmonks.org/?node_id=394709 
     363    # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63 
     364    # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath 
     365    # because single/double quotes using open3 seem to launch a subshell, see also final comment at  
     366    # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes 
     367    # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other). 
    319368    my @commandargs = split(' ', $cmdWget); 
    320369    unshift(@commandargs, $wget_file_path); 
     
    338387    # of the exception is described on p.568 of the Perl Cookbook 
    339388    eval { 
    340     #$childpid = open3($chld_in, $chld_out, $chld_out, $command); 
    341     $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);     
     389    #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below 
     390    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 
     391     
     392    # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to 
     393    # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows 
     394    ($childpid, $chld_in, $chld_out) = _open3(@commandargs); 
    342395    }; 
    343396    if ($@) { 
     
    435488        print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n"; 
    436489        print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n"; 
    437         #print STDERR "\tConsider cancelling this download?\n";  
    438490    } 
    439491 
     
    451503            # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script 
    452504            # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes 
    453             print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n"; 
    454              
    455             #die "Perl terminated wget after timing out repeatedly and is about to exit\n"; 
     505            #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";          
     506            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n"; 
    456507        } 
    457508        } 
     
    473524    } 
    474525 
    475     # If we've already terminated, we can get out of the loop 
    476     #next if($loop == 0); 
     526    # If we've already terminated, either naturally or on error, we can get out of the while loop 
     527    next if($loop == 0); 
     528     
     529    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket 
    477530     
    478531    # if we run this script from the command-line (as opposed to from GLI),  
     
    584637 
    585638    eval {     # see p.568 of Perl Cookbook 
    586     $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 
     639    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 
     640    ($childpid, $chld_in, $chld_out) = _open3(@commandargs); 
    587641    }; 
    588642    if ($@) { 
     
    676730            kill("TERM", $childpid); # prefix - to signal to kill process group 
    677731             
    678             #die "Perl terminated wget after timing out repeatedly and is about to exit\n"; 
     732            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n"; 
    679733        } 
    680734        } 
     
    697751        } 
    698752    } 
    699  
     753     
     754    # If we've already terminated, either naturally or on error, we can get out of the while loop 
     755    next if($loop == 0); 
     756 
     757    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket 
     758     
    700759    # if we run this script from the command-line (as opposed to from GLI),  
    701760    # then we're not working with sockets and can therefore skip the next bits