Changeset 31929


Ignore:
Timestamp:
2017-09-01T20:50:34+12:00 (7 years ago)
Author:
ak19
Message:

The recent overhaul of perl running wget and allowing proper termination on timeout and allowing wget to be cancelled on blocking (accomplished using timeouts), didn't work on Windows, since IO::Select's can_read() method only works on Windows with Sockets not other types of file handles because of lack of kernel level Win support, unlike on Linux where can_read() works with all types of file handles. The solution was not using alarm() to emulate read with timeouts in place of IO::Select's can_read(timeout) . (See the debug_testing area of trac for a commit containing the alarm() that worked on Linux but again not Windows.) The solution was to turn the filehandles to the wget child process' iostreams into Sockets, and then use IO::Select's can_read as before. Works on the usually problematic Windows. Still to test on linux.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm

    r31920 r31929  
    4040use IO::Socket;
    4141
     42#use IO::Select qw( );
     43#use IPC::Open3 qw( open3 );
     44use Socket     qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html
     45
    4246
    4347sub BEGIN {
     
    271275}
    272276
    273 # Shouldn't use double quotes around wget path after all? See final comment at
    274 # http://www.perlmonks.org/?node_id=394709
    275 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
     277# On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams
     278# However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read()
     279# not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from
     280# http://www.perlmonks.org/?node_id=869942
     281# http://www.perlmonks.org/?node_id=811650
     282# It was suggested that IPC::Run will take care of all this or circumvent the need for all this,
     283# but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS
     284
     285# Create a unidirectional pipe to an iostream of a process that is actually a socket
     286sub _pipe {
     287    socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC)
     288        or return undef;
     289    shutdown($_[0], 1);  # No more writing for reader. See http://www.perlmonks.org/?node=108244
     290    shutdown($_[1], 0);  # No more reading for writer
     291    return 1;
     292}
     293
     294sub _open3 {
     295    local (*TO_CHLD_R,     *TO_CHLD_W);
     296    local (*FR_CHLD_R,     *FR_CHLD_W);
     297    local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W);
     298
     299    if ($^O =~ /Win32/) {
     300        _pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $^E;
     301        _pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $^E;
     302        #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E;
     303    } else {
     304        pipe(*TO_CHLD_R,     *TO_CHLD_W    ) or die $!;
     305        pipe(*FR_CHLD_R,     *FR_CHLD_W    ) or die $!;
     306        #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!;
     307    }
     308
     309    #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_);
     310    my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc,
     311                                                                      # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
     312   
     313    return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R );
     314}
     315
     316# useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem
     317# useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration:
     318# - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there
     319# - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running.
     320# Then the loop of these two things repeats.
    276321sub useWget
    277322{
     
    313358
    314359    my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
    315     # compose the command as an array rather than as a string, to preserve spaces in the filepath
    316     # because single/double quotes using open3 seem to launch a shell, see final comment at
    317     # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget
    318     # as 2 processes then got launched which don't have parent-child pid relationship.
     360   
     361    # Shouldn't use double quotes around wget path after all? See final comment at
     362    # http://www.perlmonks.org/?node_id=394709
     363    # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
     364    # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath
     365    # because single/double quotes using open3 seem to launch a subshell, see also final comment at
     366    # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes
     367    # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other).
    319368    my @commandargs = split(' ', $cmdWget);
    320369    unshift(@commandargs, $wget_file_path);
     
    338387    # of the exception is described on p.568 of the Perl Cookbook
    339388    eval {
    340     #$childpid = open3($chld_in, $chld_out, $chld_out, $command);
    341     $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);   
     389    #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below
     390    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
     391   
     392    # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to
     393    # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows
     394    ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
    342395    };
    343396    if ($@) {
     
    435488        print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
    436489        print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
    437         #print STDERR "\tConsider cancelling this download?\n";
    438490    }
    439491
     
    451503            # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
    452504            # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
    453             print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";
    454            
    455             #die "Perl terminated wget after timing out repeatedly and is about to exit\n";
     505            #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";         
     506            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
    456507        }
    457508        }
     
    473524    }
    474525
    475     # If we've already terminated, we can get out of the loop
    476     #next if($loop == 0);
     526    # If we've already terminated, either naturally or on error, we can get out of the while loop
     527    next if($loop == 0);
     528   
     529    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
    477530   
    478531    # if we run this script from the command-line (as opposed to from GLI),
     
    584637
    585638    eval {     # see p.568 of Perl Cookbook
    586     $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
     639    #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
     640    ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
    587641    };
    588642    if ($@) {
     
    676730            kill("TERM", $childpid); # prefix - to signal to kill process group
    677731           
    678             #die "Perl terminated wget after timing out repeatedly and is about to exit\n";
     732            #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
    679733        }
    680734        }
     
    697751        }
    698752    }
    699 
     753   
     754    # If we've already terminated, either naturally or on error, we can get out of the while loop
     755    next if($loop == 0);
     756
     757    # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
     758   
    700759    # if we run this script from the command-line (as opposed to from GLI),
    701760    # then we're not working with sockets and can therefore skip the next bits
Note: See TracChangeset for help on using the changeset viewer.