Changeset 31929 for main/trunk/greenstone2/perllib
- Timestamp:
- 2017-09-01T20:50:34+12:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm
r31920 r31929 40 40 use IO::Socket; 41 41 42 #use IO::Select qw( ); 43 #use IPC::Open3 qw( open3 ); 44 use Socket qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html 45 42 46 43 47 sub BEGIN { … … 271 275 } 272 276 273 # Shouldn't use double quotes around wget path after all? See final comment at 274 # http://www.perlmonks.org/?node_id=394709 275 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63 277 # On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams 278 # However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read() 279 # not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from 280 # http://www.perlmonks.org/?node_id=869942 281 # http://www.perlmonks.org/?node_id=811650 282 # It was suggested that IPC::Run will take care of all this or circumvent the need for all this, 283 # but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS 284 285 # Create a unidirectional pipe to an iostream of a process that is actually a socket 286 sub _pipe { 287 socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC) 288 or return undef; 289 shutdown($_[0], 1); # No more writing for reader. See http://www.perlmonks.org/?node=108244 290 shutdown($_[1], 0); # No more reading for writer 291 return 1; 292 } 293 294 sub _open3 { 295 local (*TO_CHLD_R, *TO_CHLD_W); 296 local (*FR_CHLD_R, *FR_CHLD_W); 297 local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W); 298 299 if ($^O =~ /Win32/) { 300 _pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $^E; 301 _pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $^E; 302 #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E; 303 } else { 304 pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $!; 305 pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $!; 306 #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!; 307 } 308 309 #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_); 310 my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc, 311 # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html 312 313 return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R ); 314 } 315 316 # useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem 317 # useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration: 318 # - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there 319 # - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running. 320 # Then the loop of these two things repeats. 276 321 sub useWget 277 322 { … … 313 358 314 359 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 315 # compose the command as an array rather than as a string, to preserve spaces in the filepath 316 # because single/double quotes using open3 seem to launch a shell, see final comment at 317 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget 318 # as 2 processes then got launched which don't have parent-child pid relationship. 360 361 # Shouldn't use double quotes around wget path after all? See final comment at 362 # http://www.perlmonks.org/?node_id=394709 363 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63 364 # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath 365 # because single/double quotes using open3 seem to launch a subshell, see also final comment at 366 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes 367 # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other). 319 368 my @commandargs = split(' ', $cmdWget); 320 369 unshift(@commandargs, $wget_file_path); … … 338 387 # of the exception is described on p.568 of the Perl Cookbook 339 388 eval { 340 #$childpid = open3($chld_in, $chld_out, $chld_out, $command); 341 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 389 #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below 390 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 391 392 # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to 393 # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows 394 ($childpid, $chld_in, $chld_out) = _open3(@commandargs); 342 395 }; 343 396 if ($@) { … … 435 488 print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n"; 436 489 print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n"; 437 #print STDERR "\tConsider cancelling this download?\n";438 490 } 439 491 … … 451 503 # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script 452 504 # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes 453 print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n"; 454 455 #die "Perl terminated wget after timing out repeatedly and is about to exit\n"; 505 #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n"; 506 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n"; 456 507 } 457 508 } … … 473 524 } 474 525 475 # If we've already terminated, we can get out of the loop 476 #next if($loop == 0); 526 # If we've already terminated, either naturally or on error, we can get out of the while loop 527 next if($loop == 0); 528 529 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket 477 530 478 531 # if we run this script from the command-line (as opposed to from GLI), … … 584 637 585 638 eval { # see p.568 of Perl Cookbook 586 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 639 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 640 ($childpid, $chld_in, $chld_out) = _open3(@commandargs); 587 641 }; 588 642 if ($@) { … … 676 730 kill("TERM", $childpid); # prefix - to signal to kill process group 677 731 678 # die"Perl terminated wget after timing out repeatedly and is about to exit\n";732 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n"; 679 733 } 680 734 } … … 697 751 } 698 752 } 699 753 754 # If we've already terminated, either naturally or on error, we can get out of the while loop 755 next if($loop == 0); 756 757 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket 758 700 759 # if we run this script from the command-line (as opposed to from GLI), 701 760 # then we're not working with sockets and can therefore skip the next bits
Note:
See TracChangeset
for help on using the changeset viewer.