Changeset 31920
- Timestamp:
- 2017-08-30T19:29:36+12:00 (7 years ago)
- Location:
- main/trunk/greenstone2/perllib/downloaders
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/downloaders/WebDownload.pm
r31898 r31920 118 118 } 119 119 #my $cmdWget = "-N -k -x -t 2 -P \"".$hashGeneralOptions->{"cache_dir"}."\" $strWgetOptions $strOptions ".$self->{'url'}; 120 my $cmdWget = "-N -k -x --tries=2 $strWgetOptions $strOptions $cache_dir " .$self->{'url'}; 120 #my $cmdWget = "-N -k -x --tries=2 $strWgetOptions $strOptions $cache_dir " .$self->{'url'}; 121 my $cmdWget = "-N -k -x $strWgetOptions $strOptions $cache_dir " .$self->{'url'}; 121 122 122 123 #print STDOUT "\n@@@@ RUNNING WGET CMD: $cmdWget\n\n"; … … 191 192 my $strOptions = $self->getWgetOptions(); 192 193 193 my $strBaseCMD = $strOptions." --tries=2 -q -O - \"$self->{'url'}\""; 194 #my $strBaseCMD = $strOptions." --tries=2 -q -O - \"$self->{'url'}\""; 195 my $strBaseCMD = $strOptions." -q -O - $self->{'url'}"; 194 196 195 197 #&util::print_env(STDERR, "https_proxy", "http_proxy", "ftp_proxy"); -
main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm
r31880 r31920 112 112 my ($serverSocket, $read_set); 113 113 114 my $TIMEOUT = 1; # seconds 115 my $NUM_TRIES = 10; 116 114 117 # The port this script's server socket will be listening on, to handle 115 118 # incoming signals from GLI to terminate wget. This is also file global, … … 133 136 # See http://perldoc.perl.org/perlipc.html#Signals 134 137 # Warning on using kill at http://perldoc.perl.org/perlfork.html 135 kill("TERM", $childpid); 138 kill("TERM", $childpid); # prefix - to signal to kill process group 136 139 137 140 # If the SIGTERM sent on Linux calls this handler, we want to make … … 261 264 # terminate wget). True if we are running gli, or if the particular type 262 265 # of WgetDownload is *not* OAIDownload (in that case, the original way of 263 # terminating the perl script from Java terminatedwget as well).266 # terminating the perl script from Java would terminate wget as well). 264 267 sub dealingWithSockets() { 265 268 my ($self) = @_; … … 268 271 } 269 272 270 273 # Shouldn't use double quotes around wget path after all? See final comment at 274 # http://www.perlmonks.org/?node_id=394709 275 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63 271 276 sub useWget 272 277 { … … 308 313 309 314 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 310 $command = "\"$wget_file_path\" $cmdWget"; 311 #print STDOUT "Command is: $command\n"; # displayed in GLI output 312 #print STDERR "Command is: $command\n"; # goes into ServerInfoDialog 315 # compose the command as an array rather than as a string, to preserve spaces in the filepath 316 # because single/double quotes using open3 seem to launch a shell, see final comment at 317 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget 318 # as 2 processes then got launched which don't have parent-child pid relationship. 319 my @commandargs = split(' ', $cmdWget); 320 unshift(@commandargs, $wget_file_path); 321 $command = "$wget_file_path $cmdWget"; 322 # print STDOUT "Command is: $command\n"; # displayed in GLI output 323 # print STDERR "Command is: $command\n"; # goes into ServerInfoDialog 313 324 314 325 # Wget's output needs to be monitored to find out when it has naturally terminated. … … 316 327 # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing 317 328 # the childpid does not kill wget on Linux but the subshell that launched it instead. 318 # Therefore, we use open3. Though the child process wget sends output only to its stdout ,329 # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?], 319 330 # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!) 320 331 # However that may be, it works with open3. But to avoid the confusion of managing and … … 326 337 # Both open2 and open3 don't return on failure, but raise an exception. The handling 327 338 # of the exception is described on p.568 of the Perl Cookbook 328 eval { 329 $childpid = open3($chld_in, $chld_out, $chld_out, $command); 339 eval { 340 #$childpid = open3($chld_in, $chld_out, $chld_out, $command); 341 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 330 342 }; 331 343 if ($@) { … … 336 348 } 337 349 350 # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking 351 # if defined(my $strLine=<$chld_out>) 352 # Google: perl open3 read timeout 353 # Google: perl open3 select() example 354 # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked 355 # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl 356 # https://gist.github.com/shalk/6988937 357 # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung 358 # http://perldoc.perl.org/IO/Select.html 359 # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3 360 # http://www.perlmonks.org/?node_id=951554 361 # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm 362 # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1 363 364 # create the select object and add our streamhandle(s) 365 my $sel = new IO::Select; 366 $sel->add($chld_out); 367 368 my $num_consecutive_timedouts = 0; 369 my $error = 0; 338 370 my $loop = 1; 371 339 372 while($loop) 340 373 { 341 if (defined(my $strLine=<$chld_out>)) { # we're reading in from child process' stdout 342 if($blnShow) { 343 print STDERR "$strLine\n"; 374 # assume we're going to timeout trying to read from child process 375 $num_consecutive_timedouts++; 376 377 378 # block until data is available on the registered filehandles or until the timeout specified 379 if(my @readyhandles = $sel->can_read($TIMEOUT)) { 380 381 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all 382 # since we're in this if statement 383 384 # now there's a list of registered filehandles we can read from to loop through reading from. 385 # though we've registered only one, chld_out 386 foreach my $fh (@readyhandles) { 387 my $strLine; 388 #sleep 3; 389 390 # read up to 4096 bytes from this filehandle fh. 391 # if there is less than 4096 bytes, we'll only get 392 # those available bytes and won't block. If there 393 # is more than 4096 bytes, we'll only read 4096 and 394 # wait for the next iteration through the loop to 395 # read the rest. 396 my $len = sysread($fh, $strLine, 4096); 397 398 if($len) { # read something 399 if($blnShow) { 400 print STDERR "$strLine\n"; 401 } 402 $strReadIn .= $strLine; 403 } 404 else { # error or EOF: (!defined $len || $len == 0) 405 406 if(!defined $len) { # error reading 407 print STDERR "WgetDownload: Error reading from child stream: $!\n"; 408 # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing 409 $error = 1; 410 } 411 elsif ($len == 0) { # EOF 412 # Finished reading from this filehand $fh because we read 0 bytes. 413 # wget finished, terminate naturally 414 print STDERR "WgetDownload: wget finished\n"; 415 #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n"; 416 } 417 418 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop 419 420 # last; # if we have more than one filehandle registered with IO::Select 421 422 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error 423 424 } # end else error or EOF 425 426 } # end foreach on readyhandles 427 } # end if on can_read 428 429 if($num_consecutive_timedouts >= $NUM_TRIES) { 430 $error = 1; 431 $loop = 0; # to break out of outer while loop 432 433 $num_consecutive_timedouts = 0; 434 435 print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n"; 436 print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n"; 437 #print STDERR "\tConsider cancelling this download?\n"; 438 } 439 440 if($loop == 0) { # error or EOF, either way, clean up 441 if($error) { 442 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting 443 444 if(kill(0, $childpid)) { 445 # If kill(0, $childpid) returns true, then the process is running 446 # and we need to kill it. 447 close($chld_in); 448 close($chld_out); 449 kill('TERM', $childpid); # kill the process group by prefixing - to signal 450 451 # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script 452 # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes 453 print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n"; 454 455 #die "Perl terminated wget after timing out repeatedly and is about to exit\n"; 456 } 344 457 } 345 $strReadIn .= $strLine; 346 } 347 else { # wget finished, terminate naturally 348 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n"; 349 close($chld_in); 350 close($chld_out); 351 waitpid $childpid, 0; 352 $loop = 0; 353 458 else { # wget finished (no errors), terminate naturally 459 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n"; 460 close($chld_in); 461 close($chld_out); 462 waitpid $childpid, 0; 463 } 464 465 # error or not 354 466 $childpid = undef; 467 # Stop monitoring the read_handle and close the serverSocket 468 # (the Java end will close the client socket that Java opened) 355 469 if(defined $port) { 356 470 $read_set->remove($serverSocket); … … 359 473 } 360 474 475 # If we've already terminated, we can get out of the loop 476 #next if($loop == 0); 477 361 478 # if we run this script from the command-line (as opposed to from GLI), 362 # then we're not working with sockets and can therefore canskip the next bits479 # then we're not working with sockets and can therefore skip the next bits 363 480 next unless(defined $port); 364 481 365 482 # http://www.perlfect.com/articles/select.shtml 366 483 # "multiplex between several filehandles within a single thread of control, … … 389 506 # for it to start up, checking for whether it is running in order to kill it. 390 507 for(my $seconds = 1; $seconds <= 5; $seconds++) { 391 if(kill(0, $childpid)) { 508 if(kill(0, $childpid)) { 392 509 # If kill(0, $childpid) returns true, then the process is running 393 510 # and we need to kill it. 394 511 close($chld_in); 395 512 close($chld_out); 396 kill("TERM", $childpid); 513 kill("TERM", $childpid); # prefix - to signal to kill process group 397 514 398 515 $childpid = undef; … … 460 577 461 578 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget"); 462 my $command = "\"$wget_file_path\" $cmdWget"; 579 # compose the command as an array for open3, to preserve spaces in any filepath 580 my @commandargs = split(' ', $cmdWget); 581 unshift(@commandargs, $wget_file_path); 582 my $command = "$wget_file_path $cmdWget"; 463 583 #print STDOUT "Command is: $command\n"; 464 584 465 585 eval { # see p.568 of Perl Cookbook 466 $childpid = open3($chld_in, $chld_out, $chld_out, $command);586 $childpid = open3($chld_in, $chld_out, $chld_out, @commandargs); 467 587 }; 468 588 if ($@) { … … 478 598 my $line; 479 599 600 # create the select object and add our streamhandle(s) 601 my $sel = new IO::Select; 602 $sel->add($chld_out); 603 604 my $num_consecutive_timedouts = 0; 605 my $error = 0; 480 606 my $loop = 1; 481 607 while($loop) 482 608 { 483 if (defined($line=<$chld_out>)) { # we're reading in from child process' stdout 484 if((defined $blnShow) && $blnShow) 485 { 486 print STDERR "$line"; 609 # assume we're going to timeout trying to read from child process 610 $num_consecutive_timedouts++; 611 612 # block until data is available on the registered filehandles or until the timeout specified 613 if(my @readyhandles = $sel->can_read($TIMEOUT)) { 614 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all 615 # since we're in this if statement 616 617 foreach my $fh (@readyhandles) { 618 my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle 619 if($len) { # read something 620 621 622 if((defined $blnShow) && $blnShow) 623 { 624 print STDERR "$line"; 625 } 626 627 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) { 628 my $follow_url = $1; 629 push(@follow_list,$follow_url); 630 } 631 632 if ($line =~ m/ERROR\s+\d+/) { 633 $error_text .= $line; 634 } 635 636 $full_text .= $line; 637 } else { # error or EOF 638 if(!defined $len) { # error reading 639 #print STDERR "WgetDownload: Error reading from child stream: $!\n"; 640 $error = 1; 641 } 642 elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read 643 #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally 644 } 645 646 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop 647 648 # last; # if we have more than one filehandle registered with IO::Select 649 650 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error 651 } # end else error or EOF 652 653 } # end foreach on readyhandles 654 } # end if on can_read 655 656 if($num_consecutive_timedouts >= $NUM_TRIES) { 657 $error = 1; 658 $loop = 0; # to break out of outer while loop 659 660 $num_consecutive_timedouts = 0; 661 662 #print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n"; 663 #print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n"; 664 } 665 666 if($loop == 0) { # error or EOF, either way, clean up 667 668 if($error) { 669 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting 670 671 if(kill(0, $childpid)) { 672 # If kill(0, $childpid) returns true, then the process is running 673 # and we need to kill it. 674 close($chld_in); 675 close($chld_out); 676 kill("TERM", $childpid); # prefix - to signal to kill process group 677 678 #die "Perl terminated wget after timing out repeatedly and is about to exit\n"; 679 } 680 } 681 else { # wget finished, terminate naturally 682 close($chld_in); 683 close($chld_out); 684 # Program terminates only when the following line is included 685 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary 686 # it prevents the child from turning into a "zombie process". 687 # While the wget process terminates without it, this perl script does not: 688 # the DOS prompt is not returned without it. 689 waitpid $childpid, 0; 487 690 } 488 691 489 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) { 490 my $follow_url = $1; 491 push(@follow_list,$follow_url); 492 } 493 494 if ($line =~ m/ERROR\s+\d+/) { 495 $error_text .= $line; 496 } 497 498 $full_text .= $line; 499 } 500 else { # wget finished, terminate naturally 501 #print STDERR "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n"; 502 close($chld_in); 503 close($chld_out); 504 # Program terminates only when the following line is included 505 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary 506 # it prevents the child from turning into a "zombie process". 507 # While the wget process terminates without it, this perl script does not: 508 # the DOS prompt is not returned without it. 509 waitpid $childpid, 0; 510 $loop = 0; 511 512 $childpid = undef; 692 # error or not: 693 $childpid = undef; 513 694 if(defined $port) { 514 695 $read_set->remove($serverSocket); … … 518 699 519 700 # if we run this script from the command-line (as opposed to from GLI), 520 # then we're not working with sockets and can therefore canskip the next bits701 # then we're not working with sockets and can therefore skip the next bits 521 702 next unless(defined $port); 522 703 … … 547 728 # for it to start up, checking for whether it is running in order to kill it. 548 729 for(my $seconds = 1; $seconds <= 5; $seconds++) { 549 if(kill(0, $childpid)) { 730 if(kill(0, $childpid)) { 550 731 # If kill(0, $childpid) returns true, then the process is running 551 732 # and we need to kill it. 552 733 close($chld_in); 553 734 close($chld_out); 554 kill("TERM", $childpid); 735 kill("TERM", $childpid); # prefix - to signal to kill process group 555 736 556 737 $childpid = undef;
Note:
See TracChangeset
for help on using the changeset viewer.