source: main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm@ 31929

Last change on this file since 31929 was 31929, checked in by ak19, 7 years ago

The recent overhaul of perl running wget and allowing proper termination on timeout and allowing wget to be cancelled on blocking (accomplished using timeouts), didn't work on Windows, since IO::Select's can_read() method only works on Windows with Sockets not other types of file handles because of lack of kernel level Win support, unlike on Linux where can_read() works with all types of file handles. The solution was not using alarm() to emulate read with timeouts in place of IO::Select's can_read(timeout) . (See the debug_testing area of trac for a commit containing the alarm() that worked on Linux but again not Windows.) The solution was to turn the filehandles to the wget child process' iostreams into Sockets, and then use IO::Select's can_read as before. Works on the usually problematic Windows. Still to test on linux.

  • Property svn:keywords set to Author Date Id Revision
File size: 32.5 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use util;
38use IPC::Open3;
39use IO::Select;
40use IO::Socket;
41
42#use IO::Select qw( );
43#use IPC::Open3 qw( open3 );
44use Socket qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html
45
46
47sub BEGIN {
48 @WgetDownload::ISA = ('BaseDownload');
49}
50
51my $arguments =
52 [ { 'name' => "proxy_on",
53 'desc' => "{WgetDownload.proxy_on}",
54 'type' => "flag",
55 'reqd' => "no",
56 'hiddengli' => "yes"},
57 { 'name' => "http_proxy_host",
58 'desc' => "{WgetDownload.http_proxy_host}",
59 'type' => "string",
60 'reqd' => "no",
61 'hiddengli' => "yes"},
62 { 'name' => "http_proxy_port",
63 'desc' => "{WgetDownload.http_proxy_port}",
64 'type' => "string",
65 'reqd' => "no",
66 'hiddengli' => "yes"},
67 { 'name' => "https_proxy_host",
68 'desc' => "{WgetDownload.https_proxy_host}",
69 'type' => "string",
70 'reqd' => "no",
71 'hiddengli' => "yes"},
72 { 'name' => "https_proxy_port",
73 'desc' => "{WgetDownload.https_proxy_port}",
74 'type' => "string",
75 'reqd' => "no",
76 'hiddengli' => "yes"},
77 { 'name' => "ftp_proxy_host",
78 'desc' => "{WgetDownload.ftp_proxy_host}",
79 'type' => "string",
80 'reqd' => "no",
81 'hiddengli' => "yes"},
82 { 'name' => "ftp_proxy_port",
83 'desc' => "{WgetDownload.ftp_proxy_port}",
84 'type' => "string",
85 'reqd' => "no",
86 'hiddengli' => "yes"},
87 { 'name' => "user_name",
88 'desc' => "{WgetDownload.user_name}",
89 'type' => "string",
90 'reqd' => "no",
91 'hiddengli' => "yes"},
92 { 'name' => "user_password",
93 'desc' => "{WgetDownload.user_password}",
94 'type' => "string",
95 'reqd' => "no",
96 'hiddengli' => "yes"},
97 { 'name' => "no_check_certificate",
98 'desc' => "{WgetDownload.no_check_certificate}",
99 'type' => "flag",
100 'reqd' => "no",
101 'hiddengli' => "yes"}
102 ];
103
104my $options = { 'name' => "WgetDownload",
105 'desc' => "{WgetDownload.desc}",
106 'abstract' => "yes",
107 'inherits' => "yes",
108 'args' => $arguments };
109
110
111# Declaring file global variables related to the wget child process so that
112# the termination signal handler for SIGTERM can close the streams and tidy
113# up before ending the child process.
114my $childpid;
115my ($chld_out, $chld_in);
116my ($serverSocket, $read_set);
117
118my $TIMEOUT = 1; # seconds
119my $NUM_TRIES = 10;
120
121# The port this script's server socket will be listening on, to handle
122# incoming signals from GLI to terminate wget. This is also file global,
123# since OAIDownload.pm will make several calls on wget using the same
124# instance of this script and we want to reuse whatever port GLI gave us.
125my $port;
126
127# When this script is called from the command line, this handler will be called
128# if this process is killed or abruptly ends due to receiving one of the
129# terminating signals that this handler is registered to deal with.
130sub abrupt_end_handler {
131 my $termination_signal = shift (@_);
132
133 if(defined $childpid) {
134 close($chld_out);
135 close($chld_in);
136
137 print STDOUT "Received termination signal: $termination_signal\n";
138
139 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
140 # See http://perldoc.perl.org/perlipc.html#Signals
141 # Warning on using kill at http://perldoc.perl.org/perlfork.html
142 kill("TERM", $childpid); # prefix - to signal to kill process group
143
144 # If the SIGTERM sent on Linux calls this handler, we want to make
145 # sure any socket connection is closed.
146 # Otherwise sockets are only used when this script is run from GLI
147 # in which case the handlers don't really get called.
148 if(defined $serverSocket) {
149 $read_set->remove($serverSocket) if defined $read_set;
150 close($serverSocket);
151 }
152 }
153
154 exit(0);
155}
156
157# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
158# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
159# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
160# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
161# http://affy.blogspot.com/p5be/ch13.htm
162# http://perldoc.perl.org/perlipc.html#Signals
163$SIG{'INT'} = \&abrupt_end_handler;
164$SIG{'TERM'} = \&abrupt_end_handler;
165
166sub new {
167 my ($class) = shift (@_);
168 my ($getlist,$inputargs,$hashArgOptLists) = @_;
169 push(@$getlist, $class);
170
171 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
172 push(@{$hashArgOptLists->{"OptList"}},$options);
173
174 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
175
176 # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
177 &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
178
179 return bless $self, $class;
180}
181
182sub checkWgetSetup
183{
184 my ($self,$blnGliCall) = @_;
185 #TODO: proxy detection??
186
187 if((!$blnGliCall) && $self->{'proxy_on'})
188 {
189 &checkProxySetup($self);
190 }
191 &checkURL($self);
192}
193
194# Not using this. On Windows, we used to pass proxying settings as flags to wget. But, as that can be
195# seen with Task Manager, we now have the proxy settings set in the environment and are no longer passing it
196sub addProxySettingsAsWgetFlags
197{
198 my ($self) = @_;
199 my $strOptions = "";
200
201 if($self->{'http_proxy_host'} && $self->{'http_proxy_port'}) {
202 $strOptions .= " -e http_proxy=$self->{'http_proxy_host'}:$self->{'http_proxy_port'} ";
203 }
204 if($self->{'https_proxy_host'} && $self->{'https_proxy_port'}) {
205 $strOptions .= " -e https_proxy=$self->{'https_proxy_host'}:$self->{'https_proxy_port'} ";
206 }
207 if($self->{'ftp_proxy_host'} && $self->{'ftp_proxy_port'}) {
208 $strOptions .= " -e ftp_proxy=$self->{'ftp_proxy_host'}:$self->{'ftp_proxy_port'} ";
209 }
210
211 # For wget, there is only one set pair of proxy-user and proxy-passwd, so wget seems to assume
212 # that all 3 proxy protocols (http|https|ftp) will use the same username and pwd combination?
213 # Note that this only matters when passing the proxying details as flags to wget, not when
214 # the proxies are setup as environment variables.
215 if ($self->{'user_name'} && $self->{'user_password'})
216 {
217 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
218 # how is "--proxy-passwd" instead of "--proxy-password" even working????
219 # see https://www.gnu.org/software/wget/manual/html_node/Proxies.html
220 # and https://www.gnu.org/software/wget/manual/wget.html
221 # Not touching this, in case the manual is simply wrong. Since our code works in
222 # practice (when we were still using wget proxy username/pwd flags for windows).
223 }
224
225 return $strOptions;
226}
227
228sub getWgetOptions
229{
230 my ($self) = @_;
231 my $strOptions = "";
232
233 # If proxy settings are set up in the environment, wget is ready to use them. More secure.
234 # But if proxy settings are not set up in the environment, pass them as flags to wget
235 # This is less secure, as pwd etc visible in task manager, but it was the original way in
236 # which wget was run on windows.
237 # Truth in Perl: https://home.ubalt.edu/abento/452/perl/perltruth.html
238 # http://www.perlmonks.org/?node=what%20is%20true%20and%20false%20in%20Perl%3F
239
240 if ($self->{'proxy_on'}) {
241 if(!$ENV{'http_proxy'} && !$ENV{'https_proxy'} && !$ENV{'ftp_proxy'}) {
242 $strOptions .= $self->addProxySettingsAsWgetFlags();
243 } # else wget will use proxy settings in environment, assume enough settings have been provided
244 # either way, we're using the proxy
245 $strOptions .= " --proxy ";
246 }
247
248 if($self->{'no_check_certificate'}) { # URL may be http that gets redirected to https, so if no_check_certificate is on, turn it on even if URL is http
249
250 $strOptions .= " --no-check-certificate ";
251 }
252
253 return $strOptions;
254}
255
256# Checking for proxy setup: proxy server, proxy port, proxy username and password.
257sub checkProxySetup
258{
259 my ($self) = @_;
260 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
261 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
262 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
263
264}
265
266# Returns true if the wget status needs to be monitored through sockets
267# (if a socket is used to communicate with the Java program on when to
268# terminate wget). True if we are running gli, or if the particular type
269# of WgetDownload is *not* OAIDownload (in that case, the original way of
270# terminating the perl script from Java would terminate wget as well).
271sub dealingWithSockets() {
272 my ($self) = @_;
273 return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
274 # use ref($self) to find the classname of an object
275}
276
277# On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams
278# However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read()
279# not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from
280# http://www.perlmonks.org/?node_id=869942
281# http://www.perlmonks.org/?node_id=811650
282# It was suggested that IPC::Run will take care of all this or circumvent the need for all this,
283# but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS
284
285# Create a unidirectional pipe to an iostream of a process that is actually a socket
286sub _pipe {
287 socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC)
288 or return undef;
289 shutdown($_[0], 1); # No more writing for reader. See http://www.perlmonks.org/?node=108244
290 shutdown($_[1], 0); # No more reading for writer
291 return 1;
292}
293
294sub _open3 {
295 local (*TO_CHLD_R, *TO_CHLD_W);
296 local (*FR_CHLD_R, *FR_CHLD_W);
297 local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W);
298
299 if ($^O =~ /Win32/) {
300 _pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $^E;
301 _pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $^E;
302 #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E;
303 } else {
304 pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $!;
305 pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $!;
306 #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!;
307 }
308
309 #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_);
310 my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc,
311 # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
312
313 return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R );
314}
315
316# useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem
317# useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration:
318# - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there
319# - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running.
320# Then the loop of these two things repeats.
321sub useWget
322{
323 #local $| = 1; # autoflush stdout buffer
324 #print STDOUT "*** Start of subroutine useWget in $0\n";
325
326 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
327
328 my ($strReadIn,$strLine,$command);
329 $strReadIn = "" unless defined $strReadIn;
330
331 my $current_dir = cwd();
332 my $changed_dir = 0;
333 if (defined $working_dir && -e $working_dir) {
334 chdir "$working_dir";
335 $changed_dir = 1;
336 }
337
338 # When we are running this script through GLI, the SIGTERM signal handler
339 # won't get called on Windows when wget is to be prematurely terminated.
340 # Instead, when wget has to be terminated in the middle of execution, GLI will
341 # connect to a serverSocket here to communicate when it's time to stop wget.
342 if($self->dealingWithSockets()) {
343
344 $port = <STDIN>; # gets a port on localhost that's not yet in use
345 chomp($port);
346
347 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
348 LocalPort => $port,
349 Listen => 1,
350 Reuse => 1);
351
352 die "can't setup server" unless $serverSocket;
353 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
354
355 $read_set = new IO::Select(); # create handle set for reading
356 $read_set->add($serverSocket); # add the main socket to the set
357 }
358
359 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
360
361 # Shouldn't use double quotes around wget path after all? See final comment at
362 # http://www.perlmonks.org/?node_id=394709
363 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
364 # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath
365 # because single/double quotes using open3 seem to launch a subshell, see also final comment at
366 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes
367 # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other).
368 my @commandargs = split(' ', $cmdWget);
369 unshift(@commandargs, $wget_file_path);
370 $command = "$wget_file_path $cmdWget";
371# print STDOUT "Command is: $command\n"; # displayed in GLI output
372# print STDERR "Command is: $command\n"; # goes into ServerInfoDialog
373
374 # Wget's output needs to be monitored to find out when it has naturally terminated.
375 # Wget's output is sent to its STDERR so we can't use open2 without doing 2>&1.
376 # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing
377 # the childpid does not kill wget on Linux but the subshell that launched it instead.
378 # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?],
379 # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!)
380 # However that may be, it works with open3. But to avoid the confusion of managing and
381 # closing an extra unused handle, a single handle is used instead for both the child's
382 # stderr and stdout.
383 # See http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
384 # for why this is the right thing to do.
385
386 # Both open2 and open3 don't return on failure, but raise an exception. The handling
387 # of the exception is described on p.568 of the Perl Cookbook
388 eval {
389 #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below
390 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
391
392 # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to
393 # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows
394 ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
395 };
396 if ($@) {
397 if($@ =~ m/^open3/) {
398 die "open3 failed in $0: $!\n$@\n";
399 }
400 die "Tried to launch open3 in $0, got unexpected exception: $@";
401 }
402
403 # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking
404 # if defined(my $strLine=<$chld_out>)
405 # Google: perl open3 read timeout
406 # Google: perl open3 select() example
407 # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked
408 # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl
409 # https://gist.github.com/shalk/6988937
410 # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung
411 # http://perldoc.perl.org/IO/Select.html
412 # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3
413 # http://www.perlmonks.org/?node_id=951554
414 # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm
415 # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1
416
417 # create the select object and add our streamhandle(s)
418 my $sel = new IO::Select;
419 $sel->add($chld_out);
420
421 my $num_consecutive_timedouts = 0;
422 my $error = 0;
423 my $loop = 1;
424
425 while($loop)
426 {
427 # assume we're going to timeout trying to read from child process
428 $num_consecutive_timedouts++;
429
430
431 # block until data is available on the registered filehandles or until the timeout specified
432 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
433
434 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
435 # since we're in this if statement
436
437 # now there's a list of registered filehandles we can read from to loop through reading from.
438 # though we've registered only one, chld_out
439 foreach my $fh (@readyhandles) {
440 my $strLine;
441 #sleep 3;
442
443 # read up to 4096 bytes from this filehandle fh.
444 # if there is less than 4096 bytes, we'll only get
445 # those available bytes and won't block. If there
446 # is more than 4096 bytes, we'll only read 4096 and
447 # wait for the next iteration through the loop to
448 # read the rest.
449 my $len = sysread($fh, $strLine, 4096);
450
451 if($len) { # read something
452 if($blnShow) {
453 print STDERR "$strLine\n";
454 }
455 $strReadIn .= $strLine;
456 }
457 else { # error or EOF: (!defined $len || $len == 0)
458
459 if(!defined $len) { # error reading
460 print STDERR "WgetDownload: Error reading from child stream: $!\n";
461 # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing
462 $error = 1;
463 }
464 elsif ($len == 0) { # EOF
465 # Finished reading from this filehand $fh because we read 0 bytes.
466 # wget finished, terminate naturally
467 print STDERR "WgetDownload: wget finished\n";
468 #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n";
469 }
470
471 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
472
473 # last; # if we have more than one filehandle registered with IO::Select
474
475 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
476
477 } # end else error or EOF
478
479 } # end foreach on readyhandles
480 } # end if on can_read
481
482 if($num_consecutive_timedouts >= $NUM_TRIES) {
483 $error = 1;
484 $loop = 0; # to break out of outer while loop
485
486 $num_consecutive_timedouts = 0;
487
488 print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
489 print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
490 }
491
492 if($loop == 0) { # error or EOF, either way, clean up
493 if($error) {
494 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
495
496 if(kill(0, $childpid)) {
497 # If kill(0, $childpid) returns true, then the process is running
498 # and we need to kill it.
499 close($chld_in);
500 close($chld_out);
501 kill('TERM', $childpid); # kill the process group by prefixing - to signal
502
503 # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
504 # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
505 #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";
506 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
507 }
508 }
509 else { # wget finished (no errors), terminate naturally
510 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
511 close($chld_in);
512 close($chld_out);
513 waitpid $childpid, 0;
514 }
515
516 # error or not
517 $childpid = undef;
518 # Stop monitoring the read_handle and close the serverSocket
519 # (the Java end will close the client socket that Java opened)
520 if(defined $port) {
521 $read_set->remove($serverSocket);
522 close($serverSocket);
523 }
524 }
525
526 # If we've already terminated, either naturally or on error, we can get out of the while loop
527 next if($loop == 0);
528
529 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
530
531 # if we run this script from the command-line (as opposed to from GLI),
532 # then we're not working with sockets and can therefore skip the next bits
533 next unless(defined $port);
534
535 # http://www.perlfect.com/articles/select.shtml
536 # "multiplex between several filehandles within a single thread of control,
537 # thus creating the effect of parallelism in the handling of I/O."
538 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
539
540 # take all readable handles in turn
541 foreach my $rh (@rh_set) {
542 if($rh == $serverSocket) {
543 my $client = $rh->accept();
544 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
545 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
546
547 # Read from the client (getting rid of the trailing newline)
548 # Has the client sent the <<STOP>> signal?
549 my $signal = <$client>;
550 chomp($signal);
551 if($signal eq "<<STOP>>") {
552 print $client "Perl received STOP signal (on port $port): stopping wget\n";
553 $loop = 0; # out of outer while loop
554 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
555
556 # Sometimes the wget process takes some time to start up. If the STOP signal
557 # was sent, don't try to terminate the process until we know it is running.
558 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
559 # for it to start up, checking for whether it is running in order to kill it.
560 for(my $seconds = 1; $seconds <= 5; $seconds++) {
561 if(kill(0, $childpid)) {
562 # If kill(0, $childpid) returns true, then the process is running
563 # and we need to kill it.
564 close($chld_in);
565 close($chld_out);
566 kill("TERM", $childpid); # prefix - to signal to kill process group
567
568 $childpid = undef;
569
570 # Stop monitoring the read_handle and close the serverSocket
571 # (the Java end will close the client socket that Java opened)
572 $read_set->remove($rh); #$read_set->remove($serverSocket);
573 close($rh); #close($serverSocket);
574 print $client "Perl terminated wget and is about to exit\n";
575 last; # out of inner for loop
576 }
577 else { # the process may just be starting up, wait
578 sleep(1);
579 }
580 }
581 last; # out of foreach loop
582 }
583 }
584 }
585 }
586
587 if ($changed_dir) {
588 chdir $current_dir;
589 }
590
591 return $strReadIn;
592}
593
594
595sub useWgetMonitored
596{
597 #local $| = 1; # autoflush stdout buffer
598 #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
599
600 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
601
602
603 my $current_dir = cwd();
604 my $changed_dir = 0;
605 if (defined $working_dir && -e $working_dir) {
606 chdir "$working_dir";
607 $changed_dir = 1;
608 }
609
610 # When we are running this script through GLI, the SIGTERM signal handler
611 # won't get called on Windows when wget is to be prematurely terminated.
612 # Instead, when wget has to be terminated in the middle of execution, GLI will
613 # connect to a serverSocket here to communicate when it's time to stop wget.
614 if($self->dealingWithSockets()) {
615
616 $port = <STDIN>; # gets a port on localhost that's not yet in use
617 chomp($port);
618
619 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
620 LocalPort => $port,
621 Listen => 1,
622 Reuse => 1);
623
624 die "can't setup server" unless $serverSocket;
625 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
626
627 $read_set = new IO::Select(); # create handle set for reading
628 $read_set->add($serverSocket); # add the main socket to the set
629 }
630
631 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
632 # compose the command as an array for open3, to preserve spaces in any filepath
633 my @commandargs = split(' ', $cmdWget);
634 unshift(@commandargs, $wget_file_path);
635 my $command = "$wget_file_path $cmdWget";
636 #print STDOUT "Command is: $command\n";
637
638 eval { # see p.568 of Perl Cookbook
639 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
640 ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
641 };
642 if ($@) {
643 if($@ =~ m/^open3/) {
644 die "open3 failed in $0: $!\n$@\n";
645 }
646 die "Tried to launch open3 in $0, got unexpected exception: $@";
647 }
648
649 my $full_text = "";
650 my $error_text = "";
651 my @follow_list = ();
652 my $line;
653
654 # create the select object and add our streamhandle(s)
655 my $sel = new IO::Select;
656 $sel->add($chld_out);
657
658 my $num_consecutive_timedouts = 0;
659 my $error = 0;
660 my $loop = 1;
661 while($loop)
662 {
663 # assume we're going to timeout trying to read from child process
664 $num_consecutive_timedouts++;
665
666 # block until data is available on the registered filehandles or until the timeout specified
667 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
668 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
669 # since we're in this if statement
670
671 foreach my $fh (@readyhandles) {
672 my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle
673 if($len) { # read something
674
675
676 if((defined $blnShow) && $blnShow)
677 {
678 print STDERR "$line";
679 }
680
681 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
682 my $follow_url = $1;
683 push(@follow_list,$follow_url);
684 }
685
686 if ($line =~ m/ERROR\s+\d+/) {
687 $error_text .= $line;
688 }
689
690 $full_text .= $line;
691 } else { # error or EOF
692 if(!defined $len) { # error reading
693 #print STDERR "WgetDownload: Error reading from child stream: $!\n";
694 $error = 1;
695 }
696 elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read
697 #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally
698 }
699
700 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
701
702 # last; # if we have more than one filehandle registered with IO::Select
703
704 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
705 } # end else error or EOF
706
707 } # end foreach on readyhandles
708 } # end if on can_read
709
710 if($num_consecutive_timedouts >= $NUM_TRIES) {
711 $error = 1;
712 $loop = 0; # to break out of outer while loop
713
714 $num_consecutive_timedouts = 0;
715
716 #print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
717 #print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
718 }
719
720 if($loop == 0) { # error or EOF, either way, clean up
721
722 if($error) {
723 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
724
725 if(kill(0, $childpid)) {
726 # If kill(0, $childpid) returns true, then the process is running
727 # and we need to kill it.
728 close($chld_in);
729 close($chld_out);
730 kill("TERM", $childpid); # prefix - to signal to kill process group
731
732 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
733 }
734 }
735 else { # wget finished, terminate naturally
736 close($chld_in);
737 close($chld_out);
738 # Program terminates only when the following line is included
739 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
740 # it prevents the child from turning into a "zombie process".
741 # While the wget process terminates without it, this perl script does not:
742 # the DOS prompt is not returned without it.
743 waitpid $childpid, 0;
744 }
745
746 # error or not:
747 $childpid = undef;
748 if(defined $port) {
749 $read_set->remove($serverSocket);
750 close($serverSocket);
751 }
752 }
753
754 # If we've already terminated, either naturally or on error, we can get out of the while loop
755 next if($loop == 0);
756
757 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
758
759 # if we run this script from the command-line (as opposed to from GLI),
760 # then we're not working with sockets and can therefore skip the next bits
761 next unless(defined $port);
762
763 # http://www.perlfect.com/articles/select.shtml
764 # "multiplex between several filehandles within a single thread of control,
765 # thus creating the effect of parallelism in the handling of I/O."
766 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
767
768 # take all readable handles in turn
769 foreach my $rh (@rh_set) {
770 if($rh == $serverSocket) {
771 my $client = $rh->accept();
772 #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
773 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
774
775 # Read from the client (getting rid of trailing newline)
776 # Has the client sent the <<STOP>> signal?
777 my $signal = <$client>;
778 chomp($signal);
779 if($signal eq "<<STOP>>") {
780 print $client "Perl received STOP signal (on port $port): stopping wget\n";
781 $loop = 0; # out of outer while loop
782 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
783
784 # Sometimes the wget process takes some time to start up. If the STOP signal
785 # was sent, don't try to terminate the process until we know it is running.
786 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
787 # for it to start up, checking for whether it is running in order to kill it.
788 for(my $seconds = 1; $seconds <= 5; $seconds++) {
789 if(kill(0, $childpid)) {
790 # If kill(0, $childpid) returns true, then the process is running
791 # and we need to kill it.
792 close($chld_in);
793 close($chld_out);
794 kill("TERM", $childpid); # prefix - to signal to kill process group
795
796 $childpid = undef;
797
798 # Stop monitoring the read_handle and close the serverSocket
799 # (the Java end will close the client socket that Java opened)
800 $read_set->remove($rh); #$read_set->remove($serverSocket);
801 close($rh); #close($serverSocket);
802 print $client "Perl terminated wget and is about to exit\n";
803 last; # out of inner for loop
804 }
805 else { # the process may just be starting up, wait
806 sleep(1);
807 }
808 }
809 last; # out of foreach loop
810 }
811 }
812 }
813 }
814
815 my $command_status = $?;
816 if ($command_status != 0) {
817 $error_text .= "Exit error: $command_status";
818 }
819
820 if ($changed_dir) {
821 chdir $current_dir;
822 }
823
824 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
825
826 return ($full_text,$error_text,$final_follow);
827}
828
829
830# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
831sub checkURL
832{
833 my ($self) = @_;
834 if ($self->{'url'} eq "")
835 {
836 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
837 }
838}
839
840sub error
841{
842 my ($strFunctionName,$strError) = @_;
843 {
844 print "Error occoured in WgetDownload.pm\n".
845 "In Function:".$strFunctionName."\n".
846 "Error Message:".$strError."\n";
847 exit(-1);
848 }
849}
850
8511;
852
Note: See TracBrowser for help on using the repository browser.