source: main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm@ 31956

Last change on this file since 31956 was 31956, checked in by ak19, 7 years ago

Dr Bainbridge read up on why the Sockets to our wget child process' iostreams were being forcibly closed on Windows when we've finished successfully downloading, resulting unexpectedly in the $len bytes that we sysread() being undefined (usually denoting an error) rather than 0. It turns out that by using Sockets on Windows as filehandles to a child process' iostreams is merely implemented in this manner on success.

  • Property svn:keywords set to Author Date Id Revision
File size: 33.8 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use util;
38use IPC::Open3;
39use IO::Select;
40use IO::Socket;
41
42#use IO::Select qw( );
43#use IPC::Open3 qw( open3 );
44use Socket qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html
45
46
47sub BEGIN {
48 @WgetDownload::ISA = ('BaseDownload');
49}
50
51my $arguments =
52 [ { 'name' => "proxy_on",
53 'desc' => "{WgetDownload.proxy_on}",
54 'type' => "flag",
55 'reqd' => "no",
56 'hiddengli' => "yes"},
57 { 'name' => "http_proxy_host",
58 'desc' => "{WgetDownload.http_proxy_host}",
59 'type' => "string",
60 'reqd' => "no",
61 'hiddengli' => "yes"},
62 { 'name' => "http_proxy_port",
63 'desc' => "{WgetDownload.http_proxy_port}",
64 'type' => "string",
65 'reqd' => "no",
66 'hiddengli' => "yes"},
67 { 'name' => "https_proxy_host",
68 'desc' => "{WgetDownload.https_proxy_host}",
69 'type' => "string",
70 'reqd' => "no",
71 'hiddengli' => "yes"},
72 { 'name' => "https_proxy_port",
73 'desc' => "{WgetDownload.https_proxy_port}",
74 'type' => "string",
75 'reqd' => "no",
76 'hiddengli' => "yes"},
77 { 'name' => "ftp_proxy_host",
78 'desc' => "{WgetDownload.ftp_proxy_host}",
79 'type' => "string",
80 'reqd' => "no",
81 'hiddengli' => "yes"},
82 { 'name' => "ftp_proxy_port",
83 'desc' => "{WgetDownload.ftp_proxy_port}",
84 'type' => "string",
85 'reqd' => "no",
86 'hiddengli' => "yes"},
87 { 'name' => "user_name",
88 'desc' => "{WgetDownload.user_name}",
89 'type' => "string",
90 'reqd' => "no",
91 'hiddengli' => "yes"},
92 { 'name' => "user_password",
93 'desc' => "{WgetDownload.user_password}",
94 'type' => "string",
95 'reqd' => "no",
96 'hiddengli' => "yes"},
97 { 'name' => "no_check_certificate",
98 'desc' => "{WgetDownload.no_check_certificate}",
99 'type' => "flag",
100 'reqd' => "no",
101 'hiddengli' => "yes"}
102 ];
103
104my $options = { 'name' => "WgetDownload",
105 'desc' => "{WgetDownload.desc}",
106 'abstract' => "yes",
107 'inherits' => "yes",
108 'args' => $arguments };
109
110
111# Declaring file global variables related to the wget child process so that
112# the termination signal handler for SIGTERM can close the streams and tidy
113# up before ending the child process.
114my $childpid;
115my ($chld_out, $chld_in);
116my ($serverSocket, $read_set);
117
118my $TIMEOUT = 1; # seconds
119my $NUM_TRIES = 10;
120
121# The port this script's server socket will be listening on, to handle
122# incoming signals from GLI to terminate wget. This is also file global,
123# since OAIDownload.pm will make several calls on wget using the same
124# instance of this script and we want to reuse whatever port GLI gave us.
125my $port;
126
127# When this script is called from the command line, this handler will be called
128# if this process is killed or abruptly ends due to receiving one of the
129# terminating signals that this handler is registered to deal with.
130sub abrupt_end_handler {
131 my $termination_signal = shift (@_);
132
133 if(defined $childpid) {
134 close($chld_out);
135 close($chld_in);
136
137 print STDOUT "Received termination signal: $termination_signal\n";
138
139 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
140 # See http://perldoc.perl.org/perlipc.html#Signals
141 # Warning on using kill at http://perldoc.perl.org/perlfork.html
142 kill("TERM", $childpid); # prefix - to signal to kill process group
143
144 # If the SIGTERM sent on Linux calls this handler, we want to make
145 # sure any socket connection is closed.
146 # Otherwise sockets are only used when this script is run from GLI
147 # in which case the handlers don't really get called.
148 if(defined $serverSocket) {
149 $read_set->remove($serverSocket) if defined $read_set;
150 close($serverSocket);
151 }
152 }
153
154 exit(0);
155}
156
157# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
158# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
159# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
160# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
161# http://affy.blogspot.com/p5be/ch13.htm
162# http://perldoc.perl.org/perlipc.html#Signals
163$SIG{'INT'} = \&abrupt_end_handler;
164$SIG{'TERM'} = \&abrupt_end_handler;
165
166sub new {
167 my ($class) = shift (@_);
168 my ($getlist,$inputargs,$hashArgOptLists) = @_;
169 push(@$getlist, $class);
170
171 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
172 push(@{$hashArgOptLists->{"OptList"}},$options);
173
174 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
175
176 # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
177 &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
178
179 return bless $self, $class;
180}
181
182sub checkWgetSetup
183{
184 my ($self,$blnGliCall) = @_;
185 #TODO: proxy detection??
186
187 if((!$blnGliCall) && $self->{'proxy_on'})
188 {
189 &checkProxySetup($self);
190 }
191 &checkURL($self);
192}
193
194# Not using this. On Windows, we used to pass proxying settings as flags to wget. But, as that can be
195# seen with Task Manager, we now have the proxy settings set in the environment and are no longer passing it
196sub addProxySettingsAsWgetFlags
197{
198 my ($self) = @_;
199 my $strOptions = "";
200
201 if($self->{'http_proxy_host'} && $self->{'http_proxy_port'}) {
202 $strOptions .= " -e http_proxy=$self->{'http_proxy_host'}:$self->{'http_proxy_port'} ";
203 }
204 if($self->{'https_proxy_host'} && $self->{'https_proxy_port'}) {
205 $strOptions .= " -e https_proxy=$self->{'https_proxy_host'}:$self->{'https_proxy_port'} ";
206 }
207 if($self->{'ftp_proxy_host'} && $self->{'ftp_proxy_port'}) {
208 $strOptions .= " -e ftp_proxy=$self->{'ftp_proxy_host'}:$self->{'ftp_proxy_port'} ";
209 }
210
211 # For wget, there is only one set pair of proxy-user and proxy-passwd, so wget seems to assume
212 # that all 3 proxy protocols (http|https|ftp) will use the same username and pwd combination?
213 # Note that this only matters when passing the proxying details as flags to wget, not when
214 # the proxies are setup as environment variables.
215 if ($self->{'user_name'} && $self->{'user_password'})
216 {
217 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
218 # how is "--proxy-passwd" instead of "--proxy-password" even working????
219 # see https://www.gnu.org/software/wget/manual/html_node/Proxies.html
220 # and https://www.gnu.org/software/wget/manual/wget.html
221 # Not touching this, in case the manual is simply wrong. Since our code works in
222 # practice (when we were still using wget proxy username/pwd flags for windows).
223 }
224
225 return $strOptions;
226}
227
228sub getWgetOptions
229{
230 my ($self) = @_;
231 my $strOptions = "";
232
233 # If proxy settings are set up in the environment, wget is ready to use them. More secure.
234 # But if proxy settings are not set up in the environment, pass them as flags to wget
235 # This is less secure, as pwd etc visible in task manager, but it was the original way in
236 # which wget was run on windows.
237 # Truth in Perl: https://home.ubalt.edu/abento/452/perl/perltruth.html
238 # http://www.perlmonks.org/?node=what%20is%20true%20and%20false%20in%20Perl%3F
239
240 if ($self->{'proxy_on'}) {
241 if(!$ENV{'http_proxy'} && !$ENV{'https_proxy'} && !$ENV{'ftp_proxy'}) {
242 $strOptions .= $self->addProxySettingsAsWgetFlags();
243 } # else wget will use proxy settings in environment, assume enough settings have been provided
244 # either way, we're using the proxy
245 $strOptions .= " --proxy ";
246 }
247
248 if($self->{'no_check_certificate'}) { # URL may be http that gets redirected to https, so if no_check_certificate is on, turn it on even if URL is http
249
250 $strOptions .= " --no-check-certificate ";
251 }
252
253 return $strOptions;
254}
255
256# Checking for proxy setup: proxy server, proxy port, proxy username and password.
257sub checkProxySetup
258{
259 my ($self) = @_;
260 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
261 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
262 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
263
264}
265
266# Returns true if the wget status needs to be monitored through sockets
267# (if a socket is used to communicate with the Java program on when to
268# terminate wget). True if we are running gli, or if the particular type
269# of WgetDownload is *not* OAIDownload (in that case, the original way of
270# terminating the perl script from Java would terminate wget as well).
271sub dealingWithSockets() {
272 my ($self) = @_;
273 return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
274 # use ref($self) to find the classname of an object
275}
276
277# On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams
278# However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read()
279# not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from
280# http://www.perlmonks.org/?node_id=869942
281# http://www.perlmonks.org/?node_id=811650
282# It was suggested that IPC::Run will take care of all this or circumvent the need for all this,
283# but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS
284
285# Create a unidirectional pipe to an iostream of a process that is actually a socket
286sub _pipe {
287 socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC)
288 or return undef;
289 shutdown($_[0], 1); # No more writing for reader. See http://www.perlmonks.org/?node=108244
290 shutdown($_[1], 0); # No more reading for writer
291 return 1;
292}
293
294sub _open3 {
295 local (*TO_CHLD_R, *TO_CHLD_W);
296 local (*FR_CHLD_R, *FR_CHLD_W);
297 #local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W);
298
299 if ($^O =~ /Win32/) {
300 _pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $^E;
301 _pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $^E;
302 #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E;
303 } else {
304 pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $!;
305 pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $!;
306 #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!;
307 }
308
309 #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_);
310 my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc,
311 # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
312
313 #return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R );
314 return ( $pid, *TO_CHLD_W, *FR_CHLD_R);
315}
316
317# useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem
318# useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration:
319# - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there
320# - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running.
321# Then the loop of these two things repeats.
322sub useWget
323{
324 #local $| = 1; # autoflush stdout buffer
325 #print STDOUT "*** Start of subroutine useWget in $0\n";
326
327 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
328
329 my ($strReadIn,$strLine,$command);
330 $strReadIn = "" unless defined $strReadIn;
331
332 my $current_dir = cwd();
333 my $changed_dir = 0;
334 if (defined $working_dir && -e $working_dir) {
335 chdir "$working_dir";
336 $changed_dir = 1;
337 }
338
339 # When we are running this script through GLI, the SIGTERM signal handler
340 # won't get called on Windows when wget is to be prematurely terminated.
341 # Instead, when wget has to be terminated in the middle of execution, GLI will
342 # connect to a serverSocket here to communicate when it's time to stop wget.
343 if($self->dealingWithSockets()) {
344
345 $port = <STDIN>; # gets a port on localhost that's not yet in use
346 chomp($port);
347
348 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
349 LocalPort => $port,
350 Listen => 1,
351 Reuse => 1);
352
353 die "can't setup server" unless $serverSocket;
354 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
355
356 $read_set = new IO::Select(); # create handle set for reading
357 $read_set->add($serverSocket); # add the main socket to the set
358 }
359
360 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
361
362 # Shouldn't use double quotes around wget path after all? See final comment at
363 # http://www.perlmonks.org/?node_id=394709
364 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
365 # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath
366 # because single/double quotes using open3 seem to launch a subshell, see also final comment at
367 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes
368 # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other).
369 my @commandargs = split(' ', $cmdWget);
370 unshift(@commandargs, $wget_file_path);
371 $command = "$wget_file_path $cmdWget";
372# print STDOUT "Command is: $command\n"; # displayed in GLI output
373# print STDERR "Command is: $command\n"; # goes into ServerInfoDialog
374
375 # Wget's output needs to be monitored to find out when it has naturally terminated.
376 # Wget's output is sent to its STDERR so we can't use open2 without doing 2>&1.
377 # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing
378 # the childpid does not kill wget on Linux but the subshell that launched it instead.
379 # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?],
380 # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!)
381 # However that may be, it works with open3. But to avoid the confusion of managing and
382 # closing an extra unused handle, a single handle is used instead for both the child's
383 # stderr and stdout.
384 # See http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
385 # for why this is the right thing to do.
386
387 # Both open2 and open3 don't return on failure, but raise an exception. The handling
388 # of the exception is described on p.568 of the Perl Cookbook
389 eval {
390 #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below
391 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
392
393 # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to
394 # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows
395 ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
396 };
397 if ($@) {
398 if($@ =~ m/^open3/) {
399 die "open3 failed in $0: $!\n$@\n";
400 }
401 die "Tried to launch open3 in $0, got unexpected exception: $@";
402 }
403
404 # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking
405 # if defined(my $strLine=<$chld_out>)
406 # Google: perl open3 read timeout
407 # Google: perl open3 select() example
408 # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked
409 # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl
410 # https://gist.github.com/shalk/6988937
411 # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung
412 # http://perldoc.perl.org/IO/Select.html
413 # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3
414 # http://www.perlmonks.org/?node_id=951554
415 # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm
416 # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1
417
418 # create the select object and add our streamhandle(s)
419 my $sel = new IO::Select;
420 $sel->add($chld_out);
421
422 my $num_consecutive_timedouts = 0;
423 my $error = 0;
424 my $loop = 1;
425
426 while($loop)
427 {
428 # assume we're going to timeout trying to read from child process
429 $num_consecutive_timedouts++;
430
431
432 # block until data is available on the registered filehandles or until the timeout specified
433 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
434
435 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
436 # since we're in this if statement
437
438 # now there's a list of registered filehandles we can read from to loop through reading from.
439 # though we've registered only one, chld_out
440 foreach my $fh (@readyhandles) {
441 my $strLine;
442 #sleep 3;
443
444 # read up to 4096 bytes from this filehandle fh.
445 # if there is less than 4096 bytes, we'll only get
446 # those available bytes and won't block. If there
447 # is more than 4096 bytes, we'll only read 4096 and
448 # wait for the next iteration through the loop to
449 # read the rest.
450 my $len = sysread($fh, $strLine, 4096);
451
452 if($len) { # read something
453 if($blnShow) {
454 print STDERR "$strLine\n";
455 }
456 $strReadIn .= $strLine;
457 }
458 else { # error or EOF: (!defined $len || $len == 0)
459
460 if(!defined $len) { # could be an error reading
461 # On Windows, the socket ends up forcibly closed on the "other" side. It's just the way it's implemented
462 # on Windows when using sockets to our child process' iostreams. So $len not being defined is not an error in that case. Refer to
463 # https://stackoverflow.com/questions/16675950/perl-select-returning-undef-on-sysread-when-using-windows-ipcopen3-and-ios/16676271
464 if(!$!{ECONNRESET}) { # anything other ECONNRESET error means it's a real case of undefined $len being an error
465 print STDERR "WgetDownload: Error reading from child stream: $!\n";
466 # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing
467 $error = 1;
468 } else { # $! contains message "An existing connection was forcibly closed by remote host" where "remote" is a reference to the sockets to our wget child process,
469 # NOT to the remote web server we're downloading from. In such a case, the error code is ECONNRESET, and it's not an error, despite $len being undefined.
470 print STDERR "WgetDownload: wget finished\n";
471 }
472 }
473 elsif ($len == 0) { # EOF
474 # Finished reading from this filehandle $fh because we read 0 bytes.
475 # wget finished, terminate naturally
476 print STDERR "WgetDownload: wget finished\n";
477 #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n";
478 }
479
480 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
481
482 # last; # if we have more than one filehandle registered with IO::Select
483
484 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
485
486 } # end else error or EOF
487
488 } # end foreach on readyhandles
489 } # end if on can_read
490
491 if($num_consecutive_timedouts >= $NUM_TRIES) {
492 $error = 1;
493 $loop = 0; # to break out of outer while loop
494
495 $num_consecutive_timedouts = 0;
496
497 print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
498 print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
499 }
500
501 if($loop == 0) { # error or EOF, either way, clean up
502 if($error) {
503 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
504
505 if(kill(0, $childpid)) {
506 # If kill(0, $childpid) returns true, then the process is running
507 # and we need to kill it.
508 close($chld_in);
509 close($chld_out);
510 kill('TERM', $childpid); # kill the process group by prefixing - to signal
511
512 # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
513 # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
514 #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";
515 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
516 }
517 }
518 else { # wget finished (no errors), terminate naturally
519 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
520 close($chld_in);
521 close($chld_out);
522 waitpid $childpid, 0;
523 }
524
525 # error or not
526 $childpid = undef;
527 # Stop monitoring the read_handle and close the serverSocket
528 # (the Java end will close the client socket that Java opened)
529 if(defined $port) {
530 $read_set->remove($serverSocket);
531 close($serverSocket);
532 }
533 }
534
535 # If we've already terminated, either naturally or on error, we can get out of the while loop
536 next if($loop == 0);
537
538 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
539
540 # if we run this script from the command-line (as opposed to from GLI),
541 # then we're not working with sockets and can therefore skip the next bits
542 next unless(defined $port);
543
544 # http://www.perlfect.com/articles/select.shtml
545 # "multiplex between several filehandles within a single thread of control,
546 # thus creating the effect of parallelism in the handling of I/O."
547 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
548
549 # take all readable handles in turn
550 foreach my $rh (@rh_set) {
551 if($rh == $serverSocket) {
552 my $client = $rh->accept();
553 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
554 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
555
556 # Read from the client (getting rid of the trailing newline)
557 # Has the client sent the <<STOP>> signal?
558 my $signal = <$client>;
559 chomp($signal);
560 if($signal eq "<<STOP>>") {
561 print $client "Perl received STOP signal (on port $port): stopping wget\n";
562 $loop = 0; # out of outer while loop
563 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
564
565 # Sometimes the wget process takes some time to start up. If the STOP signal
566 # was sent, don't try to terminate the process until we know it is running.
567 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
568 # for it to start up, checking for whether it is running in order to kill it.
569 for(my $seconds = 1; $seconds <= 5; $seconds++) {
570 if(kill(0, $childpid)) {
571 # If kill(0, $childpid) returns true, then the process is running
572 # and we need to kill it.
573 close($chld_in);
574 close($chld_out);
575 kill("TERM", $childpid); # prefix - to signal to kill process group
576
577 $childpid = undef;
578
579 # Stop monitoring the read_handle and close the serverSocket
580 # (the Java end will close the client socket that Java opened)
581 $read_set->remove($rh); #$read_set->remove($serverSocket);
582 close($rh); #close($serverSocket);
583 print $client "Perl terminated wget and is about to exit\n";
584 last; # out of inner for loop
585 }
586 else { # the process may just be starting up, wait
587 sleep(1);
588 }
589 }
590 last; # out of foreach loop
591 }
592 }
593 }
594 }
595
596 if ($changed_dir) {
597 chdir $current_dir;
598 }
599
600 return $strReadIn;
601}
602
603
604sub useWgetMonitored
605{
606 #local $| = 1; # autoflush stdout buffer
607 #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
608
609 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
610
611
612 my $current_dir = cwd();
613 my $changed_dir = 0;
614 if (defined $working_dir && -e $working_dir) {
615 chdir "$working_dir";
616 $changed_dir = 1;
617 }
618
619 # When we are running this script through GLI, the SIGTERM signal handler
620 # won't get called on Windows when wget is to be prematurely terminated.
621 # Instead, when wget has to be terminated in the middle of execution, GLI will
622 # connect to a serverSocket here to communicate when it's time to stop wget.
623 if($self->dealingWithSockets()) {
624
625 $port = <STDIN>; # gets a port on localhost that's not yet in use
626 chomp($port);
627
628 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
629 LocalPort => $port,
630 Listen => 1,
631 Reuse => 1);
632
633 die "can't setup server" unless $serverSocket;
634 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
635
636 $read_set = new IO::Select(); # create handle set for reading
637 $read_set->add($serverSocket); # add the main socket to the set
638 }
639
640 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
641 # compose the command as an array for open3, to preserve spaces in any filepath
642 my @commandargs = split(' ', $cmdWget);
643 unshift(@commandargs, $wget_file_path);
644 my $command = "$wget_file_path $cmdWget";
645 #print STDOUT "Command is: $command\n";
646
647 eval { # see p.568 of Perl Cookbook
648 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
649 ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
650 };
651 if ($@) {
652 if($@ =~ m/^open3/) {
653 die "open3 failed in $0: $!\n$@\n";
654 }
655 die "Tried to launch open3 in $0, got unexpected exception: $@";
656 }
657
658 my $full_text = "";
659 my $error_text = "";
660 my @follow_list = ();
661 my $line;
662
663 # create the select object and add our streamhandle(s)
664 my $sel = new IO::Select;
665 $sel->add($chld_out);
666
667 my $num_consecutive_timedouts = 0;
668 my $error = 0;
669 my $loop = 1;
670 while($loop)
671 {
672 # assume we're going to timeout trying to read from child process
673 $num_consecutive_timedouts++;
674
675 # block until data is available on the registered filehandles or until the timeout specified
676 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
677 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
678 # since we're in this if statement
679
680 foreach my $fh (@readyhandles) {
681 my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle
682 if($len) { # read something
683
684
685 if((defined $blnShow) && $blnShow)
686 {
687 print STDERR "$line";
688 }
689
690 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
691 my $follow_url = $1;
692 push(@follow_list,$follow_url);
693 }
694
695 if ($line =~ m/ERROR\s+\d+/) {
696 $error_text .= $line;
697 }
698
699 $full_text .= $line;
700 } else { # error or EOF
701 if(!defined $len) { # error reading
702 #print STDERR "WgetDownload: Error reading from child stream: $!\n";
703 $error = 1;
704 }
705 if(!defined $len) {
706 if(!$!{ECONNRESET}) { # anything other ECONNRESET error means it's a real case of undefined $len being an error
707 #print STDERR "WgetDownload: Error reading from child stream: $!\n";
708 $error = 1;
709 } else { # the error code is ECONNRESET, and it's not an error, despite $len being undefined.
710 # Happens on Windows when using sockets to a child process' iostreams
711 #print STDERR "WgetDownload: wget finished\n";
712 }
713 }
714 elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read
715 #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally
716 }
717
718 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
719
720 # last; # if we have more than one filehandle registered with IO::Select
721
722 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
723 } # end else error or EOF
724
725 } # end foreach on readyhandles
726 } # end if on can_read
727
728 if($num_consecutive_timedouts >= $NUM_TRIES) {
729 $error = 1;
730 $loop = 0; # to break out of outer while loop
731
732 $num_consecutive_timedouts = 0;
733
734 #print STDERR "WARNING from WgetDownload: wget timed out $NUM_TRIES times waiting for a response\n";
735 #print STDERR "\tThe URL may be inaccessible or the proxy configuration is wrong or incomplete.\n";
736 }
737
738 if($loop == 0) { # error or EOF, either way, clean up
739
740 if($error) {
741 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
742
743 if(kill(0, $childpid)) {
744 # If kill(0, $childpid) returns true, then the process is running
745 # and we need to kill it.
746 close($chld_in);
747 close($chld_out);
748 kill("TERM", $childpid); # prefix - to signal to kill process group
749
750 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
751 }
752 }
753 else { # wget finished, terminate naturally
754 close($chld_in);
755 close($chld_out);
756 # Program terminates only when the following line is included
757 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
758 # it prevents the child from turning into a "zombie process".
759 # While the wget process terminates without it, this perl script does not:
760 # the DOS prompt is not returned without it.
761 waitpid $childpid, 0;
762 }
763
764 # error or not:
765 $childpid = undef;
766 if(defined $port) {
767 $read_set->remove($serverSocket);
768 close($serverSocket);
769 }
770 }
771
772 # If we've already terminated, either naturally or on error, we can get out of the while loop
773 next if($loop == 0);
774
775 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
776
777 # if we run this script from the command-line (as opposed to from GLI),
778 # then we're not working with sockets and can therefore skip the next bits
779 next unless(defined $port);
780
781 # http://www.perlfect.com/articles/select.shtml
782 # "multiplex between several filehandles within a single thread of control,
783 # thus creating the effect of parallelism in the handling of I/O."
784 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
785
786 # take all readable handles in turn
787 foreach my $rh (@rh_set) {
788 if($rh == $serverSocket) {
789 my $client = $rh->accept();
790 #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
791 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
792
793 # Read from the client (getting rid of trailing newline)
794 # Has the client sent the <<STOP>> signal?
795 my $signal = <$client>;
796 chomp($signal);
797 if($signal eq "<<STOP>>") {
798 print $client "Perl received STOP signal (on port $port): stopping wget\n";
799 $loop = 0; # out of outer while loop
800 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
801
802 # Sometimes the wget process takes some time to start up. If the STOP signal
803 # was sent, don't try to terminate the process until we know it is running.
804 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
805 # for it to start up, checking for whether it is running in order to kill it.
806 for(my $seconds = 1; $seconds <= 5; $seconds++) {
807 if(kill(0, $childpid)) {
808 # If kill(0, $childpid) returns true, then the process is running
809 # and we need to kill it.
810 close($chld_in);
811 close($chld_out);
812 kill("TERM", $childpid); # prefix - to signal to kill process group
813
814 $childpid = undef;
815
816 # Stop monitoring the read_handle and close the serverSocket
817 # (the Java end will close the client socket that Java opened)
818 $read_set->remove($rh); #$read_set->remove($serverSocket);
819 close($rh); #close($serverSocket);
820 print $client "Perl terminated wget and is about to exit\n";
821 last; # out of inner for loop
822 }
823 else { # the process may just be starting up, wait
824 sleep(1);
825 }
826 }
827 last; # out of foreach loop
828 }
829 }
830 }
831 }
832
833 my $command_status = $?;
834 if ($command_status != 0) {
835 $error_text .= "Exit error: $command_status";
836 }
837
838 if ($changed_dir) {
839 chdir $current_dir;
840 }
841
842 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
843
844 return ($full_text,$error_text,$final_follow);
845}
846
847
848# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
849sub checkURL
850{
851 my ($self) = @_;
852 if ($self->{'url'} eq "")
853 {
854 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
855 }
856}
857
858sub error
859{
860 my ($strFunctionName,$strError) = @_;
861 {
862 print "Error occoured in WgetDownload.pm\n".
863 "In Function:".$strFunctionName."\n".
864 "Error Message:".$strError."\n";
865 exit(-1);
866 }
867}
868
8691;
870
Note: See TracBrowser for help on using the repository browser.