source: main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm@ 31957

Last change on this file since 31957 was 31957, checked in by ak19, 3 years ago

Tidying up. 1. Shifting informative error message to strings.properties. 2. Commenting out unnecessary message that was printed out for debugging when things went well. 3. To use gsprintf passing in STDERR/STDOUT as a parameter when use strict is on, need to make some exceptions to strict.

  • Property svn:keywords set to Author Date Id Revision
File size: 33.7 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36no strict 'subs'; # make an exception so we can use variables as filehandles to pass STDERR/STDOUT to functions, needed for gsprintf()
37use Cwd;
38use util;
39use IPC::Open3;
40use IO::Select;
41use IO::Socket;
42
43#use IO::Select qw( );
44#use IPC::Open3 qw( open3 );
45use Socket qw( AF_UNIX SOCK_STREAM PF_UNSPEC ); # http://perlmeme.org/howtos/perlfunc/qw_function.html
46
47
48sub BEGIN {
49 @WgetDownload::ISA = ('BaseDownload');
50}
51
52my $arguments =
53 [ { 'name' => "proxy_on",
54 'desc' => "{WgetDownload.proxy_on}",
55 'type' => "flag",
56 'reqd' => "no",
57 'hiddengli' => "yes"},
58 { 'name' => "http_proxy_host",
59 'desc' => "{WgetDownload.http_proxy_host}",
60 'type' => "string",
61 'reqd' => "no",
62 'hiddengli' => "yes"},
63 { 'name' => "http_proxy_port",
64 'desc' => "{WgetDownload.http_proxy_port}",
65 'type' => "string",
66 'reqd' => "no",
67 'hiddengli' => "yes"},
68 { 'name' => "https_proxy_host",
69 'desc' => "{WgetDownload.https_proxy_host}",
70 'type' => "string",
71 'reqd' => "no",
72 'hiddengli' => "yes"},
73 { 'name' => "https_proxy_port",
74 'desc' => "{WgetDownload.https_proxy_port}",
75 'type' => "string",
76 'reqd' => "no",
77 'hiddengli' => "yes"},
78 { 'name' => "ftp_proxy_host",
79 'desc' => "{WgetDownload.ftp_proxy_host}",
80 'type' => "string",
81 'reqd' => "no",
82 'hiddengli' => "yes"},
83 { 'name' => "ftp_proxy_port",
84 'desc' => "{WgetDownload.ftp_proxy_port}",
85 'type' => "string",
86 'reqd' => "no",
87 'hiddengli' => "yes"},
88 { 'name' => "user_name",
89 'desc' => "{WgetDownload.user_name}",
90 'type' => "string",
91 'reqd' => "no",
92 'hiddengli' => "yes"},
93 { 'name' => "user_password",
94 'desc' => "{WgetDownload.user_password}",
95 'type' => "string",
96 'reqd' => "no",
97 'hiddengli' => "yes"},
98 { 'name' => "no_check_certificate",
99 'desc' => "{WgetDownload.no_check_certificate}",
100 'type' => "flag",
101 'reqd' => "no",
102 'hiddengli' => "yes"}
103 ];
104
105my $options = { 'name' => "WgetDownload",
106 'desc' => "{WgetDownload.desc}",
107 'abstract' => "yes",
108 'inherits' => "yes",
109 'args' => $arguments };
110
111
112# Declaring file global variables related to the wget child process so that
113# the termination signal handler for SIGTERM can close the streams and tidy
114# up before ending the child process.
115my $childpid;
116my ($chld_out, $chld_in);
117my ($serverSocket, $read_set);
118
119my $TIMEOUT = 1; # seconds
120my $NUM_TRIES = 10;
121
122# The port this script's server socket will be listening on, to handle
123# incoming signals from GLI to terminate wget. This is also file global,
124# since OAIDownload.pm will make several calls on wget using the same
125# instance of this script and we want to reuse whatever port GLI gave us.
126my $port;
127
128# When this script is called from the command line, this handler will be called
129# if this process is killed or abruptly ends due to receiving one of the
130# terminating signals that this handler is registered to deal with.
131sub abrupt_end_handler {
132 my $termination_signal = shift (@_);
133
134 if(defined $childpid) {
135 close($chld_out);
136 close($chld_in);
137
138 print STDOUT "Received termination signal: $termination_signal\n";
139
140 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
141 # See http://perldoc.perl.org/perlipc.html#Signals
142 # Warning on using kill at http://perldoc.perl.org/perlfork.html
143 kill("TERM", $childpid); # prefix - to signal to kill process group
144
145 # If the SIGTERM sent on Linux calls this handler, we want to make
146 # sure any socket connection is closed.
147 # Otherwise sockets are only used when this script is run from GLI
148 # in which case the handlers don't really get called.
149 if(defined $serverSocket) {
150 $read_set->remove($serverSocket) if defined $read_set;
151 close($serverSocket);
152 }
153 }
154
155 exit(0);
156}
157
158# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
159# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
160# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
161# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
162# http://affy.blogspot.com/p5be/ch13.htm
163# http://perldoc.perl.org/perlipc.html#Signals
164$SIG{'INT'} = \&abrupt_end_handler;
165$SIG{'TERM'} = \&abrupt_end_handler;
166
167sub new {
168 my ($class) = shift (@_);
169 my ($getlist,$inputargs,$hashArgOptLists) = @_;
170 push(@$getlist, $class);
171
172 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
173 push(@{$hashArgOptLists->{"OptList"}},$options);
174
175 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
176
177 # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
178 &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
179
180 return bless $self, $class;
181}
182
183sub checkWgetSetup
184{
185 my ($self,$blnGliCall) = @_;
186 #TODO: proxy detection??
187
188 if((!$blnGliCall) && $self->{'proxy_on'})
189 {
190 &checkProxySetup($self);
191 }
192 &checkURL($self);
193}
194
195# Not using this. On Windows, we used to pass proxying settings as flags to wget. But, as that can be
196# seen with Task Manager, we now have the proxy settings set in the environment and are no longer passing it
197sub addProxySettingsAsWgetFlags
198{
199 my ($self) = @_;
200 my $strOptions = "";
201
202 if($self->{'http_proxy_host'} && $self->{'http_proxy_port'}) {
203 $strOptions .= " -e http_proxy=$self->{'http_proxy_host'}:$self->{'http_proxy_port'} ";
204 }
205 if($self->{'https_proxy_host'} && $self->{'https_proxy_port'}) {
206 $strOptions .= " -e https_proxy=$self->{'https_proxy_host'}:$self->{'https_proxy_port'} ";
207 }
208 if($self->{'ftp_proxy_host'} && $self->{'ftp_proxy_port'}) {
209 $strOptions .= " -e ftp_proxy=$self->{'ftp_proxy_host'}:$self->{'ftp_proxy_port'} ";
210 }
211
212 # For wget, there is only one set pair of proxy-user and proxy-passwd, so wget seems to assume
213 # that all 3 proxy protocols (http|https|ftp) will use the same username and pwd combination?
214 # Note that this only matters when passing the proxying details as flags to wget, not when
215 # the proxies are setup as environment variables.
216 if ($self->{'user_name'} && $self->{'user_password'})
217 {
218 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
219 # how is "--proxy-passwd" instead of "--proxy-password" even working????
220 # see https://www.gnu.org/software/wget/manual/html_node/Proxies.html
221 # and https://www.gnu.org/software/wget/manual/wget.html
222 # Not touching this, in case the manual is simply wrong. Since our code works in
223 # practice (when we were still using wget proxy username/pwd flags for windows).
224 }
225
226 return $strOptions;
227}
228
229sub getWgetOptions
230{
231 my ($self) = @_;
232 my $strOptions = "";
233
234 # If proxy settings are set up in the environment, wget is ready to use them. More secure.
235 # But if proxy settings are not set up in the environment, pass them as flags to wget
236 # This is less secure, as pwd etc visible in task manager, but it was the original way in
237 # which wget was run on windows.
238 # Truth in Perl: https://home.ubalt.edu/abento/452/perl/perltruth.html
239 # http://www.perlmonks.org/?node=what%20is%20true%20and%20false%20in%20Perl%3F
240
241 if ($self->{'proxy_on'}) {
242 if(!$ENV{'http_proxy'} && !$ENV{'https_proxy'} && !$ENV{'ftp_proxy'}) {
243 $strOptions .= $self->addProxySettingsAsWgetFlags();
244 } # else wget will use proxy settings in environment, assume enough settings have been provided
245 # either way, we're using the proxy
246 $strOptions .= " --proxy ";
247 }
248
249 if($self->{'no_check_certificate'}) { # URL may be http that gets redirected to https, so if no_check_certificate is on, turn it on even if URL is http
250
251 $strOptions .= " --no-check-certificate ";
252 }
253
254 return $strOptions;
255}
256
257# Checking for proxy setup: proxy server, proxy port, proxy username and password.
258sub checkProxySetup
259{
260 my ($self) = @_;
261 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
262 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
263 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
264
265}
266
267# Returns true if the wget status needs to be monitored through sockets
268# (if a socket is used to communicate with the Java program on when to
269# terminate wget). True if we are running gli, or if the particular type
270# of WgetDownload is *not* OAIDownload (in that case, the original way of
271# terminating the perl script from Java would terminate wget as well).
272sub dealingWithSockets() {
273 my ($self) = @_;
274 return (defined $self->{'gli'} && $self->{'gli'} && !defined $port && ref($self) ne "OAIDownload");
275 # use ref($self) to find the classname of an object
276}
277
278# On Windows, we can only use IO::Select's can_read() with Sockets, not with the usual handles to a child process' iostreams
279# However, we can use Sockets as the handles to connect to a child process' streams, which then allows us to use can_read()
280# not just on Unix but Windows too. The 2 subroutines below to use Sockets to connect to a child process' iostreams come from
281# http://www.perlmonks.org/?node_id=869942
282# http://www.perlmonks.org/?node_id=811650
283# It was suggested that IPC::Run will take care of all this or circumvent the need for all this,
284# but IPC::Run has limitations on Windows, see http://search.cpan.org/~toddr/IPC-Run-0.96/lib/IPC/Run.pm#Win32_LIMITATIONS
285
286# Create a unidirectional pipe to an iostream of a process that is actually a socket
287sub _pipe {
288 socketpair($_[0], $_[1], AF_UNIX, SOCK_STREAM, PF_UNSPEC)
289 or return undef;
290 shutdown($_[0], 1); # No more writing for reader. See http://www.perlmonks.org/?node=108244
291 shutdown($_[1], 0); # No more reading for writer
292 return 1;
293}
294
295sub _open3 {
296 local (*TO_CHLD_R, *TO_CHLD_W);
297 local (*FR_CHLD_R, *FR_CHLD_W);
298 #local (*FR_CHLD_ERR_R, *FR_CHLD_ERR_W);
299
300 if ($^O =~ /Win32/) {
301 _pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $^E;
302 _pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $^E;
303 #_pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $^E;
304 } else {
305 pipe(*TO_CHLD_R, *TO_CHLD_W ) or die $!;
306 pipe(*FR_CHLD_R, *FR_CHLD_W ) or die $!;
307 #pipe(*FR_CHLD_ERR_R, *FR_CHLD_ERR_W) or die $!;
308 }
309
310 #my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_ERR_W', @_);
311 my $pid = open3('>&TO_CHLD_R', '<&FR_CHLD_W', '<&FR_CHLD_W', @_); # use one handle, chldout, for both stdout and stderr of child proc,
312 # see http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
313
314 #return ( $pid, *TO_CHLD_W, *FR_CHLD_R, *FR_CHLD_ERR_R );
315 return ( $pid, *TO_CHLD_W, *FR_CHLD_R);
316}
317
318# useWget and useWgetMonitored are very similar and, when updating, will probably need updating in tandem
319# useWget(Monitored) runs the wget command using open3 and then sits in a loop doing two things per iteration:
320# - processing a set buffer size of the wget (child) process' stdout/stderr streams, if anything has appeared there
321# - followed by checking the socket connection to Java GLI, to see if GLI is trying to cancel the wget process we're running.
322# Then the loop of these two things repeats.
323sub useWget
324{
325 #local $| = 1; # autoflush stdout buffer
326 #print STDOUT "*** Start of subroutine useWget in $0\n";
327
328 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
329
330 my ($strReadIn,$strLine,$command);
331 $strReadIn = "" unless defined $strReadIn;
332
333 my $current_dir = cwd();
334 my $changed_dir = 0;
335 if (defined $working_dir && -e $working_dir) {
336 chdir "$working_dir";
337 $changed_dir = 1;
338 }
339
340 # When we are running this script through GLI, the SIGTERM signal handler
341 # won't get called on Windows when wget is to be prematurely terminated.
342 # Instead, when wget has to be terminated in the middle of execution, GLI will
343 # connect to a serverSocket here to communicate when it's time to stop wget.
344 if($self->dealingWithSockets()) {
345
346 $port = <STDIN>; # gets a port on localhost that's not yet in use
347 chomp($port);
348
349 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
350 LocalPort => $port,
351 Listen => 1,
352 Reuse => 1);
353
354 die "can't setup server" unless $serverSocket;
355 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
356
357 $read_set = new IO::Select(); # create handle set for reading
358 $read_set->add($serverSocket); # add the main socket to the set
359 }
360
361 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
362
363 # Shouldn't use double quotes around wget path after all? See final comment at
364 # http://www.perlmonks.org/?node_id=394709
365 # http://coldattic.info/shvedsky/pro/blogs/a-foo-walks-into-a-bar/posts/63
366 # Therefore, compose the command as an array rather than as a string, to preserve spaces in the filepath
367 # because single/double quotes using open3 seem to launch a subshell, see also final comment at
368 # http://www.perlmonks.org/?node_id=394709 and that ends up causing problems in terminating wget, as 2 processes
369 # got launched then which don't have parent-child pid relationship (so that terminating one doesn't terminate the other).
370 my @commandargs = split(' ', $cmdWget);
371 unshift(@commandargs, $wget_file_path);
372 $command = "$wget_file_path $cmdWget";
373# print STDOUT "Command is: $command\n"; # displayed in GLI output
374# print STDERR "Command is: $command\n"; # goes into ServerInfoDialog
375
376 # Wget's output needs to be monitored to find out when it has naturally terminated.
377 # Wget's output is sent to its STDERR so we can't use open2 without doing 2>&1.
378 # On linux, 2>&1 launches a subshell which then launches wget, meaning that killing
379 # the childpid does not kill wget on Linux but the subshell that launched it instead.
380 # Therefore, we use open3. Though the child process wget sends output only to its stdout [is this meant to be "stderr"?],
381 # using open3 says chld_err is undefined and the output of wget only comes in chld_out(!)
382 # However that may be, it works with open3. But to avoid the confusion of managing and
383 # closing an extra unused handle, a single handle is used instead for both the child's
384 # stderr and stdout.
385 # See http://blog.0x1fff.com/2009/09/howto-execute-system-commands-in-perl.html
386 # for why this is the right thing to do.
387
388 # Both open2 and open3 don't return on failure, but raise an exception. The handling
389 # of the exception is described on p.568 of the Perl Cookbook
390 eval {
391 #$childpid = open3($chld_in, $chld_out, $chld_out, $command); # There should be no double quotes in command, like around filepaths to wget, else need to use array version of command as below
392 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
393
394 # instead of calling open3 directly, call wrapper _open3() subroutine that will use sockets to
395 # connect to the child process' iostreams, because we can then use IO::Select's can_read() even on Windows
396 ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
397 };
398 if ($@) {
399 if($@ =~ m/^open3/) {
400 die "open3 failed in $0: $!\n$@\n";
401 }
402 die "Tried to launch open3 in $0, got unexpected exception: $@";
403 }
404
405 # Switching to use IO::Select, which allows timeouts, instead of doing the potentially blocking
406 # if defined(my $strLine=<$chld_out>)
407 # Google: perl open3 read timeout
408 # Google: perl open3 select() example
409 # https://stackoverflow.com/questions/10029406/why-does-ipcopen3-get-deadlocked
410 # https://codereview.stackexchange.com/questions/84496/the-right-way-to-use-ipcopen3-in-perl
411 # https://gist.github.com/shalk/6988937
412 # https://stackoverflow.com/questions/18373500/how-to-check-if-command-executed-with-ipcopen3-is-hung
413 # http://perldoc.perl.org/IO/Select.html
414 # http://perldoc.perl.org/IPC/Open3.html - explains the need for select()/IO::Select with open3
415 # http://www.perlmonks.org/?node_id=951554
416 # http://search.cpan.org/~dmuey/IPC-Open3-Utils-0.91/lib/IPC/Open3/Utils.pm
417 # https://stackoverflow.com/questions/3000907/wget-not-behaving-via-ipcopen3-vs-bash?rq=1
418
419 # create the select object and add our streamhandle(s)
420 my $sel = new IO::Select;
421 $sel->add($chld_out);
422
423 my $num_consecutive_timedouts = 0;
424 my $error = 0;
425 my $loop = 1;
426
427 while($loop)
428 {
429 # assume we're going to timeout trying to read from child process
430 $num_consecutive_timedouts++;
431
432
433 # block until data is available on the registered filehandles or until the timeout specified
434 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
435
436 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
437 # since we're in this if statement
438
439 # now there's a list of registered filehandles we can read from to loop through reading from.
440 # though we've registered only one, chld_out
441 foreach my $fh (@readyhandles) {
442 my $strLine;
443 #sleep 3;
444
445 # read up to 4096 bytes from this filehandle fh.
446 # if there is less than 4096 bytes, we'll only get
447 # those available bytes and won't block. If there
448 # is more than 4096 bytes, we'll only read 4096 and
449 # wait for the next iteration through the loop to
450 # read the rest.
451 my $len = sysread($fh, $strLine, 4096);
452
453 if($len) { # read something
454 if($blnShow) {
455 print STDERR "$strLine\n";
456 }
457 $strReadIn .= $strLine;
458 }
459 else { # error or EOF: (!defined $len || $len == 0)
460
461 if(!defined $len) { # could be an error reading
462 # On Windows, the socket ends up forcibly closed on the "other" side. It's just the way it's implemented
463 # on Windows when using sockets to our child process' iostreams. So $len not being defined is not an error in that case. Refer to
464 # https://stackoverflow.com/questions/16675950/perl-select-returning-undef-on-sysread-when-using-windows-ipcopen3-and-ios/16676271
465 if(!$!{ECONNRESET}) { # anything other ECONNRESET error means it's a real case of undefined $len being an error
466 print STDERR "WgetDownload: Error reading from child stream: $!\n";
467 # SHOULD THIS 'die "errmsg";' instead? - no, sockets may need closing
468 $error = 1;
469 } else { # $! contains message "An existing connection was forcibly closed by remote host" where "remote" is a reference to the sockets to our wget child process,
470 # NOT to the remote web server we're downloading from. In such a case, the error code is ECONNRESET, and it's not an error, despite $len being undefined.
471 #print STDERR "WgetDownload: wget finished\n";
472 }
473 }
474 elsif ($len == 0) { # EOF
475 # Finished reading from this filehandle $fh because we read 0 bytes.
476 # wget finished, terminate naturally
477 #print STDERR "WgetDownload: wget finished\n"; #print STDOUT "\nPerl: open3 command, input streams closed. Wget terminated naturally.\n";
478 }
479
480 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
481
482 # last; # if we have more than one filehandle registered with IO::Select
483
484 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
485
486 } # end else error or EOF
487
488 } # end foreach on readyhandles
489 } # end if on can_read
490
491 if($num_consecutive_timedouts >= $NUM_TRIES) {
492 $error = 1;
493 $loop = 0; # to break out of outer while loop
494
495 $num_consecutive_timedouts = 0;
496
497 &gsprintf::gsprintf(STDERR, "{WgetDownload.wget_timed_out_warning}\n", $NUM_TRIES);
498 }
499
500 if($loop == 0) { # error or EOF, either way, clean up
501 if($error) {
502 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
503
504 if(kill(0, $childpid)) {
505 # If kill(0, $childpid) returns true, then the process is running
506 # and we need to kill it.
507 close($chld_in);
508 close($chld_out);
509 kill('TERM', $childpid); # kill the process group by prefixing - to signal
510
511 # https://coderwall.com/p/q-ovnw/killing-all-child-processes-in-a-shell-script
512 # https://stackoverflow.com/questions/392022/best-way-to-kill-all-child-processes
513 #print STDERR "SENT SIGTERM TO CHILD PID: $childpid\n";
514 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
515 }
516 }
517 else { # wget finished (no errors), terminate naturally
518 #print STDOUT "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
519 close($chld_in);
520 close($chld_out);
521 waitpid $childpid, 0;
522 }
523
524 # error or not
525 $childpid = undef;
526 # Stop monitoring the read_handle and close the serverSocket
527 # (the Java end will close the client socket that Java opened)
528 if(defined $port) {
529 $read_set->remove($serverSocket);
530 close($serverSocket);
531 }
532 }
533
534 # If we've already terminated, either naturally or on error, we can get out of the while loop
535 next if($loop == 0);
536
537 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
538
539 # if we run this script from the command-line (as opposed to from GLI),
540 # then we're not working with sockets and can therefore skip the next bits
541 next unless(defined $port);
542
543 # http://www.perlfect.com/articles/select.shtml
544 # "multiplex between several filehandles within a single thread of control,
545 # thus creating the effect of parallelism in the handling of I/O."
546 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
547
548 # take all readable handles in turn
549 foreach my $rh (@rh_set) {
550 if($rh == $serverSocket) {
551 my $client = $rh->accept();
552 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
553 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
554
555 # Read from the client (getting rid of the trailing newline)
556 # Has the client sent the <<STOP>> signal?
557 my $signal = <$client>;
558 chomp($signal);
559 if($signal eq "<<STOP>>") {
560 print $client "Perl received STOP signal (on port $port): stopping wget\n";
561 $loop = 0; # out of outer while loop
562 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
563
564 # Sometimes the wget process takes some time to start up. If the STOP signal
565 # was sent, don't try to terminate the process until we know it is running.
566 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
567 # for it to start up, checking for whether it is running in order to kill it.
568 for(my $seconds = 1; $seconds <= 5; $seconds++) {
569 if(kill(0, $childpid)) {
570 # If kill(0, $childpid) returns true, then the process is running
571 # and we need to kill it.
572 close($chld_in);
573 close($chld_out);
574 kill("TERM", $childpid); # prefix - to signal to kill process group
575
576 $childpid = undef;
577
578 # Stop monitoring the read_handle and close the serverSocket
579 # (the Java end will close the client socket that Java opened)
580 $read_set->remove($rh); #$read_set->remove($serverSocket);
581 close($rh); #close($serverSocket);
582 print $client "Perl terminated wget and is about to exit\n";
583 last; # out of inner for loop
584 }
585 else { # the process may just be starting up, wait
586 sleep(1);
587 }
588 }
589 last; # out of foreach loop
590 }
591 }
592 }
593 }
594
595 if ($changed_dir) {
596 chdir $current_dir;
597 }
598
599 return $strReadIn;
600}
601
602
603sub useWgetMonitored
604{
605 #local $| = 1; # autoflush stdout buffer
606 #print STDOUT "*** Start of subroutine useWgetMonitored in $0\n";
607
608 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
609
610
611 my $current_dir = cwd();
612 my $changed_dir = 0;
613 if (defined $working_dir && -e $working_dir) {
614 chdir "$working_dir";
615 $changed_dir = 1;
616 }
617
618 # When we are running this script through GLI, the SIGTERM signal handler
619 # won't get called on Windows when wget is to be prematurely terminated.
620 # Instead, when wget has to be terminated in the middle of execution, GLI will
621 # connect to a serverSocket here to communicate when it's time to stop wget.
622 if($self->dealingWithSockets()) {
623
624 $port = <STDIN>; # gets a port on localhost that's not yet in use
625 chomp($port);
626
627 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
628 LocalPort => $port,
629 Listen => 1,
630 Reuse => 1);
631
632 die "can't setup server" unless $serverSocket;
633 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
634
635 $read_set = new IO::Select(); # create handle set for reading
636 $read_set->add($serverSocket); # add the main socket to the set
637 }
638
639 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
640 # compose the command as an array for open3, to preserve spaces in any filepath
641 my @commandargs = split(' ', $cmdWget);
642 unshift(@commandargs, $wget_file_path);
643 my $command = "$wget_file_path $cmdWget";
644 #print STDOUT "Command is: $command\n";
645
646 eval { # see p.568 of Perl Cookbook
647 #$childpid = open3($chld_in, $chld_out, $chld_out, @commandargs);
648 ($childpid, $chld_in, $chld_out) = _open3(@commandargs);
649 };
650 if ($@) {
651 if($@ =~ m/^open3/) {
652 die "open3 failed in $0: $!\n$@\n";
653 }
654 die "Tried to launch open3 in $0, got unexpected exception: $@";
655 }
656
657 my $full_text = "";
658 my $error_text = "";
659 my @follow_list = ();
660 my $line;
661
662 # create the select object and add our streamhandle(s)
663 my $sel = new IO::Select;
664 $sel->add($chld_out);
665
666 my $num_consecutive_timedouts = 0;
667 my $error = 0;
668 my $loop = 1;
669 while($loop)
670 {
671 # assume we're going to timeout trying to read from child process
672 $num_consecutive_timedouts++;
673
674 # block until data is available on the registered filehandles or until the timeout specified
675 if(my @readyhandles = $sel->can_read($TIMEOUT)) {
676 $num_consecutive_timedouts = 0; # re-zero, as we didn't timeout reading from child process after all
677 # since we're in this if statement
678
679 foreach my $fh (@readyhandles) {
680 my $len = sysread($fh, $line, 4096); # read up to 4k from current ready filehandle
681 if($len) { # read something
682
683
684 if((defined $blnShow) && $blnShow)
685 {
686 print STDERR "$line";
687 }
688
689 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
690 my $follow_url = $1;
691 push(@follow_list,$follow_url);
692 }
693
694 if ($line =~ m/ERROR\s+\d+/) {
695 $error_text .= $line;
696 }
697
698 $full_text .= $line;
699 } else { # error or EOF
700 if(!defined $len) { # error reading
701 #print STDERR "WgetDownload: Error reading from child stream: $!\n";
702 $error = 1;
703 }
704 if(!defined $len) {
705 if(!$!{ECONNRESET}) { # anything other ECONNRESET error means it's a real case of undefined $len being an error
706 #print STDERR "WgetDownload: Error reading from child stream: $!\n";
707 $error = 1;
708 } else { # the error code is ECONNRESET, and it's not an error, despite $len being undefined.
709 # Happens on Windows when using sockets to a child process' iostreams
710 #print STDERR "WgetDownload: wget finished\n";
711 }
712 }
713 elsif ($len == 0) { # EOF, finished with this filehandle because 0 bytes read
714 #print STDERR "WgetDownload: wget finished\n"; # wget terminated naturally
715 }
716
717 $loop = 0; # error or EOF, either way will need to clean up and break out of outer loop
718
719 # last; # if we have more than one filehandle registered with IO::Select
720
721 $sel->remove($fh); # if more than one filehandle registered, we should unregister all of them here on error
722 } # end else error or EOF
723
724 } # end foreach on readyhandles
725 } # end if on can_read
726
727 if($num_consecutive_timedouts >= $NUM_TRIES) {
728 $error = 1;
729 $loop = 0; # to break out of outer while loop
730
731 $num_consecutive_timedouts = 0;
732
733 #&gsprintf::gsprintf(STDERR, "{WgetDownload.wget_timed_out_warning}\n", $NUM_TRIES);
734 }
735
736 if($loop == 0) { # error or EOF, either way, clean up
737
738 if($error) {
739 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
740
741 if(kill(0, $childpid)) {
742 # If kill(0, $childpid) returns true, then the process is running
743 # and we need to kill it.
744 close($chld_in);
745 close($chld_out);
746 kill("TERM", $childpid); # prefix - to signal to kill process group
747
748 #print STDERR "Perl terminated wget after timing out repeatedly and is about to exit\n";
749 }
750 }
751 else { # wget finished, terminate naturally
752 close($chld_in);
753 close($chld_out);
754 # Program terminates only when the following line is included
755 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
756 # it prevents the child from turning into a "zombie process".
757 # While the wget process terminates without it, this perl script does not:
758 # the DOS prompt is not returned without it.
759 waitpid $childpid, 0;
760 }
761
762 # error or not:
763 $childpid = undef;
764 if(defined $port) {
765 $read_set->remove($serverSocket);
766 close($serverSocket);
767 }
768 }
769
770 # If we've already terminated, either naturally or on error, we can get out of the while loop
771 next if($loop == 0);
772
773 # Otherwise check for whether Java GLI has attempted to connect to this perl script via socket
774
775 # if we run this script from the command-line (as opposed to from GLI),
776 # then we're not working with sockets and can therefore skip the next bits
777 next unless(defined $port);
778
779 # http://www.perlfect.com/articles/select.shtml
780 # "multiplex between several filehandles within a single thread of control,
781 # thus creating the effect of parallelism in the handling of I/O."
782 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
783
784 # take all readable handles in turn
785 foreach my $rh (@rh_set) {
786 if($rh == $serverSocket) {
787 my $client = $rh->accept();
788 #$client->autoflush(1); # autoflush output buffer - don't put this back in: splits output irregularly over multilines
789 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
790
791 # Read from the client (getting rid of trailing newline)
792 # Has the client sent the <<STOP>> signal?
793 my $signal = <$client>;
794 chomp($signal);
795 if($signal eq "<<STOP>>") {
796 print $client "Perl received STOP signal (on port $port): stopping wget\n";
797 $loop = 0; # out of outer while loop
798 $self->{'forced_quit'} = 1; # subclasses need to know we're quitting
799
800 # Sometimes the wget process takes some time to start up. If the STOP signal
801 # was sent, don't try to terminate the process until we know it is running.
802 # Otherwise wget may start running after we tried to kill it. Wait 5 seconds
803 # for it to start up, checking for whether it is running in order to kill it.
804 for(my $seconds = 1; $seconds <= 5; $seconds++) {
805 if(kill(0, $childpid)) {
806 # If kill(0, $childpid) returns true, then the process is running
807 # and we need to kill it.
808 close($chld_in);
809 close($chld_out);
810 kill("TERM", $childpid); # prefix - to signal to kill process group
811
812 $childpid = undef;
813
814 # Stop monitoring the read_handle and close the serverSocket
815 # (the Java end will close the client socket that Java opened)
816 $read_set->remove($rh); #$read_set->remove($serverSocket);
817 close($rh); #close($serverSocket);
818 print $client "Perl terminated wget and is about to exit\n";
819 last; # out of inner for loop
820 }
821 else { # the process may just be starting up, wait
822 sleep(1);
823 }
824 }
825 last; # out of foreach loop
826 }
827 }
828 }
829 }
830
831 my $command_status = $?;
832 if ($command_status != 0) {
833 $error_text .= "Exit error: $command_status";
834 }
835
836 if ($changed_dir) {
837 chdir $current_dir;
838 }
839
840 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
841
842 return ($full_text,$error_text,$final_follow);
843}
844
845
846# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
847sub checkURL
848{
849 my ($self) = @_;
850 if ($self->{'url'} eq "")
851 {
852 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
853 }
854}
855
856sub error
857{
858 my ($strFunctionName,$strError) = @_;
859 {
860 print "Error occoured in WgetDownload.pm\n".
861 "In Function:".$strFunctionName."\n".
862 "Error Message:".$strError."\n";
863 exit(-1);
864 }
865}
866
8671;
868
Note: See TracBrowser for help on using the repository browser.