source: gsdl/trunk/perllib/downloaders/WgetDownload.pm@ 17529

Last change on this file since 17529 was 17529, checked in by ak19, 16 years ago

Now WgetDownload.pm uses Sockets to communicate with GLI which launched it, to monitor for when GLI may tell it to prematurely terminate Wget. WgetDownload.pm still uses a signal handler (for SIGINT) to respond to the ctrl-c sent when this script is called frm the command prompt (via downloadfrom.pl).

  • Property svn:keywords set to Author Date Id Revision
File size: 11.5 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use IPC::Open2;
38use IO::Select;
39use IO::Socket;
40
41
42sub BEGIN {
43 @WgetDownload::ISA = ('BaseDownload');
44}
45
46my $arguments =
47 [ { 'name' => "proxy_on",
48 'desc' => "{WgetDownload.proxy_on}",
49 'type' => "flag",
50 'reqd' => "no",
51 'hiddengli' => "yes"},
52 { 'name' => "proxy_host",
53 'desc' => "{WgetDownload.proxy_host}",
54 'type' => "string",
55 'reqd' => "no",
56 'hiddengli' => "yes"},
57 { 'name' => "proxy_port",
58 'desc' => "{WgetDownload.proxy_port}",
59 'type' => "string",
60 'reqd' => "no",
61 'hiddengli' => "yes"},
62 { 'name' => "user_name",
63 'desc' => "{WgetDownload.user_name}",
64 'type' => "string",
65 'reqd' => "no",
66 'hiddengli' => "yes"},
67 { 'name' => "user_password",
68 'desc' => "{WgetDownload.user_password}",
69 'type' => "string",
70 'reqd' => "no",
71 'hiddengli' => "yes"}];
72
73my $options = { 'name' => "WgetDownload",
74 'desc' => "{WgetDownload.desc}",
75 'abstract' => "yes",
76 'inherits' => "yes",
77 'args' => $arguments };
78
79
80# Declaring file global variables related to the wget child process so that
81# the termination signal handler for SIGTERM can close the streams and tidy
82# up before ending the child process.
83my $childpid;
84my($chld_out, $chld_in);
85my ($serverSocket, $read_set);
86
87# When this script is called from the command line, this handler will be called
88# if this process is killed or abruptly ends due to receiving one of the
89# terminating signals that this handler is registered to deal with.
90sub abrupt_end_handler {
91 my $termination_signal = shift (@_);
92 if(defined $childpid) {
93 close($chld_out);
94 close($chld_in);
95
96 #print STDOUT "Received termination signal: $termination_signal\n";
97
98 # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
99 # See http://perldoc.perl.org/perlipc.html#Signals
100 # Warning on using kill at http://perldoc.perl.org/perlfork.html
101 kill("TERM", $childpid);
102
103 # If the SIGTERM sent on Linux calls this handler, we want to make
104 # sure any socket connection is closed.
105 # Otherwise sockets are only used when this script is run from GLI
106 # in which case the handlers don't really get called.
107 if(defined $serverSocket) {
108 $read_set->remove($serverSocket) if defined $read_set;
109 close($serverSocket);
110 }
111 }
112
113 exit(0);
114}
115
116# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
117# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
118# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
119# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
120# http://affy.blogspot.com/p5be/ch13.htm
121# http://perldoc.perl.org/perlipc.html#Signals
122$SIG{'INT'} = \&abrupt_end_handler;
123$SIG{'TERM'} = \&abrupt_end_handler;
124
125sub new {
126 my ($class) = shift (@_);
127 my ($getlist,$inputargs,$hashArgOptLists) = @_;
128 push(@$getlist, $class);
129
130 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
131 push(@{$hashArgOptLists->{"OptList"}},$options);
132
133 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
134
135 return bless $self, $class;
136}
137
138sub checkWgetSetup
139{
140 my ($self,$blnGliCall) = @_;
141 #TODO: proxy detection??
142
143 if((!$blnGliCall) && $self->{'proxy_on'})
144 {
145 &checkProxySetup($self);
146 }
147 &checkURL($self);
148}
149
150sub getWgetOptions
151{
152 my ($self) = @_;
153 my $strOptions = "";
154
155 if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'})
156 {
157
158 $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
159
160 if ($self->{'user_name'} && $self->{'user_password'})
161 {
162 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
163 }
164 }
165
166 if ($self->{'proxy_on'}) {
167 $strOptions .= " --proxy ";
168 }
169
170 return $strOptions;
171}
172
173# Checking for proxy setup: proxy server, proxy port, proxy username and password.
174sub checkProxySetup
175{
176 my ($self) = @_;
177 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
178 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
179 # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
180
181}
182
183sub useWget
184{
185 #local $| = 1; # autoflush stdout buffer
186 #print STDOUT "*** Start of subroutine useWget in $0\n";
187
188 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
189
190 my ($strReadIn,$strLine,$command);
191 $strReadIn = "" unless defined $strReadIn;
192
193 my $current_dir = cwd();
194 my $changed_dir = 0;
195 if (defined $working_dir && -e $working_dir) {
196 chdir "$working_dir";
197 $changed_dir = 1;
198 }
199
200 # When we are running this script through GLI, the SIGTERM signal handler
201 # won't get called on Windows when wget is to be prematurely terminated.
202 # Instead, when wget has to be terminated in the middle of execution, GLI will
203 # connect to a serverSocket here to communicate when it's time to stop wget.
204 my $port;
205 if(defined $self->{'gli'} && $self->{'gli'}) {
206
207 $port = <STDIN>; # gets a port on localhost that's not yet in use
208 chomp($port);
209
210 $serverSocket = IO::Socket::INET->new( Proto => 'tcp',
211 LocalPort => $port,
212 Listen => 1,
213 Reuse => 1);
214
215 die "can't setup server" unless $serverSocket;
216 #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
217
218 $read_set = new IO::Select(); # create handle set for reading
219 $read_set->add($serverSocket); # add the main socket to the set
220 }
221
222 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
223 $command = "\"$wget_file_path\" $cmdWget 2>&1";
224 # print STDERR "Command is: $command\n";
225 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
226
227 my $loop = 1;
228 while($loop)
229 {
230 if (defined(my $strLine=<$chld_out>)) { # we're reading in from child process' stdout
231 if($blnShow) {
232 print STDERR "$strLine\n";
233 }
234 $strReadIn .= $strLine;
235 }
236 else { # wget finished, terminate naturally
237 print STDERR "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
238 close($chld_in);
239 close($chld_out);
240 waitpid $childpid, 0;
241 $loop = 0;
242
243 $childpid = undef;
244 if(defined $port) {
245 $read_set->remove($serverSocket);
246 close($serverSocket);
247 }
248 }
249
250 # if we run this script from the command-line (as opposed to from GLI),
251 # then we're not working with sockets and can therefore can skip the next bits
252 next unless(defined $port);
253
254 # http://www.perlfect.com/articles/select.shtml
255 # "multiplex between several filehandles within a single thread of control,
256 # thus creating the effect of parallelism in the handling of I/O."
257 my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
258
259 # take all readable handles in turn
260 foreach my $rh (@rh_set) {
261 if($rh == $serverSocket) {
262 my $client = $rh->accept();
263 #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
264 print $client "Talked to ServerSocket (port $port). Connection accepted\n";
265
266 # Read from the client (getting rid of trailing newline)
267 # Has the client sent the <<STOP>> signal?
268 my $signal = <$client>;
269 chomp($signal);
270 if($signal eq "<<STOP>>") {
271 print $client "Perl received STOP signal (on port $port): stopping wget\n";
272
273 $loop = 0;
274 close($chld_in);
275 close($chld_out);
276 kill("TERM", $childpid);
277
278 $childpid = undef;
279
280 # Stop monitoring the read_handle
281 # close the serverSocket (the Java end will close the client socket that Java opened)
282 $read_set->remove($rh); #$read_set->remove($serverSocket);
283 close($rh); #close($serverSocket);
284 #print $client "Perl is about to exit\n";
285 last;
286 }
287 }
288 }
289 }
290
291 if ($changed_dir) {
292 chdir $current_dir;
293 }
294
295 return $strReadIn;
296}
297
298
299sub useWgetMonitored
300{
301 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
302
303
304 my $current_dir = cwd();
305 my $changed_dir = 0;
306 if (defined $working_dir && -e $working_dir) {
307 chdir "$working_dir";
308 $changed_dir = 1;
309 }
310 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
311 #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n";
312### print STDERR "**** wget cmd = $command\n";
313 #open(*WIN,$command) || die "wget request failed: $!\n";
314
315 my $command = "\"$wget_file_path\" $cmdWget";
316 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
317
318 my $full_text = "";
319 my $error_text = "";
320 my @follow_list = ();
321 my $line;
322
323 while (defined($line=<$chld_out>)) # we're reading in from child process' stdout
324 {
325 if((defined $blnShow) && $blnShow)
326 {
327 print STDERR "$line";
328 }
329
330 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
331 my $follow_url = $1;
332 push(@follow_list,$follow_url);
333 }
334
335 if ($line =~ m/ERROR\s+\d+/) {
336 $error_text .= $line;
337 }
338
339 $full_text .= $line;
340 }
341
342 close($chld_in);
343 close($chld_out);
344
345 # Program terminates only when the following line is included
346 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
347 # it prevents the child from turning into a "zombie process".
348 # While the wget process terminates without it, this perl script does not:
349 # the DOS prompt is not returned without it.
350 waitpid $childpid, 0;
351
352 my $command_status = $?;
353 if ($command_status != 0) {
354 $error_text .= "Exit error: $command_status";
355 }
356
357 if ($changed_dir) {
358 chdir $current_dir;
359 }
360
361 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
362
363 return ($full_text,$error_text,$final_follow);
364}
365
366
367# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
368sub checkURL
369{
370 my ($self) = @_;
371 if ($self->{'url'} eq "")
372 {
373 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
374 }
375}
376
377sub error
378{
379 my ($strFunctionName,$strError) = @_;
380 {
381 print "Error occoured in WgetDownload.pm\n".
382 "In Function:".$strFunctionName."\n".
383 "Error Message:".$strError."\n";
384 exit(-1);
385 }
386}
387
3881;
Note: See TracBrowser for help on using the repository browser.