root/gsdl/trunk/perllib/downloaders/WgetDownload.pm @ 17529

Revision 17529, 11.5 KB (checked in by ak19, 11 years ago)

Now WgetDownload?.pm uses Sockets to communicate with GLI which launched it, to monitor for when GLI may tell it to prematurely terminate Wget. WgetDownload?.pm still uses a signal handler (for SIGINT) to respond to the ctrl-c sent when this script is called frm the command prompt (via downloadfrom.pl).

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use Cwd;
37use IPC::Open2;
38use IO::Select;
39use IO::Socket;
40
41
42sub BEGIN {
43    @WgetDownload::ISA = ('BaseDownload');
44}
45
46my $arguments =
47     [ { 'name' => "proxy_on",
48    'desc' => "{WgetDownload.proxy_on}",
49    'type' => "flag",
50    'reqd' => "no",
51    'hiddengli' => "yes"},
52      { 'name' => "proxy_host", 
53    'desc' => "{WgetDownload.proxy_host}",         
54    'type' => "string",
55    'reqd' => "no",
56    'hiddengli' => "yes"},
57      { 'name' => "proxy_port",
58    'desc' => "{WgetDownload.proxy_port}",         
59    'type' => "string",
60    'reqd' => "no",
61    'hiddengli' => "yes"},
62      { 'name' => "user_name", 
63    'desc' => "{WgetDownload.user_name}",         
64    'type' => "string",
65    'reqd' => "no",
66    'hiddengli' => "yes"},
67      { 'name' => "user_password",
68    'desc' => "{WgetDownload.user_password}",         
69    'type' => "string",
70    'reqd' => "no",
71    'hiddengli' => "yes"}];
72
73my $options = { 'name'     => "WgetDownload",
74        'desc'     => "{WgetDownload.desc}",
75        'abstract' => "yes",
76        'inherits' => "yes",
77        'args'     => $arguments };
78
79
80# Declaring file global variables related to the wget child process so that
81# the termination signal handler for SIGTERM can close the streams and tidy
82# up before ending the child process.
83my $childpid;
84my($chld_out, $chld_in);
85my ($serverSocket, $read_set);
86
87# When this script is called from the command line, this handler will be called
88# if this process is killed or abruptly ends due to receiving one of the
89# terminating signals that this handler is registered to deal with.
90sub abrupt_end_handler {
91    my $termination_signal = shift (@_);
92    if(defined $childpid) {
93    close($chld_out);
94    close($chld_in);
95   
96    #print STDOUT "Received termination signal: $termination_signal\n";
97
98    # Send TERM signal to child process to terminate it. Sending the INT signal doesn't work
99    # See http://perldoc.perl.org/perlipc.html#Signals
100    # Warning on using kill at http://perldoc.perl.org/perlfork.html
101    kill("TERM", $childpid);
102
103    # If the SIGTERM sent on Linux calls this handler, we want to make
104    # sure any socket connection is closed.
105    # Otherwise sockets are only used when this script is run from GLI
106    # in which case the handlers don't really get called.
107    if(defined $serverSocket) {
108        $read_set->remove($serverSocket) if defined $read_set;
109        close($serverSocket);
110    }
111    }
112
113    exit(0);
114}
115
116# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
117# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
118# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
119# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
120# http://affy.blogspot.com/p5be/ch13.htm
121# http://perldoc.perl.org/perlipc.html#Signals
122$SIG{'INT'} = \&abrupt_end_handler;
123$SIG{'TERM'} = \&abrupt_end_handler;
124
125sub new {
126    my ($class) = shift (@_);
127    my ($getlist,$inputargs,$hashArgOptLists) = @_;
128    push(@$getlist, $class);
129
130    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
131    push(@{$hashArgOptLists->{"OptList"}},$options);
132
133    my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
134
135    return bless $self, $class;
136}
137
138sub checkWgetSetup
139{
140    my ($self,$blnGliCall) = @_;
141    #TODO: proxy detection??
142   
143    if((!$blnGliCall) && $self->{'proxy_on'})
144    {
145    &checkProxySetup($self);
146    }
147    &checkURL($self);
148}
149
150sub getWgetOptions
151{
152    my ($self) = @_;
153    my $strOptions = "";
154   
155    if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'})
156    {
157
158    $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
159
160    if ($self->{'user_name'} && $self->{'user_password'})
161    {
162        $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
163    }
164    }
165
166    if ($self->{'proxy_on'}) {
167    $strOptions .= " --proxy ";
168    }
169
170    return $strOptions;
171}
172
173# Checking for proxy setup: proxy server, proxy port, proxy username and password.
174sub checkProxySetup
175{
176    my ($self) = @_;
177    ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
178    # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
179    # Test if the connection is successful. If the connection wasn't successful then ask user to supply username and password.
180
181}
182
183sub useWget
184{
185    #local $| = 1; # autoflush stdout buffer
186    #print STDOUT "*** Start of subroutine useWget in $0\n";
187
188    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
189
190    my ($strReadIn,$strLine,$command);
191    $strReadIn = "" unless defined $strReadIn;
192
193    my $current_dir = cwd();
194    my $changed_dir = 0;
195    if (defined $working_dir && -e $working_dir) {
196    chdir "$working_dir";
197    $changed_dir = 1;
198    }
199
200    # When we are running this script through GLI, the SIGTERM signal handler
201    # won't get called on Windows when wget is to be prematurely terminated.
202    # Instead, when wget has to be terminated in the middle of execution, GLI will
203    # connect to a serverSocket here to communicate when it's time to stop wget.
204    my $port;
205    if(defined $self->{'gli'} && $self->{'gli'}) {
206
207    $port = <STDIN>; # gets a port on localhost that's not yet in use
208    chomp($port);
209   
210    $serverSocket = IO::Socket::INET->new( Proto     => 'tcp',
211                           LocalPort => $port,
212                           Listen    => 1,
213                           Reuse     => 1);
214   
215    die "can't setup server" unless $serverSocket;
216    #print STDOUT "[Serversocket $0 accepting clients at port $port]\n";
217
218    $read_set = new IO::Select();         # create handle set for reading
219    $read_set->add($serverSocket);        # add the main socket to the set
220    }
221
222    my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
223    $command = "\"$wget_file_path\" $cmdWget 2>&1";
224    # print STDERR "Command is: $command\n";
225    $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
226
227    my $loop = 1;
228    while($loop)
229    {
230    if (defined(my $strLine=<$chld_out>)) { # we're reading in from child process' stdout
231        if($blnShow) {
232        print STDERR "$strLine\n";
233        }
234        $strReadIn .= $strLine;
235    }
236    else { # wget finished, terminate naturally
237        print STDERR "\nPerl: open2 command, input stream closed. Wget terminated naturally.\n";
238        close($chld_in);
239        close($chld_out);
240        waitpid $childpid, 0;
241        $loop = 0;
242       
243        $childpid = undef;
244        if(defined $port) {
245        $read_set->remove($serverSocket);
246        close($serverSocket);
247        }
248    }
249
250    # if we run this script from the command-line (as opposed to from GLI),
251    # then we're not working with sockets and can therefore can skip the next bits
252    next unless(defined $port);
253
254    # http://www.perlfect.com/articles/select.shtml
255    # "multiplex between several filehandles within a single thread of control,
256    # thus creating the effect of parallelism in the handling of I/O."
257    my @rh_set = $read_set->can_read(0.002); # every 2 ms check if there's a client socket connecting
258
259    # take all readable handles in turn
260    foreach my $rh (@rh_set) {
261        if($rh == $serverSocket) {
262        my $client = $rh->accept();
263        #$client->autoflush(1); # autoflush output buffer - don't put this back in: output split irregularly over lines
264        print $client "Talked to ServerSocket (port $port). Connection accepted\n";
265       
266        # Read from the client (getting rid of trailing newline)
267        # Has the client sent the <<STOP>> signal?
268        my $signal = <$client>;
269        chomp($signal);
270        if($signal eq "<<STOP>>") {
271            print $client "Perl received STOP signal (on port $port): stopping wget\n";
272           
273            $loop = 0;
274            close($chld_in);
275            close($chld_out);
276            kill("TERM", $childpid);
277           
278            $childpid = undef;
279
280            # Stop monitoring the read_handle
281            # close the serverSocket (the Java end will close the client socket that Java opened)
282            $read_set->remove($rh); #$read_set->remove($serverSocket);
283            close($rh);         #close($serverSocket);
284            #print $client "Perl is about to exit\n";
285            last;
286        }
287        }
288    }
289    }
290
291    if ($changed_dir) {
292    chdir $current_dir;
293    }
294   
295    return $strReadIn;
296}
297
298
299sub useWgetMonitored
300{
301    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
302
303
304    my $current_dir = cwd();
305    my $changed_dir = 0;
306    if (defined $working_dir && -e $working_dir) {
307    chdir "$working_dir";
308    $changed_dir = 1;
309    }
310    my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
311    #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n";
312###    print STDERR "**** wget cmd = $command\n";
313    #open(*WIN,$command) || die "wget request failed: $!\n";
314
315    my $command = "\"$wget_file_path\" $cmdWget";
316    $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
317
318    my $full_text = "";
319    my $error_text = "";
320    my @follow_list = ();
321    my $line;
322
323    while (defined($line=<$chld_out>)) # we're reading in from child process' stdout
324    {
325    if((defined $blnShow) && $blnShow)
326    {
327        print STDERR "$line";
328    }
329
330    if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
331        my $follow_url = $1;
332        push(@follow_list,$follow_url);
333    }
334
335    if ($line =~ m/ERROR\s+\d+/) {
336        $error_text .= $line;
337    }
338
339    $full_text .= $line;
340    }
341
342    close($chld_in);
343    close($chld_out);
344
345    # Program terminates only when the following line is included
346    # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
347    # it prevents the child from turning into a "zombie process".
348    # While the wget process terminates without it, this perl script does not:
349    # the DOS prompt is not returned without it.
350    waitpid $childpid, 0;
351
352    my $command_status = $?;
353    if ($command_status != 0) {
354    $error_text .= "Exit error: $command_status";
355    }
356
357    if ($changed_dir) {
358    chdir $current_dir;
359    }
360   
361    my $final_follow = pop(@follow_list); # might be undefined, but that's OK
362   
363    return ($full_text,$error_text,$final_follow);
364}
365
366
367# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
368sub checkURL
369{
370    my ($self) = @_;
371    if ($self->{'url'} eq "")
372    {
373    &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
374    }
375}
376
377sub error
378{
379    my ($strFunctionName,$strError) = @_;
380    {
381    print "Error occoured in WgetDownload.pm\n".
382        "In Function:".$strFunctionName."\n".
383        "Error Message:".$strError."\n";
384    exit(-1);
385    }
386}
387
3881;
Note: See TracBrowser for help on using the browser.