root/gsdl/trunk/perllib/downloaders/WgetDownload.pm @ 17354

Revision 17354, 8.7 KB (checked in by ak19, 11 years ago)

Added SIGTERM and SIGINT handlers to terminate wget child process either when a Ctrl-C (SIGINT) is send to the perl script run from the command-line, or when the java parent process kills this perl script using Process.destroy (which sends a SIGTERM). This code works on Linux. Need to test on Windows (although something similar did not seem to work before on Windows).

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use IPC::Open2;
37use Cwd;
38
39sub BEGIN {
40    @WgetDownload::ISA = ('BaseDownload');
41}
42
43my $arguments =
44     [ { 'name' => "proxy_on",
45    'desc' => "{WgetDownload.proxy_on}",
46    'type' => "flag",
47    'reqd' => "no",
48    'hiddengli' => "yes"},
49      { 'name' => "proxy_host", 
50    'desc' => "{WgetDownload.proxy_host}",         
51    'type' => "string",
52    'reqd' => "no",
53    'hiddengli' => "yes"},
54      { 'name' => "proxy_port",
55    'desc' => "{WgetDownload.proxy_port}",         
56    'type' => "string",
57    'reqd' => "no",
58    'hiddengli' => "yes"},
59      { 'name' => "user_name", 
60    'desc' => "{WgetDownload.user_name}",         
61    'type' => "string",
62    'reqd' => "no",
63    'hiddengli' => "yes"},
64      { 'name' => "user_password",
65    'desc' => "{WgetDownload.user_password}",         
66    'type' => "string",
67    'reqd' => "no",
68    'hiddengli' => "yes"}];
69
70my $options = { 'name'     => "WgetDownload",
71        'desc'     => "{WgetDownload.desc}",
72        'abstract' => "yes",
73        'inherits' => "yes",
74        'args'     => $arguments };
75
76
77# Declaring variables related to the wget child process so that the termination
78# signal handler for SIGTERM can close the streams and tidy up before ending
79# the child process.
80my $childpid;
81my($chld_out, $chld_in);
82
83# Handler called when this process is killed or abruptly ends due to receiving
84# one of the terminating signals that this handler is registered to deal with.
85sub abrupt_end_handler {
86    my $termination_signal = shift (@_);
87    {
88    if(defined $childpid) {
89        close($chld_out);
90        close($chld_in);
91   
92        # Send TERM signal to child process to terminate it. Sending the INT signal didn't work
93        # See http://perldoc.perl.org/perlipc.html#Signals
94        # Warning on using kill at http://perldoc.perl.org/perlfork.html
95        kill("TERM", $childpid);
96    }
97    }
98    exit(0);
99}
100
101# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
102# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
103# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
104# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
105# http://affy.blogspot.com/p5be/ch13.htm
106# http://perldoc.perl.org/perlipc.html#Signals
107$SIG{'INT'} = \&abrupt_end_handler;
108$SIG{'TERM'} = \&abrupt_end_handler;
109
110sub new {
111    my ($class) = shift (@_);
112    my ($getlist,$inputargs,$hashArgOptLists) = @_;
113    push(@$getlist, $class);
114
115    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
116    push(@{$hashArgOptLists->{"OptList"}},$options);
117
118    my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
119
120    return bless $self, $class;
121}
122
123sub checkWgetSetup
124{
125    my ($self,$blnGliCall) = @_;
126    #TODO: proxy detection??
127   
128    if((!$blnGliCall) && $self->{'proxy_on'})
129    {
130    &checkProxySetup($self);
131    }
132    &checkURL($self);
133}
134
135sub getWgetOptions
136{
137    my ($self) = @_;
138    my $strOptions = "";
139   
140    if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'})
141    {
142
143    $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
144
145    if ($self->{'user_name'} && $self->{'user_password'})
146    {
147        $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
148    }
149    }
150
151    if ($self->{'proxy_on'}) {
152    $strOptions .= " --proxy ";
153    }
154
155    return $strOptions;
156}
157
158# Checking for proxy setup: proxy server, proxy port, proxy username and password.
159sub checkProxySetup
160{
161    my ($self) = @_;
162    ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
163    # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
164    # Test if the connection is succeful. If the connection wasn't succeful then ask user to supply username and password.
165
166}
167
168sub useWget
169{
170    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
171
172    my ($strReadIn,$strLine,$command);
173    $strReadIn = "" unless defined $strReadIn;
174
175    my $current_dir = cwd();
176    my $changed_dir = 0;
177    if (defined $working_dir && -e $working_dir) {
178    chdir "$working_dir";
179    $changed_dir = 1;
180    }
181    my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
182    #$command = "\"$wget_file_path\" $cmdWget |";    #open(*WIN,$command) || die "wget request failed: $!\n";
183    #open(*WIN,$command) || die "wget request failed: $!\n";   
184
185
186    $command = "\"$wget_file_path\" $cmdWget";
187    $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
188
189    while (defined($strLine=<$chld_out>)) # we're reading in from child process' stdout
190    {
191    if($blnShow)
192    {
193        print STDERR "$strReadIn\n";
194    }
195
196    $strReadIn .= $strLine;
197    }
198   
199    close($chld_in);
200    close($chld_out);
201   
202    # Program terminates only when the following line is included
203    # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
204    # it prevents the child from turning into a "zombie process".
205    # While the wget process terminates without it, this perl script does not:
206    # the DOS prompt is not returned without it.
207    waitpid $childpid, 0;
208
209    if ($changed_dir) {
210    chdir $current_dir;
211    }
212   
213    return $strReadIn;
214}
215
216
217sub useWgetMonitored
218{
219    my ($self, $cmdWget,$blnShow, $working_dir) = @_;
220
221
222    my $current_dir = cwd();
223    my $changed_dir = 0;
224    if (defined $working_dir && -e $working_dir) {
225    chdir "$working_dir";
226    $changed_dir = 1;
227    }
228    my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
229    #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n";
230###    print STDERR "**** wget cmd = $command\n";
231    #open(*WIN,$command) || die "wget request failed: $!\n";
232
233    my $command = "\"$wget_file_path\" $cmdWget";
234    $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
235
236    my $full_text = "";
237    my $error_text = "";
238    my @follow_list = ();
239    my $line;
240
241    while (defined($line=<$chld_out>)) # we're reading in from child process' stdout
242    {
243    if((defined $blnShow) && $blnShow)
244    {
245        print STDERR "$line";
246    }
247
248    if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
249        my $follow_url = $1;
250        push(@follow_list,$follow_url);
251    }
252
253    if ($line =~ m/ERROR\s+\d+/) {
254        $error_text .= $line;
255    }
256
257    $full_text .= $line;
258    }
259
260    close($chld_in);
261    close($chld_out);
262
263    # Program terminates only when the following line is included
264    # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
265    # it prevents the child from turning into a "zombie process".
266    # While the wget process terminates without it, this perl script does not:
267    # the DOS prompt is not returned without it.
268    waitpid $childpid, 0;
269
270    my $command_status = $?;
271    if ($command_status != 0) {
272    $error_text .= "Exit error: $command_status";
273    }
274
275    if ($changed_dir) {
276    chdir $current_dir;
277    }
278   
279    my $final_follow = pop(@follow_list); # might be undefined, but that's OK
280   
281    return ($full_text,$error_text,$final_follow);
282}
283
284
285# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
286sub checkURL
287{
288    my ($self) = @_;
289    if ($self->{'url'} eq "")
290    {
291    &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
292    }
293}
294
295sub error
296{
297    my ($strFunctionName,$strError) = @_;
298    {
299    print "Error occoured in WgetDownload.pm\n".
300        "In Function:".$strFunctionName."\n".
301        "Error Message:".$strError."\n";
302    exit(-1);
303    }
304}
305
3061;
Note: See TracBrowser for help on using the browser.