source: gsdl/trunk/perllib/downloaders/WgetDownload.pm@ 17354

Last change on this file since 17354 was 17354, checked in by ak19, 13 years ago

Added SIGTERM and SIGINT handlers to terminate wget child process either when a Ctrl-C (SIGINT) is send to the perl script run from the command-line, or when the java parent process kills this perl script using Process.destroy (which sends a SIGTERM). This code works on Linux. Need to test on Windows (although something similar did not seem to work before on Windows).

  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1###########################################################################
2#
3# WgetDownload.pm -- Download base module that handles calling Wget
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package WgetDownload;
27
28eval {require bytes};
29
30# suppress the annoying "subroutine redefined" warning that various
31# plugins cause under perl 5.6
32$SIG{__WARN__} = sub {warn($_[0]) unless ($_[0] =~ /Subroutine\s+\S+\sredefined/)};
33
34use BaseDownload;
35use strict;
36use IPC::Open2;
37use Cwd;
38
39sub BEGIN {
40 @WgetDownload::ISA = ('BaseDownload');
41}
42
43my $arguments =
44 [ { 'name' => "proxy_on",
45 'desc' => "{WgetDownload.proxy_on}",
46 'type' => "flag",
47 'reqd' => "no",
48 'hiddengli' => "yes"},
49 { 'name' => "proxy_host",
50 'desc' => "{WgetDownload.proxy_host}",
51 'type' => "string",
52 'reqd' => "no",
53 'hiddengli' => "yes"},
54 { 'name' => "proxy_port",
55 'desc' => "{WgetDownload.proxy_port}",
56 'type' => "string",
57 'reqd' => "no",
58 'hiddengli' => "yes"},
59 { 'name' => "user_name",
60 'desc' => "{WgetDownload.user_name}",
61 'type' => "string",
62 'reqd' => "no",
63 'hiddengli' => "yes"},
64 { 'name' => "user_password",
65 'desc' => "{WgetDownload.user_password}",
66 'type' => "string",
67 'reqd' => "no",
68 'hiddengli' => "yes"}];
69
70my $options = { 'name' => "WgetDownload",
71 'desc' => "{WgetDownload.desc}",
72 'abstract' => "yes",
73 'inherits' => "yes",
74 'args' => $arguments };
75
76
77# Declaring variables related to the wget child process so that the termination
78# signal handler for SIGTERM can close the streams and tidy up before ending
79# the child process.
80my $childpid;
81my($chld_out, $chld_in);
82
83# Handler called when this process is killed or abruptly ends due to receiving
84# one of the terminating signals that this handler is registered to deal with.
85sub abrupt_end_handler {
86 my $termination_signal = shift (@_);
87 {
88 if(defined $childpid) {
89 close($chld_out);
90 close($chld_in);
91
92 # Send TERM signal to child process to terminate it. Sending the INT signal didn't work
93 # See http://perldoc.perl.org/perlipc.html#Signals
94 # Warning on using kill at http://perldoc.perl.org/perlfork.html
95 kill("TERM", $childpid);
96 }
97 }
98 exit(0);
99}
100
101# Registering a handler for when termination signals SIGINT and SIGTERM are received to stop
102# the wget child process. SIGTERM--generated by Java's Process.destroy()--is the default kill
103# signal (kill -15) on Linux, while SIGINT is generated upon Ctrl-C (also on Windows).
104# Note that SIGKILL can't be handled as the handler won't get called for it. More information:
105# http://affy.blogspot.com/p5be/ch13.htm
106# http://perldoc.perl.org/perlipc.html#Signals
107$SIG{'INT'} = \&abrupt_end_handler;
108$SIG{'TERM'} = \&abrupt_end_handler;
109
110sub new {
111 my ($class) = shift (@_);
112 my ($getlist,$inputargs,$hashArgOptLists) = @_;
113 push(@$getlist, $class);
114
115 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
116 push(@{$hashArgOptLists->{"OptList"}},$options);
117
118 my $self = new BaseDownload($getlist,$inputargs,$hashArgOptLists);
119
120 return bless $self, $class;
121}
122
123sub checkWgetSetup
124{
125 my ($self,$blnGliCall) = @_;
126 #TODO: proxy detection??
127
128 if((!$blnGliCall) && $self->{'proxy_on'})
129 {
130 &checkProxySetup($self);
131 }
132 &checkURL($self);
133}
134
135sub getWgetOptions
136{
137 my ($self) = @_;
138 my $strOptions = "";
139
140 if ($self->{'proxy_on'} && $self->{'proxy_host'} && $self->{'proxy_port'})
141 {
142
143 $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
144
145 if ($self->{'user_name'} && $self->{'user_password'})
146 {
147 $strOptions .= "--proxy-user=$self->{'user_name'}"." --proxy-passwd=$self->{'user_password'}";
148 }
149 }
150
151 if ($self->{'proxy_on'}) {
152 $strOptions .= " --proxy ";
153 }
154
155 return $strOptions;
156}
157
158# Checking for proxy setup: proxy server, proxy port, proxy username and password.
159sub checkProxySetup
160{
161 my ($self) = @_;
162 ($self->{'proxy_on'}) || &error("checkProxySetup","The proxy is not on? How could that be happening?");
163 # Setup .wgetrc by using $self->{'proxy_host'} and $self->{'proxy_port'}
164 # Test if the connection is succeful. If the connection wasn't succeful then ask user to supply username and password.
165
166}
167
168sub useWget
169{
170 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
171
172 my ($strReadIn,$strLine,$command);
173 $strReadIn = "" unless defined $strReadIn;
174
175 my $current_dir = cwd();
176 my $changed_dir = 0;
177 if (defined $working_dir && -e $working_dir) {
178 chdir "$working_dir";
179 $changed_dir = 1;
180 }
181 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
182 #$command = "\"$wget_file_path\" $cmdWget |"; #open(*WIN,$command) || die "wget request failed: $!\n";
183 #open(*WIN,$command) || die "wget request failed: $!\n";
184
185
186 $command = "\"$wget_file_path\" $cmdWget";
187 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
188
189 while (defined($strLine=<$chld_out>)) # we're reading in from child process' stdout
190 {
191 if($blnShow)
192 {
193 print STDERR "$strReadIn\n";
194 }
195
196 $strReadIn .= $strLine;
197 }
198
199 close($chld_in);
200 close($chld_out);
201
202 # Program terminates only when the following line is included
203 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
204 # it prevents the child from turning into a "zombie process".
205 # While the wget process terminates without it, this perl script does not:
206 # the DOS prompt is not returned without it.
207 waitpid $childpid, 0;
208
209 if ($changed_dir) {
210 chdir $current_dir;
211 }
212
213 return $strReadIn;
214}
215
216
217sub useWgetMonitored
218{
219 my ($self, $cmdWget,$blnShow, $working_dir) = @_;
220
221
222 my $current_dir = cwd();
223 my $changed_dir = 0;
224 if (defined $working_dir && -e $working_dir) {
225 chdir "$working_dir";
226 $changed_dir = 1;
227 }
228 my $wget_file_path = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
229 #my $command = "\"$wget_file_path\" $cmdWget 2>&1 |"; #open(*WIN,$command) || die "wget request failed: $!\n";
230### print STDERR "**** wget cmd = $command\n";
231 #open(*WIN,$command) || die "wget request failed: $!\n";
232
233 my $command = "\"$wget_file_path\" $cmdWget";
234 $childpid = open2($chld_out, $chld_in, $command) || die "wget request failed: $!\n";
235
236 my $full_text = "";
237 my $error_text = "";
238 my @follow_list = ();
239 my $line;
240
241 while (defined($line=<$chld_out>)) # we're reading in from child process' stdout
242 {
243 if((defined $blnShow) && $blnShow)
244 {
245 print STDERR "$line";
246 }
247
248 if ($line =~ m/^Location:\s*(.*?)\s*\[following\]\s*$/i) {
249 my $follow_url = $1;
250 push(@follow_list,$follow_url);
251 }
252
253 if ($line =~ m/ERROR\s+\d+/) {
254 $error_text .= $line;
255 }
256
257 $full_text .= $line;
258 }
259
260 close($chld_in);
261 close($chld_out);
262
263 # Program terminates only when the following line is included
264 # http://perldoc.perl.org/IPC/Open2.html explains why this is necessary
265 # it prevents the child from turning into a "zombie process".
266 # While the wget process terminates without it, this perl script does not:
267 # the DOS prompt is not returned without it.
268 waitpid $childpid, 0;
269
270 my $command_status = $?;
271 if ($command_status != 0) {
272 $error_text .= "Exit error: $command_status";
273 }
274
275 if ($changed_dir) {
276 chdir $current_dir;
277 }
278
279 my $final_follow = pop(@follow_list); # might be undefined, but that's OK
280
281 return ($full_text,$error_text,$final_follow);
282}
283
284
285# TODO: Check if the URL is valid?? Not sure what should be in this function yet!!
286sub checkURL
287{
288 my ($self) = @_;
289 if ($self->{'url'} eq "")
290 {
291 &error("checkURL","no URL is specified!! Please specifies the URL for downloading.");
292 }
293}
294
295sub error
296{
297 my ($strFunctionName,$strError) = @_;
298 {
299 print "Error occoured in WgetDownload.pm\n".
300 "In Function:".$strFunctionName."\n".
301 "Error Message:".$strError."\n";
302 exit(-1);
303 }
304}
305
3061;
Note: See TracBrowser for help on using the repository browser.