source: main/trunk/greenstone2/perllib/servercontrol.pm@ 32830

Last change on this file since 32830 was 32830, checked in by ak19, 5 years ago

Second and final part of fixing up OAI stuff so that there's no lock on the index db by OAI servlet side when a collection is being rebuilt. The bug was that the OAI servlet side would still keep a filelock on the db (index col db and etc oai-inf db, to be precise) when a collection was deactivated before moving build to index during the activate.pl stage. That's because the OAIMessageRouter does not responde to (de)activate messages sent to the regular library servlet's MessageRouter. This commit: Getting servercontrol.pm of activate.pl to send a request to (de)activate a collection to the OAIMessageRouter was far more involved: although OAIMessageRouter inherits from MessageRouter, it did not recognise the (de)activate query params because it specifically only recognises OAI verbs like Identify and the special case of 'reset' sent to the OAIMessageRouter. So added in pathways for activate and deactivate to be recognised and processed. Now servercontrol.pm will send (de)activate requests to both the MessageRouter and the OAIMessageRouter. And there's further support for if a collection is not an OAICollection (not part of the list of collections maintained by OAIReceptionist). What I don't have working, is that the collection is still enumerated by ListSets of the OAI servlet whereas attempting to view records and identifiers of the deactivated set fails. This misbehaviour doesn't impact rebuilding with activate.pl since it both deactivates then activates a collection, so a collection is not meant to remain in the deactivated state. The fix may be more complicated than removing the collection from OAIReceptionist's list of sets, since the OAI side deals with supercollections etc when it first loads OAICollections. So any fix has to take that into account.

File size: 27.6 KB
Line 
1#############################################################################
2#
3# activate.pm -- functions to get the GS library URL, ping the library URL,
4# activate and deactivate a collection.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package servercontrol;
29
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and vice versa
33no strict 'subs'; # allow barewords (eg STDERR) as function arguments
34
35# Greenstone includes
36use printusage;
37use parse2;
38
39
40# The perl library imports below are used by deprecated methods config_old(), is_URL_active() and pingHost()
41# If the following library imports are not supported by your perl installation, comment out these
42# imports and move the methods config_old(), is_URL_active() and pingHost() out to a temporary file.
43use HTTP::Response;
44use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
45 # don't import LWP::Simple's head function by name since it can conflict with CGI:head())
46#use CGI qw(:standard); # then only CGI.pm defines a head()
47use Net::Ping;
48use URI;
49
50
51sub new
52{
53 my $class = shift(@_);
54
55 my ($qualified_collection, $site, $verbosity, $build_dir, $index_dir, $collect_dir, $library_url, $library_name) = @_;
56
57 # library_url: to be specified on the cmdline if not using a GS-included web server
58 # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
59
60 my $self = {'build_dir' => $build_dir,
61 'index_dir' => $index_dir,
62 'collect_dir' => $collect_dir,
63 'site' => $site,
64 'qualified_collection' => $qualified_collection,
65 #'is_persistent_server' => undef,
66 'library_url' => $library_url || $ENV{'GSDL_LIBRARY_URL'} || undef, # to be specified on the cmdline if not using a GS-included web server
67 'library_name' => $library_name,
68 #'gs_mode' => "gs2",
69 'verbosity' => $verbosity || 2
70 };
71
72 if ((defined $site) && ($site ne "")) { # GS3
73 $self->{'gs_mode'} = "gs3";
74 } else {
75 $self->{'gs_mode'} = "gs2";
76 }
77
78 return bless($self, $class);
79}
80
81## TODO: gsprintf to $self->{'out'} in these 2 print functions
82## See buildcolutils.pm new() for setting up $out
83
84sub print_task_msg {
85 my $self = shift(@_);
86 my ($task_msg, $verbosity_setting) = @_;
87
88 $verbosity_setting = $self->{'verbosity'} unless $verbosity_setting;
89 #$verbosity_setting = 1 unless defined $verbosity;
90 if($verbosity_setting >= 1) {
91 print STDERR "\n";
92 print STDERR "************************\n";
93 print STDERR "* $task_msg\n";
94 print STDERR "************************\n";
95 }
96}
97
98# Prints messages if the verbosity is right. Does not add new lines.
99sub print_msg {
100 my $self = shift(@_);
101 my ($msg, $min_verbosity, $verbosity_setting) = @_;
102
103 # only display error messages if the current
104 # verbosity setting >= the minimum verbosity level
105 # needed for that message to be displayed.
106
107 $verbosity_setting = $self->{'verbosity'} unless defined $verbosity_setting;
108 $min_verbosity = 1 unless defined $min_verbosity;
109 if($verbosity_setting >= $min_verbosity) { # by default display all 1 messages
110 print STDERR "$msg";
111 }
112}
113
114# Method to send a command to a GS2 or GS3 library_URL
115# the commands used in this script can be activate, deactivate, ping,
116# and is-persistent (is-persistent only implemented for GS2).
117sub config {
118 my $self = shift(@_);
119 my ($command, $check_message_against_regex, $expected_error_code, $silent, $oai_servlet) = @_;
120
121 my $library_url = $self->get_library_URL(); #$self->{'library_url'};
122 if($oai_servlet) { # if asked to contact the oaiserver servlet, then
123 # replace the library servlet name with oaiserver servlet name
124 $library_url =~ s@([^/]*?)$@$oai_servlet@;
125 }
126
127 # Gatherer.java's configGS3Server doesn't use the site variable
128 # so we don't have to either
129
130 # for GS2, getting the HTTP status isn't enough, we need to read the output
131 # since this is what CollectionManager.config() stipulates.
132 # Using LWP::UserAgent::get($url) for this
133
134 if(!defined $library_url) {
135 return 0;
136 }
137 else {
138 # ampersands need to be escaped
139 # - with single quotes around it for linux for the cmd to run in bash subshell
140 # - with a ^ before it on windows for the cmd to run in a DOS prompt subshell
141 # - or the entire wget command should be nested in double quotes (single quotes don't work on windows)
142 my $wgetCommand = $command;
143
144 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
145 my $tmpfilename = &util::get_tmp_filename(".html"); # random file name with html extension in tmp location in which we'll store the HTML page retrieved by wget
146
147 # https://www.gnu.org/software/wget/manual/wget.html
148 # output-document set to - (STDOUT), so page is streamed to STDOUT
149 # timeout: 5 seconds, tries: 1
150 # wget sends status information and response code to STDERR, so redirect it to STDOUT
151 # Searching for "perl backtick operator redirect stderr to stdout":
152 # http://www.perlmonks.org/?node=How%20can%20I%20capture%20STDERR%20from%20an%20external%20command%3F
153 ##$wgetCommand = "\"$wget_file_path\" --spider -T 5 -t 1 \"$library_url$wgetCommand\" 2>&1"; # won't save page
154 #$wgetCommand = "\"$wget_file_path\" --output-document=- -T 5 -t 1 \"$library_url$wgetCommand\" 2>&1"; # THIS CAN MIX UP STDERR WITH STDOUT IN THE VERY LINE WE REGEX TEST AGAINST EXPECTED OUTPUT!!
155 $wgetCommand = "\"$wget_file_path\" --output-document=\"$tmpfilename\" -T 5 -t 1 \"$library_url$wgetCommand\" 2>&1"; # keep stderr (response code, response_content) separate from html page content
156
157 ##print STDERR "@@@@ $wgetCommand\n";
158
159 my $response_content = "";
160 my $response_code = undef;
161 #my $response_content = `$wgetCommand`; # Dr Bainbridge advises against using backticks for running a process. If capturing std output, use open():
162 if (open(PIN, "$wgetCommand |")) {
163 while (defined (my $perl_output_line = <PIN>)) {
164 $response_content = $response_content . $perl_output_line;
165 }
166 close(PIN);
167 } else {
168 print STDERR "servercontrol.pm::config() failed to run $wgetCommand\n";
169 }
170
171
172 my @lines = split( /\n/, $response_content );
173 foreach my $line (@lines) {
174 #print STDERR "@@@@ LINE: $line\n";
175 if($line =~ m@failed: Connection timed out.$@) { # linux
176 $response_code = "failed: Connection timed out.";
177 last; # break keyword in perl = last
178 }
179 elsif($line =~ m@Giving up.$@) { # windows (unless -T 5 -t 1 is not passed in)
180 $response_code = "failed: Giving up.";
181 last; # break keyword in perl = last
182 }
183 elsif($line =~ m@failed: Connection refused.$@) {
184 $response_code = "failed: Connection refused.";
185 last; # break keyword in perl = last
186 }
187 elsif($line =~ m@HTTP request sent, @) {
188 $response_code = $line;
189 $response_code =~ s@[^\d]*(.*)$@$1@;
190 last;
191 }
192 }
193
194 if($command =~ m@ping@ && $response_code =~ m@failed: (Connection refused|Giving up)@) {
195 # server not running
196 $self->print_msg("*** Server not running. $library_url$command\n", 3);
197 &FileUtils::removeFiles($tmpfilename); # get rid of the ping response's temporary html file we downloaded
198 return 0;
199 }
200 if($response_code && (index($response_code, "200") != -1)) {
201 $self->print_msg("*** Command $library_url$command\n", 3);
202 $self->print_msg("*** HTTP Response Status: $response_code - Complete.", 3);
203
204 # check the page content is as expected
205 #my $resultstr = $response_content;
206
207 # The following file reading section is a candidate to use FileUtils::readUTF8File()
208 # in place of calling sysread() directly. But only if we can reason we'd be working with UTF8
209 open(FIN,"<$tmpfilename") or die "servercontrol.pm: Unable to open $tmpfilename to read ping response page...ERROR: $!\n";
210 my $resultstr;
211 # Read in the entire contents of the file in one hit
212 ##sysread(FIN, $resultstr, -s FIN);
213 # GLI GBuildProgressMonitor.hasSignalledStop() removes any lines that start with elements it doesn't recognise
214 # but we want to see the lines with HTML tags as they are the most interesting: they embed success/failure messages
215 # So read line by line from the downloaded web page and replace <> with [] so that GLI can display these lines too
216 while (my $line = <FIN>) {
217 chomp $line;
218 $line =~ s@<@[@g;
219 $line =~ s@>@]@g;
220 $resultstr .= "$line\n";
221 }
222 close(FIN);
223 &FileUtils::removeFiles($tmpfilename); # get rid of the ping response's temporary html file we downloaded
224
225
226 #$resultstr =~ s@.*gs_content\"\>@@s; ## only true for default library servlet
227 #$resultstr =~ s@</div>.*@@s;
228 if($resultstr =~ m/$check_message_against_regex/) {
229 $self->print_msg(" Response as expected.\n", 3);
230 $self->print_msg("@@@@@@ Got result:\n$resultstr\n", 4);
231 return 1;
232 } else {
233 # if we expect the collection to be inactive, then we'd be in silent mode: if so,
234 # don't print out the "ping did not succeed" response, but print out any other messages
235
236 # So we only suppress the ping col "did not succeed" response if we're in silent mode
237 # But if any message other than ping "did not succeed" is returned, we always print it
238 if($resultstr !~ m/did not succeed/ || !$silent) {
239 $self->print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
240 $self->print_msg("*** Got message:\n$response_content.\n", 4);
241 $self->print_msg("*** Got result:\n$resultstr\n", 3);
242 }
243 return 0; # ping on a collection may "not succeed."
244 }
245 }
246 elsif($response_code && $response_code =~ m@^(4|5)\d\d@) { # client side errors start with 4xx, server side with 5xx
247 # check the page content is as expected
248 if(defined $expected_error_code && $response_code =~ m@^$expected_error_code@) {
249 $self->print_msg(" Response status $response_code as expected.\n", 3);
250 } else {
251 $self->print_msg("*** Command $library_url$command\n");
252 $self->print_msg("*** Unexpected error type 1. HTTP Response Status: $response_code - Failed.\n");
253 }
254 return 0; # return false, since the response_code was an error, expected or not
255 }
256 else { # also if response_code is still undefined, as can happen with connection timing out
257 $self->print_msg("*** Command $library_url$command\n");
258 if(defined $response_code) {
259 $self->print_msg("*** Unexpected error type 2. HTTP Response Status: $response_code - Failed.\n");
260 } else {
261 $self->print_msg("*** Unexpected error type 3. Failed:\n\n$response_content\n\n");
262 }
263 return 0;
264 }
265 #print STDERR "********** WgetCommand: $wgetCommand\n\n";
266 #print STDERR "********** Response_content:\n$response_content\n\n";
267 #print STDERR "********** Response_CODE: $response_code\n";
268
269 }
270}
271
272sub deactivate_collection {
273 my $self = shift(@_);
274
275 my $gs_mode = $self->{'gs_mode'};
276 my $qualified_collection = $self->{'qualified_collection'};
277
278 if($gs_mode eq "gs2") {
279 my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
280 my $check_message_against_regex = q/configured release-collection/;
281 $self->config($DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
282 }
283 elsif ($gs_mode eq "gs3") {
284 $self->print_msg("\t- Main library servlet\n");
285 my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
286 my $check_message_against_regex = "collection: $qualified_collection deactivated";
287 $self->config($DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
288
289 # and deactivate the collection on OAIserver url too.
290 # NOTE: if it's not an OAI collection, then the message that the collection is "not enabled for OAI" is EXPECTED. Another possible valid outcome.
291 $self->print_msg("\t- OAI servlet\n");
292 $DEACTIVATE_COMMAND = "?deactivate=";
293 $check_message_against_regex = "(collection\: $qualified_collection deactivated|collection\: $qualified_collection is not enabled for OAI.)";
294 $self->config($DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, undef, undef, "oaiserver");
295 }
296}
297
298sub activate_collection {
299 my $self = shift(@_);
300
301 my $gs_mode = $self->{'gs_mode'};
302 my $qualified_collection = $self->{'qualified_collection'};
303
304 if($gs_mode eq "gs2") {
305 my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
306 my $check_message_against_regex = q/configured add-collection/;
307 $self->config($ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
308 }
309 elsif ($gs_mode eq "gs3") {
310 $self->print_msg("\t- Main library servlet\n");
311 my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
312 my $check_message_against_regex = "collection: $qualified_collection activated";
313 $self->config($ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
314
315 # and activate the collection on OAIserver url too.
316 # NOTE: if it's not an OAI collection, then the message that the collection is "not enabled for OAI" is EXPECTED. Another possible valid outcome.
317 $self->print_msg("\t- OAI servlet\n");
318 $ACTIVATE_COMMAND = "?activate=";
319 $check_message_against_regex = "(collection\: $qualified_collection activated|collection\: $qualified_collection is not enabled for OAI.)";
320 $self->config($ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, undef, undef, "oaiserver");
321 }
322}
323
324sub ping {
325 my $self = shift(@_);
326 my $command = shift(@_);
327 my $silent = shift(@_);
328
329 # If the GS server is not running, we *expect* to see a "500" status code.
330 # If the GS server is running, then "Ping" ... "succeeded" is expected on success.
331 # When pinging an inactive collection, it will say it did "not succeed". This is
332 # a message of interest to return.
333 my $check_responsemsg_against_regex = q/(succeeded)/;
334 my $expected_error_code = 500;
335
336 $self->print_msg("*** COMMAND WAS: |$command| ***\n", 4);
337
338 return $self->config($command, $check_responsemsg_against_regex, $expected_error_code, $silent);
339}
340
341# send a pingaction to the GS library. General server-level ping.
342sub ping_library {
343 my $self = shift(@_);
344
345 my $gs_mode = $self->{'gs_mode'};
346
347 my $command = "";
348 if($gs_mode eq "gs2") {
349 $command = "?a=ping";
350 }
351 elsif ($gs_mode eq "gs3") {
352 $command = "?a=s&sa=ping";
353 }
354 return $self->ping($command);
355}
356
357
358# send a pingaction to a collection in GS library to check if it's active
359sub ping_library_collection {
360 my $self = shift(@_);
361 my $silent = shift(@_);
362
363 my $gs_mode = $self->{'gs_mode'};
364 my $qualified_collection = $self->{'qualified_collection'};
365
366 my $command = "";
367 if($gs_mode eq "gs2") {
368 $command = "?a=ping&c=$qualified_collection";
369 }
370 elsif ($gs_mode eq "gs3") {
371 $command = "?a=s&sa=ping&st=collection&sn=$qualified_collection";
372 }
373 return $self->ping($command, $silent);
374}
375
376# return true if server is persistent, by calling is-persistent on library_url
377# this is only for GS2, since the GS3 server is always persistent
378sub is_persistent {
379 my $self = shift(@_);
380
381 if($self->{'gs_mode'} eq "gs3") { # GS3 server is always persistent
382 return 1;
383 }
384
385 my $command = "?a=is-persistent";
386 my $check_responsemsg_against_regex = q/true/; # isPersistent: true versus isPersistent: false
387 return $self->config($command, $check_responsemsg_against_regex);
388}
389
390sub set_library_URL {
391 my $self = shift(@_);
392 my $library_url = shift(@_);
393 $self->{'library_url'} = $library_url;
394}
395
396sub get_library_URL {
397 my $self = shift(@_);
398
399 # For web servers that are external to a Greenstone installation,
400 # the user can pass in their web server's library URL.
401 if($self->{'library_url'}) {
402 return $self->{'library_url'};
403 }
404
405 # For web servers included with GS (like tomcat for GS3 and server.exe
406 # and apache for GS2), we work out the library URL:
407 my ($gs_mode, $lib_name); # gs_mode can be gs3 or gs2, lib_name is the custom servlet name
408 $gs_mode = $self->{'gs_mode'};
409 $lib_name = $self->{'library_name'};
410
411 # If we get here, we are dealing with a server included with GS.
412 # For GS3, we ask ant for the library URL.
413 # For GS2, we derive the URL from the llssite.cfg file.
414
415 # note that unless we pass in $get_public_url=1, we now get the local http URL
416 # by default (e.g. http://127.0.0.1:httpPort/greenstone/library)
417 my $url = &util::get_full_greenstone_url_prefix($gs_mode, $lib_name); # found largely identical method copied
418 # into util.pm. Don't want duplicates, so calling that from here.
419
420 # either the url is still undef or it is now set
421 #print STDERR "\n@@@@@ final URL:|$url|\n" if $url;
422 #print STDERR "\n@@@@@ URL still undef\n" if !$url;
423
424 if (defined $url) {
425 $self->{'library_url'} = $url;
426 }
427
428 return $url;
429}
430
431sub do_deactivate {
432 my $self = shift(@_);
433
434 # 1. Get library URL
435
436 # For web servers that are external to a Greenstone installation,
437 # the user can pass in their web server's library URL.
438 # For web servers included with GS (like tomcat for GS3 and server.exe
439 # and apache for GS2), we work out the library URL:
440
441 # Can't do $self->{'library_url'}, because it may not yet be set
442 my $library_url = $self->get_library_URL(); # returns undef if no valid server URL
443
444 if(!defined $library_url) { # undef if no valid server URL
445 return; # can't do any deactivation without a valid server URL
446 }
447
448 my $is_persistent_server = $self->{'is_persistent_server'};
449 my $qualified_collection = $self->{'qualified_collection'};
450
451 # CollectionManager's installCollection phase in GLI
452 # 2. Ping the library URL, and if it's a persistent server and running, release the collection
453
454 $self->print_msg("Pinging $library_url\n");
455 if ($self->ping_library()) { # server running
456
457 # server is running, so release the collection if
458 # the server is persistent and the collection is active
459
460 # don't need to work out persistency of server more than once, since the libraryURL hasn't changed
461 if (!defined $is_persistent_server) {
462 $self->print_msg("Checking if Greenstone server is persistent\n");
463 $is_persistent_server = $self->is_persistent();
464 $self->{'is_persistent_server'} = $is_persistent_server;
465 }
466
467 if ($is_persistent_server) { # only makes sense to issue activate and deactivate cmds to a persistent server
468
469 $self->print_msg("Checking if the collection $qualified_collection is already active\n");
470 my $collection_active = $self->ping_library_collection();
471
472 if ($collection_active) {
473 $self->print_msg("De-activating collection $qualified_collection\n");
474 $self->deactivate_collection();
475 }
476 else {
477 $self->print_msg("Collection is not active => No need to deactivate\n");
478 }
479 }
480 else {
481 $self->print_msg("Server is not persistent => No need to deactivate collection\n");
482 }
483 }
484 else {
485 $self->print_msg("No response to Ping => Taken to mean server is not running\n");
486 }
487
488 return $is_persistent_server;
489}
490
491sub do_activate {
492 my $self = shift @_;
493
494 my $library_url = $self->get_library_URL(); # Can't do $self->{'library_url'}; as it may not be set yet
495
496 if(!defined $library_url) { # undef if no valid server URL
497 return; # nothing to activate if without valid server URL
498 }
499
500 my $is_persistent_server = $self->{'is_persistent_server'};
501 my $qualified_collection = $self->{'qualified_collection'};
502
503 $self->print_msg("Pinging $library_url\n");
504 if ($self->ping_library()) { # server running
505
506 # don't need to work out persistency of server more than once, since the libraryURL hasn't changed
507 if (!defined $is_persistent_server) {
508 $self->print_msg("Checking if Greenstone server is persistent\n");
509 $is_persistent_server = $self->is_persistent();
510 $self->{'is_persistent_server'} = $is_persistent_server;
511 }
512
513 if ($is_persistent_server) { # persistent server, so can try activating collection
514
515 $self->print_msg("Checking if the collection $qualified_collection is not already active\n");
516
517 # Since we could have deactivated the collection at this point,
518 # it is likely that it is not yet active. When pinging the collection
519 # a "ping did not succeed" message is expected, therefore tell the ping
520 # to proceed silently
521 my $silent = 1;
522 my $collection_active = $self->ping_library_collection($silent);
523
524 if (!$collection_active) {
525 $self->print_msg(" Collection is not active.\n");
526 $self->print_msg("Activating collection $qualified_collection\n");
527 $self->activate_collection();
528
529 # unless an error occurred, the collection should now be active:
530 $collection_active = $self->ping_library_collection(); # not silent if ping did not succeed
531 if(!$collection_active) {
532 $self->print_msg("ERROR: collection $qualified_collection did not get activated\n");
533 }
534 }
535 else {
536 $self->print_msg("Collection is already active => No need to activate\n");
537 }
538 }
539 else {
540 $self->print_msg("Server is not persistent => No need to activate collection\n");
541 }
542 }
543 else {
544 $self->print_msg("No response to Ping => Taken to mean server is not running\n");
545 }
546
547 return $is_persistent_server;
548}
549
550
551#########################################################
552### UNUSED METHODS - CAN BE HANDY
553
554
555# This method uses the perl libraries we're advised to use in place of wget for pinging and retrieving web
556# pages. The problem is that not all perl installations may support these libraries. So we now use the new
557# config() method further above, which uses the wget included in Greenstone binary installations.
558# If the library imports at page top conflict, comment out those imports and move the methods config_old(),
559# is_URL_active() and pingHost() out to a temporary file.
560#
561# If for some reason you can't use wget, then rename the config() method to config_old(), and rename the
562# method below to config() and things should work as before.
563sub config_old {
564 my $self = shift(@_);
565 my ($command, $check_message_against_regex, $expected_error_code, $silent, $oai_servlet) = @_;
566
567 my $library_url = $self->get_library_URL(); #$self->{'library_url'};
568 if($oai_servlet) { # if asked to contact the oaiserver servlet, then
569 # replace the library servlet name with oaiserver servlet name
570 $library_url =~ s@([^/]*?)$@$oai_servlet@;
571 }
572
573 # Gatherer.java's configGS3Server doesn't use the site variable
574 # so we don't have to either
575
576 # for GS2, getting the HTTP status isn't enough, we need to read the output
577 # since this is what CollectionManager.config() stipulates.
578 # Using LWP::UserAgent::get($url) for this
579
580 if(!defined $library_url) {
581 return 0;
582 }
583 else {
584 $ua->timeout(5); # set LWP useragent to 5s max timeout for testing the URL
585 # Need to set this, else it takes I don't know how long to timeout
586 # http://www.perlmonks.org/?node_id=618534
587
588 # http://search.cpan.org/~gaas/libwww-perl-6.04/lib/LWP/UserAgent.pm
589 # use LWP::UserAgent's get($url) since it returns an HTTP::Response code
590
591 my $response_obj = $ua->get($library_url.$command);
592
593 # $response_obj->content stores the content and $response_obj->code the HTTP response code
594 my $response_code = $response_obj->code();
595
596 if(LWP::Simple::is_success($response_code)) {# $response_code eq RC_OK) { # LWP::Simple::is_success($response_code)
597 $self->print_msg("*** Command $library_url$command\n", 3);
598 $self->print_msg("*** HTTP Response Status: $response_code - Complete.", 3);
599
600 # check the page content is as expected
601 my $response_content = $response_obj->content;
602 my $resultstr = $response_content;
603 $resultstr =~ s@.*gs_content\"\>@@s;
604 $resultstr =~ s@</div>.*@@s;
605
606 if($resultstr =~ m/$check_message_against_regex/) {#if($response_content =~ m/$check_message_against_regex/) {
607 $self->print_msg(" Response as expected.\n", 3);
608 $self->print_msg("@@@@@@ Got result:\n$resultstr\n", 4);
609 return 1;
610 } else {
611 # if we expect the collection to be inactive, then we'd be in silent mode: if so,
612 # don't print out the "ping did not succeed" response, but print out any other messages
613
614 # So we only suppress the ping col "did not succeed" response if we're in silent mode
615 # But if any message other than ping "did not succeed" is returned, we always print it
616 if($resultstr !~ m/did not succeed/ || !$silent) {#if($response_content !~ m/did not succeed/ || !$silent) {
617 $self->print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
618 $self->print_msg("*** Got message:\n$response_content.\n", 4);
619 $self->print_msg("*** Got result:\n$resultstr\n", 3);
620 }
621 return 0; # ping on a collection may "not succeed."
622 }
623 }
624 elsif(LWP::Simple::is_error($response_code)) { # method exported by LWP::Simple, along with HTTP::Status constants
625 # check the page content is as expected
626 if(defined $expected_error_code && $response_code == $expected_error_code) {
627 $self->print_msg(" Response status $response_code as expected.\n", 3);
628 } else {
629 $self->print_msg("*** Command $library_url$command\n");
630 $self->print_msg("*** Unexpected error. HTTP Response Status: $response_code - Failed.\n");
631 }
632 return 0; # return false, since the response_code was an error, expected or not
633 }
634 else {
635 $self->print_msg("*** Command $library_url$command\n");
636 $self->print_msg("*** Unexpected error. HTTP Response Status: $response_code - Failed.\n");
637 return 0;
638 }
639 }
640}
641
642# This method is now unused. Using ping_library instead to send the ping action to a
643# GS2/GS3 server. This method can be used more generally to test whether a URL is alive.
644# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
645# and http://www.perlmonks.org/?node_id=618534
646sub is_URL_active {
647 my $url = shift(@_); # gs3 or gs2 URL
648
649 my $status = 0;
650 if(defined $url) {
651 $ua->timeout(10); # set LWP useragent to 5s max timeout for testing the URL
652 # Need to set this, else it takes I don't know how long to timeout
653 # http://www.perlmonks.org/?node_id=618534
654
655 $status = LWP::Simple::head($url); # returns empty list of headers if it fails
656 # LWP::Simple::get($url) is more intensive, so don't need to do that
657 #print STDERR "**** $url is alive.\n" if $status;
658 }
659 return $status;
660}
661
662# Pinging seems to always return true, so this method doesn't work
663sub pingHost {
664 my $url = shift(@_); # gs3 or gs2 URL
665
666 my $status = 0;
667 if(defined $url) {
668 # Get just the domain. "http://localhost/gsdl?uq=332033495" becomes "localhost"
669 # "http://localhost/greenstone/cgi-bin/library.cgi" becomes "localhost" too
670
671 #my $host = $url;
672 #$host =~ s@^https?:\/\/(www.)?@@;
673 #$host =~ s@\/.*@@;
674 #print STDERR "**** HOST: $host\n";
675
676 # More robust way
677 # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
678 my $uri = URI->new( $url );
679 my $host = $uri->host;
680
681 # Ping the host. http://perldoc.perl.org/Net/Ping.html
682 my $p = Net::Ping->new();
683 $status = $p->ping($host); # || 0. Appears to set to undef rather than 0
684 print STDERR "**** $host is alive.\n" if $status; #print "$host is alive.\n" if $p->ping($host);
685 $p->close();
686 }
687 # return whether pinging was a success or failure
688 return $status;
689}
690
6911;
Note: See TracBrowser for help on using the repository browser.