source: main/trunk/greenstone2/perllib/servercontrol.pm@ 31513

Last change on this file since 31513 was 31513, checked in by ak19, 7 years ago

servercontrol.pm uses established functions to generate tmpfiles in tmp directories, and then immediately deletes them after using them. Made some modifications for this in util.pm and added some extra helper functions there.

File size: 28.6 KB
Line 
1#############################################################################
2#
3# activate.pm -- functions to get the GS library URL, ping the library URL,
4# activate and deactivate a collection.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###############################################################################
27
28package servercontrol;
29
30
31use strict;
32no strict 'refs'; # allow filehandles to be variables and vice versa
33no strict 'subs'; # allow barewords (eg STDERR) as function arguments
34
35# Greenstone includes
36use printusage;
37use parse2;
38
39
40# The perl library imports below are used by deprecated methods config_old(), is_URL_active() and pingHost()
41# If the following library imports are not supported by your perl installation, comment out these
42# imports and move the methods config_old(), is_URL_active() and pingHost() out to a temporary file.
43use HTTP::Response;
44use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
45 # don't import LWP::Simple's head function by name since it can conflict with CGI:head())
46#use CGI qw(:standard); # then only CGI.pm defines a head()
47use Net::Ping;
48use URI;
49
50
51sub new
52{
53 my $class = shift(@_);
54
55 my ($qualified_collection, $site, $verbosity, $build_dir, $index_dir, $collect_dir, $library_url, $library_name) = @_;
56
57 # library_url: to be specified on the cmdline if not using a GS-included web server
58 # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
59
60 my $self = {'build_dir' => $build_dir,
61 'index_dir' => $index_dir,
62 'collect_dir' => $collect_dir,
63 'site' => $site,
64 'qualified_collection' => $qualified_collection,
65 #'is_persistent_server' => undef,
66 'library_url' => $library_url || $ENV{'GSDL_LIBRARY_URL'} || undef, # to be specified on the cmdline if not using a GS-included web server
67 'library_name' => $library_name,
68 #'gs_mode' => "gs2",
69 'verbosity' => $verbosity || 2
70 };
71
72 if ((defined $site) && ($site ne "")) { # GS3
73 $self->{'gs_mode'} = "gs3";
74 } else {
75 $self->{'gs_mode'} = "gs2";
76 }
77
78 return bless($self, $class);
79}
80
81## TODO: gsprintf to $self->{'out'} in these 2 print functions
82## See buildcolutils.pm new() for setting up $out
83
84sub print_task_msg {
85 my $self = shift(@_);
86 my ($task_msg, $verbosity_setting) = @_;
87
88 $verbosity_setting = $self->{'verbosity'} unless $verbosity_setting;
89 #$verbosity_setting = 1 unless defined $verbosity;
90 if($verbosity_setting >= 1) {
91 print STDERR "\n";
92 print STDERR "************************\n";
93 print STDERR "* $task_msg\n";
94 print STDERR "************************\n";
95 }
96}
97
98# Prints messages if the verbosity is right. Does not add new lines.
99sub print_msg {
100 my $self = shift(@_);
101 my ($msg, $min_verbosity, $verbosity_setting) = @_;
102
103 # only display error messages if the current
104 # verbosity setting >= the minimum verbosity level
105 # needed for that message to be displayed.
106
107 $verbosity_setting = $self->{'verbosity'} unless defined $verbosity_setting;
108 $min_verbosity = 1 unless defined $min_verbosity;
109 if($verbosity_setting >= $min_verbosity) { # by default display all 1 messages
110 print STDERR "$msg";
111 }
112}
113
114# Method to send a command to a GS2 or GS3 library_URL
115# the commands used in this script can be activate, deactivate, ping,
116# and is-persistent (is-persistent only implemented for GS2).
117sub config {
118 my $self = shift(@_);
119 my ($command, $check_message_against_regex, $expected_error_code, $silent) = @_;
120
121 my $library_url = $self->get_library_URL(); #$self->{'library_url'};
122
123
124 # Gatherer.java's configGS3Server doesn't use the site variable
125 # so we don't have to either
126
127 # for GS2, getting the HTTP status isn't enough, we need to read the output
128 # since this is what CollectionManager.config() stipulates.
129 # Using LWP::UserAgent::get($url) for this
130
131 if(!defined $library_url) {
132 return 0;
133 }
134 else {
135 # ampersands need to be escaped
136 # - with single quotes around it for linux for the cmd to run in bash subshell
137 # - with a ^ before it on windows for the cmd to run in a DOS prompt subshell
138 # - or the entire wget command should be nested in double quotes (single quotes don't work on windows)
139 my $wgetCommand = $command;
140
141 my $wget_file_path = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "wget");
142 my $tmpfilename = &util::get_tmp_filename(".html", 1); # random file name with html extension in timestamped-tmpdir location (2nd param=1)
143 # wherein we'll store the HTML page retrieved by wget
144
145 # https://www.gnu.org/software/wget/manual/wget.html
146 # output-document set to - (STDOUT), so page is streamed to STDOUT
147 # timeout: 5 seconds, tries: 1
148 # wget sends status information and response code to STDERR, so redirect it to STDOUT
149 # Searching for "perl backtick operator redirect stderr to stdout":
150 # http://www.perlmonks.org/?node=How%20can%20I%20capture%20STDERR%20from%20an%20external%20command%3F
151 ##$wgetCommand = "\"$wget_file_path\" --spider -T 5 -t 1 \"$library_url$wgetCommand\" 2>&1"; # won't save page
152 #$wgetCommand = "\"$wget_file_path\" --output-document=- -T 5 -t 1 \"$library_url$wgetCommand\" 2>&1"; # THIS CAN MIX UP STDERR WITH STDOUT IN THE VERY LINE WE REGEX TEST AGAINST EXPECTED OUTPUT!!
153 $wgetCommand = "\"$wget_file_path\" --output-document=$tmpfilename -T 5 -t 1 \"$library_url$wgetCommand\" 2>&1"; # keep stderr (response code, response_content) separate from html page content
154
155 ##print STDERR "@@@@ $wgetCommand\n";
156
157 my $response_content = `$wgetCommand`;
158 my $response_code = undef;
159 my @lines = split( /\n/, $response_content );
160 foreach my $line (@lines) {
161 #print STDERR "@@@@ LINE: $line\n";
162 if($line =~ m@failed: Connection timed out.$@) { # linux
163 $response_code = "failed: Connection timed out.";
164 last; # break keyword in perl = last
165 }
166 elsif($line =~ m@Giving up.$@) { # windows (unless -T 5 -t 1 is not passed in)
167 $response_code = "failed: Giving up.";
168 last; # break keyword in perl = last
169 }
170 elsif($line =~ m@failed: Connection refused.$@) {
171 $response_code = "failed: Connection refused.";
172 last; # break keyword in perl = last
173 }
174 elsif($line =~ m@HTTP request sent, @) {
175 $response_code = $line;
176 $response_code =~ s@[^\d]*(.*)$@$1@;
177 last;
178 }
179 }
180
181 if($command =~ m@ping@ && $response_code =~ m@failed: (Connection refused|Giving up)@) {
182 # server not running
183 $self->print_msg("*** Server not running. $library_url$command\n", 3);
184 &util::rm_tmp_file($tmpfilename); # will also remove any randomly named tmp directories containing the file
185 return 0;
186 }
187 if($response_code && $response_code eq "200 OK") {
188 $self->print_msg("*** Command $library_url$command\n", 3);
189 $self->print_msg("*** HTTP Response Status: $response_code - Complete.", 3);
190
191 # check the page content is as expected
192 #my $resultstr = $response_content;
193
194 open(FIN,"<$tmpfilename") or die "servercontrol.pm: Unable to open $tmpfilename to read ping response page...ERROR: $!\n";
195 my $resultstr;
196 # Read in the entire contents of the file in one hit
197 sysread(FIN, $resultstr, -s FIN);
198 close(FIN);
199 &util::rm_tmp_file($tmpfilename); # will also remove any randomly named tmp directories containing the file
200
201
202 #$resultstr =~ s@.*gs_content\"\>@@s; ## only true for default library servlet
203 #$resultstr =~ s@</div>.*@@s;
204 if($resultstr =~ m/$check_message_against_regex/) {
205 $self->print_msg(" Response as expected.\n", 3);
206 $self->print_msg("@@@@@@ Got result:\n$resultstr\n", 4);
207 return 1;
208 } else {
209 # if we expect the collection to be inactive, then we'd be in silent mode: if so,
210 # don't print out the "ping did not succeed" response, but print out any other messages
211
212 # So we only suppress the ping col "did not succeed" response if we're in silent mode
213 # But if any message other than ping "did not succeed" is returned, we always print it
214 if($resultstr !~ m/did not succeed/ || !$silent) {
215 $self->print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
216 $self->print_msg("*** Got message:\n$response_content.\n", 4);
217 $self->print_msg("*** Got result:\n$resultstr\n", 3);
218 }
219 return 0; # ping on a collection may "not succeed."
220 }
221 }
222 elsif($response_code && $response_code =~ m@^(4|5)\d\d@) { # client side errors start with 4xx, server side with 5xx
223 # check the page content is as expected
224 if(defined $expected_error_code && $response_code =~ m@^$expected_error_code@) {
225 $self->print_msg(" Response status $response_code as expected.\n", 3);
226 } else {
227 $self->print_msg("*** Command $library_url$command\n");
228 $self->print_msg("*** Unexpected error type 1. HTTP Response Status: $response_code - Failed.\n");
229 }
230 return 0; # return false, since the response_code was an error, expected or not
231 }
232 else { # also if response_code is still undefined, as can happen with connection timing out
233 $self->print_msg("*** Command $library_url$command\n");
234 if(defined $response_code) {
235 $self->print_msg("*** Unexpected error type 2. HTTP Response Status: $response_code - Failed.\n");
236 } else {
237 $self->print_msg("*** Unexpected error type 3. Failed:\n\n$response_content\n\n");
238 }
239 return 0;
240 }
241 #print STDERR "********** WgetCommand: $wgetCommand\n\n";
242 #print STDERR "********** Response_content:\n$response_content\n\n";
243 #print STDERR "********** Response_CODE: $response_code\n";
244
245 }
246}
247
248sub deactivate_collection {
249 my $self = shift(@_);
250
251 my $gs_mode = $self->{'gs_mode'};
252 my $qualified_collection = $self->{'qualified_collection'};
253
254 if($gs_mode eq "gs2") {
255 my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
256 my $check_message_against_regex = q/configured release-collection/;
257 $self->config($DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
258 }
259 elsif ($gs_mode eq "gs3") {
260 my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
261 my $check_message_against_regex = "collection: $qualified_collection deactivated";
262 $self->config($DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
263 }
264}
265
266sub activate_collection {
267 my $self = shift(@_);
268
269 my $gs_mode = $self->{'gs_mode'};
270 my $qualified_collection = $self->{'qualified_collection'};
271
272 if($gs_mode eq "gs2") {
273 my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
274 my $check_message_against_regex = q/configured add-collection/;
275 $self->config($ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
276 }
277 elsif ($gs_mode eq "gs3") {
278 my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
279 my $check_message_against_regex = "collection: $qualified_collection activated";
280 $self->config($ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
281 }
282}
283
284sub ping {
285 my $self = shift(@_);
286 my $command = shift(@_);
287 my $silent = shift(@_);
288
289 # If the GS server is not running, we *expect* to see a "500" status code.
290 # If the GS server is running, then "Ping" ... "succeeded" is expected on success.
291 # When pinging an inactive collection, it will say it did "not succeed". This is
292 # a message of interest to return.
293 my $check_responsemsg_against_regex = q/(succeeded)/;
294 my $expected_error_code = 500;
295
296 $self->print_msg("*** COMMAND WAS: |$command| ***\n", 4);
297
298 return $self->config($command, $check_responsemsg_against_regex, $expected_error_code, $silent);
299}
300
301# send a pingaction to the GS library. General server-level ping.
302sub ping_library {
303 my $self = shift(@_);
304
305 my $gs_mode = $self->{'gs_mode'};
306
307 my $command = "";
308 if($gs_mode eq "gs2") {
309 $command = "?a=ping";
310 }
311 elsif ($gs_mode eq "gs3") {
312 $command = "?a=s&sa=ping";
313 }
314 return $self->ping($command);
315}
316
317
318# send a pingaction to a collection in GS library to check if it's active
319sub ping_library_collection {
320 my $self = shift(@_);
321 my $silent = shift(@_);
322
323 my $gs_mode = $self->{'gs_mode'};
324 my $qualified_collection = $self->{'qualified_collection'};
325
326 my $command = "";
327 if($gs_mode eq "gs2") {
328 $command = "?a=ping&c=$qualified_collection";
329 }
330 elsif ($gs_mode eq "gs3") {
331 $command = "?a=s&sa=ping&st=collection&sn=$qualified_collection";
332 }
333 return $self->ping($command, $silent);
334}
335
336# return true if server is persistent, by calling is-persistent on library_url
337# this is only for GS2, since the GS3 server is always persistent
338sub is_persistent {
339 my $self = shift(@_);
340
341 if($self->{'gs_mode'} eq "gs3") { # GS3 server is always persistent
342 return 1;
343 }
344
345 my $command = "?a=is-persistent";
346 my $check_responsemsg_against_regex = q/true/; # isPersistent: true versus isPersistent: false
347 return $self->config($command, $check_responsemsg_against_regex);
348}
349
350sub set_library_URL {
351 my $self = shift(@_);
352 my $library_url = shift(@_);
353 $self->{'library_url'} = $library_url;
354}
355
356sub get_library_URL {
357 my $self = shift(@_);
358
359 # For web servers that are external to a Greenstone installation,
360 # the user can pass in their web server's library URL.
361 if($self->{'library_url'}) {
362 return $self->{'library_url'};
363 }
364
365 # For web servers included with GS (like tomcat for GS3 and server.exe
366 # and apache for GS2), we work out the library URL:
367 my ($gs_mode, $lib_name); # gs_mode can be gs3 or gs2, lib_name is the custom servlet name
368 $gs_mode = $self->{'gs_mode'};
369 $lib_name = $self->{'library_name'};
370
371 # If we get here, we are dealing with a server included with GS.
372 # For GS3, we ask ant for the library URL.
373 # For GS2, we derive the URL from the llssite.cfg file.
374
375 my $url = undef;
376
377 if($gs_mode eq "gs2") {
378 my $llssite_cfg = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "llssite.cfg");
379
380 if(-f $llssite_cfg) {
381 # check llssite.cfg for line with url property
382 # for server.exe also need to use portnumber and enterlib properties
383
384 # Read in the entire contents of the file in one hit
385 if (!open (FIN, $llssite_cfg)) {
386 $self->print_msg("activate.pl::get_library_URL failed to open $llssite_cfg ($!)\n");
387 return undef;
388 }
389
390 my $contents;
391 sysread(FIN, $contents, -s FIN);
392 close(FIN);
393
394 my @lines = split(/[\n\r]+/, $contents); # split on carriage-returns and/or linefeeds
395 my $enterlib = "";
396 my $portnumber = "8282"; # will remain empty (implicit port 80) unless it's specifically been assigned
397
398 foreach my $line (@lines) {
399 if($line =~ m/^url=(.*)$/) {
400 $url = $1;
401 } elsif($line =~ m/^enterlib=(.*)$/) {
402 $enterlib = $1;
403 } elsif($line =~ m/^portnumber=(.*)$/) {
404 $portnumber = $1;
405 }
406 }
407
408 if(!$url) {
409 return undef;
410 }
411 elsif($url eq "URL_pending") { # library is not running
412 # do not process url=URL_pending in the file, since for server.exe
413 # this just means the Enter Library button hasn't been pressed yet
414 $url = undef;
415 }
416 else {
417 # In the case of server.exe, need to do extra work to get the proper URL
418 # But first, need to know whether we're indeed dealing with server.exe:
419
420 # compare the URL's domain to the full URL
421 # E.g. for http://localhost:8383/greenstone3/cgi-bin, the domain is localhost:8383
422 my $uri = URI->new( $url );
423 my $host = $uri->host;
424 #print STDERR "@@@@@ host: $host\n";
425 if($url =~ m/https?:\/\/$host(\/)?$/) {
426 #if($url !~ m/https?:\/\/$host:$portnumber(\/)?/ || $url =~ m/https?:\/\/$host(\/)?$/) {
427 # (if the URL does not contain the portnumber, OR if the port is implicitly 80 and)
428 # If the domain with http:// prefix is completely the same as the URL, assume server.exe
429 # then the actual URL is the result of suffixing the port and enterlib properties in llssite.cfg
430 $url = $url.":".$portnumber.$enterlib;
431 } # else, apache web server
432
433 }
434 }
435 } elsif($gs_mode eq "gs3") {
436 # Either check build.properties for tomcat.server, tomcat.port and app.name (and default servlet name).
437 # app.name is stored in app.path by build.xml. Need to move app.name in build.properties from build.xml
438
439 # Or, run the new target get-default-servlet-url
440 # the output can look like:
441 #
442 # Buildfile: build.xml
443 # [echo] os.name: Windows Vista
444 #
445 # get-default-servlet-url:
446 # [echo] http://localhost:8383/greenstone3/library
447 # BUILD SUCCESSFUL
448 # Total time: 0 seconds
449
450 #my $output = qx/ant get-default-servlet-url/; # backtick operator, to get STDOUT (else 2>&1)
451 # see http://stackoverflow.com/questions/799968/whats-the-difference-between-perls-backticks-system-and-exec
452
453 # The get-default-servlet-url ant target can be run from anywhere by specifying the
454 # location of GS3's ant build.xml buildfile. Activate.pl can be run from anywhere for GS3
455 # GSDL3SRCHOME will be set for GS3 by gs3-setup.sh, a step that would have been necessary
456 # to run the activate.pl script in the first place
457 my $perl_command = "ant -buildfile \"$ENV{'GSDL3SRCHOME'}/build.xml\" get-default-servlet-url";
458
459 if (open(PIN, "$perl_command |")) {
460 while (defined (my $perl_output_line = <PIN>)) {
461 if($perl_output_line =~ m@(https?):\/\/(\S*)@) { # grab all the non-whitespace chars
462 $url="$1://".$2; # preserve the http protocol #$url="http://".$1;
463 }
464 }
465 close(PIN);
466 } else {
467 $self->print_msg("activate.pl::get_library_URL: Failed to run $perl_command to work out library URL for $gs_mode\n");
468 }
469 if(defined $lib_name) {
470 # replace the servlet_name portion of the url found, with the given library_name
471 $url =~ s@/[^/]*$@/$lib_name@;
472 }
473 }
474
475 # either the url is still undef or it is now set
476 #print STDERR "\n@@@@@ final URL:|$url|\n" if $url;
477 #print STDERR "\n@@@@@ URL still undef\n" if !$url;
478
479 $self->{'library_url'} = $url;
480 return $url;
481}
482
483
484sub do_deactivate {
485 my $self = shift(@_);
486
487 # 1. Get library URL
488
489 # For web servers that are external to a Greenstone installation,
490 # the user can pass in their web server's library URL.
491 # For web servers included with GS (like tomcat for GS3 and server.exe
492 # and apache for GS2), we work out the library URL:
493
494 # Can't do $self->{'library_url'}, because it may not yet be set
495 my $library_url = $self->get_library_URL(); # returns undef if no valid server URL
496
497 if(!defined $library_url) { # undef if no valid server URL
498 return; # can't do any deactivation without a valid server URL
499 }
500
501 my $is_persistent_server = $self->{'is_persistent_server'};
502 my $qualified_collection = $self->{'qualified_collection'};
503
504 # CollectionManager's installCollection phase in GLI
505 # 2. Ping the library URL, and if it's a persistent server and running, release the collection
506
507 $self->print_msg("Pinging $library_url\n");
508 if ($self->ping_library()) { # server running
509
510 # server is running, so release the collection if
511 # the server is persistent and the collection is active
512
513 # don't need to work out persistency of server more than once, since the libraryURL hasn't changed
514 if (!defined $is_persistent_server) {
515 $self->print_msg("Checking if Greenstone server is persistent\n");
516 $is_persistent_server = $self->is_persistent();
517 $self->{'is_persistent_server'} = $is_persistent_server;
518 }
519
520 if ($is_persistent_server) { # only makes sense to issue activate and deactivate cmds to a persistent server
521
522 $self->print_msg("Checking if the collection $qualified_collection is already active\n");
523 my $collection_active = $self->ping_library_collection();
524
525 if ($collection_active) {
526 $self->print_msg("De-activating collection $qualified_collection\n");
527 $self->deactivate_collection();
528 }
529 else {
530 $self->print_msg("Collection is not active => No need to deactivate\n");
531 }
532 }
533 else {
534 $self->print_msg("Server is not persistent => No need to deactivate collection\n");
535 }
536 }
537 else {
538 $self->print_msg("No response to Ping => Taken to mean server is not running\n");
539 }
540
541 return $is_persistent_server;
542}
543
544sub do_activate {
545 my $self = shift @_;
546
547 my $library_url = $self->get_library_URL(); # Can't do $self->{'library_url'}; as it may not be set yet
548
549 if(!defined $library_url) { # undef if no valid server URL
550 return; # nothing to activate if without valid server URL
551 }
552
553 my $is_persistent_server = $self->{'is_persistent_server'};
554 my $qualified_collection = $self->{'qualified_collection'};
555
556 $self->print_msg("Pinging $library_url\n");
557 if ($self->ping_library()) { # server running
558
559 # don't need to work out persistency of server more than once, since the libraryURL hasn't changed
560 if (!defined $is_persistent_server) {
561 $self->print_msg("Checking if Greenstone server is persistent\n");
562 $is_persistent_server = $self->is_persistent();
563 $self->{'is_persistent_server'} = $is_persistent_server;
564 }
565
566 if ($is_persistent_server) { # persistent server, so can try activating collection
567
568 $self->print_msg("Checking if the collection $qualified_collection is not already active\n");
569
570 # Since we could have deactivated the collection at this point,
571 # it is likely that it is not yet active. When pinging the collection
572 # a "ping did not succeed" message is expected, therefore tell the ping
573 # to proceed silently
574 my $silent = 1;
575 my $collection_active = $self->ping_library_collection($silent);
576
577 if (!$collection_active) {
578 $self->print_msg(" Collection is not active.\n");
579 $self->print_msg("Activating collection $qualified_collection\n");
580 $self->activate_collection();
581
582 # unless an error occurred, the collection should now be active:
583 $collection_active = $self->ping_library_collection(); # not silent if ping did not succeed
584 if(!$collection_active) {
585 $self->print_msg("ERROR: collection $qualified_collection did not get activated\n");
586 }
587 }
588 else {
589 $self->print_msg("Collection is already active => No need to activate\n");
590 }
591 }
592 else {
593 $self->print_msg("Server is not persistent => No need to activate collection\n");
594 }
595 }
596 else {
597 $self->print_msg("No response to Ping => Taken to mean server is not running\n");
598 }
599
600 return $is_persistent_server;
601}
602
603
604#########################################################
605### UNUSED METHODS - CAN BE HANDY
606
607
608# This method uses the perl libraries we're advised to use in place of wget for pinging and retrieving web
609# pages. The problem is that not all perl installations may support these libraries. So we now use the new
610# config() method further above, which uses the wget included in Greenstone binary installations.
611# If the library imports at page top conflict, comment out those imports and move the methods config_old(),
612# is_URL_active() and pingHost() out to a temporary file.
613#
614# If for some reason you can't use wget, then rename the config() method to config_old(), and rename the
615# method below to config() and things should work as before.
616sub config_old {
617 my $self = shift(@_);
618 my ($command, $check_message_against_regex, $expected_error_code, $silent) = @_;
619
620 my $library_url = $self->get_library_URL(); #$self->{'library_url'};
621
622
623 # Gatherer.java's configGS3Server doesn't use the site variable
624 # so we don't have to either
625
626 # for GS2, getting the HTTP status isn't enough, we need to read the output
627 # since this is what CollectionManager.config() stipulates.
628 # Using LWP::UserAgent::get($url) for this
629
630 if(!defined $library_url) {
631 return 0;
632 }
633 else {
634 $ua->timeout(5); # set LWP useragent to 5s max timeout for testing the URL
635 # Need to set this, else it takes I don't know how long to timeout
636 # http://www.perlmonks.org/?node_id=618534
637
638 # http://search.cpan.org/~gaas/libwww-perl-6.04/lib/LWP/UserAgent.pm
639 # use LWP::UserAgent's get($url) since it returns an HTTP::Response code
640
641 my $response_obj = $ua->get($library_url.$command);
642
643 # $response_obj->content stores the content and $response_obj->code the HTTP response code
644 my $response_code = $response_obj->code();
645
646 if(LWP::Simple::is_success($response_code)) {# $response_code eq RC_OK) { # LWP::Simple::is_success($response_code)
647 $self->print_msg("*** Command $library_url$command\n", 3);
648 $self->print_msg("*** HTTP Response Status: $response_code - Complete.", 3);
649
650 # check the page content is as expected
651 my $response_content = $response_obj->content;
652 my $resultstr = $response_content;
653 $resultstr =~ s@.*gs_content\"\>@@s;
654 $resultstr =~ s@</div>.*@@s;
655
656 if($resultstr =~ m/$check_message_against_regex/) {#if($response_content =~ m/$check_message_against_regex/) {
657 $self->print_msg(" Response as expected.\n", 3);
658 $self->print_msg("@@@@@@ Got result:\n$resultstr\n", 4);
659 return 1;
660 } else {
661 # if we expect the collection to be inactive, then we'd be in silent mode: if so,
662 # don't print out the "ping did not succeed" response, but print out any other messages
663
664 # So we only suppress the ping col "did not succeed" response if we're in silent mode
665 # But if any message other than ping "did not succeed" is returned, we always print it
666 if($resultstr !~ m/did not succeed/ || !$silent) {#if($response_content !~ m/did not succeed/ || !$silent) {
667 $self->print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
668 $self->print_msg("*** Got message:\n$response_content.\n", 4);
669 $self->print_msg("*** Got result:\n$resultstr\n", 3);
670 }
671 return 0; # ping on a collection may "not succeed."
672 }
673 }
674 elsif(LWP::Simple::is_error($response_code)) { # method exported by LWP::Simple, along with HTTP::Status constants
675 # check the page content is as expected
676 if(defined $expected_error_code && $response_code == $expected_error_code) {
677 $self->print_msg(" Response status $response_code as expected.\n", 3);
678 } else {
679 $self->print_msg("*** Command $library_url$command\n");
680 $self->print_msg("*** Unexpected error. HTTP Response Status: $response_code - Failed.\n");
681 }
682 return 0; # return false, since the response_code was an error, expected or not
683 }
684 else {
685 $self->print_msg("*** Command $library_url$command\n");
686 $self->print_msg("*** Unexpected error. HTTP Response Status: $response_code - Failed.\n");
687 return 0;
688 }
689 }
690}
691
692# This method is now unused. Using ping_library instead to send the ping action to a
693# GS2/GS3 server. This method can be used more generally to test whether a URL is alive.
694# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
695# and http://www.perlmonks.org/?node_id=618534
696sub is_URL_active {
697 my $url = shift(@_); # gs3 or gs2 URL
698
699 my $status = 0;
700 if(defined $url) {
701 $ua->timeout(10); # set LWP useragent to 5s max timeout for testing the URL
702 # Need to set this, else it takes I don't know how long to timeout
703 # http://www.perlmonks.org/?node_id=618534
704
705 $status = LWP::Simple::head($url); # returns empty list of headers if it fails
706 # LWP::Simple::get($url) is more intensive, so don't need to do that
707 #print STDERR "**** $url is alive.\n" if $status;
708 }
709 return $status;
710}
711
712# Pinging seems to always return true, so this method doesn't work
713sub pingHost {
714 my $url = shift(@_); # gs3 or gs2 URL
715
716 my $status = 0;
717 if(defined $url) {
718 # Get just the domain. "http://localhost/gsdl?uq=332033495" becomes "localhost"
719 # "http://localhost/greenstone/cgi-bin/library.cgi" becomes "localhost" too
720
721 #my $host = $url;
722 #$host =~ s@^https?:\/\/(www.)?@@;
723 #$host =~ s@\/.*@@;
724 #print STDERR "**** HOST: $host\n";
725
726 # More robust way
727 # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
728 my $uri = URI->new( $url );
729 my $host = $uri->host;
730
731 # Ping the host. http://perldoc.perl.org/Net/Ping.html
732 my $p = Net::Ping->new();
733 $status = $p->ping($host); # || 0. Appears to set to undef rather than 0
734 print STDERR "**** $host is alive.\n" if $status; #print "$host is alive.\n" if $p->ping($host);
735 $p->close();
736 }
737 # return whether pinging was a success or failure
738 return $status;
739}
740
7411;
Note: See TracBrowser for help on using the repository browser.