source: main/trunk/greenstone2/bin/script/activate.pl@ 25579

Last change on this file since 25579 was 25579, checked in by ak19, 12 years ago
  1. Introduced the verbosity flag into activate.pl to print out debugging statements depending on verbosity, and this is passed in from buildcol.pl and full-rebuild.pl too now. 2. Cleaned up some debugging code.
File size: 22.3 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37
# Compile-time setup: refuse to run outside a configured Greenstone
# environment, then put Greenstone's own perllib directory at the front of
# the module search path so the "use colcfg/scriptutil/util" lines resolve.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    my $gs_perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift(@INC, $gs_perllib);
}
43
44
45use strict;
46no strict 'refs'; # allow filehandles to be variables and vice versa
47no strict 'subs'; # allow barewords (eg STDERR) as function arguments
48
49use File::Basename;
50use File::Find;
51
52use HTTP::Response;
53use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
54 # don't import LWP::Simple's head function by name since it can conflict with CGI:head())
55#use CGI qw(:standard); # then only CGI.pm defines a head()
56use Net::Ping;
57use URI;
58
59use colcfg;
60use scriptutil;
61use util;
62#use enum;
63
64# enumerations in perl, http://stackoverflow.com/questions/473666/does-perl-have-an-enumeration-type
65# Unfortunately, not part of perl's core
66#use enum qw(LEVEL_NONE LEVEL_ERROR LEVEL_INFO LEVEL_DEBUG); # debugging levels NONE == 0, ERROR=1 INFO=2 DEBUG=3
67
68# global variables
69#my $default_verbosity = LEVEL_ERROR; # by default we display basic error messages
70
# Script-wide verbosity used whenever a caller does not supply its own.
# main() overwrites this when -verbosity is given on the command line.
my $default_verbosity = 2; # by default we display basic error and info messages

# Prints a banner to STDERR announcing the task that is about to run.
#
#   $task_msg          - text shown inside the banner
#   $verbosity_setting - optional verbosity to judge against; falls back to
#                        $default_verbosity when not supplied
#
# The banner is shown when the effective verbosity is >= 1.
sub print_task_msg {
    my ($task_msg, $verbosity_setting) = @_;

    # Test definedness, not truth, so that an explicit verbosity of 0
    # silences the banner instead of being replaced by the default
    # (this matches how print_msg treats its verbosity argument).
    $verbosity_setting = $default_verbosity unless defined $verbosity_setting;

    return if $verbosity_setting < 1;

    print STDERR "\n";
    print STDERR "************************\n";
    print STDERR "* $task_msg\n";
    print STDERR "************************\n";
}
85
# Writes $msg to STDERR when the verbosity in force is high enough.
# No newline is appended; callers include their own.
#
#   $msg               - text to print
#   $min_verbosity     - lowest verbosity at which the message is shown
#                        (1 when not supplied)
#   $verbosity_setting - verbosity to judge against ($default_verbosity
#                        when not supplied)
sub print_msg {
    my ($msg, $min_verbosity, $verbosity_setting) = @_;

    my $threshold = defined $min_verbosity     ? $min_verbosity     : 1;
    my $in_force  = defined $verbosity_setting ? $verbosity_setting : $default_verbosity;

    # A message is displayed only if the verbosity in force reaches the
    # minimum level that message asks for.
    print STDERR "$msg" if $in_force >= $threshold;
}
100
# Sends a command to a GS2 or GS3 library URL and checks the reply.
# The commands used in this script are activate, deactivate, ping and
# is-persistent (is-persistent is only implemented for GS2).
#
#   $library_url                 - base URL of the library; undef means no server
#   $command                     - query string appended verbatim to $library_url
#   $check_message_against_regex - pattern the response body must match
#   $site                        - accepted but unused (Gatherer.java's
#                                  configGS3Server doesn't use it either)
#
# Returns 1 when the request succeeds at the HTTP level AND the body matches
# the pattern; returns 0 in every other case.
sub config {
    my ($library_url, $command, $check_message_against_regex, $site) = @_;

    return 0 unless defined $library_url;

    # Cap the wait at 5s, otherwise a dead server stalls the build for a long time.
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(5);

    # For GS2 the HTTP status alone isn't enough: the page content has to be
    # read too, so use LWP::UserAgent's get(), which returns an HTTP::Response.
    my $response_obj  = $ua->get($library_url.$command);
    my $response_code = $response_obj->code();

    if (LWP::Simple::is_success($response_code)) {
        &print_msg("*** Command $library_url$command\n", 3);
        &print_msg("*** HTTP Response Status: $response_code - Complete.", 3);

        # check the page content is as expected
        my $response_content = $response_obj->content;
        if ($response_content =~ m/$check_message_against_regex/) {
            &print_msg(" Response as expected.\n", 3);
            return 1;
        }

        &print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
        &print_msg("*** Got message:\n$response_content.\n", 4);
        return 0; # ping on a collection may "not succeed."
    }

    if (LWP::Simple::is_error($response_code)) {
        &print_msg("*** Command $library_url$command\n");
        &print_msg("*** HTTP Response Status: $response_code - Failed.\n");
    }

    # Reached for error codes and for any code that is neither success nor
    # error (1xx/3xx). Previously the latter fell off the end of the sub
    # without a defined result.
    return 0;
}
151
# Asks the library server to release (deactivate) a collection.
#
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"
#   $qualified_collection - [colgroup/]collection name
#   $site                 - passed through to config() for gs3
#
# Returns config()'s result for gs2/gs3, and 0 for any other mode.
sub deactivate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
        my $check_message_against_regex = q/configured release-collection/;
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
        # The collection name is matched literally: quotemeta stops characters
        # such as "." "+" or "(" in a name from acting as regex operators.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " deactivated";
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0; # unknown mode: nothing was sent
}
166
# Asks the library server to add (activate) a collection.
#
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"
#   $qualified_collection - [colgroup/]collection name
#   $site                 - passed through to config() for gs3
#
# Returns config()'s result for gs2/gs3, and 0 for any other mode.
sub activate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
        my $check_message_against_regex = q/configured add-collection/;
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
        # The collection name is matched literally: quotemeta stops characters
        # such as "." "+" or "(" in a name from acting as regex operators.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " activated";
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0; # unknown mode: nothing was sent
}
181
# Sends a ping action to the GS library.
#
#   $library_url - base URL of the library
#   $gs_mode     - "gs2" or "gs3"
#   $site        - passed through to config() for gs3
#
# Returns config()'s result for gs2/gs3, and 0 for any other mode.
sub ping_library {
    my ($library_url, $gs_mode, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $command = "?a=ping";
        # "Ping" ... "succeeded" expected on success. (Ping on a collection may "not succeed".)
        my $check_responsemsg_against_regex = q/succeeded/;
        return config($library_url, $command, $check_responsemsg_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $command = "?a=s&sa=ping";
        my $check_responsemsg_against_regex = "Ping succeeded."; # no other ping response messages even exist for GS3
        # Returned explicitly, as the gs2 branch does, instead of relying on
        # the value of the last statement evaluated.
        return config($library_url, $command, $check_responsemsg_against_regex, $site);
    }

    return 0; # unknown mode: no ping was sent
}
198
# Tells whether the library server is persistent.
# A GS3 server is always treated as persistent, so no request is made for it;
# for anything else the is-persistent command is sent to $library_url and the
# reply is expected to contain "true" (isPersistent: true versus isPersistent: false).
sub is_persistent {
    my ($library_url, $gs_mode) = @_;

    return 1 if $gs_mode eq "gs3";

    my ($persistence_query, $expected_reply) = ("?a=is-persistent", q/true/);
    return config($library_url, $persistence_query, $expected_reply);
}
212
# Works out the library URL of a web server included with Greenstone.
#
#   $gs_mode - "gs2" (URL derived from llssite.cfg) or
#              "gs3" (URL obtained by asking ant)
#
# Returns the URL, or undef when it cannot be determined (for gs2 this
# includes the case where llssite.cfg says url=URL_pending).
sub get_library_URL {
    my $gs_mode = shift(@_); # gs3 or gs2

    return _get_gs2_library_URL()         if $gs_mode eq "gs2";
    return _get_gs3_library_URL($gs_mode) if $gs_mode eq "gs3";
    return undef;
}

# GS2 helper: reads the url, enterlib and portnumber properties from
# $GSDLHOME/llssite.cfg and builds the library URL from them.
sub _get_gs2_library_URL {
    my $llssite_cfg = &util::filename_cat($ENV{'GSDLHOME'}, "llssite.cfg");
    return undef unless -f $llssite_cfg;

    # Read in the entire contents of the file in one hit.
    # Three-argument open with a lexical handle: the file name can never be
    # taken for an open mode, and no global FIN handle is left behind.
    my $fin;
    if (!open($fin, '<', $llssite_cfg)) {
        &print_msg("activate.pl::get_library_URL failed to open $llssite_cfg ($!)\n");
        return undef;
    }
    my $contents = do { local $/; <$fin> };
    close($fin);
    $contents = "" unless defined $contents; # empty file

    my $url        = undef;
    my $enterlib   = "";
    my $portnumber = ""; # will remain empty (implicit port 80) unless it's specifically been assigned

    # split on carriage-returns and/or linefeeds
    foreach my $line (split(/[\n\r]+/, $contents)) {
        if ($line =~ m/^url=(.*)$/) {
            $url = $1;
        } elsif ($line =~ m/^enterlib=(.*)$/) {
            $enterlib = $1;
        } elsif ($line =~ m/^portnumber=(.*)$/) {
            $portnumber = $1;
        }
    }

    return undef unless $url;

    # url=URL_pending means the library is not running: for server.exe the
    # Enter Library button hasn't been pressed yet.
    return undef if $url eq "URL_pending";

    # In the case of server.exe, extra work is needed to get the proper URL.
    # If the URL is nothing but http://<host> (optionally with a trailing
    # slash), assume server.exe; anything longer is the apache web server.
    my $uri  = URI->new($url);
    my $host = $uri->can('host') ? $uri->host : undef; # not every URI scheme has a host
    if (defined $host && $url =~ m/http:\/\/\Q$host\E(\/)?$/) {
        # The actual URL is the result of suffixing the port and enterlib
        # properties. Drop a trailing slash first so the port follows the
        # host directly, and add no ":" at all when the port is implicit.
        $url =~ s/\/$//;
        $url .= ":" . $portnumber if $portnumber ne "";
        $url .= $enterlib;
    }

    return $url;
}

# GS3 helper: runs the ant target get-default-servlet-url and picks the URL
# out of its output, which can look like:
#
#   Buildfile: build.xml
#        [echo] os.name: Windows Vista
#
#   get-default-servlet-url:
#        [echo] http://localhost:8383/greenstone3/library
#   BUILD SUCCESSFUL
#   Total time: 0 seconds
#
# $gs_mode is only used in the failure message.
sub _get_gs3_library_URL {
    my ($gs_mode) = @_;

    my $perl_command = "ant get-default-servlet-url"; # assumes activate.pl is run from within the GS3 installation
    my $url = undef;

    my $pin;
    if (open($pin, '-|', $perl_command)) {
        while (defined(my $perl_output_line = <$pin>)) {
            if ($perl_output_line =~ m@http:\/\/(\S*)@) { # grab all the non-whitespace chars
                $url = "http://".$1; # if several lines match, the last one wins
            }
        }
        close($pin);
    } else {
        &print_msg("activate.pl::get_library_URL: Failed to run $perl_command to work out library URL for $gs_mode\n");
    }

    # either the url is still undef or it is now set
    return $url;
}
317
318### UNUSED METHODS TO MOVE TO util.pm?
319
# Currently unused: ping_library is used instead to send the ping action to
# a GS2/GS3 server. This sub can be used more generally to test whether a
# URL is alive, by issuing a HEAD request for it.
# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
# and http://www.perlmonks.org/?node_id=618534
#
# Returns 0 for an undefined URL, otherwise the scalar result of
# LWP::Simple::head (false if the request fails).
sub is_URL_active {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Cap the wait at 5s, else it takes an unknown amount of time to time out.
    $ua->timeout(5);

    # A HEAD request is enough; LWP::Simple::get($url) would be more intensive.
    my $alive = LWP::Simple::head($url);
    return $alive;
}
339
# Pings the host named in a URL with Net::Ping and reports the outcome.
# Pinging seems to always return true, so this method doesn't work.
#
# Returns 0 for an undefined URL, otherwise whatever Net::Ping's ping()
# returned (which appears to be undef rather than 0 on failure).
sub pingHost {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Get just the domain: "http://localhost/gsdl?uq=332033495" and
    # "http://localhost/greenstone/cgi-bin/library.cgi" both become "localhost".
    # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
    my $host = URI->new($url)->host;

    # Ping the host. http://perldoc.perl.org/Net/Ping.html
    my $pinger = Net::Ping->new();
    my $reachable = $pinger->ping($host);
    if ($reachable) {
        print STDERR "**** $host is alive.\n";
    }
    $pinger->close();

    # whether pinging was a success or failure
    return $reachable;
}
368
369
# Entry point: moves a freshly built collection from "building" into "index",
# deactivating the collection on a running persistent web server beforehand
# and activating it again afterwards.
#
#   $argc - number of command-line arguments
#   @argv - the arguments; the last one is the [colgroup/]collection name
#
# Most of the arguments are familiar from the building scripts like buildcol.pl
# The special optional argument -library_url is for when we're dealing with a web
# library server such as an apache that's separate from any included with GS2.
# In such a case, this script's caller should pass in -library_url <URL>.
#
# $site argument must be specified in the cmdline for collectionConfig.xml to get
# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
#
# Exits with -1 after printing usage, when there is no usable building folder,
# or when the collection cannot be used.
sub main
{
    my ($argc,@argv) = @_;

    if (($argc==0) || (($argc==1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
        my ($progname) = ($0 =~ m/^.*[\/|\\](.*?)$/);
        # $0 has no path separator when the script is run from its own
        # directory; fall back to $0 itself rather than printing an undef name.
        $progname = $0 unless defined $progname;

        print STDERR "\n";
        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -removeold -keepold -verbosity v\n";
        print STDERR "\t-library_url URL] <[colgroup/]collection>\n";
        print STDERR "\n";

        exit(-1);
    }

    # get the collection details
    my $qualified_collection = pop @argv; # qualified collection

    my $collect_dir = "collect"; # undef
    my $build_dir = undef;
    my $index_dir = undef;
    my $site = undef;

    my $removeold = 0;
    my $keepold = 0;

    my $library_url = undef; # to be specified on the cmdline if not using a GS-included web server

    # Loop on definedness, not truth: a stray "0" or empty-string argument
    # must not silently end option parsing and drop the options after it.
    while (defined(my $arg = shift @argv)) {
        if ($arg eq "-collectdir") {
            $collect_dir = shift @argv;
        }
        elsif ($arg eq "-builddir") {
            $build_dir = shift @argv;
        }
        elsif ($arg eq "-indexdir") {
            $index_dir = shift @argv;
        }
        elsif ($arg eq "-site") {
            $site = shift @argv;
        }
        elsif ($arg eq "-removeold") {
            $removeold = 1;
        }
        elsif ($arg eq "-keepold") {
            $keepold = 1;
        }
        elsif ($arg eq "-library_url") {
            $library_url = shift @argv;
        }
        elsif ($arg eq "-verbosity") {
            $default_verbosity = shift @argv; # global variable

            # ensure we're working with ints not strings (int context not str context), in case verbosity=0
            # http://stackoverflow.com/questions/288900/how-can-i-convert-a-string-to-a-number-in-perl
            $default_verbosity = int($default_verbosity || 0);
        }
    }

    # work out the building and index dirs
    my $collection_dir = &util::resolve_collection_dir($collect_dir, $qualified_collection, $site);
    $build_dir = &util::filename_cat($collection_dir, "building") unless (defined $build_dir);
    $index_dir = &util::filename_cat($collection_dir, "index") unless (defined $index_dir);

    &print_task_msg("Running Collection Activation Stage");

    # can't do anything without a build directory with something in it to move into index
    if (!&util::dir_exists($build_dir)) {
        &print_msg("No building folder at $build_dir to move to index.\n");
        exit -1;
    } elsif (&util::is_dir_empty($build_dir)) {
        &print_msg("Nothing in building folder $build_dir to move into index folder.\n");
        exit -1;
    }

    # get and check the collection name
    if ((&colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
        &print_msg("Unable to use collection \"$qualified_collection\" within \"$collect_dir\"\n");
        exit -1;
    }

    # Read in the collection configuration file.
    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
    my ($collect_cfg_filename, $gs_mode) = &colcfg::get_collect_cfg_name(STDERR);

    my $collectcfg = &colcfg::read_collection_cfg ($collect_cfg_filename,$gs_mode);

    # look for build.cfg/buildConfig.xml
    my $build_cfg_filename ="";

    if ($gs_mode eq "gs2") {
        $build_cfg_filename = &util::filename_cat($build_dir,"build.cfg");
    } else {
        $build_cfg_filename = &util::filename_cat($build_dir, "buildConfig.xml");
        # gs_mode is GS3. Set the site now if this was not specified as cmdline argument
        #$site = "localsite" unless defined $site;
    }

    # We would like to know what the buildtype is
    # This will be useful for Solr.
    # (The branches below are placeholders: nothing acts on the result yet.)
    if (-e $build_cfg_filename) {

        # testing if there has been a change of indexer
        # (e.g. collect.cfg now says lucene, but build.cfg says mgpp)
        my $buildcfg = &colcfg::read_building_cfg ($build_cfg_filename, $gs_mode);
        if ($buildcfg->{'buildtype'} ne $collectcfg->{'buildtype'}) {
            ### do something? See incremental-buildcol.pl
        }
        else {
            ### do something? See incremental-buildcol.pl
        }
    }
    else { # build.cfg doesn't exist
        ### do something? See incremental-buildcol.pl
    }


    # Now the logic in GLI's CollectionManager.java (processComplete()
    # and installCollection()) and Gatherer.configGS3Server().

    # 1. Get library URL

    # For web servers that are external to a Greenstone installation,
    # the user can pass in their web server's library URL.
    # For web servers included with GS (like tomcat for GS3 and server.exe
    # and apache for GS2), we work out the library URL:
    if (!$library_url) {
        $library_url = &get_library_URL($gs_mode); # returns undef if no server is running
    }

    # CollectionManager's installCollection phase in GLI
    # 2. Ping the library URL, and if it's a persistent server and running, release the collection

    my $is_persistent_server = undef;
    if ($library_url) { # running server
        &print_msg("Pinging $library_url\n");
        if (&ping_library($library_url, $gs_mode, $site)) {
            # server is running, so release the collection if the server is persistent
            &print_msg("Checking if Greenstone server is persistent\n");
            $is_persistent_server = &is_persistent($library_url, $gs_mode);
            if ($is_persistent_server) {
                &print_msg("De-activating collection $qualified_collection\n");
                &deactivate_collection($library_url, $gs_mode, $qualified_collection, $site);
            }
        }
    }

    # 3. Do all the moving building to index stuff now

    # If removeold: replace index dir with building dir.
    # If keepold: move building's contents into index, where only duplicates will get deleted.
    # removeold and keepold can't both be on at the same time
    ($removeold, $keepold) = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                  0, # incremental is irrelevant to what activate.pl does, setting this = 0
                                  $build_dir, # checkdir. Usually archives or export to be deleted. activate.pl deletes building
                                  $collectcfg);

    if ($removeold) {

        if (&util::dir_exists($index_dir)) {
            &print_task_msg("Removing \"index\"");
            &util::rm_r($index_dir);

            # Wait for a couple of seconds, just for luck
            sleep 2;

            if (&util::dir_exists($index_dir)) {
                &print_msg("The index directory $index_dir could not be deleted.\n"); # CollectionManager.Index_Not_Deleted
            }
        }

        # if remote GS server: gliserver.pl would call activate.pl to activate
        # the collection at this point since activate.pl lives on the server side

        # Move the building directory to become the new index directory
        &print_task_msg("Moving \"building\" -> \"index\"");
        &util::mv($build_dir, $index_dir);
        if (&util::dir_exists($build_dir) || !&util::dir_exists($index_dir)) {
            &print_msg("Could not move $build_dir to $index_dir.\n"); # CollectionManager.Build_Not_Moved
        }
    }
    elsif ($keepold) {
        # Copy just the contents of building dir into the index dir, overwriting
        # existing files, but don't replace index with building.
        &print_task_msg("Moving \"building\" -> \"index\"");
        &util::mv_dir_contents($build_dir, $index_dir);
    }


    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again

    # Check for success: if building does not exist OR is empty
    if (!&util::dir_exists($build_dir) || &util::is_dir_empty($build_dir)) {
        if ($library_url) { # undef if no server is running
            &print_msg("Pinging $library_url\n");
            if (&ping_library($library_url, $gs_mode, $site)) {
                # don't need to work out persistency of server more than once
                if (!defined $is_persistent_server) {
                    &print_msg("Checking if Greenstone server is persistent\n");
                    $is_persistent_server = &is_persistent($library_url, $gs_mode);
                }
                if ($is_persistent_server) {
                    &print_msg("Activating collection $qualified_collection\n");
                    &activate_collection($library_url, $gs_mode, $qualified_collection, $site);
                }
            }
        }
    } else { # installcollection failed
        #CollectionManager.Preview_Ready_Failed
        &print_msg("Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n");
    }
}

&main(scalar(@ARGV),@ARGV);
592
593
594
595
Note: See TracBrowser for help on using the repository browser.