source: main/trunk/greenstone2/bin/script/activate.pl@ 25573

Last change on this file since 25573 was 25573, checked in by ak19, 12 years ago

Adding in the new activate.pl script to be called at the end of the build process. It moves building to index after first deactivating the collection on the server (if this is running and a persistent server). After the move operation, it then activates the collection on the server again, if the server is still running and is a persistent server.

File size: 22.0 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37
BEGIN {
    # Refuse to start unless the Greenstone environment has been set up:
    # both variables are required before any Greenstone module is loaded.
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }
    if (!defined $ENV{'GSDLOS'}) {
        die "GSDLOS not set\n";
    }

    # Put Greenstone's own perllib first on the module search path so the
    # project modules used below (colcfg, scriptutil, util) can be found.
    unshift(@INC, "$ENV{'GSDLHOME'}/perllib");
}
43
44
45use strict;
46no strict 'refs'; # allow filehandles to be variables and vice versa
47no strict 'subs'; # allow barewords (eg STDERR) as function arguments
48
49use File::Basename;
50use File::Find;
51
52use HTTP::Response;
53use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
54 # don't import LWP::Simple's head function by name since it can conflict with CGI:head())
55#use CGI qw(:standard); # then only CGI.pm defines a head()
56use Net::Ping;
57use URI;
58
59use colcfg;
60use scriptutil;
61use util;
62
63
# Prints a task banner to STDERR: a blank line, then the message framed
# above and below by a row of asterisks.
#   $task_msg - the text to show inside the banner
sub print_task_msg {
    my ($task_msg) = @_;

    my $rule = "************************";

    # Leading "" gives the blank line before the banner, trailing "" the
    # newline that terminates the closing rule.
    print STDERR join("\n", "", $rule, "* $task_msg", $rule, "");
}
71
# Method to send a command to a GS2 or GS3 library_URL.
# The commands used in this script can be activate, deactivate, ping,
# and is-persistent (is-persistent only implemented for GS2).
#
# Parameters:
#   $library_url                 - base URL of the library; may be undef
#   $command                     - query string appended verbatim to the URL
#   $check_message_against_regex - pattern the response body must match
#   $site                        - accepted but unused (Gatherer.java's
#                                  configGS3Server doesn't use the site
#                                  variable so we don't have to either)
#
# Returns 1 only when the request succeeded AND the response body matched
# the pattern; returns 0 in every other case (no URL, HTTP failure,
# unexpected body, or a status that is neither success nor error).
#
# Relies on the file-level $ua user agent imported from LWP::Simple.
sub config {
    my ($library_url, $command, $check_message_against_regex, $site) = @_;

    # Without a URL there is no server to talk to.
    return 0 if !defined $library_url;

    # Set LWP useragent to 5s max timeout for testing the URL.
    # Need to set this, else it takes I don't know how long to timeout
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(5);

    # For GS2, getting the HTTP status isn't enough, we need to read the
    # output since this is what CollectionManager.config() stipulates.
    # LWP::UserAgent's get($url) returns an HTTP::Response object giving
    # access to both the status code and the content.
    # http://search.cpan.org/~gaas/libwww-perl-6.04/lib/LWP/UserAgent.pm
    my $response_obj  = $ua->get($library_url . $command);
    my $response_code = $response_obj->code();

    if (LWP::Simple::is_success($response_code)) {
        print STDERR "*** Command $library_url$command\n";
        print STDERR "*** HTTP Response Status: $response_code - Complete.";

        # check the page content is as expected
        my $response_content = $response_obj->content;
        if ($response_content =~ m/$check_message_against_regex/) {
            print STDERR " Response as expected.\n";
            return 1;
        }

        print STDERR "\n\t BUT: command $library_url$command response UNEXPECTED.\n";
        #print STDERR "*** Got message:\n$response_content.\n";
        return 0; # ping on a collection may "not succeed."
    }

    if (LWP::Simple::is_error($response_code)) {
        print STDERR "*** $response_code - Failed.\n";
        return 0;
    }

    # Neither a success nor an error status. Previously this case fell off
    # the end of the sub with an unspecified return value; report it and
    # fail explicitly instead.
    print STDERR "*** $response_code - Unexpected HTTP status, treating as failure.\n";
    return 0;
}
121
# Asks the library server to release (deactivate) a collection.
#
# Parameters:
#   $library_url          - base URL of the library (may be undef)
#   $gs_mode              - "gs2" or "gs3"
#   $qualified_collection - [colgroup/]collection name, appended to the URL
#   $site                 - passed through to config() for GS3
#
# Returns the result of config() (1 on confirmed success, 0 otherwise),
# or 0 when $gs_mode is neither "gs2" nor "gs3".
sub deactivate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
        my $check_message_against_regex = q/configured release-collection/;
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
        # The expected message embeds the collection name and is used as a
        # regex by config(); escape the name so characters such as "." "+"
        # or "(" are matched literally instead of being read as regex syntax.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " deactivated";
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    # unknown mode: nothing was sent
    return 0;
}
136
# Asks the library server to add (activate) a collection.
#
# Parameters:
#   $library_url          - base URL of the library (may be undef)
#   $gs_mode              - "gs2" or "gs3"
#   $qualified_collection - [colgroup/]collection name, appended to the URL
#   $site                 - passed through to config() for GS3
#
# Returns the result of config() (1 on confirmed success, 0 otherwise),
# or 0 when $gs_mode is neither "gs2" nor "gs3".
sub activate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
        my $check_message_against_regex = q/configured add-collection/;
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
        # The expected message embeds the collection name and is used as a
        # regex by config(); escape the name so characters such as "." "+"
        # or "(" are matched literally instead of being read as regex syntax.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " activated";
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    # unknown mode: nothing was sent
    return 0;
}
151
# Send a ping action to the GS library.
#
# Parameters:
#   $library_url - base URL of the library (may be undef)
#   $gs_mode     - "gs2" or "gs3"
#   $site        - passed through to config() for GS3
#
# Returns the result of config(): 1 when the server answered and the
# response contained the expected ping message, 0 otherwise. Also returns
# 0 when $gs_mode is neither "gs2" nor "gs3". Both modes now return
# explicitly; the gs3 branch used to rely on an implicit return.
sub ping_library {
    my ($library_url, $gs_mode, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $command = "?a=ping";
        # "Ping" ... "succeeded" expected on success. (Ping on a collection may "not succeed".)
        my $check_responsemsg_against_regex = q/succeeded/;
        return config($library_url, $command, $check_responsemsg_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $command = "?a=s&sa=ping";
        my $check_responsemsg_against_regex = "Ping succeeded."; # no other ping response messages even exist for GS3
        return config($library_url, $command, $check_responsemsg_against_regex, $site);
    }

    # unknown mode: no ping was sent
    return 0;
}
168
# Reports whether the library server is persistent.
# For GS3 the answer is always true, without contacting the server.
# Otherwise the is-persistent action is sent to $library_url via config()
# and the reply is checked for "true" (isPersistent: true versus
# isPersistent: false).
sub is_persistent {
    my ($library_url, $gs_mode) = @_;

    # GS3 server is always persistent
    return 1 if $gs_mode eq "gs3";

    my $persistence_query = "?a=is-persistent";
    my $expected_reply    = q/true/;
    return config($library_url, $persistence_query, $expected_reply);
}
182
# Works out the library URL of a web server included with Greenstone.
#   For GS2, the URL is derived from the llssite.cfg file in GSDLHOME.
#   For GS3, ant is asked for the default servlet URL.
#
# Parameter: $gs_mode - "gs2" or "gs3"
# Returns the URL string, or undef when it cannot be determined (config
# file missing or unreadable, no usable url property, library not running,
# ant produced no URL, or unknown mode).
sub get_library_URL {
    my $gs_mode = shift(@_); # gs3 or gs2

    if ($gs_mode eq "gs2") {
        return _gs2_library_url_from_llssite();
    }
    if ($gs_mode eq "gs3") {
        return _gs3_library_url_from_ant($gs_mode);
    }

    return undef;
}

# Private helper: reads the url, enterlib and portnumber properties from
# GSDLHOME/llssite.cfg and returns the GS2 library URL, or undef.
sub _gs2_library_url_from_llssite {
    my $llssite_cfg = &util::filename_cat($ENV{'GSDLHOME'}, "llssite.cfg");
    return undef unless -f $llssite_cfg;

    # Read in the entire contents of the file in one hit.
    # Three-argument open with a lexical handle: the filename can never be
    # misread as an open mode, and the handle cannot clash with other code.
    my $fin;
    if (!open($fin, '<', $llssite_cfg)) {
        print STDERR "activate.pl::get_library_URL failed to open $llssite_cfg ($!)\n";
        return undef;
    }
    my $contents = do { local $/; <$fin> };
    close($fin);
    $contents = "" unless defined $contents;

    # check llssite.cfg for line with url property
    # for server.exe also need to use portnumber and enterlib properties
    my $url        = undef;
    my $enterlib   = "";
    my $portnumber = ""; # will remain empty (implicit port 80) unless it's specifically been assigned

    # split on carriage-returns and/or linefeeds
    foreach my $line (split(/[\n\r]+/, $contents)) {
        if ($line =~ m/^url=(.*)$/) {
            $url = $1;
        }
        elsif ($line =~ m/^enterlib=(.*)$/) {
            $enterlib = $1;
        }
        elsif ($line =~ m/^portnumber=(.*)$/) {
            $portnumber = $1;
        }
    }

    return undef if !$url;

    # do not process url=URL_pending in the file, since for server.exe
    # this just means the Enter Library button hasn't been pressed yet
    return undef if $url eq "URL_pending"; # library is not running

    # In the case of server.exe, need to do extra work to get the proper URL
    # But first, need to know whether we're indeed dealing with server.exe:
    # compare the URL's domain to the full URL.
    # E.g. for http://localhost:8383/greenstone3/cgi-bin, the domain is localhost
    my $uri  = URI->new($url);
    my $host = $uri->can('host') ? $uri->host : undef; # some URI classes have no host()

    # If the domain with http:// prefix is completely the same as the URL,
    # assume server.exe: then the actual URL is the result of suffixing the
    # port and enterlib properties in llssite.cfg.
    # \Q..\E keeps dots in the host name from acting as regex wildcards.
    if (defined $host && $url =~ m{http://\Q$host\E/?\z}) {
        # Drop a trailing slash first, otherwise the port would end up
        # after it ("http://host/:8282...").
        $url =~ s{/\z}{};
        # Only add the ":port" part when a port was actually configured.
        $url .= ":" . $portnumber if $portnumber ne "";
        $url .= $enterlib;
    } # else, apache web server

    return $url;
}

# Private helper: runs "ant get-default-servlet-url" and returns the last
# http:// URL found in its output, or undef. $gs_mode is only used in the
# failure message.
sub _gs3_library_url_from_ant {
    my ($gs_mode) = @_;

    # Either check build.properties for tomcat.server, tomcat.port and app.name (and default servlet name).
    # app.name is stored in app.path by build.xml. Need to move app.name in build.properties from build.xml
    #
    # Or, run the new target get-default-servlet-url
    # the output can look like:
    #
    # Buildfile: build.xml
    #    [echo] os.name: Windows Vista
    #
    # get-default-servlet-url:
    #    [echo] http://localhost:8383/greenstone3/library
    # BUILD SUCCESSFUL
    # Total time: 0 seconds

    my $perl_command = "ant get-default-servlet-url"; # assumes activate.pl is run from within the GS3 installation

    my $url = undef;
    my $pin;
    if (open($pin, '-|', $perl_command)) {
        while (defined(my $perl_output_line = <$pin>)) {
            if ($perl_output_line =~ m@http:\/\/(\S*)@) { # grab all the non-whitespace chars
                $url = "http://".$1;
            }
        }
        close($pin);
    }
    else {
        print STDERR "**** activate.pl::get_library_URL: Failed to run $perl_command to work out library URL for $gs_mode\n";
    }

    # either the url is still undef or it is now set
    return $url;
}
287
288### METHODS TO MOVE TO util.pm?
289
# Splits a qualified collection name ("colgroup/collection" or just
# "collection") into its two parts.
# Returns the list (collection, colgroup) -- collection FIRST; colgroup is
# the empty string when the name carries no group.
sub get_collection_parts {
    my ($qualified_collection) = @_;

    # activate.pl can be launched by a user from the command-line, so accept
    # either kind of slash (not only the URL-style ones GLI would send), and
    # tolerate a run of several slashes in a row.
    my ($leading_part, $trailing_part) = split(m{[/\\]+}, $qualified_collection);

    # Two parts: the first was the group and the second the collection.
    # One part: it is the collection itself and there is no group.
    return defined $trailing_part
        ? ($trailing_part, $leading_part)
        : ($leading_part, "");
}
312
# Work out the "collectdir/[colgroup/]collection" location.
#
# Parameters:
#   $collect_dir          - collect directory to use; if defined it wins
#   $qualified_collection - [colgroup/]collection name
#   $site                 - GS3 site name; used only when $collect_dir is undef
#
# Returns the path built by util::filename_cat:
#   $collect_dir/colgroup/collection                       if $collect_dir is defined,
#   GSDL3HOME/sites/$site/collect/colgroup/collection      else if $site is defined,
#   GSDLHOME/collect/colgroup/collection                   otherwise.
sub resolve_collection_dir {
    my ($collect_dir, $qualified_collection, $site) = @_; #, $gs_mode

    # get_collection_parts() returns the collection FIRST and the colgroup
    # second. Unpacking them the other way round (as was done before) put
    # the group below the collection for grouped collections.
    my ($collection, $colgroup) = get_collection_parts($qualified_collection);

    # $colgroup is "" for ungrouped collections
    # NOTE(review): presumably filename_cat skips empty components -- confirm.
    if (defined $collect_dir) {
        return &util::filename_cat($collect_dir, $colgroup, $collection);
    }

    if (defined $site) {
        return &util::filename_cat($ENV{'GSDL3HOME'}, "sites", $site, "collect", $colgroup, $collection);
    }

    return &util::filename_cat($ENV{'GSDLHOME'}, "collect", $colgroup, $collection);
}
331
332### UNUSED METHODS TO MOVE TO util.pm?
333
# This method is now unused: ping_library is used instead to send the ping
# action to a GS2/GS3 server. It can still serve more generally to test
# whether a URL is alive.
# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
# and http://www.perlmonks.org/?node_id=618534
#
# Returns 0 when no URL is given, otherwise the scalar result of a HEAD
# request on the URL.
sub is_URL_active {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Set LWP useragent to 5s max timeout for testing the URL.
    # Need to set this, else it takes I don't know how long to timeout
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(5);

    # A HEAD request is enough here; LWP::Simple::get($url) is more
    # intensive, so don't need to do that.
    my $head_result = LWP::Simple::head($url);
    #print STDERR "**** $url is alive.\n" if $head_result;
    return $head_result;
}
353
# Pings the host named in a gs3 or gs2 URL.
# Pinging seems to always return true, so this method doesn't work.
#
# Returns 0 when no URL is given, otherwise the result of Net::Ping's ping
# on the URL's host (which appears to be undef rather than 0 on failure).
sub pingHost {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 if !defined $url;

    # Get just the domain. "http://localhost/gsdl?uq=332033495" becomes "localhost"
    # "http://localhost/greenstone/cgi-bin/library.cgi" becomes "localhost" too.
    # Using URI is more robust than stripping the URL by hand:
    # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
    my $host = URI->new($url)->host;

    # Ping the host. http://perldoc.perl.org/Net/Ping.html
    my $pinger    = Net::Ping->new();
    my $reachable = $pinger->ping($host);
    if ($reachable) {
        print STDERR "**** $host is alive.\n";
    }
    $pinger->close();

    # return whether pinging was a success or failure
    return $reachable;
}
382
383
# Entry point: moves a freshly built collection's "building" directory to
# "index", deactivating the collection on a running persistent server
# beforehand and activating it again afterwards.
#
# Parameters: ($argc, @argv) - the argument count followed by the arguments.
# The last argument is always taken as the [colgroup/]collection name.
#
# Most of the arguments are familiar from the building scripts like buildcol.pl
# The special optional argument -library_url is for when we're dealing with a web
# library server such as an apache that's separate from any included with GS2.
# In such a case, this script's caller should pass in -library_url <URL>.
#
# $site argument must be specified in the cmdline for collectionConfig.xml to get
# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
#
# Exits with -1 after printing usage, when the building directory is missing
# or empty, or when the collection cannot be used.
sub main
{
    my ($argc, @argv) = @_;

    if (($argc == 0) || (($argc == 1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
        # basename() also copes with $0 having no directory part (e.g. when
        # run as "perl activate.pl"), where the former regex left the
        # program name undefined.
        my $progname = File::Basename::basename($0);

        print STDERR "\n";
        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -removeold -keepold -library_url URL] <[colgroup/]collection>\n";
        print STDERR "\n";

        exit(-1);
    }

    # get the collection details
    my $qualified_collection = pop @argv; # qualified collection

    # NOTE(review): with this default the -site / GSDLHOME fallbacks in
    # resolve_collection_dir are only reached if -collectdir is given
    # without a value -- confirm that the relative default is intended.
    my $collect_dir = "collect"; # undef
    my $build_dir = undef;
    my $index_dir = undef;
    my $site = undef;

    my $removeold = 0;
    my $keepold = 0;

    my $library_url = undef; # to be specified on the cmdline if not using a GS-included web server

    # Loop on the remaining argument count rather than on the truth of each
    # argument, so a value such as "0" cannot end option parsing early.
    while (@argv) {
        my $arg = shift @argv;

        if ($arg eq "-collectdir") {
            $collect_dir = shift @argv;
        }
        elsif ($arg eq "-builddir") {
            $build_dir = shift @argv;
        }
        elsif ($arg eq "-indexdir") {
            $index_dir = shift @argv;
        }
        elsif ($arg eq "-site") {
            $site = shift @argv;
        }
        elsif ($arg eq "-removeold") {
            $removeold = 1;
        }
        elsif ($arg eq "-keepold") {
            $keepold = 1;
        }
        elsif ($arg eq "-library_url") {
            $library_url = shift @argv;
        }
    }

    # work out the building and index dirs
    #$build_dir = &util::filename_cat($collect_dir, $qualified_collection, "building") unless (defined $build_dir);
    #$index_dir = &util::filename_cat($collect_dir, $qualified_collection, "index") unless (defined $index_dir);
    my $collection_dir = &resolve_collection_dir($collect_dir, $qualified_collection, $site);
    $build_dir = &util::filename_cat($collection_dir, "building") unless (defined $build_dir);
    $index_dir = &util::filename_cat($collection_dir, "index") unless (defined $index_dir);

    &print_task_msg("Running Collection Activation Stage");

    # can't do anything without a build directory with something in it to move into index
    if (!&util::dir_exists($build_dir)) {
        print STDERR "Can't move building to index: no building folder at $build_dir\n";
        exit -1;
    }
    elsif (&util::is_dir_empty($build_dir)) {
        print STDERR "Nothing in building folder at $build_dir to move to index folder\n";
        exit -1;
    }

    # get and check the collection name
    if ((&colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
        print STDERR "Unable to use collection \"$qualified_collection\" within \"$collect_dir\"\n";
        exit -1;
    }

    # Read in the collection configuration file.
    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
    my ($collect_cfg_filename, $gs_mode) = &colcfg::get_collect_cfg_name(STDERR);

    my $collectcfg = &colcfg::read_collection_cfg($collect_cfg_filename, $gs_mode);

    # look for build.cfg/buildConfig.xml
    my $build_cfg_filename = "";

    if ($gs_mode eq "gs2") {
        $build_cfg_filename = &util::filename_cat($build_dir, "build.cfg");
    }
    else {
        $build_cfg_filename = &util::filename_cat($build_dir, "buildConfig.xml");
        # gs_mode is GS3. Set the site now if this was not specified as cmdline argument
        #$site = "localsite" unless defined $site;
    }

    # We would like to know what the buildtype is
    # This will be useful for Solr.
    if (-e $build_cfg_filename) {

        # testing if there has been a change of indexer
        # (e.g. collect.cfg now says lucene, but build.cfg says mgpp)
        my $buildcfg = &colcfg::read_building_cfg($build_cfg_filename, $gs_mode);
        if ($buildcfg->{'buildtype'} ne $collectcfg->{'buildtype'}) {
            ### do something? See incremental-buildcol.pl
        }
        else {
            ### do something? See incremental-buildcol.pl
        }
    }
    else { # build.cfg doesn't exist
        ### do something? See incremental-buildcol.pl
    }


    # Now the logic in GLI's CollectionManager.java (processComplete()
    # and installCollection()) and Gatherer.configGS3Server().

    # 1. Get library URL

    # For web servers that are external to a Greenstone installation,
    # the user can pass in their web server's library URL.
    # For web servers included with GS (like tomcat for GS3 and server.exe
    # and apache for GS2), we work out the library URL:
    if (!$library_url) {
        $library_url = &get_library_URL($gs_mode);
    }

    # CollectionManager's installCollection phase in GLI
    # 2. Ping the library URL, and if it's a persistent server and running, release the collection

    my $is_persistent_server = undef;
    if (&ping_library($library_url, $gs_mode, $site)) {
        # server is running, so release the collection if the server is persistent
        $is_persistent_server = &is_persistent($library_url, $gs_mode);
        if ($is_persistent_server) {
            &deactivate_collection($library_url, $gs_mode, $qualified_collection, $site);
        }
    }

    # 3. Do all the moving building to index stuff now

    ### If removeold: replace index dir with building dir
    # if keepold: move building's contents into index, where only duplicates will get deleted

    #if(!$removeold && !$keepold) {
    #    $removeold = 1;
    #} elsif($removeold && $keepold) {
    #    $keepold = 0;
    #}

    ($removeold, $keepold) = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
        0,          # incremental is irrelevant to what activate.pl does, setting this = 0
        $build_dir, # checkdir. Usually archives or export to be deleted. activate.pl deletes building
        $collectcfg);

    if ($removeold) {

        if (&util::dir_exists($index_dir)) {
            &print_task_msg("Removing \"index\"");
            &util::rm_r($index_dir);

            # Wait for a couple of seconds, just for luck
            sleep 2;

            if (&util::dir_exists($index_dir)) {
                print STDERR "The index directory $index_dir could not be deleted.\n"; # CollectionManager.Index_Not_Deleted
            }
        }

        # if remote GS server: gliserver.pl would call activate.pl to activate
        # the collection at this point since activate.pl lives on the server side

        # Move the building directory to become the new index directory
        &print_task_msg("Moving \"building\" -> \"index\"");
        &util::mv($build_dir, $index_dir);
        if (&util::dir_exists($build_dir) || !&util::dir_exists($index_dir)) {
            print STDERR "Could not move $build_dir to $index_dir.\n"; # CollectionManager.Build_Not_Moved
        }
    }
    elsif ($keepold) {
        # Copy just the contents of building dir into the index dir, overwriting
        # existing files, but don't replace index with building.
        &print_task_msg("Moving \"building\" -> \"index\"");
        &util::mv_dir_contents($build_dir, $index_dir);
    }


    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again

    # Check for success: if building does not exist OR is empty
    if (!&util::dir_exists($build_dir) || &util::is_dir_empty($build_dir)) {
        if (&ping_library($library_url, $gs_mode, $site)) {
            # don't need to work out persistency of server more than once
            $is_persistent_server = &is_persistent($library_url, $gs_mode) unless (defined $is_persistent_server);
            if ($is_persistent_server) {
                &activate_collection($library_url, $gs_mode, $qualified_collection, $site);
            }
        }
    }
    else { # installcollection failed
        #CollectionManager.Preview_Ready_Failed
        print STDERR "Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n";
    }
}
594
# Script entry point: hand main() the argument count followed by the
# command-line arguments themselves.
main(scalar(@ARGV), @ARGV);
596
597
598
599
Note: See TracBrowser for help on using the repository browser.