source: main/trunk/greenstone2/bin/script/activate.pl@ 29190

Last change on this file since 29190 was 29190, checked in by ak19, 10 years ago

Suppressing the ping collection 'did not succeed' message in silent mode. Using silent mode when the collection's been deactivated and we expect pinging a collection to return false.

  • Property svn:executable set to *
File size: 31.5 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37use Config;
38
# Compile-time setup: insist on the Greenstone environment variables and put
# Greenstone's own perllib directories on @INC, together with the CPAN
# directory that matches the running perl (relies on the file's "use Config").
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");

    # Adding cpan in, adds in its auto subfolder which conflicts with ActivePerl on Windows
    # The auto folder has been moved into a perl-5.8 folder, and this will now be included
    # only if the current version of perl is 5.8 (and not ActivePerl).
    my $perl_dir;

    # Note: $] encodes the version number of perl, e.g. 5.010001 for 5.10.1,
    # so characters 3-4 of it hold the minor version ("10", "36", ...).
    if ($] >= 5.010) {
        $perl_dir = "perl-5." . substr($], 3, 2);
    }
    elsif ($] >= 5.008) {
        # Any perl 5.8.x, including 5.8.0. (An earlier separate branch that
        # warned about 5.8.0 could never be reached and has been dropped.)
        $perl_dir = "perl-5.8";
    }
    else {
        # assume perl 5.6
        $perl_dir = "perl-5.6";
    }

    #if ($ENV{'GSDLOS'} !~ /^windows$/i) {
    # Use push to put this on the end, so an existing XML::Parser will be used by default
    if (-d "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt" && $Config{usethreads}) {
        # threaded perl and a matching multi-threaded build directory exists
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt");
    }
    else {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
    }
    #}

}
83
84
85use strict;
86no strict 'refs'; # allow filehandles to be variables and vice versa
87no strict 'subs'; # allow barewords (eg STDERR) as function arguments
88
89use File::Basename;
90use File::Find;
91
92use HTTP::Response;
93use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
94 # don't import LWP::Simple's head function by name since it can conflict with CGI:head())
95#use CGI qw(:standard); # then only CGI.pm defines a head()
96use Net::Ping;
97use URI;
98
99use colcfg;
100use scriptutil;
101use util;
102#use enum;
103
104# enumerations in perl, http://stackoverflow.com/questions/473666/does-perl-have-an-enumeration-type
105# Unfortunately, not part of perl's core
106#use enum qw(LEVEL_NONE LEVEL_ERROR LEVEL_INFO LEVEL_DEBUG); # debugging levels NONE == 0, ERROR=1 INFO=2 DEBUG=3
107
108# global variables
109#my $default_verbosity = LEVEL_ERROR; # by default we display basic error messages
110
# File-level verbosity used by print_task_msg() and print_msg() whenever the
# caller does not pass an explicit verbosity. main() overwrites it when the
# -verbosity option is given on the command line.
my $default_verbosity = 2; # by default we display basic error and info messages
112
# Prints a banner to STDERR announcing the task that is about to run.
#
#   $task_msg           text shown inside the banner
#   $verbosity_setting  optional verbosity to apply; when not supplied (undef)
#                       the file-level $default_verbosity is used
#
# The banner is printed only when the effective verbosity is at least 1.
sub print_task_msg {
    my ($task_msg, $verbosity_setting) = @_;

    # Test with "defined" (as print_msg does) so that an explicit verbosity
    # of 0 silences the banner instead of being replaced by the default.
    $verbosity_setting = $default_verbosity unless defined $verbosity_setting;

    if ($verbosity_setting >= 1) {
        print STDERR "\n";
        print STDERR "************************\n";
        print STDERR "* $task_msg\n";
        print STDERR "************************\n";
    }
}
125
# Writes $msg to STDERR when the verbosity in force is high enough.
# No newline is appended: callers put their own "\n" in $msg.
#
#   $msg                text to print
#   $min_verbosity      lowest verbosity at which $msg is shown (default 1)
#   $verbosity_setting  verbosity in force (default: file-level $default_verbosity)
sub print_msg {
    my ($msg, $min_verbosity, $verbosity_setting) = @_;

    # Fall back to the defaults only for arguments that were really omitted,
    # so that an explicit 0 is honoured.
    my $current_level  = defined $verbosity_setting ? $verbosity_setting : $default_verbosity;
    my $required_level = defined $min_verbosity     ? $min_verbosity     : 1;

    if ($current_level >= $required_level) {
        print STDERR "$msg";
    }
}
140
# Sends a command to a GS2 or GS3 library URL and reports whether the server
# answered as expected. The commands used in this script are activate,
# deactivate, ping and is-persistent (is-persistent only for GS2).
#
#   $library_url                  base URL of the library; undef => returns 0 at once
#   $command                      query string appended verbatim to $library_url
#   $check_message_against_regex  pattern the response body has to match
#   $site                         accepted but not used here (Gatherer.java's
#                                 configGS3Server doesn't use it either)
#   $expected_error_code          optional HTTP error status that is reported
#                                 quietly (verbosity 3) rather than as unexpected
#   $silent                       when true, a body containing "did not succeed"
#                                 is not reported as an unexpected response
#
# Returns 1 only for a successful HTTP status whose body matches the pattern;
# returns 0 in every other case, including an *expected* error status.
sub config {
    my ($library_url, $command, $check_message_against_regex, $site, $expected_error_code, $silent) = @_;

    return 0 if !defined $library_url;

    # Cap the wait at 5s, else it takes an unknown length of time to time out
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(5);

    # LWP::UserAgent's get() hands back an HTTP::Response, which gives us both
    # the status code and the page content (for GS2 the status alone is not
    # enough, the output has to be read as well).
    # http://search.cpan.org/~gaas/libwww-perl-6.04/lib/LWP/UserAgent.pm
    my $reply  = $ua->get($library_url.$command);
    my $status = $reply->code();

    if (!LWP::Simple::is_success($status)) {
        my $anticipated = LWP::Simple::is_error($status)
            && defined $expected_error_code
            && $status == $expected_error_code;

        if ($anticipated) {
            print_msg(" Response status $status as expected.\n", 3);
        }
        else {
            print_msg("*** Command $library_url$command\n");
            print_msg("*** Unexpected error. HTTP Response Status: $status - Failed.\n");
        }
        return 0; # false, since the status was not a success, expected or not
    }

    print_msg("*** Command $library_url$command\n", 3);
    print_msg("*** HTTP Response Status: $status - Complete.", 3);

    # A successful status still needs the page content to be as expected
    my $body = $reply->content;
    if ($body =~ m/$check_message_against_regex/) {
        print_msg(" Response as expected.\n", 3);
        return 1;
    }

    # In silent mode the caller expects the collection to be inactive, so the
    # ping "did not succeed" reply is kept quiet. Any other unexpected reply
    # is always reported.
    my $keep_quiet = $silent && $body =~ m/did not succeed/;
    if (!$keep_quiet) {
        print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
        print_msg("*** Got message:\n$body.\n", 4);
    }
    return 0; # ping on a collection may "not succeed."
}
208
# Asks the library server at $library_url to release (deactivate) a collection.
#
#   $library_url           base URL of the running library
#   $gs_mode               "gs2" or "gs3"; selects the command and expected reply
#   $qualified_collection  collection name, optionally prefixed by its group
#   $site                  GS3 site name, handed on to config() for gs3
#
# Returns the result of config() (1 if the server confirmed, else 0), or 0
# when $gs_mode is neither "gs2" nor "gs3".
sub deactivate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
        my $check_message_against_regex = q/configured release-collection/;
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }
    elsif ($gs_mode eq "gs3") {
        my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
        # config() uses this string as a regex, so escape the collection name:
        # characters such as "." or "+" in it must only match themselves.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " deactivated";
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0; # unknown mode: nothing was sent
}
223
# Asks the library server at $library_url to add (activate) a collection.
#
#   $library_url           base URL of the running library
#   $gs_mode               "gs2" or "gs3"; selects the command and expected reply
#   $qualified_collection  collection name, optionally prefixed by its group
#   $site                  GS3 site name, handed on to config() for gs3
#
# Returns the result of config() (1 if the server confirmed, else 0), or 0
# when $gs_mode is neither "gs2" nor "gs3".
sub activate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
        my $check_message_against_regex = q/configured add-collection/;
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }
    elsif ($gs_mode eq "gs3") {
        my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
        # config() uses this string as a regex, so escape the collection name:
        # characters such as "." or "+" in it must only match themselves.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " activated";
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0; # unknown mode: nothing was sent
}
238
# Sends the ping $command to $library_url through config() and returns its
# result: 1 when the reply contains "succeeded", otherwise 0.
#
# - Server not running: a "500" status is *expected*, so config() is told to
#   report that status quietly.
# - Server running: "Ping" ... "succeeded" is the reply on success.
# - Inactive collection: the reply says it did "not succeed"; $silent is
#   passed on so the caller can suppress that report.
#
# $gs_mode is accepted for symmetry with the other helpers but is not used.
sub ping {
    my ($library_url, $command, $gs_mode, $site, $silent) = @_;

    my $server_down_status = 500;
    my $success_pattern    = q/(succeeded)/;

    return config($library_url, $command, $success_pattern, $site, $server_down_status, $silent);
}
250
# General server-level ping: sends the ping action to the GS library itself.
# Returns what ping() returns. For a $gs_mode other than "gs2"/"gs3" the
# command is empty, i.e. the bare library URL gets requested.
sub ping_library {
    my ($library_url, $gs_mode, $site) = @_;

    my $command = $gs_mode eq "gs2" ? "?a=ping"
                : $gs_mode eq "gs3" ? "?a=s&sa=ping"
                :                     "";

    return ping($library_url, $command, $gs_mode, $site);
}
264
265
# Collection-level ping: asks the GS library whether $qualified_collection is
# active. Returns what ping() returns; $silent is forwarded so that an
# expected "did not succeed" reply can be kept quiet.
sub ping_library_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site, $silent) = @_;

    my $command = "";
    $command = "?a=ping&c=$qualified_collection" if $gs_mode eq "gs2";
    $command = "?a=s&sa=ping&st=collection&sn=$qualified_collection" if $gs_mode eq "gs3";

    return ping($library_url, $command, $gs_mode, $site, $silent);
}
279
# Tells whether the server behind $library_url is persistent.
# A GS3 server is always persistent, so 1 is returned without contacting it.
# For GS2 the is-persistent command is sent and the reply is searched for
# "true" (isPersistent: true versus isPersistent: false).
sub is_persistent {
    my ($library_url, $gs_mode) = @_;

    return 1 if $gs_mode eq "gs3";

    my $persistent_reply_pattern = q/true/;
    return config($library_url, "?a=is-persistent", $persistent_reply_pattern);
}
293
# Works out the library URL of a web server included with Greenstone.
#
#   $gs_mode  "gs2" or "gs3"
#
# For GS2 the URL is derived from GSDLHOME/llssite.cfg; for GS3 ant is asked
# for it (target get-default-servlet-url). Returns the URL, or undef when it
# cannot be determined (no llssite.cfg, no usable url property, library not
# yet entered, ant could not be run, or an unrecognised $gs_mode).
sub get_library_URL {
    my $gs_mode = shift(@_); # gs3 or gs2

    my $url = undef;

    if ($gs_mode eq "gs2") {
        my $llssite_cfg = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "llssite.cfg");

        if (-f $llssite_cfg) {
            # check llssite.cfg for line with url property
            # for server.exe also need to use portnumber and enterlib properties

            # Read in the entire contents of the file in one hit.
            # Three-argument open with a lexical handle: the file name can not
            # be misread as an open mode, and the handle is private to this sub.
            my $cfg_fh;
            if (!open($cfg_fh, '<', $llssite_cfg)) {
                print_msg("activate.pl::get_library_URL failed to open $llssite_cfg ($!)\n");
                return undef;
            }
            my $contents = do { local $/; <$cfg_fh> };
            close($cfg_fh);
            $contents = "" unless defined $contents;

            my $enterlib = "";
            my $portnumber = ""; # will remain empty (implicit port 80) unless it's specifically been assigned

            # split on carriage-returns and/or linefeeds
            foreach my $line (split(/[\n\r]+/, $contents)) {
                if ($line =~ m/^url=(.*)$/) {
                    $url = $1;
                } elsif ($line =~ m/^enterlib=(.*)$/) {
                    $enterlib = $1;
                } elsif ($line =~ m/^portnumber=(.*)$/) {
                    $portnumber = $1;
                }
            }

            if (!$url) {
                return undef;
            }
            elsif ($url eq "URL_pending") { # library is not running
                # do not process url=URL_pending in the file, since for server.exe
                # this just means the Enter Library button hasn't been pressed yet
                $url = undef;
            }
            else {
                # In the case of server.exe, need to do extra work to get the proper URL
                # But first, need to know whether we're indeed dealing with server.exe:

                # compare the URL's domain to the full URL
                # E.g. for http://localhost:8383/greenstone3/cgi-bin, the domain is localhost
                my $uri = URI->new( $url );
                # URI objects for URLs without a server-based scheme have no
                # host method; treat those like the apache case below.
                my $host = $uri->can('host') ? $uri->host : undef;

                # \Q..\E: dots in the host name must only match literal dots
                if (defined $host && $url =~ m/http:\/\/\Q$host\E(\/)?$/) {
                    # If the domain with http:// prefix is completely the same as the URL, assume server.exe
                    # then the actual URL is the result of suffixing the port and enterlib properties in llssite.cfg
                    $url =~ s/\/$//;                               # else we'd build http://host/:port...
                    $url .= ":".$portnumber if $portnumber ne "";  # no dangling ":" for the implicit port
                    $url .= $enterlib;
                } # else, apache web server
            }
        }
    } elsif ($gs_mode eq "gs3") {
        # Either check build.properties for tomcat.server, tomcat.port and app.name (and default servlet name).
        # app.name is stored in app.path by build.xml. Need to move app.name in build.properties from build.xml

        # Or, run the new target get-default-servlet-url
        # the output can look like:
        #
        # Buildfile: build.xml
        #    [echo] os.name: Windows Vista
        #
        # get-default-servlet-url:
        #    [echo] http://localhost:8383/greenstone3/library
        # BUILD SUCCESSFUL
        # Total time: 0 seconds

        # The get-default-servlet-url ant target can be run from anywhere by specifying the
        # location of GS3's ant build.xml buildfile. Activate.pl can be run from anywhere for GS3
        # GSDL3SRCHOME will be set for GS3 by gs3-setup.sh, a step that would have been necessary
        # to run the activate.pl script in the first place
        my $perl_command = "ant -buildfile \"$ENV{'GSDL3SRCHOME'}/build.xml\" get-default-servlet-url";

        # NOTE: the command goes through the shell; it is built only from the
        # GSDL3SRCHOME environment variable, not from user-supplied arguments.
        my $ant_fh;
        if (open($ant_fh, "$perl_command |")) {
            while (defined (my $perl_output_line = <$ant_fh>)) {
                # grab all the non-whitespace chars; the last URL printed wins
                if ($perl_output_line =~ m@http:\/\/(\S*)@) {
                    $url = "http://".$1;
                }
            }
            close($ant_fh);
        } else {
            print_msg("activate.pl::get_library_URL: Failed to run $perl_command to work out library URL for $gs_mode\n");
        }
    }

    # either the url is still undef or it is now set
    return $url;
}
402
403### UNUSED METHODS TO MOVE TO util.pm?
404
# Currently unused: ping_library is used instead to send the ping action to a
# GS2/GS3 server. This one can test more generally whether any URL is alive.
# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
# and http://www.perlmonks.org/?node_id=618534
#
# Returns 0 when $url is undef; otherwise the (scalar-context) result of
# LWP::Simple::head($url), which is false when the request fails.
sub is_URL_active {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Give up after 10s, else it takes an unknown length of time to time out
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(10);

    # A HEAD request is enough: LWP::Simple::get($url) would be more intensive
    my $alive = LWP::Simple::head($url);
    return $alive;
}
424
# Pings the host named in $url with Net::Ping and returns the ping result
# (0 when $url is undef). Prints a note to STDERR when the host answers.
# Pinging seems to always return true, so this method doesn't work as a test
# of whether the web server is up.
sub pingHost {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Get just the domain: "http://localhost/gsdl?uq=332033495" and
    # "http://localhost/greenstone/cgi-bin/library.cgi" both become "localhost".
    # URI does this more robustly than hand-written substitutions, see
    # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
    my $host = URI->new( $url )->host;

    # Ping the host. http://perldoc.perl.org/Net/Ping.html
    my $pinger = Net::Ping->new();
    my $reachable = $pinger->ping($host); # appears to be undef rather than 0 on failure
    print STDERR "**** $host is alive.\n" if $reachable;
    $pinger->close();

    # whether pinging was a success or failure
    return $reachable;
}
453
454
# Script entry point: deactivates the collection on a running persistent
# server, moves "building" to "index", then activates the collection again.
#
#   $argc  number of command-line arguments
#   @argv  the arguments; the last one is the <[colgroup/]collection> name
#
# Most of the options are familiar from the building scripts like buildcol.pl
# The special optional argument -library_url is for when we're dealing with a web
# library server such as an apache that's separate from any included with GS2.
# In such a case, this script's caller should pass in -library_url <URL>.
#
# $site argument must be specified in the cmdline for collectionConfig.xml to get
# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
#
# Exits with -1 on usage errors, when the collection can not be used, or when
# there is nothing to move for a non-solr collection.
sub main
{
    my ($argc,@argv) = @_;

    if (($argc==0) || (($argc==1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
        # basename() also gives a name when $0 carries no directory part
        # (e.g. "perl activate.pl"), where a path regex would capture nothing.
        my $progname = basename($0);

        print STDERR "\n";
        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -removeold -keepold -verbosity v\n";
        print STDERR "\t-library_url URL] <[colgroup/]collection>\n";
        print STDERR "\n";

        exit(-1);
    }

    # get the collection details
    my $qualified_collection = pop @argv; # qualified collection

    my $collect_dir = undef; #"collect"; # can't be "collect" when only -site is provided for GS3
    my $build_dir = undef;
    my $index_dir = undef;
    my $site = undef;

    my $removeold = 0;
    my $keepold = 0;
    my $incremental = 0; # used by solr

    my $library_url = undef; # to be specified on the cmdline if not using a GS-included web server

    # Loop on "defined" so that a false-looking argument ("0" or "") does not
    # end option processing early.
    while (defined(my $arg = shift @argv)) {
        if ($arg eq "-collectdir") {
            $collect_dir = shift @argv;
        }
        elsif ($arg eq "-builddir") {
            $build_dir = shift @argv;
        }
        elsif ($arg eq "-indexdir") {
            $index_dir = shift @argv;
        }
        elsif ($arg eq "-site") {
            $site = shift @argv;
        }
        elsif ($arg eq "-removeold") {
            $removeold = 1;
        }
        elsif ($arg eq "-keepold") {
            $keepold = 1;
        }
        elsif ($arg eq "-incremental") {
            $incremental = 1;
        }
        elsif ($arg eq "-library_url") {
            $library_url = shift @argv;
        }
        elsif ($arg eq "-verbosity") {
            $default_verbosity = shift @argv; # global variable

            # ensure we're working with ints not strings (int context not str context), in case verbosity=0
            # http://stackoverflow.com/questions/288900/how-can-i-convert-a-string-to-a-number-in-perl
            $default_verbosity = int($default_verbosity || 0);
        }
    }

    # work out the building and index dirs
    my $collection_dir = &util::resolve_collection_dir($collect_dir, $qualified_collection, $site);
    $build_dir = &FileUtils::filenameConcatenate($collection_dir, "building") unless (defined $build_dir);
    $index_dir = &FileUtils::filenameConcatenate($collection_dir, "index") unless (defined $index_dir);

    print_task_msg("Running Collection Activation Stage");

    # get and check the collection name
    if ((&colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
        # -collectdir is optional: print an empty name rather than interpolate undef
        my $collect_dir_shown = defined $collect_dir ? $collect_dir : "";
        print_msg("Unable to use collection \"$qualified_collection\" within \"$collect_dir_shown\"\n");
        exit -1;
    }

    # Read in the collection configuration file.
    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
    my $gs_mode = "gs2";
    if ((defined $site) && ($site ne "")) { # GS3
        $gs_mode = "gs3";
    }
    my $collect_cfg_filename = &colcfg::get_collect_cfg_name(STDERR, $gs_mode);
    my $collectcfg = &colcfg::read_collection_cfg ($collect_cfg_filename,$gs_mode);

    # look for build.cfg/buildConfig.xml
    # NOTE(review): $build_cfg_filename is computed but not used further in main.
    my $build_cfg_filename ="";

    if ($gs_mode eq "gs2") {
        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir,"build.cfg");
    } else {
        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir, "buildConfig.xml");
        # gs_mode is GS3. Set the site now if this was not specified as cmdline argument
        #$site = "localsite" unless defined $site;
    }

    # We need to know the buildtype for Solr.
    # Any change of indexers is already detected and handled by the calling code (buildcol or
    # full-rebuild), so that at this stage the config file's buildtype reflects the actual buildtype.

    # From buildcol.pl we use searchtype for determining buildtype, but for old versions, use buildtype
    my $buildtype;
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; #mg is the default
    }

    # can't do anything without a build directory with something in it to move into index
    # Except if we're (doing incremental) building for solr, where we want to still
    # activate and deactivate collections including for the incremental case
    if (!&FileUtils::directoryExists($build_dir)) {
        print_msg("No building folder at $build_dir to move to index.\n");
        exit -1 unless ($buildtype eq "solr"); #&& $incremental);
    } elsif (&FileUtils::isDirectoryEmpty($build_dir)) {
        print_msg("Nothing in building folder $build_dir to move into index folder.\n");
        exit -1 unless ($buildtype eq "solr"); #&& $incremental);
    }


    my $solr_server;
    my @corenames = ();
    if ($buildtype eq "solr") { # start up the jetty server
        my $solr_ext = $ENV{'GEXT_SOLR'}; # from solr_passes.pl
        # Only extend @INC when the solr extension location is known; an
        # undefined value would merely add a bogus "/perllib" entry.
        unshift (@INC, "$solr_ext/perllib") if defined $solr_ext;
        require solrserver;

        # Solr cores are named without taking the collection-group name into account, since solr
        # is used for GS3 and GS3 doesn't use collection groups but has the site concept instead
        my ($colname, $colgroup) = &util::get_collection_parts($qualified_collection);

        # See solrbuilder.pm to get the indexing levels (document, section) from the collectcfg file
        # Used to generate core names from them and remove cores by name
        foreach my $level ( @{$collectcfg->{'levels'}} ){
            my ($pindex) = $level =~ /^(.)/;
            my $indexname = $pindex."idx";
            push(@corenames, "$site-$colname-$indexname"); #"$site-$colname-didx", "$site-$colname-sidx"
        }

        # If the Solr/Jetty server is not already running, the following starts
        # it up, and only returns when the server is "reading and listening"
        $solr_server = solrserver->new($build_dir);
        $solr_server->start();

        # We'll be moving building to index. For solr collection, there's further
        # special processing to make a corresponding change to the solr.xml
        # by removing the temporary building cores and (re)creating the index cores
    }

    # Now the logic in GLI's CollectionManager.java (processComplete()
    # and installCollection()) and Gatherer.configGS3Server().

    # 1. Get library URL

    # For web servers that are external to a Greenstone installation,
    # the user can pass in their web server's library URL.
    # For web servers included with GS (like tomcat for GS3 and server.exe
    # and apache for GS2), we work out the library URL:
    if (!$library_url) {
        $library_url = get_library_URL($gs_mode); # returns undef if no server is running
    }

    # CollectionManager's installCollection phase in GLI
    # 2. Ping the library URL, and if it's a persistent server and running, release the collection

    my $is_persistent_server = undef;
    if ($library_url) { # undef if no valid server URL

        print_msg("Pinging $library_url\n");
        if (ping_library($library_url, $gs_mode, $site)) { # server running

            # server is running, so release the collection if
            # the server is persistent and the collection is active
            print_msg("Checking if Greenstone server is persistent\n");
            $is_persistent_server = is_persistent($library_url, $gs_mode);

            if ($is_persistent_server) { # only makes sense to issue activate and deactivate cmds to a persistent server

                print_msg("Checking if the collection $qualified_collection is already active\n");
                my $collection_active = ping_library_collection($library_url, $gs_mode, $qualified_collection, $site);

                if ($collection_active) {
                    print_msg("De-activating collection $qualified_collection\n");
                    deactivate_collection($library_url, $gs_mode, $qualified_collection, $site);
                }
                else {
                    print_msg("Collection is not active => No need to deactivate\n");
                }
            }
            else {
                print_msg("Server is not persistent => No need to deactivate collection\n");
            }
        }
        else {
            print_msg("No response to Ping => Taken to mean server is not running\n");
        }

    }

    # 3. Do all the moving building to index stuff now

    # If removeold: replace index dir with building dir.
    # If keepold: move building's contents into index, where only duplicates will get deleted.
    # removeold and keepold can't both be on at the same time
    # incremental becomes relevant for solr, though it was irrelevant to what activate.pl does (moving building to index)
    my $incremental_mode;
    ($removeold, $keepold, $incremental, $incremental_mode) = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                $incremental,
                                $build_dir, # checkdir. Usually archives or export to be deleted. activate.pl deletes building
                                $collectcfg);

    if ($removeold) {

        if (&FileUtils::directoryExists($index_dir)) {
            print_task_msg("Removing \"index\"");

            if ($buildtype eq "solr") {
                # if solr, remove any cores that are using the index_dir before deleting this dir
                foreach my $corename (@corenames) {
                    $solr_server->admin_unload_core($corename);
                }
            }

            &FileUtils::removeFilesRecursive($index_dir);

            # Wait for a couple of seconds, just for luck
            sleep 2;

            if (&FileUtils::directoryExists($index_dir)) {
                print_msg("The index directory $index_dir could not be deleted.\n"); # CollectionManager.Index_Not_Deleted
            }
        }

        # if remote GS server: gliserver.pl would call activate.pl to activate
        # the collection at this point since activate.pl lives on the server side

        if ($buildtype eq "solr") {
            # if solr, remove any cores that are using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename");
            }
        }

        # Move the building directory to become the new index directory
        print_task_msg("Moving \"building\" -> \"index\"");
        &FileUtils::moveFiles($build_dir, $index_dir);
        if (&FileUtils::directoryExists($build_dir) || !&FileUtils::directoryExists($index_dir)) {
            print_msg("Could not move $build_dir to $index_dir.\n"); # CollectionManager.Build_Not_Moved
        }
    }
    elsif ($keepold || $incremental) {
        if ($buildtype eq "solr") {
            # if solr, remove any cores that may be using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename") if $solr_server->admin_ping_core("building-$corename");
            }
        }

        # Copy just the contents of building dir into the index dir, overwriting
        # existing files, but don't replace index with building.
        print_task_msg("Moving \"building\" -> \"index\"");
        &FileUtils::moveDirectoryContents($build_dir, $index_dir);
    }

    if ($buildtype eq "solr") {
        # Call CREATE action to get the old cores pointing to the index folder
        foreach my $corename (@corenames) {
            if ($removeold) {
                # Call CREATE action to get all cores pointing to the index folder, since building is now index
                $solr_server->admin_create_core($corename, $index_dir);

            } elsif ($keepold || $incremental) {
                # Call RELOAD core. Should already be using the index_dir directory for $keepold and $incremental case

                # Ping to see if corename exists, if it does, reload, else create
                if ($solr_server->admin_ping_core($corename)) {
                    $solr_server->admin_reload_core($corename);
                } else {
                    $solr_server->admin_create_core($corename, $index_dir);
                }
            }
        }

        # regenerate the solr.xml.in from solr.xml in case we are working off a dvd.
        $solr_server->solr_xml_to_solr_xml_in();
    }

    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again

    # Check for success: if building does not exist OR is empty
    if (!&FileUtils::directoryExists($build_dir) || &FileUtils::isDirectoryEmpty($build_dir)) {

        if ($library_url) { # undef if no valid server URL

            print_msg("Pinging $library_url\n");
            if (ping_library($library_url, $gs_mode, $site)) { # server running

                # don't need to work out persistency of server more than once, since the libraryURL hasn't changed
                if (!defined $is_persistent_server) {
                    print_msg("Checking if Greenstone server is persistent\n");
                    $is_persistent_server = is_persistent($library_url, $gs_mode);
                }


                if ($is_persistent_server) { # persistent server, so can try activating collection

                    print_msg("Checking if the collection $qualified_collection is not already active\n");

                    # Since we could have deactivated the collection at this point,
                    # it is likely that it is not yet active. When pinging the collection
                    # a "ping did not succeed" message is expected, therefore tell the ping
                    # to proceed silently
                    my $silent = 1;
                    my $collection_active = ping_library_collection($library_url, $gs_mode, $qualified_collection, $site, $silent);

                    if (!$collection_active) {
                        print_msg(" Collection is not active.\n");
                        print_msg("Activating collection $qualified_collection\n");
                        activate_collection($library_url, $gs_mode, $qualified_collection, $site);

                        # unless an error occurred, the collection should now be active:
                        $collection_active = ping_library_collection($library_url, $gs_mode, $qualified_collection, $site); # not silent if ping did not succeed
                        if (!$collection_active) {
                            print_msg("ERROR: collection $qualified_collection did not get activated\n");
                        }
                    }
                    else {
                        print_msg("Collection is already active => No need to activate\n");
                    }
                }
                else {
                    print_msg("Server is not persistent => No need to activate collection\n");
                }
            }
            else {
                print_msg("No response to Ping => Taken to mean server is not running\n");
            }
        }
    } else { # installcollection failed
        #CollectionManager.Preview_Ready_Failed
        print_msg("Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n");
    }

    print_msg("\n");

    if ($buildtype eq "solr") {
        # only shut the solr/jetty server down if this script started it
        if ($solr_server->explicitly_started()) {
            $solr_server->stop();
        }
    }
}
817
# Run the script: main() expects the argument count followed by the arguments.
main(scalar(@ARGV), @ARGV);
Note: See TracBrowser for help on using the repository browser.