source: main/trunk/greenstone2/bin/script/activate.pl@ 29948

Last change on this file since 29948 was 29948, checked in by ak19, 9 years ago

Final of two commits to fix rebuilding a collection using the online editor on Windows. Resolves the file lock problem. The changes to the java code from the previous commit work with changes in activate.pl in this commit. activate.pl is passed a flag now so that it no longer de-activates and re-activates the collection itself, but just concerns itself with moving building to index. The GS2Construct java code now de-activates the collection prior to calling activate.pl and then re-activates it afterward. In the way it was done before, activate.pl used to handle de- and re-activating the collection. But when it was launched from the server java code, the java VM would exit having left a copy of the file handles to the perl process when forking the process for activate.pl. The perl code could not move building to index since the file handles had locks (6 of them) on the index/text/collection.gdb. Changes have been made to GS2PerlConstructor too, so that it more cleanly closes all the pipes of a process, that the process itself may thereby exit cleanly. Not yet able to move this properly into its own classes since the StreamGobbler classes in GLI are not quite suited but were customised for FormatConverter. Further changes to activate.pl are for better error reporting.

  • Property svn:executable set to *
File size: 32.3 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37use Config;
38
# Compile-time set-up of @INC, so that the "use" statements further down can
# find the Greenstone perl modules and the bundled CPAN modules that match the
# version of perl running this script.
# Dies if the GSDLHOME or GSDLOS environment variables are not set.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");

    # Adding cpan in, adds in its auto subfolder which conflicts with ActivePerl on Windows
    # The auto folder has been moved into a perl-5.8 folder, and this will now be included
    # only if the current version of perl is 5.8 (and not ActivePerl).
    my $perl_dir;

    # Note: $] encodes the version number of perl, e.g. 5.008009 for perl 5.8.9
    if ($] >= 5.010) {
        # characters 3-4 of $] hold the minor version: "5.010001" -> "10"
        $perl_dir = "perl-5.".substr($],3,2);
    }
    elsif ($] > 5.008) {
        # perl 5.8.1 or above
        $perl_dir = "perl-5.8";
    }
    elsif ($] < 5.008) {
        # assume perl 5.6
        $perl_dir = "perl-5.6";
    }
    else {
        # exactly perl 5.8.0: still use the 5.8 modules, but warn the user.
        # (An extra "elsif (\$] >= 5.008)" branch used to make this unreachable.)
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
        $perl_dir = "perl-5.8";
    }

    #if ($ENV{'GSDLOS'} !~ /^windows$/i) {
    # Use push to put this on the end, so an existing XML::Parser will be used by default
    if (-d "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt" && $Config{usethreads}) {
        # a multi-threaded perl gets the "-mt" build of the modules, if present
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt");
    }
    else {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
    }
    #}

}
83
84
85use strict;
86no strict 'refs'; # allow filehandles to be variables and vice versa
87no strict 'subs'; # allow barewords (eg STDERR) as function arguments
88
89use File::Basename;
90use File::Find;
91
92use HTTP::Response;
93use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
94 # don't import LWP::Simple's head function by name since it can conflict with CGI:head())
95#use CGI qw(:standard); # then only CGI.pm defines a head()
96use Net::Ping;
97use URI;
98
99use colcfg;
100use scriptutil;
101use util;
102#use enum;
103
104# enumerations in perl, http://stackoverflow.com/questions/473666/does-perl-have-an-enumeration-type
105# Unfortunately, not part of perl's core
106#use enum qw(LEVEL_NONE LEVEL_ERROR LEVEL_INFO LEVEL_DEBUG); # debugging levels NONE == 0, ERROR=1 INFO=2 DEBUG=3
107
# global variables
#my $default_verbosity = LEVEL_ERROR; # by default we display basic error messages

# Verbosity that print_msg and print_task_msg fall back on when no explicit
# verbosity setting is passed to them. main() overwrites this value when
# -verbosity is given on the command line.
my $default_verbosity = 2; # by default we display basic error and info messages
112
# Prints a banner to STDERR announcing the task that is about to run.
#   $task_msg          - text to display inside the banner
#   $verbosity_setting - optional; falls back on the global $default_verbosity
#                        when not passed in. The banner is only printed when
#                        the verbosity is 1 or higher.
sub print_task_msg {
    my ($task_msg, $verbosity_setting) = @_;

    # Test with "defined" (as print_msg does), so that an explicit verbosity
    # of 0 silences the banner instead of being replaced by the default.
    $verbosity_setting = $default_verbosity unless defined $verbosity_setting;

    if ($verbosity_setting >= 1) {
        print STDERR "\n";
        print STDERR "************************\n";
        print STDERR "* $task_msg\n";
        print STDERR "************************\n";
    }
}
125
# Writes $msg to STDERR when the verbosity in effect is at least the level
# the message requires. No newline is appended to the message.
#   $msg               - the text to print
#   $min_verbosity     - optional level needed for the message to show (default 1)
#   $verbosity_setting - optional verbosity in effect (default: $default_verbosity)
sub print_msg {
    my ($msg, $min_verbosity, $verbosity_setting) = @_;

    my $level_in_effect = defined $verbosity_setting ? $verbosity_setting : $default_verbosity;
    my $level_required  = defined $min_verbosity ? $min_verbosity : 1;

    # by default all level 1 messages are displayed
    print STDERR "$msg" if $level_in_effect >= $level_required;
}
140
# Sends a command to a GS2 or GS3 library URL and checks the reply.
# The commands used in this script are activate, deactivate, ping,
# and is-persistent (is-persistent only implemented for GS2).
#   $library_url                 - base URL of the library; 0 is returned if undefined
#   $command                     - query string appended to $library_url
#   $check_message_against_regex - pattern the page content has to match for success
#   $site                        - unused here (Gatherer.java's configGS3Server
#                                  doesn't use the site variable either)
#   $expected_error_code         - optional HTTP error status that is not worth reporting
#   $silent                      - if true, an unexpected response containing
#                                  "did not succeed" is not reported
# Returns 1 if the request succeeded and the content matched the pattern, else 0.
sub config {
    my ($library_url, $command, $check_message_against_regex, $site, $expected_error_code, $silent) = @_;

    return 0 unless defined $library_url;

    # Set the LWP useragent to a 5s max timeout, else it takes very long to time out
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(5);

    # For GS2 the HTTP status isn't enough: the page content needs reading too, since
    # that is what CollectionManager.config() stipulates. LWP::UserAgent's get($url)
    # returns an HTTP::Response object: content() is the page, code() the HTTP status.
    my $request_url = $library_url.$command;
    my $response_obj = $ua->get($request_url);
    my $response_code = $response_obj->code();

    if (!LWP::Simple::is_success($response_code)) {
        # Either a proper HTTP error status, or some other non-success status.
        # Only an error status that the caller told us to expect is not reported.
        my $error_was_expected = LWP::Simple::is_error($response_code)
            && defined $expected_error_code
            && $response_code == $expected_error_code;

        if ($error_was_expected) {
            print_msg(" Response status $response_code as expected.\n", 3);
        }
        else {
            print_msg("*** Command $request_url\n");
            print_msg("*** Unexpected error. HTTP Response Status: $response_code - Failed.\n");
        }
        return 0; # return false, since the request did not succeed, expected or not
    }

    print_msg("*** Command $request_url\n", 3);
    print_msg("*** HTTP Response Status: $response_code - Complete.", 3);

    # Cut the page down to what lies between the gs_content marker and the
    # first closing div after it, for display purposes
    my $response_content = $response_obj->content;
    my $resultstr = $response_content;
    $resultstr =~ s@.*gs_content\"\>@@s;
    $resultstr =~ s@</div>.*@@s;

    # check the page content is as expected
    if ($response_content =~ m/$check_message_against_regex/) {
        print_msg(" Response as expected.\n", 3);
        print_msg("@@@@@@ Got result:\n$resultstr\n", 4);
        return 1;
    }

    # If we expect the collection to be inactive, then we'd be in silent mode: in that
    # case only the ping "did not succeed" response is suppressed. Any other message
    # that is returned is always printed.
    my $suppress_report = $silent && $response_content =~ m/did not succeed/;
    unless ($suppress_report) {
        print_msg("\n\tBUT: command $request_url response UNEXPECTED.\n", 3);
        print_msg("*** Got message:\n$response_content.\n", 4);
        print_msg("*** Got result:\n$resultstr\n", 3);
    }
    return 0; # ping on a collection may "not succeed."
}
214
# Asks the library server at $library_url to release (deactivate) a collection.
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"
#   $qualified_collection - collection name, possibly prefixed with its colgroup
#   $site                 - GS3 site name, passed on to config() for gs3
# Returns the result of config() (true on success), or 0 for an unknown $gs_mode.
sub deactivate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
        my $check_message_against_regex = q/configured release-collection/;
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
        # config() uses this string as a regex, so the collection name is quoted
        # to make any regex metacharacters in it match literally
        my $check_message_against_regex = "collection: \Q$qualified_collection\E deactivated";
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0;
}
229
# Asks the library server at $library_url to add (activate) a collection.
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"
#   $qualified_collection - collection name, possibly prefixed with its colgroup
#   $site                 - GS3 site name, passed on to config() for gs3
# Returns the result of config() (true on success), or 0 for an unknown $gs_mode.
sub activate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
        my $check_message_against_regex = q/configured add-collection/;
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }

    if ($gs_mode eq "gs3") {
        my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
        # config() uses this string as a regex, so the collection name is quoted
        # to make any regex metacharacters in it match literally
        my $check_message_against_regex = "collection: \Q$qualified_collection\E activated";
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0;
}
244
# Sends the given ping command to the library and reports whether it succeeded.
# - If the GS server is not running, a "500" status code is *expected*, so that
#   status is passed to config() as the expected error code.
# - If the GS server is running, "Ping" ... "succeeded" is expected on success.
# - When pinging an inactive collection, the reply says it did "not succeed",
#   which makes this return false.
# $gs_mode is accepted for symmetry with the callers but is not used here.
sub ping {
    my ($library_url, $command, $gs_mode, $site, $silent) = @_;

    print_msg("*** COMMAND WAS: |$command|***\n", 4);

    return config($library_url, $command, q/(succeeded)/, $site, 500, $silent);
}
259
# General server-level ping: sends a pingaction to the GS library.
# An unrecognised $gs_mode results in an empty command being sent.
sub ping_library {
    my ($library_url, $gs_mode, $site) = @_;

    my %ping_command_for = (
        "gs2" => "?a=ping",
        "gs3" => "?a=s&sa=ping",
    );
    my $command = exists $ping_command_for{$gs_mode} ? $ping_command_for{$gs_mode} : "";

    return ping($library_url, $command, $gs_mode, $site);
}
273
274
# Collection-level ping: sends a pingaction to a collection in the GS library
# to check whether that collection is active.
# An unrecognised $gs_mode results in an empty command being sent.
sub ping_library_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site, $silent) = @_;

    my %ping_command_for = (
        "gs2" => "?a=ping&c=$qualified_collection",
        "gs3" => "?a=s&sa=ping&st=collection&sn=$qualified_collection",
    );
    my $command = exists $ping_command_for{$gs_mode} ? $ping_command_for{$gs_mode} : "";

    return ping($library_url, $command, $gs_mode, $site, $silent);
}
288
# Returns true if the server is persistent.
# The GS3 server is always persistent, so only a GS2 library_url actually gets
# asked, by sending it the is-persistent command. The reply contains either
# "isPersistent: true" or "isPersistent: false".
sub is_persistent {
    my ($library_url, $gs_mode) = @_;

    return ($gs_mode eq "gs3")
        ? 1
        : config($library_url, "?a=is-persistent", q/true/);
}
302
# Works out the library URL of a web server that is included with Greenstone.
# For GS2, the URL is derived from the llssite.cfg file.
# For GS3, ant is asked for the library URL.
#   $gs_mode - "gs2" or "gs3"
# Returns the URL, or undef if it could not be determined (for GS2 this
# includes the case where the library is not running yet).
sub get_library_URL {
    my $gs_mode = shift(@_); # gs3 or gs2

    my $url = undef;

    if ($gs_mode eq "gs2") {
        $url = _get_gs2_library_URL();
    }
    elsif ($gs_mode eq "gs3") {
        $url = _get_gs3_library_URL($gs_mode);
    }

    # either the url is still undef or it is now set
    return $url;
}

# Helper for get_library_URL: reads the library URL from GSDLHOME/llssite.cfg.
# Returns nothing if the file is missing or unreadable, or if the file holds
# no usable url property.
sub _get_gs2_library_URL {
    my $llssite_cfg = FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "llssite.cfg");
    return unless -f $llssite_cfg;

    # Read in the entire contents of the file in one hit
    my $fin;
    if (!open($fin, '<', $llssite_cfg)) {
        print_msg("activate.pl::get_library_URL failed to open $llssite_cfg ($!)\n");
        return;
    }
    my $contents = do { local $/; <$fin> };
    close($fin);
    $contents = "" unless defined $contents;

    # check llssite.cfg for line with url property
    # for server.exe also need to use portnumber and enterlib properties
    my $url;
    my $enterlib = "";
    my $portnumber = ""; # will remain empty (implicit port 80) unless it's specifically been assigned

    foreach my $line (split(/[\n\r]+/, $contents)) { # split on carriage-returns and/or linefeeds
        if ($line =~ m/^url=(.*)$/) {
            $url = $1;
        } elsif ($line =~ m/^enterlib=(.*)$/) {
            $enterlib = $1;
        } elsif ($line =~ m/^portnumber=(.*)$/) {
            $portnumber = $1;
        }
    }

    return unless $url;

    # do not process url=URL_pending in the file, since for server.exe
    # this just means the Enter Library button hasn't been pressed yet
    return if $url eq "URL_pending"; # library is not running

    # In the case of server.exe, need to do extra work to get the proper URL
    # But first, need to know whether we're indeed dealing with server.exe:
    # compare the URL's host to the full URL
    # E.g. for http://localhost:8383/greenstone3/cgi-bin, the host is localhost
    my $uri = URI->new($url);
    my $host = $uri->can('host') ? $uri->host : undef; # a URL without a host part has no host method

    # If the host with http:// prefix is completely the same as the URL, assume server.exe:
    # then the actual URL is the result of suffixing the port and enterlib properties
    # in llssite.cfg. Else, it's the apache web server and the URL is used as is.
    if (defined $host && $url =~ m/^http:\/\/\Q$host\E\/?$/) {
        $url =~ s/\/$//;                              # else the port would end up after the slash
        $url .= ":".$portnumber if $portnumber ne ""; # no dangling ":" for the implicit port
        $url .= $enterlib;
    }

    return $url;
}

# Helper for get_library_URL: runs the get-default-servlet-url target of
# GS3's ant build.xml and picks the URL out of its output, which can look like:
#
# Buildfile: build.xml
# [echo] os.name: Windows Vista
#
# get-default-servlet-url:
# [echo] http://localhost:8383/greenstone3/library
# BUILD SUCCESSFUL
# Total time: 0 seconds
#
# $gs_mode is only used in the error message. Returns nothing on failure.
sub _get_gs3_library_URL {
    my ($gs_mode) = @_;

    # The ant target can be run from anywhere by specifying the location of GS3's
    # build.xml buildfile. GSDL3SRCHOME should have been set for GS3 by gs3-setup.sh,
    # a step that is necessary to run activate.pl in the first place.
    if (!defined $ENV{'GSDL3SRCHOME'}) {
        print_msg("activate.pl::get_library_URL: GSDL3SRCHOME not set, can't work out library URL for $gs_mode\n");
        return;
    }
    my $perl_command = "ant -buildfile \"$ENV{'GSDL3SRCHOME'}/build.xml\" get-default-servlet-url";

    my $url;
    my $pin;
    # The command is run through the shell as a single string, as it was before
    if (open($pin, "$perl_command |")) {
        while (defined (my $perl_output_line = <$pin>)) {
            # grab all the non-whitespace chars; the last URL in the output wins
            if ($perl_output_line =~ m@(https?://\S*)@) {
                $url = $1;
            }
        }
        close($pin);
    } else {
        print_msg("activate.pl::get_library_URL: Failed to run $perl_command to work out library URL for $gs_mode\n");
    }

    return $url;
}
411
412### UNUSED METHODS TO MOVE TO util.pm?
413
# This method is now unused: ping_library is used instead to send the ping action to
# a GS2/GS3 server. This method can be used more generally to test whether a URL is alive.
# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
# and http://www.perlmonks.org/?node_id=618534
# Returns 0 if no URL is passed in, else the outcome of a HEAD request on the URL.
sub is_URL_active {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Set the LWP useragent to a 10s max timeout for testing the URL,
    # else it takes very long to time out
    $ua->timeout(10);

    # A HEAD request is enough: LWP::Simple::get($url) is more intensive.
    # Assigned to a scalar first, so that head() is always called in scalar context.
    my $alive = LWP::Simple::head($url);
    return $alive;
}
433
# Pings the host of the given gs3 or gs2 URL with Net::Ping.
# Pinging seems to always return true, so this method doesn't work.
# Returns 0 if no URL is passed in, else whatever Net::Ping's ping() returned.
sub pingHost {
    my ($url) = @_;

    return 0 unless defined $url;

    # Get just the host: "http://localhost/gsdl?uq=332033495" becomes "localhost", and
    # "http://localhost/greenstone/cgi-bin/library.cgi" becomes "localhost" too
    # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
    my $host = URI->new($url)->host;

    # Ping the host. http://perldoc.perl.org/Net/Ping.html
    my $pinger = Net::Ping->new();
    my $reachable = $pinger->ping($host); # appears to be undef rather than 0 on failure
    print STDERR "**** $host is alive.\n" if $reachable;
    $pinger->close();

    # whether pinging was a success or failure
    return $reachable;
}
462
463
# Moves a freshly built collection's building folder into its index folder and,
# unless -skipactivation is given, deactivates the collection on a running
# persistent server beforehand and activates it again afterwards.
#
# Most of the arguments are familiar from the building scripts like buildcol.pl
# The special optional argument -library_url is for when we're dealing with a web
# library server such as an apache that's separate from any included with GS2.
# In such a case, this script's caller should pass in -library_url <URL>.
#
# $site argument must be specified in the cmdline for collectionConfig.xml to get
# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
#
#   $argc - number of commandline arguments
#   @argv - the commandline arguments, the last one being the [colgroup/]collection
sub main
{
    my ($argc, @argv) = @_;

    if (($argc == 0) || (($argc == 1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
        # basename also copes with the script being launched without any directory part
        my $progname = basename($0);

        print STDERR "\n";
        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -skipactivation -removeold -keepold -verbosity v\n";
        print STDERR "\t-library_url URL] <[colgroup/]collection>\n";
        print STDERR "\n";

        exit(-1);
    }

    # get the collection details
    my $qualified_collection = pop @argv; # qualified collection

    my $collect_dir = undef; #"collect"; # can't be "collect" when only -site is provided for GS3
    my $build_dir = undef;
    my $index_dir = undef;
    my $site = undef;

    # if run from server (java code), it will handle deactivation and activation to
    # prevent open file handles when java launches this script and exits:
    my $skipactivation = 0;
    my $removeold = 0;
    my $keepold = 0;
    my $incremental = 0; # used by solr

    # to be specified on the cmdline if not using a GS-included web server.
    # The GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the
    # linux package manager versions of GS3
    my $library_url = $ENV{'GSDL_LIBRARY_URL'} || undef;

    # Loop on the number of remaining arguments rather than on the truth of each
    # argument, so that a "0" or empty argument does not end the parsing early
    while (@argv) {
        my $arg = shift @argv;

        if ($arg eq "-collectdir") {
            $collect_dir = shift @argv;
        }
        elsif ($arg eq "-builddir") {
            $build_dir = shift @argv;
        }
        elsif ($arg eq "-indexdir") {
            $index_dir = shift @argv;
        }
        elsif ($arg eq "-site") {
            $site = shift @argv;
        }
        elsif ($arg eq "-skipactivation") {
            $skipactivation = 1;
        }
        elsif ($arg eq "-removeold") {
            $removeold = 1;
        }
        elsif ($arg eq "-keepold") {
            $keepold = 1;
        }
        elsif ($arg eq "-incremental") {
            $incremental = 1;
        }
        elsif ($arg eq "-library_url") {
            $library_url = shift @argv;
        }
        elsif ($arg eq "-verbosity") {
            $default_verbosity = shift @argv; # global variable

            # ensure we're working with ints not strings (int context not str context), in case verbosity=0
            # http://stackoverflow.com/questions/288900/how-can-i-convert-a-string-to-a-number-in-perl
            $default_verbosity = int($default_verbosity || 0);
        }
    }

    # work out the building and index dirs
    my $collection_dir = util::resolve_collection_dir($collect_dir, $qualified_collection, $site);
    $build_dir = FileUtils::filenameConcatenate($collection_dir, "building") unless (defined $build_dir);
    $index_dir = FileUtils::filenameConcatenate($collection_dir, "index") unless (defined $index_dir);

    print_task_msg("Running Collection Activation Stage");

    # get and check the collection name
    if ((colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
        # -collectdir is optional, so don't interpolate an undefined value
        my $collect_dir_shown = defined $collect_dir ? $collect_dir : "";
        print_msg("Unable to use collection \"$qualified_collection\" within \"$collect_dir_shown\"\n");
        exit -1;
    }

    # Read in the collection configuration file.
    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
    my $gs_mode = "gs2";
    if ((defined $site) && ($site ne "")) { # GS3
        $gs_mode = "gs3";
    }
    my $collect_cfg_filename = colcfg::get_collect_cfg_name(STDERR, $gs_mode);
    my $collectcfg = colcfg::read_collection_cfg($collect_cfg_filename, $gs_mode);

    # look for build.cfg/buildConfig.xml
    my $build_cfg_filename = "";

    if ($gs_mode eq "gs2") {
        $build_cfg_filename = FileUtils::filenameConcatenate($build_dir, "build.cfg");
    } else {
        $build_cfg_filename = FileUtils::filenameConcatenate($build_dir, "buildConfig.xml");
        # gs_mode is GS3. Set the site now if this was not specified as cmdline argument
        #$site = "localsite" unless defined $site;
    }

    # We need to know the buildtype for Solr.
    # Any change of indexers is already detected and handled by the calling code (buildcol or
    # full-rebuild), so that at this stage the config file's buildtype reflects the actual buildtype.

    # From buildcol.pl we use searchtype for determining buildtype, but for old versions, use buildtype
    my $buildtype;
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; #mg is the default
    }

    # can't do anything without a build directory with something in it to move into index
    # Except if we're (doing incremental) building for solr, where we want to still
    # activate and deactivate collections including for the incremental case
    if (!FileUtils::directoryExists($build_dir)) {
        print_msg("No building folder at $build_dir to move to index.\n");
        exit -1 unless ($buildtype eq "solr"); #&& $incremental);
    } elsif (FileUtils::isDirectoryEmpty($build_dir)) {
        print_msg("Nothing in building folder $build_dir to move into index folder.\n");
        exit -1 unless ($buildtype eq "solr"); #&& $incremental);
    }

    # Now the logic in GLI's CollectionManager.java (processComplete()
    # and installCollection()) and Gatherer.configGS3Server().

    # 1. Get library URL

    # For web servers that are external to a Greenstone installation,
    # the user can pass in their web server's library URL.
    # For web servers included with GS (like tomcat for GS3 and server.exe
    # and apache for GS2), we work out the library URL:
    if (!$library_url) {
        $library_url = get_library_URL($gs_mode); # returns undef if no server is running
    }

    # CollectionManager's installCollection phase in GLI
    # 2. Ping the library URL, and if it's a persistent server and running, release the collection

    my $is_persistent_server = undef;
    if (!$skipactivation && $library_url) { # undef if no valid server URL

        print_msg("Pinging $library_url\n");
        if (ping_library($library_url, $gs_mode, $site)) { # server running

            # server is running, so release the collection if
            # the server is persistent and the collection is active
            print_msg("Checking if Greenstone server is persistent\n");
            $is_persistent_server = is_persistent($library_url, $gs_mode);

            if ($is_persistent_server) { # only makes sense to issue activate and deactivate cmds to a persistent server

                print_msg("Checking if the collection $qualified_collection is already active\n");
                my $collection_active = ping_library_collection($library_url, $gs_mode, $qualified_collection, $site);

                if ($collection_active) {
                    print_msg("De-activating collection $qualified_collection\n");
                    deactivate_collection($library_url, $gs_mode, $qualified_collection, $site);
                }
                else {
                    print_msg("Collection is not active => No need to deactivate\n");
                }
            }
            else {
                print_msg("Server is not persistent => No need to deactivate collection\n");
            }
        }
        else {
            print_msg("No response to Ping => Taken to mean server is not running\n");
        }

    }

    # 2b. If we're working with a solr collection, then start up the solrserver now.
    my $solr_server;
    my @corenames = ();
    if ($buildtype eq "solr") { # start up the jetty server
        my $solr_ext = $ENV{'GEXT_SOLR'}; # from solr_passes.pl
        if (defined $solr_ext) {
            unshift (@INC, "$solr_ext/perllib");
        }
        else {
            # the require below will only work if solrserver is on @INC already
            print_msg("GEXT_SOLR not set: not adding the solr extension's perllib to the module search path\n");
        }
        require solrserver;

        # Solr cores are named without taking the collection-group name into account, since solr
        # is used for GS3 and GS3 doesn't use collection groups but has the site concept instead
        my ($colname) = util::get_collection_parts($qualified_collection);

        # See solrbuilder.pm to get the indexing levels (document, section) from the collectcfg file
        # Used to generate core names from them and remove cores by name
        foreach my $level ( @{$collectcfg->{'levels'}} ) {
            my ($pindex) = $level =~ /^(.)/;
            my $indexname = $pindex."idx";
            push(@corenames, "$site-$colname-$indexname"); #"$site-$colname-didx", "$site-$colname-sidx"
        }

        # If the Solr/Jetty server is not already running, the following starts
        # it up, and only returns when the server is "reading and listening"
        $solr_server = solrserver->new($build_dir);
        $solr_server->start();

        # We'll be moving building to index. For solr collection, there's further
        # special processing to make a corresponding change to the solr.xml
        # by removing the temporary building cores and (re)creating the index cores
    }


    # 3. Do all the moving building to index stuff now

    # If removeold: replace index dir with building dir.
    # If keepold: move building's contents into index, where only duplicates will get deleted.
    # removeold and keepold can't both be on at the same time
    # incremental becomes relevant for solr, though it was irrelevant to what activate.pl does (moving building to index)
    my $incremental_mode;
    ($removeold, $keepold, $incremental, $incremental_mode)
        = scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                  $incremental,
                                                  $build_dir, # checkdir. Usually archives or export to be deleted. activate.pl deletes building
                                                  $collectcfg);

    if ($removeold) {

        if (FileUtils::directoryExists($index_dir)) {
            print_task_msg("Removing \"index\"");

            if ($buildtype eq "solr") {
                # if solr, remove any cores that are using the index_dir before deleting this dir
                foreach my $corename (@corenames) {
                    $solr_server->admin_unload_core($corename);
                }
            }

            FileUtils::removeFilesRecursive($index_dir);

            # Wait for a couple of seconds, just for luck
            sleep 2;

            if (FileUtils::directoryExists($index_dir)) {
                print_msg("The index directory $index_dir could not be deleted.\n"); # CollectionManager.Index_Not_Deleted
            }
        }

        # if remote GS server: gliserver.pl would call activate.pl to activate
        # the collection at this point since activate.pl lives on the server side

        if ($buildtype eq "solr") {
            # if solr, remove any cores that are using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename");
            }
        }

        # Move the building directory to become the new index directory
        print_task_msg("Moving \"building\" -> \"index\"");
        FileUtils::moveFiles($build_dir, $index_dir);
        if (FileUtils::directoryExists($build_dir) || !FileUtils::directoryExists($index_dir)) {
            print_msg("Could not move $build_dir to $index_dir.\n"); # CollectionManager.Build_Not_Moved
        }
    }
    elsif ($keepold || $incremental) {
        if ($buildtype eq "solr") {
            # if solr, remove any cores that may be using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename") if $solr_server->admin_ping_core("building-$corename");
            }
        }

        # Copy just the contents of building dir into the index dir, overwriting
        # existing files, but don't replace index with building.
        print_task_msg("Moving \"building\" -> \"index\"");
        FileUtils::moveDirectoryContents($build_dir, $index_dir);
    }

    if ($buildtype eq "solr") {
        # Call CREATE action to get the old cores pointing to the index folder
        foreach my $corename (@corenames) {
            if ($removeold) {
                # Call CREATE action to get all cores pointing to the index folder, since building is now index
                $solr_server->admin_create_core($corename, $index_dir);

            } elsif ($keepold || $incremental) {
                # Call RELOAD core. Should already be using the index_dir directory for $keepold and $incremental case

                # Ping to see if corename exists, if it does, reload, else create
                if ($solr_server->admin_ping_core($corename)) {
                    $solr_server->admin_reload_core($corename);
                } else {
                    $solr_server->admin_create_core($corename, $index_dir);
                }
            }
        }

        # regenerate the solr.xml.in from solr.xml in case we are working off a dvd.
        $solr_server->solr_xml_to_solr_xml_in();
    }

    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again

    # Check for success: if building does not exist OR is empty
    if (!FileUtils::directoryExists($build_dir) || FileUtils::isDirectoryEmpty($build_dir)) {

        if (!$skipactivation && $library_url) { # undef if no valid server URL

            print_msg("Pinging $library_url\n");
            if (ping_library($library_url, $gs_mode, $site)) { # server running

                # don't need to work out persistency of server more than once, since the libraryURL hasn't changed
                if (!defined $is_persistent_server) {
                    print_msg("Checking if Greenstone server is persistent\n");
                    $is_persistent_server = is_persistent($library_url, $gs_mode);
                }

                if ($is_persistent_server) { # persistent server, so can try activating collection

                    print_msg("Checking if the collection $qualified_collection is not already active\n");

                    # Since we could have deactivated the collection at this point,
                    # it is likely that it is not yet active. When pinging the collection
                    # a "ping did not succeed" message is expected, therefore tell the ping
                    # to proceed silently
                    my $silent = 1;
                    my $collection_active = ping_library_collection($library_url, $gs_mode, $qualified_collection, $site, $silent);

                    if (!$collection_active) {
                        print_msg(" Collection is not active.\n");
                        print_msg("Activating collection $qualified_collection\n");
                        activate_collection($library_url, $gs_mode, $qualified_collection, $site);

                        # unless an error occurred, the collection should now be active:
                        $collection_active = ping_library_collection($library_url, $gs_mode, $qualified_collection, $site); # not silent if ping did not succeed
                        if (!$collection_active) {
                            print_msg("ERROR: collection $qualified_collection did not get activated\n");
                        }
                    }
                    else {
                        print_msg("Collection is already active => No need to activate\n");
                    }
                }
                else {
                    print_msg("Server is not persistent => No need to activate collection\n");
                }
            }
            else {
                print_msg("No response to Ping => Taken to mean server is not running\n");
            }
        }
    } else { # installcollection failed
        #CollectionManager.Preview_Ready_Failed
        print_msg("Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n");
    }

    print_msg("\n");

    # only stop the solr server if it was this script that started it
    if ($buildtype eq "solr") {
        if ($solr_server->explicitly_started()) {
            $solr_server->stop();
        }
    }
}
833
# Script entry point: hand main() the argument count followed by the arguments.
main(scalar(@ARGV), @ARGV);
Note: See TracBrowser for help on using the repository browser.