source: main/trunk/greenstone2/bin/script/activate.pl@ 27240

Last change on this file since 27240 was 27240, checked in by kjdon, 11 years ago

Because activate uses perl modules like LWP and NET and URI which may not be available in all standard perl distributions, these were recently added into CPAN by Dr Bainbridge. Along with that, cpan was added into the INC array in activate.pl. Doing so caused a conflict on the svn version of the code on Windows, where ActivePerl was installed. That perl installation had a conflict with the cpan\auto folder, in particular its HTML and Win32 subfolders, because these are perl5.8 specific. A new cpan\perl-5.8 folder is being introduced with its own auto subfolder which will contain all these things to prevent conflicts with other versions of perl. However, cpan\auto\Image(\Size) will still remain in the top cpan\auto level since that's not part of the ActivePerl installation. As it contains only perl code and no binaries, it may not be as tied down to any specific version of perl.

  • Property svn:executable set to *
File size: 29.7 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37use Config;
38
# Compile-time set-up of the module search path (@INC).
#
# Requires the GSDLHOME and GSDLOS environment variables (dies otherwise).
# Greenstone's perllib and perllib/cpan go to the FRONT of @INC; the
# perl-version-specific cpan folder goes to the END, so that modules already
# installed with the running perl (e.g. XML::Parser) take precedence.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");

    # Adding cpan in, adds in its auto subfolder which conflicts with ActivePerl on Windows.
    # The auto folder has been moved into a per-version folder (perl-5.8, perl-5.10, ...),
    # and only the folder matching the running perl is added.
    my $perl_dir;

    # Note: $] encodes the version number of perl as 5.MMMPPP (e.g. 5.008008, 5.010001)
    if ($] >= 5.010) {
        # digits 2..4 of "5.MMMPPP" are the zero-padded minor version; int() drops the padding
        $perl_dir = "perl-5." . int(substr($], 2, 3));
    }
    elsif ($] > 5.008) {
        # perl 5.8.1 or above
        $perl_dir = "perl-5.8";
    }
    elsif ($] >= 5.008) {
        # exactly perl 5.8.0 (anything greater was handled by the branch above)
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR "         Please upgrade to a newer version of Perl.\n";
        $perl_dir = "perl-5.8";
    }
    else {
        # older than 5.8: assume perl 5.6
        $perl_dir = "perl-5.6";
    }

    #if ($ENV{'GSDLOS'} !~ /^windows$/i) {
    # Use push to put this on the end, so an existing XML::Parser will be used by default.
    # A "-mt" variant of the folder is preferred when it exists and perl was built with threads.
    if (-d "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt" && $Config{usethreads}) {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt");
    }
    else {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
    }
    #}
}
83
84
85use strict;
86no strict 'refs'; # allow filehandles to be variables and vice versa
87no strict 'subs'; # allow barewords (eg STDERR) as function arguments
88
89use File::Basename;
90use File::Find;
91
92use HTTP::Response;
93use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
94 # don't import LWP::Simple's head function by name since it can conflict with CGI:head())
95#use CGI qw(:standard); # then only CGI.pm defines a head()
96use Net::Ping;
97use URI;
98
99use colcfg;
100use scriptutil;
101use util;
102#use enum;
103
104# enumerations in perl, http://stackoverflow.com/questions/473666/does-perl-have-an-enumeration-type
105# Unfortunately, not part of perl's core
106#use enum qw(LEVEL_NONE LEVEL_ERROR LEVEL_INFO LEVEL_DEBUG); # debugging levels NONE == 0, ERROR=1 INFO=2 DEBUG=3
107
108# global variables
109#my $default_verbosity = LEVEL_ERROR; # by default we display basic error messages
110
my $default_verbosity = 2; # by default we display basic error and info messages

# Prints a starred banner announcing a task to STDERR.
#   $task_msg          - the text shown inside the banner
#   $verbosity_setting - optional; falls back to $default_verbosity only when
#                        it is not supplied (undef). An explicit 0 is honoured
#                        and suppresses the banner, consistent with print_msg.
# The banner is printed when the effective verbosity is >= 1.
sub print_task_msg {
    my ($task_msg, $verbosity_setting) = @_;

    # test definedness, not truth: 0 is a legitimate "be quiet" setting
    $verbosity_setting = $default_verbosity unless defined $verbosity_setting;

    if ($verbosity_setting >= 1) {
        print STDERR "\n";
        print STDERR "************************\n";
        print STDERR "* $task_msg\n";
        print STDERR "************************\n";
    }
}
125
# Writes $msg to STDERR when the verbosity in force is high enough.
# No newline is appended; callers supply their own.
#   $msg               - text to print
#   $min_verbosity     - level the message needs in order to be shown (default 1)
#   $verbosity_setting - level currently in force (default $default_verbosity)
sub print_msg {
    my ($msg, $min_verbosity, $verbosity_setting) = @_;

    my $level_in_force = defined $verbosity_setting ? $verbosity_setting : $default_verbosity;
    my $level_required = defined $min_verbosity ? $min_verbosity : 1;

    # a message is shown only if the current setting reaches the level it requires
    return unless $level_in_force >= $level_required;

    print STDERR $msg;
}
140
# Method to send a command to a GS2 or GS3 library_URL.
# The commands used in this script can be activate, deactivate, ping,
# and is-persistent (is-persistent only implemented for GS2).
#
#   $library_url                 - base URL of the library; undef means no server, returns 0
#   $command                     - query string appended verbatim to $library_url
#   $check_message_against_regex - pattern the response body must match on a 2xx status;
#                                  undef/empty means no content check is required
#   $site                        - accepted for GS3 callers but not used
#                                  (Gatherer.java's configGS3Server doesn't use it either)
#   $expected_error_code         - optional HTTP error status the caller anticipates;
#                                  it only affects what gets logged
#
# Returns 1 only when the request succeeded AND the body matched; 0 in every other
# case (no URL, unexpected body, HTTP error whether expected or not, or any
# status that is neither success nor error).
sub config {
    my ($library_url, $command, $check_message_against_regex, $site, $expected_error_code) = @_;

    return 0 unless defined $library_url;

    # for GS2, getting the HTTP status isn't enough, we need to read the output
    # since this is what CollectionManager.config() stipulates.
    # Set the LWP useragent to 5s max timeout, else it can take very long to time out
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(5);

    # use LWP::UserAgent's get($url) since it returns an HTTP::Response object:
    # ->content stores the body and ->code the HTTP response code
    my $response_obj = $ua->get($library_url.$command);
    my $response_code = $response_obj->code();

    if (LWP::Simple::is_success($response_code)) {
        &print_msg("*** Command $library_url$command\n", 3);
        &print_msg("*** HTTP Response Status: $response_code - Complete.", 3);

        # check the page content is as expected
        my $response_content = $response_obj->content;

        # An empty pattern in m// would silently re-use the last successfully
        # matched regex, so only run the match when there is a pattern to apply.
        my $nothing_to_check = (!defined $check_message_against_regex
                                || $check_message_against_regex eq "");

        if ($nothing_to_check || $response_content =~ m/$check_message_against_regex/) {
            &print_msg(" Response as expected.\n", 3);
            return 1;
        }

        &print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
        &print_msg("*** Got message:\n$response_content.\n", 4);
        return 0; # ping on a collection may "not succeed."
    }

    if (LWP::Simple::is_error($response_code)) {
        if (defined $expected_error_code && $response_code == $expected_error_code) {
            &print_msg(" Response status $response_code as expected.\n", 3);
        } else {
            &print_msg("*** Command $library_url$command\n");
            &print_msg("*** Unexpected error. HTTP Response Status: $response_code - Failed.\n");
        }
        return 0; # return false, since the response_code was an error, expected or not
    }

    # Neither success nor error (informational or redirect status): the command
    # cannot be said to have succeeded, so report failure explicitly.
    return 0;
}
196
# Asks the library server to release (deactivate) a collection.
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"; selects the command and expected reply
#   $qualified_collection - [colgroup/]collection name
#   $site                 - GS3 site name, passed through to config()
# Returns the result of config() (true when the server confirmed the release),
# or 0 for an unrecognised $gs_mode.
sub deactivate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
        my $check_message_against_regex = q/configured release-collection/;
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }
    elsif ($gs_mode eq "gs3") {
        my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
        # The collection name is matched literally: quotemeta stops characters
        # such as "+", "(" or "." in the name from being read as regex syntax.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " deactivated";
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0;
}
211
# Asks the library server to add (activate) a collection.
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"; selects the command and expected reply
#   $qualified_collection - [colgroup/]collection name
#   $site                 - GS3 site name, passed through to config()
# Returns the result of config() (true when the server confirmed the activation),
# or 0 for an unrecognised $gs_mode.
sub activate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
        my $check_message_against_regex = q/configured add-collection/;
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }
    elsif ($gs_mode eq "gs3") {
        my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
        # The collection name is matched literally: quotemeta stops characters
        # such as "+", "(" or "." in the name from being read as regex syntax.
        my $check_message_against_regex = "collection: " . quotemeta($qualified_collection) . " activated";
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0;
}
226
# Sends a ping $command to $library_url through config() and returns its result.
# $gs_mode is accepted but not consulted here; $site is handed on to config().
sub ping {
    my ($library_url, $command, $gs_mode, $site) = @_;

    # What config() is told to expect:
    #  - a running GS server answers "Ping" ... "succeeded" on success; pinging an
    #    inactive collection says it did "not succeed", a message of interest to return.
    #  - if the GS server is not running, a "500" status code is the *expected* error.
    my @expectations = (q/(succeeded)/, $site, 500);

    return config($library_url, $command, @expectations);
}
238
# General server-level ping: sends the ping action to the GS library itself.
# The query string depends on $gs_mode; any other mode pings with an empty command.
sub ping_library {
    my ($library_url, $gs_mode, $site) = @_;

    my $command = ($gs_mode eq "gs2") ? "?a=ping"
                : ($gs_mode eq "gs3") ? "?a=s&sa=ping"
                :                       "";

    return &ping($library_url, $command, $gs_mode, $site);
}
252
253
# Collection-level ping: asks the GS library whether $qualified_collection is active.
# The query string depends on $gs_mode; any other mode pings with an empty command.
sub ping_library_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    my $command = ($gs_mode eq "gs2") ? "?a=ping&c=$qualified_collection"
                : ($gs_mode eq "gs3") ? "?a=s&sa=ping&st=collection&sn=$qualified_collection"
                :                       "";

    return &ping($library_url, $command, $gs_mode, $site);
}
267
# Returns true if the server at $library_url is persistent.
# A GS3 server is always persistent, so it is never queried; for anything else
# the is-persistent command is sent and the reply must contain "true"
# (isPersistent: true versus isPersistent: false).
sub is_persistent {
    my ($library_url, $gs_mode) = @_;

    return 1 if $gs_mode eq "gs3";

    return config($library_url, "?a=is-persistent", q/true/);
}
281
# Works out the library URL of a web server that is included with Greenstone.
#   $gs_mode - "gs2" or "gs3"
# For GS2 the URL is derived from GSDLHOME/llssite.cfg; for GS3 ant is asked for it.
# Returns the URL, or undef when it cannot be determined (no llssite.cfg, no url
# property, url=URL_pending, ant could not be run, or an unrecognised $gs_mode).
sub get_library_URL {
    my $gs_mode = shift(@_); # gs3 or gs2

    my $url = undef;

    if ($gs_mode eq "gs2") {
        my $llssite_cfg = &util::filename_cat($ENV{'GSDLHOME'}, "llssite.cfg");

        if (-f $llssite_cfg) {
            # check llssite.cfg for line with url property
            # for server.exe also need to use portnumber and enterlib properties

            my $fin;
            if (!open($fin, '<', $llssite_cfg)) {
                &print_msg("activate.pl::get_library_URL failed to open $llssite_cfg ($!)\n");
                return undef;
            }

            # Read in the entire contents of the file in one hit
            my $contents = do { local $/; <$fin> };
            close($fin);
            $contents = "" unless defined $contents; # empty file

            my $enterlib = "";
            my $portnumber = ""; # will remain empty (implicit port 80) unless it's specifically been assigned

            # split on carriage-returns and/or linefeeds
            foreach my $line (split(/[\n\r]+/, $contents)) {
                if ($line =~ m/^url=(.*)$/) {
                    $url = $1;
                } elsif ($line =~ m/^enterlib=(.*)$/) {
                    $enterlib = $1;
                } elsif ($line =~ m/^portnumber=(.*)$/) {
                    $portnumber = $1;
                }
            }

            if (!$url) {
                return undef;
            }
            elsif ($url eq "URL_pending") { # library is not running
                # do not process url=URL_pending in the file, since for server.exe
                # this just means the Enter Library button hasn't been pressed yet
                $url = undef;
            }
            else {
                # In the case of server.exe, need to do extra work to get the proper URL
                # But first, need to know whether we're indeed dealing with server.exe:
                # compare the URL's domain to the full URL.
                my $uri = URI->new( $url );
                # scheme-less values yield a generic URI object that has no host method
                my $host = $uri->can('host') ? $uri->host : undef;

                # If the URL is nothing but http://<host> (optionally with a trailing
                # slash), assume server.exe: the actual URL is then the result of
                # suffixing the port and enterlib properties in llssite.cfg.
                # \Q..\E makes the host match literally (dots are not wildcards).
                if (defined $host && $url =~ m/http:\/\/\Q$host\E(\/)?$/) {
                    $url =~ s/\/$//;                             # the port must directly follow the host
                    $url .= ":".$portnumber if $portnumber ne ""; # no dangling ':' for the implicit port
                    $url .= $enterlib;
                } # else, apache web server
            }
        }
    } elsif ($gs_mode eq "gs3") {
        # Run the ant target get-default-servlet-url, whose output can look like:
        #
        # Buildfile: build.xml
        #   [echo] os.name: Windows Vista
        #
        # get-default-servlet-url:
        #   [echo] http://localhost:8383/greenstone3/library
        # BUILD SUCCESSFUL
        # Total time: 0 seconds
        #
        # The target can be run from anywhere by specifying the location of GS3's
        # ant build.xml buildfile. GSDL3SRCHOME will be set for GS3 by gs3-setup.sh,
        # a step that would have been necessary to run activate.pl in the first place.
        if (!defined $ENV{'GSDL3SRCHOME'}) {
            &print_msg("activate.pl::get_library_URL: GSDL3SRCHOME is not set, cannot work out library URL for $gs_mode\n");
            return $url;
        }

        my $perl_command = "ant -buildfile \"$ENV{'GSDL3SRCHOME'}/build.xml\" get-default-servlet-url";

        # The command string is run through the shell; the buildfile path is quoted
        # in case it contains spaces.
        my $pin;
        if (open($pin, "$perl_command |")) {
            while (defined (my $perl_output_line = <$pin>)) {
                # grab the URL (all the non-whitespace chars), keeping its own scheme
                if ($perl_output_line =~ m@(https?://\S*)@) {
                    $url = $1;
                }
            }
            close($pin);
        } else {
            &print_msg("activate.pl::get_library_URL: Failed to run $perl_command to work out library URL for $gs_mode\n");
        }
    }

    # either the url is still undef or it is now set
    return $url;
}
390
391### UNUSED METHODS TO MOVE TO util.pm?
392
# Currently unused: ping_library is used instead to send the ping action to a
# GS2/GS3 server. This one can serve more generally to test whether a URL is alive.
# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
# and http://www.perlmonks.org/?node_id=618534
#
# Returns 0 when no URL is given, otherwise whatever LWP::Simple::head() yields
# in scalar context for that URL.
sub is_URL_active {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Cap the LWP useragent at 5s, else it takes unknowably long to time out
    # http://www.perlmonks.org/?node_id=618534
    $ua->timeout(5);

    # A HEAD request is enough; LWP::Simple::get($url) would be more intensive
    my $status = LWP::Simple::head($url);
    return $status;
}
412
# Pinging seems to always return true, so this method doesn't work.
#
# Pings the host named in $url and returns the outcome of Net::Ping's ping()
# (0 when no URL is given). Announces on STDERR when the host answers.
sub pingHost {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Get just the domain: "http://localhost/gsdl?uq=332033495" and
    # "http://localhost/greenstone/cgi-bin/library.cgi" both become "localhost".
    # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
    my $host = URI->new( $url )->host;

    # Ping the host. http://perldoc.perl.org/Net/Ping.html
    my $pinger = Net::Ping->new();
    my $status = $pinger->ping($host); # appears to be undef rather than 0 on failure
    if ($status) {
        print STDERR "**** $host is alive.\n";
    }
    $pinger->close();

    # whether pinging was a success or failure
    return $status;
}
441
442
# Most of the arguments are familiar from the building scripts like buildcol.pl
# The special optional argument -library_url is for when we're dealing with a web
# library server such as an apache that's separate from any included with GS2.
# In such a case, this script's caller should pass in -library_url <URL>.
#
# $site argument must be specified in the cmdline for collectionConfig.xml to get
# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
#
#   $argc - number of command line arguments
#   @argv - the arguments; the LAST one is the [colgroup/]collection name
#
# Steps: (1) work out the library URL, (2) de-activate the collection on a running
# persistent server, (3) move building to index (unloading/creating solr cores
# when the buildtype is solr), (4) activate the collection again.
# Exits with -1 on usage errors, an unusable collection, or (for non-solr
# collections) a missing or empty building folder.
sub main
{
    my ($argc,@argv) = @_;

    if (($argc==0) || (($argc==1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
        my ($progname) = ($0 =~ m/^.*[\/|\\](.*?)$/);
        # $0 has no directory part when the script is run from its own folder
        $progname = $0 unless defined $progname;

        print STDERR "\n";
        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -removeold -keepold -verbosity v\n";
        print STDERR "\t-library_url URL] <[colgroup/]collection>\n";
        print STDERR "\n";

        exit(-1);
    }

    # get the collection details
    my $qualified_collection = pop @argv; # qualified collection

    my $collect_dir = undef; #"collect"; # can't be "collect" when only -site is provided for GS3
    my $build_dir = undef;
    my $index_dir = undef;
    my $site = undef;

    my $removeold = 0;
    my $keepold = 0;
    my $incremental = 0; # used by solr

    my $library_url = undef; # to be specified on the cmdline if not using a GS-included web server

    # Loop on the list, not on the value shifted off it: an argument of "0" or ""
    # is false and must not end option parsing early.
    while (@argv) {
        my $arg = shift @argv;

        if ($arg eq "-collectdir") {
            $collect_dir = shift @argv;
        }
        elsif ($arg eq "-builddir") {
            $build_dir = shift @argv;
        }
        elsif ($arg eq "-indexdir") {
            $index_dir = shift @argv;
        }
        elsif ($arg eq "-site") {
            $site = shift @argv;
        }
        elsif ($arg eq "-removeold") {
            $removeold = 1;
        }
        elsif ($arg eq "-keepold") {
            $keepold = 1;
        }
        elsif ($arg eq "-incremental") {
            $incremental = 1;
        }
        elsif ($arg eq "-library_url") {
            $library_url = shift @argv;
        }
        elsif ($arg eq "-verbosity") {
            $default_verbosity = shift @argv; # global variable

            # ensure we're working with ints not strings (int context not str context), in case verbosity=0
            # http://stackoverflow.com/questions/288900/how-can-i-convert-a-string-to-a-number-in-perl
            $default_verbosity = int($default_verbosity || 0);
        }
    }

    # work out the building and index dirs
    my $collection_dir = &util::resolve_collection_dir($collect_dir, $qualified_collection, $site);
    $build_dir = &util::filename_cat($collection_dir, "building") unless (defined $build_dir);
    $index_dir = &util::filename_cat($collection_dir, "index") unless (defined $index_dir);

    &print_task_msg("Running Collection Activation Stage");

    # get and check the collection name
    if ((&colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
        # -collectdir is optional, so avoid interpolating undef into the message
        my $shown_collect_dir = defined $collect_dir ? $collect_dir : "";
        &print_msg("Unable to use collection \"$qualified_collection\" within \"$shown_collect_dir\"\n");
        exit -1;
    }

    # Read in the collection configuration file.
    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
    my $gs_mode = "gs2";
    if ((defined $site) && ($site ne "")) { # GS3
        $gs_mode = "gs3";
    }
    my $collect_cfg_filename = &colcfg::get_collect_cfg_name(STDERR, $gs_mode);
    my $collectcfg = &colcfg::read_collection_cfg ($collect_cfg_filename,$gs_mode);

    # look for build.cfg/buildConfig.xml
    my $build_cfg_filename ="";

    if ($gs_mode eq "gs2") {
        $build_cfg_filename = &util::filename_cat($build_dir,"build.cfg");
    } else {
        $build_cfg_filename = &util::filename_cat($build_dir, "buildConfig.xml");
        # gs_mode is GS3. Set the site now if this was not specified as cmdline argument
        #$site = "localsite" unless defined $site;
    }

    # We need to know the buildtype for Solr.
    # Any change of indexers is already detected and handled by the calling code (buildcol or
    # full-rebuild), so that at this stage the config file's buildtype reflects the actual buildtype.

    # From buildcol.pl we use searchtype for determining buildtype, but for old versions, use buildtype
    my $buildtype;
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; #mg is the default
    }

    # can't do anything without a build directory with something in it to move into index
    # Except if we're (doing incremental) building for solr, where we want to still
    # activate and deactivate collections including for the incremental case
    if (!&util::dir_exists($build_dir)) {
        &print_msg("No building folder at $build_dir to move to index.\n");
        exit -1 unless ($buildtype eq "solr"); #&& $incremental);
    } elsif (&util::is_dir_empty($build_dir)) {
        &print_msg("Nothing in building folder $build_dir to move into index folder.\n");
        exit -1 unless ($buildtype eq "solr"); #&& $incremental);
    }

    my $solr_server;
    my @corenames = ();
    if ($buildtype eq "solr") { # start up the jetty server
        my $solr_ext = $ENV{'GEXT_SOLR'}; # from solr_passes.pl
        # only extend @INC when the solr extension location is actually known
        unshift (@INC, "$solr_ext/perllib") if defined $solr_ext;
        require solrserver;

        # Solr cores are named without taking the collection-group name into account, since solr
        # is used for GS3 and GS3 doesn't use collection groups but has the site concept instead
        my ($colname, $colgroup) = &util::get_collection_parts($qualified_collection);

        # See solrbuilder.pm to get the indexing levels (document, section) from the collectcfg file
        # Used to generate core names from them and remove cores by name
        foreach my $level ( @{$collectcfg->{'levels'}} ) {
            my ($pindex) = $level =~ /^(.)/;
            my $indexname = $pindex."idx";
            push(@corenames, "$site-$colname-$indexname"); #"$site-$colname-didx", "$site-$colname-sidx"
        }

        # If the Solr/Jetty server is not already running, the following starts
        # it up, and only returns when the server is "reading and listening"
        $solr_server = solrserver->new($build_dir);
        $solr_server->start();

        # We'll be moving building to index. For solr collection, there's further
        # special processing to make a corresponding change to the solr.xml
        # by removing the temporary building cores and (re)creating the index cores
    }

    # Now the logic in GLI's CollectionManager.java (processComplete()
    # and installCollection()) and Gatherer.configGS3Server().

    # 1. Get library URL

    # For web servers that are external to a Greenstone installation,
    # the user can pass in their web server's library URL.
    # For web servers included with GS (like tomcat for GS3 and server.exe
    # and apache for GS2), we work out the library URL:
    if (!$library_url) {
        $library_url = &get_library_URL($gs_mode); # returns undef if no server is running
    }

    # CollectionManager's installCollection phase in GLI
    # 2. Ping the library URL, and if it's a persistent server and running, release the collection

    my $is_persistent_server = undef;
    if ($library_url) { # undef if no valid server URL

        &print_msg("Pinging $library_url\n");
        if (&ping_library($library_url, $gs_mode, $site)) { # server running

            # server is running, so release the collection if
            # the server is persistent and the collection is active
            &print_msg("Checking if Greenstone server is persistent\n");
            $is_persistent_server = &is_persistent($library_url, $gs_mode);

            if ($is_persistent_server) { # only makes sense to issue activate and deactivate cmds to a persistent server

                &print_msg("Checking if the collection $qualified_collection is already active\n");
                my $collection_active = &ping_library_collection($library_url, $gs_mode, $qualified_collection, $site);

                if ($collection_active) {
                    &print_msg("De-activating collection $qualified_collection\n");
                    &deactivate_collection($library_url, $gs_mode, $qualified_collection, $site);
                }
            }
        }
    }

    # 3. Do all the moving building to index stuff now

    # If removeold: replace index dir with building dir.
    # If keepold: move building's contents into index, where only duplicates will get deleted.
    # removeold and keepold can't both be on at the same time
    # incremental becomes relevant for solr, though it was irrelevant to what activate.pl does (moving building to index)
    my $incremental_mode;
    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental,
                                                   $build_dir, # checkdir. Usually archives or export to be deleted. activate.pl deletes building
                                                   $collectcfg);

    if ($removeold) {

        if (&util::dir_exists($index_dir)) {
            &print_task_msg("Removing \"index\"");

            if ($buildtype eq "solr") {
                # if solr, remove any cores that are using the index_dir before deleting this dir
                foreach my $corename (@corenames) {
                    $solr_server->admin_unload_core($corename);
                }
            }

            &util::rm_r($index_dir);

            # Wait for a couple of seconds, just for luck
            sleep 2;

            if (&util::dir_exists($index_dir)) {
                &print_msg("The index directory $index_dir could not be deleted.\n"); # CollectionManager.Index_Not_Deleted
            }
        }

        # if remote GS server: gliserver.pl would call activate.pl to activate
        # the collection at this point since activate.pl lives on the server side

        if ($buildtype eq "solr") {
            # if solr, remove any cores that are using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename");
            }
        }

        # Move the building directory to become the new index directory
        &print_task_msg("Moving \"building\" -> \"index\"");
        &util::mv($build_dir, $index_dir);
        if (&util::dir_exists($build_dir) || !&util::dir_exists($index_dir)) {
            &print_msg("Could not move $build_dir to $index_dir.\n"); # CollectionManager.Build_Not_Moved
        }
    }
    elsif ($keepold || $incremental) {
        if ($buildtype eq "solr") {
            # if solr, remove any cores that may be using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename") if $solr_server->admin_ping_core("building-$corename");
            }
        }

        # Copy just the contents of building dir into the index dir, overwriting
        # existing files, but don't replace index with building.
        &print_task_msg("Moving \"building\" -> \"index\"");
        &util::mv_dir_contents($build_dir, $index_dir);
    }

    if ($buildtype eq "solr") {
        # Call CREATE action to get the old cores pointing to the index folder
        foreach my $corename (@corenames) {
            if ($removeold) {
                # Call CREATE action to get all cores pointing to the index folder, since building is now index
                $solr_server->admin_create_core($corename, $index_dir);

            } elsif ($keepold || $incremental) {
                # Call RELOAD core. Should already be using the index_dir directory for $keepold and $incremental case

                # Ping to see if corename exists, if it does, reload, else create
                if ($solr_server->admin_ping_core($corename)) {
                    $solr_server->admin_reload_core($corename);
                } else {
                    $solr_server->admin_create_core($corename, $index_dir);
                }
            }
        }

        # copy the just updated ext/solr/solr.xml to web/ext/solr/solr.xml
        $solr_server->copy_solrxml_to_web();
    }

    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again

    # Check for success: if building does not exist OR is empty
    if (!&util::dir_exists($build_dir) || &util::is_dir_empty($build_dir)) {

        if ($library_url) { # undef if no valid server URL

            &print_msg("Pinging $library_url\n");
            if (&ping_library($library_url, $gs_mode, $site)) { # server running

                # don't need to work out persistency of server more than once, since the libraryURL hasn't changed
                if (!defined $is_persistent_server) {
                    &print_msg("Checking if Greenstone server is persistent\n");
                    $is_persistent_server = &is_persistent($library_url, $gs_mode);
                }

                if ($is_persistent_server) { # persistent server, so can try activating collection

                    &print_msg("Checking if the collection $qualified_collection is not already active\n");
                    my $collection_active = &ping_library_collection($library_url, $gs_mode, $qualified_collection, $site);

                    if (!$collection_active) {
                        &print_msg("Activating collection $qualified_collection\n");
                        &activate_collection($library_url, $gs_mode, $qualified_collection, $site);

                        # unless an error occurred, the collection should now be active:
                        $collection_active = &ping_library_collection($library_url, $gs_mode, $qualified_collection, $site);
                        if (!$collection_active) {
                            &print_msg("ERROR: collection $qualified_collection did not get activated\n");
                        }
                    }
                }
            }
        }
    } else { # installcollection failed
        #CollectionManager.Preview_Ready_Failed
        &print_msg("Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n");
    }

    &print_msg("\n");

    if ($buildtype eq "solr") {
        # only shut the solr/jetty server down again if this script was the one to start it
        if ($solr_server->explicitly_started()) {
            $solr_server->stop();
        }
    }
}

&main(scalar(@ARGV),@ARGV);
Note: See TracBrowser for help on using the repository browser.