source: main/trunk/greenstone2/bin/script/activate.pl@ 35233

Last change on this file since 35233 was 35233, checked in by anupama, 3 years ago

Extending the changes of commits 35213-35215, as they broke diffcol on Linux (yet strangely not on Windows): diffcol exports an empty GSDL3SRCHOME (and GSDLHOME) variable, which ends up defined but with no value. After commit revisions 35213-35215, the import, buildcol (and activate) scripts test whether these variables are defined (which they are, even for GS2, when diffcol runs) and then set the site variable to localsite. We need to test that these variables are not just defined but also not the empty string before setting site to localsite, so that diffcol testing, which runs on GS2 installs, still works.

  • Property svn:executable set to *
File size: 16.8 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37use Config;
38
# Compile-time setup of @INC so that the Greenstone modules used further down
# (colcfg, oaiinfo, scriptutil, servercontrol, util) can be found.
#
# Dies if GSDLHOME or GSDLOS is not defined in the environment. Otherwise it
# puts $GSDLHOME/perllib and $GSDLHOME/perllib/cpan at the front of @INC and
# appends one perl-version-specific cpan folder to the end of @INC.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");

    # Adding cpan in, adds in its auto subfolder which conflicts with ActivePerl on Windows
    # The auto folder has been moved into a perl-5.8 folder, and this will now be included
    # only if the current version of perl is 5.8 (and not ActivePerl).
    my $perl_dir;

    # Note: $] encodes the version number of perl as a decimal, e.g. 5.010001
    # for perl 5.10.1, so characters 3-4 of it hold the minor version number.
    if ($] >= 5.010) {
        $perl_dir = "perl-5." . substr($], 3, 2);
    }
    elsif ($] > 5.008) {
        # perl 5.8.1 or above
        $perl_dir = "perl-5.8";
    }
    elsif ($] < 5.008) {
        # assume perl 5.6
        $perl_dir = "perl-5.6";
    }
    else {
        # Exactly perl 5.8.0. (This branch used to be shadowed by a redundant
        # "$] >= 5.008" test, so the warning below could never be printed.)
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
        $perl_dir = "perl-5.8";
    }

    #if ($ENV{'GSDLOS'} !~ /^windows$/i) {
    # Use push to put this on the end, so an existing XML::Parser will be used by default.
    # Prefer the "-mt" variant of the folder when it exists and this perl was built with threads.
    if (-d "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt" && $Config{usethreads}) {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt");
    }
    else {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
    }
    #}

}
83
84
85use strict;
86no strict 'refs'; # allow filehandles to be variables and vice versa
87no strict 'subs'; # allow barewords (eg STDERR) as function arguments
88
89use File::Basename;
90use File::Find;
91
92# Greenstone modules
93use colcfg;
94use oaiinfo;
95use scriptutil;
96use servercontrol;
97use util;
98
99
# main($argc, @argv)
#
# Activates a freshly built collection: optionally deactivates it on the web
# server, moves "building" to "index", activates the temporary OAI database,
# reloads any solr cores, and finally re-activates the collection.
#
# Parameters:
#   $argc - number of command line arguments
#   @argv - the command line arguments; the LAST one is the (optionally
#           group-qualified) collection name, the rest are options.
#
# Most of the arguments are familiar from the building scripts like buildcol.pl
# The special optional argument -library_url is for when we're dealing with a web
# library server such as an apache that's separate from any included with GS2.
# In such a case, this script's caller should pass in -library_url <URL>.
#
# $site argument must be specified in the cmdline for collectionConfig.xml to get
# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
#
# Exits with -1 after printing usage, when the collection cannot be used, or
# when there is nothing in the building folder to move (non-solr collections).
sub main
{
    my ($argc, @argv) = @_;

    if (($argc == 0) || (($argc == 1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
        # Strip any leading directories (either slash style) from the script
        # path. Fall back to $0 itself when it contains no directory part, so
        # that the usage line never interpolates an undefined value.
        my ($progname) = ($0 =~ m{([^/\\]+)\z});
        $progname = $0 unless defined $progname;

        print STDERR "\n";
        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -skipactivation -removeold -keepold -incremental -verbosity v\n";
        print STDERR "\t-library_url URL -library_name n] <[colgroup/]collection>\n";
        print STDERR "\n";

        exit(-1);
    }

    # http://stackoverflow.com/questions/6156742/how-can-i-capture-the-complete-commandline-in-perl
    #print STDERR "@@@@@@@@@ ACTIVATE CMD: " . join " ", $0, @ARGV . "\n";

    # get the collection details
    my $qualified_collection = pop @argv; # qualified collection

    my $collect_dir = undef; #"collect"; # can't be "collect" when only -site is provided for GS3
    my $build_dir = undef;
    my $index_dir = undef;
    my $site = undef;

    # if run from server (java code), it will handle deactivation and activation to prevent
    # open file handles when java launches this script and exits:
    my $skipactivation = 0;
    my $removeold = 0;
    my $keepold = 0;
    my $incremental = 0; # used by solr

    my $default_verbosity = 2;

    my $library_url = undef; # to be specified on the cmdline if not using a GS-included web server
    # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3

    my $library_name = undef;

    # Test for definedness rather than truth, so that an argument of "0" or ""
    # does not silently end option parsing. Unrecognised options are ignored.
    while (defined(my $arg = shift @argv)) {
        if ($arg eq "-collectdir") {
            $collect_dir = shift @argv;
        }
        elsif ($arg eq "-builddir") {
            $build_dir = shift @argv;
        }
        elsif ($arg eq "-indexdir") {
            $index_dir = shift @argv;
        }
        elsif ($arg eq "-site") {
            $site = shift @argv;
        }
        elsif ($arg eq "-skipactivation") {
            $skipactivation = 1;
        }
        elsif ($arg eq "-removeold") {
            $removeold = 1;
        }
        elsif ($arg eq "-keepold") {
            $keepold = 1;
        }
        elsif ($arg eq "-incremental") {
            $incremental = 1;
        }
        elsif ($arg eq "-library_url") {
            $library_url = shift @argv;
        }
        elsif ($arg eq "-library_name") {
            $library_name = shift @argv;
        }
        elsif ($arg eq "-verbosity") {
            $default_verbosity = shift @argv;

            # ensure we're working with ints not strings (int context not str context), in case verbosity=0
            # http://stackoverflow.com/questions/288900/how-can-i-convert-a-string-to-a-number-in-perl
            $default_verbosity = int($default_verbosity || 0);
        }
    }

    ## shortcut - if gsdl3srchome is set, and site is not set, set site to localsite
    ## But need to check gsdl3srchome not empty even if defined, as that can
    ## happen when nightly diffcol is run and doesn't mean it's a GS3 install
    if (defined $ENV{'GSDL3SRCHOME'} && $ENV{'GSDL3SRCHOME'} ne "") {
        if (!defined $site || $site eq "") {
            $site = "localsite";
            print STDERR "setting site to localsite as GSDL3SRCHOME was set\n";
        }
    }

    # work out the building and index dirs
    my $collection_dir = &util::resolve_collection_dir($collect_dir, $qualified_collection, $site);
    $build_dir = &FileUtils::filenameConcatenate($collection_dir, "building") unless (defined $build_dir);
    $index_dir = &FileUtils::filenameConcatenate($collection_dir, "index") unless (defined $index_dir);
    my $solr_conf_dir = &FileUtils::filenameConcatenate($collection_dir, "etc", "conf");

    my $gsserver = servercontrol->new($qualified_collection, $site, $default_verbosity, $build_dir, $index_dir, $collect_dir, $library_url, $library_name);

    $gsserver->print_task_msg("Running Collection Activation Stage");

    # get and check the collection name
    if ((&colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
        # -collectdir is optional, so $collect_dir may legitimately be undefined here;
        # substitute an empty string to keep the message free of "uninitialized" warnings.
        my $collect_dir_shown = defined $collect_dir ? $collect_dir : "";
        $gsserver->print_msg("Unable to use collection \"$qualified_collection\" within \"$collect_dir_shown\"\n");
        exit -1;
    }

    # Read in the collection configuration file.
    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
    my $gs_mode = $gsserver->{'gs_mode'}; # "gs2" or "gs3", based on $site variable

    my $collect_cfg_filename = &colcfg::get_collect_cfg_name(STDERR, $gs_mode);
    my $collectcfg = &colcfg::read_collection_cfg ($collect_cfg_filename,$gs_mode);

    # look for build.cfg/buildConfig.xml
    my $build_cfg_filename = "";

    if ($gs_mode eq "gs2") {
        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir, "build.cfg");
    } else {
        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir, "buildConfig.xml");
        # gs_mode is GS3. Set the site now if this was not specified as cmdline argument
        #$site = "localsite" unless defined $site;
    }

    # We need to know the buildtype for Solr.
    # Any change of indexers is already detected and handled by the calling code (buildcol or
    # full-rebuild), so that at this stage the config file's buildtype reflects the actual buildtype.

    # From buildcol.pl we use searchtype for determining buildtype, but for old versions, use buildtype
    my $buildtype;
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; #mg is the default
    }

    # can't do anything without a build directory with something in it to move into index
    # Except if we're (doing incremental) building for solr, where we want to still
    # activate and deactivate collections including for the incremental case

    if (!$incremental) { # if (!($incremental && ($build_dir eq $index_dir)))

        if (!&FileUtils::directoryExists($build_dir)) {
            $gsserver->print_msg("No building folder at $build_dir to move to index.\n");
            exit -1 unless ($buildtype eq "solr"); #&& $incremental);
        } elsif (&FileUtils::isDirectoryEmpty($build_dir)) {
            $gsserver->print_msg("Nothing in building folder $build_dir to move into index folder.\n");
            exit -1 unless ($buildtype eq "solr"); #&& $incremental);
        }
    }

    # Now the logic in GLI's CollectionManager.java (processComplete()
    # and installCollection()) and Gatherer.configGS3Server().

    # 1. Get library URL
    # CollectionManager's installCollection phase in GLI:
    # 2. Ping the library URL, and if it's a persistent server and running, release the collection
    $gsserver->do_deactivate() unless $skipactivation;

    # solr core reloading - previously for incremental building, the solr core is unloaded and created
    # - why is this? Does this actually need to happen? The following assumes that we *do* want to
    # unload and create the core even if incremental building

    # solr deactivation. Are we a solr collection? unload any cores that are
    # present for this collection
    # If we are not a solr collection, is there an etc/conf folder ($solr_conf_dir)?
    # if so, we were a solr collection previously, need to unload cores.
    # in both cases, we attempt to unload didx and sidx cores, as well
    # as both building cores

    my $collection_was_previously_solr = 0;
    if ($buildtype ne "solr" && FileUtils::directoryExists($solr_conf_dir)) {
        $collection_was_previously_solr = 1;
    }

    # 2b. If we're working with a solr collection, then start up the solrserver now and unload
    # any cores that may exist for this collection (building and index cores)
    my $solr_server;
    my $core_basename = "";

    # for unloading, lets try unloading both didx and sidx as these may have
    # both been there previously, and if the user removed one in the config file
    # we won't know that.
    if ($buildtype eq "solr" || $collection_was_previously_solr) {

        my $solr_ext = $ENV{'GEXT_SOLR'}; # from solr_passes.pl
        # Only extend @INC when GEXT_SOLR is actually set; otherwise we would add a
        # bogus "/perllib" entry (and warn under -w). If solrserver cannot be found
        # on @INC, the require below still fails with perl's usual "Can't locate" error.
        if (defined $solr_ext && $solr_ext ne "") {
            unshift (@INC, "$solr_ext/perllib");
        }
        require solrserver;

        # Solr cores are named without taking the collection-group name into account, since solr
        # is used for GS3 and GS3 doesn't use collection groups but has the site concept instead
        my ($colname) = &util::get_collection_parts($qualified_collection);
        $core_basename = "$site-$colname";
        # both the didx and sidx variants of the core name
        my @all_cores = ("$core_basename-didx", "$core_basename-sidx");

        # If the Solr server is not already running, the following starts
        # it up, and only returns when the server is "ready and listening"
        $solr_server = solrserver->new($build_dir);
        $solr_server->start();

        # unload all the possible cores
        foreach my $corename (@all_cores) {
            $solr_server->admin_unload_core($corename) if ($solr_server->admin_ping_core($corename));
            $solr_server->admin_unload_core("building-$corename") if ($solr_server->admin_ping_core("building-$corename"));
        }

        # tidy up other files if we were solr but are not anymore
        if ($collection_was_previously_solr) {
            # regenerate the solr.xml.in file to reflect the changes to cores
            $solr_server->solr_xml_to_solr_xml_in();
            # we are finished with solrserver now, stop it if necessary
            if ($solr_server->explicitly_started()) {
                $solr_server->stop();
            }
            # lets delete the conf folder as we are no longer solr
            &FileUtils::removeFilesRecursive($solr_conf_dir);
        }

    }

    # 3. Do all the moving building to index stuff now

    # If removeold: replace index dir with building dir.
    # If keepold: move building's contents into index, where only duplicates will get deleted.
    # removeold and keepold can't both be on at the same time
    # incremental becomes relevant for solr, though it was irrelevant to what activate.pl does
    # (moving building to index)
    my $incremental_mode;
    ($removeold, $keepold, $incremental, $incremental_mode) = &scriptutil::check_removeold_and_keepold($removeold, $keepold, $incremental, $index_dir, $collectcfg);

    if ($removeold) {

        if (&FileUtils::directoryExists($index_dir)) {
            $gsserver->print_task_msg("Removing \"index\"");

            &FileUtils::removeFilesRecursive($index_dir);

            # Wait for a couple of seconds, just for luck
            sleep 2;

            if (&FileUtils::directoryExists($index_dir)) {
                $gsserver->print_msg("The index directory $index_dir could not be deleted.\n"); # CollectionManager.Index_Not_Deleted
            }
        }

        # Move the building directory to become the new index directory
        $gsserver->print_task_msg("Moving \"building\" -> \"index\"");
        &FileUtils::moveFiles($build_dir, $index_dir);
        if (&FileUtils::directoryExists($build_dir) || !&FileUtils::directoryExists($index_dir)) {
            $gsserver->print_msg("Could not move $build_dir to $index_dir.\n"); # CollectionManager.Build_Not_Moved
        }
    }
    elsif ($keepold || $incremental) {

        if ($build_dir eq $index_dir) { # building_dir can have been set to "index" folder, see incremental-buildcol.pl
            $gsserver->print_task_msg("building folder is index folder, not moving");
        } else {
            # Copy just the contents of building dir into the index dir, overwriting
            # existing files, but don't replace index with building.
            $gsserver->print_task_msg("Moving \"building\" -> \"index\"");
            &FileUtils::moveDirectoryContents($build_dir, $index_dir);
        }
    }


    # now we've moved building to index, move tmp oaidb to live oaidb in parallel
    my $oai_info = oaiinfo->new($collect_cfg_filename, $collectcfg->{'infodbtype'}, $default_verbosity);
    $oai_info->activate_collection();


    if ($buildtype eq "solr") {
        # Call CREATE action to get the old cores pointing to the index folder
        # -- any building or index cores have been unloaded already
        # -- load up the new one

        # generate actual core names: one per configured level, named after the
        # level's first character followed by "idx"
        my @corenames = ();
        foreach my $level ( @{$collectcfg->{'levels'}} ) {
            my ($pindex) = $level =~ /^(.)/;
            my $indexname = $pindex."idx";
            push(@corenames, "$core_basename-$indexname");
        }

        foreach my $corename (@corenames) {
            $solr_server->admin_create_core($corename, $index_dir);
        }

        # regenerate the solr.xml.in from solr.xml in case we are working off a dvd.
        $solr_server->solr_xml_to_solr_xml_in();
        # stop the server if necessary
        if ($solr_server->explicitly_started()) {
            $solr_server->stop();
        }

    }


    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again

    # Check for success: if building does not exist OR is empty OR if building is index (in which case there was no move)
    if ($build_dir eq $index_dir || !&FileUtils::directoryExists($build_dir) || &FileUtils::isDirectoryEmpty($build_dir)) {

        $gsserver->do_activate() unless $skipactivation;

    } else { # installcollection failed
        #CollectionManager.Preview_Ready_Failed
        $gsserver->print_msg("Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n");
    }

    $gsserver->print_msg("\n");

}
424
# Script entry point: hand main() the argument count followed by the arguments themselves.
main(scalar(@ARGV), @ARGV);
Note: See TracBrowser for help on using the repository browser.