source: main/trunk/greenstone2/bin/script/activate.pl@ 31190

Last change on this file since 31190 was 31190, checked in by ak19, 7 years ago

First major commit to do with the new oaiinfo db that keeps track of which oids are deleted. Still need to fix up issue with the new remove and rename subroutines of dbutil's jdbm not being called to clean up *.lg log file associated with main db file. Still need to clean up unused methods in oaiinfo, remove debugging and test against GS3 incr-build-with-manifest tutorial. (Previous 3 commits were commits, not all related.)

  • Property svn:executable set to *
File size: 16.1 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37use Config;
38
# Compile-time setup of @INC so that the Greenstone perl modules (and the
# bundled CPAN modules matching the running perl version) can be found by
# the 'use' statements further down. Dies if GSDLHOME or GSDLOS is not set.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");

    # Adding cpan in, adds in its auto subfolder which conflicts with ActivePerl on Windows
    # The auto folder has been moved into a perl-5.8 folder, and this will now be included
    # only if the current version of perl is 5.8 (and not ActivePerl).
    my $perl_dir;

    # Note: $] encodes the version number of perl (e.g. 5.008008 for perl 5.8.8,
    # 5.010001 for perl 5.10.1), so characters 3-4 hold the minor version.
    if ($] >= 5.010) {
        $perl_dir = "perl-5." . substr($], 3, 2);
    }
    elsif ($] > 5.008) {
        # perl 5.8.1 or above
        $perl_dir = "perl-5.8";
    }
    elsif ($] < 5.008) {
        # assume perl 5.6
        $perl_dir = "perl-5.6";
    }
    else {
        # Exactly perl 5.8.0. (Previously a duplicated ">= 5.008" branch made
        # this warning unreachable.)
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
        $perl_dir = "perl-5.8";
    }

    # Note: this used to be restricted to non-Windows systems
    # (if $ENV{'GSDLOS'} !~ /^windows$/i); it now applies everywhere.
    # Use push to put this on the end, so an existing XML::Parser will be used by default.
    # Prefer the multi-threaded ("-mt") build of the bundled modules when it
    # exists and this perl was compiled with thread support.
    if (-d "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt" && $Config{usethreads}) {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir-mt");
    }
    else {
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
    }
}
83
84
85use strict;
86no strict 'refs'; # allow filehandles to be variables and vice versa
87no strict 'subs'; # allow barewords (eg STDERR) as function arguments
88
89use File::Basename;
90use File::Find;
91
92# Greenstone modules
93use colcfg;
94use oaiinfo;
95use scriptutil;
96use servercontrol;
97use util;
98
99
# main($argc, @argv)
#
# Activates a freshly built collection: deactivates it on the web server
# (unless -skipactivation), moves "building" into "index" according to
# -removeold / -keepold / -incremental, activates the temporary oai-info db,
# (re)creates or reloads solr cores for solr collections, and finally
# activates the collection on the server again.
#
# Parameters:
#   $argc - number of command line arguments
#   @argv - the command line arguments; the last one is the (optionally
#           group-qualified) collection name, the rest are options.
#
# Most of the arguments are familiar from the building scripts like buildcol.pl
# The special optional argument -library_url is for when we're dealing with a web
# library server such as an apache that's separate from any included with GS2.
# In such a case, this script's caller should pass in -library_url <URL>.
#
# $site argument must be specified in the cmdline for collectionConfig.xml to get
# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
#
# Exits with -1 after printing usage, when the collection cannot be used, or
# when (for non-solr, non-incremental runs) there is no populated building folder.
sub main
{
    my ($argc,@argv) = @_;

    if (($argc==0) || (($argc==1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
        # basename() copes with $0 having no directory component, which the
        # previous hand-rolled regex did not (it left $progname undefined).
        my $progname = basename($0);

        print STDERR "\n";
        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -skipactivation -removeold -keepold -incremental -verbosity v\n";
        print STDERR "\t-library_url URL -library_name n] <[colgroup/]collection>\n";
        print STDERR "\n";

        exit(-1);
    }

    # get the collection details
    my $qualified_collection = pop @argv; # qualified collection

    my $collect_dir = undef; #"collect"; # can't be "collect" when only -site is provided for GS3
    my $build_dir = undef;
    my $index_dir = undef;
    my $site = undef;

    # if run from server (java code), it will handle deactivation and activation to prevent
    # open file handles when java launches this script and exits:
    my $skipactivation = 0;
    my $removeold = 0;
    my $keepold = 0;
    my $incremental = 0; # used by solr

    my $default_verbosity = 2;

    my $library_url = undef; # to be specified on the cmdline if not using a GS-included web server
    # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3

    my $library_name = undef;

    # Test for definedness rather than truth, so that an argument of "0" or ""
    # does not silently end option processing.
    while (defined(my $arg = shift @argv)) {
        if ($arg eq "-collectdir") {
            $collect_dir = shift @argv;
        }
        elsif ($arg eq "-builddir") {
            $build_dir = shift @argv;
        }
        elsif ($arg eq "-indexdir") {
            $index_dir = shift @argv;
        }
        elsif ($arg eq "-site") {
            $site = shift @argv;
        }
        elsif ($arg eq "-skipactivation") {
            $skipactivation = 1;
        }
        elsif ($arg eq "-removeold") {
            $removeold = 1;
        }
        elsif ($arg eq "-keepold") {
            $keepold = 1;
        }
        elsif ($arg eq "-incremental") {
            $incremental = 1;
        }
        elsif ($arg eq "-library_url") {
            $library_url = shift @argv;
        }
        elsif ($arg eq "-library_name") {
            $library_name = shift @argv;
        }
        elsif ($arg eq "-verbosity") {
            $default_verbosity = shift @argv;

            # ensure we're working with ints not strings (int context not str context), in case verbosity=0
            # http://stackoverflow.com/questions/288900/how-can-i-convert-a-string-to-a-number-in-perl
            $default_verbosity = int($default_verbosity || 0);
        }
    }

    # work out the building and index dirs
    my $collection_dir = &util::resolve_collection_dir($collect_dir, $qualified_collection, $site);
    $build_dir = &FileUtils::filenameConcatenate($collection_dir, "building") unless (defined $build_dir);
    $index_dir = &FileUtils::filenameConcatenate($collection_dir, "index") unless (defined $index_dir);

    my $gsserver = servercontrol->new($qualified_collection, $site, $default_verbosity, $build_dir, $index_dir, $collect_dir, $library_url, $library_name);

    $gsserver->print_task_msg("Running Collection Activation Stage");

    # get and check the collection name
    if ((&colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
        # $collect_dir is undef unless -collectdir was given; print it as an
        # empty string without triggering an "uninitialized value" warning.
        my $collect_dir_shown = defined $collect_dir ? $collect_dir : "";
        $gsserver->print_msg("Unable to use collection \"$qualified_collection\" within \"$collect_dir_shown\"\n");
        exit -1;
    }

    # Read in the collection configuration file.
    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
    my $gs_mode = $gsserver->{'gs_mode'}; # "gs2" or "gs3", based on $site variable

    my $collect_cfg_filename = &colcfg::get_collect_cfg_name(STDERR, $gs_mode);
    my $collectcfg = &colcfg::read_collection_cfg ($collect_cfg_filename,$gs_mode);

    # look for build.cfg/buildConfig.xml
    # (The name is only computed here; nothing below reads it.)
    my $build_cfg_filename = "";

    if ($gs_mode eq "gs2") {
        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir, "build.cfg");
    } else {
        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir, "buildConfig.xml");
        # gs_mode is GS3. $site is deliberately NOT defaulted to "localsite" here.
    }

    # We need to know the buildtype for Solr.
    # Any change of indexers is already detected and handled by the calling code (buildcol or
    # full-rebuild), so that at this stage the config file's buildtype reflects the actual buildtype.

    # From buildcol.pl we use searchtype for determining buildtype, but for old versions, use buildtype
    my $buildtype;
    if (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    } else {
        $buildtype = "mg"; # mg is the default
    }

    # can't do anything without a build directory with something in it to move into index
    # Except if we're (doing incremental) building for solr, where we want to still
    # activate and deactivate collections including for the incremental case
    if (!$incremental) {

        if (!&FileUtils::directoryExists($build_dir)) {
            $gsserver->print_msg("No building folder at $build_dir to move to index.\n");
            exit -1 unless ($buildtype eq "solr");
        } elsif (&FileUtils::isDirectoryEmpty($build_dir)) {
            $gsserver->print_msg("Nothing in building folder $build_dir to move into index folder.\n");
            exit -1 unless ($buildtype eq "solr");
        }
    }

    # Now the logic in GLI's CollectionManager.java (processComplete()
    # and installCollection()) and Gatherer.configGS3Server().

    # 1. Get library URL
    # CollectionManager's installCollection phase in GLI:
    # 2. Ping the library URL, and if it's a persistent server and running, release the collection
    $gsserver->do_deactivate() unless $skipactivation;

    # 2b. If we're working with a solr collection, then start up the solrserver now.
    my $solr_server;
    my @corenames = ();
    if ($buildtype eq "solr") { # start up the jetty server
        my $solr_ext = $ENV{'GEXT_SOLR'}; # from solr_passes.pl
        unshift (@INC, "$solr_ext/perllib");
        require solrserver;

        # Solr cores are named without taking the collection-group name into account, since solr
        # is used for GS3 and GS3 doesn't use collection groups but has the site concept instead
        my ($colname, $colgroup) = &util::get_collection_parts($qualified_collection);

        # See solrbuilder.pm to get the indexing levels (document, section) from the collectcfg file
        # Used to generate core names from them and remove cores by name
        foreach my $level ( @{$collectcfg->{'levels'}} ) {
            my ($pindex) = $level =~ /^(.)/;   # first character of the level name
            my $indexname = $pindex."idx";
            push(@corenames, "$site-$colname-$indexname"); #"$site-$colname-didx", "$site-$colname-sidx"
        }

        # If the Solr/Jetty server is not already running, the following starts
        # it up, and only returns when the server is "reading and listening"
        $solr_server = solrserver->new($build_dir);
        $solr_server->start();

        # We'll be moving building to index. For solr collection, there's further
        # special processing to make a corresponding change to the solr.xml
        # by removing the temporary building cores and (re)creating the index cores
    }


    # 3. Do all the moving building to index stuff now

    # If removeold: replace index dir with building dir.
    # If keepold: move building's contents into index, where only duplicates will get deleted.
    # removeold and keepold can't both be on at the same time
    # incremental becomes relevant for solr, though it was irrelevant to what activate.pl does (moving building to index)
    my $incremental_mode; # returned by the check below, but not used further in this script
    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental,
                                                   $build_dir, # checkdir. Usually archives or export to be deleted. activate.pl deletes building
                                                   $collectcfg);

    if ($removeold) {

        if (&FileUtils::directoryExists($index_dir)) {
            $gsserver->print_task_msg("Removing \"index\"");

            if ($buildtype eq "solr") {
                # if solr, remove any cores that are using the index_dir before deleting this dir
                foreach my $corename (@corenames) {
                    $solr_server->admin_unload_core($corename);
                }
            }

            &FileUtils::removeFilesRecursive($index_dir);

            # Wait for a couple of seconds, just for luck
            sleep 2;

            if (&FileUtils::directoryExists($index_dir)) {
                $gsserver->print_msg("The index directory $index_dir could not be deleted.\n"); # CollectionManager.Index_Not_Deleted
            }
        }

        # if remote GS server: gliserver.pl would call activate.pl to activate
        # the collection at this point since activate.pl lives on the server side

        if ($buildtype eq "solr") {
            # if solr, remove any cores that are using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename");
            }
        }

        # Move the building directory to become the new index directory
        $gsserver->print_task_msg("Moving \"building\" -> \"index\"");
        &FileUtils::moveFiles($build_dir, $index_dir);
        if (&FileUtils::directoryExists($build_dir) || !&FileUtils::directoryExists($index_dir)) {
            $gsserver->print_msg("Could not move $build_dir to $index_dir.\n"); # CollectionManager.Build_Not_Moved
        }
    }
    elsif ($keepold || $incremental) {
        if ($buildtype eq "solr" && $build_dir ne $index_dir) {
            # if solr, remove any cores that may be using the building_dir before moving this dir onto index
            foreach my $corename (@corenames) {
                $solr_server->admin_unload_core("building-$corename") if $solr_server->admin_ping_core("building-$corename");
            }
        }

        if ($build_dir eq $index_dir) { # building_dir can have been set to "index" folder, see incremental-buildcol.pl
            $gsserver->print_task_msg("building folder is index folder, not moving");
        } else {
            # Move just the contents of building dir into the index dir, overwriting
            # existing files, but don't replace index with building.
            $gsserver->print_task_msg("Moving \"building\" -> \"index\"");
            &FileUtils::moveDirectoryContents($build_dir, $index_dir);
        }
    }


    # now we've moved building to index, move tmp oaidb to live oaidb in parallel
    my $oai_info = oaiinfo->new($collect_cfg_filename, $collectcfg->{'infodbtype'});
    $oai_info->activate_collection();


    if ($buildtype eq "solr") {
        foreach my $corename (@corenames) {
            if ($removeold) {
                # Call CREATE action to get all cores pointing to the index folder, since building is now index
                $solr_server->admin_create_core($corename, $index_dir);

            } elsif ($keepold || $incremental) {
                # Call RELOAD core. Should already be using the index_dir directory for $keepold and $incremental case

                # Ping to see if corename exists, if it does, reload, else create
                if ($solr_server->admin_ping_core($corename)) {
                    $solr_server->admin_reload_core($corename);
                } else {
                    $solr_server->admin_create_core($corename, $index_dir);
                }
            }
        }

        # regenerate the solr.xml.in from solr.xml in case we are working off a dvd.
        $solr_server->solr_xml_to_solr_xml_in();
    }


    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again

    # Check for success: if building does not exist OR is empty OR if building is index (in which case there was no move)
    if ($build_dir eq $index_dir || !&FileUtils::directoryExists($build_dir) || &FileUtils::isDirectoryEmpty($build_dir)) {

        $gsserver->do_activate() unless $skipactivation;

    } else { # installcollection failed
        # CollectionManager.Preview_Ready_Failed
        $gsserver->print_msg("Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n");
    }

    $gsserver->print_msg("\n");

    # Only shut the solr server down again if this script was the one that started it.
    if ($buildtype eq "solr") {
        if ($solr_server->explicitly_started()) {
            $solr_server->stop();
        }
    }
}
408
# Script entry point: hand main() the argument count followed by the
# command line arguments themselves.
my @cmdline_args = @ARGV;
main(scalar(@cmdline_args), @cmdline_args);
Note: See TracBrowser for help on using the repository browser.