root/main/trunk/greenstone2/bin/script/activate.pl @ 29948

Revision 29948, 32.3 KB (checked in by ak19, 4 years ago)

Final of two commits to fix rebuilding a collection using the online editor on Windows. Resolves the file lock problem. The changes to the java code from the previous commit work with changes in activate.pl in this commit. activate.pl is passed a flag now so that it no longer de-activates and re-activates the collection itself, but just concerns itself with moving building to index. The GS2Construct java code now de-activates the collection prior to calling activate.pl and then re-activates it afterward. In the way it was done before, activate.pl used to handle de- and re-activating the collection. But when it was launched from the server java code, the java VM would exit having left a copy of the file handles to the perl process when forking the process for activate.pl. The perl code could not move building to index since the file handles had locks (6 of them) on the index/text/collection.gdb. Changes have been made to GS2PerlConstructor too, so that it more cleanly closes all the pipes of a process, that the process itself may thereby exit cleanly. Not yet able to move this properly into its own classes since the StreamGobbler classes in GLI are not quite suited but were customised for FormatConverter. Further changes to activate.pl are for better error reporting.

  • Property svn:executable set to *
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# activate.pl -- to be called after building a collection to activate it.
6#
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 2009 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
29
30# This program is designed to support the building process of Greenstone.
31# It deactivates the collection just built, if the web server is running
32# and is a persistent web server (or if the library_URL provided as
33# parameter to this script is of a currently running web server). It then
34# moves building to index, before activating the collection on the GS2 or
35# GS3 web server again if necessary.
36
37use Config;
38
# Compile-time setup of @INC so that Greenstone's bundled Perl modules
# (colcfg, scriptutil, util, ...) can be found by the 'use' lines below.
# Dies immediately if the Greenstone environment has not been set up.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");

    # Adding cpan in, adds in its auto subfolder which conflicts with ActivePerl on Windows.
    # The auto folders therefore live in per-version directories (perl-5.6, perl-5.8,
    # perl-5.10, ...) and only the one matching the running perl is added.
    #
    # Note: $] encodes the version number of perl as 5.XXXYYY, so characters
    # 3-4 of its string form are the two-digit minor version (e.g. "10", "16").
    my $perl_dir;
    if ($] >= 5.010) {
        $perl_dir = "perl-5." . substr($], 3, 2);
    }
    elsif ($] >= 5.008) {
        # any perl 5.8.x
        $perl_dir = "perl-5.8";
    }
    else {
        # anything older: assume perl 5.6
        $perl_dir = "perl-5.6";
    }

    # Use push to put this on the end, so an existing XML::Parser will be used by default.
    # Prefer the multi-threaded build of the bundled modules when it exists and
    # the running perl was itself built with thread support.
    my $cpan_version_dir = "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir";
    if (-d "$cpan_version_dir-mt" && $Config{usethreads}) {
        push (@INC, "$cpan_version_dir-mt");
    }
    else {
        push (@INC, $cpan_version_dir);
    }
}
83
84
85use strict;
86no strict 'refs'; # allow filehandles to be variables and vice versa
87no strict 'subs'; # allow barewords (eg STDERR) as function arguments
88
89use File::Basename;
90use File::Find;
91
92use HTTP::Response;
93use LWP::Simple qw($ua !head); # import useragent object as $ua from the full LWP to use along with LWP::Simple
94        # don't import LWP::Simple's head function by name since it can conflict with CGI:head())           
95#use CGI qw(:standard);  # then only CGI.pm defines a head()
96use Net::Ping;
97use URI;
98
99use colcfg;
100use scriptutil;
101use util;
102#use enum;
103
104# enumerations in perl, http://stackoverflow.com/questions/473666/does-perl-have-an-enumeration-type
105# Unfortunately, not part of perl's core
106#use enum qw(LEVEL_NONE LEVEL_ERROR LEVEL_INFO LEVEL_DEBUG); # debugging levels NONE == 0, ERROR=1 INFO=2 DEBUG=3
107
108# global variables
109#my $default_verbosity = LEVEL_ERROR; # by default we display basic error messages
110
# Verbosity used by the print helpers whenever a caller does not pass one.
# main() overwrites it when -verbosity is given on the command line.
my $default_verbosity = 2; # by default we display basic error and info messages

# Prints a banner announcing a task to STDERR:
#
#   ************************
#   * <task_msg>
#   ************************
#
# Parameters:
#   $task_msg          - text shown inside the banner
#   $verbosity_setting - optional; the banner is printed only when this is >= 1.
#                        Falls back to $default_verbosity when not passed.
sub print_task_msg {
    my ($task_msg, $verbosity_setting) = @_;

    # Test definedness, not truth: an explicit verbosity of 0 means "silent"
    # and must not be replaced by the default.
    $verbosity_setting = $default_verbosity unless defined $verbosity_setting;

    if ($verbosity_setting >= 1) {
        print STDERR "\n";
        print STDERR "************************\n";
        print STDERR "* $task_msg\n";
        print STDERR "************************\n";
    }
}
125
# Writes $msg to STDERR when the active verbosity reaches the level the
# message requires. The text is emitted exactly as given: no newline is added.
#
# Parameters:
#   $msg               - text to print
#   $min_verbosity     - optional; lowest verbosity at which the message is
#                        shown (1 when not passed)
#   $verbosity_setting - optional; the active verbosity ($default_verbosity
#                        when not passed)
sub print_msg {
    my ($msg, $min_verbosity, $verbosity_setting) = @_;

    my $active_level   = defined $verbosity_setting ? $verbosity_setting : $default_verbosity;
    my $required_level = defined $min_verbosity     ? $min_verbosity     : 1;

    print STDERR "$msg" if ($active_level >= $required_level);
}
140
# Sends a command to a GS2 or GS3 library URL and reports whether the reply
# was the expected one. The commands used in this script are activate,
# deactivate, ping, and is-persistent (is-persistent only implemented for GS2).
#
# Parameters:
#   $library_url                 - base URL of the library; undef => returns 0
#   $command                     - query string appended verbatim to the URL
#   $check_message_against_regex - pattern the response page must match
#   $site                        - accepted but not used (Gatherer.java's
#                                  configGS3Server doesn't use it either)
#   $expected_error_code         - optional HTTP error status that is reported
#                                  quietly instead of as an unexpected error
#   $silent                      - when true, a "did not succeed" page is not
#                                  reported
#
# Returns 1 only when the request succeeded at the HTTP level AND the page
# content matched the pattern; 0 in every other case.
sub config {
    my ($library_url, $command, $check_message_against_regex, $site, $expected_error_code, $silent) = @_;

    return 0 if !defined $library_url;

    # Without an explicit timeout LWP can block for a long time on a dead
    # server (http://www.perlmonks.org/?node_id=618534).
    $ua->timeout(5);

    # For GS2 the HTTP status alone isn't enough: the page content has to be
    # read too, so use LWP::UserAgent's get(), which returns an HTTP::Response.
    my $reply  = $ua->get( $library_url.$command);
    my $status = $reply->code();

    if (!LWP::Simple::is_success($status)) {
        # Only a genuine HTTP error status can count as the "expected" failure;
        # any other non-success status is always reported.
        my $failure_was_expected = LWP::Simple::is_error($status)
            && defined $expected_error_code
            && $status == $expected_error_code;

        if ($failure_was_expected) {
            print_msg(" Response status $status as expected.\n", 3);
        }
        else {
            print_msg("*** Command $library_url$command\n");
            print_msg("*** Unexpected error. HTTP Response Status: $status - Failed.\n");
        }
        return 0; # an error status is a failure, expected or not
    }

    print_msg("*** Command $library_url$command\n", 3);
    print_msg("*** HTTP Response Status: $status - Complete.", 3);

    # Reduce the page to the contents of its gs_content element for display.
    my $page      = $reply->content;
    my $resultstr = $page;
    $resultstr =~ s@.*gs_content\"\>@@s;
    $resultstr =~ s@</div>.*@@s;

    if ($page =~ m/$check_message_against_regex/) {
        print_msg(" Response as expected.\n", 3);
        print_msg("@@@@@@ Got result:\n$resultstr\n", 4);
        return 1;
    }

    # When the collection is expected to be inactive the caller passes $silent,
    # and the ping's "did not succeed" answer is then not worth reporting.
    # Any other unexpected page is always reported.
    unless ($silent && $page =~ m/did not succeed/) {
        print_msg("\n\tBUT: command $library_url$command response UNEXPECTED.\n", 3);
        print_msg("*** Got message:\n$page.\n", 4);
        print_msg("*** Got result:\n$resultstr\n", 3);
    }
    return 0; # ping on a collection may "not succeed."
}
214
# Asks the library server to release (deactivate) a collection.
#
# Parameters:
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"; selects the command and the
#                           response text that is checked for
#   $qualified_collection - collection name, appended to the command
#   $site                 - passed through to config() for GS3
#
# Returns config()'s result (true when the server gave the expected
# response), or 0 for an unrecognised $gs_mode.
sub deactivate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $DEACTIVATE_COMMAND = "?a=config&cmd=release-collection&c=";
        my $check_message_against_regex = q/configured release-collection/;
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }
    elsif ($gs_mode eq "gs3") {
        my $DEACTIVATE_COMMAND = "?a=s&sa=d&st=collection&sn=";
        # \Q..\E: the collection name is literal text, not a pattern, so any
        # regex metacharacters in it (. + / etc.) must be escaped.
        my $check_message_against_regex = "collection: \Q$qualified_collection\E deactivated";
        return config($library_url, $DEACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0;
}
229
# Asks the library server to add (activate) a collection.
#
# Parameters:
#   $library_url          - base URL of the library
#   $gs_mode              - "gs2" or "gs3"; selects the command and the
#                           response text that is checked for
#   $qualified_collection - collection name, appended to the command
#   $site                 - passed through to config() for GS3
#
# Returns config()'s result (true when the server gave the expected
# response), or 0 for an unrecognised $gs_mode.
sub activate_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site) = @_;

    if ($gs_mode eq "gs2") {
        my $ACTIVATE_COMMAND = "?a=config&cmd=add-collection&c=";
        my $check_message_against_regex = q/configured add-collection/;
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex);
    }
    elsif ($gs_mode eq "gs3") {
        my $ACTIVATE_COMMAND = "?a=s&sa=a&st=collection&sn=";
        # \Q..\E: the collection name is literal text, not a pattern, so any
        # regex metacharacters in it (. + / etc.) must be escaped.
        my $check_message_against_regex = "collection: \Q$qualified_collection\E activated";
        return config($library_url, $ACTIVATE_COMMAND.$qualified_collection, $check_message_against_regex, $site);
    }

    return 0;
}
244
# Sends a ping command through config() and returns its result.
#
# Response handling, as configured here:
#   - if the GS server is not running we *expect* a "500" status, so that
#     status is not reported as an unexpected error;
#   - if the server is running, the page must contain "succeeded";
#   - pinging an inactive collection yields "did not succeed", which config()
#     reports unless $silent is set.
#
# $gs_mode is accepted but not used here.
sub ping {
    my ($library_url, $command, $gs_mode, $site, $silent) = @_;

    my $success_pattern     = q/(succeeded)/;
    my $server_down_status  = 500;

    print_msg("*** COMMAND WAS: |$command|***\n", 4);

    return config($library_url, $command, $success_pattern, $site, $server_down_status, $silent);
}
259
# General server-level ping: sends the ping action for the given server
# flavour to the GS library and returns ping()'s result.
# For a $gs_mode other than "gs2"/"gs3" an empty command is sent.
sub ping_library {
    my ($library_url, $gs_mode, $site) = @_;

    my %ping_command_for = (
        "gs2" => "?a=ping",
        "gs3" => "?a=s&sa=ping",
    );
    my $command = exists $ping_command_for{$gs_mode} ? $ping_command_for{$gs_mode} : "";

    return ping($library_url, $command, $gs_mode, $site);
}
273
274
# Collection-level ping: asks the GS library whether the given collection is
# active and returns ping()'s result. $silent is handed on so that an
# expected "did not succeed" answer need not be reported.
# For a $gs_mode other than "gs2"/"gs3" an empty command is sent.
sub ping_library_collection {
    my ($library_url, $gs_mode, $qualified_collection, $site, $silent) = @_;

    my $command =
          $gs_mode eq "gs2" ? "?a=ping&c=$qualified_collection"
        : $gs_mode eq "gs3" ? "?a=s&sa=ping&st=collection&sn=$qualified_collection"
        :                     "";

    return ping($library_url, $command, $gs_mode, $site, $silent);
}
288
# Returns true if the server behind $library_url is persistent.
# A GS3 server is always persistent, so only GS2 is actually queried, by
# sending is-persistent and looking for "true" in the reply
# (isPersistent: true versus isPersistent: false).
sub is_persistent {
    my ($library_url, $gs_mode) = @_;

    return 1 if $gs_mode eq "gs3";

    return config($library_url, "?a=is-persistent", q/true/);
}
302   
# Works out the library URL of a web server included with Greenstone.
#
# Parameter:
#   $gs_mode - "gs2" or "gs3"
#
# For GS2 the URL is derived from $GSDLHOME/llssite.cfg (properties url,
# portnumber and enterlib). For GS3, ant is asked for the default servlet URL.
#
# Returns the URL, or undef when it cannot be determined: no llssite.cfg,
# the file cannot be opened, no url property, url=URL_pending, no URL in
# ant's output, or an unrecognised $gs_mode.
sub get_library_URL {
    my $gs_mode = shift(@_); # gs3 or gs2

    my $url = undef;

    if ($gs_mode eq "gs2") {
        my $llssite_cfg = &FileUtils::filenameConcatenate($ENV{'GSDLHOME'}, "llssite.cfg");

        if (-f $llssite_cfg) {
            # check llssite.cfg for line with url property
            # for server.exe also need to use portnumber and enterlib properties

            # Lexical handle + 3-arg open: the filename can never be taken
            # for an open mode, and the handle cannot clash with other code.
            my $cfg_fh;
            if (!open($cfg_fh, '<', $llssite_cfg)) {
                &print_msg("activate.pl::get_library_URL failed to open $llssite_cfg ($!)\n");
                return undef;
            }

            # Read in the entire contents of the file in one hit
            my $contents = do { local $/; <$cfg_fh> };
            close($cfg_fh);
            $contents = "" unless defined $contents;

            my @lines = split(/[\n\r]+/, $contents); # split on carriage-returns and/or linefeeds
            my $enterlib = "";
            my $portnumber = ""; # will remain empty (implicit port 80) unless it's specifically been assigned

            foreach my $line (@lines) {
                if ($line =~ m/^url=(.*)$/) {
                    $url = $1;
                }
                elsif ($line =~ m/^enterlib=(.*)$/) {
                    $enterlib = $1;
                }
                elsif ($line =~ m/^portnumber=(.*)$/) {
                    $portnumber = $1;
                }
            }

            if (!$url) {
                return undef;
            }
            elsif ($url eq "URL_pending") { # library is not running
                # do not process url=URL_pending in the file, since for server.exe
                # this just means the Enter Library button hasn't been pressed yet
                $url = undef;
            }
            else {
                # In the case of server.exe, need to do extra work to get the proper URL
                # But first, need to know whether we're indeed dealing with server.exe:
                # compare the URL's host to the full URL.
                # E.g. for http://localhost:8383/greenstone3/cgi-bin, the host is localhost
                my $uri = URI->new( $url );

                # Not every URI class has a host() method (e.g. a value with
                # no scheme), so check before calling it rather than dying.
                my $host = $uri->can('host') ? $uri->host : undef;

                # \Q..\E: the dots in a host name must match literally.
                if (defined $host && $url =~ m/http:\/\/\Q$host\E(\/)?$/) {
                    # If the host with http:// prefix is completely the same as the URL, assume server.exe
                    # then the actual URL is the result of suffixing the port and enterlib properties in llssite.cfg.
                    # Drop a trailing slash first so the port attaches to the host,
                    # and leave the port out altogether when none was configured.
                    $url =~ s/\/$//;
                    $url .= ":".$portnumber if $portnumber ne "";
                    $url .= $enterlib;
                } # else, apache web server
            }
        }
    }
    elsif ($gs_mode eq "gs3") {
        # Run the ant target get-default-servlet-url. The output can look like:
        #
        # Buildfile: build.xml
        #   [echo] os.name: Windows Vista
        #
        # get-default-servlet-url:
        #   [echo] http://localhost:8383/greenstone3/library
        # BUILD SUCCESSFUL
        # Total time: 0 seconds
        #
        # The target can be run from anywhere by specifying the location of
        # GS3's ant build.xml buildfile. GSDL3SRCHOME will be set for GS3 by
        # gs3-setup.sh, a step that would have been necessary to run the
        # activate.pl script in the first place.
        my $perl_command = "ant -buildfile \"$ENV{'GSDL3SRCHOME'}/build.xml\" get-default-servlet-url";

        # The command is deliberately still run through the shell (string
        # form of pipe open) rather than as an argument list.
        my $ant_fh;
        if (open($ant_fh, "$perl_command |")) {
            while (defined (my $perl_output_line = <$ant_fh>)) {
                # the last line containing a URL wins
                if ($perl_output_line =~ m@http:\/\/(\S*)@) { # grab all the non-whitespace chars
                    $url = "http://".$1;
                }
            }
            close($ant_fh);
        }
        else {
            &print_msg("activate.pl::get_library_URL: Failed to run $perl_command to work out library URL for $gs_mode\n");
        }
    }

    # either the url is still undef or it is now set
    return $url;
}
411
412### UNUSED METHODS TO MOVE TO util.pm?
413
# This method is now unused: ping_library is used instead to send the ping
# action to a GS2/GS3 server. It can be used more generally to test whether
# a URL is alive, by issuing a HEAD request for it.
# http://search.cpan.org/dist/libwww-perl/lib/LWP/Simple.pm
# and http://www.perlmonks.org/?node_id=618534
#
# Returns 0 for an undefined URL, otherwise whatever LWP::Simple::head()
# gives back in scalar context.
sub is_URL_active {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 unless defined $url;

    # Cap the wait at 10 seconds; without an explicit timeout a dead server
    # can keep the request hanging for a long time.
    $ua->timeout(10);

    # HEAD is enough here: LWP::Simple::get($url) is more intensive.
    my $status = LWP::Simple::head($url);
    return $status;
}
433
# Pings the host named in a GS2/GS3 URL and returns the ping result
# (0 for an undefined URL).
# NOTE: pinging seems to always return true, so this method doesn't work
# as a test of whether the library server is running.
sub pingHost {
    my ($url) = @_; # gs3 or gs2 URL

    return 0 if !defined $url;

    # Get just the host: "http://localhost/gsdl?uq=332033495" and
    # "http://localhost/greenstone/cgi-bin/library.cgi" both become "localhost".
    # http://stackoverflow.com/questions/827024/how-do-i-extract-the-domain-out-of-an-url
    my $host = URI->new( $url )->host;

    # Ping the host. http://perldoc.perl.org/Net/Ping.html
    my $pinger = Net::Ping->new();
    my $status = $pinger->ping($host); # appears to be undef rather than 0 on failure
    print STDERR "**** $host is alive.\n" if $status;
    $pinger->close();

    # whether pinging was a success or failure
    return $status;
}
462
463
464# Most of the arguments are familiar from the building scripts like buildcol.pl
465# The special optional argument -library_url is for when we're dealing with a web
466# library server such as an apache that's separate from any included with GS2.
467# In such a case, this script's caller should pass in -library_url <URL>.
468#
469# $site argument must be specified in the cmdline for collectionConfig.xml to get
470# generated which makes $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
471sub main
472{
473    my ($argc,@argv) = @_;
474
475    if (($argc==0)  || (($argc==1) && ($argv[0] =~ m/^--?h(elp)?$/))) {
476        my ($progname) = ($0 =~ m/^.*[\/|\\](.*?)$/);
477
478
479        print STDERR "\n";
480        print STDERR "Usage: $progname [-collectdir c -builddir b -indexdir i -site s -skipactivation -removeold -keepold -verbosity v\n";
481        print STDERR "\t-library_url URL] <[colgroup/]collection>\n";
482        print STDERR "\n";
483
484        exit(-1);
485    }
486   
487    # get the collection details
488    my $qualified_collection = pop @argv; # qualified collection
489   
490    my $collect_dir = undef; #"collect"; # can't be "collect" when only -site is provided for GS3
491    my $build_dir = undef;
492    my $index_dir = undef;
493    my $site = undef;
494   
495    # if run from server (java code), it will handle deactivation and activation to prevent open file handles when java launches this script and exits:
496    my $skipactivation = 0;
497    my $removeold = 0;
498    my $keepold = 0;
499    my $incremental = 0; # used by solr
500
501    my $library_url = $ENV{'GSDL_LIBRARY_URL'} || undef; # to be specified on the cmdline if not using a GS-included web server
502        # the GSDL_LIBRARY_URL env var is useful when running cmdline buildcol.pl in the linux package manager versions of GS3
503   
504    while (my $arg = shift @argv) {
505        if ($arg eq "-collectdir") {
506            $collect_dir = shift @argv;
507        }
508        elsif ($arg eq "-builddir") {
509            $build_dir = shift @argv;
510        }
511        elsif ($arg eq "-indexdir") {
512            $index_dir = shift @argv;
513        }
514        elsif ($arg eq "-site") {
515            $site = shift @argv;
516        }
517        elsif ($arg eq "-skipactivation") {
518            $skipactivation = 1;
519        }
520        elsif ($arg eq "-removeold") {
521            $removeold = 1;
522        }
523        elsif ($arg eq "-keepold") {
524            $keepold = 1;
525        }
526        elsif ($arg eq "-incremental") {
527            $incremental = 1;
528        }
529        elsif ($arg eq "-library_url") {
530            $library_url = shift @argv;
531        }
532        elsif ($arg eq "-verbosity") {
533            $default_verbosity = shift @argv; # global variable
534           
535            # ensure we're working with ints not strings (int context not str context), in case verbosity=0
536            # http://stackoverflow.com/questions/288900/how-can-i-convert-a-string-to-a-number-in-perl
537            $default_verbosity = int($default_verbosity || 0); ### is this the best way?
538        }
539    }
540   
541    # work out the building and index dirs
542    my $collection_dir = &util::resolve_collection_dir($collect_dir, $qualified_collection, $site);
543    $build_dir = &FileUtils::filenameConcatenate($collection_dir, "building") unless (defined $build_dir);
544    $index_dir = &FileUtils::filenameConcatenate($collection_dir, "index") unless (defined $index_dir);
545   
546    &print_task_msg("Running  Collection  Activation  Stage");
547   
548    # get and check the collection name
549    if ((&colcfg::use_collection($site, $qualified_collection, $collect_dir)) eq "") {
550        &print_msg("Unable to use collection \"$qualified_collection\" within \"$collect_dir\"\n");
551        exit -1;
552    }
553   
554    # Read in the collection configuration file.
555    # Beware: Only if $site is specified in the cmdline does collectionConfig.xml get
556    # generated and does $gs_mode=gs3, else collect.cfg gets generated and $gs_mode=gs2
557    my $gs_mode = "gs2";
558    if ((defined $site) && ($site ne "")) { # GS3
559    $gs_mode = "gs3";
560    }
561    my $collect_cfg_filename = &colcfg::get_collect_cfg_name(STDERR, $gs_mode);
562    my $collectcfg = &colcfg::read_collection_cfg ($collect_cfg_filename,$gs_mode);
563
564    # look for build.cfg/buildConfig.xml
565    my $build_cfg_filename ="";
566   
567    if ($gs_mode eq "gs2") {
568        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir,"build.cfg");
569    } else {
570        $build_cfg_filename = &FileUtils::filenameConcatenate($build_dir, "buildConfig.xml");
571        # gs_mode is GS3. Set the site now if this was not specified as cmdline argument
572        #$site = "localsite" unless defined $site;
573    }
574   
575    # We need to know the buildtype for Solr.
576    # Any change of indexers is already detected and handled by the calling code (buildcol or
577    # full-rebuild), so that at this stage the config file's buildtype reflects the actual buildtype.
578
579    # From buildcol.pl we use searchtype for determining buildtype, but for old versions, use buildtype
580    my $buildtype;
581    if (defined $collectcfg->{'buildtype'}) {
582    $buildtype = $collectcfg->{'buildtype'};
583    } elsif (defined $collectcfg->{'searchtypes'} || defined $collectcfg->{'searchtype'}) {
584    $buildtype = "mgpp";
585    } else {
586    $buildtype = "mg"; #mg is the default
587    }   
588   
589    # can't do anything without a build directory with something in it to move into index
590    # Except if we're (doing incremental) building for solr, where we want to still
591    # activate and deactivate collections including for the incremental case
592    if(!&FileUtils::directoryExists($build_dir)) {
593    &print_msg("No building folder at $build_dir to move to index.\n");
594    exit -1 unless ($buildtype eq "solr"); #&& $incremental);
595    } elsif (&FileUtils::isDirectoryEmpty($build_dir)) {
596    &print_msg("Nothing in building folder $build_dir to move into index folder.\n");
597    exit -1 unless ($buildtype eq "solr"); #&& $incremental);
598    }
599   
600    # Now the logic in GLI's CollectionManager.java (processComplete()
601    # and installCollection()) and Gatherer.configGS3Server().
602
603    # 1. Get library URL
604   
605    # For web servers that are external to a Greenstone installation,
606    # the user can pass in their web server's library URL.
607    # For web servers included with GS (like tomcat for GS3 and server.exe
608    # and apache for GS2), we work out the library URL:
609    if(!$library_url) {
610        $library_url = &get_library_URL($gs_mode); # returns undef if no server is running
611    }
612   
613    # CollectionManager's installCollection phase in GLI
614    # 2. Ping the library URL, and if it's a persistent server and running, release the collection
615   
616    my $is_persistent_server = undef;
617    if(!$skipactivation && $library_url) { # undef if no valid server URL
618
619        &print_msg("Pinging $library_url\n");       
620        if (&ping_library($library_url, $gs_mode, $site)) { # server running
621       
622            # server is running, so release the collection if
623            # the server is persistent and the collection is active
624            &print_msg("Checking if Greenstone server is persistent\n");
625            $is_persistent_server = &is_persistent($library_url, $gs_mode);         
626           
627            if ($is_persistent_server) { # only makes sense to issue activate and deactivate cmds to a persistent server
628               
629                &print_msg("Checking if the collection $qualified_collection is already active\n");
630                my $collection_active = &ping_library_collection($library_url, $gs_mode, $qualified_collection, $site);
631
632                if ($collection_active) {
633                &print_msg("De-activating collection $qualified_collection\n");
634                &deactivate_collection($library_url, $gs_mode, $qualified_collection, $site);
635                }
636                else {
637                &print_msg("Collection is not active => No need to deactivate\n");
638                }
639            }
640            else {
641                &print_msg("Server is not persistent => No need to deactivate collection\n");
642            }
643        }
644        else {
645            &print_msg("No response to Ping => Taken to mean server is not running\n");
646        }
647
648    }
649
650    # 2b. If we're working with a solr collection, then start up the solrserver now.
651    my $solr_server;
652    my @corenames = ();
653    if($buildtype eq "solr") { # start up the jetty server 
654    my $solr_ext = $ENV{'GEXT_SOLR'}; # from solr_passes.pl
655    unshift (@INC, "$solr_ext/perllib");
656    require solrserver;
657
658    # Solr cores are named without taking the collection-group name into account, since solr
659    # is used for GS3 and GS3 doesn't use collection groups but has the site concept instead
660    my ($colname, $colgroup) = &util::get_collection_parts($qualified_collection);
661
662    # See solrbuilder.pm to get the indexing levels (document, section) from the collectcfg file
663    # Used to generate core names from them and remove cores by name
664    foreach my $level ( @{$collectcfg->{'levels'}} ){
665        my ($pindex) = $level =~ /^(.)/;
666        my $indexname = $pindex."idx";
667        push(@corenames, "$site-$colname-$indexname"); #"$site-$colname-didx", "$site-$colname-sidx"
668        }
669   
670    # If the Solr/Jetty server is not already running, the following starts
671    # it up, and only returns when the server is "reading and listening"   
672    $solr_server = new solrserver($build_dir);
673    $solr_server->start();
674
675    # We'll be moving building to index. For solr collection, there's further
676    # special processing to make a corresponding change to the solr.xml
677    # by removing the temporary building cores and (re)creating the index cores
678    }
679
680
681    # 3. Do all the moving building to index stuff now 
682
683    # If removeold: replace index dir with building dir.
684    # If keepold: move building's contents into index, where only duplicates will get deleted.
685    # removeold and keepold can't both be on at the same time
686        # incremental becomes relevant for solr, though it was irrelevant to what activate.pl does (moving building to index)
687    my $incremental_mode;
688    ($removeold, $keepold, $incremental, $incremental_mode) = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
689                           $incremental,
690                           $build_dir, # checkdir. Usually archives or export to be deleted. activate.pl deletes building
691                           $collectcfg);
692   
693    if($removeold) {
694       
695        if(&FileUtils::directoryExists($index_dir)) {
696            &print_task_msg("Removing \"index\"");
697
698            if ($buildtype eq "solr") {
699                # if solr, remove any cores that are using the index_dir before deleting this dir
700                foreach my $corename (@corenames) {
701                $solr_server->admin_unload_core($corename);
702                }
703            }   
704
705            &FileUtils::removeFilesRecursive($index_dir);
706           
707            # Wait for a couple of seconds, just for luck
708            sleep 2;
709           
710            if (&FileUtils::directoryExists($index_dir)) {
711                &print_msg("The index directory $index_dir could not be deleted.\n"); # CollectionManager.Index_Not_Deleted
712            }
713        }
714       
715        # if remote GS server: gliserver.pl would call activate.pl to activate
716        # the collection at this point since activate.pl lives on the server side
717       
718        if ($buildtype eq "solr") {
719            # if solr, remove any cores that are using the building_dir before moving this dir onto index
720            foreach my $corename (@corenames) {
721            $solr_server->admin_unload_core("building-$corename");
722            }
723        }
724
725        # Move the building directory to become the new index directory
726        &print_task_msg("Moving \"building\" -> \"index\"");
727        &FileUtils::moveFiles($build_dir, $index_dir);
728        if(&FileUtils::directoryExists($build_dir) || !&FileUtils::directoryExists($index_dir)) {           
729            &print_msg("Could not move $build_dir to $index_dir.\n"); # CollectionManager.Build_Not_Moved
730        }
731    }
732    elsif ($keepold || $incremental) {
733            if ($buildtype eq "solr") {
734            # if solr, remove any cores that may be using the building_dir before moving this dir onto index
735            foreach my $corename (@corenames) {         
736            $solr_server->admin_unload_core("building-$corename") if $solr_server->admin_ping_core("building-$corename");
737            }
738        }
739       
740        # Copy just the contents of building dir into the index dir, overwriting
741        # existing files, but don't replace index with building.
742        &print_task_msg("Moving \"building\" -> \"index\"");
743        &FileUtils::moveDirectoryContents($build_dir, $index_dir);
744    }
745
746    if ($buildtype eq "solr") {
747    # Call CREATE action to get the old cores pointing to the index folder
748    foreach my $corename (@corenames) {
749        if($removeold) {
750        # Call CREATE action to get all cores pointing to the index folder, since building is now index
751        $solr_server->admin_create_core($corename, $index_dir);
752       
753        } elsif ($keepold || $incremental) {
754        # Call RELOAD core. Should already be using the index_dir directory for $keepold and $incremental case
755       
756        # Ping to see if corename exists, if it does, reload, else create
757        if ($solr_server->admin_ping_core($corename)) {
758            $solr_server->admin_reload_core($corename);
759        } else {
760            $solr_server->admin_create_core($corename, $index_dir);
761        }
762        }
763    }
764
765    # regenerate the solr.xml.in from solr.xml in case we are working off a dvd.
766    $solr_server->solr_xml_to_solr_xml_in();
767    }
768
769    # 4. Ping the library URL, and if it's a persistent server and running, activate the collection again   
770   
771    # Check for success: if building does not exist OR is empty
772    if(!&FileUtils::directoryExists($build_dir) || &FileUtils::isDirectoryEmpty($build_dir)) {
773   
774        if(!$skipactivation && $library_url) { # undef if no valid server URL
775       
776            &print_msg("Pinging $library_url\n");
777            if (&ping_library($library_url, $gs_mode, $site)) { # server running
778           
779                # don't need to work out persistency of server more than once, since the libraryURL hasn't changed             
780                if (!defined $is_persistent_server) {
781                    &print_msg("Checking if Greenstone server is persistent\n");
782                    $is_persistent_server = &is_persistent($library_url, $gs_mode);
783                }
784               
785                   
786                if ($is_persistent_server) { # persistent server, so can try activating collection
787
788                    &print_msg("Checking if the collection $qualified_collection is not already active\n");
789
790                    # Since we could have deactivated the collection at this point,
791                    # it is likely that it is not yet active. When pinging the collection
792                    # a "ping did not succeed" message is expected, therefore tell the ping
793                    # to proceed silently
794                    my $silent = 1;
795                    my $collection_active = &ping_library_collection($library_url, $gs_mode, $qualified_collection, $site, $silent);
796
797                    if (!$collection_active) {
798                    &print_msg(" Collection is not active.\n");
799                    &print_msg("Activating collection $qualified_collection\n");
800                    &activate_collection($library_url, $gs_mode, $qualified_collection, $site);
801                   
802                    # unless an error occurred, the collection should now be active:
803                    $collection_active = &ping_library_collection($library_url, $gs_mode, $qualified_collection, $site); # not silent if ping did not succeed
804                    if(!$collection_active) {
805                        &print_msg("ERROR: collection $qualified_collection did not get activated\n");
806                    }
807                    }
808                    else {
809                    &print_msg("Collection is already active => No need to activate\n");
810                    }
811                }
812                else {
813                    &print_msg("Server is not persistent => No need to activate collection\n");
814                }
815            }
816            else {
817                &print_msg("No response to Ping => Taken to mean server is not running\n");
818            }
819        }
820    } else { # installcollection failed     
821        #CollectionManager.Preview_Ready_Failed
822        &print_msg("Building directory is not empty or still exists. Failed to properly move $build_dir to $index_dir.\n");
823    }
824
825    &print_msg("\n");
826
827    if($buildtype eq "solr") {
828    if ($solr_server->explicitly_started()) {
829        $solr_server->stop();
830    }
831    }
832}
833
# Script entry point: pass main() the number of command-line arguments
# first, followed by the arguments themselves.
{
    my $arg_count = @ARGV;    # array in scalar context yields its length
    &main($arg_count, @ARGV);
}
Note: See TracBrowser for help on using the browser.