root/main/trunk/greenstone2/perllib/g2futil.pm @ 21588

Revision 21588, 12.9 KB (checked in by ak19, 9 years ago)

I think I've now fixed the bug where build operation in FLI would fail after the Fedora server was found not to have restarted yet upon creating the gsdl.xml file in the fedora/tomcat/conf/Catalina/localhost folder. The problem, it turns out, was that the tries and wait-retry args to wget would not retry if a connection is refused or a page not found is returned, and at times this would happen the very first time wget tried to access the Fedora server to check whether it was running, so it wouldn't retry anymore.

Line 
package g2futil;

# Utility routines for driving a Fedora repository through Fedora's
# command-line client scripts (fedora-ingest, fedora-purge, ...).

BEGIN
{
    # Everything in this module depends on FEDORA_HOME, so refuse to load
    # without it.
    if (!defined $ENV{'FEDORA_HOME'}) {
        print STDERR "Error: Environment variable FEDORA_HOME not set.\n";
        exit 1;
    }

    # This block runs as soon as it has been parsed, i.e. before the
    # 'use util' statement further down has been compiled.  Load util
    # explicitly so the calls below work even when the calling script has
    # not already loaded it.
    require util;

    # Put FEDORA_HOME/client/bin on PATH so the fedora-* client scripts can
    # be run by name.
    my $fedora_client_bin = &util::filename_cat($ENV{'FEDORA_HOME'},"client","bin");
    &util::envvar_append("PATH",$fedora_client_bin);
}

use strict;
use util;
17
# Older command runner (presumably kept for reference alongside run_cmd):
# executes $cmd with system().
#
#   $cmd            - complete command line to execute
#   $verbosity      - 0 discards the command's standard output; 2 or more
#                     echoes the command and its exit status
#   $tolerate_error - when equal to "tolerate_error", standard output is
#                     discarded and a failing command is not reported
#
# Returns the raw value of system(): 0 on success, the wait status
# (exit code * 256) on failure, or -1 if the command could not be started.
sub run_cmd_old
{
    my ($cmd,$verbosity,$tolerate_error) = @_;

    my $tolerant = (defined $tolerate_error) && ($tolerate_error eq "tolerate_error");

    if (($verbosity == 0) || $tolerant) {
        # Discard standard output using the platform's null device.
        if ($ENV{'GSDLOS'} =~ /^windows$/i) {
            $cmd .= " > nul";
        }
        else {
            $cmd .= " > /dev/null";
        }
    }

    if ($verbosity >= 2) {
        print "Runing command:\n";
        print "$cmd\n";
    }

    my $status = system($cmd);

    if ($verbosity >= 2) {
        print "Exit status = ", $status/256, "\n";
    }

    # system() returns -1 when the command could not be started at all, so
    # test for any non-zero status rather than only positive ones.
    if (!$tolerant && ($status != 0)) {
        print STDERR "Error executing:\n$cmd\n";
        print STDERR "$!\n";
    }

    return $status;
}
51
52
# Runs "$prog $arguments", capturing its combined stdout/stderr.
#
#   $prog           - program to run.  Names starting with "fedora-" get the
#                     platform's script extension (.bat on Windows, .sh
#                     otherwise) appended; on Windows, names ending in ".pl"
#                     are run through "perl -S".
#   $arguments      - argument string appended to the program name
#   $verbosity      - on failure: 2 or more also prints the command, 3 or
#                     more also prints the captured output
#   $tolerate_error - when equal to "tolerate_error", the command's output is
#                     not echoed and failures are not reported
#
# Returns the command's wait status ($?), or -1 if the command exited with 0
# but printed a line starting with "Error:", or undef if the command could
# not be started.
sub run_cmd
{
    my ($prog,$arguments,$verbosity,$tolerate_error) = @_;

    my $cmd_status = undef;

    my $report = (!defined $tolerate_error) || ($tolerate_error ne "tolerate_error");

    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows/);
    my $script_ext = $on_windows ? ".bat" : ".sh";

    if ($prog =~ m/^fedora-/) {
        $prog .= $script_ext;
    }
    # The dot is escaped so that only a real ".pl" extension matches.
    if (($prog =~ m/\.pl$/i) && $on_windows) {
        $prog = "perl -S $prog";
    }

    my $cmd = "$prog $arguments";

    # The command is a single string, so it still goes through the shell,
    # which is what makes the 2>&1 redirection work.
    if (open(my $cmd_fh, "-|", "$cmd 2>&1"))
    {
        my $result = "";
        while (defined (my $line = <$cmd_fh>))
        {
            $result .= $line;
            print $line if $report;
        }

        close($cmd_fh);

        $cmd_status = $?;

        if (($cmd_status == 0) && ($result =~ m/^Error\s*:/m)) {
            # Fedora script generated an error, but did not exit
            # with an error status => artificially raise one
            $cmd_status = -1;
        }

        if (($cmd_status != 0) && $report) {
            print STDERR "Error: processing command failed.  Exit status $cmd_status\n";

            if ($verbosity >= 2) {
                print STDERR "  Command was: $cmd\n";
            }
            if ($verbosity >= 3) {
                print STDERR "result: $result\n";
            }
        }
    }
    else
    {
        print STDERR "Error: failed to execute $cmd\n";
    }

    return $cmd_status;
}
127
128
# Runs the fedora-dsinfo client script for the object $pid.
#
#   $pid     - identifier of the Fedora object to query
#   $options - hash ref supplying 'verbosity', 'hostname', 'port',
#              'username', 'password' and 'protocol'
#
# Errors are tolerated (not echoed or reported by run_cmd).  Returns the
# status from run_cmd.
sub run_datastore_info
{
    my ($pid,$options) = @_;

    # Connection details, in the order they appear on the command line.
    my @connection = map { $options->{$_} } qw(hostname port username password);
    my $arguments = join(" ", @connection, $pid, $options->{'protocol'});

    my $status = run_cmd("fedora-dsinfo",$arguments,$options->{'verbosity'},"tolerate_error");

    return $status;
}
147
# Runs the fedora-purge client script to remove the object $pid.
#
#   $pid     - identifier of the Fedora object to purge
#   $options - hash ref supplying 'verbosity', 'hostname', 'port',
#              'username', 'password' and 'protocol'
#
# Returns the status from run_cmd.
sub run_purge
{
    my ($pid,$options) = @_;

    my $server = join(":", $options->{'hostname'}, $options->{'port'});

    my @fields = ($server, $options->{'username'}, $options->{'password'},
                  $pid, $options->{'protocol'});

    # The final argument, placed on a continuation line, is a fixed log
    # message identifying this script.
    my $arguments = join(" ", @fields) . " \\\n \"Automated_purge_by_g2f_script\"";

    my $status = run_cmd("fedora-purge",$arguments,$options->{'verbosity'});

    return $status;
}
170
# Runs the fedora-ingest client script on a single METS file.
#
#   $docmets_filename - path of the METS file to ingest
#   $options          - hash ref supplying 'verbosity', 'hostname', 'port',
#                       'username', 'password' and 'protocol'
#
# The ingest format depends on the FEDORA_VERSION environment variable:
# "metslikefedora1" when it starts with 2, otherwise
# "info:fedora/fedora-system:METSFedoraExt-1.1".
#
# Returns the status from run_cmd.
sub run_ingest
{
    my ($docmets_filename,$options) = @_;

    my $verbosity = $options->{'verbosity'};

    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $username = $options->{'username'};
    my $password = $options->{'password'};
    my $protocol = $options->{'protocol'};

    my $server = "$hostname:$port";

    my $prog = "fedora-ingest";

    # checking if major version is 2 (an unset FEDORA_VERSION is treated as
    # "not version 2")
    my $fedora_version = $ENV{'FEDORA_VERSION'};
    my $type = ((defined $fedora_version) && ($fedora_version =~ m/^2/))
        ? "metslikefedora1"
        : "info:fedora/fedora-system:METSFedoraExt-1.1";

    my $arguments = "file \"$docmets_filename\" $type $server $username $password $protocol";
    # Log message for the operation.  This is an ingest, so do not reuse the
    # purge message.
    $arguments .= " \\\n \"Automated_ingest_by_g2f_script\"";

    my $status = run_cmd($prog,$arguments,$verbosity);

    return $status;
}
203
204
# Recursively collects every sub-directory whose name ends in ".dir".
#
#   $full_dir - directory to scan
#   $all_dirs - array ref that the full paths found are appended to
#
# Entries whose names start with "." are skipped.  The ".dir" directories of
# one level are appended before any other sub-directory is descended into;
# ".dir" directories themselves are not descended into.  A directory that
# cannot be opened is silently skipped.
sub rec_get_all_hash_dirs
{
    my ($full_dir,$all_dirs) = @_;

    return unless opendir(DIR, $full_dir);
    my @entries = readdir(DIR);
    closedir DIR;

    my @found_here = ();
    my @to_descend = ();

    foreach my $entry (@entries) {
        next if ($entry =~ /^\./);

        my $full_entry = &util::filename_cat($full_dir,$entry);
        next unless (-d $full_entry);

        if ($entry =~ m/\.dir$/) {
            push(@found_here,$full_entry);
        }
        else {
            push(@to_descend,$full_entry);
        }
    }

    push(@$all_dirs,@found_here);

    foreach my $sub_dir (@to_descend) {
        rec_get_all_hash_dirs($sub_dir,$all_dirs);
    }
}
227
# Returns the list of all ".dir" directories found beneath $start_dir.
#
#   $start_dir - directory to start the recursive search from
#   $maxdocs   - optional; when defined and not the empty string, at most
#                this many directories are returned
#
# If fewer than $maxdocs directories exist, all of them are returned; the
# list is never padded with undefined entries.
sub get_all_hash_dirs
{
    my ($start_dir,$maxdocs) = @_;

    my @all_dirs = ();
    rec_get_all_hash_dirs($start_dir,\@all_dirs);

    if ((defined $maxdocs) && ($maxdocs ne "")) {
        my @maxdoc_dirs = ();
        # Stop as soon as the directories run out: shifting from an empty
        # array would otherwise add undef entries to the result.
        for (my $i=0; ($i<$maxdocs) && (scalar(@all_dirs) > 0); $i++) {
            push(@maxdoc_dirs,shift(@all_dirs));
        }
        @all_dirs = @maxdoc_dirs;
    }

    return @all_dirs;
}
245
# Extracts the document identifier from the docmets.xml file in $hash_dir.
#
#   $hash_dir - directory expected to contain a docmets.xml file
#
# Returns the text of the first <dc:identifier> element that sits on a
# single line, or undef if no such element is found or the file cannot be
# opened (in which case a warning is printed to STDERR).
sub get_hash_id
{
    my ($hash_dir) = @_;

    my $hash_id = undef;

    my $docmets_filename = &util::filename_cat($hash_dir,"docmets.xml");

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as part of the open mode.
    if (open(my $din, "<", $docmets_filename))
    {
        while (defined (my $line = <$din>))
        {
            if ($line =~ m/<dc:identifier>(.*?)<\/dc:identifier>/)
            {
                $hash_id = $1;
                last;
            }
        }

        close($din);
    }
    else
    {
        print STDERR "Warning: Unable to open \"$docmets_filename\"\n";
    }

    return $hash_id;
}
275
276
# Subroutine to write the gsdl.xml file in FEDORA_HOME/tomcat/conf/Catalina/<host/localhost>/
# This xml file will tell Fedora where to find the parent folder of the GS collect dir
# so that it can obtain the FedoraMETS files for ingestion.
# It depends on the Fedora server being on the same machine as the Greenstone server that
# this code is part of.
#
#   $fedora_host - name of the host folder under tomcat/conf/Catalina to try
#                  first; "localhost" is used if that folder has no fedora.xml
#   $collect_dir - path of the Greenstone collect directory
#   $options     - hash ref supplying 'hostname', 'port' and 'protocol', used
#                  to build the URL polled to see whether Fedora is ready
#
# If gsdl.xml already has the wanted contents nothing is changed.  Otherwise
# Fedora's Tomcat is stopped, gsdl.xml is rewritten, Tomcat is started again
# and the Fedora search page is polled until it responds.
#
# Returns the string "gsdl.xml" on success.  Dies if gsdl.xml cannot be
# written or if the Fedora server is still not responding after the retries.
sub write_gsdl_xml_file
{
    my ($fedora_host, $collect_dir, $options) = @_;
    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $protocol = $options->{'protocol'};

    print STDERR "Ensuring that a correct gsdl.xml file exists on the Fedora server end\n";
    # The top of this file has already made sure that FEDORA_HOME is set, but for GS3
    # CATALINA_HOME is set to GS' own tomcat. Since we'll be working with fedora, we need
    # to temporarily set CATALINA_HOME to fedora's tomcat. (Catalina is undefined for GS2).
    # (A plain assignment is used here: "my $x = ... if ..." has undefined behaviour.)
    my $gs_catalina_home = $ENV{'CATALINA_HOME'};
    $ENV{'CATALINA_HOME'} = &util::filename_cat($ENV{'FEDORA_HOME'}, "tomcat");

    # 1. Find out which folder to write to: fedora_host or localhost
    # whichever contains fedora.xml is the one we want (if none, exit with error value?)
    my $fedora_home = $ENV{'FEDORA_HOME'};
    my $base_path = &util::filename_cat($fedora_home, "tomcat", "conf", "Catalina");

    my $host_path = &util::filename_cat($base_path, $fedora_host);
    my $xmlFile = &util::filename_cat($host_path, "fedora.xml");
    if (!-e $xmlFile) {
        # check if the folder localhost contains fedoraXML
        $host_path = &util::filename_cat($base_path, "localhost");
        $xmlFile = &util::filename_cat($host_path, "fedora.xml");
        if (!-e $xmlFile) {
            # try putting gsdl in this folder, but still print a warning
            print STDERR "$host_path does not contain file fedora.xml. Hoping gsdl.xml belongs there anyway\n";
        }
    }

    # 2. Construct the string we are going to write to the gsdl.xml file
    # a. get the parent directory of collect_dir by removing the word
    # "collect" from it and any optional OS-type slash at the end.
    # (Path slash direction does not matter here.)
    # The pattern is anchored to the end of the path so that an earlier
    # directory whose name merely contains "collect" is left untouched.
    my $collectParentDir = $collect_dir;
    $collectParentDir =~ s/collect[\/\\]?\z//;

    # b. Use the collectParentDir to create the contents of gsdl.xml
    my $gsdlXMLcontents = "<?xml version='1.0' encoding='utf-8'?>\n<Context docBase=\"";
    $gsdlXMLcontents = $gsdlXMLcontents.$collectParentDir."\" path=\"/gsdl\"></Context>";

    # 3. If there is already a gsdl.xml file in host_path, compare the string we
    # want to write with what is already in there. If they're the same, we can return
    $xmlFile = &util::filename_cat($host_path, "gsdl.xml");
    if (-e $xmlFile) {
        # such a file exists, so read the contents
        my $xml_contents;
        if (open(my $fin, "<", $xmlFile)) {
            local $/ = undef;        # Read entire file at once
            $xml_contents = <$fin>;  # Now file is read in as one single 'line'
            close($fin);
        }
        else {
            print STDERR "g2f-import.pl: Unable to open existing $xmlFile for comparing...Recoverable. $!\n";
            # doesn't matter, we'll just overwrite it then
        }

        if ((defined $xml_contents) && ($xml_contents eq $gsdlXMLcontents)) {
            print STDERR "Fedora links to the FLI import folder through gsdl.xml.\n";
            # it already contains what we want, we're done; put back GS' own
            # CATALINA_HOME before leaving, just as the normal exit path does
            $ENV{'CATALINA_HOME'} = $gs_catalina_home if defined $gs_catalina_home;
            return "gsdl.xml";
        }
    }

    # 4. If we're here, the contents of gsdl.xml need to be updated:
    # a. First stop the fedora server
    my $script_ext = ($ENV{'GSDLOS'} =~ m/^windows/) ? ".bat" : ".sh";
    my $stop_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "shutdown".$script_ext);
    # execute the command
    $! = 0; # clear any stale error text before it is printed below
    my $status = system($stop_tomcat);
    if ($status != 0) { # to get the actual exit value, divide by 256, but not useful here
        # possible tomcat was already stopped - it's not the end of the world
        print STDERR "Failed to stop Fedora server. Perhaps it was not running. $!\n";
        print "Exit status = ", $status/256, "\n";
    }

    # b. overwrite the file that has outdated contents with the contents we just constructed
    open(my $fout, ">", $xmlFile)   # create or overwrite gsdl.xml file
        or die "g2f-import.pl: Unable to open $xmlFile for telling Fedora where the collect dir is...ERROR: $!\n";
    # write out the updated contents and close the file; buffered write
    # errors only surface at close, so check it
    print $fout $gsdlXMLcontents;
    close($fout)
        or die "g2f-import.pl: Unable to finish writing $xmlFile...ERROR: $!\n";

    # c. Restart the fedora server
    my $start_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "startup".$script_ext);
    $! = 0;
    $status = system($start_tomcat);
    if ($status != 0) {
        print STDERR "Failed to restart the Fedora server... ERROR: $!\n";
        print "Exit status = ", $status/256, "\n";
    }

    # reset CATALINA_HOME to GS' Tomcat (it is undefined for GS2 since GS2 has no tomcat):
    $ENV{'CATALINA_HOME'} = $gs_catalina_home if defined $gs_catalina_home;

    # Starting up the Fedora server takes a long time. We need to wait for the server to be
    # ready before import can continue, because g2f-import relies on an up-and-running Fedora
    # server to purge the collection from it while g2f-build.pl needs a ready Fedora server
    # in order to make it ingest the FedoraMETS. Sleeping is not sufficient since
    # the subsequent steps depend on a proper server restart.
    # Dr Bainbridge's suggestion: test the server is ready with a call to wget.
    #
    # The wget --spider option makes it check that the page is merely there rather than
    # downloading it (see http://www.gnu.org/software/wget/manual/wget.html#Download-Options)
    # and -q is for quiet. wget's exit status is 0 when the page is found.
    #
    # wget's own --tries/--waitretry options are not used: according to the wget manual,
    # fatal errors like "connection refused" or "not found" (404) ARE NOT RETRIED, and
    # those are exactly what a server that is still starting up produces. The retrying
    # is therefore done here instead.

    print STDERR "Fedora server restarted. Waiting for it to become ready...\n";

    $! = 0;

    my $fedora_search_url = "$protocol://$hostname:$port/fedora/search";

    # retry fedora server every second for a total of 15 times until the server is ready
    my $fedoraServerReady = 0;
    my $count = 0;
    do {
        # List form of system(): the URL is passed to wget as a single
        # argument and is never interpreted by a shell.
        $fedoraServerReady = system("wget", "-q", "--spider", $fedora_search_url);
        if ($fedoraServerReady != 0) {
            sleep(1);
            $count++;
        }
    } while ($fedoraServerReady != 0 && $count < 15);

    if ($fedoraServerReady != 0) {
        print STDERR "Fedora server is still not ready... ERROR: $!\n";
        print "Exit status = ", $fedoraServerReady/256, "\n";
        die "Exiting....\n";
    }

    # return some indication that things went well
    return "gsdl.xml";
}


1;
Note: See TracBrowser for help on using the browser.