source: main/trunk/greenstone2/perllib/g2futil.pm@ 21588

Last change on this file since 21588 was 21588, checked in by ak19, 14 years ago

I think I've now fixed the bug where build operation in FLI would fail after the Fedora server was found not to have restarted yet upon creating the gsdl.xml file in the fedora/tomcat/conf/Catalina/localhost folder. The problem, it turns out, was that the tries and wait-retry args to wget would not retry if a connection is refused or a page not found is returned, and at times this would happen the very first time wget tried to access the Fedora server to check whether it was running, so it wouldn't retry anymore.

File size: 12.9 KB
RevLine 
package g2futil;

use strict;

# util has to be loaded BEFORE the BEGIN block below. A BEGIN block runs as
# soon as it has been parsed, so a 'use util' placed after it would not yet
# have been compiled and the &util:: calls would only work if the calling
# script happened to have loaded util itself.
use util;

# Compile-time environment check and setup:
#  - refuse to continue when FEDORA_HOME is not set
#  - append FEDORA_HOME/client/bin to PATH (the subroutines in this module
#    launch the fedora-* client scripts by bare name)
BEGIN
{
    if (!defined $ENV{'FEDORA_HOME'}) {
        print STDERR "Error: Environment variable FEDORA_HOME not set.\n";
        exit 1;
    }

    my $fedora_client_bin = &util::filename_cat($ENV{'FEDORA_HOME'},"client","bin");
    &util::envvar_append("PATH",$fedora_client_bin);
}
17
# Older command runner (the _old suffix suggests it is retained for reference;
# see run_cmd for the variant the other subroutines in this module call).
#
# Parameters:
#   $cmd            - complete command line, handed to system()
#   $verbosity      - 0 discards the command's stdout; >= 2 echoes the command
#                     and its exit status
#   $tolerate_error - pass the string "tolerate_error" to discard stdout and
#                     suppress the error report on failure
#
# Returns the raw status from system(): 0 on success, -1 if the command could
# not be started, otherwise the wait status (exit code is status/256).
sub run_cmd_old
{
    my ($cmd,$verbosity,$tolerate_error) = @_;

    my $tolerating = (defined $tolerate_error) && ($tolerate_error eq "tolerate_error");

    if (($verbosity == 0) || $tolerating) {
        # Discard stdout using the platform's null device
        if ($ENV{'GSDLOS'} =~ /^windows$/i) {
            $cmd .= " > nul";
        } else {
            $cmd .= " > /dev/null";
        }
    }

    if ($verbosity >= 2) {
        print "Running command:\n";
        print "$cmd\n";
    }

    my $status = system($cmd);
    # Capture $! straight away: it is only meaningful when system() returned -1
    # and any later I/O may overwrite it.
    my $launch_error = "$!";

    if ($verbosity >= 2) {
        print "Exit status = ", $status/256, "\n";
    }

    if (!$tolerating) {
        # Any non-zero status is a failure, including -1 (command could not be
        # started), which a "> 0" comparison would silently let through.
        if ($status != 0) {
            print STDERR "Error executing:\n$cmd\n";
            if ($status == -1) {
                print STDERR "$launch_error\n";
            }
        }
    }

    return $status;
}
51
52
# Run an external program, capturing its combined stdout/stderr.
#
# Parameters:
#   $prog           - program name. Names starting with "fedora-" get the
#                     platform script extension (.bat on Windows, .sh
#                     otherwise) appended; on Windows, names ending in ".pl"
#                     are launched through "perl -S".
#   $arguments      - argument string appended verbatim to the command line
#   $verbosity      - >= 2 also reports the command on failure, >= 3 also
#                     reports the captured output
#   $tolerate_error - pass the string "tolerate_error" to keep the command's
#                     output and any failure report off the console
#
# Returns the command's wait status ($?), with one exception: a command that
# exits with 0 but prints a line starting with "Error:" is reported as -1.
# Returns undef if the command could not be launched at all.
sub run_cmd
{
    my ($prog,$arguments,$verbosity,$tolerate_error) = @_;

    my $cmd_status = undef;

    my $quiet = (defined $tolerate_error) && ($tolerate_error eq "tolerate_error");
    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows/) ? 1 : 0;
    my $script_ext = ($on_windows) ? ".bat" : ".sh";

    if ($prog =~ m/^fedora-/) {
        $prog .= $script_ext;
    }
    # The dot must be escaped: an unescaped "." would also match names that
    # merely end in "pl" (e.g. "foopl").
    if (($prog =~ m/\.pl$/i) && $on_windows) {
        $prog ="perl -S $prog";
    }

    my $cmd = "$prog $arguments";

    # Lexical handle instead of a bareword global; the command string still
    # goes through the shell so that the 2>&1 redirection applies.
    if (open(my $cmd_fh, "-|", "$cmd 2>&1"))
    {
        my $result = "";
        while (defined (my $line = <$cmd_fh>))
        {
            $result .= $line;

            if (!$quiet)
            {
                print $line;
            }
        }

        close($cmd_fh);

        # close() on a pipe waits for the child and leaves its status in $?
        $cmd_status = $?;

        if ($cmd_status == 0) {
            # Check for any lines in result beginning 'Error:'
            if ($result =~ m/^Error\s*:/m) {
                # Fedora script generated an error, but did not exit
                # with an error status => artificially raise one
                $cmd_status = -1;
            }
        }

        if (($cmd_status != 0) && !$quiet) {
            print STDERR "Error: processing command failed.  Exit status $cmd_status\n";

            if ($verbosity >= 2) {
                print STDERR "  Command was: $cmd\n";
            }
            if ($verbosity >= 3) {
                print STDERR "result: $result\n";
            }
        }
    }
    else
    {
        print STDERR "Error: failed to execute $cmd\n";
    }

    return $cmd_status;
}
127
128
# Invoke the fedora-dsinfo client script for the object identified by $pid.
#
# $options is a hash reference supplying 'hostname', 'port', 'username',
# 'password', 'protocol' and 'verbosity'.
#
# The command is run in "tolerate_error" mode, so run_cmd prints neither the
# script's output nor a failure report; callers get only the returned status.
sub run_datastore_info
{
    my ($pid,$options) = @_;

    # Arguments are positional: host port user password pid protocol
    my @connection = @{$options}{'hostname', 'port', 'username', 'password'};
    my $arguments = join(" ", @connection, $pid, $options->{'protocol'});

    my $status = run_cmd("fedora-dsinfo", $arguments, $options->{'verbosity'}, "tolerate_error");

    return $status;
}
147
# Invoke the fedora-purge client script for the object identified by $pid.
#
# $options is a hash reference supplying 'hostname', 'port', 'username',
# 'password', 'protocol' and 'verbosity'.
#
# Returns the status reported by run_cmd.
sub run_purge
{
    my ($pid,$options) = @_;

    my $server = join(":", $options->{'hostname'}, $options->{'port'});

    # Positional arguments: host:port user password pid protocol, followed by
    # a backslash-newline and the quoted log message.
    my @positional = ($server, $options->{'username'}, $options->{'password'},
                      $pid, $options->{'protocol'});
    my $arguments = join(" ", @positional) . " \\\n \"Automated_purge_by_g2f_script\"";

    my $status = run_cmd("fedora-purge", $arguments, $options->{'verbosity'});

    return $status;
}
170
# Invoke the fedora-ingest client script on a single METS file.
#
# Parameters:
#   $docmets_filename - path of the file to ingest (passed double-quoted)
#   $options          - hash reference supplying 'hostname', 'port',
#                       'username', 'password', 'protocol' and 'verbosity'
#
# The ingest format argument depends on the FEDORA_VERSION environment
# variable: "metslikefedora1" when it starts with "2", otherwise the
# METSFedoraExt-1.1 format URI.
#
# Returns the status reported by run_cmd.
sub run_ingest
{
    my ($docmets_filename,$options) = @_;

    my $verbosity = $options->{'verbosity'};

    my $hostname = $options->{'hostname'};
    my $port = $options->{'port'};
    my $username = $options->{'username'};
    my $password = $options->{'password'};
    my $protocol = $options->{'protocol'};

    my $server = "$hostname:$port";

    my $prog = "fedora-ingest";

    # An unset FEDORA_VERSION falls through to the non-2.x format, exactly as
    # an unmatched value does, without matching against undef.
    my $fedora_version = (defined $ENV{'FEDORA_VERSION'}) ? $ENV{'FEDORA_VERSION'} : "";

    my $type = undef;
    if ($fedora_version =~ m/^2/) { # checking if major version is 2
        $type = "metslikefedora1";
    }
    else {
        $type = "info:fedora/fedora-system:METSFedoraExt-1.1";
    }

    my $arguments = "file \"$docmets_filename\" $type $server $username $password $protocol";
    # Log message for the operation. This used to say "purge" (copied from
    # run_purge), which mislabelled every ingest.
    $arguments .= " \\\n \"Automated_ingest_by_g2f_script\"";

    my $status = run_cmd($prog,$arguments,$verbosity);

    return $status;
}
203
204
# Recursively collect every sub-directory whose name ends in ".dir".
#
# Parameters:
#   $full_dir - directory to scan
#   $all_dirs - array reference; the full path of each matching directory is
#               appended to it
#
# Entries starting with "." are skipped. A ".dir" directory is recorded but
# not descended into; every other sub-directory is searched recursively.
# A directory that cannot be opened is silently ignored.
sub rec_get_all_hash_dirs
{
    my ($full_dir,$all_dirs) = @_;

    # Lexical handle: a bareword DIR would be a package global shared with
    # any caller that happens to use the same name.
    opendir(my $dir_handle, $full_dir) or return;
    my @entries = readdir($dir_handle);
    closedir($dir_handle);

    # readdir order depends on the filesystem; sort so the result (and hence
    # which directories a later maxdocs cut-off keeps) is reproducible.
    my @sub_dirs = ();
    foreach my $entry (sort @entries) {
        next if ($entry =~ /^\./);
        my $entry_path = &util::filename_cat($full_dir,$entry);
        push(@sub_dirs,$entry) if (-d $entry_path);
    }

    my @hash_dirs = grep { $_ =~ m/\.dir$/ } @sub_dirs;
    my @rec_dirs  = grep { $_ !~ m/\.dir$/ } @sub_dirs;

    # Matches at this level come first ...
    foreach my $hd (@hash_dirs) {
        my $full_hash_dir = &util::filename_cat($full_dir,$hd);
        push(@$all_dirs,$full_hash_dir);
    }

    # ... followed by whatever is found beneath the remaining directories
    foreach my $rd (@rec_dirs) {
        my $full_rec_dir = &util::filename_cat($full_dir,$rd);
        rec_get_all_hash_dirs($full_rec_dir,$all_dirs);
    }

    return;
}
227
# Return the list of ".dir" directories found beneath $start_dir (as gathered
# by rec_get_all_hash_dirs).
#
# Parameters:
#   $start_dir - directory at which the search starts
#   $maxdocs   - optional; when defined and not the empty string, at most this
#                many directories are returned (the first ones found)
sub get_all_hash_dirs
{
    my ($start_dir,$maxdocs) = @_;

    my @all_dirs = ();
    rec_get_all_hash_dirs($start_dir,\@all_dirs);

    if ((defined $maxdocs) && ($maxdocs ne "")) {
        my @maxdoc_dirs = ();
        # Stop as soon as the source list runs dry. Shifting from an empty
        # array yields undef, so without this check a $maxdocs larger than
        # the number of directories found would pad the result with undef
        # entries.
        while (@all_dirs && (scalar(@maxdoc_dirs) < $maxdocs)) {
            push(@maxdoc_dirs,shift(@all_dirs));
        }
        @all_dirs = @maxdoc_dirs;
    }

    return @all_dirs;
}
245
# Extract the identifier recorded in <hash_dir>/docmets.xml.
#
# Parameter:
#   $hash_dir - directory expected to contain a docmets.xml file
#
# Returns the text of the first <dc:identifier>...</dc:identifier> element
# that sits on a single line, or undef when the file has no such element or
# cannot be opened (a warning is printed to STDERR in the latter case).
sub get_hash_id
{
    my ($hash_dir) = @_;

    my $hash_id = undef;

    my $docmets_filename = &util::filename_cat($hash_dir,"docmets.xml");

    # Three-argument open with a lexical handle: the filename is taken
    # literally (no mode characters or surrounding whitespace interpreted)
    # and no package-global handle is left behind.
    if (open(my $docmets_fh, "<", $docmets_filename))
    {
        while (defined (my $line = <$docmets_fh>))
        {
            if ($line =~ m/<dc:identifier>(.*?)<\/dc:identifier>/)
            {
                $hash_id = $1;
                last;
            }
        }

        close($docmets_fh);
    }
    else
    {
        print STDERR "Warning: Unable to open \"$docmets_filename\"\n";
    }

    return $hash_id;
}
275
276
# Subroutine to write the gsdl.xml file in FEDORA_HOME/tomcat/conf/Catalina/<host/localhost>/
# This xml file will tell Fedora where to find the parent folder of the GS collect dir
# so that it can obtain the FedoraMETS files for ingestion.
# It depends on the Fedora server being on the same machine as the Greenstone server that
# this code is part of.
#
# Parameters:
#   $fedora_host - name of the host folder under conf/Catalina to try first
#   $collect_dir - path of the Greenstone collect directory
#   $options     - hash reference supplying 'hostname', 'port' and 'protocol',
#                  used to poll the Fedora server after a restart
#
# Returns the string "gsdl.xml" on success. Dies if gsdl.xml cannot be written
# or if the Fedora server does not respond after being restarted.
sub write_gsdl_xml_file
{
    my ($fedora_host, $collect_dir, $options) = @_;
    my $hostname = $options->{'hostname'};
    my $port = $options->{'port'};
    my $protocol = $options->{'protocol'};

    print STDERR "Ensuring that a correct gsdl.xml file exists on the Fedora server end\n";
    # The top of this file has already made sure that FEDORA_HOME is set, but for GS3
    # CATALINA_HOME is set to GS' own tomcat. Since we'll be working with fedora, we need
    # to temporarily set CATALINA_HOME to fedora's tomcat. (Catalina is undefined for GS2).
    # Plain assignment (undef when unset): "my $x = ... if COND" has undefined
    # behaviour in Perl and must not be used.
    my $gs_catalina_home = $ENV{'CATALINA_HOME'};
    $ENV{'CATALINA_HOME'} = &util::filename_cat($ENV{'FEDORA_HOME'}, "tomcat");

    # 1. Find out which folder to write to: fedora_host or localhost
    # whichever contains fedora.xml is the one we want (if none, exit with error value?)
    my $fedora_home = $ENV{'FEDORA_HOME'};
    my $base_path = &util::filename_cat($fedora_home, "tomcat", "conf", "Catalina");

    my $host_path = &util::filename_cat($base_path, $fedora_host);
    my $xmlFile = &util::filename_cat($host_path, "fedora.xml");
    if (!-e $xmlFile) {
        # check if the folder localhost contains fedoraXML
        $host_path = &util::filename_cat($base_path, "localhost");
        $xmlFile = &util::filename_cat($host_path, "fedora.xml");
        if (!-e $xmlFile) {
            # try putting gsdl in this folder, but still print a warning
            print STDERR "$host_path does not contain file fedora.xml. Hoping gsdl.xml belongs there anyway\n";
        }
    }

    # 2. Construct the string we are going write to the gsdl.xml file
    #   a. get the parent directory of collect_dir by removing the word
    #      "collect" from it and any optional OS-type slash at the end.
    #      (Path slash direction does not matter here.)
    my $collectParentDir = $collect_dir;
    # Strip the trailing "collect" component. Anchoring at the end keeps an
    # earlier occurrence of the word (e.g. ".../collector/gsdl/collect") from
    # being removed instead. Only if the path does not end in "collect" do we
    # fall back to the historical behaviour of removing the first occurrence.
    unless ($collectParentDir =~ s/collect(\/|\\)?$//) {
        $collectParentDir =~ s/collect(\/|\\)?//;
    }

    #   b. Use the collectParentDir to create the contents of gsdl.xml
    my $gsdlXMLcontents = "<?xml version='1.0' encoding='utf-8'?>\n<Context docBase=\"";
    $gsdlXMLcontents = $gsdlXMLcontents.$collectParentDir."\" path=\"/gsdl\"></Context>";

    # 3. If there is already a gsdl.xml file in host_path, compare the string we
    # want to write with what is already in there. If they're the same, we can return
    $xmlFile = &util::filename_cat($host_path, "gsdl.xml");
    if (-e $xmlFile) {
        # such a file exists, so read the contents (only if it could be opened)
        my $xml_contents = undef;
        if (open(my $fin, "<", $xmlFile)) {
            local $/ = undef;      # Read entire file at once
            $xml_contents = <$fin>; # Now file is read in as one single 'line'
            close($fin);
        }
        else {
            print STDERR "g2f-import.pl: Unable to open existing $xmlFile for comparing...Recoverable. $!\n";
            # doesn't matter, we'll just overwrite it then
        }
        if ((defined $xml_contents) && ($xml_contents eq $gsdlXMLcontents)) {
            print STDERR "Fedora links to the FLI import folder through gsdl.xml.\n";
            # it already contains what we want, we're done. Put GS' own
            # CATALINA_HOME back first, just as the full restart path does.
            $ENV{'CATALINA_HOME'} = $gs_catalina_home if defined $gs_catalina_home;
            return "gsdl.xml";
        }
    }

    # 4. If we're here, the contents of gsdl.xml need to be updated:
    #   a. First stop the fedora server
    my $script_ext = ($ENV{'GSDLOS'} =~ m/^windows/) ? ".bat" : ".sh";
    my $stop_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "shutdown".$script_ext);
    # execute the command
    $! = 0;
    my $status = system($stop_tomcat);
    if ($status!=0) { # to get the actual exit value, divide by 256, but not useful here
        # possible tomcat was already stopped - it's not the end of the world
        print STDERR "Failed to stop Fedora server. Perhaps it was not running. $!\n";
        print "Exit status = ", $status/256, "\n";
    }

    #   b. overwrite the file that has outdated contents with the contents we just constructed
    open(my $fout, ">", $xmlFile) # create or overwrite gsdl.xml file
        or die "g2f-import.pl: Unable to open $xmlFile for telling Fedora where the collect dir is...ERROR: $!\n";
    # write out the updated contents and close the file; buffered write errors
    # only surface at close, so check it
    print $fout $gsdlXMLcontents;
    close($fout)
        or die "g2f-import.pl: Unable to finish writing $xmlFile...ERROR: $!\n";

    #   c. Restart the fedora server
    my $start_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "startup".$script_ext);
    $! = 0;
    $status = system($start_tomcat);
    if ($status!=0) {
        print STDERR "Failed to restart the Fedora server... ERROR: $!\n";
        print "Exit status = ", $status/256, "\n";
    }

    # reset CATALINA_HOME to GS' Tomcat (it is undefined for GS2 since GS2 has no tomcat):
    $ENV{'CATALINA_HOME'} = $gs_catalina_home if defined $gs_catalina_home;

    # Starting up the Fedora server takes a long time. We need to wait for the server to be
    # ready before import can continue, because g2f-import relies on an up-and-running Fedora
    # server to purge the collection from it while g2f-build.pl needs a ready Fedora server
    # in order to make it ingest the FedoraMETS. Sleeping is not sufficient (#sleep 10;) since
    # the subsequent steps depend on a proper server restart.
    # Dr Bainbridge's suggestion: test the server is ready with a call to wget.
    #
    # wget's own --tries/--waitretry options cannot be used for this: wget does not retry
    # fatal errors such as "connection refused" or "not found" (404), which is exactly what
    # a server that is still starting up produces. So we poll ourselves instead: ask wget
    # for the fedora search page (protocol://host:port/fedora/search) and, while that fails,
    # sleep one second and try again, giving up after 15 failed attempts.
    # --spider makes wget check that the page is there rather than downloading it, -q is
    # for quiet. wget exits with 0 on success.

    print STDERR "Fedora server restarted. Waiting for it to become ready...\n";

    $! = 0;

    my $search_url = "${protocol}://${hostname}:${port}/fedora/search";
    my $max_attempts = 15;

    my $fedoraServerReady = 0;
    my $count = 0;
    do {
        # List form: the URL reaches wget as a single argument without
        # passing through a shell.
        $fedoraServerReady = system("wget", "-q", "--spider", $search_url);
        if ($fedoraServerReady != 0) {
            sleep(1);
            $count++;
        }
    } while (($fedoraServerReady != 0) && ($count < $max_attempts));

    if ($fedoraServerReady != 0) {
        print STDERR "Fedora server is still not ready... ERROR: $!\n";
        print "Exit status = ", $fedoraServerReady/256, "\n";
        die "Exiting....\n";
    }

    # return some indication that things went well
    return "gsdl.xml";
}
429
430
[14968]4311;
Note: See TracBrowser for help on using the repository browser.