source: main/trunk/greenstone2/perllib/g2futil.pm@ 27220

Last change on this file since 27220 was 26440, checked in by ak19, 11 years ago

On NZDL, using the args fromPid (and deletePid) instead of fromPID (and deletePID) during an updateIndex operation produced more error reporting. It did not affect the behaviour on my local GS3 installation. It may be another side effect of the version of Java on NZDL (the version of Java used there seemed to have an effect on how fedora manifested an issue on NZDL initially).

File size: 15.8 KB
Line 
1package g2futil;
2
3
# Compile-time setup: refuse to continue without FEDORA_HOME, and append
# FEDORA_HOME/client/bin to the PATH environment variable.
BEGIN
{
    if (!defined $ENV{'FEDORA_HOME'}) {
        print STDERR "Error: Environment variable FEDORA_HOME not set.\n";
        exit 1;
    }

    # This block executes before the file-level 'use util;' further down has
    # been compiled, so load util here instead of relying on the calling
    # script having loaded it already. (A no-op if it is already loaded.)
    require util;

    my $fedora_client_bin = &util::filename_cat($ENV{'FEDORA_HOME'},"client","bin");
    &util::envvar_append("PATH",$fedora_client_bin);
}
14
15use strict;
16use util;
17
# Older command runner (see run_cmd for the pipe-based variant).
#
# Parameters:
#   $cmd            - complete command line, handed to system() as one string
#   $verbosity      - 0 discards the command's standard output; >= 2 echoes
#                     the command and its exit status to STDOUT
#   $tolerate_error - optional; when equal to the string "tolerate_error" the
#                     command's standard output is discarded and failures are
#                     not reported on STDERR
#
# Returns the raw value returned by system() (exit code is $status/256;
# -1 means the command could not be launched).
sub run_cmd_old
{
    my ($cmd,$verbosity,$tolerate_error) = @_;

    my $tolerating = (defined $tolerate_error && ($tolerate_error eq "tolerate_error"));

    if (($verbosity == 0) || $tolerating) {
        # Send the command's standard output to the platform's null device
        if ($ENV{'GSDLOS'} =~ /^windows$/i) {
            $cmd .= " > nul";
        }
        else {
            $cmd .= " > /dev/null";
        }
    }

    if ($verbosity >= 2) {
        print "Running command:\n";
        print "$cmd\n";
    }

    my $status = system($cmd);

    if ($verbosity >= 2) {
        print "Exit status = ", $status/256, "\n";
    }

    if (!$tolerating) {
        # system() returns -1 when the command could not be started at all,
        # so report on any non-zero status, not only on positive ones.
        if ($status != 0) {
            print STDERR "Error executing:\n$cmd\n";
            print STDERR "$!\n";
        }
    }

    return $status;
}
51
52
# Runs "$prog $arguments" through a pipe, with the command's STDERR merged
# into its STDOUT, and collects everything the command prints.
#
# Parameters:
#   $prog           - program or script to run. Names starting with "fedora-"
#                     or matching run<CAPITALS>Client get the platform script
#                     extension appended (.bat when GSDLOS starts with
#                     "windows", otherwise .sh). On Windows a name ending in
#                     ".pl" is run via the perl executable reported by
#                     util::get_perl_exec() with the -S switch.
#   $arguments      - argument string appended after the program name
#   $verbosity      - >= 2 prints the failing command, >= 3 also its output
#   $tolerate_error - optional; when equal to the string "tolerate_error" the
#                     command's output is not echoed and failures are not
#                     reported on STDERR
#
# Returns the value of $? after closing the pipe; -1 if that was 0 but the
# output contains a line starting with "Error:"; undef if the pipe could not
# be opened at all.
sub run_cmd
{
    my ($prog,$arguments,$verbosity,$tolerate_error) = @_;

    my $cmd_status = undef;

    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows/) ? 1 : 0;
    my $script_ext = $on_windows ? ".bat" : ".sh";
    my $report_errors = ((!defined $tolerate_error) || ($tolerate_error ne "tolerate_error"));

    if ($prog =~ m/^fedora-/ || $prog =~ m/^run[A-Z]*Client/) { # fedora or fedoragsearch script
        $prog .= $script_ext;
    }
    # The dot is escaped so that only a genuine ".pl" extension matches
    if ($on_windows && ($prog =~ m/\.pl$/i)) {
        $prog ="\"".&util::get_perl_exec()."\" -S $prog";
    }

    my $cmd = "$prog $arguments";

    # Lexical handle: does not clobber any global CMD handle of a caller
    if (open(my $cmd_out, "-|", "$cmd 2>&1"))
    {
        my $result = "";
        while (defined (my $line = <$cmd_out>))
        {
            $result .= $line;
            print $line if ($report_errors);
        }

        # close() on a pipe waits for the command and sets $?
        close($cmd_out);
        $cmd_status = $?;

        if (($cmd_status == 0) && ($result =~ m/^Error\s*:/m)) {
            # Fedora script generated an error, but did not exit
            # with an error status => artificially raise one
            $cmd_status = -1;
        }

        if (($cmd_status != 0) && $report_errors) {
            print STDERR "Error: processing command failed.  Exit status $cmd_status\n";

            if ($verbosity >= 2) {
                print STDERR "  Command was: $cmd\n";
            }
            if ($verbosity >= 3) {
                print STDERR "result: $result\n";
            }
        }
    }
    else
    {
        print STDERR "Error: failed to execute $cmd\n";
    }

    return $cmd_status;
}
127
128
# Runs the fedora-dsinfo client script for the object with the given PID.
#
# Parameters:
#   $pid     - Fedora PID to look up
#   $options - hash ref supplying 'hostname', 'port', 'username', 'password',
#              'protocol' and 'verbosity'
#
# The command is run in "tolerate_error" mode, so run_cmd neither echoes its
# output nor reports a failure. Returns the status from run_cmd.
sub run_datastore_info
{
    my ($pid,$options) = @_;

    my @argument_parts = (
        $options->{'hostname'},
        $options->{'port'},
        $options->{'username'},
        $options->{'password'},
        $pid,
        $options->{'protocol'},
    );
    my $arguments = join(" ", @argument_parts);

    my $dsinfo_status = run_cmd("fedora-dsinfo", $arguments, $options->{'verbosity'}, "tolerate_error");

    return $dsinfo_status;
}
147
# Runs the fedora-purge client script for the object with the given PID.
#
# Parameters:
#   $pid     - Fedora PID to purge
#   $options - hash ref supplying 'hostname', 'port', 'username', 'password',
#              'protocol' and 'verbosity'
#
# Returns the status from run_cmd.
sub run_purge
{
    my ($pid,$options) = @_;

    my $server = join(":", $options->{'hostname'}, $options->{'port'});

    my @argument_parts = (
        $server,
        $options->{'username'},
        $options->{'password'},
        $pid,
        $options->{'protocol'},
    );

    # The final argument is the log message, placed on a continuation line
    my $arguments = join(" ", @argument_parts) . " \\\n \"Automated_purge_by_g2f_script\"";

    my $purge_status = run_cmd("fedora-purge", $arguments, $options->{'verbosity'});

    return $purge_status;
}
170
# Runs fedora gsearch's runRESTClient.sh as:
#   <host:port> updateIndex deletePid <PID>
# (alternative spelling of the sub-command: deletePID)
#
# Parameters:
#   $fedoragsearch_webapp - fedoragsearch webapp folder; the client script is
#                           taken from its "client" subfolder
#   $pid                  - PID passed to the deletePid sub-command
#   $options              - hash ref; 'hostname', 'port' and 'verbosity' are
#                           used. The username/password entries are not
#                           passed on to the client script here.
#
# Returns the status from run_cmd.
sub run_delete_from_index
{
    my ($fedoragsearch_webapp,$pid,$options) = @_;

    my $rest_client = &util::filename_cat($fedoragsearch_webapp, "client", "runRESTClient.sh");

    my $server = join(":", $options->{'hostname'}, $options->{'port'});
    my $arguments = join(" ", $server, "updateIndex deletePid", $pid);

    my $delete_status = run_cmd($rest_client, $arguments, $options->{'verbosity'});

    return $delete_status;
}
198
# Runs fedora gsearch's runRESTClient.sh as:
#   <host:port> updateIndex fromPid <PID>
# (alternative spelling of the sub-command: fromPID)
#
# Parameters:
#   $fedoragsearch_webapp - fedoragsearch webapp folder; the client script is
#                           taken from its "client" subfolder
#   $pid                  - PID passed to the fromPid sub-command
#   $options              - hash ref; 'hostname', 'port' and 'verbosity' are
#                           used. The username/password entries are not
#                           passed on to the client script here.
#
# Returns the status from run_cmd.
sub run_update_index
{
    my ($fedoragsearch_webapp,$pid,$options) = @_;

    my $rest_client = &util::filename_cat($fedoragsearch_webapp, "client", "runRESTClient.sh");

    my $server = join(":", $options->{'hostname'}, $options->{'port'});
    my $arguments = join(" ", $server, "updateIndex fromPid", $pid);

    my $update_status = run_cmd($rest_client, $arguments, $options->{'verbosity'});

    return $update_status;
}
226
# Locates the fedoragsearch webapp folder.
#
# Candidates are tried in this order and the first one that exists as a
# directory is returned:
#   1. GSDL3SRCHOME/packages/tomcat/webapps/fedoragsearch (only if
#      GSDL3SRCHOME is set, i.e. Greenstone 3's own tomcat)
#   2. FEDORA_HOME/tomcat/webapps/fedoragsearch (Fedora's tomcat)
#
# Returns the folder path, or undef when neither candidate exists.
sub gsearch_webapp_folder
{
    # if GS3, first look for a fedoragsearch webapp installed in Greenstone's tomcat
    if (defined $ENV{'GSDL3SRCHOME'}) {
        my $gs3_webapp = &util::filename_cat($ENV{'GSDL3SRCHOME'},"packages","tomcat","webapps","fedoragsearch");
        return $gs3_webapp if (&util::dir_exists($gs3_webapp));
    }

    # next look for a fedoragsearch webapp installed in Fedora's tomcat
    if (defined $ENV{'FEDORA_HOME'}) {
        my $fedora_webapp = &util::filename_cat($ENV{'FEDORA_HOME'},"tomcat","webapps","fedoragsearch");
        return $fedora_webapp if (&util::dir_exists($fedora_webapp));
    }

    ## check for a user-defined $ENV{'FEDORA_GSEARCH'} variable first, which points to a gsearch webapp folder??

    # No fedoragsearch found. Return undef explicitly: a candidate path that
    # failed the existence check must not be handed back to the caller.
    return undef;
}
248
249
# Runs the fedora-ingest client script on a docmets file.
#
# Parameters:
#   $docmets_filename - path of the file to ingest (quoted on the command line)
#   $options          - hash ref supplying 'hostname', 'port', 'username',
#                       'password', 'protocol' and 'verbosity'
#
# The ingest format argument depends on the FEDORA_VERSION environment
# variable: "metslikefedora1" when it starts with "2", otherwise
# "info:fedora/fedora-system:METSFedoraExt-1.1".
#
# Returns the status from run_cmd.
sub run_ingest
{
    my ($docmets_filename,$options) = @_;

    my $verbosity = $options->{'verbosity'};

    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $username = $options->{'username'};
    my $password = $options->{'password'};
    my $protocol = $options->{'protocol'};

    my $server = "$hostname:$port";

    my $prog = "fedora-ingest";

    # An unset FEDORA_VERSION is treated like any version other than 2
    my $fedora_version = defined $ENV{'FEDORA_VERSION'} ? $ENV{'FEDORA_VERSION'} : "";

    my $type = undef;
    if ($fedora_version =~ m/^2/) { # checking if major version is 2
        $type = "metslikefedora1";
    }
    else {
        $type = "info:fedora/fedora-system:METSFedoraExt-1.1";
    }

    my $arguments = "file \"$docmets_filename\" $type $server $username $password $protocol";
    # Log message for the operation. This used to be the purge message copied
    # from run_purge, which mislabelled ingests.
    $arguments .= " \\\n \"Automated_ingest_by_g2f_script\"";

    my $status = run_cmd($prog,$arguments,$verbosity);

    return $status;
}
282
283
# Recursively collects the full paths of all subdirectories whose name ends
# in ".dir".
#
# Parameters:
#   $full_dir - directory to scan
#   $all_dirs - array ref; matching paths are appended to it
#
# Entries whose name starts with "." are ignored. A directory ending in
# ".dir" is recorded but not descended into; every other subdirectory is
# scanned recursively. A directory that cannot be opened is silently skipped.
sub rec_get_all_hash_dirs
{
    my ($full_dir,$all_dirs) = @_;

    # Lexical dirhandle: this routine is recursive and must not clobber a
    # global handle shared with callers.
    my $dir_handle;
    if (opendir($dir_handle, $full_dir)) {
        my @sub_dirs = grep { ($_ !~ /^\./) && (-d &util::filename_cat($full_dir,$_)) } readdir($dir_handle);
        closedir($dir_handle);

        my @hash_dirs = grep { $_ =~ m/\.dir$/ } @sub_dirs;
        my @rec_dirs  = grep { $_ !~ m/\.dir$/ } @sub_dirs;

        foreach my $hd (@hash_dirs) {
            my $full_hash_dir = &util::filename_cat($full_dir,$hd);
            push(@$all_dirs,$full_hash_dir);
        }

        foreach my $rd (@rec_dirs) {
            my $full_rec_dir = &util::filename_cat($full_dir,$rd);
            rec_get_all_hash_dirs($full_rec_dir,$all_dirs);
        }
    }
}
306
# Returns the list of ".dir" directories found below $start_dir (as gathered
# by rec_get_all_hash_dirs).
#
# Parameters:
#   $start_dir - directory to start scanning from
#   $maxdocs   - optional; when defined and not the empty string, at most
#                this many directories (the first ones found) are returned
sub get_all_hash_dirs
{
    my ($start_dir,$maxdocs) = @_;

    my @all_dirs = ();
    rec_get_all_hash_dirs($start_dir,\@all_dirs);

    if ((defined $maxdocs) && ($maxdocs ne "")) {
        my @maxdoc_dirs = ();
        # Stop as soon as the source list is exhausted, so that a $maxdocs
        # larger than the number of directories found does not pad the
        # result with undef entries.
        while (@all_dirs && (scalar(@maxdoc_dirs) < $maxdocs)) {
            push(@maxdoc_dirs,shift(@all_dirs));
        }
        @all_dirs = @maxdoc_dirs;
    }

    return @all_dirs;
}
324
# Extracts the identifier from the docmets.xml file inside $hash_dir.
#
# Returns the text of the first <dc:identifier>...</dc:identifier> element
# found on a single line of the file, or undef if there is none or the file
# cannot be opened (in which case a warning is printed to STDERR).
sub get_hash_id
{
    my ($hash_dir) = @_;

    my $hash_id = undef;

    my $docmets_filename = &util::filename_cat($hash_dir,"docmets.xml");

    # Three-argument open with a lexical handle: the filename is never
    # interpreted as part of the open mode.
    if (open(my $docmets_in, "<", $docmets_filename))
    {
        while (defined (my $line = <$docmets_in>))
        {
            if ($line =~ m/<dc:identifier>(.*?)<\/dc:identifier>/)
            {
                $hash_id = $1;
                last;
            }
        }

        close($docmets_in);
    }
    else
    {
        print STDERR "Warning: Unable to open \"$docmets_filename\"\n";
    }

    return $hash_id;
}
354
355
# Subroutine to write the gsdl.xml file in FEDORA_HOME/tomcat/conf/Catalina/<host/localhost>/
# This xml file will tell Fedora where to find the parent folder of the GS collect dir
# so that it can obtain the FedoraMETS files for ingestion.
# It depends on the Fedora server being on the same machine as the Greenstone server that
# this code is part of.
#
# Parameters:
#   $fedora_host - name of the host folder under tomcat/conf/Catalina to try
#                  first; "localhost" is used if it has no fedora.xml
#   $collect_dir - path of the Greenstone collect directory
#   $options     - hash ref; 'hostname', 'port' and 'protocol' are used to
#                  build the URL polled after the restart
#
# If gsdl.xml already has the wanted contents nothing is changed. Otherwise
# Fedora's tomcat is shut down, gsdl.xml is rewritten, tomcat is started
# again and the Fedora search page is polled with wget (up to 20 attempts,
# one second apart) until it responds.
#
# Returns the string "gsdl.xml" on success. Dies if gsdl.xml cannot be
# written or if the server is still not responding after the last attempt.
sub write_gsdl_xml_file
{
    my ($fedora_host, $collect_dir, $options) = @_;
    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $protocol = $options->{'protocol'};

    print STDERR "Ensuring that a correct gsdl.xml file exists on the Fedora server end\n";
    # The top of this file has already made sure that FEDORA_HOME is set, but for GS3
    # CATALINA_HOME is set to GS' own tomcat. Since we'll be working with fedora, we need
    # to temporarily set CATALINA_HOME to fedora's tomcat. (Catalina is undefined for GS2.)
    # 'local' puts the previous value (or absence) back on every exit path of
    # this subroutine, including the early return and the die calls below.
    local $ENV{'CATALINA_HOME'} = &util::filename_cat($ENV{'FEDORA_HOME'}, "tomcat");

    # 1. Find out which folder to write to: fedora_host or localhost
    # whichever contains fedora.xml is the one we want
    my $fedora_home = $ENV{'FEDORA_HOME'};
    my $base_path = &util::filename_cat($fedora_home, "tomcat", "conf", "Catalina");

    my $host_path = &util::filename_cat($base_path, $fedora_host);
    my $xmlFile = &util::filename_cat($host_path, "fedora.xml");
    if (!-e $xmlFile) {
        # check if the folder localhost contains fedoraXML
        $host_path = &util::filename_cat($base_path, "localhost");
        $xmlFile = &util::filename_cat($host_path, "fedora.xml");
        if (!-e $xmlFile) {
            # try putting gsdl in this folder, but still print a warning
            print STDERR "$host_path does not contain file fedora.xml. Hoping gsdl.xml belongs there anyway\n";
        }
    }

    # 2. Construct the string we are going write to the gsdl.xml file
    # a. get the parent directory of collect_dir by removing the word
    #    "collect" from it and any optional OS-type slash at the end.
    #    (Path slash direction does not matter here.)
    #    The trailing component is tried first so that an earlier "collect"
    #    inside the path (e.g. ".../collections/gs/collect") is left alone;
    #    only if the path does not end that way is the first occurrence
    #    removed, as before.
    my $collectParentDir = $collect_dir;
    unless ($collectParentDir =~ s/collect[\/\\]?$//) {
        $collectParentDir =~ s/collect(\/|\\)?//;
    }

    # b. Use the collectParentDir to create the contents of gsdl.xml
    my $greenstone_url_prefix = &util::get_greenstone_url_prefix(); # would have the required slash at front
    my $gsdlXMLcontents = "<?xml version='1.0' encoding='utf-8'?>\n<Context docBase=\"";
    $gsdlXMLcontents = $gsdlXMLcontents.$collectParentDir."\" path=\"$greenstone_url_prefix\"></Context>";

    # 3. If there is already a gsdl.xml file in host_path, compare the string we
    # want to write with what is already in there. If they're the same, we can return
    $xmlFile = &util::filename_cat($host_path, "gsdl.xml");
    if (-e $xmlFile) {
        my $xml_contents = undef;
        if (open(my $xml_in, "<", $xmlFile)) {
            local $/ = undef;         # Read entire file at once
            $xml_contents = <$xml_in>;
            close($xml_in);
        }
        else {
            # doesn't matter, we'll just overwrite it then
            print STDERR "g2f-import.pl: Unable to open existing $xmlFile for comparing...Recoverable. $!\n";
        }

        if ((defined $xml_contents) && ($xml_contents eq $gsdlXMLcontents)) {
            print STDERR "Fedora links to the FLI import folder through gsdl.xml.\n";
            # it already contains what we want, we're done
            return "gsdl.xml";
        }
    }

    # 4. If we're here, the contents of gsdl.xml need to be updated:
    # a. First stop the fedora server
    my $script_ext = ($ENV{'GSDLOS'} =~ m/^windows/) ? ".bat" : ".sh";
    my $stop_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "shutdown".$script_ext);
    $! = 0;
    my $status = system($stop_tomcat);
    if ($status != 0) { # to get the actual exit value, divide by 256, but not useful here
        # possible tomcat was already stopped - it's not the end of the world
        print STDERR "Failed to stop Fedora server. Perhaps it was not running. $!\n";
        print "Exit status = ", $status/256, "\n";
    }

    # b. overwrite the file that has outdated contents with the contents we just constructed
    open(my $xml_out, ">", $xmlFile) # create or overwrite gsdl.xml file
        or die "g2f-import.pl: Unable to open $xmlFile for telling Fedora where the collect dir is...ERROR: $!\n";
    print $xml_out $gsdlXMLcontents;
    close($xml_out);

    # c. Restart the fedora server
    my $start_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "startup".$script_ext);
    $! = 0;
    $status = system($start_tomcat);
    if ($status != 0) {
        print STDERR "Failed to restart the Fedora server... ERROR: $!\n";
        print "Exit status = ", $status/256, "\n";
    }

    # Starting up the Fedora server takes a long time. We need to wait for the server to be
    # ready before import can continue, because g2f-import relies on an up-and-running Fedora
    # server to purge the collection from it while g2f-build.pl needs a ready Fedora server
    # in order to make it ingest the FedoraMETS. Sleeping for a fixed time is not sufficient
    # since the subsequent steps depend on a proper server restart.
    # Dr Bainbridge's suggestion: test the server is ready with a call to wget.
    #
    # wget --spider checks that the page (protocol://host:port/fedora/search) is there
    # without downloading it; -q is for quiet. wget's own --tries/--waitretry options are
    # not used because fatal errors such as "connection refused" are never retried by wget,
    # so the retrying is done here: once a second, for a total of 20 attempts.
    print STDERR "Fedora server restarted. Waiting for it to become ready...\n";
    $! = 0;

    my $fedora_search_url = "$protocol://$hostname:$port/fedora/search";
    my $fedoraServerReady = 0;
    my $count = 0;
    do {
        # List form of system(): the URL is passed to wget as a single
        # argument without going through a shell.
        $fedoraServerReady = system("wget", "-q", "--spider", $fedora_search_url);
        if ($fedoraServerReady != 0) {
            sleep(1);
            $count++;
        }
    } while ($fedoraServerReady != 0 && $count < 20);

    if ($fedoraServerReady != 0) {
        print STDERR "Fedora server is still not ready... ERROR: $!\n";
        print "Exit status = ", $fedoraServerReady/256, "\n";
        die "Exiting....\n";
    }

    # return some indication that things went well
    return "gsdl.xml";
}
508
509
5101;
Note: See TracBrowser for help on using the repository browser.