source: main/trunk/greenstone2/perllib/g2futil.pm@ 32530

Last change on this file since 32530 was 28560, checked in by ak19, 10 years ago
  1. New subroutine util::set_gnomelib_env that sets the environment for gnomelib needed for running hashfile, suffix and wget which are dependent on the libiconv dll in ext/gnome-lib(-minimal). It's particularly the Mac Lions that need libiconv.2.dylib. 2. Updated the call to hashfile in doc.pm, the call to suffix in Phind.pm and the calls to wget in several perl scripts and modules to call util::set_gnomelib_env, though this will only set the environment once for each subshell.
File size: 16.4 KB
Line 
1package g2futil;
2
3
# Compile-time environment set-up: refuse to load without FEDORA_HOME and
# append FEDORA_HOME/client/bin to PATH.
BEGIN
{
    if (!defined $ENV{'FEDORA_HOME'}) {
        print STDERR "Error: Environment variable FEDORA_HOME not set.\n";
        exit 1;
    }

    # A BEGIN block runs as soon as it has been parsed, i.e. before the
    # 'use util;' / 'use FileUtils;' statements further down this file are
    # compiled. Load both modules explicitly so the calls below do not
    # depend on the caller having loaded them already.
    require util;
    require FileUtils;

    my $fedora_client_bin = &FileUtils::filenameConcatenate($ENV{'FEDORA_HOME'},"client","bin");
    &util::envvar_append("PATH",$fedora_client_bin);
}
14
15use strict;
16use util;
17use FileUtils;
18
# Older command runner based on system(); the other subroutines in this
# file call run_cmd() instead.
#
# Parameters:
#   $cmd            - complete command line to execute
#   $verbosity      - 0 discards the command's stdout; >= 2 echoes the
#                     command and its exit status
#   $tolerate_error - pass the string "tolerate_error" to discard stdout
#                     and to suppress the error report on failure
#
# Returns the raw value of system(): 0 on success, -1 if the command could
# not be started, otherwise the wait status (exit code is status/256).
sub run_cmd_old
{
    my ($cmd,$verbosity,$tolerate_error) = @_;

    my $tolerating = (defined $tolerate_error) && ($tolerate_error eq "tolerate_error");

    if (($verbosity == 0) || $tolerating) {
        # Discard stdout using the platform's null device
        $cmd .= ($ENV{'GSDLOS'} =~ /^windows$/i) ? " > nul" : " > /dev/null";
    }

    if ($verbosity >= 2) {
        print "Running command:\n";
        print "$cmd\n";
    }

    my $status = system($cmd);
    # Capture errno straight away: it is only meaningful when system()
    # returned -1, and later I/O may overwrite it.
    my $os_error = "$!";

    if ($verbosity >= 2) {
        print "Exit status = ", $status/256, "\n";
    }

    # system() returns -1 when the command could not be started at all, so
    # test for any non-zero status rather than only positive ones.
    if (!$tolerating && ($status != 0)) {
        print STDERR "Error executing:\n$cmd\n";
        if ($status == -1) {
            print STDERR "$os_error\n";
        }
    }

    return $status;
}
52
53
# Runs "$prog $arguments" through a pipe, capturing stdout and stderr.
#
# Parameters:
#   $prog           - program to run. Names starting with "fedora-" or
#                     matching run[A-Z]*Client get ".bat" (Windows) or
#                     ".sh" appended; on Windows a ".pl" program is run
#                     through the perl returned by util::get_perl_exec()
#   $arguments      - argument string appended after the program
#   $verbosity      - >= 2 reports the command on failure, >= 3 also
#                     reports the captured output
#   $tolerate_error - pass the string "tolerate_error" to suppress echoing
#                     of the output and the failure report
#
# Returns 0 on success and a non-zero value on failure: the child's wait
# status ($?), or -1 when the output contains a line starting "Error:" or
# when the command could not be launched at all.
sub run_cmd
{
    my ($prog,$arguments,$verbosity,$tolerate_error) = @_;

    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows/);
    my $tolerating = (defined $tolerate_error) && ($tolerate_error eq "tolerate_error");

    if ($prog =~ m/^fedora-/ || $prog =~ m/^run[A-Z]*Client/) { # fedora or fedoragsearch script
        $prog .= $on_windows ? ".bat" : ".sh";
    }
    # The dot is escaped so that only a real ".pl" extension matches
    if ($on_windows && ($prog =~ m/\.pl$/i)) {
        $prog = "\"".&util::get_perl_exec()."\" -S $prog";
    }

    my $cmd = "$prog $arguments";

    my $cmd_fh;
    if (!open($cmd_fh, "-|", "$cmd 2>&1")) {
        print STDERR "Error: failed to execute $cmd\n";
        # Return a non-zero status: an undefined return value would compare
        # numerically equal to 0 and be mistaken for success.
        return -1;
    }

    my $result = "";
    while (defined (my $line = <$cmd_fh>)) {
        $result .= $line;
        print $line if (!$tolerating);
    }

    # Closing a pipe waits for the child and sets $? to its wait status
    close($cmd_fh);
    my $cmd_status = $?;

    if (($cmd_status == 0) && ($result =~ m/^Error\s*:/m)) {
        # Fedora script generated an error, but did not exit
        # with an error status => artificially raise one
        $cmd_status = -1;
    }

    if (($cmd_status != 0) && !$tolerating) {
        print STDERR "Error: processing command failed.  Exit status $cmd_status\n";

        if ($verbosity >= 2) {
            print STDERR "  Command was: $cmd\n";
        }
        if ($verbosity >= 3) {
            print STDERR "result: $result\n";
        }
    }

    return $cmd_status;
}
128
129
# Runs the fedora-dsinfo script for the object identified by $pid.
#
# $options is a hash reference supplying hostname, port, username,
# password, protocol and verbosity. Errors are tolerated (not reported) by
# run_cmd. Returns run_cmd's status.
sub run_datastore_info
{
    my ($pid,$options) = @_;

    # Argument order: host port user password pid protocol
    my $arguments = join(" ",
                         $options->{'hostname'},
                         $options->{'port'},
                         $options->{'username'},
                         $options->{'password'},
                         $pid,
                         $options->{'protocol'});

    return run_cmd("fedora-dsinfo",$arguments,$options->{'verbosity'},"tolerate_error");
}
148
# Runs the fedora-purge script for the object identified by $pid.
#
# $options is a hash reference supplying hostname, port, username,
# password, protocol and verbosity. Returns run_cmd's status.
sub run_purge
{
    my ($pid,$options) = @_;

    my $server = $options->{'hostname'}.":".$options->{'port'};

    # Argument order: host:port user password pid protocol log-message
    my $arguments = join(" ",
                         $server,
                         $options->{'username'},
                         $options->{'password'},
                         $pid,
                         $options->{'protocol'});
    $arguments .= " \\\n \"Automated_purge_by_g2f_script\"";

    return run_cmd("fedora-purge",$arguments,$options->{'verbosity'});
}
171
# Runs fedora gsearch's runRESTClient.sh: updateIndex deletePid <PID>
#
# Parameters:
#   $fedoragsearch_webapp - fedoragsearch webapp folder; the script is
#                           looked up in its "client" subfolder
#   $pid                  - identifier of the object to delete from the index
#   $options              - hash reference supplying hostname, port and
#                           verbosity
#
# Returns run_cmd's status.
sub run_delete_from_index
{
    my ($fedoragsearch_webapp,$pid,$options) = @_;

    my $verbosity = $options->{'verbosity'};
    my $server = $options->{'hostname'}.":".$options->{'port'};

    # Username, password and protocol are not passed on this command line.
    # (The fgsUserName/fgsPassword environment variables and a
    # FEDORA_GSEARCH location were considered in earlier code but are not
    # used.)
    my $prog = &FileUtils::filenameConcatenate($fedoragsearch_webapp, "client", "runRESTClient.sh");

    my $gsearch_commands = "updateIndex deletePid"; # deletePID
    my $arguments = "$server $gsearch_commands $pid";

    return run_cmd($prog,$arguments,$verbosity);
}
199
# Runs fedora gsearch's runRESTClient.sh: updateIndex fromPid <PID>
#
# Parameters:
#   $fedoragsearch_webapp - fedoragsearch webapp folder; the script is
#                           looked up in its "client" subfolder
#   $pid                  - identifier of the object to (re)index
#   $options              - hash reference supplying hostname, port and
#                           verbosity
#
# Returns run_cmd's status.
sub run_update_index
{
    my ($fedoragsearch_webapp,$pid,$options) = @_;

    my $verbosity = $options->{'verbosity'};
    my $server = $options->{'hostname'}.":".$options->{'port'};

    # Username, password and protocol are not passed on this command line.
    # (The fgsUserName/fgsPassword environment variables and a
    # FEDORA_GSEARCH location were considered in earlier code but are not
    # used.)
    my $prog = &FileUtils::filenameConcatenate($fedoragsearch_webapp, "client", "runRESTClient.sh");

    my $gsearch_commands = "updateIndex fromPid"; # fromPID
    my $arguments = "$server $gsearch_commands $pid";

    return run_cmd($prog,$arguments,$verbosity);
}
227
# Locates the fedoragsearch webapp folder.
#
# Candidates are tried in this order and the first one that exists as a
# directory is returned:
#   1. GSDL3SRCHOME/packages/tomcat/webapps/fedoragsearch (Greenstone 3's tomcat)
#   2. FEDORA_HOME/tomcat/webapps/fedoragsearch           (Fedora's tomcat)
#
# Returns undef when no candidate exists.
sub gsearch_webapp_folder
{
    my @candidates = ();

    # if GS3, first look for a fedoragsearch webapp installed in Greenstone's tomcat
    if (defined $ENV{'GSDL3SRCHOME'}) {
        push(@candidates, &FileUtils::filenameConcatenate($ENV{'GSDL3SRCHOME'},"packages","tomcat","webapps","fedoragsearch"));
    }

    # next look for a fedoragsearch webapp installed in Fedora's tomcat
    if (defined $ENV{'FEDORA_HOME'}) {
        push(@candidates, &FileUtils::filenameConcatenate($ENV{'FEDORA_HOME'},"tomcat","webapps","fedoragsearch"));
    }

    ## check for a user-defined $ENV{'FEDORA_GSEARCH'} variable first, which points to a gsearch webapp folder??

    foreach my $candidate (@candidates) {
        return $candidate if (&FileUtils::directoryExists($candidate));
    }

    # No fedoragsearch found. Return undef explicitly instead of the last
    # candidate path that failed the existence check. ('return undef' is
    # kept rather than a bare return so that exactly one value is returned
    # in list context, as before.)
    return undef;
}
249
250
# Runs the fedora-ingest script on a single METS file.
#
# Parameters:
#   $docmets_filename - path of the file to ingest
#   $options          - hash reference supplying hostname, port, username,
#                       password, protocol and verbosity
#
# The ingest format is chosen from the FEDORA_VERSION environment variable:
# "metslikefedora1" when it starts with "2", otherwise
# "info:fedora/fedora-system:METSFedoraExt-1.1".
#
# Returns run_cmd's status.
sub run_ingest
{
    my ($docmets_filename,$options) = @_;

    my $verbosity = $options->{'verbosity'};

    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $username = $options->{'username'};
    my $password = $options->{'password'};
    my $protocol = $options->{'protocol'};

    my $server = "$hostname:$port";

    my $prog = "fedora-ingest";

    # An unset FEDORA_VERSION is treated like any non-2 version
    my $fedora_version = defined $ENV{'FEDORA_VERSION'} ? $ENV{'FEDORA_VERSION'} : "";
    my $type = ($fedora_version =~ m/^2/) # checking if major version is 2
        ? "metslikefedora1"
        : "info:fedora/fedora-system:METSFedoraExt-1.1";

    my $arguments = "file \"$docmets_filename\" $type $server $username $password $protocol";
    # Log message for this operation. It used to say "purge", copied from
    # run_purge, which mislabelled ingests.
    $arguments .= " \\\n \"Automated_ingest_by_g2f_script\"";

    my $status = run_cmd($prog,$arguments,$verbosity);

    return $status;
}
283
284
# Recursively collects every subdirectory whose name ends in ".dir".
#
# Parameters:
#   $full_dir - directory to scan
#   $all_dirs - array reference; the full path of each "*.dir" directory
#               found is pushed onto it
#
# Entries starting with "." are skipped. "*.dir" directories are recorded
# but not descended into; all other subdirectories are scanned recursively.
# A directory that cannot be opened is silently skipped.
sub rec_get_all_hash_dirs
{
    my ($full_dir,$all_dirs) = @_;

    # A lexical handle keeps each recursion level's directory handle private
    # instead of sharing one global bareword handle.
    opendir(my $dir_handle, $full_dir) or return;
    my @sub_dirs = grep { ($_ !~ /^\./) && (-d &FileUtils::filenameConcatenate($full_dir,$_)) } readdir($dir_handle);
    closedir($dir_handle);

    my @hash_dirs = grep { $_ =~ m/\.dir$/ } @sub_dirs;
    my @rec_dirs  = grep { $_ !~ m/\.dir$/ } @sub_dirs;

    foreach my $hd (@hash_dirs) {
        push(@$all_dirs, &FileUtils::filenameConcatenate($full_dir,$hd));
    }

    foreach my $rd (@rec_dirs) {
        rec_get_all_hash_dirs(&FileUtils::filenameConcatenate($full_dir,$rd), $all_dirs);
    }

    return;
}
307
# Returns the full paths of all "*.dir" directories found beneath
# $start_dir (see rec_get_all_hash_dirs).
#
# Parameters:
#   $start_dir - directory to start scanning from
#   $maxdocs   - optional; when defined and not the empty string, at most
#                this many directories are returned
sub get_all_hash_dirs
{
    my ($start_dir,$maxdocs) = @_;

    my @all_dirs = ();
    rec_get_all_hash_dirs($start_dir,\@all_dirs);

    if ((defined $maxdocs) && ($maxdocs ne "")) {
        my @maxdoc_dirs = ();
        # Stop once the found directories are exhausted, so that a $maxdocs
        # larger than the number found does not pad the result with undef.
        while (@all_dirs && (scalar(@maxdoc_dirs) < $maxdocs)) {
            push(@maxdoc_dirs,shift(@all_dirs));
        }
        @all_dirs = @maxdoc_dirs;
    }

    return @all_dirs;
}
325
# Extracts the document identifier from the docmets.xml file in $hash_dir.
#
# Returns the text of the first <dc:identifier>...</dc:identifier> element
# that appears on a single line, or undef when there is none or the file
# cannot be opened (a warning is printed to STDERR in the latter case).
sub get_hash_id
{
    my ($hash_dir) = @_;

    my $hash_id = undef;

    my $docmets_filename = &FileUtils::filenameConcatenate($hash_dir,"docmets.xml");

    # Three-argument open: the filename can never be interpreted as a mode
    # or a pipe, whatever characters it contains.
    if (open(my $din, "<", $docmets_filename))
    {
        while (defined (my $line = <$din>))
        {
            if ($line =~ m/<dc:identifier>(.*?)<\/dc:identifier>/)
            {
                $hash_id = $1;
                last;
            }
        }

        close($din);
    }
    else
    {
        print STDERR "Warning: Unable to open \"$docmets_filename\"\n";
    }

    return $hash_id;
}
355
356
# Subroutine to write the gsdl.xml file in FEDORA_HOME/tomcat/conf/Catalina/<host/localhost>/
# This xml file will tell Fedora where to find the parent folder of the GS collect dir
# so that it can obtain the FedoraMETS files for ingestion.
# It depends on the Fedora server being on the same machine as the Greenstone server that
# this code is part of.
#
# Parameters:
#   $fedora_host - name of the Catalina host folder to try first; "localhost"
#                  is used when that folder does not contain fedora.xml
#   $collect_dir - path of the Greenstone collect directory
#   $options     - hash reference supplying hostname, port and protocol of
#                  the Fedora server
#
# Returns the string "gsdl.xml" both when the existing file was already
# correct and when it was rewritten and the server became ready again.
# Dies when gsdl.xml cannot be opened for writing or when the restarted
# server does not respond within 20 attempts.
sub write_gsdl_xml_file
{
    my ($fedora_host, $collect_dir, $options) = @_;
    my $hostname = $options->{'hostname'};
    my $port = $options->{'port'};
    my $protocol = $options->{'protocol'};

    print STDERR "Ensuring that a correct gsdl.xml file exists on the Fedora server end\n";

    {
        # The top of this file has already made sure that FEDORA_HOME is set, but for GS3
        # CATALINA_HOME is set to GS' own tomcat. Since we'll be working with fedora, we need
        # to temporarily set CATALINA_HOME to fedora's tomcat. (Catalina is undefined for GS2.)
        # 'local' puts the previous value (or its absence) back whenever this
        # block is left, including via the early return and the die below.
        local $ENV{'CATALINA_HOME'} = &FileUtils::filenameConcatenate($ENV{'FEDORA_HOME'}, "tomcat");

        # 1. Find out which folder to write to: fedora_host or localhost
        # whichever contains fedora.xml is the one we want (if none, exit with error value?)
        my $fedora_home = $ENV{'FEDORA_HOME'};
        my $base_path = &FileUtils::filenameConcatenate($fedora_home, "tomcat", "conf", "Catalina");

        my $host_path = &FileUtils::filenameConcatenate($base_path, $fedora_host);
        my $xmlFile = &FileUtils::filenameConcatenate($host_path, "fedora.xml");
        if (!-e $xmlFile) {
            # check if the folder localhost contains fedoraXML
            $host_path = &FileUtils::filenameConcatenate($base_path, "localhost");
            $xmlFile = &FileUtils::filenameConcatenate($host_path, "fedora.xml");
            if (!-e $xmlFile) {
                # try putting gsdl in this folder, but still print a warning
                print STDERR "$host_path does not contain file fedora.xml. Hoping gsdl.xml belongs there anyway\n";
            }
        }

        # 2. Construct the string we are going write to the gsdl.xml file
        # a. get the parent directory of collect_dir by removing the word
        #    "collect" from it and any optional OS-type slash at the end.
        #    (Path slash direction does not matter here.)
        #    The match is tried anchored at the end of the path first, so
        #    that an earlier path component that merely contains "collect"
        #    (e.g. ".../collector/...") is left alone. The unanchored form
        #    remains as a fallback for paths that do not end in "collect".
        my $collectParentDir = $collect_dir;
        if ($collectParentDir !~ s/collect[\/\\]?$//) {
            $collectParentDir =~ s/collect(\/|\\)?//;
        }

        # b. Use the collectParentDir to create the contents of gsdl.xml
        my $greenstone_url_prefix = &util::get_greenstone_url_prefix(); # would have the required slash at front
        my $gsdlXMLcontents = "<?xml version='1.0' encoding='utf-8'?>\n<Context docBase=\"";
        $gsdlXMLcontents = $gsdlXMLcontents.$collectParentDir."\" path=\"$greenstone_url_prefix\"></Context>";

        # 3. If there is already a gsdl.xml file in host_path, compare the string we
        # want to write with what is already in there. If they're the same, we can return
        $xmlFile = &FileUtils::filenameConcatenate($host_path, "gsdl.xml");
        if (-e $xmlFile) {
            # such a file exists, so read the contents
            my $xml_contents;
            if (open(my $fin, "<", $xmlFile)) {
                local $/ = undef;       # Read entire file at once
                $xml_contents = <$fin>; # Now file is read in as one single 'line'
                close($fin);
            }
            else {
                print STDERR "g2f-import.pl: Unable to open existing $xmlFile for comparing...Recoverable. $!\n";
                # doesn't matter, we'll just overwrite it then
            }
            if ((defined $xml_contents) && ($xml_contents eq $gsdlXMLcontents)) {
                print STDERR "Fedora links to the FLI import folder through gsdl.xml.\n";
                # it already contains what we want, we're done
                return "gsdl.xml";
            }
        }

        # 4. If we're here, the contents of gsdl.xml need to be updated:
        # a. First stop the fedora server
        my $script_ext = ($ENV{'GSDLOS'} =~ m/^windows/) ? ".bat" : ".sh";
        my $stop_tomcat = &FileUtils::filenameConcatenate($fedora_home, "tomcat", "bin", "shutdown".$script_ext);
        # execute the command
        $! = 0;
        my $status = system($stop_tomcat);
        if ($status != 0) { # to get the actual exit value, divide by 256, but not useful here
            # possible tomcat was already stopped - it's not the end of the world
            print STDERR "Failed to stop Fedora server. Perhaps it was not running. $!\n";
            print "Exit status = ", $status/256, "\n";
        }

        # b. overwrite the file that has outdated contents with the contents we just constructed
        open(my $fout, ">", $xmlFile) # create or overwrite gsdl.xml file
            or die "g2f-import.pl: Unable to open $xmlFile for telling Fedora where the collect dir is...ERROR: $!\n";
        # write out the updated contents and close the file
        print $fout $gsdlXMLcontents;
        # Buffered write errors only surface at close; report them but still
        # go on to restart the server that was stopped above.
        if (!close($fout)) {
            print STDERR "g2f-import.pl: Problem writing $xmlFile: $!\n";
        }

        # c. Restart the fedora server
        my $start_tomcat = &FileUtils::filenameConcatenate($fedora_home, "tomcat", "bin", "startup".$script_ext);
        $! = 0;
        $status = system($start_tomcat);
        if ($status != 0) {
            print STDERR "Failed to restart the Fedora server... ERROR: $!\n";
            print "Exit status = ", $status/256, "\n";
        }

        # Leaving this block resets CATALINA_HOME to GS' Tomcat (it is
        # undefined for GS2 since GS2 has no tomcat).
    }

    # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
    &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set

    # Starting up the Fedora server takes a long time. We need to wait for the server to be
    # ready before import can continue, because g2f-import relies on an up-and-running Fedora
    # server to purge the collection from it while g2f-build.pl needs a ready Fedora server
    # in order to make it ingest the FedoraMETS. Sleeping is not sufficient (#sleep 10;) since
    # the subsequent steps depend on a proper server restart.
    # Dr Bainbridge's suggestion: test the server is ready with a call to wget.
    #
    # The wget --spider option makes it check that the page is merely there rather than
    # downloading it (see http://www.gnu.org/software/wget/manual/wget.html#Download-Options)
    # and -q is for quiet. wget's own --tries/--waitretry options are not used because
    # fatal errors like "connection refused" or "not found" (404) ARE NOT RETRIED by wget,
    # and that is exactly what happens while the server is still starting.

    print STDERR "Fedora server restarted. Waiting for it to become ready...\n";
    $! = 0;

    # retry fedora server every second for a total of 20 times until the server is ready
    my $search_url = "$protocol://$hostname:$port/fedora/search";
    my $fedoraServerReady = 0;
    my $count = 0;
    do {
        # List form of system(): the URL is handed to wget as a single
        # argument and is never interpreted by a shell.
        $fedoraServerReady = system("wget", "-q", "--spider", $search_url);
        if ($fedoraServerReady != 0) {
            sleep(1);
            $count++;
        }
    } while ($fedoraServerReady != 0 && $count < 20);

    if ($fedoraServerReady != 0) {
        print STDERR "Fedora server is still not ready... ERROR: $!\n";
        print "Exit status = ", $fedoraServerReady/256, "\n";
        die "Exiting....\n";
    }

    # return some indication that things went well
    return "gsdl.xml";
}
512
513
5141;
Note: See TracBrowser for help on using the repository browser.