package g2futil;

# Helper routines shared by the g2f (Greenstone-to-Fedora) scripts: running
# the Fedora client command-line tools (fedora-dsinfo, fedora-purge,
# fedora-ingest), reading a document's identifier out of docmets.xml, and
# maintaining the gsdl.xml file under FEDORA_HOME/tomcat/conf/Catalina/<host>/.

use strict;

use Config;
use File::Spec;

# Must be loaded before the BEGIN block below, which calls
# util::filename_cat() at compile time.
use util;

BEGIN {
    if (!defined $ENV{'FEDORA_HOME'}) {
        print STDERR "Environment variable FEDORA_HOME not set.\n";
        exit 1;
    }

    my $fedora_home       = $ENV{'FEDORA_HOME'};
    my $fedora_client_bin = &util::filename_cat($fedora_home, "client", "bin");

    # Use the platform's PATH separator instead of a hard-coded ':'.
    my $path_sep = $Config{'path_sep'};
    if (!defined $path_sep || $path_sep eq "") {
        $path_sep = ":";
    }

    if (defined $ENV{'PATH'} && $ENV{'PATH'} ne "") {
        $ENV{'PATH'} .= $path_sep . $fedora_client_bin;
    }
    else {
        $ENV{'PATH'} = $fedora_client_bin;
    }
}

# Returns 1 when the caller passed the literal flag "tolerate_error",
# 0 otherwise (including when the flag is undefined).
sub _tolerates_errors {
    my ($tolerate_error) = @_;

    return (defined $tolerate_error && $tolerate_error eq "tolerate_error") ? 1 : 0;
}

# Older command runner based on system().
#
#   $cmd            - complete shell command line
#   $verbosity      - 0 discards the command's stdout; >= 2 echoes the
#                     command and its exit status
#   $tolerate_error - pass the string "tolerate_error" to discard stdout and
#                     suppress the error report
#
# Returns the raw status from system() (exit code is status/256).
sub run_cmd_old {
    my ($cmd, $verbosity, $tolerate_error) = @_;

    my $tolerant = _tolerates_errors($tolerate_error);

    if (($verbosity == 0) || $tolerant) {
        # devnull() is "/dev/null" on Unix and the right equivalent elsewhere
        $cmd .= " > " . File::Spec->devnull();
    }

    if ($verbosity >= 2) {
        print "Runing command:\n";
        print "$cmd\n";
    }

    my $status = system($cmd);

    if ($verbosity >= 2) {
        print "Exit status = ", $status/256, "\n";
    }

    # system() returns -1 when the command could not be started at all, so
    # test for any non-zero status rather than only positive ones.
    if (!$tolerant && $status != 0) {
        print STDERR "Error executing:\n$cmd\n";
        print STDERR "$!\n";
    }

    return $status;
}

# Runs a program, capturing its combined stdout/stderr through a pipe.
#
#   $prog           - program name; names starting with "fedora-" get ".bat"
#                     appended when GSDLOS starts with "windows", else ".sh"
#   $arguments      - argument string, appended to $prog as-is
#   $verbosity      - >= 2 also reports the command on failure, >= 3 also
#                     dumps the captured output on failure
#   $tolerate_error - pass the string "tolerate_error" to suppress echoing of
#                     the output and the failure report
#
# Returns 0 on success. Returns the non-zero wait status of the command on
# failure, or -1 when the command could not be launched or when it exited
# with status 0 but printed a line starting with "Error:".
#
# NOTE(review): $arguments (which callers build from username/password) is
# interpolated unquoted into a shell command line.
sub run_cmd {
    my ($prog, $arguments, $verbosity, $tolerate_error) = @_;

    my $tolerant = _tolerates_errors($tolerate_error);

    my $on_windows = (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ m/^windows/);
    my $script_ext = $on_windows ? ".bat" : ".sh";

    if ($prog =~ m/^fedora-/) {
        $prog .= $script_ext;
    }

    my $cmd = "$prog $arguments";

    # Lexical, and pre-set to a failure value so that a failed open() below
    # can never return undef or a status left over from an earlier call.
    my $cmd_status = -1;

    my $cmd_fh;
    if (!open($cmd_fh, "-|", "$cmd 2>&1")) {
        print STDERR "Error: failed to execute $cmd\n";
        return $cmd_status;
    }

    my $result = "";
    while (defined(my $line = <$cmd_fh>)) {
        $result .= $line;

        if (!$tolerant) {
            print $line;
        }
    }

    close($cmd_fh);
    $cmd_status = $?;

    if ($cmd_status == 0) {
        # Check for any lines in the result beginning 'Error:'
        if ($result =~ m/^Error\s*:/m) {
            # Fedora script generated an error, but did not exit
            # with an error status => artificially raise one
            $cmd_status = -1;
        }
    }

    if ($cmd_status != 0 && !$tolerant) {
        print STDERR "Error: processing command failed. Exit status $cmd_status\n";

        if ($verbosity >= 2) {
            print STDERR " Command was: $cmd\n";
        }
        if ($verbosity >= 3) {
            print STDERR "result: $result\n";
        }
    }

    return $cmd_status;
}

# Runs fedora-dsinfo for object $pid, tolerating errors.
#
# $options is a hash reference with the keys verbosity, hostname, port,
# username, password and protocol. Returns the status from run_cmd().
sub run_datastore_info {
    my ($pid, $options) = @_;

    my $verbosity = $options->{'verbosity'};

    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $username = $options->{'username'};
    my $password = $options->{'password'};
    my $protocol = $options->{'protocol'};

    my $prog      = "fedora-dsinfo";
    my $arguments = "$hostname $port $username $password $pid $protocol";

    return run_cmd($prog, $arguments, $verbosity, "tolerate_error");
}

# Runs fedora-purge for object $pid.
#
# $options is a hash reference with the keys verbosity, hostname, port,
# username, password and protocol. Returns the status from run_cmd().
sub run_purge {
    my ($pid, $options) = @_;

    my $verbosity = $options->{'verbosity'};

    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $username = $options->{'username'};
    my $password = $options->{'password'};
    my $protocol = $options->{'protocol'};

    my $server = "$hostname:$port";

    my $prog      = "fedora-purge";
    my $arguments = "$server $username $password $pid $protocol";
    $arguments .= " \\\n \"Automated_purge_by_g2f_script\"";

    return run_cmd($prog, $arguments, $verbosity);
}

# Runs fedora-ingest on the file $docmets_filename.
#
# The ingest format is "metslikefedora1" when the FEDORA2_HOME environment
# variable is set to a true value, otherwise
# "info:fedora/fedora-system:METSFedoraExt-1.1".
#
# $options is a hash reference with the keys verbosity, hostname, port,
# username, password and protocol. Returns the status from run_cmd().
sub run_ingest {
    my ($docmets_filename, $options) = @_;

    my $verbosity = $options->{'verbosity'};

    my $hostname = $options->{'hostname'};
    my $port     = $options->{'port'};
    my $username = $options->{'username'};
    my $password = $options->{'password'};
    my $protocol = $options->{'protocol'};

    my $server = "$hostname:$port";

    my $prog = "fedora-ingest";

    my $type = $ENV{'FEDORA2_HOME'}
        ? "metslikefedora1"
        : "info:fedora/fedora-system:METSFedoraExt-1.1";

    my $arguments = "file \"$docmets_filename\" $type $server $username $password $protocol";

    # Log message for the ingest (previously the purge message was reused here)
    $arguments .= " \\\n \"Automated_ingest_by_g2f_script\"";

    return run_cmd($prog, $arguments, $verbosity);
}

# Returns the text of the first <dc:identifier> element that opens and closes
# on a single line of docmets.xml inside $hash_dir. Returns undef when no such
# line exists; if the file cannot be opened a warning is printed to STDERR and
# undef is returned.
sub get_hash_id {
    my ($hash_dir) = @_;

    my $hash_id = undef;

    my $docmets_filename = &util::filename_cat($hash_dir, "docmets.xml");

    my $din;
    if (open($din, "<", $docmets_filename)) {
        while (defined(my $line = <$din>)) {
            if ($line =~ m/<dc:identifier>(.*?)<\/dc:identifier>/) {
                $hash_id = $1;
                last;
            }
        }

        close($din);
    }
    else {
        print STDERR "Warning: Unable to open \"$docmets_filename\"\n";
    }

    return $hash_id;
}

# Subroutine to write the gsdl.xml file in FEDORA_HOME/tomcat/conf/Catalina/<host>/
# This xml file will tell Fedora where to find the parent folder of the GS collect dir
# so that it can obtain the FedoraMETS files for ingestion.
# It depends on the Fedora server being on the same machine as the Greenstone server that
# this code is part of.
#
#   $fedora_host - name of the host folder under Catalina to try first
#   $collect_dir - path of the Greenstone collect directory
#
# If gsdl.xml already holds the wanted contents nothing is changed. Otherwise
# Tomcat is shut down, the file is rewritten, Tomcat is started again and the
# sub sleeps for 10 seconds. Returns the string "gsdl.xml" in both cases;
# dies if the file cannot be opened for writing.
sub write_gsdl_xml_file {
    my ($fedora_host, $collect_dir) = @_;

    print STDERR "Ensuring that a correct gsdl.xml file exists on the Fedora server end\n";

    # The top of this file has already made sure that FEDORA_HOME is set

    # 1. Find out which folder to write to: fedora_host or localhost;
    # whichever contains fedora.xml is the one we want. If neither does,
    # warn and carry on with the localhost folder.
    my $fedora_home = $ENV{'FEDORA_HOME'};
    my $base_path   = &util::filename_cat($fedora_home, "tomcat", "conf", "Catalina");

    my $host_path       = &util::filename_cat($base_path, $fedora_host);
    my $fedora_xml_file = &util::filename_cat($host_path, "fedora.xml");

    if (!-e $fedora_xml_file) {
        # try seeing if folder localhost contains fedora.xml
        $host_path       = &util::filename_cat($base_path, "localhost");
        $fedora_xml_file = &util::filename_cat($host_path, "fedora.xml");

        if (!-e $fedora_xml_file) {
            # try putting gsdl.xml in this folder, but still print a warning
            print STDERR "**** $host_path does not contain file fedora.xml. Hoping gsdl.xml belongs there anyway\n";
        }
    }

    # 2. Construct the string we are going to write to the gsdl.xml file
    # a. get the parent directory of collect_dir by removing the word
    # "collect" and any optional OS-type slash from its end. The pattern is
    # anchored so that "collect" occurring earlier in the path is untouched.
    my $collectParentDir = $collect_dir;
    $collectParentDir =~ s{collect[/\\]?\z}{};

    # b. Use the collectParentDir to create the contents of gsdl.xml
    # NOTE(review): the markup of this string appears to have been lost (only
    # a newline remains and $collectParentDir is never used). Presumably it
    # was an XML declaration followed by a Tomcat <Context> element pointing
    # at $collectParentDir -- TODO: restore from the upstream source.
    my $gsdlXMLcontents = "\n";

    # 3. If there is already a gsdl.xml file in host_path, compare the string we
    # want to write with what is already in there. If they're the same, we can return
    my $gsdl_xml_file = &util::filename_cat($host_path, "gsdl.xml");

    if (-e $gsdl_xml_file) {
        my $fin;
        if (open($fin, "<", $gsdl_xml_file)) {
            # Read the entire file at once
            my $xml_contents = do { local $/ = undef; <$fin> };
            close($fin);

            if (defined $xml_contents && $xml_contents eq $gsdlXMLcontents) {
                # it already contains what we want, we're done
                print STDERR "The old gsdl.xml file already contains the same.\n";
                return "gsdl.xml";
            }
        }
        else {
            # doesn't matter, we'll just overwrite it then
            print STDERR "***g2f-import.pl: Unable to open existing $gsdl_xml_file for comparing...Recoverable. $!\n";
        }
    }

    # 4. If we're here, the contents of gsdl.xml need to be updated:
    # a. First stop the fedora server
    my $stop_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "shutdown.sh");

    $! = 0; # clear any stale error text before running the command
    if (system($stop_tomcat) != 0) {
        # possibly tomcat was already stopped - it's not the end of the world
        print STDERR "**** Failed to stop Fedora server. Perhaps it was not running. $!\n";
    }

    # b. overwrite the file that has outdated contents with the contents we just constructed
    my $fout;
    unless (open($fout, ">", $gsdl_xml_file)) {
        die "g2f-import.pl: Unable to open $gsdl_xml_file for telling Fedora where the collect dir is...ERROR: $!\n";
    }

    print $fout $gsdlXMLcontents;

    # Buffered write errors only surface at close, so report them, but keep
    # going so that the server is at least started again.
    unless (close($fout)) {
        print STDERR "g2f-import.pl: Problem closing $gsdl_xml_file after writing. ERROR: $!\n";
    }

    # c. Restart the fedora server
    my $start_tomcat = &util::filename_cat($fedora_home, "tomcat", "bin", "startup.sh");

    $! = 0;
    if (system($start_tomcat) != 0) {
        print STDERR "Failed to restart the Fedora server... ERROR: $!\n";
    }

    # QUESTION:
    # Starting up the Fedora server takes a long time. How long should we wait before
    # import continues? g2f-import relies on an up-and-running Fedora server to purge the
    # collection from it whereas g2f-build.pl needs a ready Fedora server in order to make
    # it ingest the FedoraMETS.
    # Let's try waiting 10s for the Fedora server to really be up and running after the
    # restart so import and build can work without glitches. But how can we check if this
    # duration is actually sufficient?
    print STDERR "Fedora server restarted. Waiting 10 seconds to ensure the server is ready...\n";
    sleep 10;

    # return some indication that things went well
    return "gsdl.xml";
}

1;