########################################################################### # # metadataaction.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 2009 New Zealand Digital Library Project # # This program is free software; you can redistr te it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### package metadataaction; use strict; use cgiactions::baseaction; use dbutil; use ghtml; BEGIN { # unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8"); require XML::Rules; } @metadataaction::ISA = ('baseaction'); my $action_table = { "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ], 'optional-args' => [] }, "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ], 'optional-args' => [ "metapos" ] }, "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], 'optional-args' => [ ] }, "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], 'optional-args' => [ "metapos" ] }, "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], 'optional-args' => [ "metapos" ] }, "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ], 'optional-args' => [ "d", "f", "metamode" ] # metamode can be "accumulate", "override", or "unique-id" }, "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ], 'optional-args' => [ ] }, "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ], 'optional-args' => [ "metapos" ] }, "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], 'optional-args' => [ ] } }; sub new { my $class = shift (@_); my ($gsdl_cgi,$iis6_mode) = @_; my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode); return bless $self, $class; } sub get_live_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # Note: Not sure why get_live_metadata doesn't need the authentication check # Obtain the collect dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args my $docid = $self->{'d'}; if ((!defined $docid) || ($docid =~ m/^\s*$/)) { $gsdl_cgi->generate_error("No docid (d=...) specified."); } # Generate the dbkey my $metaname = $self->{'metaname'}; my $dbkey = "$docid.$metaname"; # To people who know $collect_tail please add some comments # Obtain path to the database my $collect_tail = $collect; $collect_tail =~ s/^.*[\/|\\]//; my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory); # Obtain the content of the key my $cmd = "gdbmget $infodb_file_path $dbkey"; if (open(GIN,"$cmd |") == 0) { # Catch error if gdbmget failed my $mess = "Failed to get metadata key: $metaname\n"; $mess .= "$!\n"; $gsdl_cgi->generate_error($mess); } else { # Read everything in and concatenate them into $metavalue my $metavalue = ""; my $line; while (defined ($line=)) { $metavalue .= $line; } close(GIN); chomp($metavalue); # Get rid off the tailing newlines $gsdl_cgi->generate_ok_message("$metavalue"); } # Release the lock once it is done $self->unlock_collection($username, $collect); } sub get_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # Authenticate user if it is enabled if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection &authenticate_user($gsdl_cgi, $username, $collect); } # Obtain the collect dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args my $docid = $self->{'d'}; my $metaname = $self->{'metaname'}; my $metapos = $self->{'metapos'}; # To people who know $collect_tail please add some comments # Obtain path to the database my $collect_tail = $collect; $collect_tail =~ s/^.*[\/\\]//; my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory); # Read the docid entry my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid); my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); # Basically loop through and unescape_html the values foreach my $k (keys %$doc_rec) { my @escaped_v = (); foreach my $v (@{$doc_rec->{$k}}) { my $ev = &ghtml::unescape_html($v); push(@escaped_v, $ev); } $doc_rec->{$k} = \@escaped_v; } # Obtain the specified metadata value $metapos = 0 if (!defined $metapos); my $metavalue = $doc_rec->{$metaname}->[$metapos]; $gsdl_cgi->generate_ok_message("$metavalue"); # Release the lock once it is done $self->unlock_collection($username, $collect); } sub set_live_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # don't user authenticate for now if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection &authenticate_user($gsdl_cgi, $username, $collect); } # Obtain the collect dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args my $docid = $self->{'d'}; if ((!defined $docid) || ($docid =~ m/^\s*$/)) { $gsdl_cgi->generate_error("No docid (d=...) specified."); } my $metavalue = $self->{'metavalue'}; # Generate the dbkey my $metaname = $self->{'metaname'}; my $dbkey = "$docid.$metaname"; # To people who know $collect_tail please add some comments # Obtain path to the database my $collect_tail = $collect; $collect_tail =~ s/^.*[\/\\]//; my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory); # Set the new value my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\""; my $status = system($cmd); if ($status != 0) { # Catch error if gdbmget failed my $mess = "Failed to set metadata key: $dbkey\n"; $mess .= "PATH: $ENV{'PATH'}\n"; $mess .= "cmd = $cmd\n"; $mess .= "Exit status: $status\n"; $mess .= "System Error Message: $!\n"; $gsdl_cgi->generate_error($mess); } else { $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue"); } # Release the lock once it is done $self->unlock_collection($username, $collect); } sub set_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # don't user authenticate for now if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection &authenticate_user($gsdl_cgi, $username, $collect); } # Obtain the collect dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args my $docid = $self->{'d'}; my $metaname = $self->{'metaname'}; my $metapos = $self->{'metapos'}; my $metavalue = $self->{'metavalue'}; # To people who know $collect_tail please add some comments # Obtain path to the database my $collect_tail = $collect; $collect_tail =~ s/^.*[\/\\]//; my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory); # Read the docid entry my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid); my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); foreach my $k (keys %$doc_rec) { my @escaped_v = (); foreach my $v (@{$doc_rec->{$k}}) { if ($k eq "contains") { # protect quotes in ".2;".3 etc $v =~ s/\"/\\\"/g; push(@escaped_v, $v); } else { my $ev = &ghtml::unescape_html($v); $ev =~ s/\"/\\\"/g; push(@escaped_v, $ev); } } $doc_rec->{$k} = \@escaped_v; } ## print STDERR "**** metavalue = $metavalue\n"; # Protect the quotes $metavalue =~ s/\"/\\\"/g; # Set the metadata value if (defined $metapos) { $doc_rec->{$metaname}->[$metapos] = $metavalue; } else { $doc_rec->{$metaname} = [ $metavalue ]; } ## print STDERR "**** metavalue = $metavalue\n"; # Generate the record string my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec); ## print STDERR "**** ser dr\n$serialized_doc_rec\n\n\n"; # Store it into GDBM my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\""; my $status = system($cmd); if ($status != 0) { # Catch error if gdbmget failed my $mess = "Failed to set metadata key: $docid\n"; $mess .= "PATH: $ENV{'PATH'}\n"; $mess .= "cmd = $cmd\n"; $mess .= "Exit status: $status\n"; $mess .= "System Error Message: $!\n"; $gsdl_cgi->generate_error($mess); } else { my $mess = "set-document-metadata successful: Key[$docid]\n"; $mess .= " $metaname"; $mess .= "->[$metapos]" if (defined $metapos); $mess .= " = $metavalue"; $gsdl_cgi->generate_ok_message($mess); } # Release the lock once it is done $self->unlock_collection($username, $collect); } sub dxml_metadata { my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; my $metaname = $parser->{'parameters'}->{'metaname'}; my $metamode = $parser->{'parameters'}->{'metamode'}; # Find the right metadata tag and checks if we are going to override it # Note: This over writes the first metadata block it encountered. If there are multiple Sections in the doc.xml, it might not behave as you would expect my $name_attr = $attrHash->{'name'}; if (($name_attr eq $metaname) && ($metamode eq "override")) { # Get the value and override the current value my $metavalue = $parser->{'parameters'}->{'metavalue'}; $attrHash->{'_content'} = $metavalue; # Don't want it to wipe out any other pieces of metadata $parser->{'parameters'}->{'metamode'} = "done"; } # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! return [$tagname => $attrHash]; } sub dxml_description { my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; my $metamode = $parser->{'parameters'}->{'metamode'}; # Accumulate the metadata # NOTE: This appends new metadata element to all description fields. # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them if ($metamode eq "accumulate") { # tack a new metadata tag on to the end of the + block my $metaname = $parser->{'parameters'}->{'metaname'}; my $metavalue = $parser->{'parameters'}->{'metavalue'}; my $metadata_attr = { '_content' => $metavalue, 'name' => $metaname, 'mode' => "accumulate" }; my $append_metadata = [ "Metadata" => $metadata_attr ]; my $description_content = $attrHash->{'_content'}; push(@$description_content," ", $append_metadata ,"\n "); } # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! return [$tagname => $attrHash]; } sub edit_xml_file { my $self = shift @_; my ($gsdl_cgi, $filename, $rules, $options) = @_; # use XML::Rules to add it in (read in and out again) my $parser = XML::Rules->new(rules => $rules, style => 'filter' ); my $xml_in = ""; if (!open(MIN,"<$filename")) { $gsdl_cgi->generate_error("Unable to read in $filename: $!"); } else { # Read all the text in my $line; while (defined ($line=)) { $xml_in .= $line; } close(MIN); # Matched lines will get handled by the call backs my $xml_out = ""; $parser->filter($xml_in,\$xml_out, $options); if (!open(MOUT,">$filename")) { $gsdl_cgi->generate_error("Unable to write out to $filename: $!"); } else { print MOUT $xml_out; close(MOUT); } } } sub edit_doc_xml { my $self = shift @_; my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos) = @_; # use XML::Rules to add it in (read in and out again) # Set the call back functions my @rules = ( _default => 'raw', 'Metadata' => \&dxml_metadata, 'Description' => \&dxml_description ); # Sets the parameters my $options = { 'metaname' => $metaname, 'metapos' => $metapos, 'metavalue' => $metavalue }; $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@rules,$options); } sub set_archives_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # don't user authenticate for now if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection $self->authenticate_user($username, $collect); } # Obtain the collect and archive dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args my $docid = $self->{'d'}; my $metaname = $self->{'metaname'}; my $metavalue = $self->{'metavalue'}; my $metapos = $self->{'metapos'}; $metapos = 0 if (!defined $metapos); # Obtain the doc.xml path for the specified docID my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir); my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid); my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); my $doc_xml_file = $doc_rec->{'doc-file'}->[0]; # The $doc_xml_file is relative to the archives, and now let's get the full path my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives"); my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file); # Edit the doc.xml file with the specified metadata name, value and position. # TODO: there is a potential problem here as this edit_doc_xml function # is assuming the simple doc.xml situation where there is only one Section and no SubSections. # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename, $metaname,$metavalue,$metapos); # Release the lock once it is done $self->unlock_collection($username, $collect); } sub mxml_metadata { my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; my $metaname = $parser->{'parameters'}->{'metaname'}; my $metamode = $parser->{'parameters'}->{'metamode'}; # Report error if we don't see FileName tag before this die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'})); # Don't do anything if we are not in the right FileSet my $file_regexp = $parser->{'parameters'}->{'current_file'}; return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/)); # Find the right metadata tag and checks if we are going to override it # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified my $name_attr = $attrHash->{'name'}; if (($name_attr eq $metaname) && ($metamode eq "override")) { # Get the value and override the current value my $metavalue = $parser->{'parameters'}->{'metavalue'}; $attrHash->{'_content'} = $metavalue; # Don't want it to wipe out any other pieces of metadata $parser->{'parameters'}->{'metamode'} = "done"; } # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! return [$tagname => $attrHash]; } sub mxml_description { my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; my $metamode = $parser->{'parameters'}->{'metamode'}; # Failed... Report error if we don't see FileName tag before this die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'})); # Don't do anything if we are not in the right FileSet my $file_regexp = $parser->{'parameters'}->{'current_file'}; return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/)); # Accumulate the metadata block to the end of the description block # Note: This adds metadata block to all description blocks, so if there are # multiple FileSets, it will add to all of them if ($metamode eq "accumulate") { # tack a new metadata tag on to the end of the + block my $metaname = $parser->{'parameters'}->{'metaname'}; my $metavalue = $parser->{'parameters'}->{'metavalue'}; my $metadata_attr = { '_content' => $metavalue, 'name' => $metaname, 'mode' => "accumulate" }; my $append_metadata = [ "Metadata" => $metadata_attr ]; my $description_content = $attrHash->{'_content'}; push(@$description_content," ", $append_metadata ,"\n "); } # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! return [$tagname => $attrHash]; } sub mxml_filename { my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; # Store the filename of the Current Fileset # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd # FileName tag must come before Description tag $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'}; # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! return [$tagname => $attrHash]; } sub mxml_fileset { my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; # Initilise the current_file # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd # FileName tag must come before Description tag $parser->{'parameters'}->{'current_file'} = ""; # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! return [$tagname => $attrHash]; } sub edit_metadata_xml { my $self = shift @_; my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_; # Set the call-back functions for the metadata tags my @rules = ( _default => 'raw', 'FileName' => \&mxml_filename, 'Metadata' => \&mxml_metadata, 'Description' => \&mxml_description, 'FileSet' => \&mxml_fileset); # use XML::Rules to add it in (read in and out again) my $parser = XML::Rules->new(rules => \@rules, style => 'filter', output_encoding => 'utf8'); my $xml_in = ""; if (!open(MIN,"<$metadata_xml_filename")) { $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!"); } else { # Read them in my $line; while (defined ($line=)) { $xml_in .= $line; } close(MIN); # Filter with the call-back functions my $xml_out = ""; $parser->filter($xml_in,\$xml_out, { metaname => $metaname, metavalue => $metavalue, metamode => $metamode, src_file => $src_file, current_file => undef} ); if (!open(MOUT,">$metadata_xml_filename")) { $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!"); } else { # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules # At the moment, I will just hack it! my $header_with_utf8_dtd = "\n"; $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/; print MOUT $xml_out; close(MOUT); } } } sub set_import_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # don't user authenticate for now if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection $self->authenticate_user($username, $collect); } # Obtain the collect and archive dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args # want either d= or f= my $docid = $self->{'d'}; my $import_file = $self->{'f'}; if ((!defined $docid) && (!defined $import_file)) { $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); } # Get the parameters and set default mode to "accumulate" my $metaname = $self->{'metaname'}; my $metavalue = $self->{'metavalue'}; my $metamode = $self->{'metamode'}; if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { # make "accumulate" the default (less destructive, as won't actually # delete any existing values) $metamode = "accumulate"; } # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f" my $metadata_xml_file; my $import_filename = undef; if (defined $docid) { my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir); my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid); my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); # This now stores the full pathname $import_filename = $doc_rec->{'src-file'}->[0]; } else { $import_filename = &util::filename_cat($collect_dir,$collect,$import_file); } # figure out correct metadata.xml file [?] # Assuming the metadata.xml file is next to the source file # Note: This will not work if it is using the inherited metadata from the parent folder my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename); my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml"); # Edit the metadata.xml # Modified by Jeffrey from DL Consulting # Handle the case where there is one metadata.xml file for multiple FileSets # The XML filter needs to know whether it is in the right FileSet # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file. # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file) $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname); # Release the lock once it is done $self->unlock_collection($username, $collect); } sub remove_live_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection &authenticate_user($gsdl_cgi, $username, $collect); } # Obtain the collect dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args my $docid = $self->{'d'}; if ((!defined $docid) || ($docid =~ m/^\s*$/)) { $gsdl_cgi->generate_error("No docid (d=...) specified."); } # Generate the dbkey my $metaname = $self->{'metaname'}; my $dbkey = "$docid.$metaname"; # To people who know $collect_tail please add some comments # Obtain the live gdbm_db path my $collect_tail = $collect; $collect_tail =~ s/^.*[\/\\]//; my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory); # Remove the key my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\""; my $status = system($cmd); if ($status != 0) { # Catch error if gdbmdel failed my $mess = "Failed to set metadata key: $dbkey\n"; $mess .= "PATH: $ENV{'PATH'}\n"; $mess .= "cmd = $cmd\n"; $mess .= "Exit status: $status\n"; $mess .= "System Error Message: $!\n"; $gsdl_cgi->generate_error($mess); } else { $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]"); } } sub remove_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # don't user authenticate for now if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection &authenticate_user($gsdl_cgi, $username, $collect); } # Obtain the collect dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # look up additional args my $docid = $self->{'d'}; if ((!defined $docid) || ($docid =~ m/^\s*$/)) { $gsdl_cgi->generate_error("No docid (d=...) specified."); } my $metaname = $self->{'metaname'}; my $metapos = $self->{'metapos'}; # To people who know $collect_tail please add some comments # Obtain the path to the database my $collect_tail = $collect; $collect_tail =~ s/^.*[\/\\]//; my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory); # Read the docid entry my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid); my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); # Basically loop through and unescape_html the values foreach my $k (keys %$doc_rec) { my @escaped_v = (); foreach my $v (@{$doc_rec->{$k}}) { if ($k eq "contains") { # protect quotes in ".2;".3 etc $v =~ s/\"/\\\"/g; push(@escaped_v, $v); } else { my $ev = &ghtml::unescape_html($v); $ev =~ s/\"/\\\"/g; push(@escaped_v, $ev); } } $doc_rec->{$k} = \@escaped_v; } # Check to make sure the key does exist if (!defined ($doc_rec->{$metaname})) { $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]"); } # Obtain the specified metadata pos $metapos = 0 if (!defined $metapos); # consider check key is defined before deleting? # Loop through the metadata array and ignore the specified position my $filtered_metadata = []; my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}}); for (my $i=0; $i<$num_metadata_vals; $i++) { my $metavalue = shift(@{$doc_rec->{$metaname}}); if ($i != $metapos) { push(@$filtered_metadata,$metavalue) } } $doc_rec->{$metaname} = $filtered_metadata; # Turn the record back to string my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec); # Store it back to the database my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\""; my $status = system($cmd); if ($status != 0) { my $mess = "Failed to set metadata key: $docid\n"; $mess .= "PATH: $ENV{'PATH'}\n"; $mess .= "cmd = $cmd\n"; $mess .= "Exit status: $status\n"; $mess .= "System Error Message: $!\n"; $gsdl_cgi->generate_error($mess); } else { my $mess = "DB set (with item deleted) successful: Key[$docid]\n"; $mess .= " $metaname"; $mess .= "->[$metapos]" if (defined $metapos); $gsdl_cgi->generate_ok_message($mess); } } # Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process... sub insert_metadata { my $self = shift @_; my $username = $self->{'username'}; my $collect = $self->{'collect'}; my $gsdl_cgi = $self->{'gsdl_cgi'}; my $gsdlhome = $self->{'gsdlhome'}; # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild. my $rebuild_pending_macro = "_rebuildpendingmessage_"; # don't user authenticate for now if ($baseaction::authentication_enabled) { # Ensure the user is allowed to edit this collection $self->authenticate_user($username, $collect); } # Obtain the collect and archive dir my $collect_dir = &util::filename_cat($gsdlhome, "collect"); my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); # Make sure the collection isn't locked by someone else $self->lock_collection($username, $collect); # Check additional args my $docid = $self->{'d'}; if (!defined($docid)) { $gsdl_cgi->generate_error("No document id is specified: d=..."); } my $metaname = $self->{'metaname'}; if (!defined($metaname)) { $gsdl_cgi->generate_error("No metaname is specified: metadataname=..."); } my $metavalue = $self->{'metavalue'}; if (!defined($metavalue) || $metavalue eq "") { $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=..."); } # make "accumulate" the default (less destructive, as won't actually # delete any existing values) my $metamode = "accumulate"; #=======================================================================# # set_import_metadata [START] #=======================================================================# # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f" my $metadata_xml_file; my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir); my $archive_doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid); my $archive_doc_rec = &dbutil::convert_infodb_string_to_hash($archive_doc_rec_string); # This now stores the full pathname my $import_filename = $archive_doc_rec->{'src-file'}->[0]; # figure out correct metadata.xml file [?] # Assuming the metadata.xml file is next to the source file # Note: This will not work if it is using the inherited metadata from the parent folder my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename); my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml"); # Shane's escape characters $metavalue = pack "U0C*", unpack "C*", $metavalue; $metavalue =~ s/\,/,/g; $metavalue =~ s/\:/:/g; $metavalue =~ s/\|/|/g; $metavalue =~ s/\(/(/g; $metavalue =~ s/\)/)/g; $metavalue =~ s/\[/[/g; $metavalue =~ s/\\/\/g; $metavalue =~ s/\]/]/g; $metavalue =~ s/\{/{/g; $metavalue =~ s/\}/}/g; $metavalue =~ s/\"/"/g; $metavalue =~ s/\`/`/g; $metavalue =~ s/\n/_newline_/g; # Edit the metadata.xml # Modified by Jeffrey from DL Consulting # Handle the case where there is one metadata.xml file for multiple FileSets # The XML filter needs to know whether it is in the right FileSet # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file. # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file) $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname); #=======================================================================# # set_import_metadata [END] #=======================================================================# #=======================================================================# # set_metadata (accumulate version) [START] #=======================================================================# # To people who know $collect_tail please add some comments # Obtain path to the database my $collect_tail = $collect; $collect_tail =~ s/^.*[\/\\]//; my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory); # Read the docid entry my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid); my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); foreach my $k (keys %$doc_rec) { my @escaped_v = (); foreach my $v (@{$doc_rec->{$k}}) { if ($k eq "contains") { # protect quotes in ".2;".3 etc $v =~ s/\"/\\\"/g; push(@escaped_v, $v); } else { my $ev = &ghtml::unescape_html($v); $ev =~ s/\"/\\\"/g; push(@escaped_v, $ev); } } $doc_rec->{$k} = \@escaped_v; } # Protect the quotes $metavalue =~ s/\"/\\\"/g; # Adds the pending macro my $macro_metavalue = $rebuild_pending_macro . $metavalue; # If the metadata doesn't exist, create a new one if (!defined($doc_rec->{$metaname})){ $doc_rec->{$metaname} = [ $macro_metavalue ]; } # Else, let's acculumate the values else { push(@{$doc_rec->{$metaname}},$macro_metavalue); } # Generate the record string my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec); # Store it into GDBM my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\""; my $status = system($cmd); if ($status != 0) { # Catch error if gdbmget failed my $mess = "Failed to set metadata key: $docid\n"; $mess .= "PATH: $ENV{'PATH'}\n"; $mess .= "cmd = $cmd\n"; $mess .= "Exit status: $status\n"; $mess .= "System Error Message: $!\n"; $gsdl_cgi->generate_error($mess); } else { my $mess = "insert-metadata successful: Key[$docid]\n"; $mess .= " [In metadata.xml] $metaname"; $mess .= " = $metavalue\n"; $mess .= " [In database] $metaname"; $mess .= " = $macro_metavalue\n"; $mess .= " The new text has not been indexed, rebuilding collection is required\n"; $gsdl_cgi->generate_ok_message($mess); } #=======================================================================# # set_metadata (accumulate version) [END] #=======================================================================# # Release the lock once it is done $self->unlock_collection($username, $collect); } 1;