- Timestamp:
- 2010-02-05T15:30:50+13:00 (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm
r21715 r21716 72 72 73 73 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ], 74 'optional-args' => [ "metapos" ] } 74 'optional-args' => [ "metapos" ] }, 75 76 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], 77 'optional-args' => [ ] 78 } 75 79 }; 76 80 … … 383 387 } 384 388 385 # raw extended 386 # Someone please write some comments on why adding ':'.$tagname => $attrHash 387 return (':'.$tagname => $attrHash, [$tagname => $attrHash]); 389 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 390 return [$tagname => $attrHash]; 388 391 } 389 392 … … 412 415 } 413 416 414 # raw 415 return $tagname => $attrHash; 417 418 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 419 return [$tagname => $attrHash]; 416 420 } 417 421 … … 461 465 # Set the call back functions 462 466 my @rules = 463 ( _default => 'raw extended',467 ( _default => 'raw', 464 468 'Metadata' => \&dxml_metadata, 465 469 'Description' => \&dxml_description ); … … 530 534 my $metamode = $parser->{'parameters'}->{'metamode'}; 531 535 536 # Report error if we don't see FileName tag before this 537 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'})); 538 539 # Don't do anything if we are not in the right FileSet 540 my $file_regexp = $parser->{'parameters'}->{'current_file'}; 541 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/)); 542 532 543 # Find the right metadata tag and checks if we are going to override it 533 544 # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified … … 542 553 } 543 554 544 # raw extended 545 # Someone please write some comments on why adding ':'.$tagname => $attrHash 546 return (':'.$tagname => $attrHash, [$tagname => $attrHash]); 555 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 556 return [$tagname => $attrHash]; 547 557 } 548 558 … … 551 561 { 552 562 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 553 my $metamode = $parser->{'parameters'}->{'metamode'}; 563 my $metamode = $parser->{'parameters'}->{'metamode'}; 564 565 # Failed... Report error if we don't see FileName tag before this 566 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'})); 567 568 # Don't do anything if we are not in the right FileSet 569 my $file_regexp = $parser->{'parameters'}->{'current_file'}; 570 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/)); 554 571 555 572 # Accumulate the metadata block to the end of the description block … … 571 588 } 572 589 573 # raw 574 return $tagname => $attrHash; 590 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 591 return [$tagname => $attrHash]; 592 } 593 594 595 sub mxml_filename 596 { 597 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 598 599 # Store the filename of the Current Fileset 600 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd 601 # FileName tag must come before Description tag 602 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'}; 603 604 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 605 return [$tagname => $attrHash]; 606 } 607 608 609 sub mxml_fileset 610 { 611 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 612 613 # Initilise the current_file 614 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd 615 # FileName tag must come before Description tag 616 $parser->{'parameters'}->{'current_file'} = ""; 617 618 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 619 return [$tagname => $attrHash]; 575 620 } 576 621 … … 579 624 { 580 625 my $self = shift @_; 581 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode ) = @_;626 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_; 582 627 583 628 # Set the call-back functions for the metadata tags 584 629 my @rules = 585 ( _default => 'raw extended', 630 ( _default => 'raw', 631 'FileName' => \&mxml_filename, 586 632 'Metadata' => \&mxml_metadata, 587 'Description' => \&mxml_description ); 633 'Description' => \&mxml_description, 634 'FileSet' => \&mxml_fileset); 588 635 589 636 # use XML::Rules to add it in (read in and out again) 590 637 my $parser = XML::Rules->new(rules => \@rules, 591 style => 'filter' ); 638 style => 'filter', 639 output_encoding => 'utf8'); 592 640 593 641 my $xml_in = ""; … … 607 655 $parser->filter($xml_in,\$xml_out, { metaname => $metaname, 608 656 metavalue => $metavalue, 609 metamode => $metamode } ); 610 657 metamode => $metamode, 658 src_file => $src_file, 659 current_file => undef} ); 660 611 661 if (!open(MOUT,">$metadata_xml_filename")) { 612 662 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!"); 613 663 } 614 664 else { 665 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules 666 # At the moment, I will just hack it! 667 my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">"; 668 $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/; 615 669 print MOUT $xml_out; 616 670 close(MOUT); … … 641 695 # Make sure the collection isn't locked by someone else 642 696 $self->lock_collection($username, $collect); 643 697 644 698 # look up additional args 645 699 # want either d= or f= … … 675 729 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file); 676 730 } 677 678 731 679 732 # figure out correct metadata.xml file [?] 680 733 # Assuming the metadata.xml file is next to the source file … … 685 738 686 739 # Edit the metadata.xml 687 # Note: At moment it doesn't correctly on metadata.xml with multiple FileSets 688 # "accumulate" mode will add new metadata block to all FileSets 689 # "override" mode will over write the first encountered metadata block, even if it doesn't have the right source 690 $self->edit_metadata_xml($gsdl_cgi,$metadata_xml_filename, 691 $metaname,$metavalue,$metamode); 740 # Modified by Jeffrey from DL Consulting 741 # Handle the case where there is one metadata.xml file for multiple FileSets 742 # The XML filter needs to know whether it is in the right FileSet 743 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file. 744 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file) 745 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, 746 $metaname, $metavalue, $metamode, $import_tailname); 692 747 693 748 # Release the lock once it is done … … 859 914 860 915 916 # Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process... 917 sub insert_metadata 918 { 919 my $self = shift @_; 920 921 my $username = $self->{'username'}; 922 my $collect = $self->{'collect'}; 923 my $gsdl_cgi = $self->{'gsdl_cgi'}; 924 my $gsdlhome = $self->{'gsdlhome'}; 925 926 # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild. 927 my $rebuild_pending_macro = "_rebuildpendingmessage_"; 928 929 # don't user authenticate for now 930 if ($baseaction::authentication_enabled) { 931 # Ensure the user is allowed to edit this collection 932 $self->authenticate_user($username, $collect); 933 } 934 935 # Obtain the collect and archive dir 936 my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 937 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); 938 939 # Make sure the collection isn't locked by someone else 940 $self->lock_collection($username, $collect); 941 942 # Check additional args 943 my $docid = $self->{'d'}; 944 if (!defined($docid)) { 945 $gsdl_cgi->generate_error("No document id is specified: d=..."); 946 } 947 my $metaname = $self->{'metaname'}; 948 if (!defined($metaname)) { 949 $gsdl_cgi->generate_error("No metaname is specified: metadataname=..."); 950 } 951 my $metavalue = $self->{'metavalue'}; 952 if (!defined($metavalue) || $metavalue eq "") { 953 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=..."); 954 } 955 # make "accumulate" the default (less destructive, as won't actually 956 # delete any existing values) 957 my $metamode = "accumulate"; 958 959 #=======================================================================# 960 # set_import_metadata [START] 961 #=======================================================================# 962 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file 963 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f" 964 my $metadata_xml_file; 965 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir); 966 my $archive_doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid); 967 my $archive_doc_rec = &dbutil::convert_infodb_string_to_hash($archive_doc_rec_string); 968 969 # This now stores the full pathname 970 my $import_filename = $archive_doc_rec->{'src-file'}->[0]; 971 972 # figure out correct metadata.xml file [?] 973 # Assuming the metadata.xml file is next to the source file 974 # Note: This will not work if it is using the inherited metadata from the parent folder 975 my ($import_tailname, $import_dirname) 976 = File::Basename::fileparse($import_filename); 977 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml"); 978 979 # Shane's escape characters 980 $metavalue = pack "U0C*", unpack "C*", $metavalue; 981 $metavalue =~ s/\,/,/g; 982 $metavalue =~ s/\:/:/g; 983 $metavalue =~ s/\|/|/g; 984 $metavalue =~ s/\(/(/g; 985 $metavalue =~ s/\)/)/g; 986 $metavalue =~ s/\[/[/g; 987 $metavalue =~ s/\\/\/g; 988 $metavalue =~ s/\]/]/g; 989 $metavalue =~ s/\{/{/g; 990 $metavalue =~ s/\}/}/g; 991 $metavalue =~ s/\"/"/g; 992 $metavalue =~ s/\`/`/g; 993 $metavalue =~ s/\n/_newline_/g; 994 995 # Edit the metadata.xml 996 # Modified by Jeffrey from DL Consulting 997 # Handle the case where there is one metadata.xml file for multiple FileSets 998 # The XML filter needs to know whether it is in the right FileSet 999 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file. 1000 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file) 1001 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, 1002 $metaname, $metavalue, $metamode, $import_tailname); 1003 #=======================================================================# 1004 # set_import_metadata [END] 1005 #=======================================================================# 1006 1007 1008 #=======================================================================# 1009 # set_metadata (accumulate version) [START] 1010 #=======================================================================# 1011 # To people who know $collect_tail please add some comments 1012 # Obtain path to the database 1013 my $collect_tail = $collect; 1014 $collect_tail =~ s/^.*[\/\\]//; 1015 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); 1016 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory); 1017 1018 # Read the docid entry 1019 my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid); 1020 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string); 1021 foreach my $k (keys %$doc_rec) { 1022 my @escaped_v = (); 1023 foreach my $v (@{$doc_rec->{$k}}) { 1024 if ($k eq "contains") { 1025 # protect quotes in ".2;".3 etc 1026 $v =~ s/\"/\\\"/g; 1027 push(@escaped_v, $v); 1028 } 1029 else { 1030 my $ev = &ghtml::unescape_html($v); 1031 $ev =~ s/\"/\\\"/g; 1032 push(@escaped_v, $ev); 1033 } 1034 } 1035 $doc_rec->{$k} = \@escaped_v; 1036 } 1037 1038 # Protect the quotes 1039 $metavalue =~ s/\"/\\\"/g; 1040 1041 # Adds the pending macro 1042 my $macro_metavalue = $rebuild_pending_macro . $metavalue; 1043 1044 # If the metadata doesn't exist, create a new one 1045 if (!defined($doc_rec->{$metaname})){ 1046 $doc_rec->{$metaname} = [ $macro_metavalue ]; 1047 } 1048 # Else, let's acculumate the values 1049 else { 1050 push(@{$doc_rec->{$metaname}},$macro_metavalue); 1051 } 1052 1053 # Generate the record string 1054 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec); 1055 1056 # Store it into GDBM 1057 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\""; 1058 my $status = system($cmd); 1059 if ($status != 0) { 1060 # Catch error if gdbmget failed 1061 my $mess = "Failed to set metadata key: $docid\n"; 1062 1063 $mess .= "PATH: $ENV{'PATH'}\n"; 1064 $mess .= "cmd = $cmd\n"; 1065 $mess .= "Exit status: $status\n"; 1066 $mess .= "System Error Message: $!\n"; 1067 1068 $gsdl_cgi->generate_error($mess); 1069 } 1070 else { 1071 my $mess = "insert-metadata successful: Key[$docid]\n"; 1072 $mess .= " [In metadata.xml] $metaname"; 1073 $mess .= " = $metavalue\n"; 1074 $mess .= " [In database] $metaname"; 1075 $mess .= " = $macro_metavalue\n"; 1076 $mess .= " The new text has not been indexed, rebuilding collection is required\n"; 1077 $gsdl_cgi->generate_ok_message($mess); 1078 } 1079 #=======================================================================# 1080 # set_metadata (accumulate version) [END] 1081 #=======================================================================# 1082 1083 # Release the lock once it is done 1084 $self->unlock_collection($username, $collect); 1085 } 1086 861 1087 1;
Note:
See TracChangeset
for help on using the changeset viewer.