Changeset 31602

Show
Ignore:
Timestamp:
19.04.2017 21:35:50 (2 years ago)
Author:
ak19
Message:

Follows Dr Bainbridge's suggestion to prevent URL based calls to set-metadata and remove-meta metadataserver.pl operations. Split metadataaction.pm into modmetadataaction.pm and metadataaction.pm, shifting the methods that modify metadata (set and remove subroutines) into the first. Now GS3 sets an env var that will control whether the meta-modifying subroutines will be available when called. If the env var is set, then metadataaction.pm will include the modmetadataaction.pm file in the begin block. For GS2, it works as before, always including the meta modifying subroutines. Tested on Linux with the GS3 web doc editor vs calling metadataserver.pl to set metadata directly from a URL.

Location:
main/trunk
Files:
1 added
2 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm

    r31589 r31602  
    3535use JSON; 
    3636 
     37# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl 
     38# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files 
     39# http://www.perlmonks.org/?node_id=524456 - merging hashes 
     40 
     41our $modmeta_action_table; # don't init to empty hash here, else it will overwrite whatever BEGIN sets this to 
     42                  # see http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl 
    3743 
    3844BEGIN { 
    3945#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8"); 
    4046    require XML::Rules; 
     47 
     48    # if we're GS3, then GS3_AUTHENTICATED must be defined and set to true 
     49    # in order to have access to subroutines that modify metadata (the set-  
     50    # and remove- metadata subroutines). 
     51    # TODO: if we're GS2, then we continue to behave as before? 
     52 
     53    if(!defined $ENV{'GSDL3HOME'} || (defined $ENV{'GS3_AUTHENTICATED'} && $ENV{'GS3_AUTHENTICATED'} eq "true")) { 
     54    print STDERR "\nIncluding modmeta...\n"; 
     55    require modmetadataaction; 
     56    } 
     57    else { 
     58    $modmeta_action_table = {}; 
     59    } 
     60    print STDERR "End of metadata::BEGIN\n\n"; 
    4161} 
    4262 
    4363@metadataaction::ISA = ('baseaction'); 
    4464 
    45 my $action_table = 
     65 
     66my $getmeta_action_table = 
    4667{ 
    4768    #GET METHODS 
     
    7192        'help-string' => [ 
    7293        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]' 
    73         ]}, 
    74  
    75     #SET METHODS 
    76     "set-live-metadata" => {  
    77         'compulsory-args' => [ "d", "metaname", "metavalue" ], 
    78         'optional-args'   => [ ] }, 
    79  
    80     "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta 
    81         'compulsory-args' => [ "metaname", "metavalue" ], 
    82         'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },  
    83  
    84     "set-index-metadata" => {  
    85         'compulsory-args' => [ "d", "metaname", "metavalue" ], 
    86         'optional-args'   => [ "metapos", "metamode" ] }, 
    87  
    88     "set-archives-metadata" => {  
    89         'compulsory-args' => [ "d", "metaname", "metavalue" ], 
    90         'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override", 
    91      
    92     "set-import-metadata" => {  
    93         'compulsory-args' => [ "metaname", "metavalue" ], 
    94         'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection 
    95                   
    96     #SET METHODS (ARRAY) 
    97     "set-metadata-array" => {  
    98         'compulsory-args' => [ "where", "json" ], 
    99         'optional-args'   => [ ], 
    100         'help-string' => [ 
    101         'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',  
    102          
    103         'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] }, 
    104  
    105 # The same examples rewritten for when running the metadata-server.pl script from the commandline: 
    106  
    107 # the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]", 
    108          
    109 # the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]" 
    110                       
    111     "set-archives-metadata-array" => {  
    112         'compulsory-args' => [ "json" ], 
    113         'optional-args'   => [ ] }, 
    114          
    115     "set-import-metadata-array" => { 
    116         'compulsory-args' => [ "json" ], 
    117         'optional-args'   => [ ] }, 
    118  
    119     "set-index-metadata-array" => { 
    120         'compulsory-args' => [ "json" ], 
    121         'optional-args'   => [ ] }, 
    122      
    123     "set-live-metadata-array" => { 
    124         'compulsory-args' => [ "json" ], 
    125         'optional-args'   => [ ] }, 
    126          
    127     #REMOVE METHODS 
    128     "remove-import-metadata" => {  
    129         'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument 
    130         'optional-args'   => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections. 
    131 # Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos. 
    132                       
    133     "remove-archives-metadata" => {  
    134         'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument 
    135         'optional-args'   => [ "metapos", "metavalue", "metamode" ] }, 
    136  
    137     "remove-live-metadata" => { 
    138         'compulsory-args' => [ "d", "metaname" ], 
    139         'optional-args'   => [ ] }, 
    140  
    141     "remove-index-metadata" => { 
    142         'compulsory-args' => [ "d", "metaname" ], 
    143         'optional-args'   => [ "metapos", "metavalue" ] }, 
    144  
    145     "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta 
    146         'compulsory-args' => [ "d", "metaname" ], 
    147         'optional-args'   => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param 
    148  
    149     #INSERT METHODS 
    150     "insert-metadata" => {  
    151         'compulsory-args' => [ "d", "metaname", "metavalue" ], 
    152         'optional-args'   => [ ] } 
     94        ]} 
    15395}; 
     96 
     97print STDERR "\nMODMETA_TABLE 2: " .join(",", keys $metadataaction::modmeta_action_table) ."\n"; 
     98 
     99 
     100# merge the hashes - http://www.perlmonks.org/?node_id=524456 
     101my $action_table = { %$getmeta_action_table, %$modmeta_action_table }; 
    154102 
    155103 
     
    1000948} 
    1001949 
    1002 sub _set_live_metadata 
    1003 { 
    1004     my $self = shift @_; 
    1005  
    1006     my $collect   = $self->{'collect'}; 
    1007     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1008     #my $gsdlhome  = $self->{'gsdlhome'}; 
    1009     my $infodbtype = $self->{'infodbtype'}; 
    1010  
    1011     # Obtain the collect dir 
    1012     my $site = $self->{'site'}; 
    1013     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1014     ##my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    1015  
    1016  
    1017     # look up additional args 
    1018     my $docid     = $self->{'d'}; 
    1019     if ((!defined $docid) || ($docid =~ m/^\s*$/)) { 
    1020       $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies 
    1021     } 
    1022     my $metavalue = $self->{'metavalue'}; 
    1023  
    1024     # Generate the dbkey     
    1025     my $metaname  = $self->{'metaname'}; 
    1026     my $dbkey = "$docid.$metaname"; 
    1027  
    1028     # To people who know $collect_tail please add some comments 
    1029     # Obtain path to the database 
    1030     my $collect_tail = $collect; 
    1031     $collect_tail =~ s/^.*[\/|\\]//; 
    1032     my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); 
    1033     my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory); 
    1034  
    1035     # Set the new value 
    1036     my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\""; 
    1037     my $status = system($cmd); 
    1038     if ($status != 0) { 
    1039         # Catch error if gdbmget failed 
    1040     my $mess = "Failed to set metadata key: $dbkey\n"; 
    1041  
    1042     $mess .= "PATH: $ENV{'PATH'}\n"; 
    1043     $mess .= "cmd = $cmd\n"; 
    1044     $mess .= "Exit status: $status\n"; 
    1045     $mess .= "System Error Message: $!\n"; 
    1046  
    1047     $gsdl_cgi->generate_error($mess); 
    1048     } 
    1049     else { 
    1050     $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue"); 
    1051     } 
    1052  
    1053     #return $status; # in case calling functions have any further use for this 
    1054 } 
    1055  
    1056 sub set_live_metadata 
    1057 { 
    1058     my $self = shift @_; 
    1059  
    1060     my $username  = $self->{'username'}; 
    1061     my $collect   = $self->{'collect'}; 
    1062     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1063    
    1064     if ($baseaction::authentication_enabled) { 
    1065     # Ensure the user is allowed to edit this collection     
    1066     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    1067     } 
    1068  
    1069     # Make sure the collection isn't locked by someone else 
    1070     $self->lock_collection($username, $collect); 
    1071  
    1072     $self->_set_live_metadata(@_); 
    1073  
    1074     # Release the lock once it is done 
    1075     $self->unlock_collection($username, $collect); 
    1076 } 
    1077  
    1078 sub set_index_metadata_entry 
    1079 { 
    1080     print STDERR "1\n"; 
    1081     my $self = shift @_; 
    1082     my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_; 
    1083      
    1084     # To people who know $collect_tail please add some comments 
    1085     # Obtain path to the database 
    1086     my $collect_tail = $collect; 
    1087     $collect_tail =~ s/^.*[\/|\\]//; 
    1088     my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); 
    1089     my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory); 
    1090      
    1091     print STDERR "2\n"; 
    1092 #   print STDERR "**** infodb file path = $infodb_file_path\n"; 
    1093 #   print STDERR "***** infodb type = $infodbtype\n"; 
    1094      
    1095     # Read the docid entry 
    1096     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid); 
    1097      
    1098     # Set the metadata value 
    1099     if (defined $metapos) { 
    1100     print STDERR "3\n"; 
    1101     # if metamode=accumulate AND metapos, warn user and then use metapos 
    1102     if (defined $metamode && $metamode eq "accumulate") { 
    1103         print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n"; 
    1104         print STDERR "**** Proceeding by using metapos\n"; 
    1105     } 
    1106     $doc_rec->{$metaname}->[$metapos] = $metavalue; 
    1107     } 
    1108     elsif (defined $prevmetavalue) { 
    1109     print STDERR "4\n"; 
    1110         my $array = $doc_rec->{$metaname}; 
    1111         my $length = @$array; 
    1112  
    1113         my $found = 0; 
    1114         for (my $i = 0; $i < $length; $i++){ 
    1115             if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){ 
    1116                 $doc_rec->{$metaname}->[$i] = $metavalue; 
    1117                 $found = 1; 
    1118                 last;                
    1119             } 
    1120         } 
    1121  
    1122         if($found == 0){ 
    1123             $doc_rec->{$metaname} = [ $metavalue ]; 
    1124         } 
    1125     } 
    1126     elsif (defined $metamode && $metamode eq "override") { 
    1127     print STDERR "5\n"; 
    1128     $doc_rec->{$metaname} = [ $metavalue ];  
    1129     } 
    1130     else { # default for index was to override, but because accumulate is less destructive,  
    1131     # and because accumulate is the default for archives and import, that's the new default for index too 
    1132     print STDERR "6\n"; 
    1133     if(defined $doc_rec->{$metaname}) { 
    1134         push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname 
    1135     } else { 
    1136         $doc_rec->{$metaname} = [ $metavalue ]; 
    1137     } 
    1138     } 
    1139     print STDERR "6\n"; 
    1140   
    1141     my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec); 
    1142      
    1143     return $status; 
    1144      
    1145 } 
    1146  
    1147 sub _set_import_metadata 
    1148 { 
    1149     my $self = shift @_; 
    1150  
    1151     my $collect   = $self->{'collect'}; 
    1152     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1153     my $infodbtype = $self->{'infodbtype'}; 
    1154 #    my $gsdlhome  = $self->{'gsdlhome'}; 
    1155       
    1156     # Obtain the collect and archive dir     
    1157     my $site = $self->{'site'}; 
    1158     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1159     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    1160     my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); 
    1161     
    1162     # look up additional args 
    1163     # want either d= or f= 
    1164     my $docid  = $self->{'d'}; 
    1165     my ($docid_root,$docid_secnum); 
    1166     if(defined $docid) {     
    1167     ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);     
    1168     # as yet no support for setting subsection metadata in metadata.xml 
    1169     if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) { 
    1170         $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n"); 
    1171         return; 
    1172     } 
    1173     } 
    1174  
    1175     my $import_file  = $self->{'f'}; 
    1176     if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) { 
    1177     $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified 
    1178     }  
    1179  
    1180     # Get the parameters and set default mode to "accumulate" 
    1181     my $metaname   = $self->{'metaname'}; 
    1182     my $metavalue  = $self->{'metavalue'}; 
    1183 ##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g; 
    1184     $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g; 
    1185      
    1186     my $metamode   = $self->{'metamode'}; 
    1187     if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { 
    1188     # make "accumulate" the default (less destructive, as it won't actually  
    1189     # delete any existing values) 
    1190     $metamode = "accumulate"; 
    1191     } 
    1192  
    1193     # adding metapos and prevmetavalue support to import_metadata subroutines 
    1194     my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override 
    1195     my $prevmetavalue = $self->{'prevmetavalue'}; 
    1196     $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/)); 
    1197     $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/)); 
    1198  
    1199     my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    1200     my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid 
    1201  
    1202     my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n"; 
    1203     $mess .= "  $metaname"; 
    1204     $mess .= " = $metavalue"; 
    1205     $mess .= " ($metamode)\n"; 
    1206  
    1207     $gsdl_cgi->generate_ok_message($mess); 
    1208  
    1209     #return $status; # in case calling functions have any further use for this 
    1210 } 
    1211  
    1212 # the version of set_index_meta that doesn't do authentication 
    1213 sub _set_archives_metadata 
    1214 { 
    1215     my $self = shift @_; 
    1216  
    1217     my $collect   = $self->{'collect'}; 
    1218     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1219     my $infodbtype = $self->{'infodbtype'}; 
    1220      
    1221     # Obtain the collect and archive dir    
    1222     my $site = $self->{'site'}; 
    1223     my $collect_dir = $gsdl_cgi->get_collection_dir($site);  
    1224     my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); 
    1225  
    1226     # look up additional args 
    1227     my $docid  = $self->{'d'}; 
    1228     my $metaname   = $self->{'metaname'}; 
    1229     my $metavalue  = $self->{'metavalue'}; 
    1230     my $prevmetavalue = $self->{'prevmetavalue'}; 
    1231      
    1232     my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override 
    1233                                   # Don't append "|| undef", since if metapos=0 it will then be set to undef 
    1234  
    1235     $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/)); 
    1236     $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/)); 
    1237  
    1238     my $metamode   = $self->{'metamode'}; 
    1239     if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { 
    1240     # make "accumulate" the default (less destructive, as it won't actually  
    1241     # delete any existing values) 
    1242     $metamode = "accumulate"; 
    1243     } 
    1244  
    1245     my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid, 
    1246                 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue); 
    1247     
    1248     if ($status == 0) { 
    1249     my $mess = "set-archives-metadata successful: Key[$docid]\n"; 
    1250     $mess .= "  $metaname"; 
    1251     $mess .= "->[$metapos]" if (defined $metapos); 
    1252     $mess .= " = $metavalue"; 
    1253     $mess .= " ($metamode)\n"; 
    1254      
    1255     $gsdl_cgi->generate_ok_message($mess);   
    1256     } 
    1257     else { 
    1258     my $mess .= "Failed to set archives metadata key: $docid\n"; 
    1259     $mess .= "Exit status: $status\n"; 
    1260     if(defined $self->{'error_msg'}) { 
    1261         $mess .= "Error Message: $self->{'error_msg'}\n"; 
    1262     } else { 
    1263         $mess .= "System Error Message: $!\n"; 
    1264     } 
    1265     $mess .= "-" x 20 . "\n"; 
    1266      
    1267     $gsdl_cgi->generate_error($mess); 
    1268     } 
    1269  
    1270     #return $status; # in case calling functions have any further use for this 
    1271 } 
    1272  
    1273  
    1274 # the version of set_index_meta that doesn't do authentication 
    1275 sub _set_index_metadata 
    1276 { 
    1277     print STDERR "START SET INDEX METADATA\n"; 
    1278     my $self = shift @_; 
    1279  
    1280     my $collect   = $self->{'collect'}; 
    1281     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1282  
    1283     my $site = $self->{'site'}; 
    1284     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1285  
    1286     # look up additional args 
    1287     my $docid     = $self->{'d'}; 
    1288     my $metaname  = $self->{'metaname'}; 
    1289     my $metapos   = $self->{'metapos'}; # undef has meaning 
    1290     my $metavalue = $self->{'metavalue'}; 
    1291     my $infodbtype = $self->{'infodbtype'}; 
    1292     my $metamode  = $self->{'metamode'}; 
    1293     my $prevmetavalue = $self->{'prevmetavalue'}; 
    1294  
    1295     $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/)); 
    1296     $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/)); 
    1297  
    1298     print STDERR "SETTING INDEX METADATA ENTRY\n"; 
    1299     my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue); 
    1300     print STDERR "DONE SETTING INDEX METADATA ENTRY\n"; 
    1301     if ($status != 0) { 
    1302         # Catch error if set infodb entry failed 
    1303     my $mess = "Failed to set metadata key: $docid\n"; 
    1304      
    1305     $mess .= "PATH: $ENV{'PATH'}\n"; 
    1306     $mess .= "Exit status: $status\n"; 
    1307     $mess .= "System Error Message: $!\n"; 
    1308      
    1309     $gsdl_cgi->generate_error($mess); 
    1310     } 
    1311     else { 
    1312     my $mess = "set-index-metadata successful: Key[$docid]\n"; 
    1313     $mess .= "  $metaname"; 
    1314     $mess .= "->[$metapos]" if (defined $metapos); 
    1315     $mess .= " = $metavalue\n"; 
    1316      
    1317     $gsdl_cgi->generate_ok_message($mess); 
    1318     } 
    1319  
    1320     print STDERR "END SET INDEX METADATA\n"; 
    1321     #return $status; # in case calling functions have any further use for this 
    1322 } 
    1323  
    1324 sub set_index_metadata 
    1325 { 
    1326     my $self = shift @_; 
    1327  
    1328     my $username  = $self->{'username'}; 
    1329     my $collect   = $self->{'collect'}; 
    1330     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1331     #my $gsdlhome  = $self->{'gsdlhome'}; 
    1332  
    1333     if ($baseaction::authentication_enabled) { 
    1334     # Ensure the user is allowed to edit this collection     
    1335     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    1336     } 
    1337  
    1338     my $site = $self->{'site'}; 
    1339     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1340      
    1341     $gsdl_cgi->checked_chdir($collect_dir); 
    1342  
    1343     # Obtain the collect dir 
    1344     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    1345  
    1346     # Make sure the collection isn't locked by someone else 
    1347     $self->lock_collection($username, $collect); 
    1348  
    1349     $self->_set_index_metadata(@_); 
    1350      
    1351     # Release the lock once it is done 
    1352     $self->unlock_collection($username, $collect); 
    1353 } 
    1354  
    1355 # call this to set the metadata for a combination of dirs archives, import or index, or live 
    1356 # if none specified, defaults to index which was the original behaviour of set_metadata. 
    1357 sub set_metadata 
    1358 { 
    1359     my $self = shift @_; 
    1360  
    1361     # Testing that not defining a variable, setting it to "" or to "  " all return false 
    1362     # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};' 
    1363  
    1364     my $where = $self->{'where'}; 
    1365     if(!$where || ($where =~ m/^\s*$/)) {    
    1366     $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour 
    1367     return; 
    1368     }  
    1369  
    1370     # authenticate and lock collection once, even if processing multiple dirs 
    1371     my $username  = $self->{'username'}; 
    1372     my $collect   = $self->{'collect'}; 
    1373     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1374      
    1375     if ($baseaction::authentication_enabled) { 
    1376     # Ensure the user is allowed to edit this collection     
    1377     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    1378     } 
    1379      
    1380     if($where =~ m/index/) { 
    1381     my $site = $self->{'site'}; 
    1382     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1383     $gsdl_cgi->checked_chdir($collect_dir); 
    1384     } 
    1385  
    1386     # Make sure the collection isn't locked by someone else 
    1387     $self->lock_collection($username, $collect); 
    1388  
    1389  
    1390     # now at last can set the metadata. $where can specify multiple 
    1391     # $where is of the form: import|archives|index, or a subset thereof 
    1392  
    1393     #my @whichdirs = split('\|', $where); 
    1394  
    1395     # just check whether $where contains import/archives/index/live in turn, and  
    1396     # for each case, process it accordingly 
    1397     if($where =~ m/import/) { 
    1398     $self->_set_import_metadata(@_);         
    1399     } 
    1400  
    1401     if($where =~ m/archives/) { 
    1402  
    1403     # look up docID arg which is optional to set_metadata because it's optional  
    1404     # to set_import, but which is compulsory to set_archives_metadata 
    1405     my $docid     = $self->{'d'}; 
    1406     if ((!defined $docid) || ($docid =~ m/^\s*$/)) { 
    1407         $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies 
    1408     }  
    1409     # we have a docid, so can set archives meta 
    1410     $self->_set_archives_metadata(@_);   
    1411     } 
    1412  
    1413     if($where =~ m/index/) { 
    1414      
    1415     # look up docID arg which is optional to set_metadata because it's optional  
    1416     # to set_import, but which is compulsory to set_archives_metadata 
    1417     my $docid     = $self->{'d'}; 
    1418     if ((!defined $docid) || ($docid =~ m/^\s*$/)) { 
    1419         $gsdl_cgi->generate_error("No docid (d=...) specified."); 
    1420     } 
    1421     # we have a docid, so can set index meta 
    1422     $self->_set_index_metadata(@_);  
    1423     }    
    1424  
    1425     if($where =~ m/live/) { 
    1426     $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine 
    1427     } 
    1428  
    1429     # Release the lock once it is done 
    1430     $self->unlock_collection($username, $collect); 
    1431 } 
    1432  
    1433 sub set_metadata_array 
    1434 { 
    1435     my $self = shift @_; 
    1436  
    1437     my $where = $self->{'where'}; 
    1438     if(!$where || ($where =~ m/^\s*$/)) {    
    1439     $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array 
    1440     return; 
    1441     } 
    1442  
    1443     my $username  = $self->{'username'}; 
    1444     my $collect   = $self->{'collect'}; 
    1445     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1446  
    1447     if ($baseaction::authentication_enabled) { 
    1448     # Ensure the user is allowed to edit this collection 
    1449     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    1450     } 
    1451  
    1452     # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir 
    1453     # But including the stmt during this code reorganisation to preserve as-is what used to happen 
    1454     my $site = $self->{'site'}; 
    1455     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1456     $gsdl_cgi->checked_chdir($collect_dir); 
    1457  
    1458     # Make sure the collection isn't locked by someone else 
    1459     $self->lock_collection($username, $collect); 
    1460  
    1461     if($where =~ m/import/) { 
    1462     $self->_set_import_metadata_array(@_); 
    1463     } 
    1464     if($where =~ m/archives/) { 
    1465     $self->_set_archives_metadata_array(@_); 
    1466     } 
    1467     if($where =~ m/index/) { 
    1468     $self->_set_index_metadata_array(@_); 
    1469     } 
    1470     if($where =~ m/live/) { 
    1471         $self->_set_live_metadata_array(@_); 
    1472     } 
    1473  
    1474     # Release the lock once it is done 
    1475     $self->unlock_collection($username, $collect); 
    1476 } 
    1477  
    1478 sub _set_index_metadata_array 
    1479 { 
    1480     my $self = shift @_; 
    1481  
    1482     my $collect   = $self->{'collect'}; 
    1483     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1484     my $site = $self->{'site'}; 
    1485     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1486  
    1487      
    1488     # look up additional args 
    1489      
    1490     my $infodbtype = $self->{'infodbtype'}; 
    1491      
    1492     my $json_str      = $self->{'json'}; 
    1493     my $doc_array = decode_json $json_str; 
    1494      
    1495      
    1496     my $global_status = 0; 
    1497     my $global_mess = ""; 
    1498      
    1499     my @all_docids = (); 
    1500      
    1501     foreach my $doc_array_rec ( @$doc_array ) { 
    1502      
    1503     my $status = -1; 
    1504     my $docid     = $doc_array_rec->{'docid'}; 
    1505      
    1506     push(@all_docids,$docid); 
    1507  
    1508     my $metaname  = $doc_array_rec->{'metaname'}; 
    1509     if(defined $metaname) { 
    1510         my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef 
    1511         my $metavalue = $doc_array_rec->{'metavalue'}; 
    1512         my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'}; 
    1513  
    1514         $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); 
    1515     } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable 
    1516         my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong 
    1517          
    1518         foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps 
    1519         $metaname  = $metatable_rec->{'metaname'}; 
    1520         my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'}; 
    1521         my $metapos = undef; 
    1522         my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray 
    1523  
    1524         foreach my $metavalue ( @$metavals ) { # metavals is an array 
    1525             $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry? 
    1526             if($metamode eq "override") { # now, having overridden the metavalue for the first,  
    1527             # need to accumulate subsequent metavals for this metaname, else the just-assigned 
    1528             # metavalue for this metaname will be lost 
    1529             $metamode = "accumulate"; 
    1530             } 
    1531         }            
    1532         } 
    1533     } 
    1534      
    1535     if ($status != 0) { 
    1536         # Catch error if set infodb entry failed 
    1537         $global_status = $status; 
    1538         $global_mess .= "Failed to set metadata key: $docid\n"; 
    1539         $global_mess .= "Exit status: $status\n"; 
    1540         $global_mess .= "System Error Message: $!\n"; 
    1541         $global_mess .= "-" x 20; 
    1542     } 
    1543     } 
    1544      
    1545     if ($global_status != 0) { 
    1546     $global_mess .= "PATH: $ENV{'PATH'}\n"; 
    1547     $gsdl_cgi->generate_error($global_mess); 
    1548     } 
    1549     else { 
    1550     my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n"; 
    1551     $gsdl_cgi->generate_ok_message($mess); 
    1552     } 
    1553 } 
    1554  
    1555 sub set_index_metadata_array 
    1556 { 
    1557     my $self = shift @_; 
    1558  
    1559     my $username  = $self->{'username'}; 
    1560     my $collect   = $self->{'collect'}; 
    1561     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1562 #    my $gsdlhome  = $self->{'gsdlhome'}; 
    1563  
    1564     if ($baseaction::authentication_enabled) { 
    1565     # Ensure the user is allowed to edit this collection     
    1566     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    1567     } 
    1568  
    1569     my $site = $self->{'site'}; 
    1570     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1571      
    1572     $gsdl_cgi->checked_chdir($collect_dir); 
    1573  
    1574     # Obtain the collect dir 
    1575     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    1576  
    1577     # Make sure the collection isn't locked by someone else 
    1578     $self->lock_collection($username, $collect); 
    1579  
    1580     $self->_set_index_metadata_array(@_); 
    1581  
    1582     # Release the lock once it is done 
    1583     $self->unlock_collection($username, $collect); 
    1584 } 
    1585  
    1586 # experimental, newly added in and untested 
    1587 sub _set_live_metadata_array 
    1588 { 
    1589     my $self = shift @_; 
    1590  
    1591     my $collect   = $self->{'collect'}; 
    1592     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1593  
    1594     my $site = $self->{'site'}; 
    1595     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1596  
    1597      
    1598     # look up additional args 
    1599     my $infodbtype = $self->{'infodbtype'}; 
    1600     # To people who know $collect_tail please add some comments 
    1601     # Obtain path to the database 
    1602     my $collect_tail = $collect; 
    1603     $collect_tail =~ s/^.*[\/|\\]//; 
    1604     my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); 
    1605     my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory); 
    1606  
    1607      
    1608     my $json_str      = $self->{'json'}; 
    1609     my $doc_array = decode_json $json_str; 
    1610      
    1611      
    1612     my $global_status = 0; 
    1613     my $global_mess = ""; 
    1614      
    1615     my @all_docids = (); 
    1616  
    1617  
    1618     foreach my $doc_array_rec ( @$doc_array ) { 
    1619      
    1620     my $status = -1; 
    1621     my $docid     = $doc_array_rec->{'docid'}; 
    1622  
    1623     push(@all_docids,$docid); 
    1624  
    1625     my $metaname  = $doc_array_rec->{'metaname'}; 
    1626     if(defined $metaname) { 
    1627         my $dbkey = "$docid.$metaname"; 
    1628         my $metavalue = $doc_array_rec->{'metavalue'}; 
    1629  
    1630         # Set the new value 
    1631         my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\""; 
    1632         $status = system($cmd); 
    1633  
    1634     } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable 
    1635         my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong 
    1636         foreach my $metatable_rec ( @$metatable ) { 
    1637         $metaname  = $metatable_rec->{'metaname'}; 
    1638         my $dbkey = "$docid.$metaname"; 
    1639  
    1640         my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray 
    1641         foreach my $metavalue ( @$metavals ) { 
    1642              my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\""; 
    1643              $status = system($cmd); 
    1644         } 
    1645         } 
    1646          
    1647     } 
    1648  
    1649     if ($status != 0) { 
    1650         # Catch error if gdbmget failed 
    1651         $global_status = $status; 
    1652         $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey 
    1653         $global_mess .= "Exit status: $status\n"; 
    1654         $global_mess .= "System Error Message: $!\n"; 
    1655         $global_mess .= "-" x 20; 
    1656     } 
    1657     } 
    1658      
    1659     if ($global_status != 0) { 
    1660     $global_mess .= "PATH: $ENV{'PATH'}\n"; 
    1661     $gsdl_cgi->generate_error($global_mess); 
    1662     } 
    1663     else { 
    1664     my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n"; 
    1665     $gsdl_cgi->generate_ok_message($mess); 
    1666     } 
    1667 } 
    1668  
    1669 sub set_live_metadata_array 
    1670 { 
    1671     my $self = shift @_; 
    1672  
    1673     my $username  = $self->{'username'}; 
    1674     my $collect   = $self->{'collect'}; 
    1675     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    1676  
    1677     if ($baseaction::authentication_enabled) { 
    1678     # Ensure the user is allowed to edit this collection     
    1679     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    1680     } 
    1681  
    1682     my $site = $self->{'site'}; 
    1683     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    1684      
    1685     $gsdl_cgi->checked_chdir($collect_dir); 
    1686  
    1687     # Make sure the collection isn't locked by someone else 
    1688     $self->lock_collection($username, $collect); 
    1689  
    1690     $self->_set_live_metadata_array(@_); 
    1691  
    1692     # Release the lock once it is done 
    1693     $self->unlock_collection($username, $collect); 
    1694 } 
    1695  
    1696  
    1697 sub dxml_metadata 
    1698 { 
    1699     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    1700     my $metaname = $parser->{'parameters'}->{'metaname'}; 
    1701     my $metamode = $parser->{'parameters'}->{'metamode'}; 
    1702      
    1703 ###!!!    print STDERR "**** Processing closing </Metadata> tag\n"; 
    1704      
    1705     my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'}; 
    1706      
    1707     # Find the right metadata tag and checks if we are going to 
    1708     # override it 
    1709     # 
    1710     # Note: This over writes the first metadata block it 
    1711     # encountered. If there are multiple Sections in the doc.xml, it 
    1712     # might not behave as you would expect 
    1713  
    1714     my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'}; 
    1715 ##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n"; 
    1716 ##    print STDERR "**** metamode = $metamode\n"; 
    1717      
    1718     if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))  
    1719     { 
    1720         my $name_attr = $attrHash->{'name'}; 
    1721         # print STDOUT "*** testing: $name_attr eq $metaname ?   and  $metamode eq override ?\n"; 
    1722         if (($name_attr eq $metaname) && ($metamode eq "override"))  
    1723         { 
    1724             if (!defined $parser->{'parameters'}->{'poscount'})  
    1725             {  
    1726                 $parser->{'parameters'}->{'poscount'} = 0;  
    1727             }  
    1728             else  
    1729             {  
    1730                 $parser->{'parameters'}->{'poscount'}++;  
    1731             }  
    1732              
    1733             if ((defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) 
    1734                 || (!defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == 0)) 
    1735                  
    1736             { 
    1737                 ##print STDERR "#### got match!!\n"; 
    1738                 # Get the value and override the current value 
    1739                 my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    1740                 $attrHash->{'_content'} = $metavalue; 
    1741                  
    1742                 # Don't want it to wipe out any other pieces of metadata 
    1743                 $parser->{'parameters'}->{'metamode'} = "done"; 
    1744             } 
    1745             elsif (defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) 
    1746             { 
    1747                 my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    1748                 $attrHash->{'_content'} = $metavalue; 
    1749                 $parser->{'parameters'}->{'metamode'} = "done"; 
    1750             } 
    1751         } 
    1752     } 
    1753  
    1754     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    1755     return [$tagname => $attrHash]; 
    1756 } 
    1757  
    1758 # This method exists purely for catching invalid section numbers that the client 
    1759 # requested to edit. Once the parser has reached the end (the final </Archive> tag), 
    1760 # we've seen all the Sections in the doc.xml, and none of their section nums matched 
    1761 # if the metamode has not been set to 'done' by then. 
    1762 sub dxml_archive 
    1763 { 
    1764     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    1765     my $metamode = $parser->{'parameters'}->{'metamode'}; 
    1766      
    1767     my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'}; 
    1768     my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'}; 
    1769      
    1770 #    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n"; 
    1771      
    1772     if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) { 
    1773     print STDERR "@@@ $tagname Finished processing FINAL Section.\n"; 
    1774  
    1775     my $metaname = $parser->{'parameters'}->{'metaname'}; 
    1776     my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    1777      
    1778     print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n"; 
    1779     print STDERR "\t(last seen section number in document was $curr_secnum)\n"; 
    1780     print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n"; 
    1781     print STDERR "\tin section $opt_doc_secnum.\n"; 
    1782     $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found."; 
    1783     } 
    1784      
    1785     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    1786     return [$tagname => $attrHash]; 
    1787 } 
    1788  
    1789 sub dxml_description 
    1790 { 
    1791     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    1792     my $metamode = $parser->{'parameters'}->{'metamode'}; 
    1793  
    1794     my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'}; 
    1795     my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || ""; 
    1796      
    1797 ###!!!  print STDERR "**** Processing a closing </Description> tag \n"; 
    1798 #   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n"; 
    1799      
    1800     # Accumulate the metadata 
    1801  
    1802     # We'll be accumulating metadata at this point if we haven't found and therefore 
    1803     # haven't processed the metadata yet.  
    1804     # For subsections, this means that if we're at a matching subsection, but haven't  
    1805     # found the correct metaname to override in that subsection, we accumulate it as new 
    1806     # meta in the subsection by adding it to the current description. 
    1807     # If there's no subsection info for the metadata, it will accumulate at the top level 
    1808     # section description if we hadn't found a matching metaname to override at this point. 
    1809  
    1810     # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now 
    1811     # at the toplevel section. In the latter case, it means we want to process meta in the 
    1812     # toplevel section. So the eq check between the values below will work in all cases. 
    1813      
    1814     # The only time this won't work is if an opt_doc_secnum beyond the section numbers of 
    1815     # this document has been provided. In that case, the metadata for that opt_doc_secnum 
    1816     # won't get attached/accumulated to any part of the doc, not even its top-level section. 
    1817  
    1818     if ($curr_secnum eq $opt_doc_secnum  
    1819         && ($metamode eq "accumulate" || $metamode eq "override")) { 
    1820         if ($metamode eq "override") { 
    1821         print "Got to end of <Description> block. No metadata value to override.  Switching 'metamode' to accumulate\n"; 
    1822         } 
    1823  
    1824         # If we get to here and metamode is override, this means there  
    1825         # was no existing value to overide => treat as an append operation 
    1826          
    1827         # Tack a new metadata tag on to the end of the <Metadata>+ block 
    1828         my $metaname = $parser->{'parameters'}->{'metaname'}; 
    1829         my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    1830          
    1831         my $metadata_attr = {  
    1832         '_content' => $metavalue,  
    1833         'name' => $metaname,  
    1834         'mode' => "accumulate"  
    1835         }; 
    1836          
    1837         my $append_metadata = [ "Metadata" => $metadata_attr ]; 
    1838         my $description_content = $attrHash->{'_content'}; 
    1839          
    1840         print "Appending metadata to doc.xml\n"; 
    1841          
    1842         if (ref($description_content)) { 
    1843         # got some existing interesting nested content 
    1844         push(@$description_content, "    ", $append_metadata ,"\n        "); 
    1845         } 
    1846         else { 
    1847         #description_content is most likely a string such as "\n" 
    1848         $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ]; 
    1849         } 
    1850          
    1851         $parser->{'parameters'}->{'metamode'} = "done"; 
    1852     }        
    1853     else { 
    1854         # metamode most likely "done" signifying that it has already found a position to add the metadata to.  
    1855 ##      print STDERR "**** NOT ACCUMULATE?!? \n"; 
    1856     } 
    1857  
    1858     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    1859     return [$tagname => $attrHash]; 
    1860 } 
    1861  
    1862  
    1863 sub dxml_start_section 
    1864 { 
    1865     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    1866  
    1867     my $new_depth = scalar(@$contextArray); 
    1868  
    1869 #   print STDERR "**** START SECTION \n"; 
    1870      
    1871     if ($new_depth == 1) { 
    1872     $parser->{'parameters'}->{'curr_section_depth'} = 1; 
    1873     $parser->{'parameters'}->{'curr_section_num'}   = ""; 
    1874     } 
    1875  
    1876     my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'}; 
    1877     my $old_secnum = $parser->{'parameters'}->{'curr_section_num'}; 
    1878  
    1879     my $new_secnum; 
    1880  
    1881     if ($new_depth > $old_depth) { 
    1882     # child subsection 
    1883     $new_secnum = "$old_secnum.1"; 
    1884     } 
    1885     elsif ($new_depth == $old_depth) { 
    1886     # sibling section => increase it's value by 1 
    1887     my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/); 
    1888     $tail_num++; 
    1889     $new_secnum = $old_secnum; 
    1890     $new_secnum =~ s/\.(\d+)$/\.$tail_num/; 
    1891     } 
    1892     else { 
    1893     # back up to parent section => lopp off tail 
    1894     $new_secnum = $old_secnum; 
    1895     $new_secnum =~ s/\.\d+$//; 
    1896     } 
    1897  
    1898     $parser->{'parameters'}->{'curr_section_depth'} = $new_depth; 
    1899     $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum; 
    1900      
    1901     1; 
    1902 } 
    1903  
    1904 sub edit_xml_file 
    1905 { 
    1906     my $self = shift @_; 
    1907     my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_; 
    1908  
    1909     # use XML::Rules to add it in (read in and out again) 
    1910     my $parser = XML::Rules->new(start_rules     => $start_rules, 
    1911                  rules           => $rules,  
    1912                  style           => 'filter', 
    1913                                  output_encoding => 'utf8' ); 
    1914  
    1915     my $xml_in = ""; 
    1916     if (!open(MIN,"<$filename")) { 
    1917     $gsdl_cgi->generate_error("Unable to read in $filename: $!"); 
    1918     } 
    1919     else { 
    1920         # Read all the text in 
    1921     my $line; 
    1922     while (defined ($line=<MIN>)) { 
    1923         $xml_in .= $line; 
    1924     } 
    1925     close(MIN); 
    1926      
    1927     my $MOUT;     
    1928     if (!open($MOUT,">$filename")) { 
    1929         $gsdl_cgi->generate_error("Unable to write out to $filename: $!"); 
    1930     } 
    1931     else { 
    1932         # Matched lines will get handled by the call backs 
    1933 ##      my $xml_out = ""; 
    1934  
    1935         binmode($MOUT,":utf8"); 
    1936         $parser->filter($xml_in,$MOUT, $options); 
    1937  
    1938 #       binmode(MOUT,":utf8"); 
    1939 #       print MOUT $xml_out; 
    1940         close($MOUT);        
    1941     } 
    1942     } 
    1943  
    1944     # copy across any custom error information that was stored during parsing 
    1945     $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});     
    1946 } 
    1947  
    1948 sub edit_doc_xml 
    1949 { 
    1950     my $self = shift @_; 
    1951     my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_; 
    1952  
    1953     my $info_mess = <<RAWEND; 
    1954 **************************** 
    1955   edit_doc_xml() 
    1956 **************************** 
    1957 RAWEND 
    1958  
    1959     $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename); 
    1960     $info_mess .= " metaname    = $metaname\n"    if defined($metaname); 
    1961     $info_mess .= " metapos     = $metapos\n"     if defined($metapos); 
    1962     $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue); 
    1963     $info_mess .= " metamode    = $metamode\n"    if defined($metamode); 
    1964     $info_mess .= " opt_secnum  = $opt_secnum\n"  if defined($opt_secnum); 
    1965     $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue); 
    1966       
    1967     $info_mess .= "****************************\n"; 
    1968  
    1969     $gsdl_cgi->generate_message($info_mess); 
    1970      
    1971     # To monitor which section/subsection number we are in 
    1972     my @start_rules =  
    1973     ( 'Section'    => \&dxml_start_section ); 
    1974  
    1975     # use XML::Rules to add it in (read in and out again) 
    1976     # Set the call back functions 
    1977     my @rules =  
    1978     ( _default => 'raw', 
    1979       'Metadata'    => \&dxml_metadata, 
    1980       'Description' => \&dxml_description, 
    1981       'Archive'     => \&dxml_archive); # just for catching errors at end 
    1982        
    1983     # Sets the parameters 
    1984     my $options = { 'metaname'  => $metaname, 
    1985             'metapos'   => $metapos, 
    1986             'metavalue' => $metavalue, 
    1987             'metamode'  => $metamode, 
    1988             'prevmetavalue' => $prevmetavalue }; 
    1989              
    1990     if (defined $opt_secnum) { 
    1991     $options->{'secnum'} = $opt_secnum; 
    1992     } 
    1993  
    1994     $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options); 
    1995 } 
    1996  
    1997 sub set_archives_metadata_entry 
    1998 { 
    1999     my $self = shift @_; 
    2000     my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_; 
    2001  
    2002     my $info_mess = <<RAWEND; 
    2003 **************************** 
    2004   set_archives_metadata_entry() 
    2005 **************************** 
    2006 RAWEND 
    2007  
    2008     $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir); 
    2009     $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir); 
    2010     $info_mess .= " collect     = $collect\n"     if defined($collect); 
    2011     $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype); 
    2012     $info_mess .= " docid       = $docid\n"       if defined($docid); 
    2013     $info_mess .= " metaname    = $metaname\n"    if defined($metaname); 
    2014     $info_mess .= " metapos     = $metapos\n"     if defined($metapos); 
    2015     $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue); 
    2016     $info_mess .= " metamode    = $metamode\n"    if defined($metamode); 
    2017     $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue); 
    2018       
    2019     $info_mess .= "****************************\n"; 
    2020  
    2021     $gsdl_cgi->generate_message($info_mess); 
    2022      
    2023     # Obtain the doc.xml path for the specified docID 
    2024     my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/); 
    2025  
    2026     my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    2027     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root); 
    2028     my $doc_xml_file = $doc_rec->{'doc-file'}->[0]; 
    2029      
    2030     # The $doc_xml_file is relative to the archives, and now let's get the full path 
    2031     my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");     
    2032     my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file); 
    2033  
    2034     # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined 
    2035     # in which case, we need to remove all metavalues for the metaname at the given (sub)section 
    2036     # Thereafter, we will finally be setting the overriding metavalue for this metaname 
    2037     if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") { 
    2038     # remove all values of $metaname metadata  
    2039     $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode); 
    2040     } 
    2041     # Edit the doc.xml file with the specified metadata name, value and position. 
    2042     # TODO: there is a potential problem here as this edit_doc_xml function  
    2043     # is assuming the simple doc.xml situation where there is only one Section and no SubSections. 
    2044     # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml 
    2045      
    2046     # dxml_metadata method ignores metapos if metamode anything other than override 
    2047     $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename, 
    2048             $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue); 
    2049  
    2050     # return 0; # return 0 for now to indicate no error 
    2051     return (defined $self->{'error_msg'}) ? 1 : 0; 
    2052 } 
    2053  
    2054  
    2055 sub set_archives_metadata 
    2056 { 
    2057     my $self = shift @_; 
    2058  
    2059     my $username  = $self->{'username'}; 
    2060     my $collect   = $self->{'collect'}; 
    2061     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2062      
    2063     if ($baseaction::authentication_enabled) { 
    2064     # Ensure the user is allowed to edit this collection 
    2065     $self->authenticate_user($username, $collect); 
    2066     } 
    2067  
    2068     # Make sure the collection isn't locked by someone else 
    2069     $self->lock_collection($username, $collect); 
    2070  
    2071     $self->_set_archives_metadata(@_); 
    2072  
    2073     # Release the lock once it is done 
    2074     $self->unlock_collection($username, $collect); 
    2075 } 
    2076  
    2077 sub _set_archives_metadata_array 
    2078 { 
    2079     my $self = shift @_; 
    2080      
    2081     my $collect   = $self->{'collect'}; 
    2082     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2083     my $site = $self->{'site'}; 
    2084     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    2085  
    2086     # look up additional args 
    2087      
    2088     my $infodbtype = $self->{'infodbtype'}; 
    2089      
    2090     my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); 
    2091      
    2092     my $json_str      = $self->{'json'}; 
    2093     my $doc_array = decode_json $json_str; 
    2094      
    2095      
    2096     my $global_status = 0; 
    2097     my $global_mess = ""; 
    2098      
    2099     my @all_docids = (); 
    2100      
    2101     foreach my $doc_array_rec ( @$doc_array ) { 
    2102     my $status    = -1; 
    2103     my $docid     = $doc_array_rec->{'docid'}; 
    2104  
    2105     push(@all_docids,$docid); 
    2106      
    2107     my $metaname  = $doc_array_rec->{'metaname'}; 
    2108     if(defined $metaname) { 
    2109          
    2110         my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override 
    2111  
    2112         my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'}; 
    2113         my $metavalue = $doc_array_rec->{'metavalue'}; 
    2114         my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata 
    2115          
    2116          
    2117         if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { 
    2118         # make "accumulate" the default (less destructive, as it won't actually  
    2119         # delete any existing values) 
    2120         $metamode = "accumulate"; 
    2121         }        
    2122          
    2123         $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid, 
    2124                 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue); 
    2125     } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable 
    2126         my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong 
    2127          
    2128         foreach my $metatable_rec ( @$metatable ) { 
    2129         $metaname  = $metatable_rec->{'metaname'}; 
    2130         my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'}; 
    2131         my $metapos = undef; 
    2132         my $prevmetavalue = undef; 
    2133         my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray 
    2134          
    2135         foreach my $metavalue ( @$metavals ) { 
    2136             $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype, 
    2137                                  $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue); 
    2138              
    2139             if($metamode eq "override") { # now, having overridden the metavalue for the first,  
    2140             # need to accumulate subsequent metavals for this metaname, else the just-assigned 
    2141             # metavalue for this metaname will be lost 
    2142             $metamode = "accumulate"; 
    2143             } 
    2144         }            
    2145         }        
    2146     } 
    2147          
    2148     if ($status != 0) { 
    2149         # Catch error if set infodb entry failed 
    2150         $global_status = $status; 
    2151         $global_mess .= "Failed to set metadata key: $docid\n"; 
    2152         $global_mess .= "Exit status: $status\n"; 
    2153         $global_mess .= "System Error Message: $!\n"; 
    2154         $global_mess .= "-" x 20 . "\n"; 
    2155     } 
    2156     } 
    2157      
    2158     if ($global_status != 0) { 
    2159     $global_mess .= "PATH: $ENV{'PATH'}\n"; 
    2160     $gsdl_cgi->generate_error($global_mess); 
    2161     } 
    2162     else { 
    2163     my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n"; 
    2164     $gsdl_cgi->generate_ok_message($mess); 
    2165     } 
    2166 } 
    2167  
    2168 sub set_archives_metadata_array 
    2169 { 
    2170     my $self = shift @_; 
    2171  
    2172     my $username  = $self->{'username'}; 
    2173     my $collect   = $self->{'collect'}; 
    2174     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2175 #    my $gsdlhome  = $self->{'gsdlhome'}; 
    2176  
    2177     if ($baseaction::authentication_enabled) { 
    2178     # Ensure the user is allowed to edit this collection     
    2179     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    2180     } 
    2181  
    2182     my $site = $self->{'site'}; 
    2183     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    2184      
    2185     $gsdl_cgi->checked_chdir($collect_dir); 
    2186  
    2187     # Obtain the collect dir 
    2188     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    2189  
    2190     # Make sure the collection isn't locked by someone else 
    2191     $self->lock_collection($username, $collect); 
    2192  
    2193     $self->_set_archives_metadata_array(@_); 
    2194      
    2195     # Release the lock once it is done 
    2196     $self->unlock_collection($username, $collect); 
    2197 } 
    2198  
    2199 sub _remove_archives_metadata 
    2200 { 
    2201     my $self = shift @_; 
    2202  
    2203     my $collect   = $self->{'collect'}; 
    2204     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2205 #   my $gsdlhome  = $self->{'gsdlhome'}; 
    2206     my $infodbtype = $self->{'infodbtype'}; 
    2207      
    2208     my $site = $self->{'site'}; 
    2209          
    2210     # Obtain the collect and archive dir    
    2211     my $collect_dir = $gsdl_cgi->get_collection_dir($site);  
    2212      
    2213     my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); 
    2214  
    2215     # look up additional args 
    2216     my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/); 
    2217      
    2218     my $metaname = $self->{'metaname'}; 
    2219     my $metapos = $self->{'metapos'}; 
    2220     my $metavalue = $self->{'metavalue'}; 
    2221  
    2222     $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/)); 
    2223     $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here 
    2224  
    2225     # if the user hasn't told us what to delete, not having given a metavalue or metapos, 
    2226     # default to deleting the first metavalue for the given metaname 
    2227     # Beware that if both metapos AND metavalue are defined, both matches (if any)  
    2228     # seem to get deleted in one single remove_archives_meta action invocation. 
    2229     # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being 
    2230     # deleted, both get deleted. 
    2231     if(!defined $metapos && !defined $metavalue) { 
    2232         $metapos = 0; 
    2233     } 
    2234  
    2235     my $metamode = $self->{'metamode'}; 
    2236     $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/)); 
    2237  
    2238     my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    2239     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid); 
    2240  
    2241     # This now stores the full pathname 
    2242     my $doc_filename = $doc_rec->{'doc-file'}->[0];  
    2243  
    2244     my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode); 
    2245 #   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum); 
    2246      
    2247     if ($status == 0)  
    2248     { 
    2249         my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n"; 
    2250         $mess .= "  $metaname"; 
    2251         $mess .= "->[$metapos]" if (defined $metapos); 
    2252         $mess .= " ($metavalue)" if (defined $metavalue); 
    2253         $gsdl_cgi->generate_ok_message($mess);   
    2254     } 
    2255     else  
    2256     { 
    2257         my $mess .= "Failed to remove archives metadata key: $docid\n"; 
    2258         $mess .= "Exit status: $status\n"; 
    2259         $mess .= "System Error Message: $!\n"; 
    2260         $mess .= "-" x 20 . "\n"; 
    2261          
    2262         $gsdl_cgi->generate_error($mess); 
    2263     } 
    2264      
    2265     #return $status; # in case calling functions have a use for this 
    2266 } 
    2267  
    2268 sub remove_archives_metadata 
    2269 { 
    2270     my $self = shift @_; 
    2271  
    2272     my $username  = $self->{'username'}; 
    2273     my $collect   = $self->{'collect'}; 
    2274     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2275      
    2276     if ($baseaction::authentication_enabled)  
    2277     { 
    2278         # Ensure the user is allowed to edit this collection         
    2279         $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);  
    2280     } 
    2281  
    2282     # Make sure the collection isn't locked by someone else 
    2283     $self->lock_collection($username, $collect); 
    2284  
    2285     $self->_remove_archives_metadata(@_); 
    2286  
    2287     # Release the lock once it is done 
    2288     $self->unlock_collection($username, $collect); 
    2289 } 
    2290  
    2291 sub remove_from_doc_xml 
    2292 { 
    2293     my $self = shift @_; 
    2294     my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_; 
    2295      
    2296     my @start_rules = ('Section' => \&dxml_start_section); 
    2297      
    2298     # Set the call-back functions for the metadata tags 
    2299     my @rules =  
    2300     (  
    2301         _default => 'raw', 
    2302         'Metadata' => \&rfdxml_metadata 
    2303     ); 
    2304          
    2305     my $parser = XML::Rules->new 
    2306     ( 
    2307         start_rules => \@start_rules, 
    2308         rules => \@rules,  
    2309         style => 'filter', 
    2310         output_encoding => 'utf8', 
    2311 #    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm 
    2312         stripspaces => 2|0|0 # ineffectual 
    2313     ); 
    2314      
    2315     my $status = 0; 
    2316     my $xml_in = ""; 
    2317     if (!open(MIN,"<$doc_xml_filename"))  
    2318     { 
    2319         $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!"); 
    2320         $status = 1; 
    2321     } 
    2322     else  
    2323     { 
    2324         # Read them in 
    2325         my $line; 
    2326         while (defined ($line=<MIN>)) { 
    2327             $xml_in .= $line; 
    2328         } 
    2329         close(MIN);  
    2330  
    2331         # Filter with the call-back functions 
    2332         my $xml_out = ""; 
    2333  
    2334         my $MOUT; 
    2335         if (!open($MOUT,">$doc_xml_filename")) { 
    2336             $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!"); 
    2337             $status = 1; 
    2338         } 
    2339         else { 
    2340             binmode($MOUT,":utf8"); 
    2341             $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode}); 
    2342             close($MOUT);        
    2343         } 
    2344     } 
    2345     return $status; 
    2346 } 
    2347  
    2348 sub rfdxml_metadata 
    2349 { 
    2350     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    2351  
    2352     # For comparisons, toplevel section is indicated by "" 
    2353     my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || ""; 
    2354     my $secid = $parser->{'parameters'}->{'secid'} || ""; 
    2355  
    2356     if (!($secid eq $curr_sec_num)) 
    2357     { 
    2358         # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    2359         return [$tagname => $attrHash]; 
    2360     } 
    2361  
    2362     if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) 
    2363     { 
    2364         if (!defined $parser->{'parameters'}->{'poscount'}) 
    2365         { 
    2366             $parser->{'parameters'}->{'poscount'} = 0; 
    2367         } 
    2368         else 
    2369         { 
    2370             $parser->{'parameters'}->{'poscount'}++; 
    2371         } 
    2372          
    2373         # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname 
    2374         if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {           
    2375             return []; 
    2376         } 
    2377  
    2378         if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})) 
    2379         {    
    2380             return []; 
    2381         } 
    2382          
    2383         if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'})) 
    2384         {    
    2385             return []; 
    2386         } 
    2387     } 
    2388      
    2389     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    2390     return [$tagname => $attrHash]; 
    2391 } 
    2392  
    2393 sub mxml_metadata 
    2394 { 
    2395     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    2396     my $metaname = $parser->{'parameters'}->{'metaname'}; 
    2397     my $metamode = $parser->{'parameters'}->{'metamode'}; 
    2398  
    2399     # Report error if we don't see FileName tag before this 
    2400     die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'})); 
    2401      
    2402     # Don't do anything if we are not in the right FileSet 
    2403     my $file_regexp = $parser->{'parameters'}->{'current_file'}; 
    2404     if ($file_regexp =~ /\.\*/) { 
    2405     # Only interested in a file_regexp if it specifies precisely one 
    2406     # file.   
    2407     # So, skip anything with a .* in it as it is too general 
    2408 ##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n"; 
    2409     return [$tagname => $attrHash]; 
    2410     } 
    2411     my $src_file = $parser->{'parameters'}->{'src_file'}; 
    2412     if (!($src_file =~ /$file_regexp/)) { 
    2413     return [$tagname => $attrHash]; 
    2414     } 
    2415 ##    print STDERR "*** mxl metamode = $metamode\n"; 
    2416  
    2417     # Find the right metadata tag and checks if we are going to override it 
    2418     my $name_attr = $attrHash->{'name'}; 
    2419     if (($name_attr eq $metaname) && ($metamode eq "override")) { 
    2420  
    2421     # now metadata.xml functions need to keep track of metapos 
    2422     if (!defined $parser->{'parameters'}->{'poscount'})  
    2423     {  
    2424         $parser->{'parameters'}->{'poscount'} = 0;  
    2425     }  
    2426     else  
    2427     {  
    2428         $parser->{'parameters'}->{'poscount'}++;  
    2429     }  
    2430  
    2431     # If either the metapos or prevmetavalue is set, 
    2432         # get the value and override the current value 
    2433     my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    2434  
    2435     if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) 
    2436     { 
    2437         $attrHash->{'_content'} = $metavalue; 
    2438  
    2439         ##  print STDERR "**** overriding metadata.xml\n"; 
    2440          
    2441         # Don't want it to wipe out any other pieces of metadata 
    2442         $parser->{'parameters'}->{'metamode'} = "done"; 
    2443     } 
    2444     elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) 
    2445     { 
    2446         $attrHash->{'_content'} = $metavalue; 
    2447         $parser->{'parameters'}->{'metamode'} = "done"; 
    2448     } 
    2449     }  
    2450  
    2451     # mxml_description will process the metadata if metadata is accumulate,  
    2452     # or if we haven't found the metadata to override 
    2453  
    2454     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    2455     return [$tagname => $attrHash]; 
    2456 } 
    2457  
    2458  
    2459 sub mxml_description 
    2460 { 
    2461     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    2462     my $metamode = $parser->{'parameters'}->{'metamode'};     
    2463  
    2464     # Failed... Report error if we don't see FileName tag before this 
    2465     die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'})); 
    2466  
    2467     # Don't do anything if we are not in the right FileSet 
    2468     my $file_regexp = $parser->{'parameters'}->{'current_file'}; 
    2469     if ($file_regexp =~ m/\.\*/) { 
    2470     # Only interested in a file_regexp if it specifies precisely one 
    2471     # file.   
    2472     # So, skip anything with a .* in it as it is too general 
    2473     return [$tagname => $attrHash]; 
    2474     } 
    2475     my $src_file = $parser->{'parameters'}->{'src_file'}; 
    2476      
    2477     if (!($src_file =~ m/$file_regexp/)) { 
    2478     return [$tagname => $attrHash]; 
    2479     } 
    2480  
    2481     # Accumulate the metadata block to the end of the description block 
    2482     # Note: This adds metadata block to all description blocks, so if there are  
    2483     # multiple FileSets, it will add to all of them 
    2484     if (($metamode eq "accumulate") || ($metamode eq "override")) { 
    2485  
    2486     # if metamode was "override" but get to here then it failed to 
    2487     # find an item to override, in which case it should append its  
    2488     # value to the end, just like the "accumulate" mode 
    2489  
    2490     if ($metamode eq "override") { 
    2491         print "No metadata value to override.  Switching 'metamode' to accumulate\n"; 
    2492     } 
    2493  
    2494     # tack a new metadata tag on to the end of the <Metadata>+ block 
    2495     my $metaname = $parser->{'parameters'}->{'metaname'}; 
    2496     my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    2497      
    2498     my $metadata_attr = { '_content' => $metavalue,  
    2499                   'name'     => $metaname,  
    2500                   'mode'     => "accumulate" }; 
    2501  
    2502     my $append_metadata = [ "Metadata" => $metadata_attr ]; 
    2503     my $description_content = $attrHash->{'_content'}; 
    2504      
    2505 ##  print STDERR "*** appending to metadata.xml\n"; 
    2506  
    2507     # append the new metadata element to the end of the current 
    2508     # content contained inside this tag 
    2509     if (ref($description_content) eq "") { 
    2510         # => string or numeric literal 
    2511         # this is caused by a <Description> block has no <Metadata> child elements 
    2512         # => set up an empty array in '_content' 
    2513         $attrHash->{'_content'} = [ "\n" ];  
    2514         $description_content = $attrHash->{'_content'}; 
    2515     } 
    2516  
    2517     push(@$description_content,"    ", $append_metadata ,"\n        "); 
    2518     $parser->{'parameters'}->{'metamode'} = "done"; 
    2519     } 
    2520  
    2521     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    2522     return [$tagname => $attrHash]; 
    2523 } 
    2524  
    2525  
    2526 sub mxml_filename 
    2527 { 
    2528     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    2529  
    2530     # Store the filename of the Current Fileset 
    2531     # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd 
    2532     # FileName tag must come before Description tag 
    2533     $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'}; 
    2534  
    2535     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    2536     return [$tagname => $attrHash]; 
    2537 } 
    2538  
    2539  
    2540 sub mxml_fileset 
    2541 { 
    2542     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    2543  
    2544     # Initilise the current_file 
    2545     # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd 
    2546     # FileName tag must come before Description tag 
    2547     $parser->{'parameters'}->{'current_file'} = ""; 
    2548  
    2549     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    2550     return [$tagname => $attrHash]; 
    2551 } 
    2552  
    2553 sub mxml_directorymetadata 
    2554 { 
    2555     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    2556  
    2557     # if we haven't processed the metadata when we reach the end of metadata.xml 
    2558     # it's because there's no particular FileSet element whose FileName matched 
    2559     # In which case, add a new FileSet for this FileName 
    2560     my $metamode = $parser->{'parameters'}->{'metamode'}; 
    2561     if($metamode ne "done") { 
    2562      
    2563     if ($metamode eq "override") { 
    2564         print "No metadata value to override.  Switching 'metamode' to accumulate\n"; 
    2565     } 
    2566  
    2567     # If we get to here and metamode is override, this means there  
    2568     # was no existing value to overide => treat as an append operation 
    2569      
    2570     # Create a new FileSet element and append to DirectoryMetadata 
    2571     # <FileSet> 
    2572     # <FileName>src_file</FileName> 
    2573     # <Description> 
    2574     # <Metadata mode="" name="">metavalue</Metadata> 
    2575     # </Description> 
    2576     # </FileSet> 
    2577     my $src_file = $parser->{'parameters'}->{'src_file'}; 
    2578     my $metaname = $parser->{'parameters'}->{'metaname'}; 
    2579     my $metavalue = $parser->{'parameters'}->{'metavalue'}; 
    2580     my $metadata_attr = {  
    2581         '_content' => $metavalue,  
    2582         'name' => $metaname,  
    2583         'mode' => "accumulate"  
    2584     }; 
    2585     my $append_metadata = [ "Metadata" => $metadata_attr ]; 
    2586     my $description_attr = { '_content' => [ "\n\t\t   ", $append_metadata, "\n\t\t"] }; 
    2587     my $description_element = [ "Description" => $description_attr ]; 
    2588  
    2589     #_content is not an attribute, it's special and holds the children of this element 
    2590     # including the textnode value embedded in this element if any. 
    2591     my $filename_attr = {'_content' => $src_file}; 
    2592     my $filename_element = [ "FileName" => $filename_attr ]; 
    2593  
    2594     my $fileset_attr = {}; 
    2595     $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ]; 
    2596     my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ]; 
    2597      
    2598      
    2599     # get children of dirmeta, and push the new FileSet element onto it 
    2600     print "Appending metadata to metadata.xml\n"; 
    2601     my $dirmeta_content = $attrHash->{'_content'}; 
    2602     if (ref($dirmeta_content)) { 
    2603         # got some existing interesting nested content 
    2604         #push(@$dirmeta_content, "    ", $fileset ,"\n        "); 
    2605         push(@$dirmeta_content, "\t", $fileset ,"\n"); 
    2606     } 
    2607     else { 
    2608         #description_content is most likely a string such as "\n" 
    2609         #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ]; 
    2610         $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ]; 
    2611     }    
    2612  
    2613     $parser->{'parameters'}->{'metamode'} = "done"; 
    2614     } 
    2615     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    2616     return [$tagname => $attrHash]; 
    2617 } 
    2618  
    2619  
    2620 sub edit_metadata_xml 
    2621 { 
    2622     my $self = shift @_; 
    2623     my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_; 
    2624  
    2625     # Set the call-back functions for the metadata tags 
    2626     my @rules =  
    2627     ( _default => 'raw', 
    2628           'FileName' => \&mxml_filename, 
    2629       'Metadata' => \&mxml_metadata, 
    2630       'Description' => \&mxml_description, 
    2631           'FileSet' => \&mxml_fileset, 
    2632       'DirectoryMetadata' => \&mxml_directorymetadata); 
    2633  
    2634     # use XML::Rules to add it in (read in and out again) 
    2635     my $parser = XML::Rules->new(rules => \@rules,  
    2636                  style => 'filter', 
    2637                                  output_encoding => 'utf8', 
    2638                  stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm 
    2639  
    2640     if (!-e $metadata_xml_filename) { 
    2641      
    2642         if (open(MOUT,">$metadata_xml_filename")) { 
    2643              
    2644             my $src_file_re = &util::filename_to_regex($src_file); 
    2645             # shouldn't the following also be in the above utility routine?? 
    2646             # $src_file_re =~ s/\./\\./g; 
    2647          
    2648             print MOUT "<?xml version=\"1.0\"?>\n"; 
    2649             print MOUT "<DirectoryMetadata>\n"; 
    2650             print MOUT " <FileSet>\n"; 
    2651             print MOUT "  <FileName>$src_file_re</FileName>\n"; 
    2652             print MOUT "  <Description>\n"; 
    2653             print MOUT "  </Description>\n"; 
    2654             print MOUT " </FileSet>\n"; 
    2655             print MOUT "</DirectoryMetadata>\n"; 
    2656  
    2657             close(MOUT); 
    2658         } 
    2659         else { 
    2660             $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!"); 
    2661         } 
    2662     } 
    2663      
    2664      
    2665     my $xml_in = ""; 
    2666     if (!open(MIN,"<$metadata_xml_filename")) { 
    2667         $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!"); 
    2668     } 
    2669     else { 
    2670         # Read them in 
    2671         my $line; 
    2672         while (defined ($line=<MIN>)) { 
    2673             $xml_in .= $line; 
    2674         } 
    2675         close(MIN);  
    2676  
    2677         # Filter with the call-back functions 
    2678         my $xml_out = ""; 
    2679  
    2680         my $MOUT; 
    2681         if (!open($MOUT,">$metadata_xml_filename")) { 
    2682             $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!"); 
    2683         } 
    2684         else { 
    2685             binmode($MOUT,":utf8"); 
    2686  
    2687             # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules 
    2688             # At the moment, I will just hack it! 
    2689             #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"; 
    2690             #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">"; 
    2691             #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/; 
    2692             #print MOUT $xml_out; 
    2693  
    2694             $parser->filter($xml_in, $MOUT, { metaname => $metaname, 
    2695                               metapos => $metapos, 
    2696                       metavalue => $metavalue, 
    2697                       metamode => $metamode, 
    2698                       src_file => $src_file, 
    2699                       prevmetavalue => $prevmetavalue, 
    2700                       current_file => undef} ); 
    2701             close($MOUT);        
    2702         } 
    2703    } 
    2704 } 
    2705  
    2706  
    2707 sub set_import_metadata 
    2708 { 
    2709     my $self = shift @_; 
    2710      
    2711     my $username  = $self->{'username'}; 
    2712     my $collect   = $self->{'collect'}; 
    2713     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2714      
    2715     if ($baseaction::authentication_enabled) { 
    2716     # Ensure the user is allowed to edit this collection 
    2717     $self->authenticate_user($username, $collect); 
    2718     } 
    2719  
    2720     # Make sure the collection isn't locked by someone else 
    2721     $self->lock_collection($username, $collect); 
    2722   
    2723     $self->_set_import_metadata(@_); 
    2724  
    2725     # Release the lock once it is done 
    2726     $self->unlock_collection($username, $collect); 
    2727      
    2728 } 
    2729  
    2730 sub set_import_metadata_array 
    2731 { 
    2732     my $self = shift @_; 
    2733  
    2734     my $username  = $self->{'username'}; 
    2735     my $collect   = $self->{'collect'}; 
    2736     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2737 #    my $gsdlhome  = $self->{'gsdlhome'}; 
    2738  
    2739     if ($baseaction::authentication_enabled) { 
    2740     # Ensure the user is allowed to edit this collection     
    2741     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    2742     } 
    2743  
    2744     my $site = $self->{'site'}; 
    2745     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    2746      
    2747     $gsdl_cgi->checked_chdir($collect_dir); 
    2748  
    2749     # Make sure the collection isn't locked by someone else 
    2750     $self->lock_collection($username, $collect); 
    2751  
    2752     $self->_set_import_metadata_array(@_); 
    2753  
    2754     # Release the lock once it is done 
    2755     $self->unlock_collection($username, $collect); 
    2756  
    2757 } 
    2758  
    2759  
    2760 sub _set_import_metadata_array 
    2761 { 
    2762     my $self = shift @_; 
    2763  
    2764     my $collect   = $self->{'collect'}; 
    2765     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2766  
    2767     my $site = $self->{'site'}; 
    2768     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    2769      
    2770     # look up additional args 
    2771      
    2772     my $infodbtype = $self->{'infodbtype'}; 
    2773      
    2774     my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");     
    2775     my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    2776      
    2777     my $json_str = $self->{'json'}; 
    2778     my $doc_array = decode_json $json_str; 
    2779      
    2780     my $global_status = 0; 
    2781     my $global_mess = ""; 
    2782      
    2783     my @all_docids = (); 
    2784      
    2785     foreach my $doc_array_rec ( @$doc_array )  
    2786     { 
    2787     my $status = -1; 
    2788     my $docid = $doc_array_rec->{'docid'}; 
    2789      
    2790     my ($docid_root,$docid_secnum); 
    2791     if(defined $docid) {     
    2792         ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);     
    2793         # as yet no support for setting subsection metadata in metadata.xml 
    2794         if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) { 
    2795         $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n"); 
    2796         next; # skip this docid in for loop 
    2797         } 
    2798     } 
    2799  
    2800     push(@all_docids,$docid); # docid_root rather 
    2801      
    2802     my $metaname = $doc_array_rec->{'metaname'}; 
    2803     if (defined $metaname) { 
    2804         my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'}; 
    2805         my $metavalue = $doc_array_rec->{'metavalue'}; 
    2806         $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g; 
    2807  
    2808         if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { 
    2809         # make "accumulate" the default (less destructive, as it won't actually  
    2810         # delete any existing values) 
    2811         $metamode = "accumulate"; 
    2812         } 
    2813  
    2814         # adding metapos and prevmetavalue support to import_metadata subroutines 
    2815         my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override 
    2816         my $prevmetavalue = $self->{'prevmetavalue'}; 
    2817  
    2818         $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid 
    2819          
    2820     } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable 
    2821         my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong 
    2822          
    2823         foreach my $metatable_rec ( @$metatable ) { 
    2824         $metaname  = $metatable_rec->{'metaname'};  
    2825         my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'}; 
    2826         if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) { 
    2827             # make "accumulate" the default (less destructive, as it won't actually  
    2828             # delete any existing values) 
    2829             $metamode = "accumulate"; 
    2830         } 
    2831  
    2832         # No support for metapos and prevmetavalue in the JSON metatable substructure 
    2833         my $metapos = undef;  
    2834         my $prevmetavalue = undef; 
    2835         my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray 
    2836          
    2837         foreach my $metavalue ( @$metavals ) { 
    2838             $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g; 
    2839  
    2840             $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid 
    2841             if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,  
    2842             # need to accumulate subsequent metavals for this metaname, else the just-assigned 
    2843             # metavalue for this metaname will be lost 
    2844             $metamode = "accumulate"; 
    2845             } 
    2846         } 
    2847         } 
    2848     }        
    2849     } 
    2850  
    2851     # always a success message 
    2852     my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n"; 
    2853     $gsdl_cgi->generate_ok_message($mess); 
    2854 } 
    2855  
    2856 # always returns true (1) 
    2857 sub set_import_metadata_entry 
    2858 { 
    2859     my $self = shift @_; 
    2860     my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_; 
    2861  
    2862     my $info_mess = <<RAWEND; 
    2863 **************************** 
    2864   set_import_metadata_entry() 
    2865 **************************** 
    2866 RAWEND 
    2867  
    2868     $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir); 
    2869     $info_mess .= " collect     = $collect\n"     if defined($collect); 
    2870     $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype); 
    2871     $info_mess .= " arcinfo_doc_filename  = $arcinfo_doc_filename\n"  if defined($arcinfo_doc_filename); 
    2872     $info_mess .= " docid       = $docid\n"       if defined($docid); 
    2873     $info_mess .= " metaname    = $metaname\n"    if defined($metaname); 
    2874     $info_mess .= " metapos     = $metapos\n"     if defined($metapos); 
    2875     $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue); 
    2876     $info_mess .= " metamode    = $metamode\n"    if defined($metamode); 
    2877     $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue); 
    2878       
    2879     $info_mess .= "****************************\n"; 
    2880  
    2881     $gsdl_cgi->generate_message($info_mess); 
    2882  
    2883     # import works with metadata.xml which can have inherited metadata 
    2884     # so setting or removing at a metapos can have unintended effects for a COMPLEX collection 
    2885     # (a collection that has or can have inherited metadata). Metapos has expected behaviour for 
    2886     # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows  
    2887     # what they're doing if they provide a metapos. 
    2888     if(defined $metapos) { 
    2889     print STDERR "@@@@ WARNING: metapos defined.\n"; 
    2890     print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n"; 
    2891     } 
    2892  
    2893     # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file 
    2894     # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f" 
    2895     my $metadata_xml_file; 
    2896     my $import_filename = undef; 
    2897      
    2898     if (defined $docid) { 
    2899     # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    2900     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid); 
    2901  
    2902     # This now stores the full pathname 
    2903     $import_filename = $doc_rec->{'src-file'}->[0];  
    2904     $import_filename = &util::placeholders_to_abspath($import_filename); 
    2905  
    2906     } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array 
    2907          # as the array version of the method doesn't support the -f parameter yet 
    2908     my $import_file  = $self->{'f'}; 
    2909     $import_filename = &util::filename_cat($collect_dir,$collect,$import_file); 
    2910     } 
    2911      
    2912     # figure out correct metadata.xml file [?] 
    2913     # Assuming the metadata.xml file is next to the source file 
    2914     # Note: This will not work if it is using the inherited metadata from the parent folder 
    2915     my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename); 
    2916     my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml"); 
    2917      
    2918     # If we're overriding everything, then $prevmetavalue=undefined and 
    2919     # $metamode=override combined with $metapos=undefined 
    2920     # in which case we need to remove all metavalues for the metaname at the given (sub)section 
    2921     # Thereafter, we will finally be able to set the overriding metavalue for this metaname 
    2922     if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") { 
    2923 ##  print STDERR "@@@ REMOVING all import metadata for $metaname\n"; 
    2924     $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef 
    2925  
    2926     } 
    2927  
    2928     # Edit the metadata.xml 
    2929     # Modified by Jeffrey from DL Consulting 
    2930     # Handle the case where there is one metadata.xml file for multiple FileSets 
    2931     # The XML filter needs to know whether it is in the right FileSet 
    2932     # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file. 
    2933     # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file) 
    2934     $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,  
    2935                  $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue); 
    2936     #return 0; 
    2937     return $metadata_xml_filename; 
    2938 } 
    2939  
    2940 sub _remove_import_metadata 
    2941 { 
    2942     my $self = shift @_; 
    2943  
    2944     my $collect   = $self->{'collect'}; 
    2945     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    2946 #   my $gsdlhome  = $self->{'gsdlhome'}; 
    2947     my $infodbtype = $self->{'infodbtype'}; 
    2948      
    2949     # Obtain the collect dir 
    2950     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    2951     my $site = $self->{'site'}; 
    2952     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    2953      
    2954     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    2955     my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); 
    2956  
    2957     # look up additional args 
    2958     my $docid = $self->{'d'}; 
    2959     if ((!defined $docid) || ($docid =~ m/^\s*$/))  
    2960     { 
    2961         $gsdl_cgi->generate_error("No docid (d=...) specified.\n"); 
    2962     } 
    2963      
    2964     my $metaname = $self->{'metaname'}; 
    2965     my $metapos = $self->{'metapos'}; 
    2966     my $metavalue = $self->{'metavalue'}; 
    2967  
    2968     $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/)); 
    2969     $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); 
    2970  
    2971     if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata() 
    2972         $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g; 
    2973     } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st 
    2974         $metapos = 0; 
    2975     } 
    2976     my $metamode = $self->{'metamode'}; 
    2977     $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/)); 
    2978  
    2979     # import works with metadata.xml which can have inherited metadata 
    2980     # so setting or removing at a metapos can have unintended effects for a COMPLEX collection 
    2981     # (a collection that has or can have inherited metadata). Metapos has expected behaviour for 
    2982     # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows  
    2983     # what they're doing if they provide a metapos. 
    2984     if(defined $metapos) { 
    2985         print STDERR "@@@@ WARNING: metapos defined.\n"; 
    2986         print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n"; 
    2987     } 
    2988      
    2989     # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file 
    2990     # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f" 
    2991     my $metadata_xml_file; 
    2992     my $import_filename = undef; 
    2993     if (defined $docid)  
    2994     { 
    2995         my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    2996         my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid); 
    2997  
    2998         # This now stores the full pathname 
    2999         $import_filename = $doc_rec->{'src-file'}->[0];  
    3000         $import_filename = &util::placeholders_to_abspath($import_filename); 
    3001     } 
    3002  
    3003     if((!defined $import_filename) || ($import_filename =~ m/^\s*$/)) 
    3004     { 
    3005         $gsdl_cgi->generate_error("There is no metadata\n"); 
    3006     } 
    3007      
    3008     # figure out correct metadata.xml file [?] 
    3009     # Assuming the metadata.xml file is next to the source file 
    3010     # Note: This will not work if it is using the inherited metadata from the parent folder 
    3011     my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename); 
    3012     my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml"); 
    3013      
    3014     $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All 
    3015      
    3016     my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n"; 
    3017     $mess .= "  $metaname"; 
    3018     $mess .= " = $metavalue\n"; 
    3019      
    3020     $gsdl_cgi->generate_ok_message($mess); 
    3021  
    3022     #return $status; # in case calling functions have a use for this 
    3023 } 
    3024  
    3025 sub remove_import_metadata 
    3026 { 
    3027     my $self = shift @_; 
    3028      
    3029     my $username = $self->{'username'}; 
    3030     my $collect   = $self->{'collect'}; 
    3031     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    3032      
    3033     if ($baseaction::authentication_enabled) { 
    3034         # Ensure the user is allowed to edit this collection         
    3035         $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    3036     } 
    3037  
    3038     # Make sure the collection isn't locked by someone else 
    3039     $self->lock_collection($username, $collect); 
    3040      
    3041     $self->_remove_import_metadata(@_); 
    3042  
    3043     # Release the lock once it is done 
    3044     $self->unlock_collection($username, $collect); 
    3045  
    3046 } 
    3047  
    3048 sub remove_from_metadata_xml 
    3049 { 
    3050     my $self = shift @_; 
    3051     my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_; 
    3052     # metamode generally has no meaning for removing meta, but is used by set_meta  
    3053     # when overriding all metavals for a metaname, in which case remove_meta is called with metamode 
    3054  
    3055     # Set the call-back functions for the metadata tags 
    3056     my @rules =  
    3057     (  
    3058         _default => 'raw', 
    3059         'Metadata' => \&rfmxml_metadata, 
    3060         'FileName' => \&mxml_filename 
    3061     ); 
    3062          
    3063     my $parser = XML::Rules->new 
    3064     ( 
    3065         rules => \@rules,  
    3066         style => 'filter', 
    3067         output_encoding => 'utf8', 
    3068      #normalisespaces => 1, 
    3069             stripspaces => 2|0|0 # ineffectual 
    3070     ); 
    3071      
    3072     my $xml_in = ""; 
    3073     if (!open(MIN,"<$metadata_xml_filename"))  
    3074     { 
    3075         $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!"); 
    3076     } 
    3077     else  
    3078     { 
    3079         # Read them in 
    3080         my $line; 
    3081         while (defined ($line=<MIN>)) { 
    3082             $xml_in .= $line; 
    3083         } 
    3084         close(MIN);  
    3085  
    3086         # Filter with the call-back functions 
    3087         my $xml_out = ""; 
    3088  
    3089         my $MOUT; 
    3090         if (!open($MOUT,">$metadata_xml_filename")) { 
    3091             $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!"); 
    3092         } 
    3093         else { 
    3094             binmode($MOUT,":utf8"); 
    3095             $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef}); 
    3096             close($MOUT);        
    3097         } 
    3098     } 
    3099 } 
    3100  
    3101 sub rfmxml_metadata 
    3102 { 
    3103     my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_; 
    3104  
    3105     # metadata.xml does not handle subsections 
    3106  
    3107     # since metadata.xml now has to deal with metapos, we keep track of the metadata position 
    3108     if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})  
    3109         && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) 
    3110     { 
    3111         if (!defined $parser->{'parameters'}->{'poscount'}) 
    3112         { 
    3113             $parser->{'parameters'}->{'poscount'} = 0; 
    3114         } 
    3115         else 
    3116         { 
    3117             $parser->{'parameters'}->{'poscount'}++; 
    3118         } 
    3119  
    3120         # if overriding but no metapos, then clear all the meta for this metaname 
    3121         if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) { 
    3122             return []; 
    3123         } 
    3124      
    3125         if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})) 
    3126         { 
    3127             return []; 
    3128         } 
    3129         if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'})) 
    3130         { 
    3131             return []; 
    3132         }        
    3133     } 
    3134  
    3135     # RAW is [$tagname => $attrHash] not $tagname => $attrHash!! 
    3136     return [$tagname => $attrHash]; 
    3137 } 
    3138  
    3139 sub _remove_live_metadata 
    3140 { 
    3141     my $self = shift @_; 
    3142  
    3143     my $collect   = $self->{'collect'}; 
    3144     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    3145 #    my $gsdlhome  = $self->{'gsdlhome'}; 
    3146     my $infodbtype = $self->{'infodbtype'}; 
    3147  
    3148     # Obtain the collect dir 
    3149     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    3150     my $site = $self->{'site'}; 
    3151     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    3152  
    3153      
    3154     # look up additional args 
    3155     my $docid     = $self->{'d'}; 
    3156     if ((!defined $docid) || ($docid =~ m/^\s*$/)) { 
    3157       $gsdl_cgi->generate_error("No docid (d=...) specified."); 
    3158     } 
    3159      
    3160     # Generate the dbkey 
    3161     my $metaname  = $self->{'metaname'}; 
    3162     my $dbkey = "$docid.$metaname"; 
    3163  
    3164     # To people who know $collect_tail please add some comments 
    3165     # Obtain the live gdbm_db path  
    3166     my $collect_tail = $collect; 
    3167     $collect_tail =~ s/^.*[\/|\\]//; 
    3168     my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); 
    3169     my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory); 
    3170  
    3171     # Remove the key 
    3172     my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\""; 
    3173     my $status = system($cmd); 
    3174     if ($status != 0) { 
    3175         # Catch error if gdbmdel failed 
    3176     my $mess = "Failed to set metadata key: $dbkey\n"; 
    3177      
    3178     $mess .= "PATH: $ENV{'PATH'}\n"; 
    3179     $mess .= "cmd = $cmd\n"; 
    3180     $mess .= "Exit status: $status\n"; 
    3181     $mess .= "System Error Message: $!\n"; 
    3182  
    3183     $gsdl_cgi->generate_error($mess); 
    3184     } 
    3185     else { 
    3186     $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]"); 
    3187     } 
    3188  
    3189 } 
    3190  
    3191 sub remove_live_metadata 
    3192 { 
    3193     my $self = shift @_; 
    3194  
    3195     my $username  = $self->{'username'}; 
    3196     my $collect   = $self->{'collect'}; 
    3197     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    3198     my $gsdlhome  = $self->{'gsdlhome'}; 
    3199      
    3200     if ($baseaction::authentication_enabled) { 
    3201     # Ensure the user is allowed to edit this collection     
    3202     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    3203     } 
    3204  
    3205     # Make sure the collection isn't locked by someone else 
    3206     $self->lock_collection($username, $collect); 
    3207  
    3208     $self->_remove_live_metadata(@_); 
    3209  
    3210     $self->unlock_collection($username, $collect); 
    3211 } 
    3212  
    3213 sub remove_metadata 
    3214 { 
    3215     my $self = shift @_; 
    3216  
    3217     my $where = $self->{'where'}; 
    3218     if(!$where || ($where =~ m/^\s*$/)) { 
    3219     $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour 
    3220     return; 
    3221     } 
    3222  
    3223     my $username  = $self->{'username'}; 
    3224     my $collect   = $self->{'collect'}; 
    3225     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    3226  
    3227     if ($baseaction::authentication_enabled) { 
    3228     # Ensure the user is allowed to edit this collection     
    3229     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    3230     } 
    3231  
    3232     # Make sure the collection isn't locked by someone else 
    3233     $self->lock_collection($username, $collect); 
    3234  
    3235     # check which directories need to be processed, specified in $where as  
    3236     # any combination of import|archives|index|live 
    3237     if($where =~ m/import/) { 
    3238     $self->_remove_import_metadata(@_);      
    3239     } 
    3240     if($where =~ m/archives/) { 
    3241     $self->_remove_archives_metadata(@_);        
    3242    }  
    3243     if($where =~ m/index/) { 
    3244     $self->_remove_index_metadata(@_);       
    3245     } 
    3246  
    3247     # Release the lock once it is done 
    3248     $self->unlock_collection($username, $collect); 
    3249 } 
    3250  
    3251 # the internal version, without authentication 
    3252 sub _remove_index_metadata 
    3253 {     
    3254     my $self = shift @_; 
    3255  
    3256     my $collect   = $self->{'collect'}; 
    3257     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    3258 #    my $gsdlhome  = $self->{'gsdlhome'}; 
    3259     my $infodbtype = $self->{'infodbtype'}; 
    3260      
    3261     # Obtain the collect dir 
    3262     my $site = $self->{'site'}; 
    3263     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    3264     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    3265  
    3266         # look up additional args 
    3267     my $docid     = $self->{'d'}; 
    3268     if ((!defined $docid) || ($docid =~ m/^\s*$/)) { 
    3269       $gsdl_cgi->generate_error("No docid (d=...) specified."); 
    3270     } 
    3271     my $metaname  = $self->{'metaname'}; 
    3272     my $metapos   = $self->{'metapos'}; 
    3273     my $metavalue = $self->{'metavalue'}; 
    3274  
    3275     $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/)); 
    3276     $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here 
    3277  
    3278     # To people who know $collect_tail please add some comments 
    3279     # -> In collection groups, I think collect_tailname is the subcollection name, 
    3280     # e.g. colgroup-name/col-tail-name 
    3281     # Obtain the path to the database 
    3282     my $collect_tail = $collect; 
    3283     $collect_tail =~ s/^.*[\/|\\]//; 
    3284     my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); 
    3285     my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory); 
    3286  
    3287     # Read the docid entry 
    3288     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid); 
    3289  
    3290     # Check to make sure the key does exist 
    3291     if (!defined ($doc_rec->{$metaname})) { 
    3292         $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]"); 
    3293     } 
    3294  
    3295     # Obtain the specified metadata pos 
    3296     # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname 
    3297     if(!defined $metapos && !defined $metavalue) { 
    3298         $metapos = 0; 
    3299     } 
    3300      
    3301  
    3302     # consider check key is defined before deleting? 
    3303     # Loop through the metadata array and ignore the specified position 
    3304     my $filtered_metadata = []; 
    3305     my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});     
    3306     for (my $i=0; $i<$num_metadata_vals; $i++) { 
    3307     my $metaval = shift(@{$doc_rec->{$metaname}}); 
    3308  
    3309     if (!defined $metavalue && $i != $metapos) { 
    3310         push(@$filtered_metadata,$metaval); 
    3311     } 
    3312      
    3313     if(defined $metavalue && !($metavalue eq $metaval)) 
    3314     { 
    3315         push(@$filtered_metadata,$metaval); 
    3316     } 
    3317     } 
    3318     $doc_rec->{$metaname} = $filtered_metadata; 
    3319  
    3320     ## Use the dbutil set_entry method instead of assuming the database is gdbm 
    3321     my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec); 
    3322  
    3323     if ($status != 0) { 
    3324     my $mess = "Failed to set metadata key: $docid\n"; 
    3325      
    3326     $mess .= "PATH: $ENV{'PATH'}\n"; 
    3327     $mess .= "Exit status: $status\n"; 
    3328     $mess .= "System Error Message: $!\n"; 
    3329  
    3330     $gsdl_cgi->generate_error($mess); 
    3331     } 
    3332     else { 
    3333     my $mess = "DB set (with item deleted) successful: Key[$docid]\n"; 
    3334     $mess .= "  $metaname"; 
    3335     $mess .= "->[$metapos]" if (defined $metapos); 
    3336     $mess .= " ($metavalue)" if (defined $metavalue); 
    3337  
    3338     $gsdl_cgi->generate_ok_message($mess); 
    3339     } 
    3340  
    3341     #return $status; # in case calling functions have a use for this 
    3342 } 
    3343  
    3344 sub remove_index_metadata 
    3345 { 
    3346     my $self = shift @_; 
    3347  
    3348     my $username  = $self->{'username'}; 
    3349     my $collect   = $self->{'collect'}; 
    3350     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    3351 #    my $gsdlhome  = $self->{'gsdlhome'}; 
    3352      
    3353     if ($baseaction::authentication_enabled) { 
    3354     # Ensure the user is allowed to edit this collection     
    3355     $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect); 
    3356     } 
    3357  
    3358     # Obtain the collect dir 
    3359     my $site = $self->{'site'}; 
    3360     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    3361     ## my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    3362  
    3363     # Make sure the collection isn't locked by someone else 
    3364     $self->lock_collection($username, $collect); 
    3365  
    3366     $self->_remove_index_metadata(@_); 
    3367  
    3368     # Release the lock once it is done 
    3369     $self->unlock_collection($username, $collect); 
    3370 } 
    3371  
    3372  
    3373 # Was trying to reused the codes, but the functions need to be broken 
    3374 # down more before they can be reused, otherwise there will be too 
    3375 # much overhead and duplicate process... 
    3376 sub insert_metadata 
    3377 { 
    3378     my $self = shift @_; 
    3379      
    3380     my $username  = $self->{'username'}; 
    3381     my $collect   = $self->{'collect'}; 
    3382     my $gsdl_cgi  = $self->{'gsdl_cgi'}; 
    3383     my $gsdlhome  = $self->{'gsdlhome'}; 
    3384     my $infodbtype = $self->{'infodbtype'}; 
    3385      
    3386     # If the import metadata and gdbm database have been updated, we 
    3387     # need to insert some notification to warn user that the the text 
    3388     # they see at the moment is not indexed and require a rebuild. 
    3389     my $rebuild_pending_macro = "_rebuildpendingmessage_"; 
    3390  
    3391     if ($baseaction::authentication_enabled) { 
    3392     # Ensure the user is allowed to edit this collection 
    3393     $self->authenticate_user($username, $collect); 
    3394     } 
    3395  
    3396     # Obtain the collect and archive dir    
    3397     my $site = $self->{'site'}; 
    3398     my $collect_dir = $gsdl_cgi->get_collection_dir($site); 
    3399     ##my $collect_dir = &util::filename_cat($gsdlhome, "collect"); 
    3400     my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives"); 
    3401  
    3402     # Make sure the collection isn't locked by someone else 
    3403     $self->lock_collection($username, $collect); 
    3404      
    3405     # Check additional args 
    3406     my $docid = $self->{'d'}; 
    3407     if (!defined($docid) || ($docid =~ m/^\s*$/)) { 
    3408     $gsdl_cgi->generate_error("No document id is specified: d=..."); 
    3409     }  
    3410     my $metaname = $self->{'metaname'}; 
    3411     if (!defined($metaname) || ($metaname =~ m/^\s*$/)) { 
    3412     $gsdl_cgi->generate_error("No metaname is specified: metadataname=..."); 
    3413     }  
    3414     my $metavalue = $self->{'metavalue'}; 
    3415     if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) { 
    3416     $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=..."); 
    3417     }  
    3418     # make "accumulate" the default (less destructive, as it won't actually  
    3419     # delete any existing values) 
    3420     my $metamode = "accumulate"; 
    3421  
    3422     # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined 
    3423     my $metapos   = undef; 
    3424     my $prevmetavalue = undef; 
    3425  
    3426     #=======================================================================# 
    3427     # set_import_metadata [START] 
    3428     #=======================================================================# 
    3429     # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file 
    3430     # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f" 
    3431     my $metadata_xml_file; 
    3432     my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir); 
    3433     my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid); 
    3434      
    3435     # This now stores the full pathname 
    3436     my $import_filename = $archive_doc_rec->{'src-file'}->[0]; 
    3437     $import_filename = &util::placeholders_to_abspath($import_filename); 
    3438      
    3439     # figure out correct metadata.xml file [?] 
    3440     # Assuming the metadata.xml file is next to the source file 
    3441     # Note: This will not work if it is using the inherited metadata from the parent folder 
    3442     my ($import_tailname, $import_dirname)  
    3443     = File::Basename::fileparse($import_filename); 
    3444     my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml"); 
    3445  
    3446     # Shane's escape characters 
    3447     $metavalue = pack "U0C*", unpack "C*", $metavalue; 
    3448     $metavalue =~ s/\,/&#44;/g; 
    3449     $metavalue =~ s/\:/&#58;/g; 
    3450     $metavalue =~ s/\|/&#124;/g; 
    3451     $metavalue =~ s/\(/&#40;/g; 
    3452     $metavalue =~ s/\)/&#41;/g; 
    3453     $metavalue =~ s/\[/&#91;/g; 
    3454     $metavalue =~ s/\\/&#92;/g; 
    3455     $metavalue =~ s/\]/&#93;/g; 
    3456     $metavalue =~ s/\{/&#123;/g; 
    3457     $metavalue =~ s/\}/&#125;/g; 
    3458     $metavalue =~ s/\"/&#34;/g; 
    3459     $metavalue =~ s/\`/&#96;/g; 
    3460     $metavalue =~ s/\n/_newline_/g; 
    3461  
    3462     # Edit the metadata.xml 
    3463     # Modified by Jeffrey from DL Consulting 
    3464     # Handle the case where there is one metadata.xml file for multiple FileSets 
    3465     # The XML filter needs to know whether it is in the right FileSet 
    3466     # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file. 
    3467     # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file) 
    3468     $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, 
    3469                  $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue); 
    3470     #=======================================================================# 
    3471     # set_import_metadata [END] 
    3472     #=======================================================================# 
    3473  
    3474  
    3475     #=======================================================================# 
    3476     # set_metadata (accumulate version) [START] 
    3477     #=======================================================================# 
    3478     # To people who know $collect_tail please add some comments 
    3479     # Obtain path to the database 
    3480     my $collect_tail = $collect; 
    3481     $collect_tail =~ s/^.*[\/|\\]//; 
    3482     my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text"); 
    3483     my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory); 
    3484  
    3485     # Read the docid entry 
    3486     my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid); 
    3487  
    3488     # Protect the quotes 
    3489     $metavalue =~ s/\"/\\\"/g; 
    3490  
    3491     # Adds the pending macro 
    3492     my $macro_metavalue = $rebuild_pending_macro . $metavalue; 
    3493  
    3494     # If the metadata doesn't exist, create a new one 
    3495     if (!defined($doc_rec->{$metaname})){     
    3496     $doc_rec->{$metaname} = [ $macro_metavalue ]; 
    3497     } 
    3498     # Else, let's acculumate the values 
    3499     else { 
    3500         push(@{$doc_rec->{$metaname}},$macro_metavalue); 
    3501     } 
    3502  
    3503     ## Use the dbutil set_entry method instead of assuming the database is gdbm 
    3504     my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec); 
    3505  
    3506     if ($status != 0) { 
    3507         # Catch error if gdbmget failed 
    3508     my $mess = "Failed to set metadata key: $docid\n"; 
    3509      
    3510     $mess .= "PATH: $ENV{'PATH'}\n"; 
    3511     $mess .= "Exit status: $status\n"; 
    3512     $mess .= "System Error Message: $!\n"; 
    3513  
    3514     $gsdl_cgi->generate_error($mess); 
    3515     } 
    3516     else { 
    3517     my $mess = "insert-metadata successful: Key[$docid]\n"; 
    3518     $mess .= "  [In metadata.xml] $metaname"; 
    3519     $mess .= " = $metavalue\n"; 
    3520     $mess .= "  [In database] $metaname"; 
    3521     $mess .= " = $macro_metavalue\n"; 
    3522     $mess .= "  The new text has not been indexed, rebuilding collection is required\n"; 
    3523         $gsdl_cgi->generate_ok_message($mess); 
    3524     }     
    3525     #=======================================================================# 
    3526     # set_metadata (accumulate version) [END] 
    3527     #=======================================================================# 
    3528  
    3529     # Release the lock once it is done 
    3530     $self->unlock_collection($username, $collect); 
    3531 } 
    3532  
    35339501; 
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/build/GS2PerlConstructor.java

    r31592 r31602  
    324324         
    325325        // Need to set QUERY_STRING and REQUEST_METHOD=GET in environment 
     326        // Also set GS3_AUTHENTICATED, to allow running metadata-server.pl with mod (set and remove) commands 
    326327        // http://www.cgi101.com/class/ch3/text.html 
    327328        String[] envvars = { 
    328329            "QUERY_STRING=" + this.query_string, 
    329             "REQUEST_METHOD=GET" 
     330            "REQUEST_METHOD=GET", 
     331            "GS3_AUTHENTICATED=true" // how do we set the env var without having to assign it a value? 
    330332        }; 
    331333