source: main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm@ 38217

Last change on this file since 38217 was 38217, checked in by anupama, 9 months ago

When committing r38193 yesterday (after only eyeballing the changes and refactorings to make sure everything made sense), it turns out there were lots of errors that I only discovered when testing the code today. Now there are no errors listed in my JS console output to do with syntax errors in the modmetadataaction.pm perl file, but although remove_import_metadata_array is displayed as working (still need to check the actual metadata.xml file), attempts to delete from archives fails with something to do with a dv file not being found (dv appears to be document versioning). I still have to investigate this and then once fixed, check if remove_index_metadata_array also works or not. Still need to commit changes made today tosrc/java

File size: 133.7 KB
Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34package metadataaction;
35
36use strict;
37
38use cgiactions::baseaction;
39use cgiactions::metadataactioncommon;
40
41use dbutil;
42use ghtml;
43
44use JSON;
45
46# convenient to have shorter version around
47my $FLDV_HISTORY_DIR = $metadataactioncommon::FLDV_HISTORY_DIR;
48
49$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
50{
51 #SET METHODS
52 "set-live-metadata" => {
53 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
57 'compulsory-args' => [ "metaname", "metavalue" ],
58 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "dv", "f" ] },
59
60 "set-index-metadata" => {
61 'compulsory-args' => [ "d", "metaname", "metavalue" ],
62 'optional-args' => [ "metapos", "metamode" ] },
63
64 "set-archives-metadata" => {
65 'compulsory-args' => [ "d", "metaname", "metavalue" ],
66 'optional-args' => [ "dv", "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
67
68 ###### !!!!!
69 # Should there not be a "set-archives-text" also ???!!!!??
70 ###### !!!!!
71
72 "set-archives-assocfile" => {
73 'compulsory-args' => [ "d", "assocname" ],
74 'optional-args' => [ "dv", "fileupload", "filedata" ] ,
75 'help-string' => [ "Because this action uploads a file, this action needs to be POSTed. The 'fileupload' parameter/field is the uploaded file content; 'assocname' specifies the filename within the archives's document's area that it will be saved as" ]
76 },
77
78 "set-import-metadata" => {
79 'compulsory-args' => [ "metaname", "metavalue" ],
80 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
81
82 #SET METHODS (ARRAY)
83 "set-metadata-array" => {
84 'compulsory-args' => [ "where", "json" ],
85 'optional-args' => [ ],
86 'help-string' => [
87 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
88
89 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
90
91# The same examples rewritten for when running the metadata-server.pl script from the commandline:
92
93# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
94
95# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
96
97 "set-archives-metadata-array" => {
98 'compulsory-args' => [ "json" ],
99 'optional-args' => [ ] },
100
101 "set-import-metadata-array" => {
102 'compulsory-args' => [ "json" ],
103 'optional-args' => [ ] },
104
105 "set-index-metadata-array" => {
106 'compulsory-args' => [ "json" ],
107 'optional-args' => [ ] },
108
109 "set-live-metadata-array" => {
110 'compulsory-args' => [ "json" ],
111 'optional-args' => [ ] },
112
113 #REMOVE METHODS
114 "remove-import-metadata" => {
115 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
116 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
117# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
118
119 "remove-archives-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
121 'optional-args' => [ "dv", "metapos", "metavalue", "metamode" ] },
122
123 "remove-live-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ ] },
126
127 "remove-index-metadata" => {
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "metapos", "metavalue" ] },
130
131 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
132 'compulsory-args' => [ "d", "metaname" ],
133 'optional-args' => [ "where", "dv", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
134
135 #REMOVE METHODS (ARRAY)
136 "remove-metadata-array" => {
137 'compulsory-args' => [ "where", "json" ],
138 'optional-args' => [ ],
139 'help-string' => [
140 "No remove-metadata-array examples yet."
141 ] },
142 "remove-archives-metadata-array" => {
143 'compulsory-args' => [ "json" ],
144 'optional-args' => [ ] },
145
146 "remove-import-metadata-array" => {
147 'compulsory-args' => [ "json" ],
148 'optional-args' => [ ] },
149
150 "remove-index-metadata-array" => {
151 'compulsory-args' => [ "json" ],
152 'optional-args' => [ ] },
153
154 "remove-live-metadata-array" => {
155 'compulsory-args' => [ "json" ],
156 'optional-args' => [ ] },
157
158 #REMOVE MULTIPLE (REMOVE ALL MATCHING METANAME AT DOCID)
159 "erase-import-metadata" => {
160 'compulsory-args' => [ "d", "metaname" ] },
161
162 "erase-archives-metadata" => {
163 'compulsory-args' => [ "d", "metaname" ],
164 'optional-args' => [ "dv" ] },
165
166 "erase-index-metadata" => {
167 'compulsory-args' => [ "d", "metaname" ] },
168
169 "erase-live-metadata" => {
170 'compulsory-args' => [ "d", "metaname" ] },
171
172 "erase-metadata" => { # generic erase-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
173 'compulsory-args' => [ "d", "metaname" ],
174 'optional-args' => [ "where" ] },
175
176
177 #INSERT METHODS
178 "insert-metadata" => {
179 'compulsory-args' => [ "d", "metaname", "metavalue" ],
180 'optional-args' => [ ] },
181
182
183 #INC METHODS
184 "inc-fldv-nminus1" => {
185 'compulsory-args' => [ "d" ],
186 'optional-args' => [ ] }
187};
188
189sub _set_live_metadata
190{
191 my $self = shift @_;
192
193 my $collect = $self->{'collect'};
194 my $gsdl_cgi = $self->{'gsdl_cgi'};
195 my $infodbtype = $self->{'infodbtype'};
196
197 # Obtain the collect dir
198 my $site = $self->{'site'};
199 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
200
201
202 # look up additional args
203 my $docid = $self->{'d'};
204 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
205 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
206 }
207 my $metavalue = $self->{'metavalue'};
208
209 # Generate the dbkey
210 my $metaname = $self->{'metaname'};
211 my $dbkey = "$docid.$metaname";
212
213 # To people who know $collect_tail please add some comments
214 # Obtain path to the database
215 my $collect_tail = $collect;
216 $collect_tail =~ s/^.*[\/|\\]//;
217 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
218 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
219
220 # Set the new value
221 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
222 my $status = system($cmd);
223 if ($status != 0) {
224 # Catch error if gdbmget failed
225 my $mess = "Failed to set metadata key: $dbkey\n";
226
227 $mess .= "PATH: $ENV{'PATH'}\n";
228 $mess .= "cmd = $cmd\n";
229 $mess .= "Exit status: $status\n";
230 $mess .= "System Error Message: $!\n";
231
232 $gsdl_cgi->generate_error($mess);
233 }
234 else {
235 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
236 }
237
238 #return $status; # in case calling functions have any further use for this
239}
240
241sub set_live_metadata
242{
243 my $self = shift @_;
244
245 my $username = $self->{'username'};
246 my $collect = $self->{'collect'};
247 my $gsdl_cgi = $self->{'gsdl_cgi'};
248
249 if ($baseaction::authentication_enabled) {
250 # Ensure the user is allowed to edit this collection
251 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
252 }
253
254 # Make sure the collection isn't locked by someone else
255 $self->lock_collection($username, $collect);
256
257 $self->_set_live_metadata(@_);
258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
261}
262
263sub set_index_metadata_entry
264{
265 my $self = shift @_;
266 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
267
268 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
269 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
270
271 # To people who know $collect_tail please add some comments
272 # Obtain path to the database
273 my $collect_tail = $collect;
274 $collect_tail =~ s/^.*[\/|\\]//;
275 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
276 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
277
278# print STDERR "**** infodb file path = $infodb_file_path\n";
279# print STDERR "***** infodb type = $infodbtype\n";
280
281 # Read the docid entry
282 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
283
284 # Set the metadata value
285 if (defined $metapos) {
286 # if metamode=accumulate AND metapos, warn user and then use metapos
287 if (defined $metamode && $metamode eq "accumulate") {
288 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
289 print STDERR "**** Proceeding by using metapos\n";
290 }
291 $doc_rec->{$metaname}->[$metapos] = $metavalue;
292 }
293 elsif (defined $prevmetavalue) {
294 my $array = $doc_rec->{$metaname};
295 my $length = @$array;
296
297 my $found = 0;
298 for (my $i = 0; $i < $length; $i++){
299 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
300 $doc_rec->{$metaname}->[$i] = $metavalue;
301 $found = 1;
302 last;
303 }
304 }
305
306 if($found == 0){
307 print STDERR "**** Warning: could not find $prevmetavalue to replace. Appending (accumulating) replacement value.\n";
308 ##$doc_rec->{$metaname} = [ $metavalue ]; # if prevmetavalue not found, don't overwrite all previous values.
309 # Accumulate is less destructive. But should the correct behaviour be to not do Anything?
310
311 if(defined $doc_rec->{$metaname}) {
312 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
313 } else {
314 $doc_rec->{$metaname} = [ $metavalue ];
315 }
316 }
317 }
318 elsif (defined $metamode && $metamode eq "override") {
319 $doc_rec->{$metaname} = [ $metavalue ];
320 }
321 else { # default for index was to override, but because accumulate is less destructive,
322 # and because accumulate is the default for archives and import, that's the new default for index too
323 if(defined $doc_rec->{$metaname}) {
324 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
325 } else {
326 $doc_rec->{$metaname} = [ $metavalue ];
327 }
328 }
329
330 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
331
332 return $status;
333
334}
335
336sub _set_import_metadata
337{
338 my $self = shift @_;
339
340 my $collect = $self->{'collect'};
341 my $gsdl_cgi = $self->{'gsdl_cgi'};
342 my $infodbtype = $self->{'infodbtype'};
343
344 # Obtain the collect and archive dir
345 my $site = $self->{'site'};
346 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
347 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
348
349 # look up additional args
350 # want either d= or f=
351 my $docid = $self->{'d'};
352 my ($docid_root,$docid_secnum);
353 if(defined $docid) {
354 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
355 # as yet no support for setting subsection metadata in metadata.xml
356 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
357 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
358 return;
359 }
360 }
361
362 my $import_file = $self->{'f'};
363 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
364 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
365 }
366
367 # Get the parameters and set default mode to "accumulate"
368 my $metaname = $self->{'metaname'};
369 my $metavalue = $self->{'metavalue'};
370## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
371 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
372
373 my $metamode = $self->{'metamode'};
374 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
375 # make "accumulate" the default (less destructive, as it won't actually
376 # delete any existing values)
377 $metamode = "accumulate";
378 }
379
380 # adding metapos and prevmetavalue support to import_metadata subroutines
381 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
382 my $prevmetavalue = $self->{'prevmetavalue'};
383 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
384 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
385
386 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
387 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
388
389 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
390 $mess .= " $metaname";
391 $mess .= " = $metavalue";
392 $mess .= " ($metamode)\n";
393
394 $gsdl_cgi->generate_ok_message($mess);
395
396 #return $status; # in case calling functions have any further use for this
397}
398
399# the version of set_archives_meta that doesn't do authentication
400sub _set_archives_metadata
401{
402 my $self = shift @_;
403
404 my $collect = $self->{'collect'};
405 my $gsdl_cgi = $self->{'gsdl_cgi'};
406 my $infodbtype = $self->{'infodbtype'};
407
408 # Obtain the collect and archive dir
409 my $site = $self->{'site'};
410 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
411 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
412
413 # look up additional args
414 my $docid = $self->{'d'};
415 my $dv = $self->{'dv'};
416
417 my $metaname = $self->{'metaname'};
418 my $metavalue = $self->{'metavalue'};
419 my $prevmetavalue = $self->{'prevmetavalue'};
420
421 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
422 # Don't append "|| undef", since if metapos=0 it will then be set to undef
423
424 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
425 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
426
427 my $metamode = $self->{'metamode'};
428 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
429 # make "accumulate" the default (less destructive, as it won't actually
430 # delete any existing values)
431 $metamode = "accumulate";
432 }
433
434 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
435 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
436
437 if ($status == 0) {
438 my $mess = "set-archives-metadata successful: Key[$docid]\n";
439 $mess .= " $metaname";
440 $mess .= "->[$metapos]" if (defined $metapos);
441 $mess .= " = $metavalue";
442 $mess .= " ($metamode)\n";
443
444 $gsdl_cgi->generate_ok_message($mess);
445 }
446 else {
447 my $mess .= "Failed to set archives metadata key: $docid\n";
448 $mess .= "Exit status: $status\n";
449 if(defined $self->{'error_msg'}) {
450 $mess .= "Error Message: $self->{'error_msg'}\n";
451 } else {
452 $mess .= "System Error Message: $!\n";
453 }
454 $mess .= "-" x 20 . "\n";
455
456 $gsdl_cgi->generate_error($mess);
457 }
458
459 #return $status; # in case calling functions have any further use for this
460}
461
462
463# the version of set_index_meta that doesn't do authentication
464sub _set_index_metadata
465{
466 print STDERR "START SET INDEX METADATA\n";
467 my $self = shift @_;
468
469 my $collect = $self->{'collect'};
470 my $gsdl_cgi = $self->{'gsdl_cgi'};
471
472 # Obtain the collect dir
473 my $site = $self->{'site'};
474 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
475
476 # look up additional args
477 my $docid = $self->{'d'};
478 my $metaname = $self->{'metaname'};
479 my $metapos = $self->{'metapos'}; # undef has meaning
480 my $metavalue = $self->{'metavalue'};
481 my $infodbtype = $self->{'infodbtype'};
482 my $metamode = $self->{'metamode'};
483 my $prevmetavalue = $self->{'prevmetavalue'};
484
485 print STDERR "SETTING INDEX METADATA ENTRY\n";
486 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
487 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
488 if ($status != 0) {
489 # Catch error if set infodb entry failed
490 my $mess = "Failed to set metadata key: $docid\n";
491
492 $mess .= "PATH: $ENV{'PATH'}\n";
493 $mess .= "Exit status: $status\n";
494 $mess .= "System Error Message: $!\n";
495
496 $gsdl_cgi->generate_error($mess);
497 }
498 else {
499 my $mess = "set-index-metadata successful: Key[$docid]\n";
500 $mess .= " $metaname";
501 $mess .= "->[$metapos]" if (defined $metapos);
502 $mess .= " = $metavalue\n";
503
504 $gsdl_cgi->generate_ok_message($mess);
505 }
506
507 print STDERR "END SET INDEX METADATA\n";
508 #return $status; # in case calling functions have any further use for this
509}
510
511sub set_index_metadata
512{
513 my $self = shift @_;
514
515 my $username = $self->{'username'};
516 my $collect = $self->{'collect'};
517 my $gsdl_cgi = $self->{'gsdl_cgi'};
518
519 if ($baseaction::authentication_enabled) {
520 # Ensure the user is allowed to edit this collection
521 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
522 }
523
524 # Obtain the collect dir
525 my $site = $self->{'site'};
526 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
527
528 $gsdl_cgi->checked_chdir($collect_dir);
529
530 # Make sure the collection isn't locked by someone else
531 $self->lock_collection($username, $collect);
532
533 $self->_set_index_metadata(@_);
534
535 # Release the lock once it is done
536 $self->unlock_collection($username, $collect);
537}
538
539# call this to set the metadata for a combination of dirs archives, import or index, or live
540# if none specified, defaults to index which was the original behaviour of set_metadata.
541sub set_metadata
542{
543 my $self = shift @_;
544
545 # Testing that not defining a variable, setting it to "" or to " " all return false
546 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
547
548 my $where = $self->{'where'};
549 if(!$where || ($where =~ m/^\s*$/)) {
550 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
551 return;
552 }
553
554 # authenticate and lock collection once, even if processing multiple dirs
555 my $username = $self->{'username'};
556 my $collect = $self->{'collect'};
557 my $gsdl_cgi = $self->{'gsdl_cgi'};
558
559 if ($baseaction::authentication_enabled) {
560 # Ensure the user is allowed to edit this collection
561 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
562 }
563
564 if($where =~ m/index/) {
565 my $site = $self->{'site'};
566 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
567 $gsdl_cgi->checked_chdir($collect_dir);
568 }
569
570 # Make sure the collection isn't locked by someone else
571 $self->lock_collection($username, $collect);
572
573
574 # now at last can set the metadata. $where can specify multiple
575 # $where is of the form: import|archives|index, or a subset thereof
576
577 #my @whichdirs = split('\|', $where);
578
579 # just check whether $where contains import/archives/index/live in turn, and
580 # for each case, process it accordingly
581 if($where =~ m/import/) {
582 $self->_set_import_metadata(@_);
583 }
584
585 if($where =~ m/archives/) {
586
587 # look up docID arg which is optional to set_metadata because it's optional
588 # to set_import, but which is compulsory to set_archives_metadata
589 my $docid = $self->{'d'};
590 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
591 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
592 }
593 # we have a docid, so can set archives meta
594 $self->_set_archives_metadata(@_);
595 }
596
597 if($where =~ m/index/) {
598
599 # look up docID arg which is optional to set_metadata because it's optional
600 # to set_import, but which is compulsory to set_archives_metadata
601 my $docid = $self->{'d'};
602 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
603 $gsdl_cgi->generate_error("No docid (d=...) specified.");
604 }
605 # we have a docid, so can set index meta
606 $self->_set_index_metadata(@_);
607 }
608
609 if($where =~ m/live/) {
610 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
611 }
612
613 # Release the lock once it is done
614 $self->unlock_collection($username, $collect);
615}
616
617sub set_metadata_array
618{
619 my $self = shift @_;
620
621 my $where = $self->{'where'};
622 if(!$where || ($where =~ m/^\s*$/)) {
623 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
624 return;
625 }
626
627 my $username = $self->{'username'};
628 my $collect = $self->{'collect'};
629 my $gsdl_cgi = $self->{'gsdl_cgi'};
630
631 if ($baseaction::authentication_enabled) {
632 # Ensure the user is allowed to edit this collection
633 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
634 }
635
636 # Not sure if the checked_chdir into the collect directory is necessary,
637 # since lock_collection does a further chdir into the collection directory anyway
638 # But including the stmt during this code reorganisation to preserve as-is what used to happen
639 my $site = $self->{'site'};
640 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
641 $gsdl_cgi->checked_chdir($collect_dir);
642
643 # Make sure the collection isn't locked by someone else
644 $self->lock_collection($username, $collect);
645
646 if($where =~ m/import/) {
647 $self->_set_import_metadata_array(@_);
648 }
649 if($where =~ m/archives/) {
650 $self->_set_archives_metadata_array(@_);
651 }
652 if($where =~ m/index/) {
653 $self->_set_index_metadata_array(@_);
654 }
655 if($where =~ m/live/) {
656 $self->_set_live_metadata_array(@_);
657 }
658
659 # Release the lock once it is done
660 $self->unlock_collection($username, $collect);
661}
662
663sub _set_index_metadata_array
664{
665 my $self = shift @_;
666
667 my $collect = $self->{'collect'};
668 my $gsdl_cgi = $self->{'gsdl_cgi'};
669
670 # Obtain the collect dir
671 my $site = $self->{'site'};
672 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $status = -1;
690 my $docid = $doc_array_rec->{'docid'};
691
692 push(@all_docids,$docid);
693
694 my $metaname = $doc_array_rec->{'metaname'};
695 if(defined $metaname) {
696 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
697 my $metavalue = $doc_array_rec->{'metavalue'};
698 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
699
700 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
701 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
702 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
703
704 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
705 $metaname = $metatable_rec->{'metaname'};
706 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
707 my $metapos = undef;
708 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
709
710 foreach my $metavalue ( @$metavals ) { # metavals is an array
711 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
712 if($metamode eq "override") { # now, having overridden the metavalue for the first,
713 # need to accumulate subsequent metavals for this metaname, else the just-assigned
714 # metavalue for this metaname will be lost
715 $metamode = "accumulate";
716 }
717 }
718 }
719 }
720
721 if ($status != 0) {
722 # Catch error if set infodb entry failed
723 $global_status = $status;
724 $global_mess .= "Failed to set metadata key: $docid\n";
725 $global_mess .= "Exit status: $status\n";
726 $global_mess .= "System Error Message: $!\n";
727 $global_mess .= "-" x 20;
728 }
729 }
730
731 if ($global_status != 0) {
732 $global_mess .= "PATH: $ENV{'PATH'}\n";
733 $gsdl_cgi->generate_error($global_mess);
734 }
735 else {
736 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
737 $gsdl_cgi->generate_ok_message($mess);
738 }
739}
740
741sub set_index_metadata_array
742{
743 my $self = shift @_;
744
745 my $username = $self->{'username'};
746 my $collect = $self->{'collect'};
747 my $gsdl_cgi = $self->{'gsdl_cgi'};
748
749 if ($baseaction::authentication_enabled) {
750 # Ensure the user is allowed to edit this collection
751 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
752 }
753
754 # Obtain the collect dir
755 my $site = $self->{'site'};
756 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
757
758 $gsdl_cgi->checked_chdir($collect_dir);
759
760
761 # Make sure the collection isn't locked by someone else
762 $self->lock_collection($username, $collect);
763
764 $self->_set_index_metadata_array(@_);
765
766 # Release the lock once it is done
767 $self->unlock_collection($username, $collect);
768}
769
770# experimental, newly added in and untested
771sub _set_live_metadata_array
772{
773 my $self = shift @_;
774
775 my $collect = $self->{'collect'};
776 my $gsdl_cgi = $self->{'gsdl_cgi'};
777
778 # Obtain the collect dir
779 my $site = $self->{'site'};
780 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
781
782 # look up additional args
783 my $infodbtype = $self->{'infodbtype'};
784 # To people who know $collect_tail please add some comments
785 # Obtain path to the database
786 my $collect_tail = $collect;
787 $collect_tail =~ s/^.*[\/|\\]//;
788 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
789 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
790
791
792 my $json_str = $self->{'json'};
793 my $doc_array = decode_json $json_str;
794
795
796 my $global_status = 0;
797 my $global_mess = "";
798
799 my @all_docids = ();
800
801
802 foreach my $doc_array_rec ( @$doc_array ) {
803
804 my $status = -1;
805 my $docid = $doc_array_rec->{'docid'};
806
807 push(@all_docids,$docid);
808
809 my $metaname = $doc_array_rec->{'metaname'};
810 if(defined $metaname) {
811 my $dbkey = "$docid.$metaname";
812 my $metavalue = $doc_array_rec->{'metavalue'};
813
814 # Set the new value
815 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
816 $status = system($cmd);
817
818 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
819 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
820 foreach my $metatable_rec ( @$metatable ) {
821 $metaname = $metatable_rec->{'metaname'};
822 my $dbkey = "$docid.$metaname";
823
824 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
825 foreach my $metavalue ( @$metavals ) {
826 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
827 $status = system($cmd);
828 }
829 }
830
831 }
832
833 if ($status != 0) {
834 # Catch error if gdbmget failed
835 $global_status = $status;
836 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
837 $global_mess .= "Exit status: $status\n";
838 $global_mess .= "System Error Message: $!\n";
839 $global_mess .= "-" x 20;
840 }
841 }
842
843 if ($global_status != 0) {
844 $global_mess .= "PATH: $ENV{'PATH'}\n";
845 $gsdl_cgi->generate_error($global_mess);
846 }
847 else {
848 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
849 $gsdl_cgi->generate_ok_message($mess);
850 }
851}
852
853sub set_live_metadata_array
854{
855 my $self = shift @_;
856
857 my $username = $self->{'username'};
858 my $collect = $self->{'collect'};
859 my $gsdl_cgi = $self->{'gsdl_cgi'};
860
861 if ($baseaction::authentication_enabled) {
862 # Ensure the user is allowed to edit this collection
863 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
864 }
865
866 # Obtain the collect dir
867 my $site = $self->{'site'};
868 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
869
870 $gsdl_cgi->checked_chdir($collect_dir);
871
872 # Make sure the collection isn't locked by someone else
873 $self->lock_collection($username, $collect);
874
875 $self->_set_live_metadata_array(@_);
876
877 # Release the lock once it is done
878 $self->unlock_collection($username, $collect);
879}
880
881
882sub dxml_metadata
883{
884 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
885 my $metaname = $parser->{'parameters'}->{'metaname'};
886 my $metamode = $parser->{'parameters'}->{'metamode'};
887
888###!!! print STDERR "**** Processing closing </Metadata> tag\n";
889
890 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
891
892 # Find the right metadata tag and checks if we are going to
893 # override it
894 #
895 # Note: This over writes the first metadata block it
896 # encountered. If there are multiple Sections in the doc.xml, it
897 # might not behave as you would expect
898
899 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
900## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n" if (defined $opt_doc_secnum);
901## print STDERR "**** metamode = $metamode\n";
902
903 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
904 {
905 my $name_attr = $attrHash->{'name'};
906 # print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
907 if (($name_attr eq $metaname) && ($metamode eq "override"))
908 {
909 if (!defined $parser->{'parameters'}->{'poscount'})
910 {
911 $parser->{'parameters'}->{'poscount'} = 0;
912 }
913 else
914 {
915 $parser->{'parameters'}->{'poscount'}++;
916 }
917
918 if (defined $parser->{'parameters'}->{'metapos'}) { # if pos defined, ignore any prevmeta if also defined, and just wait for pos match
919
920 if ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) {
921 ###print STDERR "#### got pos match!!\n";
922 # Get the value and override the current value
923 my $metavalue = $parser->{'parameters'}->{'metavalue'};
924 $attrHash->{'_content'} = $metavalue;
925
926 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
927 # we're not setting metamode to override here.
928
929 # Don't want it to wipe out any other pieces of metadata
930 $parser->{'parameters'}->{'metamode'} = "done";
931 }
932 }
933 elsif (defined $parser->{'parameters'}->{'prevmetavalue'}) { # if no pos defined but prevmeta defined, then wait for prevmeta match
934 if ($parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) {
935 ###print STDERR "### prev meta value matches\n";
936 my $metavalue = $parser->{'parameters'}->{'metavalue'};
937 $attrHash->{'_content'} = $metavalue;
938
939 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
940 # we're not setting metamode to override here.
941
942 $parser->{'parameters'}->{'metamode'} = "done";
943
944 }
945 }
946 # Note that before commit rev 32076, if neither metapos nor prevmeta defined, then code tried to set pos 0 for that metaname to the new value
947 # However that is not how the code works now (and couldn't get it to work that way yet after rev 32076).
948 # What it does now, and what Dr Bainbridge approved of is that:
949 # If no pos or prevmeta defined, then on override, the metavalue set for the specificed metaname becomes the ONLY instance of that metaname
950 # So any earlier instances of that metaname is removed and replaced by the single new instance.
951 }
952 }
953
954 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
955 return [$tagname => $attrHash];
956}
957
958# This method exists purely for catching invalid section numbers that the client
959# requested to edit. Once the parser has reached the end (the final </Archive> tag),
960# we've seen all the Sections in the doc.xml, and none of their section nums matched
961# if the metamode has not been set to 'done' by then.
962sub dxml_archive
963{
964 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
965 my $metamode = $parser->{'parameters'}->{'metamode'};
966
967 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
968 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
969
970 # print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
971
972 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
973 #print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
974
975 my $metaname = $parser->{'parameters'}->{'metaname'};
976 my $metavalue = $parser->{'parameters'}->{'metavalue'};
977
978 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
979 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
980 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
981 print STDERR "\tin section $opt_doc_secnum.\n";
982 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
983 }
984
985 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
986 return [$tagname => $attrHash];
987}
988
989sub dxml_description
990{
991 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
992 my $metamode = $parser->{'parameters'}->{'metamode'};
993
994 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
995 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
996
997###!!! print STDERR "**** Processing a closing </Description> tag \n";
998 # print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
999
1000 # Accumulate the metadata
1001
1002 # We'll be accumulating metadata at this point if we haven't found and therefore
1003 # haven't processed the metadata yet.
1004 # For subsections, this means that if we're at a matching subsection, but haven't
1005 # found the correct metaname to override in that subsection, we accumulate it as new
1006 # meta in the subsection by adding it to the current description.
1007 # If there's no subsection info for the metadata, it will accumulate at the top level
1008 # section description if we hadn't found a matching metaname to override at this point.
1009
1010 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1011 # at the toplevel section. In the latter case, it means we want to process meta in the
1012 # toplevel section. So the eq check between the values below will work in all cases.
1013
1014 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1015 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1016 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1017
1018 if ($curr_secnum eq $opt_doc_secnum
1019 && ($metamode eq "accumulate" || $metamode eq "override")) {
1020 # note, metamode would have been set to done if a metadata operation had already succeeded.
1021 # If we get here, we still have the metadata to append.
1022 if ($metamode eq "override") {
1023 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
1024 }
1025
1026 # If we get to here and metamode is override, this means there
1027 # was no existing value to overide => treat as an append operation
1028
1029 # Tack a new metadata tag on to the end of the <Metadata>+ block
1030 my $metaname = $parser->{'parameters'}->{'metaname'};
1031 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1032
1033 my $metadata_attr = {
1034 '_content' => $metavalue,
1035 'name' => $metaname,
1036 'mode' => "accumulate"
1037 };
1038
1039 my $append_metadata = [ "Metadata" => $metadata_attr ];
1040 my $description_content = $attrHash->{'_content'};
1041 print "Appending metadata to doc.xml\n";
1042
1043 if (ref($description_content)) {
1044 # got some existing interesting nested content
1045 push(@$description_content, " ", $append_metadata, "\n ");
1046 }
1047 else {
1048 #description_content is most likely a string such as "\n"
1049 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1050 }
1051
1052 $parser->{'parameters'}->{'metamode'} = "done";
1053 }
1054 else {
1055 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1056## print STDERR "**** NOT ACCUMULATE?!? \n";
1057 }
1058 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1059 return [$tagname => $attrHash];
1060}
1061
1062sub edit_xml_file
1063{
1064 my $self = shift @_;
1065 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1066
1067 # use XML::Rules to add it in (read in and out again)
1068 my $parser = XML::Rules->new(start_rules => $start_rules,
1069 rules => $rules,
1070 style => 'filter',
1071 output_encoding => 'utf8' );
1072
1073 my $xml_in = "";
1074 if (!open(MIN,"<$filename")) {
1075 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1076 }
1077 else {
1078 # Read all the text in
1079 my $line;
1080 while (defined ($line=<MIN>)) {
1081 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1082 }
1083 close(MIN);
1084
1085 my $MOUT;
1086 if (!open($MOUT,">$filename")) {
1087 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1088 }
1089 else {
1090 # Matched lines will get handled by the call backs
1091## my $xml_out = "";
1092
1093 binmode($MOUT,":utf8");
1094 $parser->filter($xml_in,$MOUT, $options);
1095
1096# binmode(MOUT,":utf8");
1097# print MOUT $xml_out;
1098 close($MOUT);
1099 }
1100 }
1101
1102 # copy across any custom error information that was stored during parsing
1103 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1104}
1105
1106sub edit_doc_xml
1107{
1108 my $self = shift @_;
1109 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1110
1111 my $info_mess = <<RAWEND;
1112****************************
1113 edit_doc_xml()
1114****************************
1115RAWEND
1116
1117 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1118 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1119 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1120 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1121 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1122 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1123 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1124
1125 $info_mess .= "****************************\n";
1126
1127 $gsdl_cgi->generate_message($info_mess);
1128
1129 # To monitor which section/subsection number we are in
1130 my @start_rules =
1131 ( 'Section' => \&dxml_start_section );
1132
1133 # use XML::Rules to add it in (read in and out again)
1134 # Set the call back functions
1135 my @rules =
1136 ( _default => 'raw',
1137 'Metadata' => \&dxml_metadata,
1138 'Description' => \&dxml_description,
1139 'Archive' => \&dxml_archive); # just for catching errors at end
1140
1141 # Sets the parameters
1142 my $options = { 'metaname' => $metaname,
1143 'metapos' => $metapos,
1144 'metavalue' => $metavalue,
1145 'metamode' => $metamode,
1146 'prevmetavalue' => $prevmetavalue };
1147
1148 if (defined $opt_secnum) {
1149 $options->{'secnum'} = $opt_secnum;
1150 }
1151
1152 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1153}
1154
1155sub set_archives_metadata_entry
1156{
1157 my $self = shift @_;
1158 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $dv, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1159
1160 my $info_mess = <<RAWEND;
1161****************************
1162 set_archives_metadata_entry()
1163****************************
1164RAWEND
1165
1166 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1167 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1168 $info_mess .= " collect = $collect\n" if defined($collect);
1169 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1170 $info_mess .= " docid = $docid\n" if defined($docid);
1171 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1172 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1173 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1174 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1175 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1176
1177 $info_mess .= "****************************\n";
1178
1179 $gsdl_cgi->generate_message($info_mess);
1180
1181 # Obtain the doc.xml path for the specified docID
1182 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1183
1184 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1185 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1186 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1187
1188 # check if request if for file-level doc-version history 'nminus-<n>' version
1189 if (defined $dv && ($dv ne "")) {
1190 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1191
1192 my ($doc_xml_tailname, $doc_xml_dirname) = File::Basename::fileparse($doc_xml_file);
1193 $doc_xml_file = &util::filename_cat($doc_xml_dirname,$FLDV_HISTORY_DIR,$dv,$doc_xml_tailname);
1194 }
1195
1196 # The $doc_xml_file is relative to the archives, and now let's get the full path
1197 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1198
1199 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1200
1201 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1202 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1203 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1204 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1205 print "override mode with no pos or prev value set. Removing all existing values for $metaname, and setting mode to accumulate for the new value.\n";
1206 # remove all values of $metaname metadata
1207 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1208 $metamode = "accumulate";
1209 }
1210 # Edit the doc.xml file with the specified metadata name, value and position.
1211 # TODO: there is a potential problem here as this edit_doc_xml function
1212 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1213 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1214
1215 # dxml_metadata method ignores metapos if metamode anything other than override
1216 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1217 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1218
1219 # return 0; # return 0 for now to indicate no error
1220 return (defined $self->{'error_msg'}) ? 1 : 0;
1221}
1222
1223
1224sub set_archives_metadata
1225{
1226 my $self = shift @_;
1227
1228 my $username = $self->{'username'};
1229 my $collect = $self->{'collect'};
1230 my $gsdl_cgi = $self->{'gsdl_cgi'};
1231
1232 if ($baseaction::authentication_enabled) {
1233 # Ensure the user is allowed to edit this collection
1234 $self->authenticate_user($username, $collect);
1235 }
1236
1237 # Make sure the collection isn't locked by someone else
1238 $self->lock_collection($username, $collect);
1239
1240 $self->_set_archives_metadata(@_);
1241
1242 # Release the lock once it is done
1243 $self->unlock_collection($username, $collect);
1244}
1245
1246sub _set_archives_metadata_array
1247{
1248 my $self = shift @_;
1249
1250 my $collect = $self->{'collect'};
1251 my $gsdl_cgi = $self->{'gsdl_cgi'};
1252
1253 # Obtain the collect dir
1254 my $site = $self->{'site'};
1255 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1256
1257 # look up additional args
1258
1259 my $infodbtype = $self->{'infodbtype'};
1260
1261 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1262
1263 my $json_str = $self->{'json'};
1264 my $doc_array = decode_json $json_str;
1265
1266
1267 my $global_status = 0;
1268 my $global_mess = "";
1269
1270 my @all_docids = ();
1271
1272 foreach my $doc_array_rec ( @$doc_array ) {
1273 my $status = -1;
1274 my $docid = $doc_array_rec->{'docid'};
1275 my $dv = $doc_array_rec->{'dv'};
1276
1277 push(@all_docids,$docid);
1278
1279 my $metaname = $doc_array_rec->{'metaname'};
1280 if(defined $metaname) {
1281
1282 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1283
1284 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1285 my $metavalue = $doc_array_rec->{'metavalue'};
1286 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1287
1288
1289 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1290 # make "accumulate" the default (less destructive, as it won't actually
1291 # delete any existing values)
1292 $metamode = "accumulate";
1293 }
1294
1295 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
1296 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1297 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1298 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1299
1300 foreach my $metatable_rec ( @$metatable ) {
1301 $metaname = $metatable_rec->{'metaname'};
1302 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1303 my $metapos = undef;
1304 my $prevmetavalue = undef;
1305 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1306
1307 foreach my $metavalue ( @$metavals ) {
1308 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1309 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1310
1311 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1312 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1313 # metavalue for this metaname will be lost
1314 $metamode = "accumulate";
1315 }
1316 }
1317 }
1318 }
1319
1320 if ($status != 0) {
1321 # Catch error if set infodb entry failed
1322 $global_status = $status;
1323 $global_mess .= "Failed to set metadata key: $docid\n";
1324 $global_mess .= "Exit status: $status\n";
1325 $global_mess .= "System Error Message: $!\n";
1326 $global_mess .= "-" x 20 . "\n";
1327 }
1328 }
1329
1330 if ($global_status != 0) {
1331 $global_mess .= "PATH: $ENV{'PATH'}\n";
1332 $gsdl_cgi->generate_error($global_mess);
1333 }
1334 else {
1335 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1336 $gsdl_cgi->generate_ok_message($mess);
1337 }
1338}
1339
1340sub set_archives_metadata_array
1341{
1342 my $self = shift @_;
1343
1344 my $username = $self->{'username'};
1345 my $collect = $self->{'collect'};
1346 my $gsdl_cgi = $self->{'gsdl_cgi'};
1347
1348 if ($baseaction::authentication_enabled) {
1349 # Ensure the user is allowed to edit this collection
1350 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1351 }
1352
1353 # Obtain the collect dir
1354 my $site = $self->{'site'};
1355 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1356
1357 $gsdl_cgi->checked_chdir($collect_dir);
1358
1359 # Make sure the collection isn't locked by someone else
1360 $self->lock_collection($username, $collect);
1361
1362 $self->_set_archives_metadata_array(@_);
1363
1364 # Release the lock once it is done
1365 $self->unlock_collection($username, $collect);
1366}
1367
1368
1369
1370# the version of set_archives_meta that doesn't do authentication
1371sub _set_archives_assocfile
1372{
1373 my $self = shift @_;
1374
1375 my $collect = $self->{'collect'};
1376 my $gsdl_cgi = $self->{'gsdl_cgi'};
1377 my $infodbtype = $self->{'infodbtype'};
1378
1379 # Obtain the collect and archive dir
1380 my $site = $self->{'site'};
1381 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1382 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1383
1384 # look up additional args
1385 my $docid = $self->{'d'};
1386
1387 my $assocname = $self->{'assocname'};
1388
1389 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1390 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1391
1392 my $doc_file = $doc_rec->{'doc-file'}->[0];
1393
1394 # check if request if for file-level doc-version history 'nminus-<n>' version
1395 my $dv = $self->{'dv'};
1396
1397 my ($unused_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1398
1399 my $output_file = $assocname;
1400
1401 if (defined $dv && ($dv ne "")) {
1402 # Need to insert '_fldv_history/nminus-<n>' into output_file
1403
1404 $output_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$output_file);
1405 }
1406 else {
1407 $output_file = &util::filename_cat($doc_dirname,$output_file);
1408 }
1409
1410 my $output_filename = &util::filename_cat($archive_dir, $output_file);
1411
1412 my $filedata = $self->{'filedata'};
1413 my $FIN = $self->{'fileupload'};
1414
1415 if ((!defined $filedata) && (!defined $FIN)) {
1416 $gsdl_cgi->generate_error("set-archives-assocfile: either 'fileupload' or 'filedata' must be set");
1417 }
1418 else {
1419
1420 my $had_error = 0;
1421 my $had_error_mess = undef;
1422
1423 if (defined $filedata) {
1424 if (open(FOUT,">$output_filename")) {
1425 print FOUT $filedata;
1426 close(FOUT);
1427 }
1428 else {
1429 $had_error = 1;
1430 $had_error_mess = "Failed to save filedata to: $output_filename";
1431 print STDERR "Error - $had_error_mess:\n$!\n";
1432 }
1433 }
1434 else {
1435 my $FIN = $self->{'fileupload'};
1436 if (open(FOUT,">$output_filename")) {
1437 binmode(FOUT, ":raw");
1438
1439 while (1) {
1440 my $buffer = "";
1441 my $bytes_read = read($FIN, $buffer, 1024);
1442
1443 if (defined $bytes_read) {
1444 if ($bytes_read>0) {
1445 print FOUT $buffer;
1446 }
1447 last if $bytes_read < 1024;
1448 }
1449 else {
1450 $had_error = 1;
1451 $had_error_mess = "set-archives-assocfile: Failed to open uploaded file for reading: $!\n";
1452 print STDERR "Error - $had_error_mess";
1453 last;
1454 }
1455 }
1456
1457 close($FIN);
1458 close(FOUT);
1459 }
1460 else {
1461 $had_error = 1;
1462 $had_error_mess = "Failed to save file uploaded file as: $output_filename";
1463 print STDERR "Error - $had_error_mess:\n$!\n";
1464 }
1465 }
1466
1467 if (!$had_error) {
1468 my $mess = "set-archives-assocfile successful save uploaded content into 'archives' as: $output_file\n";
1469 $gsdl_cgi->generate_ok_message($mess);
1470 }
1471 else {
1472 $gsdl_cgi->generate_error($had_error_mess);
1473 }
1474 }
1475}
1476
1477
1478sub set_archives_assocfile
1479{
1480 my $self = shift @_;
1481
1482 my $username = $self->{'username'};
1483 my $collect = $self->{'collect'};
1484 my $gsdl_cgi = $self->{'gsdl_cgi'};
1485
1486 if ($baseaction::authentication_enabled) {
1487 # Ensure the user is allowed to edit this collection
1488 $self->authenticate_user($username, $collect);
1489 }
1490
1491
1492 # Make sure the collection isn't locked by someone else
1493 $self->lock_collection($username, $collect);
1494
1495 $self->_set_archives_assocfile(@_);
1496
1497 # Release the lock once it is done
1498 $self->unlock_collection($username, $collect);
1499}
1500
1501
1502sub _remove_archives_metadata
1503{
1504 my $self = shift @_;
1505
1506 my $collect = $self->{'collect'};
1507 my $gsdl_cgi = $self->{'gsdl_cgi'};
1508 my $infodbtype = $self->{'infodbtype'};
1509
1510
1511 # Obtain the collect and archive dir
1512 my $site = $self->{'site'};
1513 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1514
1515 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1516
1517 # look up additional args
1518 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1519
1520 my $metaname = $self->{'metaname'};
1521 my $metapos = $self->{'metapos'};
1522 my $metavalue = $self->{'metavalue'};
1523
1524 my $metamode = $self->{'metamode'};
1525
1526 my $dv = $self->{'dv'};
1527
1528 my $status = $self->remove_archives_metadata_entry($gsdl_cgi,$archive_dir, $infodbtype,
1529 $docid,$docid_secnum,$dv, $metaname,$metapos,$metavalue,$metamode);
1530
1531 if ($status == 0)
1532 {
1533 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1534 $mess .= " $metaname";
1535 $mess .= "->[$metapos]" if (defined $metapos);
1536 $mess .= " ($metavalue)" if (defined $metavalue);
1537 $gsdl_cgi->generate_ok_message($mess);
1538 }
1539 else
1540 {
1541 my $mess .= "Failed to remove archives metadata key: $docid\n";
1542 $mess .= "Exit status: $status\n";
1543 $mess .= "System Error Message: $!\n";
1544 $mess .= "-" x 20 . "\n";
1545
1546 $gsdl_cgi->generate_error($mess);
1547 }
1548
1549 #return $status; # in case calling functions have a use for this
1550}
1551
1552sub remove_archives_metadata_entry
1553{
1554 my $self = shift @_;
1555 my ($gsdl_cgi, $archive_dir, $infodbtype, $docid, $docid_secnum, $dv, $metaname, $metapos, $metavalue, $metamode) = @_;
1556
1557 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1558 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1559
1560 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1561 # default to deleting the first metavalue for the given metaname
1562 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1563 # seem to get deleted in one single remove_archives_meta action invocation.
1564 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1565 # deleted, both get deleted.
1566 if(!defined $metapos && !defined $metavalue) {
1567 $metapos = 0;
1568 }
1569
1570 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1571
1572 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1573 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1574
1575 # This now stores the full pathname
1576 my $doc_file = $doc_rec->{'doc-file'}->[0];
1577
1578 # check if request if for file-level doc-version history 'nminus-<n>' version
1579 if (defined $dv && ($dv ne "")) {
1580 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1581
1582 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1583 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
1584 }
1585
1586 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
1587
1588 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1589# my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, undef, $docid_secnum);
1590
1591 return $status;
1592}
1593
1594sub remove_archives_metadata
1595{
1596 my $self = shift @_;
1597
1598 my $username = $self->{'username'};
1599 my $collect = $self->{'collect'};
1600 my $gsdl_cgi = $self->{'gsdl_cgi'};
1601
1602 if ($baseaction::authentication_enabled)
1603 {
1604 # Ensure the user is allowed to edit this collection
1605 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1606 }
1607
1608 # Make sure the collection isn't locked by someone else
1609 $self->lock_collection($username, $collect);
1610
1611 $self->_remove_archives_metadata(@_);
1612
1613 # Release the lock once it is done
1614 $self->unlock_collection($username, $collect);
1615}
1616
1617sub remove_from_doc_xml
1618{
1619 my $self = shift @_;
1620 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1621
1622 my @start_rules = ('Section' => \&dxml_start_section);
1623
1624 # Set the call-back functions for the metadata tags
1625 my @rules =
1626 (
1627 _default => 'raw',
1628 'Metadata' => \&rfdxml_metadata
1629 );
1630
1631 my $parser = XML::Rules->new
1632 (
1633 start_rules => \@start_rules,
1634 rules => \@rules,
1635 style => 'filter',
1636 output_encoding => 'utf8',
1637# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1638 stripspaces => 2|0|0 # ineffectual
1639 );
1640
1641 my $status = 0;
1642 my $xml_in = "";
1643 if (!open(MIN,"<$doc_xml_filename"))
1644 {
1645 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1646 $status = 1;
1647 }
1648 else
1649 {
1650 # Read them in
1651 my $line;
1652 while (defined ($line=<MIN>)) {
1653 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1654 }
1655 close(MIN);
1656
1657 # Filter with the call-back functions
1658 my $xml_out = "";
1659
1660 my $MOUT;
1661 if (!open($MOUT,">$doc_xml_filename")) {
1662 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1663 $status = 1;
1664 }
1665 else {
1666 binmode($MOUT,":utf8");
1667 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1668 close($MOUT);
1669 }
1670 }
1671 return $status;
1672}
1673
1674sub rfdxml_metadata
1675{
1676 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1677
1678 # For comparisons, toplevel section is indicated by ""
1679 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1680 my $secid = $parser->{'parameters'}->{'secid'} || "";
1681
1682 if (!($secid eq $curr_sec_num))
1683 {
1684 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1685 return [$tagname => $attrHash];
1686 }
1687
1688 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1689 {
1690 if (!defined $parser->{'parameters'}->{'poscount'})
1691 {
1692 $parser->{'parameters'}->{'poscount'} = 0;
1693 }
1694 else
1695 {
1696 $parser->{'parameters'}->{'poscount'}++;
1697 }
1698
1699 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1700 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
1701 return [];
1702 }
1703
1704 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1705 {
1706 return [];
1707 }
1708
1709 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1710 {
1711 return [];
1712 }
1713 }
1714
1715 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1716 return [$tagname => $attrHash];
1717}
1718
1719sub mxml_metadata
1720{
1721 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1722 my $metaname = $parser->{'parameters'}->{'metaname'};
1723 my $metamode = $parser->{'parameters'}->{'metamode'};
1724
1725 # Report error if we don't see FileName tag before this
1726 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1727
1728 # Don't do anything if we are not in the right FileSet
1729 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1730 if ($file_regexp =~ /\.\*/) {
1731 # Only interested in a file_regexp if it specifies precisely one
1732 # file.
1733 # So, skip anything with a .* in it as it is too general
1734## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1735 return [$tagname => $attrHash];
1736 }
1737 my $src_file = $parser->{'parameters'}->{'src_file'};
1738 if (!($src_file =~ /$file_regexp/)) {
1739 return [$tagname => $attrHash];
1740 }
1741## print STDERR "*** mxl metamode = $metamode\n";
1742
1743 # Find the right metadata tag and checks if we are going to override it
1744 my $name_attr = $attrHash->{'name'};
1745 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1746
1747 # now metadata.xml functions need to keep track of metapos
1748 if (!defined $parser->{'parameters'}->{'poscount'})
1749 {
1750 $parser->{'parameters'}->{'poscount'} = 0;
1751 }
1752 else
1753 {
1754 $parser->{'parameters'}->{'poscount'}++;
1755 }
1756
1757 # If either the metapos or prevmetavalue is set,
1758 # get the value and override the current value
1759 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1760
1761 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1762 {
1763 $attrHash->{'_content'} = $metavalue;
1764
1765 ## print STDERR "**** overriding metadata.xml\n";
1766
1767 # Don't want it to wipe out any other pieces of metadata
1768 $parser->{'parameters'}->{'metamode'} = "done";
1769 }
1770 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1771 {
1772 $attrHash->{'_content'} = $metavalue;
1773 $parser->{'parameters'}->{'metamode'} = "done";
1774 }
1775 }
1776
1777 # mxml_description will process the metadata if metadata is accumulate,
1778 # or if we haven't found the metadata to override
1779
1780 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1781 return [$tagname => $attrHash];
1782}
1783
1784
1785sub mxml_description
1786{
1787 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1788 my $metamode = $parser->{'parameters'}->{'metamode'};
1789
1790 # Failed... Report error if we don't see FileName tag before this
1791 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1792
1793 # Don't do anything if we are not in the right FileSet
1794 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1795 if ($file_regexp =~ m/\.\*/) {
1796 # Only interested in a file_regexp if it specifies precisely one
1797 # file.
1798 # So, skip anything with a .* in it as it is too general
1799 return [$tagname => $attrHash];
1800 }
1801 my $src_file = $parser->{'parameters'}->{'src_file'};
1802
1803 if (!($src_file =~ m/$file_regexp/)) {
1804 return [$tagname => $attrHash];
1805 }
1806
1807 # Accumulate the metadata block to the end of the description block
1808 # Note: This adds metadata block to all description blocks, so if there are
1809 # multiple FileSets, it will add to all of them
1810 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1811
1812 # if metamode was "override" but get to here then it failed to
1813 # find an item to override, in which case it should append its
1814 # value to the end, just like the "accumulate" mode
1815
1816 if ($metamode eq "override") {
1817 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1818 }
1819
1820 # tack a new metadata tag on to the end of the <Metadata>+ block
1821 my $metaname = $parser->{'parameters'}->{'metaname'};
1822 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1823
1824 my $metadata_attr = { '_content' => $metavalue,
1825 'name' => $metaname,
1826 'mode' => "accumulate" };
1827
1828 my $append_metadata = [ "Metadata" => $metadata_attr ];
1829 my $description_content = $attrHash->{'_content'};
1830
1831## print STDERR "*** appending to metadata.xml\n";
1832
1833 # append the new metadata element to the end of the current
1834 # content contained inside this tag
1835 if (ref($description_content) eq "") {
1836 # => string or numeric literal
1837 # this is caused by a <Description> block has no <Metadata> child elements
1838 # => set up an empty array in '_content'
1839 $attrHash->{'_content'} = [ "\n" ];
1840 $description_content = $attrHash->{'_content'};
1841 }
1842
1843 push(@$description_content, " ", $append_metadata, "\n ");
1844 $parser->{'parameters'}->{'metamode'} = "done";
1845 }
1846
1847 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1848 return [$tagname => $attrHash];
1849}
1850
1851
1852sub mxml_fileset
1853{
1854 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1855
1856 # Initilise the current_file
1857 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1858 # FileName tag must come before Description tag
1859 $parser->{'parameters'}->{'current_file'} = "";
1860
1861 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1862 return [$tagname => $attrHash];
1863}
1864
1865sub mxml_directorymetadata
1866{
1867 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1868
1869 # if we haven't processed the metadata when we reach the end of metadata.xml
1870 # it's because there's no particular FileSet element whose FileName matched
1871 # In which case, add a new FileSet for this FileName
1872 my $metamode = $parser->{'parameters'}->{'metamode'};
1873 if($metamode ne "done") {
1874
1875 if ($metamode eq "override") {
1876 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1877 }
1878
1879 # If we get to here and metamode is override, this means there
1880 # was no existing value to overide => treat as an append operation
1881
1882 # Create a new FileSet element and append to DirectoryMetadata
1883 # <FileSet>
1884 # <FileName>src_file</FileName>
1885 # <Description>
1886 # <Metadata mode="" name="">metavalue</Metadata>
1887 # </Description>
1888 # </FileSet>
1889 my $src_file = $parser->{'parameters'}->{'src_file'};
1890 my $metaname = $parser->{'parameters'}->{'metaname'};
1891 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1892 my $metadata_attr = {
1893 '_content' => $metavalue,
1894 'name' => $metaname,
1895 'mode' => "accumulate"
1896 };
1897 my $append_metadata = [ "Metadata" => $metadata_attr ];
1898 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
1899 my $description_element = [ "Description" => $description_attr ];
1900
1901 #_content is not an attribute, it's special and holds the children of this element
1902 # including the textnode value embedded in this element if any.
1903 my $filename_attr = {'_content' => $src_file};
1904 my $filename_element = [ "FileName" => $filename_attr ];
1905
1906 my $fileset_attr = {};
1907 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1908 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1909
1910
1911 # get children of dirmeta, and push the new FileSet element onto it
1912 print "Appending metadata to metadata.xml\n";
1913 my $dirmeta_content = $attrHash->{'_content'};
1914 if (ref($dirmeta_content)) {
1915 # got some existing interesting nested content
1916 #push(@$dirmeta_content, " ", $fileset ,"\n ");
1917 push(@$dirmeta_content, "\t", $fileset ,"\n");
1918 }
1919 else {
1920 #description_content is most likely a string such as "\n"
1921 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
1922 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1923 }
1924
1925 $parser->{'parameters'}->{'metamode'} = "done";
1926 }
1927 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1928 return [$tagname => $attrHash];
1929}
1930
1931
1932sub edit_metadata_xml
1933{
1934 my $self = shift @_;
1935 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1936
1937 # Set the call-back functions for the metadata tags
1938 my @rules =
1939 ( _default => 'raw',
1940 'FileName' => \&mxml_filename,
1941 'Metadata' => \&mxml_metadata,
1942 'Description' => \&mxml_description,
1943 'FileSet' => \&mxml_fileset,
1944 'DirectoryMetadata' => \&mxml_directorymetadata);
1945
1946 # use XML::Rules to add it in (read in and out again)
1947 my $parser = XML::Rules->new(rules => \@rules,
1948 style => 'filter',
1949 output_encoding => 'utf8',
1950 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1951
1952 if (!-e $metadata_xml_filename) {
1953
1954 if (open(MOUT,">$metadata_xml_filename")) {
1955
1956 my $src_file_re = &util::filename_to_regex($src_file);
1957 # shouldn't the following also be in the above utility routine??
1958 # $src_file_re =~ s/\./\\./g;
1959
1960 print MOUT "<?xml version=\"1.0\"?>\n";
1961 print MOUT "<DirectoryMetadata>\n";
1962 print MOUT " <FileSet>\n";
1963 print MOUT " <FileName>$src_file_re</FileName>\n";
1964 print MOUT " <Description>\n";
1965 print MOUT " </Description>\n";
1966 print MOUT " </FileSet>\n";
1967 print MOUT "</DirectoryMetadata>\n";
1968
1969 close(MOUT);
1970 }
1971 else {
1972 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1973 }
1974 }
1975
1976
1977 my $xml_in = "";
1978 if (!open(MIN,"<$metadata_xml_filename")) {
1979 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1980 }
1981 else {
1982 # Read them in
1983 my $line;
1984 while (defined ($line=<MIN>)) {
1985 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1986 }
1987 close(MIN);
1988
1989 # Filter with the call-back functions
1990 my $xml_out = "";
1991
1992 my $MOUT;
1993 if (!open($MOUT,">$metadata_xml_filename")) {
1994 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1995 }
1996 else {
1997 binmode($MOUT,":utf8");
1998
1999 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2000 # At the moment, I will just hack it!
2001 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2002 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2003 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2004 #print MOUT $xml_out;
2005
2006 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2007 metapos => $metapos,
2008 metavalue => $metavalue,
2009 metamode => $metamode,
2010 src_file => $src_file,
2011 prevmetavalue => $prevmetavalue,
2012 current_file => undef} );
2013 close($MOUT);
2014 }
2015 }
2016}
2017
2018
2019sub set_import_metadata
2020{
2021 my $self = shift @_;
2022
2023 my $username = $self->{'username'};
2024 my $collect = $self->{'collect'};
2025 my $gsdl_cgi = $self->{'gsdl_cgi'};
2026
2027 if ($baseaction::authentication_enabled) {
2028 # Ensure the user is allowed to edit this collection
2029 $self->authenticate_user($username, $collect);
2030 }
2031
2032 # Make sure the collection isn't locked by someone else
2033 $self->lock_collection($username, $collect);
2034
2035 $self->_set_import_metadata(@_);
2036
2037 # Release the lock once it is done
2038 $self->unlock_collection($username, $collect);
2039
2040}
2041
2042sub set_import_metadata_array
2043{
2044 my $self = shift @_;
2045
2046 my $username = $self->{'username'};
2047 my $collect = $self->{'collect'};
2048 my $gsdl_cgi = $self->{'gsdl_cgi'};
2049
2050 if ($baseaction::authentication_enabled) {
2051 # Ensure the user is allowed to edit this collection
2052 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2053 }
2054
2055 # Obtain the collect dir
2056 my $site = $self->{'site'};
2057 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2058
2059 $gsdl_cgi->checked_chdir($collect_dir);
2060
2061 # Make sure the collection isn't locked by someone else
2062 $self->lock_collection($username, $collect);
2063
2064 $self->_set_import_metadata_array(@_);
2065
2066 # Release the lock once it is done
2067 $self->unlock_collection($username, $collect);
2068
2069}
2070
2071
2072sub _set_import_metadata_array
2073{
2074 my $self = shift @_;
2075
2076 my $collect = $self->{'collect'};
2077 my $gsdl_cgi = $self->{'gsdl_cgi'};
2078
2079 # Obtain the collect dir
2080 my $site = $self->{'site'};
2081 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2082
2083 # look up additional args
2084
2085 my $infodbtype = $self->{'infodbtype'};
2086
2087 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2088 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2089
2090 my $json_str = $self->{'json'};
2091 my $doc_array = decode_json $json_str;
2092
2093 my $global_status = 0;
2094 my $global_mess = "";
2095
2096 my @all_docids = ();
2097
2098 foreach my $doc_array_rec ( @$doc_array )
2099 {
2100 my $status = -1;
2101 my $docid = $doc_array_rec->{'docid'};
2102
2103 my ($docid_root,$docid_secnum);
2104 if(defined $docid) {
2105 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2106 # as yet no support for setting subsection metadata in metadata.xml
2107 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2108 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2109 next; # skip this docid in for loop
2110 }
2111 }
2112
2113 push(@all_docids,$docid); # docid_root rather
2114
2115 my $metaname = $doc_array_rec->{'metaname'};
2116 if (defined $metaname) {
2117 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2118 my $metavalue = $doc_array_rec->{'metavalue'};
2119 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2120
2121 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2122 # make "accumulate" the default (less destructive, as it won't actually
2123 # delete any existing values)
2124 $metamode = "accumulate";
2125 }
2126
2127 # adding metapos and prevmetavalue support to import_metadata subroutines
2128 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2129 my $prevmetavalue = $self->{'prevmetavalue'};
2130
2131 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2132
2133 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2134 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2135
2136 foreach my $metatable_rec ( @$metatable ) {
2137 $metaname = $metatable_rec->{'metaname'};
2138 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2139 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2140 # make "accumulate" the default (less destructive, as it won't actually
2141 # delete any existing values)
2142 $metamode = "accumulate";
2143 }
2144
2145 # No support for metapos and prevmetavalue in the JSON metatable substructure
2146 my $metapos = undef;
2147 my $prevmetavalue = undef;
2148 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2149
2150 foreach my $metavalue ( @$metavals ) {
2151 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2152
2153 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2154 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2155 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2156 # metavalue for this metaname will be lost
2157 $metamode = "accumulate";
2158 }
2159 }
2160 }
2161 }
2162 }
2163
2164 # always a success message
2165 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2166 $gsdl_cgi->generate_ok_message($mess);
2167}
2168
2169# always returns true (1)
2170sub set_import_metadata_entry
2171{
2172 my $self = shift @_;
2173 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2174
2175 my $info_mess = <<RAWEND;
2176****************************
2177 set_import_metadata_entry()
2178****************************
2179RAWEND
2180
2181 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2182 $info_mess .= " collect = $collect\n" if defined($collect);
2183 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2184 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2185 $info_mess .= " docid = $docid\n" if defined($docid);
2186 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2187 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2188 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2189 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2190 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2191
2192 $info_mess .= "****************************\n";
2193
2194 $gsdl_cgi->generate_message($info_mess);
2195
2196 # import works with metadata.xml which can have inherited metadata
2197 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2198 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2199 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2200 # what they're doing if they provide a metapos.
2201 if(defined $metapos) {
2202 print STDERR "@@@@ WARNING: metapos defined.\n";
2203 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2204 }
2205
2206 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2207 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2208 my $metadata_xml_file;
2209 my $import_filename = undef;
2210
2211 if (defined $docid) {
2212 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2213 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2214
2215 # This now stores the full pathname
2216 $import_filename = $doc_rec->{'src-file'}->[0];
2217 $import_filename = &util::placeholders_to_abspath($import_filename);
2218
2219 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2220 # as the array version of the method doesn't support the -f parameter yet
2221 my $import_file = $self->{'f'};
2222 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2223 }
2224
2225 # figure out correct metadata.xml file [?]
2226 # Assuming the metadata.xml file is next to the source file
2227 # Note: This will not work if it is using the inherited metadata from the parent folder
2228 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2229 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2230
2231 # If we're overriding everything, then $prevmetavalue=undefined and
2232 # $metamode=override combined with $metapos=undefined
2233 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2234 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2235 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2236## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2237 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2238
2239 }
2240
2241 # Edit the metadata.xml
2242 # Modified by Jeffrey from DL Consulting
2243 # Handle the case where there is one metadata.xml file for multiple FileSets
2244 # The XML filter needs to know whether it is in the right FileSet
2245 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2246 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2247 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2248 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2249 #return 0;
2250 return $metadata_xml_filename;
2251}
2252
2253sub _remove_import_metadata
2254{
2255 my $self = shift @_;
2256
2257 my $collect = $self->{'collect'};
2258 my $gsdl_cgi = $self->{'gsdl_cgi'};
2259 my $infodbtype = $self->{'infodbtype'};
2260
2261 # Obtain the collect and archive dir
2262 my $site = $self->{'site'};
2263 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2264
2265 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2266
2267 # look up additional args
2268 my $docid = $self->{'d'};
2269
2270 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2271
2272 my $metaname = $self->{'metaname'};
2273 my $metapos = $self->{'metapos'};
2274 my $metavalue = $self->{'metavalue'};
2275
2276 my $metamode = $self->{'metamode'};
2277
2278 my $metadata_xml_filename = $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode);
2279
2280 # any errors would have been reported inside the remove_import_metadata_entry
2281
2282 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2283 $mess .= " $metaname";
2284 $mess .= " = $metavalue\n";
2285
2286 $gsdl_cgi->generate_ok_message($mess);
2287
2288 #return $status; # in case calling functions have a use for this
2289}
2290
2291sub remove_import_metadata_entry
2292{
2293 my $self = shift @_;
2294 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode) = @_;
2295
2296 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2297 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2298
2299 if(defined $metavalue) { # metavalue is not a compulsory arg for remove_import_metadata()
2300 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2301 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2302 $metapos = 0;
2303 }
2304
2305 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2306
2307 # import works with metadata.xml which can have inherited metadata
2308 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2309 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2310 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2311 # what they're doing if they provide a metapos.
2312 if(defined $metapos) {
2313 print STDERR "@@@@ WARNING: metapos defined.\n";
2314 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2315 }
2316
2317 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2318 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2319 my $metadata_xml_file;
2320 my $import_filename = undef;
2321 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2322 {
2323 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2324 } else {
2325 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2326
2327 # This now stores the full pathname
2328 $import_filename = $doc_rec->{'src-file'}->[0];
2329 $import_filename = &util::placeholders_to_abspath($import_filename);
2330 }
2331
2332 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2333 {
2334 $gsdl_cgi->generate_error("There is no metadata\n");
2335 }
2336
2337 # figure out correct metadata.xml file [?]
2338 # Assuming the metadata.xml file is next to the source file
2339 # Note: This will not work if it is using the inherited metadata from the parent folder
2340 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2341 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2342
2343 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2344
2345 return $metadata_xml_filename;
2346}
2347
2348sub remove_import_metadata
2349{
2350 my $self = shift @_;
2351
2352 my $username = $self->{'username'};
2353 my $collect = $self->{'collect'};
2354 my $gsdl_cgi = $self->{'gsdl_cgi'};
2355
2356 if ($baseaction::authentication_enabled) {
2357 # Ensure the user is allowed to edit this collection
2358 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2359 }
2360
2361 # Make sure the collection isn't locked by someone else
2362 $self->lock_collection($username, $collect);
2363
2364 $self->_remove_import_metadata(@_);
2365
2366 # Release the lock once it is done
2367 $self->unlock_collection($username, $collect);
2368
2369}
2370
2371sub remove_from_metadata_xml
2372{
2373 my $self = shift @_;
2374 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2375 # metamode generally has no meaning for removing meta, but is used by set_meta
2376 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2377
2378 # Set the call-back functions for the metadata tags
2379 my @rules =
2380 (
2381 _default => 'raw',
2382 'Metadata' => \&rfmxml_metadata,
2383 'FileName' => \&mxml_filename
2384 );
2385
2386 my $parser = XML::Rules->new
2387 (
2388 rules => \@rules,
2389 style => 'filter',
2390 output_encoding => 'utf8',
2391 #normalisespaces => 1,
2392 stripspaces => 2|0|0 # ineffectual
2393 );
2394
2395 my $xml_in = "";
2396 if (!open(MIN,"<$metadata_xml_filename"))
2397 {
2398 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2399 }
2400 else
2401 {
2402 # Read them in
2403 my $line;
2404 while (defined ($line=<MIN>)) {
2405 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
2406 }
2407 close(MIN);
2408
2409 # Filter with the call-back functions
2410 my $xml_out = "";
2411
2412 my $MOUT;
2413 if (!open($MOUT,">$metadata_xml_filename")) {
2414 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2415 }
2416 else {
2417 binmode($MOUT,":utf8");
2418 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2419 close($MOUT);
2420 }
2421 }
2422}
2423
2424sub rfmxml_metadata
2425{
2426 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2427
2428 # metadata.xml does not handle subsections
2429
2430 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2431 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2432 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2433 {
2434 if (!defined $parser->{'parameters'}->{'poscount'})
2435 {
2436 $parser->{'parameters'}->{'poscount'} = 0;
2437 }
2438 else
2439 {
2440 $parser->{'parameters'}->{'poscount'}++;
2441 }
2442
2443 # if overriding but no metapos, then clear all the meta for this metaname
2444 # This is used by set_import_metadata_entry, so don't change this behaviour
2445 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2446 return [];
2447 }
2448
2449 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2450 {
2451 return [];
2452 }
2453 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2454 {
2455 return [];
2456 }
2457 }
2458
2459 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2460 return [$tagname => $attrHash];
2461}
2462
2463sub _remove_live_metadata
2464{
2465 my $self = shift @_;
2466
2467 my $collect = $self->{'collect'};
2468 my $gsdl_cgi = $self->{'gsdl_cgi'};
2469 my $infodbtype = $self->{'infodbtype'};
2470
2471 # Obtain the collect dir
2472 my $site = $self->{'site'};
2473 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2474
2475 # look up additional args
2476 my $docid = $self->{'d'};
2477 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2478 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2479 }
2480
2481 # Generate the dbkey
2482 my $metaname = $self->{'metaname'};
2483 my $dbkey = "$docid.$metaname";
2484
2485 # To people who know $collect_tail please add some comments
2486 # Obtain the live gdbm_db path
2487 my $collect_tail = $collect;
2488 $collect_tail =~ s/^.*[\/|\\]//;
2489 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2490 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2491
2492 # Remove the key
2493 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2494 my $status = system($cmd);
2495 if ($status != 0) {
2496 # Catch error if gdbmdel failed
2497 my $mess = "Failed to delete metadata key: $dbkey\n";
2498
2499 $mess .= "PATH: $ENV{'PATH'}\n";
2500 $mess .= "cmd = $cmd\n";
2501 $mess .= "Exit status: $status\n";
2502 $mess .= "System Error Message: $!\n";
2503
2504 $gsdl_cgi->generate_error($mess);
2505 }
2506 else {
2507 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2508 }
2509
2510}
2511
2512sub remove_live_metadata
2513{
2514 my $self = shift @_;
2515
2516 my $username = $self->{'username'};
2517 my $collect = $self->{'collect'};
2518 my $gsdl_cgi = $self->{'gsdl_cgi'};
2519
2520 if ($baseaction::authentication_enabled) {
2521 # Ensure the user is allowed to edit this collection
2522 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2523 }
2524
2525 # Make sure the collection isn't locked by someone else
2526 $self->lock_collection($username, $collect);
2527
2528 $self->_remove_live_metadata(@_);
2529
2530 $self->unlock_collection($username, $collect);
2531}
2532
2533sub remove_metadata
2534{
2535 my $self = shift @_;
2536
2537 my $where = $self->{'where'};
2538 if(!$where || ($where =~ m/^\s*$/)) {
2539 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2540 return;
2541 }
2542
2543 my $username = $self->{'username'};
2544 my $collect = $self->{'collect'};
2545 my $gsdl_cgi = $self->{'gsdl_cgi'};
2546
2547 if ($baseaction::authentication_enabled) {
2548 # Ensure the user is allowed to edit this collection
2549 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2550 }
2551
2552 # Make sure the collection isn't locked by someone else
2553 $self->lock_collection($username, $collect);
2554
2555 # check which directories need to be processed, specified in $where as
2556 # any combination of import|archives|index|live
2557 if($where =~ m/import/) {
2558 $self->_remove_import_metadata(@_);
2559 }
2560 if($where =~ m/archives/) {
2561 $self->_remove_archives_metadata(@_);
2562 }
2563 if($where =~ m/index/) {
2564 $self->_remove_index_metadata(@_);
2565 }
2566
2567 # Release the lock once it is done
2568 $self->unlock_collection($username, $collect);
2569}
2570
2571# the internal version, without authentication
2572sub _remove_index_metadata
2573{
2574 my $self = shift @_;
2575
2576 my $collect = $self->{'collect'};
2577 my $gsdl_cgi = $self->{'gsdl_cgi'};
2578 my $infodbtype = $self->{'infodbtype'};
2579
2580 # Obtain the collect dir
2581 my $site = $self->{'site'};
2582 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2583
2584 # look up additional args
2585 my $docid = $self->{'d'};
2586 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2587 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2588 }
2589 my $metaname = $self->{'metaname'};
2590 my $metapos = $self->{'metapos'};
2591 my $metavalue = $self->{'metavalue'};
2592
2593 my $status = $self->remove_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
2594
2595 if ($status != 0) {
2596 my $mess = "Failed to remove metadata key: $docid\n";
2597
2598 $mess .= "PATH: $ENV{'PATH'}\n";
2599 $mess .= "Exit status: $status\n";
2600 $mess .= "System Error Message: $!\n";
2601
2602 $gsdl_cgi->generate_error($mess);
2603 }
2604 else {
2605 my $mess = "DB remove successful: Key[$docid]\n";
2606 $mess .= " $metaname";
2607 $mess .= "->[$metapos]" if (defined $metapos);
2608 $mess .= " ($metavalue)" if (defined $metavalue);
2609
2610 $gsdl_cgi->generate_ok_message($mess);
2611 }
2612
2613 #return $status; # in case calling functions have a use for this
2614}
2615
2616sub remove_index_metadata_entry
2617{
2618 my $self = shift @_;
2619 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
2620 my $gsdl_cgi = $self->{'gsdl_cgi'};
2621
2622 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2623 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2624
2625 # To people who know $collect_tail please add some comments
2626 # -> In collection groups, I think collect_tailname is the subcollection name,
2627 # e.g. colgroup-name/col-tail-name
2628 # Obtain the path to the database
2629 my $collect_tail = $collect;
2630 $collect_tail =~ s/^.*[\/|\\]//;
2631 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2632 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2633
2634 # Read the docid entry
2635 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2636
2637 # Check to make sure the key does exist
2638 if (!defined ($doc_rec->{$metaname})) {
2639 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2640 }
2641
2642 # Obtain the specified metadata pos
2643 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2644 if(!defined $metapos && !defined $metavalue) {
2645 $metapos = 0;
2646 }
2647
2648
2649 # consider check key is defined before deleting?
2650 # Loop through the metadata array and ignore the specified position
2651 my $filtered_metadata = [];
2652 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2653 for (my $i=0; $i<$num_metadata_vals; $i++) {
2654 my $metaval = shift(@{$doc_rec->{$metaname}});
2655
2656 if (!defined $metavalue && $i != $metapos) {
2657 push(@$filtered_metadata,$metaval);
2658 }
2659
2660 if(defined $metavalue && !($metavalue eq $metaval))
2661 {
2662 push(@$filtered_metadata,$metaval);
2663 }
2664 }
2665 $doc_rec->{$metaname} = $filtered_metadata;
2666
2667 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2668 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2669
2670 return $status;
2671}
2672
2673sub remove_index_metadata
2674{
2675 my $self = shift @_;
2676
2677 my $username = $self->{'username'};
2678 my $collect = $self->{'collect'};
2679 my $gsdl_cgi = $self->{'gsdl_cgi'};
2680
2681 if ($baseaction::authentication_enabled) {
2682 # Ensure the user is allowed to edit this collection
2683 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2684 }
2685
2686 # Obtain the collect dir
2687 my $site = $self->{'site'};
2688 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2689
2690 # Make sure the collection isn't locked by someone else
2691 $self->lock_collection($username, $collect);
2692
2693 $self->_remove_index_metadata(@_);
2694
2695 # Release the lock once it is done
2696 $self->unlock_collection($username, $collect);
2697}
2698
2699#################################################################################
2700# ERASE META METHODS: ERASE ALL VALUES FOR MATCHING METANAME AT SPECIFIED DOCID #
2701#################################################################################
2702
2703sub erase_metadata
2704{
2705 my $self = shift @_;
2706
2707 my $where = $self->{'where'};
2708 # when $where is unspecified, following behaviour of sub remove_metadata by defaulting to erasing metadata from index
2709 if(!$where || ($where =~ m/^\s*$/)) {
2710 $self->erase_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2711 return;
2712 }
2713
2714 my $username = $self->{'username'};
2715 my $collect = $self->{'collect'};
2716 my $gsdl_cgi = $self->{'gsdl_cgi'};
2717
2718 if ($baseaction::authentication_enabled) {
2719 # Ensure the user is allowed to edit this collection
2720 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2721 }
2722
2723 # Make sure the collection isn't locked by someone else
2724 $self->lock_collection($username, $collect);
2725
2726 # check which directories need to be processed, specified in $where as
2727 # any combination of import|archives|index|live
2728 if($where =~ m/import/) {
2729 $self->_erase_import_metadata(@_);
2730 }
2731 if($where =~ m/archives/) {
2732 $self->_erase_archives_metadata(@_);
2733 }
2734 if($where =~ m/index/) {
2735 $self->_erase_index_metadata(@_);
2736 }
2737
2738 # Release the lock once it is done
2739 $self->unlock_collection($username, $collect);
2740}
2741
2742# the internal version, without authentication
2743sub _erase_index_metadata
2744{
2745 my $self = shift @_;
2746
2747 my $collect = $self->{'collect'};
2748 my $gsdl_cgi = $self->{'gsdl_cgi'};
2749 my $infodbtype = $self->{'infodbtype'};
2750
2751 # Obtain the collect dir
2752 my $site = $self->{'site'};
2753 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2754
2755 # look up additional args
2756 my $docid = $self->{'d'};
2757 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2758 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2759 }
2760 my $metaname = $self->{'metaname'};
2761 my $metapos = undef;
2762 my $metavalue = undef;
2763
2764 # To people who know $collect_tail please add some comments
2765 # -> In collection groups, I think collect_tailname is the subcollection name,
2766 # e.g. colgroup-name/col-tail-name
2767 # Obtain the path to the database
2768 my $collect_tail = $collect;
2769 $collect_tail =~ s/^.*[\/|\\]//;
2770 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2771 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2772
2773 # Read the docid entry
2774 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2775
2776 # Check to make sure the key does exist
2777 if (defined ($doc_rec->{$metaname})) {
2778
2779 $doc_rec->{$metaname} = [];
2780
2781 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2782 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2783
2784 if ($status != 0) {
2785 my $mess = "Failed to erase metadata key: $docid\n";
2786
2787 $mess .= "PATH: $ENV{'PATH'}\n";
2788 $mess .= "Exit status: $status\n";
2789 $mess .= "System Error Message: $!\n";
2790
2791 $gsdl_cgi->generate_error($mess);
2792 }
2793 else {
2794 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2795 $mess .= " $metaname";
2796 $mess .= "->[$metapos]" if (defined $metapos);
2797 $mess .= " ($metavalue)" if (defined $metavalue);
2798
2799 $gsdl_cgi->generate_ok_message($mess);
2800 }
2801 }
2802 else { # no such metaname. Is it really an error?
2803 $gsdl_cgi->generate_ok_message("Can't erase. No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2804 }
2805 #return $status; # in case calling functions have a use for this
2806}
2807
2808sub erase_index_metadata
2809{
2810 my $self = shift @_;
2811
2812 my $username = $self->{'username'};
2813 my $collect = $self->{'collect'};
2814 my $gsdl_cgi = $self->{'gsdl_cgi'};
2815
2816 if ($baseaction::authentication_enabled) {
2817 # Ensure the user is allowed to edit this collection
2818 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2819 }
2820
2821 # Obtain the collect dir
2822 my $site = $self->{'site'};
2823 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2824
2825 # Make sure the collection isn't locked by someone else
2826 $self->lock_collection($username, $collect);
2827
2828 $self->_erase_index_metadata(@_);
2829
2830 # Release the lock once it is done
2831 $self->unlock_collection($username, $collect);
2832}
2833
2834
2835# Given a metaname, removes ALL metadata with that metaname at the provided docid
2836sub _erase_archives_metadata
2837{
2838 my $self = shift @_;
2839
2840 my $collect = $self->{'collect'};
2841 my $gsdl_cgi = $self->{'gsdl_cgi'};
2842 my $infodbtype = $self->{'infodbtype'};
2843
2844 # Obtain the collect and archive dir
2845 my $site = $self->{'site'};
2846 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2847
2848 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2849
2850 # look up additional args
2851 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2852
2853 my $metaname = $self->{'metaname'};
2854
2855 # metapos, metavalue and metamode are never provided and therefore all undefined for erase_archives_metadata
2856 my $metapos = undef;
2857 my $metavalue = undef;
2858 my $metamode = "override"; # when override and no metapos or metaval, all values for metaname will be affected
2859
2860 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2861 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2862
2863 # This now stores the full pathname
2864 my $doc_file = $doc_rec->{'doc-file'}->[0];
2865
2866 # check if request if for file-level doc-version history 'nminus-<n>' version
2867 my $dv = $self->{'dv'};
2868 if (defined $dv && ($dv ne "")) {
2869 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
2870
2871 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
2872 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
2873 }
2874
2875 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
2876
2877 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2878
2879 if ($status == 0)
2880 {
2881 my $mess = "\nerase-archives-metadata successful: \nKey[$docid]\n";
2882 $mess .= " $metaname";
2883 $gsdl_cgi->generate_ok_message($mess);
2884 }
2885 else
2886 {
2887 my $mess .= "Failed to erase archives metadata key: $docid\n";
2888 $mess .= "Exit status: $status\n";
2889 $mess .= "System Error Message: $!\n";
2890 $mess .= "-" x 20 . "\n";
2891
2892 $gsdl_cgi->generate_error($mess);
2893 }
2894
2895 #return $status; # in case calling functions have a use for this
2896}
2897
2898# practically identical to remove_archives_metadata
2899sub erase_archives_metadata
2900{
2901 my $self = shift @_;
2902
2903 my $username = $self->{'username'};
2904 my $collect = $self->{'collect'};
2905 my $gsdl_cgi = $self->{'gsdl_cgi'};
2906
2907 if ($baseaction::authentication_enabled)
2908 {
2909 # Ensure the user is allowed to edit this collection
2910 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2911 }
2912
2913 # Make sure the collection isn't locked by someone else
2914 $self->lock_collection($username, $collect);
2915
2916 $self->_erase_archives_metadata(@_);
2917
2918 # Release the lock once it is done
2919 $self->unlock_collection($username, $collect);
2920}
2921
2922sub _erase_import_metadata
2923{
2924 my $self = shift @_;
2925
2926 my $collect = $self->{'collect'};
2927 my $gsdl_cgi = $self->{'gsdl_cgi'};
2928 my $infodbtype = $self->{'infodbtype'};
2929
2930 # Obtain the collect dir
2931 my $site = $self->{'site'};
2932 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2933
2934 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2935
2936 # look up additional args
2937 my $docid = $self->{'d'};
2938 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2939 {
2940 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2941 }
2942
2943 my $metaname = $self->{'metaname'};
2944 my $metapos = undef;
2945 my $metavalue = undef;
2946 my $metamode = "override";
2947
2948 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2949 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2950 my $metadata_xml_file;
2951 my $import_filename = undef;
2952 if (defined $docid)
2953 {
2954 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2955 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2956
2957 # This now stores the full pathname
2958 $import_filename = $doc_rec->{'src-file'}->[0];
2959 $import_filename = &util::placeholders_to_abspath($import_filename);
2960 }
2961
2962 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2963 {
2964 $gsdl_cgi->generate_error("There is no metadata\n");
2965 }
2966
2967 # figure out correct metadata.xml file [?]
2968 # Assuming the metadata.xml file is next to the source file
2969 # Note: This will not work if it is using the inherited metadata from the parent folder
2970 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2971 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2972
2973 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode=override, means all values of metaname are affected
2974
2975 my $mess = "erase-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2976 $mess .= " $metaname";
2977 $mess .= " = $metavalue\n";
2978
2979 $gsdl_cgi->generate_ok_message($mess);
2980
2981 #return $status; # in case calling functions have a use for this
2982}
2983
2984sub erase_import_metadata
2985{
2986 my $self = shift @_;
2987
2988 my $username = $self->{'username'};
2989 my $collect = $self->{'collect'};
2990 my $gsdl_cgi = $self->{'gsdl_cgi'};
2991
2992 if ($baseaction::authentication_enabled) {
2993 # Ensure the user is allowed to edit this collection
2994 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2995 }
2996
2997 # Make sure the collection isn't locked by someone else
2998 $self->lock_collection($username, $collect);
2999
3000 $self->_erase_import_metadata(@_);
3001
3002 # Release the lock once it is done
3003 $self->unlock_collection($username, $collect);
3004
3005}
3006
3007# removes all meta with matching metaname for the specified docid from the live metadata index
3008sub _erase_live_metadata
3009{
3010 my $self = shift @_;
3011 # At present, _remove_live_metadata doesn't take metaval or metapos either and therefore has the desired effect
3012 $self->_remove_live_metadata(@_);
3013
3014}
3015
3016sub erase_live_metadata
3017{
3018 my $self = shift @_;
3019
3020 my $username = $self->{'username'};
3021 my $collect = $self->{'collect'};
3022 my $gsdl_cgi = $self->{'gsdl_cgi'};
3023
3024 if ($baseaction::authentication_enabled) {
3025 # Ensure the user is allowed to edit this collection
3026 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3027 }
3028
3029 # Make sure the collection isn't locked by someone else
3030 $self->lock_collection($username, $collect);
3031
3032 $self->_erase_live_metadata(@_);
3033
3034 $self->unlock_collection($username, $collect);
3035}
3036
3037
3038# Was trying to reused the codes, but the functions need to be broken
3039# down more before they can be reused, otherwise there will be too
3040# much overhead and duplicate process...
3041sub insert_metadata
3042{
3043 my $self = shift @_;
3044
3045 my $username = $self->{'username'};
3046 my $collect = $self->{'collect'};
3047 my $gsdl_cgi = $self->{'gsdl_cgi'};
3048 my $infodbtype = $self->{'infodbtype'};
3049
3050 # If the import metadata and gdbm database have been updated, we
3051 # need to insert some notification to warn user that the the text
3052 # they see at the moment is not indexed and require a rebuild.
3053 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3054
3055 if ($baseaction::authentication_enabled) {
3056 # Ensure the user is allowed to edit this collection
3057 $self->authenticate_user($username, $collect);
3058 }
3059
3060 # Obtain the collect and archive dir
3061 my $site = $self->{'site'};
3062 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3063 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3064
3065 # Make sure the collection isn't locked by someone else
3066 $self->lock_collection($username, $collect);
3067
3068 # Check additional args
3069 my $docid = $self->{'d'};
3070 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
3071 $gsdl_cgi->generate_error("No document id is specified: d=...");
3072 }
3073 my $metaname = $self->{'metaname'};
3074 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
3075 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3076 }
3077 my $metavalue = $self->{'metavalue'};
3078 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
3079 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3080 }
3081 # make "accumulate" the default (less destructive, as it won't actually
3082 # delete any existing values)
3083 my $metamode = "accumulate";
3084
3085 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3086 my $metapos = undef;
3087 my $prevmetavalue = undef;
3088
3089 #=======================================================================#
3090 # set_import_metadata [START]
3091 #=======================================================================#
3092 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3093 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3094 my $metadata_xml_file;
3095 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3096 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3097
3098 # This now stores the full pathname
3099 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3100 $import_filename = &util::placeholders_to_abspath($import_filename);
3101
3102 # figure out correct metadata.xml file [?]
3103 # Assuming the metadata.xml file is next to the source file
3104 # Note: This will not work if it is using the inherited metadata from the parent folder
3105 my ($import_tailname, $import_dirname)
3106 = File::Basename::fileparse($import_filename);
3107 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3108
3109 # Shane's escape characters
3110 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3111 $metavalue =~ s/\,/&#44;/g;
3112 $metavalue =~ s/\:/&#58;/g;
3113 $metavalue =~ s/\|/&#124;/g;
3114 $metavalue =~ s/\(/&#40;/g;
3115 $metavalue =~ s/\)/&#41;/g;
3116 $metavalue =~ s/\[/&#91;/g;
3117 $metavalue =~ s/\\/&#92;/g;
3118 $metavalue =~ s/\]/&#93;/g;
3119 $metavalue =~ s/\{/&#123;/g;
3120 $metavalue =~ s/\}/&#125;/g;
3121 $metavalue =~ s/\"/&#34;/g;
3122 $metavalue =~ s/\`/&#96;/g;
3123 $metavalue =~ s/\n/_newline_/g;
3124
3125 # Edit the metadata.xml
3126 # Modified by Jeffrey from DL Consulting
3127 # Handle the case where there is one metadata.xml file for multiple FileSets
3128 # The XML filter needs to know whether it is in the right FileSet
3129 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3130 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3131 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3132 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3133 #=======================================================================#
3134 # set_import_metadata [END]
3135 #=======================================================================#
3136
3137
3138 #=======================================================================#
3139 # set_metadata (accumulate version) [START]
3140 #=======================================================================#
3141 # To people who know $collect_tail please add some comments
3142 # Obtain path to the database
3143 my $collect_tail = $collect;
3144 $collect_tail =~ s/^.*[\/|\\]//;
3145 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3146 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3147
3148 # Read the docid entry
3149 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3150
3151 # Protect the quotes
3152 $metavalue =~ s/\"/\\\"/g;
3153
3154 # Adds the pending macro
3155 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3156
3157 # If the metadata doesn't exist, create a new one
3158 if (!defined($doc_rec->{$metaname})){
3159 $doc_rec->{$metaname} = [ $macro_metavalue ];
3160 }
3161 # Else, let's acculumate the values
3162 else {
3163 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3164 }
3165
3166 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3167 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3168
3169 if ($status != 0) {
3170 # Catch error if gdbmget failed
3171 my $mess = "Failed to set metadata key: $docid\n";
3172
3173 $mess .= "PATH: $ENV{'PATH'}\n";
3174 $mess .= "Exit status: $status\n";
3175 $mess .= "System Error Message: $!\n";
3176
3177 $gsdl_cgi->generate_error($mess);
3178 }
3179 else {
3180 my $mess = "insert-metadata successful: Key[$docid]\n";
3181 $mess .= " [In metadata.xml] $metaname";
3182 $mess .= " = $metavalue\n";
3183 $mess .= " [In database] $metaname";
3184 $mess .= " = $macro_metavalue\n";
3185 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3186 $gsdl_cgi->generate_ok_message($mess);
3187 }
3188 #=======================================================================#
3189 # set_metadata (accumulate version) [END]
3190 #=======================================================================#
3191
3192 # Release the lock once it is done
3193 $self->unlock_collection($username, $collect);
3194}
3195
3196
3197
3198
3199sub inc_fldv_nminus1
3200{
3201 my $self = shift @_;
3202
3203 my $username = $self->{'username'};
3204 my $collect = $self->{'collect'};
3205 my $gsdl_cgi = $self->{'gsdl_cgi'};
3206 my $infodbtype = $self->{'infodbtype'};
3207
3208 # Obtain the collect dir
3209 my $site = $self->{'site'};
3210 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3211
3212 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
3213
3214 # look up additional args
3215 my $docid = $self->{'d'};
3216
3217
3218 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3219 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3220
3221 # This now stores the full pathname [is this still true??]
3222 my $doc_file = $doc_rec->{'doc-file'}->[0];
3223
3224 my ($unused_doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
3225 my $doc_full_dirname = &util::filename_cat($archive_dir,$doc_dirname);
3226
3227 my $fldv_full_dirname = &util::filename_cat($doc_full_dirname,$FLDV_HISTORY_DIR);
3228
3229 my $had_error = 0;
3230 my $had_error_mess = undef;
3231
3232 if (-d $fldv_full_dirname) {
3233 my $fldv_filtered_dirs = &FileUtils::readDirectoryFiltered($fldv_full_dirname,undef,"^nminus-\\d+\$");
3234
3235 my @sorted_fldv_filtered_dirs = sort {
3236 my ($a_num) = ($a =~ m/(\d+)$/);
3237 my ($b_num) = ($b =~ m/(\d+)$/);
3238
3239 # sort into descending order
3240 return $b_num <=> $a_num;
3241 } @$fldv_filtered_dirs;
3242
3243 ## shuffle all the nminus-<n> folders down by one
3244
3245 foreach my $nminus_n (@sorted_fldv_filtered_dirs) {
3246
3247 my $nminus_n_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,$nminus_n);
3248 if (-d $nminus_n_full_dir) {
3249
3250 my ($n) = ($nminus_n =~ m/(\d+)$/);
3251 my $new_n = $n + 1;
3252
3253 my $new_full_dir_plus1 = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-$new_n");
3254
3255 my $move_ok = &FileUtils::renameDirectory($nminus_n_full_dir,$new_full_dir_plus1,
3256 { 'strict' => 1 } );
3257
3258 if (!$move_ok) {
3259 $had_error_mess = "Error: Failed to move '$nminus_n' to nminus-$new_n in $fldv_full_dirname\n";
3260 $had_error = 1;
3261 last;
3262 }
3263
3264 }
3265 else {
3266 print STDERR "Warning: skipping $nminus_n_full_dir as it is not a directory\n";
3267 }
3268 }
3269 }
3270 else {
3271 # first time asked to perform the inc-nminus-1 action for this doc,
3272 # and there is not yet even a _fldv directory!
3273 # => make the directory (and that's all that's needed at this stage)
3274 print STDERR "First time file-level document-version history has been applied to this document\n";
3275 print STDERR "Creating $fldv_full_dirname\n";
3276
3277 &FileUtils::makeDirectory($fldv_full_dirname);
3278 }
3279
3280 if (!$had_error) {
3281 ### Now need to copy everything top level in doc dir into nminus-1
3282
3283 my $nminus_1_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-1");
3284
3285 my $copy_ok = &FileUtils::copyFilesRefRecursive([$doc_full_dirname],$nminus_1_full_dir,
3286 { 'strict' => 1, 'exclude_filter_re' => "^$FLDV_HISTORY_DIR\$" } );
3287
3288 if (!$copy_ok) {
3289 $had_error_mess = "Error: Failed to clone document $docid to form 'nminus-1' directory in $fldv_full_dirname";
3290 $had_error = 1;
3291 }
3292 }
3293
3294 if ($had_error) {
3295 $gsdl_cgi->generate_error($had_error_mess);
3296 }
3297 else {
3298 $gsdl_cgi->generate_ok_message("Successfully incremented the File-Level Document-Version History for $docid");
3299 }
3300}
3301
3302
3303############################# REMOVE METADATA ARRAY METHODS ############################
3304
3305
3306sub remove_metadata_array
3307{
3308 my $self = shift @_;
3309
3310 my $where = $self->{'where'};
3311 if(!$where || ($where =~ m/^\s*$/)) {
3312 $self->remove_index_metadata_array(@_); # mimic set_metadata_array for default behaviour
3313 return;
3314 }
3315
3316 my $username = $self->{'username'};
3317 my $collect = $self->{'collect'};
3318 my $gsdl_cgi = $self->{'gsdl_cgi'};
3319
3320 if ($baseaction::authentication_enabled) {
3321 # Ensure the user is allowed to edit this collection
3322 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3323 }
3324
3325 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
3326 # But copied code from set_metadata_array
3327 my $site = $self->{'site'};
3328 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3329 $gsdl_cgi->checked_chdir($collect_dir);
3330
3331 # Make sure the collection isn't locked by someone else
3332 $self->lock_collection($username, $collect);
3333
3334 if($where =~ m/import/) {
3335 $self->_remove_import_metadata_array(@_);
3336 }
3337 if($where =~ m/archives/) {
3338 $self->_remove_archives_metadata_array(@_);
3339 }
3340 if($where =~ m/index/) {
3341 $self->_remove_index_metadata_array(@_);
3342 }
3343 if($where =~ m/live/) {
3344 $self->_remove_live_metadata_array(@_);
3345 }
3346
3347 # Release the lock once it is done
3348 $self->unlock_collection($username, $collect);
3349}
3350
3351sub _remove_index_metadata_array
3352{
3353 my $self = shift @_;
3354
3355 my $collect = $self->{'collect'};
3356 my $gsdl_cgi = $self->{'gsdl_cgi'};
3357
3358 # Obtain the collect dir
3359 my $site = $self->{'site'};
3360 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3361
3362 # look up additional args
3363
3364 my $infodbtype = $self->{'infodbtype'};
3365
3366 my $json_str = $self->{'json'};
3367 my $doc_array = decode_json $json_str;
3368
3369
3370 my $global_status = 0;
3371 my $global_mess = "";
3372
3373 my @all_docids = ();
3374
3375 foreach my $doc_array_rec ( @$doc_array ) {
3376
3377 my $status = -1;
3378 my $docid = $doc_array_rec->{'docid'};
3379
3380 push(@all_docids,$docid);
3381
3382 my $metaname = $doc_array_rec->{'metaname'};
3383 if(defined $metaname) {
3384 # metapos can be undef and if metaval is also undef, metapos=0 will be used
3385 my $metapos = $doc_array_rec->{'metapos'};
3386 my $metavalue = $doc_array_rec->{'metavalue'};
3387 # remove-import-metadata and remove-archives-metadata recognise metamode option
3388 # but not remove-index-metadata (nor remove-live-metadata)
3389 #my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
3390
3391 $status = $self->remove_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
3392 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3393 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3394
3395 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
3396 $metaname = $metatable_rec->{'metaname'};
3397 #my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'}; # metamode not used by remove_index_metadata
3398 my $metapos = $metatable_rec->{'metapos'} || $doc_array_rec->{'metapos'} || $self->{'metapos'} || undef; # try most-to-least specific metapos setting, and fallback on undef
3399 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3400
3401 foreach my $metavalue ( @$metavals ) { # metavals is an array
3402 $status = $self->remove_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue); # how do we use metamode in set_meta_entry?
3403
3404 }
3405 }
3406 }
3407
3408 if ($status != 0) {
3409 # Catch error if set infodb entry failed
3410 $global_status = $status;
3411 $global_mess .= "Failed to remove metadata key: $docid\n";
3412 $global_mess .= "Exit status: $status\n";
3413 $global_mess .= "System Error Message: $!\n";
3414 $global_mess .= "-" x 20;
3415 }
3416 }
3417
3418 if ($global_status != 0) {
3419 $global_mess .= "PATH: $ENV{'PATH'}\n";
3420 $gsdl_cgi->generate_error($global_mess);
3421 }
3422 else {
3423 my $mess = "remove-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3424 $gsdl_cgi->generate_ok_message($mess);
3425 }
3426}
3427
3428sub remove_index_metadata_array
3429{
3430 my $self = shift @_;
3431
3432 my $username = $self->{'username'};
3433 my $collect = $self->{'collect'};
3434 my $gsdl_cgi = $self->{'gsdl_cgi'};
3435
3436 if ($baseaction::authentication_enabled) {
3437 # Ensure the user is allowed to edit this collection
3438 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3439 }
3440
3441 # Obtain the collect dir
3442 my $site = $self->{'site'};
3443 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3444
3445 # Skip this step as lock_collection does a chdir into final destination directory anyway?
3446 $gsdl_cgi->checked_chdir($collect_dir);
3447
3448
3449 # Make sure the collection isn't locked by someone else
3450 $self->lock_collection($username, $collect);
3451
3452 $self->_remove_index_metadata_array(@_);
3453
3454 # Release the lock once it is done
3455 $self->unlock_collection($username, $collect);
3456}
3457
3458# Untested, based on also untested experimental _set_live_metadata_array
3459# TODO: How can we delete at a dbkey with a particular value or at a particular metapos?
3460# Same question for sub remove_live_metadata
3461sub _remove_live_metadata_array
3462{
3463 my $self = shift @_;
3464
3465 my $collect = $self->{'collect'};
3466 my $gsdl_cgi = $self->{'gsdl_cgi'};
3467
3468 # Obtain the collect dir
3469 my $site = $self->{'site'};
3470 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3471
3472 # look up additional args
3473 my $infodbtype = $self->{'infodbtype'};
3474 # To people who know $collect_tail please add some comments
3475 # Obtain path to the database
3476 my $collect_tail = $collect;
3477 $collect_tail =~ s/^.*[\/|\\]//;
3478 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3479 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3480
3481
3482 my $json_str = $self->{'json'};
3483 my $doc_array = decode_json $json_str;
3484
3485
3486 my $global_status = 0;
3487 my $global_mess = "";
3488
3489 my @all_docids = ();
3490
3491
3492 foreach my $doc_array_rec ( @$doc_array ) {
3493
3494 my $status = -1;
3495 my $docid = $doc_array_rec->{'docid'};
3496
3497 push(@all_docids,$docid);
3498
3499 my $dbkey = "";
3500 my $metaname = $doc_array_rec->{'metaname'};
3501 if(defined $metaname) {
3502 $dbkey = "$docid.$metaname";
3503
3504 # Remove the key
3505 #my $metavalue = $doc_array_rec->{'metavalue'};
3506 #my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
3507 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3508 $status = system($cmd);
3509
3510 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3511 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3512 foreach my $metatable_rec ( @$metatable ) {
3513 $metaname = $metatable_rec->{'metaname'};
3514 $dbkey = "$docid.$metaname";
3515
3516 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3517 $status = system($cmd);
3518 #my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3519 #foreach my $metavalue ( @$metavals ) {
3520 #my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
3521 #$status = system($cmd);
3522 #}
3523 }
3524
3525 }
3526
3527 if ($status != 0) {
3528 # Catch error if gdbmdel failed
3529 $global_status = $status;
3530 $global_mess .= "Failed to delete metadata key: $dbkey\n"; # $dbkey
3531 $global_mess .= "Exit status: $status\n";
3532 $global_mess .= "System Error Message: $!\n";
3533 $global_mess .= "-" x 20;
3534 }
3535 }
3536
3537 if ($global_status != 0) {
3538 $global_mess .= "PATH: $ENV{'PATH'}\n";
3539 $gsdl_cgi->generate_error($global_mess);
3540 }
3541 else {
3542 my $mess = "remove-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3543 $gsdl_cgi->generate_ok_message($mess);
3544 }
3545}
3546
3547sub remove_live_metadata_array
3548{
3549 my $self = shift @_;
3550
3551 my $username = $self->{'username'};
3552 my $collect = $self->{'collect'};
3553 my $gsdl_cgi = $self->{'gsdl_cgi'};
3554
3555 if ($baseaction::authentication_enabled) {
3556 # Ensure the user is allowed to edit this collection
3557 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3558 }
3559
3560 # Obtain the collect dir
3561 my $site = $self->{'site'};
3562 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3563
3564 $gsdl_cgi->checked_chdir($collect_dir);
3565
3566 # Make sure the collection isn't locked by someone else
3567 $self->lock_collection($username, $collect);
3568
3569 $self->_remove_live_metadata_array(@_);
3570
3571 # Release the lock once it is done
3572 $self->unlock_collection($username, $collect);
3573}
3574
3575
3576sub remove_import_metadata_array
3577{
3578 my $self = shift @_;
3579
3580 my $username = $self->{'username'};
3581 my $collect = $self->{'collect'};
3582 my $gsdl_cgi = $self->{'gsdl_cgi'};
3583
3584 if ($baseaction::authentication_enabled) {
3585 # Ensure the user is allowed to edit this collection
3586 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3587 }
3588
3589 # Obtain the collect dir
3590 my $site = $self->{'site'};
3591 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3592
3593 $gsdl_cgi->checked_chdir($collect_dir);
3594
3595 # Make sure the collection isn't locked by someone else
3596 $self->lock_collection($username, $collect);
3597
3598 $self->_remove_import_metadata_array(@_);
3599
3600 # Release the lock once it is done
3601 $self->unlock_collection($username, $collect);
3602
3603}
3604
3605
3606sub _remove_import_metadata_array
3607{
3608 my $self = shift @_;
3609
3610 my $collect = $self->{'collect'};
3611 my $gsdl_cgi = $self->{'gsdl_cgi'};
3612
3613 # Obtain the collect dir
3614 my $site = $self->{'site'};
3615 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3616
3617 # look up additional args
3618
3619 my $infodbtype = $self->{'infodbtype'};
3620
3621 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3622 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3623
3624 my $json_str = $self->{'json'};
3625 my $doc_array = decode_json $json_str;
3626
3627 my $global_status = 0;
3628 my $global_mess = "";
3629
3630 my @all_docids = ();
3631
3632 foreach my $doc_array_rec ( @$doc_array )
3633 {
3634 my $status = -1;
3635 my $docid = $doc_array_rec->{'docid'};
3636
3637 my $metadata_xml_filename = undef;
3638
3639 my ($docid_root,$docid_secnum);
3640 if(defined $docid) {
3641 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
3642 # as yet no support for setting or removing subsection metadata in metadata.xml
3643 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
3644 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for deleting import metadata at subsections' level.\n");
3645 next; # skip this docid in for loop
3646 }
3647 }
3648
3649 push(@all_docids,$docid); # docid_root rather
3650
3651 my $metaname = $doc_array_rec->{'metaname'};
3652 if (defined $metaname) {
3653 my $metapos = $doc_array_rec->{'metapos'} || $self->{'metapos'};
3654 my $metavalue = $doc_array_rec->{'metavalue'}; #|| $self->{'metavalue'};
3655 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'} || undef; # see set_import_meta_array for fallback
3656
3657 $metadata_xml_filename = $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode); # at this point, docid_root = docid
3658
3659 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3660 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3661
3662 foreach my $metatable_rec ( @$metatable ) {
3663 $metaname = $metatable_rec->{'metaname'};
3664 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'} || undef;
3665
3666 my $metapositions = $metatable_rec->{'metapositions'}; # a sub-subarray
3667 if(defined $metapositions) {
3668 foreach my $metapos ( @$metapositions ) {
3669 my $metavalue = undef;
3670
3671 $metadata_xml_filename = $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode); # at this point, docid_root = docid
3672
3673 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3674 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3675 # metavalue for this metaname will be lost
3676 #$metamode = "accumulate";
3677 #}
3678 }
3679 }
3680 else {
3681 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3682 next unless defined $metavals;
3683 foreach my $metavalue ( @$metavals ) {
3684 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
3685 my $metapos = undef;
3686
3687 $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode); # at this point, docid_root = docid
3688
3689 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3690 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3691 # metavalue for this metaname will be lost
3692 #$metamode = "accumulate";
3693 #}
3694 }
3695 }
3696 }
3697 }
3698
3699 # errors would have caused a call to die, so can't report those
3700 if (defined $metadata_xml_filename) {
3701
3702 $global_mess .= "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
3703 }
3704 }
3705
3706
3707 # always a success message, as any failure would have caused a call to die within remove_import_metadata_entry
3708 my $mess = "remove-import-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3709 $mess .= $global_mess . "\n";
3710 $gsdl_cgi->generate_ok_message($mess);
3711}
3712
3713
3714sub _remove_archives_metadata_array
3715{
3716 my $self = shift @_;
3717
3718 my $collect = $self->{'collect'};
3719 my $gsdl_cgi = $self->{'gsdl_cgi'};
3720
3721 # Obtain the collect dir
3722 my $site = $self->{'site'};
3723 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3724
3725 # look up additional args
3726
3727 my $infodbtype = $self->{'infodbtype'};
3728
3729 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3730
3731 my $json_str = $self->{'json'};
3732 my $doc_array = decode_json $json_str;
3733
3734
3735 my $global_status = 0;
3736 my $global_mess = "";
3737
3738 my @all_docids = ();
3739
3740 foreach my $doc_array_rec ( @$doc_array ) {
3741 my $status = -1;
3742 my $full_docid = $doc_array_rec->{'docid'};
3743 my ($docid, $docid_secnum) = ($full_docid =~ m/^(.*?)(\..*)?$/);
3744 # usercomments meta are at root and so docid_secnum will be undef which is acceptable
3745 my $dv = $doc_array_rec->{'dv'};
3746
3747 push(@all_docids,$full_docid);
3748
3749 my $metaname = $doc_array_rec->{'metaname'};
3750 if(defined $metaname) {
3751
3752 my $metapos = $doc_array_rec->{'metapos'};
3753 my $metavalue = $doc_array_rec->{'metavalue'};
3754 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
3755
3756 $status = $self->remove_archives_metadata_entry($gsdl_cgi,$archive_dir,$infodbtype,
3757 $docid, $docid_secnum, $dv, $metaname,$metapos,$metavalue,$metamode);
3758
3759 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3760 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3761
3762 foreach my $metatable_rec ( @$metatable ) {
3763 $metaname = $metatable_rec->{'metaname'};
3764 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
3765 my $metapositions = $metatable_rec->{'metapositions'}; # a sub-subarray
3766 if(defined $metapositions) {
3767 foreach my $metapos ( @$metapositions ) {
3768 my $metavalue = undef;
3769
3770 $self->remove_archives_metadata_entry($gsdl_cgi, $archive_dir, $infodbtype,
3771 $docid,$docid_secnum,$dv, $metaname,$metapos,$metavalue,$metamode);
3772
3773 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3774 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3775 # metavalue for this metaname will be lost
3776 #$metamode = "accumulate";
3777 #}
3778 }
3779 }
3780 else {
3781 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3782 next unless defined $metavals;
3783 foreach my $metavalue ( @$metavals ) {
3784 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
3785 my $metapos = undef;
3786
3787 $status = $self->remove_archives_metadata_entry($gsdl_cgi,$archive_dir,$infodbtype,
3788 $docid,$dv, $metaname,$metapos,$metavalue,$metamode);
3789 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3790 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3791 # metavalue for this metaname will be lost
3792 #$metamode = "accumulate";
3793 #}
3794 }
3795 }
3796 }
3797 }
3798
3799 if ($status != 0) {
3800 # Catch error if set infodb entry failed
3801 $global_status = $status;
3802 $global_mess .= "Failed to remove archives metadata key: $full_docid\n";
3803 $global_mess .= "Exit status: $status\n";
3804 $global_mess .= "System Error Message: $!\n";
3805 $global_mess .= "-" x 20 . "\n";
3806 }
3807 }
3808
3809 if ($global_status != 0) {
3810 $global_mess .= "PATH: $ENV{'PATH'}\n";
3811 $gsdl_cgi->generate_error($global_mess);
3812 }
3813 else {
3814 my $mess = "remove-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3815 $gsdl_cgi->generate_ok_message($mess);
3816 }
3817}
3818
3819sub remove_archives_metadata_array
3820{
3821 my $self = shift @_;
3822
3823 my $username = $self->{'username'};
3824 my $collect = $self->{'collect'};
3825 my $gsdl_cgi = $self->{'gsdl_cgi'};
3826
3827 if ($baseaction::authentication_enabled) {
3828 # Ensure the user is allowed to edit this collection
3829 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3830 }
3831
3832 # Obtain the collect dir
3833 my $site = $self->{'site'};
3834 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3835
3836 $gsdl_cgi->checked_chdir($collect_dir);
3837
3838 # Make sure the collection isn't locked by someone else
3839 $self->lock_collection($username, $collect);
3840
3841 $self->_remove_archives_metadata_array(@_);
3842
3843 # Release the lock once it is done
3844 $self->unlock_collection($username, $collect);
3845}
3846
3847
3848##################################### END REMOVE ARRAY METHODS #################
3849
3850# not returning 1; here since this file is conditionally included by metadataction.pm
3851# and not otherwise meant to be used on its own
3852
Note: See TracBrowser for help on using the repository browser.