source: main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm@ 37734

Last change on this file since 37734 was 37734, checked in by davidb, 12 months ago

Fix for when the first ever file-level doc version history action is applied to a document

File size: 113.1 KB
Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34package metadataaction;
35
36use strict;
37
38use cgiactions::baseaction;
39use cgiactions::metadataactioncommon;
40
41use dbutil;
42use ghtml;
43
44use JSON;
45
46# convenient to have shorter version around
47my $FLDV_HISTORY_DIR = $metadataactioncommon::FLDV_HISTORY_DIR;
48
49$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
50{
51 #SET METHODS
52 "set-live-metadata" => {
53 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
57 'compulsory-args' => [ "metaname", "metavalue" ],
58 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "dv", "f" ] },
59
60 "set-index-metadata" => {
61 'compulsory-args' => [ "d", "metaname", "metavalue" ],
62 'optional-args' => [ "metapos", "metamode" ] },
63
64 "set-archives-metadata" => {
65 'compulsory-args' => [ "d", "metaname", "metavalue" ],
66 'optional-args' => [ "dv", "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
67
68 ###### !!!!!
69 # Should there not be a "set-archives-text" also ???!!!!??
70 ###### !!!!!
71
72 "set-archives-assocfile" => {
73 'compulsory-args' => [ "d", "assocname" ],
74 'optional-args' => [ "dv", "fileupload", "filedata" ] ,
75 'help-string' => [ "Because this action uploads a file, this action needs to be POSTed. The 'fileupload' parameter/field is the uploaded file content; 'assocname' specifies the filename within the archives's document's area that it will be saved as" ]
76 },
77
78 "set-import-metadata" => {
79 'compulsory-args' => [ "metaname", "metavalue" ],
80 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
81
82 #SET METHODS (ARRAY)
83 "set-metadata-array" => {
84 'compulsory-args' => [ "where", "json" ],
85 'optional-args' => [ ],
86 'help-string' => [
87 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
88
89 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
90
91# The same examples rewritten for when running the metadata-server.pl script from the commandline:
92
93# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
94
95# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
96
97 "set-archives-metadata-array" => {
98 'compulsory-args' => [ "json" ],
99 'optional-args' => [ ] },
100
101 "set-import-metadata-array" => {
102 'compulsory-args' => [ "json" ],
103 'optional-args' => [ ] },
104
105 "set-index-metadata-array" => {
106 'compulsory-args' => [ "json" ],
107 'optional-args' => [ ] },
108
109 "set-live-metadata-array" => {
110 'compulsory-args' => [ "json" ],
111 'optional-args' => [ ] },
112
113 #REMOVE METHODS
114 "remove-import-metadata" => {
115 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
116 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
117# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
118
119 "remove-archives-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
121 'optional-args' => [ "dv", "metapos", "metavalue", "metamode" ] },
122
123 "remove-live-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ ] },
126
127 "remove-index-metadata" => {
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "metapos", "metavalue" ] },
130
131 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
132 'compulsory-args' => [ "d", "metaname" ],
133 'optional-args' => [ "where", "dv", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
134
135
136 #REMOVE MULTIPLE (REMOVE ALL MATCHING METANAME AT DOCID)
137 "erase-import-metadata" => {
138 'compulsory-args' => [ "d", "metaname" ] },
139
140 "erase-archives-metadata" => {
141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "dv" ] },
143
144 "erase-index-metadata" => {
145 'compulsory-args' => [ "d", "metaname" ] },
146
147 "erase-live-metadata" => {
148 'compulsory-args' => [ "d", "metaname" ] },
149
150 "erase-metadata" => { # generic erase-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
151 'compulsory-args' => [ "d", "metaname" ],
152 'optional-args' => [ "where" ] },
153
154
155 #INSERT METHODS
156 "insert-metadata" => {
157 'compulsory-args' => [ "d", "metaname", "metavalue" ],
158 'optional-args' => [ ] },
159
160
161 #INC METHODS
162 "inc-fldv-nminus1" => {
163 'compulsory-args' => [ "d" ],
164 'optional-args' => [ ] }
165};
166
167sub _set_live_metadata
168{
169 my $self = shift @_;
170
171 my $collect = $self->{'collect'};
172 my $gsdl_cgi = $self->{'gsdl_cgi'};
173 my $infodbtype = $self->{'infodbtype'};
174
175 # Obtain the collect dir
176 my $site = $self->{'site'};
177 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
178
179
180 # look up additional args
181 my $docid = $self->{'d'};
182 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
183 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
184 }
185 my $metavalue = $self->{'metavalue'};
186
187 # Generate the dbkey
188 my $metaname = $self->{'metaname'};
189 my $dbkey = "$docid.$metaname";
190
191 # To people who know $collect_tail please add some comments
192 # Obtain path to the database
193 my $collect_tail = $collect;
194 $collect_tail =~ s/^.*[\/|\\]//;
195 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
196 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
197
198 # Set the new value
199 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
200 my $status = system($cmd);
201 if ($status != 0) {
202 # Catch error if gdbmget failed
203 my $mess = "Failed to set metadata key: $dbkey\n";
204
205 $mess .= "PATH: $ENV{'PATH'}\n";
206 $mess .= "cmd = $cmd\n";
207 $mess .= "Exit status: $status\n";
208 $mess .= "System Error Message: $!\n";
209
210 $gsdl_cgi->generate_error($mess);
211 }
212 else {
213 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
214 }
215
216 #return $status; # in case calling functions have any further use for this
217}
218
219sub set_live_metadata
220{
221 my $self = shift @_;
222
223 my $username = $self->{'username'};
224 my $collect = $self->{'collect'};
225 my $gsdl_cgi = $self->{'gsdl_cgi'};
226
227 if ($baseaction::authentication_enabled) {
228 # Ensure the user is allowed to edit this collection
229 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
230 }
231
232 # Make sure the collection isn't locked by someone else
233 $self->lock_collection($username, $collect);
234
235 $self->_set_live_metadata(@_);
236
237 # Release the lock once it is done
238 $self->unlock_collection($username, $collect);
239}
240
241sub set_index_metadata_entry
242{
243 my $self = shift @_;
244 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
245
246 # To people who know $collect_tail please add some comments
247 # Obtain path to the database
248 my $collect_tail = $collect;
249 $collect_tail =~ s/^.*[\/|\\]//;
250 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
251 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
252
253# print STDERR "**** infodb file path = $infodb_file_path\n";
254# print STDERR "***** infodb type = $infodbtype\n";
255
256 # Read the docid entry
257 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
258
259 # Set the metadata value
260 if (defined $metapos) {
261 # if metamode=accumulate AND metapos, warn user and then use metapos
262 if (defined $metamode && $metamode eq "accumulate") {
263 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
264 print STDERR "**** Proceeding by using metapos\n";
265 }
266 $doc_rec->{$metaname}->[$metapos] = $metavalue;
267 }
268 elsif (defined $prevmetavalue) {
269 my $array = $doc_rec->{$metaname};
270 my $length = @$array;
271
272 my $found = 0;
273 for (my $i = 0; $i < $length; $i++){
274 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
275 $doc_rec->{$metaname}->[$i] = $metavalue;
276 $found = 1;
277 last;
278 }
279 }
280
281 if($found == 0){
282 $doc_rec->{$metaname} = [ $metavalue ];
283 }
284 }
285 elsif (defined $metamode && $metamode eq "override") {
286 $doc_rec->{$metaname} = [ $metavalue ];
287 }
288 else { # default for index was to override, but because accumulate is less destructive,
289 # and because accumulate is the default for archives and import, that's the new default for index too
290 if(defined $doc_rec->{$metaname}) {
291 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
292 } else {
293 $doc_rec->{$metaname} = [ $metavalue ];
294 }
295 }
296
297 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
298
299 return $status;
300
301}
302
303sub _set_import_metadata
304{
305 my $self = shift @_;
306
307 my $collect = $self->{'collect'};
308 my $gsdl_cgi = $self->{'gsdl_cgi'};
309 my $infodbtype = $self->{'infodbtype'};
310
311 # Obtain the collect and archive dir
312 my $site = $self->{'site'};
313 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
314 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
315
316 # look up additional args
317 # want either d= or f=
318 my $docid = $self->{'d'};
319 my ($docid_root,$docid_secnum);
320 if(defined $docid) {
321 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
322 # as yet no support for setting subsection metadata in metadata.xml
323 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
324 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
325 return;
326 }
327 }
328
329 my $import_file = $self->{'f'};
330 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
331 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
332 }
333
334 # Get the parameters and set default mode to "accumulate"
335 my $metaname = $self->{'metaname'};
336 my $metavalue = $self->{'metavalue'};
337## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
338 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
339
340 my $metamode = $self->{'metamode'};
341 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
342 # make "accumulate" the default (less destructive, as it won't actually
343 # delete any existing values)
344 $metamode = "accumulate";
345 }
346
347 # adding metapos and prevmetavalue support to import_metadata subroutines
348 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
349 my $prevmetavalue = $self->{'prevmetavalue'};
350 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
351 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
352
353 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
354 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
355
356 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
357 $mess .= " $metaname";
358 $mess .= " = $metavalue";
359 $mess .= " ($metamode)\n";
360
361 $gsdl_cgi->generate_ok_message($mess);
362
363 #return $status; # in case calling functions have any further use for this
364}
365
366# the version of set_archives_meta that doesn't do authentication
367sub _set_archives_metadata
368{
369 my $self = shift @_;
370
371 my $collect = $self->{'collect'};
372 my $gsdl_cgi = $self->{'gsdl_cgi'};
373 my $infodbtype = $self->{'infodbtype'};
374
375 # Obtain the collect and archive dir
376 my $site = $self->{'site'};
377 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
378 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
379
380 # look up additional args
381 my $docid = $self->{'d'};
382 my $dv = $self->{'dv'};
383
384 my $metaname = $self->{'metaname'};
385 my $metavalue = $self->{'metavalue'};
386 my $prevmetavalue = $self->{'prevmetavalue'};
387
388 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
389 # Don't append "|| undef", since if metapos=0 it will then be set to undef
390
391 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
392 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
393
394 my $metamode = $self->{'metamode'};
395 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
396 # make "accumulate" the default (less destructive, as it won't actually
397 # delete any existing values)
398 $metamode = "accumulate";
399 }
400
401 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
402 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
403
404 if ($status == 0) {
405 my $mess = "set-archives-metadata successful: Key[$docid]\n";
406 $mess .= " $metaname";
407 $mess .= "->[$metapos]" if (defined $metapos);
408 $mess .= " = $metavalue";
409 $mess .= " ($metamode)\n";
410
411 $gsdl_cgi->generate_ok_message($mess);
412 }
413 else {
414 my $mess .= "Failed to set archives metadata key: $docid\n";
415 $mess .= "Exit status: $status\n";
416 if(defined $self->{'error_msg'}) {
417 $mess .= "Error Message: $self->{'error_msg'}\n";
418 } else {
419 $mess .= "System Error Message: $!\n";
420 }
421 $mess .= "-" x 20 . "\n";
422
423 $gsdl_cgi->generate_error($mess);
424 }
425
426 #return $status; # in case calling functions have any further use for this
427}
428
429
430# the version of set_index_meta that doesn't do authentication
431sub _set_index_metadata
432{
433 print STDERR "START SET INDEX METADATA\n";
434 my $self = shift @_;
435
436 my $collect = $self->{'collect'};
437 my $gsdl_cgi = $self->{'gsdl_cgi'};
438
439 # Obtain the collect dir
440 my $site = $self->{'site'};
441 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
442
443 # look up additional args
444 my $docid = $self->{'d'};
445 my $metaname = $self->{'metaname'};
446 my $metapos = $self->{'metapos'}; # undef has meaning
447 my $metavalue = $self->{'metavalue'};
448 my $infodbtype = $self->{'infodbtype'};
449 my $metamode = $self->{'metamode'};
450 my $prevmetavalue = $self->{'prevmetavalue'};
451
452 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
453 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
454
455 print STDERR "SETTING INDEX METADATA ENTRY\n";
456 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
457 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
458 if ($status != 0) {
459 # Catch error if set infodb entry failed
460 my $mess = "Failed to set metadata key: $docid\n";
461
462 $mess .= "PATH: $ENV{'PATH'}\n";
463 $mess .= "Exit status: $status\n";
464 $mess .= "System Error Message: $!\n";
465
466 $gsdl_cgi->generate_error($mess);
467 }
468 else {
469 my $mess = "set-index-metadata successful: Key[$docid]\n";
470 $mess .= " $metaname";
471 $mess .= "->[$metapos]" if (defined $metapos);
472 $mess .= " = $metavalue\n";
473
474 $gsdl_cgi->generate_ok_message($mess);
475 }
476
477 print STDERR "END SET INDEX METADATA\n";
478 #return $status; # in case calling functions have any further use for this
479}
480
481sub set_index_metadata
482{
483 my $self = shift @_;
484
485 my $username = $self->{'username'};
486 my $collect = $self->{'collect'};
487 my $gsdl_cgi = $self->{'gsdl_cgi'};
488
489 if ($baseaction::authentication_enabled) {
490 # Ensure the user is allowed to edit this collection
491 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
492 }
493
494 # Obtain the collect dir
495 my $site = $self->{'site'};
496 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
497
498 $gsdl_cgi->checked_chdir($collect_dir);
499
500 # Make sure the collection isn't locked by someone else
501 $self->lock_collection($username, $collect);
502
503 $self->_set_index_metadata(@_);
504
505 # Release the lock once it is done
506 $self->unlock_collection($username, $collect);
507}
508
509# call this to set the metadata for a combination of dirs archives, import or index, or live
510# if none specified, defaults to index which was the original behaviour of set_metadata.
511sub set_metadata
512{
513 my $self = shift @_;
514
515 # Testing that not defining a variable, setting it to "" or to " " all return false
516 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
517
518 my $where = $self->{'where'};
519 if(!$where || ($where =~ m/^\s*$/)) {
520 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
521 return;
522 }
523
524 # authenticate and lock collection once, even if processing multiple dirs
525 my $username = $self->{'username'};
526 my $collect = $self->{'collect'};
527 my $gsdl_cgi = $self->{'gsdl_cgi'};
528
529 if ($baseaction::authentication_enabled) {
530 # Ensure the user is allowed to edit this collection
531 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
532 }
533
534 if($where =~ m/index/) {
535 my $site = $self->{'site'};
536 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
537 $gsdl_cgi->checked_chdir($collect_dir);
538 }
539
540 # Make sure the collection isn't locked by someone else
541 $self->lock_collection($username, $collect);
542
543
544 # now at last can set the metadata. $where can specify multiple
545 # $where is of the form: import|archives|index, or a subset thereof
546
547 #my @whichdirs = split('\|', $where);
548
549 # just check whether $where contains import/archives/index/live in turn, and
550 # for each case, process it accordingly
551 if($where =~ m/import/) {
552 $self->_set_import_metadata(@_);
553 }
554
555 if($where =~ m/archives/) {
556
557 # look up docID arg which is optional to set_metadata because it's optional
558 # to set_import, but which is compulsory to set_archives_metadata
559 my $docid = $self->{'d'};
560 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
561 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
562 }
563 # we have a docid, so can set archives meta
564 $self->_set_archives_metadata(@_);
565 }
566
567 if($where =~ m/index/) {
568
569 # look up docID arg which is optional to set_metadata because it's optional
570 # to set_import, but which is compulsory to set_archives_metadata
571 my $docid = $self->{'d'};
572 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
573 $gsdl_cgi->generate_error("No docid (d=...) specified.");
574 }
575 # we have a docid, so can set index meta
576 $self->_set_index_metadata(@_);
577 }
578
579 if($where =~ m/live/) {
580 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
581 }
582
583 # Release the lock once it is done
584 $self->unlock_collection($username, $collect);
585}
586
587sub set_metadata_array
588{
589 my $self = shift @_;
590
591 my $where = $self->{'where'};
592 if(!$where || ($where =~ m/^\s*$/)) {
593 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
594 return;
595 }
596
597 my $username = $self->{'username'};
598 my $collect = $self->{'collect'};
599 my $gsdl_cgi = $self->{'gsdl_cgi'};
600
601 if ($baseaction::authentication_enabled) {
602 # Ensure the user is allowed to edit this collection
603 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
604 }
605
606 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
607 # But including the stmt during this code reorganisation to preserve as-is what used to happen
608 my $site = $self->{'site'};
609 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
610 $gsdl_cgi->checked_chdir($collect_dir);
611
612 # Make sure the collection isn't locked by someone else
613 $self->lock_collection($username, $collect);
614
615 if($where =~ m/import/) {
616 $self->_set_import_metadata_array(@_);
617 }
618 if($where =~ m/archives/) {
619 $self->_set_archives_metadata_array(@_);
620 }
621 if($where =~ m/index/) {
622 $self->_set_index_metadata_array(@_);
623 }
624 if($where =~ m/live/) {
625 $self->_set_live_metadata_array(@_);
626 }
627
628 # Release the lock once it is done
629 $self->unlock_collection($username, $collect);
630}
631
632sub _set_index_metadata_array
633{
634 my $self = shift @_;
635
636 my $collect = $self->{'collect'};
637 my $gsdl_cgi = $self->{'gsdl_cgi'};
638
639 # Obtain the collect dir
640 my $site = $self->{'site'};
641 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
642
643 # look up additional args
644
645 my $infodbtype = $self->{'infodbtype'};
646
647 my $json_str = $self->{'json'};
648 my $doc_array = decode_json $json_str;
649
650
651 my $global_status = 0;
652 my $global_mess = "";
653
654 my @all_docids = ();
655
656 foreach my $doc_array_rec ( @$doc_array ) {
657
658 my $status = -1;
659 my $docid = $doc_array_rec->{'docid'};
660
661 push(@all_docids,$docid);
662
663 my $metaname = $doc_array_rec->{'metaname'};
664 if(defined $metaname) {
665 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
666 my $metavalue = $doc_array_rec->{'metavalue'};
667 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
668
669 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
670 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
671 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
672
673 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
674 $metaname = $metatable_rec->{'metaname'};
675 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
676 my $metapos = undef;
677 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
678
679 foreach my $metavalue ( @$metavals ) { # metavals is an array
680 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
681 if($metamode eq "override") { # now, having overridden the metavalue for the first,
682 # need to accumulate subsequent metavals for this metaname, else the just-assigned
683 # metavalue for this metaname will be lost
684 $metamode = "accumulate";
685 }
686 }
687 }
688 }
689
690 if ($status != 0) {
691 # Catch error if set infodb entry failed
692 $global_status = $status;
693 $global_mess .= "Failed to set metadata key: $docid\n";
694 $global_mess .= "Exit status: $status\n";
695 $global_mess .= "System Error Message: $!\n";
696 $global_mess .= "-" x 20;
697 }
698 }
699
700 if ($global_status != 0) {
701 $global_mess .= "PATH: $ENV{'PATH'}\n";
702 $gsdl_cgi->generate_error($global_mess);
703 }
704 else {
705 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
706 $gsdl_cgi->generate_ok_message($mess);
707 }
708}
709
710sub set_index_metadata_array
711{
712 my $self = shift @_;
713
714 my $username = $self->{'username'};
715 my $collect = $self->{'collect'};
716 my $gsdl_cgi = $self->{'gsdl_cgi'};
717
718 if ($baseaction::authentication_enabled) {
719 # Ensure the user is allowed to edit this collection
720 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
721 }
722
723 # Obtain the collect dir
724 my $site = $self->{'site'};
725 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
726
727 $gsdl_cgi->checked_chdir($collect_dir);
728
729
730 # Make sure the collection isn't locked by someone else
731 $self->lock_collection($username, $collect);
732
733 $self->_set_index_metadata_array(@_);
734
735 # Release the lock once it is done
736 $self->unlock_collection($username, $collect);
737}
738
739# experimental, newly added in and untested
740sub _set_live_metadata_array
741{
742 my $self = shift @_;
743
744 my $collect = $self->{'collect'};
745 my $gsdl_cgi = $self->{'gsdl_cgi'};
746
747 # Obtain the collect dir
748 my $site = $self->{'site'};
749 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
750
751 # look up additional args
752 my $infodbtype = $self->{'infodbtype'};
753 # To people who know $collect_tail please add some comments
754 # Obtain path to the database
755 my $collect_tail = $collect;
756 $collect_tail =~ s/^.*[\/|\\]//;
757 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
758 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
759
760
761 my $json_str = $self->{'json'};
762 my $doc_array = decode_json $json_str;
763
764
765 my $global_status = 0;
766 my $global_mess = "";
767
768 my @all_docids = ();
769
770
771 foreach my $doc_array_rec ( @$doc_array ) {
772
773 my $status = -1;
774 my $docid = $doc_array_rec->{'docid'};
775
776 push(@all_docids,$docid);
777
778 my $metaname = $doc_array_rec->{'metaname'};
779 if(defined $metaname) {
780 my $dbkey = "$docid.$metaname";
781 my $metavalue = $doc_array_rec->{'metavalue'};
782
783 # Set the new value
784 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
785 $status = system($cmd);
786
787 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
788 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
789 foreach my $metatable_rec ( @$metatable ) {
790 $metaname = $metatable_rec->{'metaname'};
791 my $dbkey = "$docid.$metaname";
792
793 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
794 foreach my $metavalue ( @$metavals ) {
795 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
796 $status = system($cmd);
797 }
798 }
799
800 }
801
802 if ($status != 0) {
803 # Catch error if gdbmget failed
804 $global_status = $status;
805 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
806 $global_mess .= "Exit status: $status\n";
807 $global_mess .= "System Error Message: $!\n";
808 $global_mess .= "-" x 20;
809 }
810 }
811
812 if ($global_status != 0) {
813 $global_mess .= "PATH: $ENV{'PATH'}\n";
814 $gsdl_cgi->generate_error($global_mess);
815 }
816 else {
817 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
818 $gsdl_cgi->generate_ok_message($mess);
819 }
820}
821
822sub set_live_metadata_array
823{
824 my $self = shift @_;
825
826 my $username = $self->{'username'};
827 my $collect = $self->{'collect'};
828 my $gsdl_cgi = $self->{'gsdl_cgi'};
829
830 if ($baseaction::authentication_enabled) {
831 # Ensure the user is allowed to edit this collection
832 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
833 }
834
835 # Obtain the collect dir
836 my $site = $self->{'site'};
837 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
838
839 $gsdl_cgi->checked_chdir($collect_dir);
840
841 # Make sure the collection isn't locked by someone else
842 $self->lock_collection($username, $collect);
843
844 $self->_set_live_metadata_array(@_);
845
846 # Release the lock once it is done
847 $self->unlock_collection($username, $collect);
848}
849
850
851sub dxml_metadata
852{
853 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
854 my $metaname = $parser->{'parameters'}->{'metaname'};
855 my $metamode = $parser->{'parameters'}->{'metamode'};
856
857###!!! print STDERR "**** Processing closing </Metadata> tag\n";
858
859 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
860
861 # Find the right metadata tag and checks if we are going to
862 # override it
863 #
864 # Note: This over writes the first metadata block it
865 # encountered. If there are multiple Sections in the doc.xml, it
866 # might not behave as you would expect
867
868 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
869## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n" if (defined $opt_doc_secnum);
870## print STDERR "**** metamode = $metamode\n";
871
872 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
873 {
874 my $name_attr = $attrHash->{'name'};
875 # print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
876 if (($name_attr eq $metaname) && ($metamode eq "override"))
877 {
878 if (!defined $parser->{'parameters'}->{'poscount'})
879 {
880 $parser->{'parameters'}->{'poscount'} = 0;
881 }
882 else
883 {
884 $parser->{'parameters'}->{'poscount'}++;
885 }
886
887 if (defined $parser->{'parameters'}->{'metapos'}) { # if pos defined, ignore any prevmeta if also defined, and just wait for pos match
888
889 if ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) {
890 ###print STDERR "#### got pos match!!\n";
891 # Get the value and override the current value
892 my $metavalue = $parser->{'parameters'}->{'metavalue'};
893 $attrHash->{'_content'} = $metavalue;
894
895 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
896 # we're not setting metamode to override here.
897
898 # Don't want it to wipe out any other pieces of metadata
899 $parser->{'parameters'}->{'metamode'} = "done";
900 }
901 }
902 elsif (defined $parser->{'parameters'}->{'prevmetavalue'}) { # if no pos defined but prevmeta defined, then wait for prevmeta match
903 if ($parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) {
904 ###print STDERR "### prev meta value matches\n";
905 my $metavalue = $parser->{'parameters'}->{'metavalue'};
906 $attrHash->{'_content'} = $metavalue;
907
908 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
909 # we're not setting metamode to override here.
910
911 $parser->{'parameters'}->{'metamode'} = "done";
912
913 }
914 }
915 # Note that before commit rev 32076, if neither metapos nor prevmeta defined, then code tried to set pos 0 for that metaname to the new value
916 # However that is not how the code works now (and couldn't get it to work that way yet after rev 32076).
917 # What it does now, and what Dr Bainbridge approved of is that:
918 # If no pos or prevmeta defined, then on override, the metavalue set for the specificed metaname becomes the ONLY instance of that metaname
919 # So any earlier instances of that metaname is removed and replaced by the single new instance.
920 }
921 }
922
923 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
924 return [$tagname => $attrHash];
925}
926
927# This method exists purely for catching invalid section numbers that the client
928# requested to edit. Once the parser has reached the end (the final </Archive> tag),
929# we've seen all the Sections in the doc.xml, and none of their section nums matched
930# if the metamode has not been set to 'done' by then.
931sub dxml_archive
932{
933 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
934 my $metamode = $parser->{'parameters'}->{'metamode'};
935
936 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
937 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
938
939 # print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
940
941 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
942 #print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
943
944 my $metaname = $parser->{'parameters'}->{'metaname'};
945 my $metavalue = $parser->{'parameters'}->{'metavalue'};
946
947 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
948 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
949 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
950 print STDERR "\tin section $opt_doc_secnum.\n";
951 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
952 }
953
954 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
955 return [$tagname => $attrHash];
956}
957
958sub dxml_description
959{
960 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
961 my $metamode = $parser->{'parameters'}->{'metamode'};
962
963 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
964 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
965
966###!!! print STDERR "**** Processing a closing </Description> tag \n";
967 # print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
968
969 # Accumulate the metadata
970
971 # We'll be accumulating metadata at this point if we haven't found and therefore
972 # haven't processed the metadata yet.
973 # For subsections, this means that if we're at a matching subsection, but haven't
974 # found the correct metaname to override in that subsection, we accumulate it as new
975 # meta in the subsection by adding it to the current description.
976 # If there's no subsection info for the metadata, it will accumulate at the top level
977 # section description if we hadn't found a matching metaname to override at this point.
978
979 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
980 # at the toplevel section. In the latter case, it means we want to process meta in the
981 # toplevel section. So the eq check between the values below will work in all cases.
982
983 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
984 # this document has been provided. In that case, the metadata for that opt_doc_secnum
985 # won't get attached/accumulated to any part of the doc, not even its top-level section.
986
987 if ($curr_secnum eq $opt_doc_secnum
988 && ($metamode eq "accumulate" || $metamode eq "override")) {
989 # note, metamode would have been set to done if a metadata operation had already succeeded.
990 # If we get here, we still have the metadata to append.
991 if ($metamode eq "override") {
992 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
993 }
994
995 # If we get to here and metamode is override, this means there
996 # was no existing value to overide => treat as an append operation
997
998 # Tack a new metadata tag on to the end of the <Metadata>+ block
999 my $metaname = $parser->{'parameters'}->{'metaname'};
1000 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1001
1002 my $metadata_attr = {
1003 '_content' => $metavalue,
1004 'name' => $metaname,
1005 'mode' => "accumulate"
1006 };
1007
1008 my $append_metadata = [ "Metadata" => $metadata_attr ];
1009 my $description_content = $attrHash->{'_content'};
1010 print "Appending metadata to doc.xml\n";
1011
1012 if (ref($description_content)) {
1013 # got some existing interesting nested content
1014 push(@$description_content, " ", $append_metadata, "\n ");
1015 }
1016 else {
1017 #description_content is most likely a string such as "\n"
1018 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1019 }
1020
1021 $parser->{'parameters'}->{'metamode'} = "done";
1022 }
1023 else {
1024 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1025## print STDERR "**** NOT ACCUMULATE?!? \n";
1026 }
1027 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1028 return [$tagname => $attrHash];
1029}
1030
1031sub edit_xml_file
1032{
1033 my $self = shift @_;
1034 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1035
1036 # use XML::Rules to add it in (read in and out again)
1037 my $parser = XML::Rules->new(start_rules => $start_rules,
1038 rules => $rules,
1039 style => 'filter',
1040 output_encoding => 'utf8' );
1041
1042 my $xml_in = "";
1043 if (!open(MIN,"<$filename")) {
1044 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1045 }
1046 else {
1047 # Read all the text in
1048 my $line;
1049 while (defined ($line=<MIN>)) {
1050 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1051 }
1052 close(MIN);
1053
1054 my $MOUT;
1055 if (!open($MOUT,">$filename")) {
1056 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1057 }
1058 else {
1059 # Matched lines will get handled by the call backs
1060## my $xml_out = "";
1061
1062 binmode($MOUT,":utf8");
1063 $parser->filter($xml_in,$MOUT, $options);
1064
1065# binmode(MOUT,":utf8");
1066# print MOUT $xml_out;
1067 close($MOUT);
1068 }
1069 }
1070
1071 # copy across any custom error information that was stored during parsing
1072 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1073}
1074
1075sub edit_doc_xml
1076{
1077 my $self = shift @_;
1078 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1079
1080 my $info_mess = <<RAWEND;
1081****************************
1082 edit_doc_xml()
1083****************************
1084RAWEND
1085
1086 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1087 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1088 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1089 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1090 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1091 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1092 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1093
1094 $info_mess .= "****************************\n";
1095
1096 $gsdl_cgi->generate_message($info_mess);
1097
1098 # To monitor which section/subsection number we are in
1099 my @start_rules =
1100 ( 'Section' => \&dxml_start_section );
1101
1102 # use XML::Rules to add it in (read in and out again)
1103 # Set the call back functions
1104 my @rules =
1105 ( _default => 'raw',
1106 'Metadata' => \&dxml_metadata,
1107 'Description' => \&dxml_description,
1108 'Archive' => \&dxml_archive); # just for catching errors at end
1109
1110 # Sets the parameters
1111 my $options = { 'metaname' => $metaname,
1112 'metapos' => $metapos,
1113 'metavalue' => $metavalue,
1114 'metamode' => $metamode,
1115 'prevmetavalue' => $prevmetavalue };
1116
1117 if (defined $opt_secnum) {
1118 $options->{'secnum'} = $opt_secnum;
1119 }
1120
1121 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1122}
1123
1124sub set_archives_metadata_entry
1125{
1126 my $self = shift @_;
1127 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $dv, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1128
1129 my $info_mess = <<RAWEND;
1130****************************
1131 set_archives_metadata_entry()
1132****************************
1133RAWEND
1134
1135 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1136 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1137 $info_mess .= " collect = $collect\n" if defined($collect);
1138 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1139 $info_mess .= " docid = $docid\n" if defined($docid);
1140 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1141 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1142 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1143 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1144 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1145
1146 $info_mess .= "****************************\n";
1147
1148 $gsdl_cgi->generate_message($info_mess);
1149
1150 # Obtain the doc.xml path for the specified docID
1151 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1152
1153 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1154 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1155 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1156
1157 # check if request if for file-level doc-version history 'nminus-<n>' version
1158 if (defined $dv && ($dv ne "")) {
1159 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1160
1161 my ($doc_xml_tailname, $doc_xml_dirname) = File::Basename::fileparse($doc_xml_file);
1162 $doc_xml_file = &util::filename_cat($doc_xml_dirname,$FLDV_HISTORY_DIR,$dv,$doc_xml_tailname);
1163 }
1164
1165 # The $doc_xml_file is relative to the archives, and now let's get the full path
1166 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1167
1168 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1169
1170 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1171 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1172 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1173 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1174 print "override mode with no pos or prev value set. Removing all existing values for $metaname, and setting mode to accumulate for the new value.\n";
1175 # remove all values of $metaname metadata
1176 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1177 $metamode = "accumulate";
1178 }
1179 # Edit the doc.xml file with the specified metadata name, value and position.
1180 # TODO: there is a potential problem here as this edit_doc_xml function
1181 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1182 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1183
1184 # dxml_metadata method ignores metapos if metamode anything other than override
1185 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1186 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1187
1188 # return 0; # return 0 for now to indicate no error
1189 return (defined $self->{'error_msg'}) ? 1 : 0;
1190}
1191
1192
1193sub set_archives_metadata
1194{
1195 my $self = shift @_;
1196
1197 my $username = $self->{'username'};
1198 my $collect = $self->{'collect'};
1199 my $gsdl_cgi = $self->{'gsdl_cgi'};
1200
1201 if ($baseaction::authentication_enabled) {
1202 # Ensure the user is allowed to edit this collection
1203 $self->authenticate_user($username, $collect);
1204 }
1205
1206 # Make sure the collection isn't locked by someone else
1207 $self->lock_collection($username, $collect);
1208
1209 $self->_set_archives_metadata(@_);
1210
1211 # Release the lock once it is done
1212 $self->unlock_collection($username, $collect);
1213}
1214
1215sub _set_archives_metadata_array
1216{
1217 my $self = shift @_;
1218
1219 my $collect = $self->{'collect'};
1220 my $gsdl_cgi = $self->{'gsdl_cgi'};
1221
1222 # Obtain the collect dir
1223 my $site = $self->{'site'};
1224 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1225
1226 # look up additional args
1227
1228 my $infodbtype = $self->{'infodbtype'};
1229
1230 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1231
1232 my $json_str = $self->{'json'};
1233 my $doc_array = decode_json $json_str;
1234
1235
1236 my $global_status = 0;
1237 my $global_mess = "";
1238
1239 my @all_docids = ();
1240
1241 foreach my $doc_array_rec ( @$doc_array ) {
1242 my $status = -1;
1243 my $docid = $doc_array_rec->{'docid'};
1244 my $dv = $doc_array_rec->{'dv'};
1245
1246 push(@all_docids,$docid);
1247
1248 my $metaname = $doc_array_rec->{'metaname'};
1249 if(defined $metaname) {
1250
1251 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1252
1253 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1254 my $metavalue = $doc_array_rec->{'metavalue'};
1255 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1256
1257
1258 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1259 # make "accumulate" the default (less destructive, as it won't actually
1260 # delete any existing values)
1261 $metamode = "accumulate";
1262 }
1263
1264 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
1265 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1266 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1267 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1268
1269 foreach my $metatable_rec ( @$metatable ) {
1270 $metaname = $metatable_rec->{'metaname'};
1271 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1272 my $metapos = undef;
1273 my $prevmetavalue = undef;
1274 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1275
1276 foreach my $metavalue ( @$metavals ) {
1277 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1278 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1279
1280 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1281 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1282 # metavalue for this metaname will be lost
1283 $metamode = "accumulate";
1284 }
1285 }
1286 }
1287 }
1288
1289 if ($status != 0) {
1290 # Catch error if set infodb entry failed
1291 $global_status = $status;
1292 $global_mess .= "Failed to set metadata key: $docid\n";
1293 $global_mess .= "Exit status: $status\n";
1294 $global_mess .= "System Error Message: $!\n";
1295 $global_mess .= "-" x 20 . "\n";
1296 }
1297 }
1298
1299 if ($global_status != 0) {
1300 $global_mess .= "PATH: $ENV{'PATH'}\n";
1301 $gsdl_cgi->generate_error($global_mess);
1302 }
1303 else {
1304 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1305 $gsdl_cgi->generate_ok_message($mess);
1306 }
1307}
1308
1309sub set_archives_metadata_array
1310{
1311 my $self = shift @_;
1312
1313 my $username = $self->{'username'};
1314 my $collect = $self->{'collect'};
1315 my $gsdl_cgi = $self->{'gsdl_cgi'};
1316
1317 if ($baseaction::authentication_enabled) {
1318 # Ensure the user is allowed to edit this collection
1319 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1320 }
1321
1322 # Obtain the collect dir
1323 my $site = $self->{'site'};
1324 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1325
1326 $gsdl_cgi->checked_chdir($collect_dir);
1327
1328 # Make sure the collection isn't locked by someone else
1329 $self->lock_collection($username, $collect);
1330
1331 $self->_set_archives_metadata_array(@_);
1332
1333 # Release the lock once it is done
1334 $self->unlock_collection($username, $collect);
1335}
1336
1337
1338
1339# the version of set_archives_meta that doesn't do authentication
1340sub _set_archives_assocfile
1341{
1342 my $self = shift @_;
1343
1344 my $collect = $self->{'collect'};
1345 my $gsdl_cgi = $self->{'gsdl_cgi'};
1346 my $infodbtype = $self->{'infodbtype'};
1347
1348 # Obtain the collect and archive dir
1349 my $site = $self->{'site'};
1350 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1351 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1352
1353 # look up additional args
1354 my $docid = $self->{'d'};
1355
1356 my $assocname = $self->{'assocname'};
1357
1358 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1359 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1360
1361 my $doc_file = $doc_rec->{'doc-file'}->[0];
1362
1363 # check if request if for file-level doc-version history 'nminus-<n>' version
1364 my $dv = $self->{'dv'};
1365
1366 my ($unused_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1367
1368 my $output_file = $assocname;
1369
1370 if (defined $dv && ($dv ne "")) {
1371 # Need to insert '_fldv_history/nminus-<n>' into output_file
1372
1373 $output_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$output_file);
1374 }
1375 else {
1376 $output_file = &util::filename_cat($doc_dirname,$output_file);
1377 }
1378
1379 my $output_filename = &util::filename_cat($archive_dir, $output_file);
1380
1381 my $filedata = $self->{'filedata'};
1382 my $FIN = $self->{'fileupload'};
1383
1384 if ((!defined $filedata) && (!defined $FIN)) {
1385 $gsdl_cgi->generate_error("set-archives-assocfile: either 'fileupload' or 'filedata' must be set");
1386 }
1387 else {
1388
1389 my $had_error = 0;
1390 my $had_error_mess = undef;
1391
1392 if (defined $filedata) {
1393 if (open(FOUT,">$output_filename")) {
1394 print FOUT $filedata;
1395 close(FOUT);
1396 }
1397 else {
1398 $had_error = 1;
1399 $had_error_mess = "Failed to save filedata to: $output_filename";
1400 print STDERR "Error - $had_error_mess:\n$!\n";
1401 }
1402 }
1403 else {
1404 my $FIN = $self->{'fileupload'};
1405 if (open(FOUT,">$output_filename")) {
1406 binmode(FOUT, ":raw");
1407
1408 while (1) {
1409 my $buffer = "";
1410 my $bytes_read = read($FIN, $buffer, 1024);
1411
1412 if (defined $bytes_read) {
1413 if ($bytes_read>0) {
1414 print FOUT $buffer;
1415 }
1416 last if $bytes_read < 1024;
1417 }
1418 else {
1419 $had_error = 1;
1420 $had_error_mess = "set-archives-assocfile: Failed to open uploaded file for reading: $!\n";
1421 print STDERR "Error - $had_error_mess";
1422 last;
1423 }
1424 }
1425
1426 close($FIN);
1427 close(FOUT);
1428 }
1429 else {
1430 $had_error = 1;
1431 $had_error_mess = "Failed to save file uploaded file as: $output_filename";
1432 print STDERR "Error - $had_error_mess:\n$!\n";
1433 }
1434 }
1435
1436 if (!$had_error) {
1437 my $mess = "set-archives-assocfile successful save uploaded content into 'archives' as: $output_file\n";
1438 $gsdl_cgi->generate_ok_message($mess);
1439 }
1440 else {
1441 $gsdl_cgi->generate_error($had_error_mess);
1442 }
1443 }
1444}
1445
1446
1447sub set_archives_assocfile
1448{
1449 my $self = shift @_;
1450
1451 my $username = $self->{'username'};
1452 my $collect = $self->{'collect'};
1453 my $gsdl_cgi = $self->{'gsdl_cgi'};
1454
1455 if ($baseaction::authentication_enabled) {
1456 # Ensure the user is allowed to edit this collection
1457 $self->authenticate_user($username, $collect);
1458 }
1459
1460
1461 # Make sure the collection isn't locked by someone else
1462 $self->lock_collection($username, $collect);
1463
1464 $self->_set_archives_assocfile(@_);
1465
1466 # Release the lock once it is done
1467 $self->unlock_collection($username, $collect);
1468}
1469
1470
1471sub _remove_archives_metadata
1472{
1473 my $self = shift @_;
1474
1475 my $collect = $self->{'collect'};
1476 my $gsdl_cgi = $self->{'gsdl_cgi'};
1477 my $infodbtype = $self->{'infodbtype'};
1478
1479
1480 # Obtain the collect and archive dir
1481 my $site = $self->{'site'};
1482 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1483
1484 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1485
1486 # look up additional args
1487 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1488
1489 my $metaname = $self->{'metaname'};
1490 my $metapos = $self->{'metapos'};
1491 my $metavalue = $self->{'metavalue'};
1492
1493 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1494 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1495
1496 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1497 # default to deleting the first metavalue for the given metaname
1498 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1499 # seem to get deleted in one single remove_archives_meta action invocation.
1500 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1501 # deleted, both get deleted.
1502 if(!defined $metapos && !defined $metavalue) {
1503 $metapos = 0;
1504 }
1505
1506 my $metamode = $self->{'metamode'};
1507 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1508
1509 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1510 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1511
1512 # This now stores the full pathname
1513 my $doc_file = $doc_rec->{'doc-file'}->[0];
1514
1515 # check if request if for file-level doc-version history 'nminus-<n>' version
1516 my $dv = $self->{'dv'};
1517 if (defined $dv && ($dv ne "")) {
1518 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1519
1520 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1521 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
1522 }
1523
1524 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
1525
1526 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1527# my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, undef, $docid_secnum);
1528
1529 if ($status == 0)
1530 {
1531 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1532 $mess .= " $metaname";
1533 $mess .= "->[$metapos]" if (defined $metapos);
1534 $mess .= " ($metavalue)" if (defined $metavalue);
1535 $gsdl_cgi->generate_ok_message($mess);
1536 }
1537 else
1538 {
1539 my $mess .= "Failed to remove archives metadata key: $docid\n";
1540 $mess .= "Exit status: $status\n";
1541 $mess .= "System Error Message: $!\n";
1542 $mess .= "-" x 20 . "\n";
1543
1544 $gsdl_cgi->generate_error($mess);
1545 }
1546
1547 #return $status; # in case calling functions have a use for this
1548}
1549
1550sub remove_archives_metadata
1551{
1552 my $self = shift @_;
1553
1554 my $username = $self->{'username'};
1555 my $collect = $self->{'collect'};
1556 my $gsdl_cgi = $self->{'gsdl_cgi'};
1557
1558 if ($baseaction::authentication_enabled)
1559 {
1560 # Ensure the user is allowed to edit this collection
1561 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1562 }
1563
1564 # Make sure the collection isn't locked by someone else
1565 $self->lock_collection($username, $collect);
1566
1567 $self->_remove_archives_metadata(@_);
1568
1569 # Release the lock once it is done
1570 $self->unlock_collection($username, $collect);
1571}
1572
1573sub remove_from_doc_xml
1574{
1575 my $self = shift @_;
1576 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1577
1578 my @start_rules = ('Section' => \&dxml_start_section);
1579
1580 # Set the call-back functions for the metadata tags
1581 my @rules =
1582 (
1583 _default => 'raw',
1584 'Metadata' => \&rfdxml_metadata
1585 );
1586
1587 my $parser = XML::Rules->new
1588 (
1589 start_rules => \@start_rules,
1590 rules => \@rules,
1591 style => 'filter',
1592 output_encoding => 'utf8',
1593# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1594 stripspaces => 2|0|0 # ineffectual
1595 );
1596
1597 my $status = 0;
1598 my $xml_in = "";
1599 if (!open(MIN,"<$doc_xml_filename"))
1600 {
1601 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1602 $status = 1;
1603 }
1604 else
1605 {
1606 # Read them in
1607 my $line;
1608 while (defined ($line=<MIN>)) {
1609 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1610 }
1611 close(MIN);
1612
1613 # Filter with the call-back functions
1614 my $xml_out = "";
1615
1616 my $MOUT;
1617 if (!open($MOUT,">$doc_xml_filename")) {
1618 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1619 $status = 1;
1620 }
1621 else {
1622 binmode($MOUT,":utf8");
1623 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1624 close($MOUT);
1625 }
1626 }
1627 return $status;
1628}
1629
1630sub rfdxml_metadata
1631{
1632 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1633
1634 # For comparisons, toplevel section is indicated by ""
1635 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1636 my $secid = $parser->{'parameters'}->{'secid'} || "";
1637
1638 if (!($secid eq $curr_sec_num))
1639 {
1640 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1641 return [$tagname => $attrHash];
1642 }
1643
1644 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1645 {
1646 if (!defined $parser->{'parameters'}->{'poscount'})
1647 {
1648 $parser->{'parameters'}->{'poscount'} = 0;
1649 }
1650 else
1651 {
1652 $parser->{'parameters'}->{'poscount'}++;
1653 }
1654
1655 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1656 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
1657 return [];
1658 }
1659
1660 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1661 {
1662 return [];
1663 }
1664
1665 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1666 {
1667 return [];
1668 }
1669 }
1670
1671 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1672 return [$tagname => $attrHash];
1673}
1674
1675sub mxml_metadata
1676{
1677 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1678 my $metaname = $parser->{'parameters'}->{'metaname'};
1679 my $metamode = $parser->{'parameters'}->{'metamode'};
1680
1681 # Report error if we don't see FileName tag before this
1682 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1683
1684 # Don't do anything if we are not in the right FileSet
1685 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1686 if ($file_regexp =~ /\.\*/) {
1687 # Only interested in a file_regexp if it specifies precisely one
1688 # file.
1689 # So, skip anything with a .* in it as it is too general
1690## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1691 return [$tagname => $attrHash];
1692 }
1693 my $src_file = $parser->{'parameters'}->{'src_file'};
1694 if (!($src_file =~ /$file_regexp/)) {
1695 return [$tagname => $attrHash];
1696 }
1697## print STDERR "*** mxl metamode = $metamode\n";
1698
1699 # Find the right metadata tag and checks if we are going to override it
1700 my $name_attr = $attrHash->{'name'};
1701 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1702
1703 # now metadata.xml functions need to keep track of metapos
1704 if (!defined $parser->{'parameters'}->{'poscount'})
1705 {
1706 $parser->{'parameters'}->{'poscount'} = 0;
1707 }
1708 else
1709 {
1710 $parser->{'parameters'}->{'poscount'}++;
1711 }
1712
1713 # If either the metapos or prevmetavalue is set,
1714 # get the value and override the current value
1715 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1716
1717 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1718 {
1719 $attrHash->{'_content'} = $metavalue;
1720
1721 ## print STDERR "**** overriding metadata.xml\n";
1722
1723 # Don't want it to wipe out any other pieces of metadata
1724 $parser->{'parameters'}->{'metamode'} = "done";
1725 }
1726 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1727 {
1728 $attrHash->{'_content'} = $metavalue;
1729 $parser->{'parameters'}->{'metamode'} = "done";
1730 }
1731 }
1732
1733 # mxml_description will process the metadata if metadata is accumulate,
1734 # or if we haven't found the metadata to override
1735
1736 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1737 return [$tagname => $attrHash];
1738}
1739
1740
1741sub mxml_description
1742{
1743 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1744 my $metamode = $parser->{'parameters'}->{'metamode'};
1745
1746 # Failed... Report error if we don't see FileName tag before this
1747 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1748
1749 # Don't do anything if we are not in the right FileSet
1750 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1751 if ($file_regexp =~ m/\.\*/) {
1752 # Only interested in a file_regexp if it specifies precisely one
1753 # file.
1754 # So, skip anything with a .* in it as it is too general
1755 return [$tagname => $attrHash];
1756 }
1757 my $src_file = $parser->{'parameters'}->{'src_file'};
1758
1759 if (!($src_file =~ m/$file_regexp/)) {
1760 return [$tagname => $attrHash];
1761 }
1762
1763 # Accumulate the metadata block to the end of the description block
1764 # Note: This adds metadata block to all description blocks, so if there are
1765 # multiple FileSets, it will add to all of them
1766 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1767
1768 # if metamode was "override" but get to here then it failed to
1769 # find an item to override, in which case it should append its
1770 # value to the end, just like the "accumulate" mode
1771
1772 if ($metamode eq "override") {
1773 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1774 }
1775
1776 # tack a new metadata tag on to the end of the <Metadata>+ block
1777 my $metaname = $parser->{'parameters'}->{'metaname'};
1778 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1779
1780 my $metadata_attr = { '_content' => $metavalue,
1781 'name' => $metaname,
1782 'mode' => "accumulate" };
1783
1784 my $append_metadata = [ "Metadata" => $metadata_attr ];
1785 my $description_content = $attrHash->{'_content'};
1786
1787## print STDERR "*** appending to metadata.xml\n";
1788
1789 # append the new metadata element to the end of the current
1790 # content contained inside this tag
1791 if (ref($description_content) eq "") {
1792 # => string or numeric literal
1793 # this is caused by a <Description> block has no <Metadata> child elements
1794 # => set up an empty array in '_content'
1795 $attrHash->{'_content'} = [ "\n" ];
1796 $description_content = $attrHash->{'_content'};
1797 }
1798
1799 push(@$description_content, " ", $append_metadata, "\n ");
1800 $parser->{'parameters'}->{'metamode'} = "done";
1801 }
1802
1803 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1804 return [$tagname => $attrHash];
1805}
1806
1807
1808sub mxml_fileset
1809{
1810 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1811
1812 # Initilise the current_file
1813 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1814 # FileName tag must come before Description tag
1815 $parser->{'parameters'}->{'current_file'} = "";
1816
1817 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1818 return [$tagname => $attrHash];
1819}
1820
1821sub mxml_directorymetadata
1822{
1823 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1824
1825 # if we haven't processed the metadata when we reach the end of metadata.xml
1826 # it's because there's no particular FileSet element whose FileName matched
1827 # In which case, add a new FileSet for this FileName
1828 my $metamode = $parser->{'parameters'}->{'metamode'};
1829 if($metamode ne "done") {
1830
1831 if ($metamode eq "override") {
1832 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1833 }
1834
1835 # If we get to here and metamode is override, this means there
1836 # was no existing value to overide => treat as an append operation
1837
1838 # Create a new FileSet element and append to DirectoryMetadata
1839 # <FileSet>
1840 # <FileName>src_file</FileName>
1841 # <Description>
1842 # <Metadata mode="" name="">metavalue</Metadata>
1843 # </Description>
1844 # </FileSet>
1845 my $src_file = $parser->{'parameters'}->{'src_file'};
1846 my $metaname = $parser->{'parameters'}->{'metaname'};
1847 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1848 my $metadata_attr = {
1849 '_content' => $metavalue,
1850 'name' => $metaname,
1851 'mode' => "accumulate"
1852 };
1853 my $append_metadata = [ "Metadata" => $metadata_attr ];
1854 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
1855 my $description_element = [ "Description" => $description_attr ];
1856
1857 #_content is not an attribute, it's special and holds the children of this element
1858 # including the textnode value embedded in this element if any.
1859 my $filename_attr = {'_content' => $src_file};
1860 my $filename_element = [ "FileName" => $filename_attr ];
1861
1862 my $fileset_attr = {};
1863 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1864 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1865
1866
1867 # get children of dirmeta, and push the new FileSet element onto it
1868 print "Appending metadata to metadata.xml\n";
1869 my $dirmeta_content = $attrHash->{'_content'};
1870 if (ref($dirmeta_content)) {
1871 # got some existing interesting nested content
1872 #push(@$dirmeta_content, " ", $fileset ,"\n ");
1873 push(@$dirmeta_content, "\t", $fileset ,"\n");
1874 }
1875 else {
1876 #description_content is most likely a string such as "\n"
1877 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
1878 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1879 }
1880
1881 $parser->{'parameters'}->{'metamode'} = "done";
1882 }
1883 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1884 return [$tagname => $attrHash];
1885}
1886
1887
1888sub edit_metadata_xml
1889{
1890 my $self = shift @_;
1891 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1892
1893 # Set the call-back functions for the metadata tags
1894 my @rules =
1895 ( _default => 'raw',
1896 'FileName' => \&mxml_filename,
1897 'Metadata' => \&mxml_metadata,
1898 'Description' => \&mxml_description,
1899 'FileSet' => \&mxml_fileset,
1900 'DirectoryMetadata' => \&mxml_directorymetadata);
1901
1902 # use XML::Rules to add it in (read in and out again)
1903 my $parser = XML::Rules->new(rules => \@rules,
1904 style => 'filter',
1905 output_encoding => 'utf8',
1906 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1907
1908 if (!-e $metadata_xml_filename) {
1909
1910 if (open(MOUT,">$metadata_xml_filename")) {
1911
1912 my $src_file_re = &util::filename_to_regex($src_file);
1913 # shouldn't the following also be in the above utility routine??
1914 # $src_file_re =~ s/\./\\./g;
1915
1916 print MOUT "<?xml version=\"1.0\"?>\n";
1917 print MOUT "<DirectoryMetadata>\n";
1918 print MOUT " <FileSet>\n";
1919 print MOUT " <FileName>$src_file_re</FileName>\n";
1920 print MOUT " <Description>\n";
1921 print MOUT " </Description>\n";
1922 print MOUT " </FileSet>\n";
1923 print MOUT "</DirectoryMetadata>\n";
1924
1925 close(MOUT);
1926 }
1927 else {
1928 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1929 }
1930 }
1931
1932
1933 my $xml_in = "";
1934 if (!open(MIN,"<$metadata_xml_filename")) {
1935 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1936 }
1937 else {
1938 # Read them in
1939 my $line;
1940 while (defined ($line=<MIN>)) {
1941 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1942 }
1943 close(MIN);
1944
1945 # Filter with the call-back functions
1946 my $xml_out = "";
1947
1948 my $MOUT;
1949 if (!open($MOUT,">$metadata_xml_filename")) {
1950 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1951 }
1952 else {
1953 binmode($MOUT,":utf8");
1954
1955 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1956 # At the moment, I will just hack it!
1957 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1958 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1959 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1960 #print MOUT $xml_out;
1961
1962 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1963 metapos => $metapos,
1964 metavalue => $metavalue,
1965 metamode => $metamode,
1966 src_file => $src_file,
1967 prevmetavalue => $prevmetavalue,
1968 current_file => undef} );
1969 close($MOUT);
1970 }
1971 }
1972}
1973
1974
1975sub set_import_metadata
1976{
1977 my $self = shift @_;
1978
1979 my $username = $self->{'username'};
1980 my $collect = $self->{'collect'};
1981 my $gsdl_cgi = $self->{'gsdl_cgi'};
1982
1983 if ($baseaction::authentication_enabled) {
1984 # Ensure the user is allowed to edit this collection
1985 $self->authenticate_user($username, $collect);
1986 }
1987
1988 # Make sure the collection isn't locked by someone else
1989 $self->lock_collection($username, $collect);
1990
1991 $self->_set_import_metadata(@_);
1992
1993 # Release the lock once it is done
1994 $self->unlock_collection($username, $collect);
1995
1996}
1997
1998sub set_import_metadata_array
1999{
2000 my $self = shift @_;
2001
2002 my $username = $self->{'username'};
2003 my $collect = $self->{'collect'};
2004 my $gsdl_cgi = $self->{'gsdl_cgi'};
2005
2006 if ($baseaction::authentication_enabled) {
2007 # Ensure the user is allowed to edit this collection
2008 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2009 }
2010
2011 # Obtain the collect dir
2012 my $site = $self->{'site'};
2013 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2014
2015 $gsdl_cgi->checked_chdir($collect_dir);
2016
2017 # Make sure the collection isn't locked by someone else
2018 $self->lock_collection($username, $collect);
2019
2020 $self->_set_import_metadata_array(@_);
2021
2022 # Release the lock once it is done
2023 $self->unlock_collection($username, $collect);
2024
2025}
2026
2027
2028sub _set_import_metadata_array
2029{
2030 my $self = shift @_;
2031
2032 my $collect = $self->{'collect'};
2033 my $gsdl_cgi = $self->{'gsdl_cgi'};
2034
2035 # Obtain the collect dir
2036 my $site = $self->{'site'};
2037 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2038
2039 # look up additional args
2040
2041 my $infodbtype = $self->{'infodbtype'};
2042
2043 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2044 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2045
2046 my $json_str = $self->{'json'};
2047 my $doc_array = decode_json $json_str;
2048
2049 my $global_status = 0;
2050 my $global_mess = "";
2051
2052 my @all_docids = ();
2053
2054 foreach my $doc_array_rec ( @$doc_array )
2055 {
2056 my $status = -1;
2057 my $docid = $doc_array_rec->{'docid'};
2058
2059 my ($docid_root,$docid_secnum);
2060 if(defined $docid) {
2061 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2062 # as yet no support for setting subsection metadata in metadata.xml
2063 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2064 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2065 next; # skip this docid in for loop
2066 }
2067 }
2068
2069 push(@all_docids,$docid); # docid_root rather
2070
2071 my $metaname = $doc_array_rec->{'metaname'};
2072 if (defined $metaname) {
2073 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2074 my $metavalue = $doc_array_rec->{'metavalue'};
2075 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2076
2077 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2078 # make "accumulate" the default (less destructive, as it won't actually
2079 # delete any existing values)
2080 $metamode = "accumulate";
2081 }
2082
2083 # adding metapos and prevmetavalue support to import_metadata subroutines
2084 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2085 my $prevmetavalue = $self->{'prevmetavalue'};
2086
2087 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2088
2089 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2090 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2091
2092 foreach my $metatable_rec ( @$metatable ) {
2093 $metaname = $metatable_rec->{'metaname'};
2094 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2095 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2096 # make "accumulate" the default (less destructive, as it won't actually
2097 # delete any existing values)
2098 $metamode = "accumulate";
2099 }
2100
2101 # No support for metapos and prevmetavalue in the JSON metatable substructure
2102 my $metapos = undef;
2103 my $prevmetavalue = undef;
2104 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2105
2106 foreach my $metavalue ( @$metavals ) {
2107 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2108
2109 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2110 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2111 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2112 # metavalue for this metaname will be lost
2113 $metamode = "accumulate";
2114 }
2115 }
2116 }
2117 }
2118 }
2119
2120 # always a success message
2121 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2122 $gsdl_cgi->generate_ok_message($mess);
2123}
2124
2125# always returns true (1)
2126sub set_import_metadata_entry
2127{
2128 my $self = shift @_;
2129 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2130
2131 my $info_mess = <<RAWEND;
2132****************************
2133 set_import_metadata_entry()
2134****************************
2135RAWEND
2136
2137 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2138 $info_mess .= " collect = $collect\n" if defined($collect);
2139 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2140 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2141 $info_mess .= " docid = $docid\n" if defined($docid);
2142 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2143 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2144 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2145 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2146 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2147
2148 $info_mess .= "****************************\n";
2149
2150 $gsdl_cgi->generate_message($info_mess);
2151
2152 # import works with metadata.xml which can have inherited metadata
2153 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2154 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2155 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2156 # what they're doing if they provide a metapos.
2157 if(defined $metapos) {
2158 print STDERR "@@@@ WARNING: metapos defined.\n";
2159 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2160 }
2161
2162 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2163 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2164 my $metadata_xml_file;
2165 my $import_filename = undef;
2166
2167 if (defined $docid) {
2168 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2169 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2170
2171 # This now stores the full pathname
2172 $import_filename = $doc_rec->{'src-file'}->[0];
2173 $import_filename = &util::placeholders_to_abspath($import_filename);
2174
2175 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2176 # as the array version of the method doesn't support the -f parameter yet
2177 my $import_file = $self->{'f'};
2178 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2179 }
2180
2181 # figure out correct metadata.xml file [?]
2182 # Assuming the metadata.xml file is next to the source file
2183 # Note: This will not work if it is using the inherited metadata from the parent folder
2184 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2185 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2186
2187 # If we're overriding everything, then $prevmetavalue=undefined and
2188 # $metamode=override combined with $metapos=undefined
2189 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2190 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2191 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2192## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2193 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2194
2195 }
2196
2197 # Edit the metadata.xml
2198 # Modified by Jeffrey from DL Consulting
2199 # Handle the case where there is one metadata.xml file for multiple FileSets
2200 # The XML filter needs to know whether it is in the right FileSet
2201 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2202 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2203 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2204 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2205 #return 0;
2206 return $metadata_xml_filename;
2207}
2208
2209sub _remove_import_metadata
2210{
2211 my $self = shift @_;
2212
2213 my $collect = $self->{'collect'};
2214 my $gsdl_cgi = $self->{'gsdl_cgi'};
2215 my $infodbtype = $self->{'infodbtype'};
2216
2217 # Obtain the collect and archive dir
2218 my $site = $self->{'site'};
2219 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2220
2221 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2222
2223 # look up additional args
2224 my $docid = $self->{'d'};
2225 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2226 {
2227 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2228 }
2229
2230 my $metaname = $self->{'metaname'};
2231 my $metapos = $self->{'metapos'};
2232 my $metavalue = $self->{'metavalue'};
2233
2234 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2235 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2236
2237 if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2238 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2239 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2240 $metapos = 0;
2241 }
2242 my $metamode = $self->{'metamode'};
2243 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2244
2245 # import works with metadata.xml which can have inherited metadata
2246 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2247 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2248 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2249 # what they're doing if they provide a metapos.
2250 if(defined $metapos) {
2251 print STDERR "@@@@ WARNING: metapos defined.\n";
2252 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2253 }
2254
2255 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2256 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2257 my $metadata_xml_file;
2258 my $import_filename = undef;
2259 if (defined $docid)
2260 {
2261 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2262 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2263
2264 # This now stores the full pathname
2265 $import_filename = $doc_rec->{'src-file'}->[0];
2266 $import_filename = &util::placeholders_to_abspath($import_filename);
2267 }
2268
2269 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2270 {
2271 $gsdl_cgi->generate_error("There is no metadata\n");
2272 }
2273
2274 # figure out correct metadata.xml file [?]
2275 # Assuming the metadata.xml file is next to the source file
2276 # Note: This will not work if it is using the inherited metadata from the parent folder
2277 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2278 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2279
2280 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2281
2282 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2283 $mess .= " $metaname";
2284 $mess .= " = $metavalue\n";
2285
2286 $gsdl_cgi->generate_ok_message($mess);
2287
2288 #return $status; # in case calling functions have a use for this
2289}
2290
2291sub remove_import_metadata
2292{
2293 my $self = shift @_;
2294
2295 my $username = $self->{'username'};
2296 my $collect = $self->{'collect'};
2297 my $gsdl_cgi = $self->{'gsdl_cgi'};
2298
2299 if ($baseaction::authentication_enabled) {
2300 # Ensure the user is allowed to edit this collection
2301 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2302 }
2303
2304 # Make sure the collection isn't locked by someone else
2305 $self->lock_collection($username, $collect);
2306
2307 $self->_remove_import_metadata(@_);
2308
2309 # Release the lock once it is done
2310 $self->unlock_collection($username, $collect);
2311
2312}
2313
2314sub remove_from_metadata_xml
2315{
2316 my $self = shift @_;
2317 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2318 # metamode generally has no meaning for removing meta, but is used by set_meta
2319 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2320
2321 # Set the call-back functions for the metadata tags
2322 my @rules =
2323 (
2324 _default => 'raw',
2325 'Metadata' => \&rfmxml_metadata,
2326 'FileName' => \&mxml_filename
2327 );
2328
2329 my $parser = XML::Rules->new
2330 (
2331 rules => \@rules,
2332 style => 'filter',
2333 output_encoding => 'utf8',
2334 #normalisespaces => 1,
2335 stripspaces => 2|0|0 # ineffectual
2336 );
2337
2338 my $xml_in = "";
2339 if (!open(MIN,"<$metadata_xml_filename"))
2340 {
2341 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2342 }
2343 else
2344 {
2345 # Read them in
2346 my $line;
2347 while (defined ($line=<MIN>)) {
2348 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
2349 }
2350 close(MIN);
2351
2352 # Filter with the call-back functions
2353 my $xml_out = "";
2354
2355 my $MOUT;
2356 if (!open($MOUT,">$metadata_xml_filename")) {
2357 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2358 }
2359 else {
2360 binmode($MOUT,":utf8");
2361 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2362 close($MOUT);
2363 }
2364 }
2365}
2366
2367sub rfmxml_metadata
2368{
2369 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2370
2371 # metadata.xml does not handle subsections
2372
2373 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2374 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2375 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2376 {
2377 if (!defined $parser->{'parameters'}->{'poscount'})
2378 {
2379 $parser->{'parameters'}->{'poscount'} = 0;
2380 }
2381 else
2382 {
2383 $parser->{'parameters'}->{'poscount'}++;
2384 }
2385
2386 # if overriding but no metapos, then clear all the meta for this metaname
2387 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2388 return [];
2389 }
2390
2391 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2392 {
2393 return [];
2394 }
2395 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2396 {
2397 return [];
2398 }
2399 }
2400
2401 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2402 return [$tagname => $attrHash];
2403}
2404
2405sub _remove_live_metadata
2406{
2407 my $self = shift @_;
2408
2409 my $collect = $self->{'collect'};
2410 my $gsdl_cgi = $self->{'gsdl_cgi'};
2411 my $infodbtype = $self->{'infodbtype'};
2412
2413 # Obtain the collect dir
2414 my $site = $self->{'site'};
2415 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2416
2417 # look up additional args
2418 my $docid = $self->{'d'};
2419 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2420 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2421 }
2422
2423 # Generate the dbkey
2424 my $metaname = $self->{'metaname'};
2425 my $dbkey = "$docid.$metaname";
2426
2427 # To people who know $collect_tail please add some comments
2428 # Obtain the live gdbm_db path
2429 my $collect_tail = $collect;
2430 $collect_tail =~ s/^.*[\/|\\]//;
2431 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2432 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2433
2434 # Remove the key
2435 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2436 my $status = system($cmd);
2437 if ($status != 0) {
2438 # Catch error if gdbmdel failed
2439 my $mess = "Failed to delete metadata key: $dbkey\n";
2440
2441 $mess .= "PATH: $ENV{'PATH'}\n";
2442 $mess .= "cmd = $cmd\n";
2443 $mess .= "Exit status: $status\n";
2444 $mess .= "System Error Message: $!\n";
2445
2446 $gsdl_cgi->generate_error($mess);
2447 }
2448 else {
2449 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2450 }
2451
2452}
2453
2454sub remove_live_metadata
2455{
2456 my $self = shift @_;
2457
2458 my $username = $self->{'username'};
2459 my $collect = $self->{'collect'};
2460 my $gsdl_cgi = $self->{'gsdl_cgi'};
2461
2462 if ($baseaction::authentication_enabled) {
2463 # Ensure the user is allowed to edit this collection
2464 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2465 }
2466
2467 # Make sure the collection isn't locked by someone else
2468 $self->lock_collection($username, $collect);
2469
2470 $self->_remove_live_metadata(@_);
2471
2472 $self->unlock_collection($username, $collect);
2473}
2474
2475sub remove_metadata
2476{
2477 my $self = shift @_;
2478
2479 my $where = $self->{'where'};
2480 if(!$where || ($where =~ m/^\s*$/)) {
2481 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2482 return;
2483 }
2484
2485 my $username = $self->{'username'};
2486 my $collect = $self->{'collect'};
2487 my $gsdl_cgi = $self->{'gsdl_cgi'};
2488
2489 if ($baseaction::authentication_enabled) {
2490 # Ensure the user is allowed to edit this collection
2491 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2492 }
2493
2494 # Make sure the collection isn't locked by someone else
2495 $self->lock_collection($username, $collect);
2496
2497 # check which directories need to be processed, specified in $where as
2498 # any combination of import|archives|index|live
2499 if($where =~ m/import/) {
2500 $self->_remove_import_metadata(@_);
2501 }
2502 if($where =~ m/archives/) {
2503 $self->_remove_archives_metadata(@_);
2504 }
2505 if($where =~ m/index/) {
2506 $self->_remove_index_metadata(@_);
2507 }
2508
2509 # Release the lock once it is done
2510 $self->unlock_collection($username, $collect);
2511}
2512
2513# the internal version, without authentication
2514sub _remove_index_metadata
2515{
2516 my $self = shift @_;
2517
2518 my $collect = $self->{'collect'};
2519 my $gsdl_cgi = $self->{'gsdl_cgi'};
2520 my $infodbtype = $self->{'infodbtype'};
2521
2522 # Obtain the collect dir
2523 my $site = $self->{'site'};
2524 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2525
2526 # look up additional args
2527 my $docid = $self->{'d'};
2528 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2529 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2530 }
2531 my $metaname = $self->{'metaname'};
2532 my $metapos = $self->{'metapos'};
2533 my $metavalue = $self->{'metavalue'};
2534
2535 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2536 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2537
2538 # To people who know $collect_tail please add some comments
2539 # -> In collection groups, I think collect_tailname is the subcollection name,
2540 # e.g. colgroup-name/col-tail-name
2541 # Obtain the path to the database
2542 my $collect_tail = $collect;
2543 $collect_tail =~ s/^.*[\/|\\]//;
2544 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2545 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2546
2547 # Read the docid entry
2548 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2549
2550 # Check to make sure the key does exist
2551 if (!defined ($doc_rec->{$metaname})) {
2552 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2553 }
2554
2555 # Obtain the specified metadata pos
2556 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2557 if(!defined $metapos && !defined $metavalue) {
2558 $metapos = 0;
2559 }
2560
2561
2562 # consider check key is defined before deleting?
2563 # Loop through the metadata array and ignore the specified position
2564 my $filtered_metadata = [];
2565 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2566 for (my $i=0; $i<$num_metadata_vals; $i++) {
2567 my $metaval = shift(@{$doc_rec->{$metaname}});
2568
2569 if (!defined $metavalue && $i != $metapos) {
2570 push(@$filtered_metadata,$metaval);
2571 }
2572
2573 if(defined $metavalue && !($metavalue eq $metaval))
2574 {
2575 push(@$filtered_metadata,$metaval);
2576 }
2577 }
2578 $doc_rec->{$metaname} = $filtered_metadata;
2579
2580 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2581 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2582
2583 if ($status != 0) {
2584 my $mess = "Failed to set metadata key: $docid\n";
2585
2586 $mess .= "PATH: $ENV{'PATH'}\n";
2587 $mess .= "Exit status: $status\n";
2588 $mess .= "System Error Message: $!\n";
2589
2590 $gsdl_cgi->generate_error($mess);
2591 }
2592 else {
2593 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2594 $mess .= " $metaname";
2595 $mess .= "->[$metapos]" if (defined $metapos);
2596 $mess .= " ($metavalue)" if (defined $metavalue);
2597
2598 $gsdl_cgi->generate_ok_message($mess);
2599 }
2600
2601 #return $status; # in case calling functions have a use for this
2602}
2603
2604sub remove_index_metadata
2605{
2606 my $self = shift @_;
2607
2608 my $username = $self->{'username'};
2609 my $collect = $self->{'collect'};
2610 my $gsdl_cgi = $self->{'gsdl_cgi'};
2611
2612 if ($baseaction::authentication_enabled) {
2613 # Ensure the user is allowed to edit this collection
2614 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2615 }
2616
2617 # Obtain the collect dir
2618 my $site = $self->{'site'};
2619 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2620
2621 # Make sure the collection isn't locked by someone else
2622 $self->lock_collection($username, $collect);
2623
2624 $self->_remove_index_metadata(@_);
2625
2626 # Release the lock once it is done
2627 $self->unlock_collection($username, $collect);
2628}
2629
2630#################################################################################
2631# ERASE META METHODS: ERASE ALL VALUES FOR MATCHING METANAME AT SPECIFIED DOCID #
2632#################################################################################
2633
2634sub erase_metadata
2635{
2636 my $self = shift @_;
2637
2638 my $where = $self->{'where'};
2639 # when $where is unspecified, following behaviour of sub remove_metadata by defaulting to erasing metadata from index
2640 if(!$where || ($where =~ m/^\s*$/)) {
2641 $self->erase_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2642 return;
2643 }
2644
2645 my $username = $self->{'username'};
2646 my $collect = $self->{'collect'};
2647 my $gsdl_cgi = $self->{'gsdl_cgi'};
2648
2649 if ($baseaction::authentication_enabled) {
2650 # Ensure the user is allowed to edit this collection
2651 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2652 }
2653
2654 # Make sure the collection isn't locked by someone else
2655 $self->lock_collection($username, $collect);
2656
2657 # check which directories need to be processed, specified in $where as
2658 # any combination of import|archives|index|live
2659 if($where =~ m/import/) {
2660 $self->_erase_import_metadata(@_);
2661 }
2662 if($where =~ m/archives/) {
2663 $self->_erase_archives_metadata(@_);
2664 }
2665 if($where =~ m/index/) {
2666 $self->_erase_index_metadata(@_);
2667 }
2668
2669 # Release the lock once it is done
2670 $self->unlock_collection($username, $collect);
2671}
2672
2673# the internal version, without authentication
2674sub _erase_index_metadata
2675{
2676 my $self = shift @_;
2677
2678 my $collect = $self->{'collect'};
2679 my $gsdl_cgi = $self->{'gsdl_cgi'};
2680 my $infodbtype = $self->{'infodbtype'};
2681
2682 # Obtain the collect dir
2683 my $site = $self->{'site'};
2684 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2685
2686 # look up additional args
2687 my $docid = $self->{'d'};
2688 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2689 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2690 }
2691 my $metaname = $self->{'metaname'};
2692 my $metapos = undef;
2693 my $metavalue = undef;
2694
2695 # To people who know $collect_tail please add some comments
2696 # -> In collection groups, I think collect_tailname is the subcollection name,
2697 # e.g. colgroup-name/col-tail-name
2698 # Obtain the path to the database
2699 my $collect_tail = $collect;
2700 $collect_tail =~ s/^.*[\/|\\]//;
2701 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2702 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2703
2704 # Read the docid entry
2705 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2706
2707 # Check to make sure the key does exist
2708 if (defined ($doc_rec->{$metaname})) {
2709
2710 $doc_rec->{$metaname} = [];
2711
2712 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2713 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2714
2715 if ($status != 0) {
2716 my $mess = "Failed to erase metadata key: $docid\n";
2717
2718 $mess .= "PATH: $ENV{'PATH'}\n";
2719 $mess .= "Exit status: $status\n";
2720 $mess .= "System Error Message: $!\n";
2721
2722 $gsdl_cgi->generate_error($mess);
2723 }
2724 else {
2725 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2726 $mess .= " $metaname";
2727 $mess .= "->[$metapos]" if (defined $metapos);
2728 $mess .= " ($metavalue)" if (defined $metavalue);
2729
2730 $gsdl_cgi->generate_ok_message($mess);
2731 }
2732 }
2733 else { # no such metaname. Is it really an error?
2734 $gsdl_cgi->generate_ok_message("Can't erase. No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2735 }
2736 #return $status; # in case calling functions have a use for this
2737}
2738
2739sub erase_index_metadata
2740{
2741 my $self = shift @_;
2742
2743 my $username = $self->{'username'};
2744 my $collect = $self->{'collect'};
2745 my $gsdl_cgi = $self->{'gsdl_cgi'};
2746
2747 if ($baseaction::authentication_enabled) {
2748 # Ensure the user is allowed to edit this collection
2749 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2750 }
2751
2752 # Obtain the collect dir
2753 my $site = $self->{'site'};
2754 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2755
2756 # Make sure the collection isn't locked by someone else
2757 $self->lock_collection($username, $collect);
2758
2759 $self->_erase_index_metadata(@_);
2760
2761 # Release the lock once it is done
2762 $self->unlock_collection($username, $collect);
2763}
2764
2765
2766# Given a metaname, removes ALL metadata with that metaname at the provided docid
2767sub _erase_archives_metadata
2768{
2769 my $self = shift @_;
2770
2771 my $collect = $self->{'collect'};
2772 my $gsdl_cgi = $self->{'gsdl_cgi'};
2773 my $infodbtype = $self->{'infodbtype'};
2774
2775 # Obtain the collect and archive dir
2776 my $site = $self->{'site'};
2777 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2778
2779 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2780
2781 # look up additional args
2782 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2783
2784 my $metaname = $self->{'metaname'};
2785
2786 # metapos, metavalue and metamode are never provided and therefore all undefined for erase_archives_metadata
2787 my $metapos = undef;
2788 my $metavalue = undef;
2789 my $metamode = "override"; # when override and no metapos or metaval, all values for metaname will be affected
2790
2791 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2792 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2793
2794 # This now stores the full pathname
2795 my $doc_file = $doc_rec->{'doc-file'}->[0];
2796
2797 # check if request if for file-level doc-version history 'nminus-<n>' version
2798 my $dv = $self->{'dv'};
2799 if (defined $dv && ($dv ne "")) {
2800 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
2801
2802 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
2803 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
2804 }
2805
2806 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
2807
2808 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2809
2810 if ($status == 0)
2811 {
2812 my $mess = "\nerase-archives-metadata successful: \nKey[$docid]\n";
2813 $mess .= " $metaname";
2814 $gsdl_cgi->generate_ok_message($mess);
2815 }
2816 else
2817 {
2818 my $mess .= "Failed to erase archives metadata key: $docid\n";
2819 $mess .= "Exit status: $status\n";
2820 $mess .= "System Error Message: $!\n";
2821 $mess .= "-" x 20 . "\n";
2822
2823 $gsdl_cgi->generate_error($mess);
2824 }
2825
2826 #return $status; # in case calling functions have a use for this
2827}
2828
2829# practically identical to remove_archives_metadata
2830sub erase_archives_metadata
2831{
2832 my $self = shift @_;
2833
2834 my $username = $self->{'username'};
2835 my $collect = $self->{'collect'};
2836 my $gsdl_cgi = $self->{'gsdl_cgi'};
2837
2838 if ($baseaction::authentication_enabled)
2839 {
2840 # Ensure the user is allowed to edit this collection
2841 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2842 }
2843
2844 # Make sure the collection isn't locked by someone else
2845 $self->lock_collection($username, $collect);
2846
2847 $self->_erase_archives_metadata(@_);
2848
2849 # Release the lock once it is done
2850 $self->unlock_collection($username, $collect);
2851}
2852
2853sub _erase_import_metadata
2854{
2855 my $self = shift @_;
2856
2857 my $collect = $self->{'collect'};
2858 my $gsdl_cgi = $self->{'gsdl_cgi'};
2859 my $infodbtype = $self->{'infodbtype'};
2860
2861 # Obtain the collect dir
2862 my $site = $self->{'site'};
2863 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2864
2865 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2866
2867 # look up additional args
2868 my $docid = $self->{'d'};
2869 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2870 {
2871 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2872 }
2873
2874 my $metaname = $self->{'metaname'};
2875 my $metapos = undef;
2876 my $metavalue = undef;
2877 my $metamode = "override";
2878
2879 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2880 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2881 my $metadata_xml_file;
2882 my $import_filename = undef;
2883 if (defined $docid)
2884 {
2885 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2886 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2887
2888 # This now stores the full pathname
2889 $import_filename = $doc_rec->{'src-file'}->[0];
2890 $import_filename = &util::placeholders_to_abspath($import_filename);
2891 }
2892
2893 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2894 {
2895 $gsdl_cgi->generate_error("There is no metadata\n");
2896 }
2897
2898 # figure out correct metadata.xml file [?]
2899 # Assuming the metadata.xml file is next to the source file
2900 # Note: This will not work if it is using the inherited metadata from the parent folder
2901 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2902 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2903
2904 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode=override, means all values of metaname are affected
2905
2906 my $mess = "erase-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2907 $mess .= " $metaname";
2908 $mess .= " = $metavalue\n";
2909
2910 $gsdl_cgi->generate_ok_message($mess);
2911
2912 #return $status; # in case calling functions have a use for this
2913}
2914
2915sub erase_import_metadata
2916{
2917 my $self = shift @_;
2918
2919 my $username = $self->{'username'};
2920 my $collect = $self->{'collect'};
2921 my $gsdl_cgi = $self->{'gsdl_cgi'};
2922
2923 if ($baseaction::authentication_enabled) {
2924 # Ensure the user is allowed to edit this collection
2925 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2926 }
2927
2928 # Make sure the collection isn't locked by someone else
2929 $self->lock_collection($username, $collect);
2930
2931 $self->_erase_import_metadata(@_);
2932
2933 # Release the lock once it is done
2934 $self->unlock_collection($username, $collect);
2935
2936}
2937
2938# removes all meta with matching metaname for the specified docid from the live metadata index
2939sub _erase_live_metadata
2940{
2941 my $self = shift @_;
2942 # At present, _remove_live_metadata doesn't take metaval or metapos either and therefore has the desired effect
2943 $self->_remove_live_metadata(@_);
2944
2945}
2946
2947sub erase_live_metadata
2948{
2949 my $self = shift @_;
2950
2951 my $username = $self->{'username'};
2952 my $collect = $self->{'collect'};
2953 my $gsdl_cgi = $self->{'gsdl_cgi'};
2954
2955 if ($baseaction::authentication_enabled) {
2956 # Ensure the user is allowed to edit this collection
2957 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2958 }
2959
2960 # Make sure the collection isn't locked by someone else
2961 $self->lock_collection($username, $collect);
2962
2963 $self->_erase_live_metadata(@_);
2964
2965 $self->unlock_collection($username, $collect);
2966}
2967
2968
2969# Was trying to reused the codes, but the functions need to be broken
2970# down more before they can be reused, otherwise there will be too
2971# much overhead and duplicate process...
2972sub insert_metadata
2973{
2974 my $self = shift @_;
2975
2976 my $username = $self->{'username'};
2977 my $collect = $self->{'collect'};
2978 my $gsdl_cgi = $self->{'gsdl_cgi'};
2979 my $infodbtype = $self->{'infodbtype'};
2980
2981 # If the import metadata and gdbm database have been updated, we
2982 # need to insert some notification to warn user that the the text
2983 # they see at the moment is not indexed and require a rebuild.
2984 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2985
2986 if ($baseaction::authentication_enabled) {
2987 # Ensure the user is allowed to edit this collection
2988 $self->authenticate_user($username, $collect);
2989 }
2990
2991 # Obtain the collect and archive dir
2992 my $site = $self->{'site'};
2993 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2994 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2995
2996 # Make sure the collection isn't locked by someone else
2997 $self->lock_collection($username, $collect);
2998
2999 # Check additional args
3000 my $docid = $self->{'d'};
3001 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
3002 $gsdl_cgi->generate_error("No document id is specified: d=...");
3003 }
3004 my $metaname = $self->{'metaname'};
3005 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
3006 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3007 }
3008 my $metavalue = $self->{'metavalue'};
3009 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
3010 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3011 }
3012 # make "accumulate" the default (less destructive, as it won't actually
3013 # delete any existing values)
3014 my $metamode = "accumulate";
3015
3016 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3017 my $metapos = undef;
3018 my $prevmetavalue = undef;
3019
3020 #=======================================================================#
3021 # set_import_metadata [START]
3022 #=======================================================================#
3023 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3024 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3025 my $metadata_xml_file;
3026 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3027 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3028
3029 # This now stores the full pathname
3030 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3031 $import_filename = &util::placeholders_to_abspath($import_filename);
3032
3033 # figure out correct metadata.xml file [?]
3034 # Assuming the metadata.xml file is next to the source file
3035 # Note: This will not work if it is using the inherited metadata from the parent folder
3036 my ($import_tailname, $import_dirname)
3037 = File::Basename::fileparse($import_filename);
3038 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3039
3040 # Shane's escape characters
3041 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3042 $metavalue =~ s/\,/&#44;/g;
3043 $metavalue =~ s/\:/&#58;/g;
3044 $metavalue =~ s/\|/&#124;/g;
3045 $metavalue =~ s/\(/&#40;/g;
3046 $metavalue =~ s/\)/&#41;/g;
3047 $metavalue =~ s/\[/&#91;/g;
3048 $metavalue =~ s/\\/&#92;/g;
3049 $metavalue =~ s/\]/&#93;/g;
3050 $metavalue =~ s/\{/&#123;/g;
3051 $metavalue =~ s/\}/&#125;/g;
3052 $metavalue =~ s/\"/&#34;/g;
3053 $metavalue =~ s/\`/&#96;/g;
3054 $metavalue =~ s/\n/_newline_/g;
3055
3056 # Edit the metadata.xml
3057 # Modified by Jeffrey from DL Consulting
3058 # Handle the case where there is one metadata.xml file for multiple FileSets
3059 # The XML filter needs to know whether it is in the right FileSet
3060 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3061 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3062 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3063 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3064 #=======================================================================#
3065 # set_import_metadata [END]
3066 #=======================================================================#
3067
3068
3069 #=======================================================================#
3070 # set_metadata (accumulate version) [START]
3071 #=======================================================================#
3072 # To people who know $collect_tail please add some comments
3073 # Obtain path to the database
3074 my $collect_tail = $collect;
3075 $collect_tail =~ s/^.*[\/|\\]//;
3076 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3077 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3078
3079 # Read the docid entry
3080 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3081
3082 # Protect the quotes
3083 $metavalue =~ s/\"/\\\"/g;
3084
3085 # Adds the pending macro
3086 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3087
3088 # If the metadata doesn't exist, create a new one
3089 if (!defined($doc_rec->{$metaname})){
3090 $doc_rec->{$metaname} = [ $macro_metavalue ];
3091 }
3092 # Else, let's acculumate the values
3093 else {
3094 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3095 }
3096
3097 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3098 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3099
3100 if ($status != 0) {
3101 # Catch error if gdbmget failed
3102 my $mess = "Failed to set metadata key: $docid\n";
3103
3104 $mess .= "PATH: $ENV{'PATH'}\n";
3105 $mess .= "Exit status: $status\n";
3106 $mess .= "System Error Message: $!\n";
3107
3108 $gsdl_cgi->generate_error($mess);
3109 }
3110 else {
3111 my $mess = "insert-metadata successful: Key[$docid]\n";
3112 $mess .= " [In metadata.xml] $metaname";
3113 $mess .= " = $metavalue\n";
3114 $mess .= " [In database] $metaname";
3115 $mess .= " = $macro_metavalue\n";
3116 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3117 $gsdl_cgi->generate_ok_message($mess);
3118 }
3119 #=======================================================================#
3120 # set_metadata (accumulate version) [END]
3121 #=======================================================================#
3122
3123 # Release the lock once it is done
3124 $self->unlock_collection($username, $collect);
3125}
3126
3127
3128
3129
3130sub inc_fldv_nminus1
3131{
3132 my $self = shift @_;
3133
3134 my $username = $self->{'username'};
3135 my $collect = $self->{'collect'};
3136 my $gsdl_cgi = $self->{'gsdl_cgi'};
3137 my $infodbtype = $self->{'infodbtype'};
3138
3139 # Obtain the collect dir
3140 my $site = $self->{'site'};
3141 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3142
3143 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
3144
3145 # look up additional args
3146 my $docid = $self->{'d'};
3147
3148
3149 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3150 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3151
3152 # This now stores the full pathname [is this still true??]
3153 my $doc_file = $doc_rec->{'doc-file'}->[0];
3154
3155 my ($unused_doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
3156 my $doc_full_dirname = &util::filename_cat($archive_dir,$doc_dirname);
3157
3158 my $fldv_full_dirname = &util::filename_cat($doc_full_dirname,$FLDV_HISTORY_DIR);
3159
3160 my $had_error = 0;
3161 my $had_error_mess = undef;
3162
3163 if (-d $fldv_full_dirname) {
3164 my $fldv_filtered_dirs = &FileUtils::readDirectoryFiltered($fldv_full_dirname,undef,"^nminus-\\d+\$");
3165
3166 my @sorted_fldv_filtered_dirs = sort {
3167 my ($a_num) = ($a =~ m/(\d+)$/);
3168 my ($b_num) = ($b =~ m/(\d+)$/);
3169
3170 # sort into descending order
3171 return $b_num <=> $a_num;
3172 } @$fldv_filtered_dirs;
3173
3174 ## shuffle all the nminus-<n> folders down by one
3175
3176 foreach my $nminus_n (@sorted_fldv_filtered_dirs) {
3177
3178 my $nminus_n_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,$nminus_n);
3179 if (-d $nminus_n_full_dir) {
3180
3181 my ($n) = ($nminus_n =~ m/(\d+)$/);
3182 my $new_n = $n + 1;
3183
3184 my $new_full_dir_plus1 = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-$new_n");
3185
3186 my $move_ok = &FileUtils::renameDirectory($nminus_n_full_dir,$new_full_dir_plus1,
3187 { 'strict' => 1 } );
3188
3189 if (!$move_ok) {
3190 $had_error_mess = "Error: Failed to move '$nminus_n' to nminus-$new_n in $fldv_full_dirname\n";
3191 $had_error = 1;
3192 last;
3193 }
3194
3195 }
3196 else {
3197 print STDERR "Warning: skipping $nminus_n_full_dir as it is not a directory\n";
3198 }
3199 }
3200 }
3201 else {
3202 # first time asked to perform the inc-nminus-1 action for this doc,
3203 # and there is not yet even a _fldv directory!
3204 # => make the directory (and that's all that's needed at this stage)
3205 print STDERR "First time file-level document-version history has been applied to this document\n";
3206 print STDERR "Creating $fldv_full_dirname\n";
3207
3208 &FileUtils::makeDirectory($fldv_full_dirname);
3209 }
3210
3211 if (!$had_error) {
3212 ### Now need to copy everything top level in doc dir into nminus-1
3213
3214 my $nminus_1_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-1");
3215
3216 my $copy_ok = &FileUtils::copyFilesRefRecursive([$doc_full_dirname],$nminus_1_full_dir,
3217 { 'strict' => 1, 'exclude_filter_re' => "^$FLDV_HISTORY_DIR\$" } );
3218
3219 if (!$copy_ok) {
3220 $had_error_mess = "Error: Failed to clone document $docid to form 'nminus-1' directory in $fldv_full_dirname";
3221 $had_error = 1;
3222 }
3223 }
3224
3225 if ($had_error) {
3226 $gsdl_cgi->generate_error($had_error_mess);
3227 }
3228 else {
3229 $gsdl_cgi->generate_ok_message("Successfully incremented the File-Level Document-Version History for $docid");
3230 }
3231}
3232
3233
3234# not returning 1; here since this file is conditionally included by metadataction.pm
3235# and not otherwise meant to be used on its own
3236
Note: See TracBrowser for help on using the repository browser.