source: main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm

Last change on this file was 38431, checked in by anupama, 5 months ago
  1. For editing user comments with the document editor, I want to set metamode to override just once instead of for each docid (that is, for each docrecord). This didn't work before because the perl code didn't allow it as optional argument, although the actual set-metadata-array() perl subroutines were coded for it. 2. The Java code needs to allow an array of document records, without requiring that each docrecord contains a metatable field (a sub array of meta records). This is because the perl code set-metadata-array() subroutines allowed for this, but when set-metadata functionality started needing authentication/user editing powers (authentication goes through java, which then relays the command to perl), this pre-existing, optional way of setting metadata in the perl code wasn't opened up through java.
File size: 136.6 KB
Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34package metadataaction;
35
36use strict;
37
38use cgiactions::baseaction;
39use cgiactions::metadataactioncommon;
40
41use dbutil;
42use ghtml;
43
44use JSON;
45
46# convenient to have shorter version around
47my $FLDV_HISTORY_DIR = $metadataactioncommon::FLDV_HISTORY_DIR;
48
49$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
50{
51 #SET METHODS
52 "set-live-metadata" => {
53 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
57 'compulsory-args' => [ "metaname", "metavalue" ],
58 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "dv", "f" ] },
59
60 "set-index-metadata" => {
61 'compulsory-args' => [ "d", "metaname", "metavalue" ],
62 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] },
63
64 "set-archives-metadata" => {
65 'compulsory-args' => [ "d", "metaname", "metavalue" ],
66 'optional-args' => [ "dv", "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
67
68 ###### !!!!!
69 # Should there not be a "set-archives-text" also ???!!!!??
70 ###### !!!!!
71
72 "set-archives-assocfile" => {
73 'compulsory-args' => [ "d", "assocname" ],
74 'optional-args' => [ "dv", "fileupload", "filedata" ] ,
75 'help-string' => [ "Because this action uploads a file, this action needs to be POSTed. The 'fileupload' parameter/field is the uploaded file content; 'assocname' specifies the filename within the archives's document's area that it will be saved as" ]
76 },
77
78 "set-import-metadata" => {
79 'compulsory-args' => [ "metaname", "metavalue" ],
80 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
81
82 #SET METHODS (ARRAY)
83 "set-metadata-array" => {
84 'compulsory-args' => [ "where", "json" ],
85 'optional-args' => [ "metamode" ],
86 'help-string' => [
87 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
88
89 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
90
91# The same examples rewritten for when running the metadata-server.pl script from the commandline:
92
93# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
94
95# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
96
97 "set-archives-metadata-array" => {
98 'compulsory-args' => [ "json" ],
99 'optional-args' => [ "metamode" ] },
100
101 "set-import-metadata-array" => {
102 'compulsory-args' => [ "json" ],
103 'optional-args' => [ "metamode" ] },
104
105 "set-index-metadata-array" => {
106 'compulsory-args' => [ "json" ],
107 'optional-args' => [ "metamode" ] },
108
109 "set-live-metadata-array" => {
110 'compulsory-args' => [ "json" ],
111 'optional-args' => [ ] },
112
113 #REMOVE METHODS
114 "remove-import-metadata" => {
115 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
116 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
117# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
118
119 "remove-archives-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
121 'optional-args' => [ "dv", "metapos", "metavalue", "metamode" ] },
122
123 "remove-live-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ ] },
126
127 "remove-index-metadata" => {
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "metapos", "metavalue" ] },
130
131 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
132 'compulsory-args' => [ "d", "metaname" ],
133 'optional-args' => [ "where", "dv", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
134
135 #REMOVE METHODS (ARRAY)
136 "remove-metadata-array" => {
137 'compulsory-args' => [ "where", "json" ],
138 'optional-args' => [ ],
139 'help-string' => [
140 "No remove-metadata-array examples yet."
141 ] },
142 "remove-archives-metadata-array" => {
143 'compulsory-args' => [ "json" ],
144 'optional-args' => [ ] },
145
146 "remove-import-metadata-array" => {
147 'compulsory-args' => [ "json" ],
148 'optional-args' => [ ] },
149
150 "remove-index-metadata-array" => {
151 'compulsory-args' => [ "json" ],
152 'optional-args' => [ ] },
153
154 "remove-live-metadata-array" => {
155 'compulsory-args' => [ "json" ],
156 'optional-args' => [ ] },
157
158 #REMOVE MULTIPLE (REMOVE ALL MATCHING METANAME AT DOCID)
159 "erase-import-metadata" => {
160 'compulsory-args' => [ "d", "metaname" ] },
161
162 "erase-archives-metadata" => {
163 'compulsory-args' => [ "d", "metaname" ],
164 'optional-args' => [ "dv" ] },
165
166 "erase-index-metadata" => {
167 'compulsory-args' => [ "d", "metaname" ] },
168
169 "erase-live-metadata" => {
170 'compulsory-args' => [ "d", "metaname" ] },
171
172 "erase-metadata" => { # generic erase-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
173 'compulsory-args' => [ "d", "metaname" ],
174 'optional-args' => [ "where" ] },
175
176
177 #INSERT METHODS
178 "insert-metadata" => {
179 'compulsory-args' => [ "d", "metaname", "metavalue" ],
180 'optional-args' => [ ] },
181
182
183 #INC METHODS
184 "inc-fldv-nminus1" => {
185 'compulsory-args' => [ "d" ],
186 'optional-args' => [ ] }
187};
188
189sub _set_live_metadata
190{
191 my $self = shift @_;
192
193 my $collect = $self->{'collect'};
194 my $gsdl_cgi = $self->{'gsdl_cgi'};
195 my $infodbtype = $self->{'infodbtype'};
196
197 # Obtain the collect dir
198 my $site = $self->{'site'};
199 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
200
201
202 # look up additional args
203 my $docid = $self->{'d'};
204 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
205 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
206 }
207 my $metavalue = $self->{'metavalue'};
208
209 # Generate the dbkey
210 my $metaname = $self->{'metaname'};
211 my $dbkey = "$docid.$metaname";
212
213 # To people who know $collect_tail please add some comments
214 # Obtain path to the database
215 my $collect_tail = $collect;
216 $collect_tail =~ s/^.*[\/|\\]//;
217 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
218 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
219
220 # Set the new value
221 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
222 my $status = system($cmd);
223 if ($status != 0) {
224 # Catch error if gdbmget failed
225 my $mess = "Failed to set metadata key: $dbkey\n";
226
227 $mess .= "PATH: $ENV{'PATH'}\n";
228 $mess .= "cmd = $cmd\n";
229 $mess .= "Exit status: $status\n";
230 $mess .= "System Error Message: $!\n";
231
232 $gsdl_cgi->generate_error($mess);
233 }
234 else {
235 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
236 }
237
238 #return $status; # in case calling functions have any further use for this
239}
240
241sub set_live_metadata
242{
243 my $self = shift @_;
244
245 my $username = $self->{'username'};
246 my $collect = $self->{'collect'};
247 my $gsdl_cgi = $self->{'gsdl_cgi'};
248
249 if ($baseaction::authentication_enabled) {
250 # Ensure the user is allowed to edit this collection
251 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
252 }
253
254 # Make sure the collection isn't locked by someone else
255 $self->lock_collection($username, $collect);
256
257 $self->_set_live_metadata(@_);
258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
261}
262
263sub set_index_metadata_entry
264{
265 my $self = shift @_;
266 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
267
268 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
269 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
270
271 # To people who know $collect_tail please add some comments
272 # Obtain path to the database
273 my $collect_tail = $collect;
274 $collect_tail =~ s/^.*[\/|\\]//;
275 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
276 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
277
278# print STDERR "**** infodb file path = $infodb_file_path\n";
279# print STDERR "***** infodb type = $infodbtype\n";
280
281 # Read the docid entry
282 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
283
284 # Set the metadata value
285 if (defined $metapos) {
286 # if metamode=accumulate AND metapos, warn user and then use metapos
287 if (defined $metamode && $metamode eq "accumulate") {
288 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
289 print STDERR "**** Proceeding by using metapos\n";
290 }
291 $doc_rec->{$metaname}->[$metapos] = $metavalue;
292 }
293 elsif (defined $prevmetavalue) {
294 my $array = $doc_rec->{$metaname};
295 my $length = @$array;
296
297 my $found = 0;
298 for (my $i = 0; $i < $length; $i++){
299 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
300 $doc_rec->{$metaname}->[$i] = $metavalue;
301 $found = 1;
302 last;
303 }
304 }
305
306 if($found == 0){
307 print STDERR "**** Warning: could not find $prevmetavalue to replace. Appending (accumulating) replacement value.\n";
308 ##$doc_rec->{$metaname} = [ $metavalue ]; # if prevmetavalue not found, don't overwrite all previous values.
309 # Accumulate is less destructive. But should the correct behaviour be to not do Anything?
310
311 if(defined $doc_rec->{$metaname}) {
312 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
313 } else {
314 $doc_rec->{$metaname} = [ $metavalue ];
315 }
316 }
317 }
318 elsif (defined $metamode && $metamode eq "override") {
319 $doc_rec->{$metaname} = [ $metavalue ];
320 }
321 else { # default for index was to override, but because accumulate is less destructive,
322 # and because accumulate is the default for archives and import, that's the new default for index too
323 if(defined $doc_rec->{$metaname}) {
324 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
325 } else {
326 $doc_rec->{$metaname} = [ $metavalue ];
327 }
328 }
329
330 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
331
332 return $status;
333
334}
335
336sub _set_import_metadata
337{
338 my $self = shift @_;
339
340 my $collect = $self->{'collect'};
341 my $gsdl_cgi = $self->{'gsdl_cgi'};
342 my $infodbtype = $self->{'infodbtype'};
343
344 # Obtain the collect and archive dir
345 my $site = $self->{'site'};
346 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
347 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
348
349 # look up additional args
350 # want either d= or f=
351 my $docid = $self->{'d'};
352 my ($docid_root,$docid_secnum);
353 if(defined $docid) {
354 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
355 # as yet no support for setting subsection metadata in metadata.xml
356 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
357 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
358 return;
359 }
360 }
361
362 my $import_file = $self->{'f'};
363 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
364 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
365 }
366
367 # Get the parameters and set default mode to "accumulate"
368 my $metaname = $self->{'metaname'};
369 my $metavalue = $self->{'metavalue'};
370## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
371 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
372
373 my $metamode = $self->{'metamode'};
374 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
375 # make "accumulate" the default (less destructive, as it won't actually
376 # delete any existing values)
377 $metamode = "accumulate";
378 }
379
380 # adding metapos and prevmetavalue support to import_metadata subroutines
381 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
382 my $prevmetavalue = $self->{'prevmetavalue'};
383 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
384 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
385
386 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
387 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
388
389 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
390 $mess .= " $metaname";
391 $mess .= " = $metavalue";
392 $mess .= " ($metamode)\n";
393
394 $gsdl_cgi->generate_ok_message($mess);
395
396 #return $status; # in case calling functions have any further use for this
397}
398
399# the version of set_archives_meta that doesn't do authentication
400sub _set_archives_metadata
401{
402 my $self = shift @_;
403
404 my $collect = $self->{'collect'};
405 my $gsdl_cgi = $self->{'gsdl_cgi'};
406 my $infodbtype = $self->{'infodbtype'};
407
408 # Obtain the collect and archive dir
409 my $site = $self->{'site'};
410 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
411 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
412
413 # look up additional args
414 my $docid = $self->{'d'};
415 my $dv = $self->{'dv'};
416
417 my $metaname = $self->{'metaname'};
418 my $metavalue = $self->{'metavalue'};
419 my $prevmetavalue = $self->{'prevmetavalue'};
420
421 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
422 # Don't append "|| undef", since if metapos=0 it will then be set to undef
423
424 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
425 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
426
427 my $metamode = $self->{'metamode'};
428 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
429 # make "accumulate" the default (less destructive, as it won't actually
430 # delete any existing values)
431 $metamode = "accumulate";
432 }
433
434 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
435 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
436
437 if ($status == 0) {
438 my $mess = "set-archives-metadata successful: Key[$docid]\n";
439 $mess .= " $metaname";
440 $mess .= "->[$metapos]" if (defined $metapos);
441 $mess .= " = $metavalue";
442 $mess .= " ($metamode)\n";
443
444 $gsdl_cgi->generate_ok_message($mess);
445 }
446 else {
447 my $mess .= "Failed to set archives metadata key: $docid\n";
448 $mess .= "Exit status: $status\n";
449 if(defined $self->{'error_msg'}) {
450 $mess .= "Error Message: $self->{'error_msg'}\n";
451 } else {
452 $mess .= "System Error Message: $!\n";
453 }
454 $mess .= "-" x 20 . "\n";
455
456 $gsdl_cgi->generate_error($mess);
457 }
458
459 #return $status; # in case calling functions have any further use for this
460}
461
462
463# the version of set_index_meta that doesn't do authentication
464sub _set_index_metadata
465{
466 print STDERR "START SET INDEX METADATA\n";
467 my $self = shift @_;
468
469 my $collect = $self->{'collect'};
470 my $gsdl_cgi = $self->{'gsdl_cgi'};
471
472 # Obtain the collect dir
473 my $site = $self->{'site'};
474 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
475
476 # look up additional args
477 my $docid = $self->{'d'};
478 my $metaname = $self->{'metaname'};
479 my $metapos = $self->{'metapos'}; # undef has meaning
480 my $metavalue = $self->{'metavalue'};
481 my $infodbtype = $self->{'infodbtype'};
482 my $metamode = $self->{'metamode'};
483 my $prevmetavalue = $self->{'prevmetavalue'};
484
485 print STDERR "SETTING INDEX METADATA ENTRY\n";
486 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
487 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
488 if ($status != 0) {
489 # Catch error if set infodb entry failed
490 my $mess = "Failed to set metadata key: $docid\n";
491
492 $mess .= "PATH: $ENV{'PATH'}\n";
493 $mess .= "Exit status: $status\n";
494 $mess .= "System Error Message: $!\n";
495
496 $gsdl_cgi->generate_error($mess);
497 }
498 else {
499 my $mess = "set-index-metadata successful: Key[$docid]\n";
500 $mess .= " $metaname";
501 $mess .= "->[$metapos]" if (defined $metapos);
502 $mess .= " = $metavalue\n";
503
504 $gsdl_cgi->generate_ok_message($mess);
505 }
506
507 print STDERR "END SET INDEX METADATA\n";
508 #return $status; # in case calling functions have any further use for this
509}
510
511sub set_index_metadata
512{
513 my $self = shift @_;
514
515 my $username = $self->{'username'};
516 my $collect = $self->{'collect'};
517 my $gsdl_cgi = $self->{'gsdl_cgi'};
518
519 if ($baseaction::authentication_enabled) {
520 # Ensure the user is allowed to edit this collection
521 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
522 }
523
524 # Obtain the collect dir
525 my $site = $self->{'site'};
526 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
527
528 $gsdl_cgi->checked_chdir($collect_dir);
529
530 # Make sure the collection isn't locked by someone else
531 $self->lock_collection($username, $collect);
532
533 $self->_set_index_metadata(@_);
534
535 # Release the lock once it is done
536 $self->unlock_collection($username, $collect);
537}
538
539# call this to set the metadata for a combination of dirs archives, import or index, or live
540# if none specified, defaults to index which was the original behaviour of set_metadata.
541sub set_metadata
542{
543 my $self = shift @_;
544
545 # Testing that not defining a variable, setting it to "" or to " " all return false
546 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
547
548 my $where = $self->{'where'};
549 if(!$where || ($where =~ m/^\s*$/)) {
550 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
551 return;
552 }
553
554 # authenticate and lock collection once, even if processing multiple dirs
555 my $username = $self->{'username'};
556 my $collect = $self->{'collect'};
557 my $gsdl_cgi = $self->{'gsdl_cgi'};
558
559 if ($baseaction::authentication_enabled) {
560 # Ensure the user is allowed to edit this collection
561 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
562 }
563
564 if($where =~ m/index/) {
565 my $site = $self->{'site'};
566 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
567 $gsdl_cgi->checked_chdir($collect_dir);
568 }
569
570 # Make sure the collection isn't locked by someone else
571 $self->lock_collection($username, $collect);
572
573
574 # now at last can set the metadata. $where can specify multiple
575 # $where is of the form: import|archives|index, or a subset thereof
576
577 #my @whichdirs = split('\|', $where);
578
579 # just check whether $where contains import/archives/index/live in turn, and
580 # for each case, process it accordingly
581 if($where =~ m/import/) {
582 $self->_set_import_metadata(@_);
583 }
584
585 if($where =~ m/archives/) {
586
587 # look up docID arg which is optional to set_metadata because it's optional
588 # to set_import, but which is compulsory to set_archives_metadata
589 my $docid = $self->{'d'};
590 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
591 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
592 }
593 # we have a docid, so can set archives meta
594 $self->_set_archives_metadata(@_);
595 }
596
597 if($where =~ m/index/) {
598
599 # look up docID arg which is optional to set_metadata because it's optional
600 # to set_import, but which is compulsory to set_archives_metadata
601 my $docid = $self->{'d'};
602 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
603 $gsdl_cgi->generate_error("No docid (d=...) specified.");
604 }
605 # we have a docid, so can set index meta
606 $self->_set_index_metadata(@_);
607 }
608
609 if($where =~ m/live/) {
610 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
611 }
612
613 # Release the lock once it is done
614 $self->unlock_collection($username, $collect);
615}
616
617sub set_metadata_array
618{
619 my $self = shift @_;
620
621 my $where = $self->{'where'};
622 if(!$where || ($where =~ m/^\s*$/)) {
623 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
624 return;
625 }
626
627 my $username = $self->{'username'};
628 my $collect = $self->{'collect'};
629 my $gsdl_cgi = $self->{'gsdl_cgi'};
630
631 if ($baseaction::authentication_enabled) {
632 # Ensure the user is allowed to edit this collection
633 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
634 }
635
636 # Not sure if the checked_chdir into the collect directory is necessary,
637 # since lock_collection does a further chdir into the collection directory anyway
638 # But including the stmt during this code reorganisation to preserve as-is what used to happen
639 my $site = $self->{'site'};
640 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
641 $gsdl_cgi->checked_chdir($collect_dir);
642
643 # Make sure the collection isn't locked by someone else
644 $self->lock_collection($username, $collect);
645
646 if($where =~ m/import/) {
647 $self->_set_import_metadata_array(@_);
648 }
649 if($where =~ m/archives/) {
650 $self->_set_archives_metadata_array(@_);
651 }
652 if($where =~ m/index/) {
653 $self->_set_index_metadata_array(@_);
654 }
655 if($where =~ m/live/) {
656 $self->_set_live_metadata_array(@_);
657 }
658
659 # Release the lock once it is done
660 $self->unlock_collection($username, $collect);
661}
662
663sub _set_index_metadata_array
664{
665 my $self = shift @_;
666
667 my $collect = $self->{'collect'};
668 my $gsdl_cgi = $self->{'gsdl_cgi'};
669
670 # Obtain the collect dir
671 my $site = $self->{'site'};
672 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $status = -1;
690 my $docid = $doc_array_rec->{'docid'};
691
692 push(@all_docids,$docid);
693
694 my $metaname = $doc_array_rec->{'metaname'};
695 if(defined $metaname) {
696 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
697 my $metavalue = $doc_array_rec->{'metavalue'};
698 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
699
700 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
701 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
702 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
703
704 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
705 $metaname = $metatable_rec->{'metaname'};
706 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
707 my $metapos = undef;
708 my $prevmetaval = undef;
709 if(defined $metatable_rec->{'metapos'}) {
710 $metapos = $metatable_rec->{'metapos'};
711 my $metavalue = $metatable_rec->{'metavalue'};
712 $prevmetaval = $metatable_rec->{'prevmetavalue'};
713 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetaval);
714 } else {
715 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
716
717 foreach my $metavalue ( @$metavals ) { # metavals is an array
718 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetaval); # how do we use metamode in set_meta_entry?
719 if($metamode eq "override") { # now, having overridden the metavalue for the first,
720 # need to accumulate subsequent metavals for this metaname, else the just-assigned
721 # metavalue for this metaname will be lost
722 $metamode = "accumulate";
723 }
724 }
725 }
726 }
727 }
728
729 if ($status != 0) {
730 # Catch error if set infodb entry failed
731 $global_status = $status;
732 $global_mess .= "Failed to set metadata key: $docid\n";
733 $global_mess .= "Exit status: $status\n";
734 $global_mess .= "System Error Message: $!\n";
735 $global_mess .= "-" x 20;
736 }
737 }
738
739 if ($global_status != 0) {
740 $global_mess .= "PATH: $ENV{'PATH'}\n";
741 $gsdl_cgi->generate_error($global_mess);
742 }
743 else {
744 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
745 $gsdl_cgi->generate_ok_message($mess);
746 }
747}
748
749sub set_index_metadata_array
750{
751 my $self = shift @_;
752
753 my $username = $self->{'username'};
754 my $collect = $self->{'collect'};
755 my $gsdl_cgi = $self->{'gsdl_cgi'};
756
757 if ($baseaction::authentication_enabled) {
758 # Ensure the user is allowed to edit this collection
759 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
760 }
761
762 # Obtain the collect dir
763 my $site = $self->{'site'};
764 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
765
766 $gsdl_cgi->checked_chdir($collect_dir);
767
768
769 # Make sure the collection isn't locked by someone else
770 $self->lock_collection($username, $collect);
771
772 $self->_set_index_metadata_array(@_);
773
774 # Release the lock once it is done
775 $self->unlock_collection($username, $collect);
776}
777
778# experimental, newly added in and untested
779sub _set_live_metadata_array
780{
781 my $self = shift @_;
782
783 my $collect = $self->{'collect'};
784 my $gsdl_cgi = $self->{'gsdl_cgi'};
785
786 # Obtain the collect dir
787 my $site = $self->{'site'};
788 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
789
790 # look up additional args
791 my $infodbtype = $self->{'infodbtype'};
792 # To people who know $collect_tail please add some comments
793 # Obtain path to the database
794 my $collect_tail = $collect;
795 $collect_tail =~ s/^.*[\/|\\]//;
796 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
797 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
798
799
800 my $json_str = $self->{'json'};
801 my $doc_array = decode_json $json_str;
802
803
804 my $global_status = 0;
805 my $global_mess = "";
806
807 my @all_docids = ();
808
809
810 foreach my $doc_array_rec ( @$doc_array ) {
811
812 my $status = -1;
813 my $docid = $doc_array_rec->{'docid'};
814
815 push(@all_docids,$docid);
816
817 my $metaname = $doc_array_rec->{'metaname'};
818 if(defined $metaname) {
819 my $dbkey = "$docid.$metaname";
820 my $metavalue = $doc_array_rec->{'metavalue'};
821
822 # Set the new value
823 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
824 $status = system($cmd);
825
826 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
827 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
828 foreach my $metatable_rec ( @$metatable ) {
829 $metaname = $metatable_rec->{'metaname'};
830 my $dbkey = "$docid.$metaname";
831
832 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
833 foreach my $metavalue ( @$metavals ) {
834 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
835 $status = system($cmd);
836 }
837 }
838
839 }
840
841 if ($status != 0) {
842 # Catch error if gdbmget failed
843 $global_status = $status;
844 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
845 $global_mess .= "Exit status: $status\n";
846 $global_mess .= "System Error Message: $!\n";
847 $global_mess .= "-" x 20;
848 }
849 }
850
851 if ($global_status != 0) {
852 $global_mess .= "PATH: $ENV{'PATH'}\n";
853 $gsdl_cgi->generate_error($global_mess);
854 }
855 else {
856 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
857 $gsdl_cgi->generate_ok_message($mess);
858 }
859}
860
861sub set_live_metadata_array
862{
863 my $self = shift @_;
864
865 my $username = $self->{'username'};
866 my $collect = $self->{'collect'};
867 my $gsdl_cgi = $self->{'gsdl_cgi'};
868
869 if ($baseaction::authentication_enabled) {
870 # Ensure the user is allowed to edit this collection
871 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
872 }
873
874 # Obtain the collect dir
875 my $site = $self->{'site'};
876 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
877
878 $gsdl_cgi->checked_chdir($collect_dir);
879
880 # Make sure the collection isn't locked by someone else
881 $self->lock_collection($username, $collect);
882
883 $self->_set_live_metadata_array(@_);
884
885 # Release the lock once it is done
886 $self->unlock_collection($username, $collect);
887}
888
889
890sub dxml_metadata
891{
892 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
893 my $metaname = $parser->{'parameters'}->{'metaname'};
894 my $metamode = $parser->{'parameters'}->{'metamode'};
895
896###!!! print STDERR "**** Processing closing </Metadata> tag\n";
897
898 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
899
900 # Find the right metadata tag and checks if we are going to
901 # override it
902 #
903 # Note: This over writes the first metadata block it
904 # encountered. If there are multiple Sections in the doc.xml, it
905 # might not behave as you would expect
906
907 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
908## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n" if (defined $opt_doc_secnum);
909## print STDERR "**** metamode = $metamode\n";
910
911 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
912 {
913 my $name_attr = $attrHash->{'name'};
914 # print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
915 if (($name_attr eq $metaname) && ($metamode eq "override"))
916 {
917 if (!defined $parser->{'parameters'}->{'poscount'})
918 {
919 $parser->{'parameters'}->{'poscount'} = 0;
920 }
921 else
922 {
923 $parser->{'parameters'}->{'poscount'}++;
924 }
925
926 if (defined $parser->{'parameters'}->{'metapos'}) { # if pos defined, ignore any prevmeta if also defined, and just wait for pos match
927
928 if ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) {
929 ###print STDERR "#### got pos match!!\n";
930 # Get the value and override the current value
931 my $metavalue = $parser->{'parameters'}->{'metavalue'};
932 $attrHash->{'_content'} = $metavalue;
933
934 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
935 # we're not setting metamode to override here.
936
937 # Don't want it to wipe out any other pieces of metadata
938 $parser->{'parameters'}->{'metamode'} = "done";
939 }
940 }
941 elsif (defined $parser->{'parameters'}->{'prevmetavalue'}) { # if no pos defined but prevmeta defined, then wait for prevmeta match
942 if ($parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) {
943 ###print STDERR "### prev meta value matches\n";
944 my $metavalue = $parser->{'parameters'}->{'metavalue'};
945 $attrHash->{'_content'} = $metavalue;
946
947 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
948 # we're not setting metamode to override here.
949
950 $parser->{'parameters'}->{'metamode'} = "done";
951
952 }
953 }
954 # Note that before commit rev 32076, if neither metapos nor prevmeta defined, then code tried to set pos 0 for that metaname to the new value
955 # However that is not how the code works now (and couldn't get it to work that way yet after rev 32076).
956 # What it does now, and what Dr Bainbridge approved of is that:
957 # If no pos or prevmeta defined, then on override, the metavalue set for the specificed metaname becomes the ONLY instance of that metaname
958 # So any earlier instances of that metaname is removed and replaced by the single new instance.
959 }
960 }
961
962 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
963 return [$tagname => $attrHash];
964}
965
966# This method exists purely for catching invalid section numbers that the client
967# requested to edit. Once the parser has reached the end (the final </Archive> tag),
968# we've seen all the Sections in the doc.xml, and none of their section nums matched
969# if the metamode has not been set to 'done' by then.
970sub dxml_archive
971{
972 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
973 my $metamode = $parser->{'parameters'}->{'metamode'};
974
975 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
976 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
977
978 # print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
979
980 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
981 #print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
982
983 my $metaname = $parser->{'parameters'}->{'metaname'};
984 my $metavalue = $parser->{'parameters'}->{'metavalue'};
985
986 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
987 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
988 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
989 print STDERR "\tin section $opt_doc_secnum.\n";
990 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
991 }
992
993 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
994 return [$tagname => $attrHash];
995}
996
997sub dxml_description
998{
999 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1000 my $metamode = $parser->{'parameters'}->{'metamode'};
1001
1002 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1003 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1004
1005###!!! print STDERR "**** Processing a closing </Description> tag \n";
1006 # print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1007
1008 # Accumulate the metadata
1009
1010 # We'll be accumulating metadata at this point if we haven't found and therefore
1011 # haven't processed the metadata yet.
1012 # For subsections, this means that if we're at a matching subsection, but haven't
1013 # found the correct metaname to override in that subsection, we accumulate it as new
1014 # meta in the subsection by adding it to the current description.
1015 # If there's no subsection info for the metadata, it will accumulate at the top level
1016 # section description if we hadn't found a matching metaname to override at this point.
1017
1018 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1019 # at the toplevel section. In the latter case, it means we want to process meta in the
1020 # toplevel section. So the eq check between the values below will work in all cases.
1021
1022 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1023 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1024 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1025
1026 if ($curr_secnum eq $opt_doc_secnum
1027 && ($metamode eq "accumulate" || $metamode eq "override")) {
1028 # note, metamode would have been set to done if a metadata operation had already succeeded.
1029 # If we get here, we still have the metadata to append.
1030 if ($metamode eq "override") {
1031 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
1032 }
1033
1034 # If we get to here and metamode is override, this means there
1035 # was no existing value to overide => treat as an append operation
1036
1037 # Tack a new metadata tag on to the end of the <Metadata>+ block
1038 my $metaname = $parser->{'parameters'}->{'metaname'};
1039 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1040
1041 my $metadata_attr = {
1042 '_content' => $metavalue,
1043 'name' => $metaname,
1044 'mode' => "accumulate"
1045 };
1046
1047 my $append_metadata = [ "Metadata" => $metadata_attr ];
1048 my $description_content = $attrHash->{'_content'};
1049 print "Appending metadata to doc.xml\n";
1050
1051 if (ref($description_content)) {
1052 # got some existing interesting nested content
1053 push(@$description_content, " ", $append_metadata, "\n ");
1054 }
1055 else {
1056 #description_content is most likely a string such as "\n"
1057 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1058 }
1059
1060 $parser->{'parameters'}->{'metamode'} = "done";
1061 }
1062 else {
1063 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1064## print STDERR "**** NOT ACCUMULATE?!? \n";
1065 }
1066 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1067 return [$tagname => $attrHash];
1068}
1069
1070sub edit_xml_file
1071{
1072 my $self = shift @_;
1073 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1074
1075 # use XML::Rules to add it in (read in and out again)
1076 my $parser = XML::Rules->new(start_rules => $start_rules,
1077 rules => $rules,
1078 style => 'filter',
1079 output_encoding => 'utf8' );
1080
1081 my $xml_in = "";
1082 if (!open(MIN,"<$filename")) {
1083 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1084 }
1085 else {
1086 # Read all the text in
1087 my $line;
1088 while (defined ($line=<MIN>)) {
1089 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1090 }
1091 close(MIN);
1092
1093 my $MOUT;
1094 if (!open($MOUT,">$filename")) {
1095 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1096 }
1097 else {
1098 # Matched lines will get handled by the call backs
1099## my $xml_out = "";
1100
1101 binmode($MOUT,":utf8");
1102 $parser->filter($xml_in,$MOUT, $options);
1103
1104# binmode(MOUT,":utf8");
1105# print MOUT $xml_out;
1106 close($MOUT);
1107 }
1108 }
1109
1110 # copy across any custom error information that was stored during parsing
1111 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1112}
1113
1114sub edit_doc_xml
1115{
1116 my $self = shift @_;
1117 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1118
1119 my $info_mess = <<RAWEND;
1120****************************
1121 edit_doc_xml()
1122****************************
1123RAWEND
1124
1125 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1126 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1127 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1128 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1129 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1130 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1131 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1132
1133 $info_mess .= "****************************\n";
1134
1135 $gsdl_cgi->generate_message($info_mess);
1136
1137 # To monitor which section/subsection number we are in
1138 my @start_rules =
1139 ( 'Section' => \&dxml_start_section );
1140
1141 # use XML::Rules to add it in (read in and out again)
1142 # Set the call back functions
1143 my @rules =
1144 ( _default => 'raw',
1145 'Metadata' => \&dxml_metadata,
1146 'Description' => \&dxml_description,
1147 'Archive' => \&dxml_archive); # just for catching errors at end
1148
1149 # Sets the parameters
1150 my $options = { 'metaname' => $metaname,
1151 'metapos' => $metapos,
1152 'metavalue' => $metavalue,
1153 'metamode' => $metamode,
1154 'prevmetavalue' => $prevmetavalue };
1155
1156 if (defined $opt_secnum) {
1157 $options->{'secnum'} = $opt_secnum;
1158 }
1159
1160 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1161}
1162
1163sub set_archives_metadata_entry
1164{
1165 my $self = shift @_;
1166 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $dv, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1167
1168 my $info_mess = <<RAWEND;
1169****************************
1170 set_archives_metadata_entry()
1171****************************
1172RAWEND
1173
1174 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1175 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1176 $info_mess .= " collect = $collect\n" if defined($collect);
1177 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1178 $info_mess .= " docid = $docid\n" if defined($docid);
1179 $info_mess .= " dv = $dv\n" if defined($dv);
1180 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1181 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1182 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1183 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1184 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1185
1186 $info_mess .= "****************************\n";
1187
1188 $gsdl_cgi->generate_message($info_mess);
1189
1190 # Obtain the doc.xml path for the specified docID
1191 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1192
1193 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1194 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1195 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1196
1197 # check if request if for file-level doc-version history 'nminus-<n>' version
1198 if (defined $dv && ($dv ne "")) {
1199 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1200
1201 my ($doc_xml_tailname, $doc_xml_dirname) = File::Basename::fileparse($doc_xml_file);
1202 $doc_xml_file = &util::filename_cat($doc_xml_dirname,$FLDV_HISTORY_DIR,$dv,$doc_xml_tailname);
1203 }
1204
1205 # The $doc_xml_file is relative to the archives, and now let's get the full path
1206 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1207
1208 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1209
1210 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1211 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1212 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1213 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1214 print "override mode with no pos or prev value set. Removing all existing values for $metaname, and setting mode to accumulate for the new value.\n";
1215 # remove all values of $metaname metadata
1216 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1217 $metamode = "accumulate";
1218 }
1219 # Edit the doc.xml file with the specified metadata name, value and position.
1220 # TODO: there is a potential problem here as this edit_doc_xml function
1221 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1222 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1223
1224 # dxml_metadata method ignores metapos if metamode anything other than override
1225 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1226 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1227
1228 # return 0; # return 0 for now to indicate no error
1229 return (defined $self->{'error_msg'}) ? 1 : 0;
1230}
1231
1232
1233sub set_archives_metadata
1234{
1235 my $self = shift @_;
1236
1237 my $username = $self->{'username'};
1238 my $collect = $self->{'collect'};
1239 my $gsdl_cgi = $self->{'gsdl_cgi'};
1240
1241 if ($baseaction::authentication_enabled) {
1242 # Ensure the user is allowed to edit this collection
1243 $self->authenticate_user($username, $collect);
1244 }
1245
1246 # Make sure the collection isn't locked by someone else
1247 $self->lock_collection($username, $collect);
1248
1249 $self->_set_archives_metadata(@_);
1250
1251 # Release the lock once it is done
1252 $self->unlock_collection($username, $collect);
1253}
1254
1255sub _set_archives_metadata_array
1256{
1257 my $self = shift @_;
1258
1259 my $collect = $self->{'collect'};
1260 my $gsdl_cgi = $self->{'gsdl_cgi'};
1261
1262 # Obtain the collect dir
1263 my $site = $self->{'site'};
1264 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1265
1266 # look up additional args
1267
1268 my $infodbtype = $self->{'infodbtype'};
1269
1270 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1271
1272 my $json_str = $self->{'json'};
1273 my $doc_array = decode_json $json_str;
1274
1275
1276 my $global_status = 0;
1277 my $global_mess = "";
1278
1279 my @all_docids = ();
1280
1281 foreach my $doc_array_rec ( @$doc_array ) {
1282 my $status = -1;
1283 my $docid = $doc_array_rec->{'docid'};
1284 my $dv = $doc_array_rec->{'dv'} || undef;
1285
1286 push(@all_docids,$docid);
1287
1288 my $metaname = $doc_array_rec->{'metaname'};
1289 if(defined $metaname) {
1290
1291 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1292
1293 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1294 my $metavalue = $doc_array_rec->{'metavalue'};
1295 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1296
1297
1298 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1299 # make "accumulate" the default (less destructive, as it won't actually
1300 # delete any existing values)
1301 $metamode = "accumulate";
1302 }
1303
1304 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
1305 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1306 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1307 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1308
1309 foreach my $metatable_rec ( @$metatable ) {
1310 $metaname = $metatable_rec->{'metaname'};
1311 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1312 my $metapos = undef;
1313 my $prevmetavalue = undef;
1314 if(defined $metatable_rec->{'metapos'}) {
1315 $metapos = $metatable_rec->{'metapos'};
1316 $prevmetavalue = $metatable_rec->{'prevmetavalue'}; # or still undef
1317 my $metavalue = $metatable_rec->{'metavalue'};
1318 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir,
1319 $collect_dir,$collect,$infodbtype,
1320 $docid,$dv, $metaname,$metapos,
1321 $metavalue,$metamode,$prevmetavalue);
1322 } else {
1323 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1324
1325 foreach my $metavalue ( @$metavals ) {
1326 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1327 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1328
1329 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1330 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1331 # metavalue for this metaname will be lost
1332 $metamode = "accumulate";
1333 }
1334 }
1335 }
1336 }
1337 }
1338
1339 if ($status != 0) {
1340 # Catch error if set infodb entry failed
1341 $global_status = $status;
1342 $global_mess .= "Failed to set metadata key: $docid\n";
1343 $global_mess .= "Exit status: $status\n";
1344 $global_mess .= "System Error Message: $!\n";
1345 $global_mess .= "-" x 20 . "\n";
1346 }
1347 }
1348
1349 if ($global_status != 0) {
1350 $global_mess .= "PATH: $ENV{'PATH'}\n";
1351 $gsdl_cgi->generate_error($global_mess);
1352 }
1353 else {
1354 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1355 $gsdl_cgi->generate_ok_message($mess);
1356 }
1357}
1358
1359sub set_archives_metadata_array
1360{
1361 my $self = shift @_;
1362
1363 my $username = $self->{'username'};
1364 my $collect = $self->{'collect'};
1365 my $gsdl_cgi = $self->{'gsdl_cgi'};
1366
1367 if ($baseaction::authentication_enabled) {
1368 # Ensure the user is allowed to edit this collection
1369 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1370 }
1371
1372 # Obtain the collect dir
1373 my $site = $self->{'site'};
1374 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1375
1376 $gsdl_cgi->checked_chdir($collect_dir);
1377
1378 # Make sure the collection isn't locked by someone else
1379 $self->lock_collection($username, $collect);
1380
1381 $self->_set_archives_metadata_array(@_);
1382
1383 # Release the lock once it is done
1384 $self->unlock_collection($username, $collect);
1385}
1386
1387
1388
1389# the version of set_archives_meta that doesn't do authentication
1390sub _set_archives_assocfile
1391{
1392 my $self = shift @_;
1393
1394 my $collect = $self->{'collect'};
1395 my $gsdl_cgi = $self->{'gsdl_cgi'};
1396 my $infodbtype = $self->{'infodbtype'};
1397
1398 # Obtain the collect and archive dir
1399 my $site = $self->{'site'};
1400 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1401 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1402
1403 # look up additional args
1404 my $docid = $self->{'d'};
1405
1406 my $assocname = $self->{'assocname'};
1407
1408 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1409 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1410
1411 my $doc_file = $doc_rec->{'doc-file'}->[0];
1412
1413 # check if request if for file-level doc-version history 'nminus-<n>' version
1414 my $dv = $self->{'dv'};
1415
1416 my ($unused_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1417
1418 my $output_file = $assocname;
1419
1420 if (defined $dv && ($dv ne "")) {
1421 # Need to insert '_fldv_history/nminus-<n>' into output_file
1422
1423 $output_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$output_file);
1424 }
1425 else {
1426 $output_file = &util::filename_cat($doc_dirname,$output_file);
1427 }
1428
1429 my $output_filename = &util::filename_cat($archive_dir, $output_file);
1430
1431 my $filedata = $self->{'filedata'};
1432 my $FIN = $self->{'fileupload'};
1433
1434 if ((!defined $filedata) && (!defined $FIN)) {
1435 $gsdl_cgi->generate_error("set-archives-assocfile: either 'fileupload' or 'filedata' must be set");
1436 }
1437 else {
1438
1439 my $had_error = 0;
1440 my $had_error_mess = undef;
1441
1442 if (defined $filedata) {
1443 if (open(FOUT,">$output_filename")) {
1444 print FOUT $filedata;
1445 close(FOUT);
1446 }
1447 else {
1448 $had_error = 1;
1449 $had_error_mess = "Failed to save filedata to: $output_filename";
1450 print STDERR "Error - $had_error_mess:\n$!\n";
1451 }
1452 }
1453 else {
1454 my $FIN = $self->{'fileupload'};
1455 if (open(FOUT,">$output_filename")) {
1456 binmode(FOUT, ":raw");
1457
1458 while (1) {
1459 my $buffer = "";
1460 my $bytes_read = read($FIN, $buffer, 1024);
1461
1462 if (defined $bytes_read) {
1463 if ($bytes_read>0) {
1464 print FOUT $buffer;
1465 }
1466 last if $bytes_read < 1024;
1467 }
1468 else {
1469 $had_error = 1;
1470 $had_error_mess = "set-archives-assocfile: Failed to open uploaded file for reading: $!\n";
1471 print STDERR "Error - $had_error_mess";
1472 last;
1473 }
1474 }
1475
1476 close($FIN);
1477 close(FOUT);
1478 }
1479 else {
1480 $had_error = 1;
1481 $had_error_mess = "Failed to save file uploaded file as: $output_filename";
1482 print STDERR "Error - $had_error_mess:\n$!\n";
1483 }
1484 }
1485
1486 if (!$had_error) {
1487 my $mess = "set-archives-assocfile successful save uploaded content into 'archives' as: $output_file\n";
1488 $gsdl_cgi->generate_ok_message($mess);
1489 }
1490 else {
1491 $gsdl_cgi->generate_error($had_error_mess);
1492 }
1493 }
1494}
1495
1496
1497sub set_archives_assocfile
1498{
1499 my $self = shift @_;
1500
1501 my $username = $self->{'username'};
1502 my $collect = $self->{'collect'};
1503 my $gsdl_cgi = $self->{'gsdl_cgi'};
1504
1505 if ($baseaction::authentication_enabled) {
1506 # Ensure the user is allowed to edit this collection
1507 $self->authenticate_user($username, $collect);
1508 }
1509
1510
1511 # Make sure the collection isn't locked by someone else
1512 $self->lock_collection($username, $collect);
1513
1514 $self->_set_archives_assocfile(@_);
1515
1516 # Release the lock once it is done
1517 $self->unlock_collection($username, $collect);
1518}
1519
1520
1521sub _remove_archives_metadata
1522{
1523 my $self = shift @_;
1524
1525 my $collect = $self->{'collect'};
1526 my $gsdl_cgi = $self->{'gsdl_cgi'};
1527 my $infodbtype = $self->{'infodbtype'};
1528
1529
1530 # Obtain the collect and archive dir
1531 my $site = $self->{'site'};
1532 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1533
1534 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1535
1536 # look up additional args
1537 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1538
1539 my $metaname = $self->{'metaname'};
1540 my $metapos = $self->{'metapos'};
1541 my $metavalue = $self->{'metavalue'};
1542
1543 my $metamode = $self->{'metamode'};
1544
1545 my $dv = $self->{'dv'};
1546
1547 my $status = $self->remove_archives_metadata_entry($gsdl_cgi,$archive_dir, $infodbtype,
1548 $docid,$docid_secnum,$dv, $metaname,$metapos,$metavalue,$metamode);
1549
1550 if ($status == 0)
1551 {
1552 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1553 $mess .= " $metaname";
1554 $mess .= "->[$metapos]" if (defined $metapos);
1555 $mess .= " ($metavalue)" if (defined $metavalue);
1556 $gsdl_cgi->generate_ok_message($mess);
1557 }
1558 else
1559 {
1560 my $mess .= "Failed to remove archives metadata key: $docid\n";
1561 $mess .= "Exit status: $status\n";
1562 $mess .= "System Error Message: $!\n";
1563 $mess .= "-" x 20 . "\n";
1564
1565 $gsdl_cgi->generate_error($mess);
1566 }
1567
1568 #return $status; # in case calling functions have a use for this
1569}
1570
1571sub remove_archives_metadata_entry
1572{
1573 my $self = shift @_;
1574 my ($gsdl_cgi, $archive_dir, $infodbtype, $docid, $docid_secnum, $dv, $metaname, $metapos, $metavalue, $metamode) = @_;
1575
1576 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1577 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1578
1579 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1580 # default to deleting the first metavalue for the given metaname
1581 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1582 # seem to get deleted in one single remove_archives_meta action invocation.
1583 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1584 # deleted, both get deleted.
1585 if(!defined $metapos && !defined $metavalue) {
1586 $metapos = 0;
1587 }
1588
1589 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1590
1591 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1592 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1593
1594 # This now stores the full pathname
1595 my $doc_file = $doc_rec->{'doc-file'}->[0];
1596
1597 # check if request if for file-level doc-version history 'nminus-<n>' version
1598 if (defined $dv && ($dv ne "")) {
1599 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1600
1601 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1602 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
1603 }
1604
1605 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
1606
1607 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1608# my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, undef, $docid_secnum);
1609
1610 return $status;
1611}
1612
1613sub remove_archives_metadata
1614{
1615 my $self = shift @_;
1616
1617 my $username = $self->{'username'};
1618 my $collect = $self->{'collect'};
1619 my $gsdl_cgi = $self->{'gsdl_cgi'};
1620
1621 if ($baseaction::authentication_enabled)
1622 {
1623 # Ensure the user is allowed to edit this collection
1624 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1625 }
1626
1627 # Make sure the collection isn't locked by someone else
1628 $self->lock_collection($username, $collect);
1629
1630 $self->_remove_archives_metadata(@_);
1631
1632 # Release the lock once it is done
1633 $self->unlock_collection($username, $collect);
1634}
1635
1636sub remove_from_doc_xml
1637{
1638 my $self = shift @_;
1639 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1640
1641 my @start_rules = ('Section' => \&dxml_start_section);
1642
1643 # Set the call-back functions for the metadata tags
1644 my @rules =
1645 (
1646 _default => 'raw',
1647 'Metadata' => \&rfdxml_metadata
1648 );
1649
1650 my $parser = XML::Rules->new
1651 (
1652 start_rules => \@start_rules,
1653 rules => \@rules,
1654 style => 'filter',
1655 output_encoding => 'utf8',
1656# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1657 stripspaces => 2|0|0 # ineffectual
1658 );
1659
1660 my $status = 0;
1661 my $xml_in = "";
1662 if (!open(MIN,"<$doc_xml_filename"))
1663 {
1664 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1665 $status = 1;
1666 }
1667 else
1668 {
1669 # Read them in
1670 my $line;
1671 while (defined ($line=<MIN>)) {
1672 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1673 }
1674 close(MIN);
1675
1676 # Filter with the call-back functions
1677 my $xml_out = "";
1678
1679 my $MOUT;
1680 if (!open($MOUT,">$doc_xml_filename")) {
1681 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1682 $status = 1;
1683 }
1684 else {
1685 binmode($MOUT,":utf8");
1686 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1687 close($MOUT);
1688 }
1689 }
1690 return $status;
1691}
1692
1693sub rfdxml_metadata
1694{
1695 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1696
1697 # For comparisons, toplevel section is indicated by ""
1698 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1699 my $secid = $parser->{'parameters'}->{'secid'} || "";
1700
1701 if (!($secid eq $curr_sec_num))
1702 {
1703 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1704 return [$tagname => $attrHash];
1705 }
1706
1707 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1708 {
1709 if (!defined $parser->{'parameters'}->{'poscount'})
1710 {
1711 $parser->{'parameters'}->{'poscount'} = 0;
1712 }
1713 else
1714 {
1715 $parser->{'parameters'}->{'poscount'}++;
1716 }
1717
1718 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1719 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
1720 return [];
1721 }
1722
1723 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1724 {
1725 return [];
1726 }
1727
1728 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1729 {
1730 return [];
1731 }
1732 }
1733
1734 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1735 return [$tagname => $attrHash];
1736}
1737
1738sub mxml_metadata
1739{
1740 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1741 my $metaname = $parser->{'parameters'}->{'metaname'};
1742 my $metamode = $parser->{'parameters'}->{'metamode'};
1743
1744 # Report error if we don't see FileName tag before this
1745 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1746
1747 # Don't do anything if we are not in the right FileSet
1748 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1749 if ($file_regexp =~ /\.\*/) {
1750 # Only interested in a file_regexp if it specifies precisely one
1751 # file.
1752 # So, skip anything with a .* in it as it is too general
1753## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1754 return [$tagname => $attrHash];
1755 }
1756 my $src_file = $parser->{'parameters'}->{'src_file'};
1757 if (!($src_file =~ /$file_regexp/)) {
1758 return [$tagname => $attrHash];
1759 }
1760## print STDERR "*** mxl metamode = $metamode\n";
1761
1762 # Find the right metadata tag and checks if we are going to override it
1763 my $name_attr = $attrHash->{'name'};
1764 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1765
1766 # now metadata.xml functions need to keep track of metapos
1767 if (!defined $parser->{'parameters'}->{'poscount'})
1768 {
1769 $parser->{'parameters'}->{'poscount'} = 0;
1770 }
1771 else
1772 {
1773 $parser->{'parameters'}->{'poscount'}++;
1774 }
1775
1776 # If either the metapos or prevmetavalue is set,
1777 # get the value and override the current value
1778 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1779
1780 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1781 {
1782 $attrHash->{'_content'} = $metavalue;
1783
1784 ## print STDERR "**** overriding metadata.xml\n";
1785
1786 # Don't want it to wipe out any other pieces of metadata
1787 $parser->{'parameters'}->{'metamode'} = "done";
1788 }
1789 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1790 {
1791 $attrHash->{'_content'} = $metavalue;
1792 $parser->{'parameters'}->{'metamode'} = "done";
1793 }
1794 }
1795
1796 # mxml_description will process the metadata if metadata is accumulate,
1797 # or if we haven't found the metadata to override
1798
1799 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1800 return [$tagname => $attrHash];
1801}
1802
1803
1804sub mxml_description
1805{
1806 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1807 my $metamode = $parser->{'parameters'}->{'metamode'};
1808
1809 # Failed... Report error if we don't see FileName tag before this
1810 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1811
1812 # Don't do anything if we are not in the right FileSet
1813 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1814 if ($file_regexp =~ m/\.\*/) {
1815 # Only interested in a file_regexp if it specifies precisely one
1816 # file.
1817 # So, skip anything with a .* in it as it is too general
1818 return [$tagname => $attrHash];
1819 }
1820 my $src_file = $parser->{'parameters'}->{'src_file'};
1821
1822 if (!($src_file =~ m/$file_regexp/)) {
1823 return [$tagname => $attrHash];
1824 }
1825
1826 # Accumulate the metadata block to the end of the description block
1827 # Note: This adds metadata block to all description blocks, so if there are
1828 # multiple FileSets, it will add to all of them
1829 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1830
1831 # if metamode was "override" but get to here then it failed to
1832 # find an item to override, in which case it should append its
1833 # value to the end, just like the "accumulate" mode
1834
1835 if ($metamode eq "override") {
1836 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1837 }
1838
1839 # tack a new metadata tag on to the end of the <Metadata>+ block
1840 my $metaname = $parser->{'parameters'}->{'metaname'};
1841 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1842
1843 my $metadata_attr = { '_content' => $metavalue,
1844 'name' => $metaname,
1845 'mode' => "accumulate" };
1846
1847 my $append_metadata = [ "Metadata" => $metadata_attr ];
1848 my $description_content = $attrHash->{'_content'};
1849
1850## print STDERR "*** appending to metadata.xml\n";
1851
1852 # append the new metadata element to the end of the current
1853 # content contained inside this tag
1854 if (ref($description_content) eq "") {
1855 # => string or numeric literal
1856 # this is caused by a <Description> block has no <Metadata> child elements
1857 # => set up an empty array in '_content'
1858 $attrHash->{'_content'} = [ "\n" ];
1859 $description_content = $attrHash->{'_content'};
1860 }
1861
1862 push(@$description_content, " ", $append_metadata, "\n ");
1863 $parser->{'parameters'}->{'metamode'} = "done";
1864 }
1865
1866 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1867 return [$tagname => $attrHash];
1868}
1869
1870
1871sub mxml_fileset
1872{
1873 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1874
1875 # Initilise the current_file
1876 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1877 # FileName tag must come before Description tag
1878 $parser->{'parameters'}->{'current_file'} = "";
1879
1880 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1881 return [$tagname => $attrHash];
1882}
1883
1884sub mxml_directorymetadata
1885{
1886 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1887
1888 # if we haven't processed the metadata when we reach the end of metadata.xml
1889 # it's because there's no particular FileSet element whose FileName matched
1890 # In which case, add a new FileSet for this FileName
1891 my $metamode = $parser->{'parameters'}->{'metamode'};
1892 if($metamode ne "done") {
1893
1894 if ($metamode eq "override") {
1895 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1896 }
1897
1898 # If we get to here and metamode is override, this means there
1899 # was no existing value to overide => treat as an append operation
1900
1901 # Create a new FileSet element and append to DirectoryMetadata
1902 # <FileSet>
1903 # <FileName>src_file</FileName>
1904 # <Description>
1905 # <Metadata mode="" name="">metavalue</Metadata>
1906 # </Description>
1907 # </FileSet>
1908 my $src_file = $parser->{'parameters'}->{'src_file'};
1909 my $metaname = $parser->{'parameters'}->{'metaname'};
1910 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1911 my $metadata_attr = {
1912 '_content' => $metavalue,
1913 'name' => $metaname,
1914 'mode' => "accumulate"
1915 };
1916 my $append_metadata = [ "Metadata" => $metadata_attr ];
1917 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
1918 my $description_element = [ "Description" => $description_attr ];
1919
1920 #_content is not an attribute, it's special and holds the children of this element
1921 # including the textnode value embedded in this element if any.
1922 my $filename_attr = {'_content' => $src_file};
1923 my $filename_element = [ "FileName" => $filename_attr ];
1924
1925 my $fileset_attr = {};
1926 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1927 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1928
1929
1930 # get children of dirmeta, and push the new FileSet element onto it
1931 print "Appending metadata to metadata.xml\n";
1932 my $dirmeta_content = $attrHash->{'_content'};
1933 if (ref($dirmeta_content)) {
1934 # got some existing interesting nested content
1935 #push(@$dirmeta_content, " ", $fileset ,"\n ");
1936 push(@$dirmeta_content, "\t", $fileset ,"\n");
1937 }
1938 else {
1939 #description_content is most likely a string such as "\n"
1940 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
1941 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1942 }
1943
1944 $parser->{'parameters'}->{'metamode'} = "done";
1945 }
1946 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1947 return [$tagname => $attrHash];
1948}
1949
1950
1951sub edit_metadata_xml
1952{
1953 my $self = shift @_;
1954 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1955
1956 # Set the call-back functions for the metadata tags
1957 my @rules =
1958 ( _default => 'raw',
1959 'FileName' => \&mxml_filename,
1960 'Metadata' => \&mxml_metadata,
1961 'Description' => \&mxml_description,
1962 'FileSet' => \&mxml_fileset,
1963 'DirectoryMetadata' => \&mxml_directorymetadata);
1964
1965 # use XML::Rules to add it in (read in and out again)
1966 my $parser = XML::Rules->new(rules => \@rules,
1967 style => 'filter',
1968 output_encoding => 'utf8',
1969 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1970
1971 if (!-e $metadata_xml_filename) {
1972
1973 if (open(MOUT,">$metadata_xml_filename")) {
1974
1975 my $src_file_re = &util::filename_to_regex($src_file);
1976 # shouldn't the following also be in the above utility routine??
1977 # $src_file_re =~ s/\./\\./g;
1978
1979 print MOUT "<?xml version=\"1.0\"?>\n";
1980 print MOUT "<DirectoryMetadata>\n";
1981 print MOUT " <FileSet>\n";
1982 print MOUT " <FileName>$src_file_re</FileName>\n";
1983 print MOUT " <Description>\n";
1984 print MOUT " </Description>\n";
1985 print MOUT " </FileSet>\n";
1986 print MOUT "</DirectoryMetadata>\n";
1987
1988 close(MOUT);
1989 }
1990 else {
1991 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1992 }
1993 }
1994
1995
1996 my $xml_in = "";
1997 if (!open(MIN,"<$metadata_xml_filename")) {
1998 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1999 }
2000 else {
2001 # Read them in
2002 my $line;
2003 while (defined ($line=<MIN>)) {
2004 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
2005 }
2006 close(MIN);
2007
2008 # Filter with the call-back functions
2009 my $xml_out = "";
2010
2011 my $MOUT;
2012 if (!open($MOUT,">$metadata_xml_filename")) {
2013 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2014 }
2015 else {
2016 binmode($MOUT,":utf8");
2017
2018 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2019 # At the moment, I will just hack it!
2020 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2021 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2022 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2023 #print MOUT $xml_out;
2024
2025 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2026 metapos => $metapos,
2027 metavalue => $metavalue,
2028 metamode => $metamode,
2029 src_file => $src_file,
2030 prevmetavalue => $prevmetavalue,
2031 current_file => undef} );
2032 close($MOUT);
2033 }
2034 }
2035}
2036
2037
2038sub set_import_metadata
2039{
2040 my $self = shift @_;
2041
2042 my $username = $self->{'username'};
2043 my $collect = $self->{'collect'};
2044 my $gsdl_cgi = $self->{'gsdl_cgi'};
2045
2046 if ($baseaction::authentication_enabled) {
2047 # Ensure the user is allowed to edit this collection
2048 $self->authenticate_user($username, $collect);
2049 }
2050
2051 # Make sure the collection isn't locked by someone else
2052 $self->lock_collection($username, $collect);
2053
2054 $self->_set_import_metadata(@_);
2055
2056 # Release the lock once it is done
2057 $self->unlock_collection($username, $collect);
2058
2059}
2060
2061sub set_import_metadata_array
2062{
2063 my $self = shift @_;
2064
2065 my $username = $self->{'username'};
2066 my $collect = $self->{'collect'};
2067 my $gsdl_cgi = $self->{'gsdl_cgi'};
2068
2069 if ($baseaction::authentication_enabled) {
2070 # Ensure the user is allowed to edit this collection
2071 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2072 }
2073
2074 # Obtain the collect dir
2075 my $site = $self->{'site'};
2076 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2077
2078 $gsdl_cgi->checked_chdir($collect_dir);
2079
2080 # Make sure the collection isn't locked by someone else
2081 $self->lock_collection($username, $collect);
2082
2083 $self->_set_import_metadata_array(@_);
2084
2085 # Release the lock once it is done
2086 $self->unlock_collection($username, $collect);
2087
2088}
2089
2090
2091sub _set_import_metadata_array
2092{
2093 my $self = shift @_;
2094
2095 my $collect = $self->{'collect'};
2096 my $gsdl_cgi = $self->{'gsdl_cgi'};
2097
2098 # Obtain the collect dir
2099 my $site = $self->{'site'};
2100 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2101
2102 # look up additional args
2103
2104 my $infodbtype = $self->{'infodbtype'};
2105
2106 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2107 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2108
2109 my $json_str = $self->{'json'};
2110 my $doc_array = decode_json $json_str;
2111
2112 my $global_status = 0;
2113 my $global_mess = "";
2114
2115 my @all_docids = ();
2116
2117 foreach my $doc_array_rec ( @$doc_array )
2118 {
2119 my $status = -1;
2120 my $docid = $doc_array_rec->{'docid'};
2121
2122 my ($docid_root,$docid_secnum);
2123 if(defined $docid) {
2124 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2125 # as yet no support for setting subsection metadata in metadata.xml
2126 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2127 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2128 next; # skip this docid in for loop
2129 }
2130 }
2131
2132 push(@all_docids,$docid); # docid_root rather
2133
2134 my $metaname = $doc_array_rec->{'metaname'};
2135 if (defined $metaname) {
2136 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2137 my $metavalue = $doc_array_rec->{'metavalue'};
2138 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2139
2140 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2141 # make "accumulate" the default (less destructive, as it won't actually
2142 # delete any existing values)
2143 $metamode = "accumulate";
2144 }
2145
2146 # adding metapos and prevmetavalue support to import_metadata subroutines
2147 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2148 my $prevmetavalue = $self->{'prevmetavalue'};
2149
2150 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2151
2152 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2153 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2154
2155 foreach my $metatable_rec ( @$metatable ) {
2156 $metaname = $metatable_rec->{'metaname'};
2157 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2158 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2159 # make "accumulate" the default (less destructive, as it won't actually
2160 # delete any existing values)
2161 $metamode = "accumulate";
2162 }
2163
2164 # No support for metapos and prevmetavalue in the JSON metatable substructure
2165 my $metapos = undef;
2166 my $prevmetavalue = undef;
2167 if(defined $metatable_rec->{'metapos'}) {
2168 $metapos = $metatable_rec->{'metapos'};
2169 $prevmetavalue = $metatable_rec->{'prevmetavalue'}; # or still undef
2170 my $metavalue = $metatable_rec->{'metavalue'};
2171 #$metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2172 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2173 } else {
2174
2175 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2176
2177 foreach my $metavalue ( @$metavals ) {
2178 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2179
2180 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2181 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2182 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2183 # metavalue for this metaname will be lost
2184 $metamode = "accumulate";
2185 }
2186 }
2187 }
2188 }
2189 }
2190 }
2191
2192 # always a success message
2193 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2194 $gsdl_cgi->generate_ok_message($mess);
2195}
2196
2197# always returns true (1)
2198sub set_import_metadata_entry
2199{
2200 my $self = shift @_;
2201 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2202
2203 my $info_mess = <<RAWEND;
2204****************************
2205 set_import_metadata_entry()
2206****************************
2207RAWEND
2208
2209 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2210 $info_mess .= " collect = $collect\n" if defined($collect);
2211 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2212 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2213 $info_mess .= " docid = $docid\n" if defined($docid);
2214 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2215 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2216 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2217 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2218 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2219
2220 $info_mess .= "****************************\n";
2221
2222 $gsdl_cgi->generate_message($info_mess);
2223
2224 # import works with metadata.xml which can have inherited metadata
2225 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2226 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2227 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2228 # what they're doing if they provide a metapos.
2229 if(defined $metapos) {
2230 print STDERR "@@@@ WARNING: metapos defined.\n";
2231 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2232 }
2233
2234 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2235 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2236 my $metadata_xml_file;
2237 my $import_filename = undef;
2238
2239 if (defined $docid) {
2240 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2241 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2242
2243 # This now stores the full pathname
2244 $import_filename = $doc_rec->{'src-file'}->[0];
2245 $import_filename = &util::placeholders_to_abspath($import_filename);
2246
2247 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2248 # as the array version of the method doesn't support the -f parameter yet
2249 my $import_file = $self->{'f'};
2250 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2251 }
2252
2253 # figure out correct metadata.xml file [?]
2254 # Assuming the metadata.xml file is next to the source file
2255 # Note: This will not work if it is using the inherited metadata from the parent folder
2256 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2257 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2258
2259 # If we're overriding everything, then $prevmetavalue=undefined and
2260 # $metamode=override combined with $metapos=undefined
2261 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2262 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2263 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2264## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2265 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2266
2267 }
2268
2269 # Edit the metadata.xml
2270 # Modified by Jeffrey from DL Consulting
2271 # Handle the case where there is one metadata.xml file for multiple FileSets
2272 # The XML filter needs to know whether it is in the right FileSet
2273 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2274 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2275 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2276 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2277 #return 0;
2278 return $metadata_xml_filename;
2279}
2280
2281sub _remove_import_metadata
2282{
2283 my $self = shift @_;
2284
2285 my $collect = $self->{'collect'};
2286 my $gsdl_cgi = $self->{'gsdl_cgi'};
2287 my $infodbtype = $self->{'infodbtype'};
2288
2289 # Obtain the collect and archive dir
2290 my $site = $self->{'site'};
2291 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2292
2293 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2294
2295 # look up additional args
2296 my $docid = $self->{'d'};
2297
2298 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2299
2300 my $metaname = $self->{'metaname'};
2301 my $metapos = $self->{'metapos'};
2302 my $metavalue = $self->{'metavalue'};
2303
2304 my $metamode = $self->{'metamode'};
2305
2306 my $metadata_xml_filename = $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode);
2307
2308 # any errors would have been reported inside the remove_import_metadata_entry
2309
2310 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2311 $mess .= " $metaname";
2312 $mess .= " = $metavalue\n";
2313
2314 $gsdl_cgi->generate_ok_message($mess);
2315
2316 #return $status; # in case calling functions have a use for this
2317}
2318
2319sub remove_import_metadata_entry
2320{
2321 my $self = shift @_;
2322 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode) = @_;
2323
2324 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2325 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2326
2327 if(defined $metavalue) { # metavalue is not a compulsory arg for remove_import_metadata()
2328 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2329 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2330 $metapos = 0;
2331 }
2332
2333 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2334
2335 # import works with metadata.xml which can have inherited metadata
2336 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2337 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2338 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2339 # what they're doing if they provide a metapos.
2340 if(defined $metapos) {
2341 print STDERR "@@@@ WARNING: metapos defined.\n";
2342 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2343 }
2344
2345 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2346 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2347 my $metadata_xml_file;
2348 my $import_filename = undef;
2349 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2350 {
2351 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2352 } else {
2353 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2354
2355 # This now stores the full pathname
2356 $import_filename = $doc_rec->{'src-file'}->[0];
2357 $import_filename = &util::placeholders_to_abspath($import_filename);
2358 }
2359
2360 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2361 {
2362 $gsdl_cgi->generate_error("There is no metadata\n");
2363 }
2364
2365 # figure out correct metadata.xml file [?]
2366 # Assuming the metadata.xml file is next to the source file
2367 # Note: This will not work if it is using the inherited metadata from the parent folder
2368 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2369 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2370
2371 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2372
2373 return $metadata_xml_filename;
2374}
2375
2376sub remove_import_metadata
2377{
2378 my $self = shift @_;
2379
2380 my $username = $self->{'username'};
2381 my $collect = $self->{'collect'};
2382 my $gsdl_cgi = $self->{'gsdl_cgi'};
2383
2384 if ($baseaction::authentication_enabled) {
2385 # Ensure the user is allowed to edit this collection
2386 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2387 }
2388
2389 # Make sure the collection isn't locked by someone else
2390 $self->lock_collection($username, $collect);
2391
2392 $self->_remove_import_metadata(@_);
2393
2394 # Release the lock once it is done
2395 $self->unlock_collection($username, $collect);
2396
2397}
2398
2399sub remove_from_metadata_xml
2400{
2401 my $self = shift @_;
2402 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2403 # metamode generally has no meaning for removing meta, but is used by set_meta
2404 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2405
2406 # Set the call-back functions for the metadata tags
2407 my @rules =
2408 (
2409 _default => 'raw',
2410 'Metadata' => \&rfmxml_metadata,
2411 'FileName' => \&mxml_filename
2412 );
2413
2414 my $parser = XML::Rules->new
2415 (
2416 rules => \@rules,
2417 style => 'filter',
2418 output_encoding => 'utf8',
2419 #normalisespaces => 1,
2420 stripspaces => 2|0|0 # ineffectual
2421 );
2422
2423 my $xml_in = "";
2424 if (!open(MIN,"<$metadata_xml_filename"))
2425 {
2426 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2427 }
2428 else
2429 {
2430 # Read them in
2431 my $line;
2432 while (defined ($line=<MIN>)) {
2433 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
2434 }
2435 close(MIN);
2436
2437 # Filter with the call-back functions
2438 my $xml_out = "";
2439
2440 my $MOUT;
2441 if (!open($MOUT,">$metadata_xml_filename")) {
2442 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2443 }
2444 else {
2445 binmode($MOUT,":utf8");
2446 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2447 close($MOUT);
2448 }
2449 }
2450}
2451
2452sub rfmxml_metadata
2453{
2454 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2455
2456 # metadata.xml does not handle subsections
2457
2458 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2459 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2460 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2461 {
2462 if (!defined $parser->{'parameters'}->{'poscount'})
2463 {
2464 $parser->{'parameters'}->{'poscount'} = 0;
2465 }
2466 else
2467 {
2468 $parser->{'parameters'}->{'poscount'}++;
2469 }
2470
2471 # if overriding but no metapos, then clear all the meta for this metaname
2472 # This is used by set_import_metadata_entry, so don't change this behaviour
2473 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2474 return [];
2475 }
2476
2477 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2478 {
2479 return [];
2480 }
2481 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2482 {
2483 return [];
2484 }
2485 }
2486
2487 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2488 return [$tagname => $attrHash];
2489}
2490
2491sub _remove_live_metadata
2492{
2493 my $self = shift @_;
2494
2495 my $collect = $self->{'collect'};
2496 my $gsdl_cgi = $self->{'gsdl_cgi'};
2497 my $infodbtype = $self->{'infodbtype'};
2498
2499 # Obtain the collect dir
2500 my $site = $self->{'site'};
2501 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2502
2503 # look up additional args
2504 my $docid = $self->{'d'};
2505 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2506 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2507 }
2508
2509 # Generate the dbkey
2510 my $metaname = $self->{'metaname'};
2511 my $dbkey = "$docid.$metaname";
2512
2513 # To people who know $collect_tail please add some comments
2514 # Obtain the live gdbm_db path
2515 my $collect_tail = $collect;
2516 $collect_tail =~ s/^.*[\/|\\]//;
2517 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2518 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2519
2520 # Remove the key
2521 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2522 my $status = system($cmd);
2523 if ($status != 0) {
2524 # Catch error if gdbmdel failed
2525 my $mess = "Failed to delete metadata key: $dbkey\n";
2526
2527 $mess .= "PATH: $ENV{'PATH'}\n";
2528 $mess .= "cmd = $cmd\n";
2529 $mess .= "Exit status: $status\n";
2530 $mess .= "System Error Message: $!\n";
2531
2532 $gsdl_cgi->generate_error($mess);
2533 }
2534 else {
2535 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2536 }
2537
2538}
2539
2540sub remove_live_metadata
2541{
2542 my $self = shift @_;
2543
2544 my $username = $self->{'username'};
2545 my $collect = $self->{'collect'};
2546 my $gsdl_cgi = $self->{'gsdl_cgi'};
2547
2548 if ($baseaction::authentication_enabled) {
2549 # Ensure the user is allowed to edit this collection
2550 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2551 }
2552
2553 # Make sure the collection isn't locked by someone else
2554 $self->lock_collection($username, $collect);
2555
2556 $self->_remove_live_metadata(@_);
2557
2558 $self->unlock_collection($username, $collect);
2559}
2560
2561sub remove_metadata
2562{
2563 my $self = shift @_;
2564
2565 my $where = $self->{'where'};
2566 if(!$where || ($where =~ m/^\s*$/)) {
2567 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2568 return;
2569 }
2570
2571 my $username = $self->{'username'};
2572 my $collect = $self->{'collect'};
2573 my $gsdl_cgi = $self->{'gsdl_cgi'};
2574
2575 if ($baseaction::authentication_enabled) {
2576 # Ensure the user is allowed to edit this collection
2577 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2578 }
2579
2580 # Make sure the collection isn't locked by someone else
2581 $self->lock_collection($username, $collect);
2582
2583 # check which directories need to be processed, specified in $where as
2584 # any combination of import|archives|index|live
2585 if($where =~ m/import/) {
2586 $self->_remove_import_metadata(@_);
2587 }
2588 if($where =~ m/archives/) {
2589 $self->_remove_archives_metadata(@_);
2590 }
2591 if($where =~ m/index/) {
2592 $self->_remove_index_metadata(@_);
2593 }
2594
2595 # Release the lock once it is done
2596 $self->unlock_collection($username, $collect);
2597}
2598
2599# the internal version, without authentication
2600sub _remove_index_metadata
2601{
2602 my $self = shift @_;
2603
2604 my $collect = $self->{'collect'};
2605 my $gsdl_cgi = $self->{'gsdl_cgi'};
2606 my $infodbtype = $self->{'infodbtype'};
2607
2608 # Obtain the collect dir
2609 my $site = $self->{'site'};
2610 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2611
2612 # look up additional args
2613 my $docid = $self->{'d'};
2614 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2615 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2616 }
2617 my $metaname = $self->{'metaname'};
2618 my $metapos = $self->{'metapos'};
2619 my $metavalue = $self->{'metavalue'};
2620
2621 my $status = $self->remove_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
2622
2623 if ($status != 0) {
2624 my $mess = "Failed to remove metadata key: $docid\n";
2625
2626 $mess .= "PATH: $ENV{'PATH'}\n";
2627 $mess .= "Exit status: $status\n";
2628 $mess .= "System Error Message: $!\n";
2629
2630 $gsdl_cgi->generate_error($mess);
2631 }
2632 else {
2633 my $mess = "DB remove successful: Key[$docid]\n";
2634 $mess .= " $metaname";
2635 $mess .= "->[$metapos]" if (defined $metapos);
2636 $mess .= " ($metavalue)" if (defined $metavalue);
2637
2638 $gsdl_cgi->generate_ok_message($mess);
2639 }
2640
2641 #return $status; # in case calling functions have a use for this
2642}
2643
2644sub remove_index_metadata_entry
2645{
2646 my $self = shift @_;
2647 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
2648 my $gsdl_cgi = $self->{'gsdl_cgi'};
2649
2650 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2651 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2652
2653 # To people who know $collect_tail please add some comments
2654 # -> In collection groups, I think collect_tailname is the subcollection name,
2655 # e.g. colgroup-name/col-tail-name
2656 # Obtain the path to the database
2657 my $collect_tail = $collect;
2658 $collect_tail =~ s/^.*[\/|\\]//;
2659 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2660 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2661
2662 # Read the docid entry
2663 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2664
2665 # Check to make sure the key does exist
2666 if (!defined ($doc_rec->{$metaname})) {
2667 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2668 }
2669
2670 # Obtain the specified metadata pos
2671 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2672 if(!defined $metapos && !defined $metavalue) {
2673 $metapos = 0;
2674 }
2675
2676
2677 # consider check key is defined before deleting?
2678 # Loop through the metadata array and ignore the specified position
2679 my $filtered_metadata = [];
2680 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2681 for (my $i=0; $i<$num_metadata_vals; $i++) {
2682 my $metaval = shift(@{$doc_rec->{$metaname}});
2683
2684 if (!defined $metavalue && $i != $metapos) {
2685 push(@$filtered_metadata,$metaval);
2686 }
2687
2688 if(defined $metavalue && !($metavalue eq $metaval))
2689 {
2690 push(@$filtered_metadata,$metaval);
2691 }
2692 }
2693 $doc_rec->{$metaname} = $filtered_metadata;
2694
2695 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2696 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2697 return $status;
2698}
2699
2700sub remove_index_metadata
2701{
2702 my $self = shift @_;
2703
2704 my $username = $self->{'username'};
2705 my $collect = $self->{'collect'};
2706 my $gsdl_cgi = $self->{'gsdl_cgi'};
2707
2708 if ($baseaction::authentication_enabled) {
2709 # Ensure the user is allowed to edit this collection
2710 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2711 }
2712
2713 # Obtain the collect dir
2714 my $site = $self->{'site'};
2715 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2716
2717 # Make sure the collection isn't locked by someone else
2718 $self->lock_collection($username, $collect);
2719
2720 $self->_remove_index_metadata(@_);
2721
2722 # Release the lock once it is done
2723 $self->unlock_collection($username, $collect);
2724}
2725
2726#################################################################################
2727# ERASE META METHODS: ERASE ALL VALUES FOR MATCHING METANAME AT SPECIFIED DOCID #
2728#################################################################################
2729
2730sub erase_metadata
2731{
2732 my $self = shift @_;
2733
2734 my $where = $self->{'where'};
2735 # when $where is unspecified, following behaviour of sub remove_metadata by defaulting to erasing metadata from index
2736 if(!$where || ($where =~ m/^\s*$/)) {
2737 $self->erase_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2738 return;
2739 }
2740
2741 my $username = $self->{'username'};
2742 my $collect = $self->{'collect'};
2743 my $gsdl_cgi = $self->{'gsdl_cgi'};
2744
2745 if ($baseaction::authentication_enabled) {
2746 # Ensure the user is allowed to edit this collection
2747 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2748 }
2749
2750 # Make sure the collection isn't locked by someone else
2751 $self->lock_collection($username, $collect);
2752
2753 # check which directories need to be processed, specified in $where as
2754 # any combination of import|archives|index|live
2755 if($where =~ m/import/) {
2756 $self->_erase_import_metadata(@_);
2757 }
2758 if($where =~ m/archives/) {
2759 $self->_erase_archives_metadata(@_);
2760 }
2761 if($where =~ m/index/) {
2762 $self->_erase_index_metadata(@_);
2763 }
2764
2765 # Release the lock once it is done
2766 $self->unlock_collection($username, $collect);
2767}
2768
2769# the internal version, without authentication
2770sub _erase_index_metadata
2771{
2772 my $self = shift @_;
2773
2774 my $collect = $self->{'collect'};
2775 my $gsdl_cgi = $self->{'gsdl_cgi'};
2776 my $infodbtype = $self->{'infodbtype'};
2777
2778 # Obtain the collect dir
2779 my $site = $self->{'site'};
2780 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2781
2782 # look up additional args
2783 my $docid = $self->{'d'};
2784 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2785 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2786 }
2787 my $metaname = $self->{'metaname'};
2788 my $metapos = undef;
2789 my $metavalue = undef;
2790
2791 # To people who know $collect_tail please add some comments
2792 # -> In collection groups, I think collect_tailname is the subcollection name,
2793 # e.g. colgroup-name/col-tail-name
2794 # Obtain the path to the database
2795 my $collect_tail = $collect;
2796 $collect_tail =~ s/^.*[\/|\\]//;
2797 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2798 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2799
2800 # Read the docid entry
2801 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2802
2803 # Check to make sure the key does exist
2804 if (defined ($doc_rec->{$metaname})) {
2805
2806 $doc_rec->{$metaname} = [];
2807
2808 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2809 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2810
2811 if ($status != 0) {
2812 my $mess = "Failed to erase metadata key: $docid\n";
2813
2814 $mess .= "PATH: $ENV{'PATH'}\n";
2815 $mess .= "Exit status: $status\n";
2816 $mess .= "System Error Message: $!\n";
2817
2818 $gsdl_cgi->generate_error($mess);
2819 }
2820 else {
2821 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2822 $mess .= " $metaname";
2823 $mess .= "->[$metapos]" if (defined $metapos);
2824 $mess .= " ($metavalue)" if (defined $metavalue);
2825
2826 $gsdl_cgi->generate_ok_message($mess);
2827 }
2828 }
2829 else { # no such metaname. Is it really an error?
2830 $gsdl_cgi->generate_ok_message("Can't erase. No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2831 }
2832 #return $status; # in case calling functions have a use for this
2833}
2834
2835sub erase_index_metadata
2836{
2837 my $self = shift @_;
2838
2839 my $username = $self->{'username'};
2840 my $collect = $self->{'collect'};
2841 my $gsdl_cgi = $self->{'gsdl_cgi'};
2842
2843 if ($baseaction::authentication_enabled) {
2844 # Ensure the user is allowed to edit this collection
2845 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2846 }
2847
2848 # Obtain the collect dir
2849 my $site = $self->{'site'};
2850 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2851
2852 # Make sure the collection isn't locked by someone else
2853 $self->lock_collection($username, $collect);
2854
2855 $self->_erase_index_metadata(@_);
2856
2857 # Release the lock once it is done
2858 $self->unlock_collection($username, $collect);
2859}
2860
2861
2862# Given a metaname, removes ALL metadata with that metaname at the provided docid
2863sub _erase_archives_metadata
2864{
2865 my $self = shift @_;
2866
2867 my $collect = $self->{'collect'};
2868 my $gsdl_cgi = $self->{'gsdl_cgi'};
2869 my $infodbtype = $self->{'infodbtype'};
2870
2871 # Obtain the collect and archive dir
2872 my $site = $self->{'site'};
2873 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2874
2875 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2876
2877 # look up additional args
2878 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2879
2880 my $metaname = $self->{'metaname'};
2881
2882 # metapos, metavalue and metamode are never provided and therefore all undefined for erase_archives_metadata
2883 my $metapos = undef;
2884 my $metavalue = undef;
2885 my $metamode = "override"; # when override and no metapos or metaval, all values for metaname will be affected
2886
2887 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2888 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2889
2890 # This now stores the full pathname
2891 my $doc_file = $doc_rec->{'doc-file'}->[0];
2892
2893 # check if request if for file-level doc-version history 'nminus-<n>' version
2894 my $dv = $self->{'dv'};
2895 if (defined $dv && ($dv ne "")) {
2896 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
2897
2898 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
2899 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
2900 }
2901
2902 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
2903
2904 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2905
2906 if ($status == 0)
2907 {
2908 my $mess = "\nerase-archives-metadata successful: \nKey[$docid]\n";
2909 $mess .= " $metaname";
2910 $gsdl_cgi->generate_ok_message($mess);
2911 }
2912 else
2913 {
2914 my $mess .= "Failed to erase archives metadata key: $docid\n";
2915 $mess .= "Exit status: $status\n";
2916 $mess .= "System Error Message: $!\n";
2917 $mess .= "-" x 20 . "\n";
2918
2919 $gsdl_cgi->generate_error($mess);
2920 }
2921
2922 #return $status; # in case calling functions have a use for this
2923}
2924
2925# practically identical to remove_archives_metadata
2926sub erase_archives_metadata
2927{
2928 my $self = shift @_;
2929
2930 my $username = $self->{'username'};
2931 my $collect = $self->{'collect'};
2932 my $gsdl_cgi = $self->{'gsdl_cgi'};
2933
2934 if ($baseaction::authentication_enabled)
2935 {
2936 # Ensure the user is allowed to edit this collection
2937 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2938 }
2939
2940 # Make sure the collection isn't locked by someone else
2941 $self->lock_collection($username, $collect);
2942
2943 $self->_erase_archives_metadata(@_);
2944
2945 # Release the lock once it is done
2946 $self->unlock_collection($username, $collect);
2947}
2948
2949sub _erase_import_metadata
2950{
2951 my $self = shift @_;
2952
2953 my $collect = $self->{'collect'};
2954 my $gsdl_cgi = $self->{'gsdl_cgi'};
2955 my $infodbtype = $self->{'infodbtype'};
2956
2957 # Obtain the collect dir
2958 my $site = $self->{'site'};
2959 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2960
2961 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2962
2963 # look up additional args
2964 my $docid = $self->{'d'};
2965 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2966 {
2967 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2968 }
2969
2970 my $metaname = $self->{'metaname'};
2971 my $metapos = undef;
2972 my $metavalue = undef;
2973 my $metamode = "override";
2974
2975 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2976 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2977 my $metadata_xml_file;
2978 my $import_filename = undef;
2979 if (defined $docid)
2980 {
2981 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2982 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2983
2984 # This now stores the full pathname
2985 $import_filename = $doc_rec->{'src-file'}->[0];
2986 $import_filename = &util::placeholders_to_abspath($import_filename);
2987 }
2988
2989 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2990 {
2991 $gsdl_cgi->generate_error("There is no metadata\n");
2992 }
2993
2994 # figure out correct metadata.xml file [?]
2995 # Assuming the metadata.xml file is next to the source file
2996 # Note: This will not work if it is using the inherited metadata from the parent folder
2997 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2998 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2999
3000 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode=override, means all values of metaname are affected
3001
3002 my $mess = "erase-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
3003 $mess .= " $metaname";
3004 $mess .= " = $metavalue\n";
3005
3006 $gsdl_cgi->generate_ok_message($mess);
3007
3008 #return $status; # in case calling functions have a use for this
3009}
3010
3011sub erase_import_metadata
3012{
3013 my $self = shift @_;
3014
3015 my $username = $self->{'username'};
3016 my $collect = $self->{'collect'};
3017 my $gsdl_cgi = $self->{'gsdl_cgi'};
3018
3019 if ($baseaction::authentication_enabled) {
3020 # Ensure the user is allowed to edit this collection
3021 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3022 }
3023
3024 # Make sure the collection isn't locked by someone else
3025 $self->lock_collection($username, $collect);
3026
3027 $self->_erase_import_metadata(@_);
3028
3029 # Release the lock once it is done
3030 $self->unlock_collection($username, $collect);
3031
3032}
3033
3034# removes all meta with matching metaname for the specified docid from the live metadata index
3035sub _erase_live_metadata
3036{
3037 my $self = shift @_;
3038 # At present, _remove_live_metadata doesn't take metaval or metapos either and therefore has the desired effect
3039 $self->_remove_live_metadata(@_);
3040
3041}
3042
3043sub erase_live_metadata
3044{
3045 my $self = shift @_;
3046
3047 my $username = $self->{'username'};
3048 my $collect = $self->{'collect'};
3049 my $gsdl_cgi = $self->{'gsdl_cgi'};
3050
3051 if ($baseaction::authentication_enabled) {
3052 # Ensure the user is allowed to edit this collection
3053 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3054 }
3055
3056 # Make sure the collection isn't locked by someone else
3057 $self->lock_collection($username, $collect);
3058
3059 $self->_erase_live_metadata(@_);
3060
3061 $self->unlock_collection($username, $collect);
3062}
3063
3064
3065# Was trying to reused the codes, but the functions need to be broken
3066# down more before they can be reused, otherwise there will be too
3067# much overhead and duplicate process...
3068sub insert_metadata
3069{
3070 my $self = shift @_;
3071
3072 my $username = $self->{'username'};
3073 my $collect = $self->{'collect'};
3074 my $gsdl_cgi = $self->{'gsdl_cgi'};
3075 my $infodbtype = $self->{'infodbtype'};
3076
3077 # If the import metadata and gdbm database have been updated, we
3078 # need to insert some notification to warn user that the the text
3079 # they see at the moment is not indexed and require a rebuild.
3080 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3081
3082 if ($baseaction::authentication_enabled) {
3083 # Ensure the user is allowed to edit this collection
3084 $self->authenticate_user($username, $collect);
3085 }
3086
3087 # Obtain the collect and archive dir
3088 my $site = $self->{'site'};
3089 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3090 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3091
3092 # Make sure the collection isn't locked by someone else
3093 $self->lock_collection($username, $collect);
3094
3095 # Check additional args
3096 my $docid = $self->{'d'};
3097 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
3098 $gsdl_cgi->generate_error("No document id is specified: d=...");
3099 }
3100 my $metaname = $self->{'metaname'};
3101 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
3102 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3103 }
3104 my $metavalue = $self->{'metavalue'};
3105 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
3106 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3107 }
3108 # make "accumulate" the default (less destructive, as it won't actually
3109 # delete any existing values)
3110 my $metamode = "accumulate";
3111
3112 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3113 my $metapos = undef;
3114 my $prevmetavalue = undef;
3115
3116 #=======================================================================#
3117 # set_import_metadata [START]
3118 #=======================================================================#
3119 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3120 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3121 my $metadata_xml_file;
3122 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3123 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3124
3125 # This now stores the full pathname
3126 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3127 $import_filename = &util::placeholders_to_abspath($import_filename);
3128
3129 # figure out correct metadata.xml file [?]
3130 # Assuming the metadata.xml file is next to the source file
3131 # Note: This will not work if it is using the inherited metadata from the parent folder
3132 my ($import_tailname, $import_dirname)
3133 = File::Basename::fileparse($import_filename);
3134 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3135
3136 # Shane's escape characters
3137 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3138 $metavalue =~ s/\,/&#44;/g;
3139 $metavalue =~ s/\:/&#58;/g;
3140 $metavalue =~ s/\|/&#124;/g;
3141 $metavalue =~ s/\(/&#40;/g;
3142 $metavalue =~ s/\)/&#41;/g;
3143 $metavalue =~ s/\[/&#91;/g;
3144 $metavalue =~ s/\\/&#92;/g;
3145 $metavalue =~ s/\]/&#93;/g;
3146 $metavalue =~ s/\{/&#123;/g;
3147 $metavalue =~ s/\}/&#125;/g;
3148 $metavalue =~ s/\"/&#34;/g;
3149 $metavalue =~ s/\`/&#96;/g;
3150 $metavalue =~ s/\n/_newline_/g;
3151
3152 # Edit the metadata.xml
3153 # Modified by Jeffrey from DL Consulting
3154 # Handle the case where there is one metadata.xml file for multiple FileSets
3155 # The XML filter needs to know whether it is in the right FileSet
3156 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3157 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3158 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3159 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3160 #=======================================================================#
3161 # set_import_metadata [END]
3162 #=======================================================================#
3163
3164
3165 #=======================================================================#
3166 # set_metadata (accumulate version) [START]
3167 #=======================================================================#
3168 # To people who know $collect_tail please add some comments
3169 # Obtain path to the database
3170 my $collect_tail = $collect;
3171 $collect_tail =~ s/^.*[\/|\\]//;
3172 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3173 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3174
3175 # Read the docid entry
3176 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3177
3178 # Protect the quotes
3179 $metavalue =~ s/\"/\\\"/g;
3180
3181 # Adds the pending macro
3182 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3183
3184 # If the metadata doesn't exist, create a new one
3185 if (!defined($doc_rec->{$metaname})){
3186 $doc_rec->{$metaname} = [ $macro_metavalue ];
3187 }
3188 # Else, let's acculumate the values
3189 else {
3190 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3191 }
3192
3193 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3194 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3195
3196 if ($status != 0) {
3197 # Catch error if gdbmget failed
3198 my $mess = "Failed to set metadata key: $docid\n";
3199
3200 $mess .= "PATH: $ENV{'PATH'}\n";
3201 $mess .= "Exit status: $status\n";
3202 $mess .= "System Error Message: $!\n";
3203
3204 $gsdl_cgi->generate_error($mess);
3205 }
3206 else {
3207 my $mess = "insert-metadata successful: Key[$docid]\n";
3208 $mess .= " [In metadata.xml] $metaname";
3209 $mess .= " = $metavalue\n";
3210 $mess .= " [In database] $metaname";
3211 $mess .= " = $macro_metavalue\n";
3212 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3213 $gsdl_cgi->generate_ok_message($mess);
3214 }
3215 #=======================================================================#
3216 # set_metadata (accumulate version) [END]
3217 #=======================================================================#
3218
3219 # Release the lock once it is done
3220 $self->unlock_collection($username, $collect);
3221}
3222
3223
3224
3225
3226sub inc_fldv_nminus1
3227{
3228 my $self = shift @_;
3229
3230 my $username = $self->{'username'};
3231 my $collect = $self->{'collect'};
3232 my $gsdl_cgi = $self->{'gsdl_cgi'};
3233 my $infodbtype = $self->{'infodbtype'};
3234
3235 # Obtain the collect dir
3236 my $site = $self->{'site'};
3237 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3238
3239 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
3240
3241 # look up additional args
3242 my $docid = $self->{'d'};
3243
3244
3245 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3246 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3247
3248 # This now stores the full pathname [is this still true??]
3249 my $doc_file = $doc_rec->{'doc-file'}->[0];
3250
3251 my ($unused_doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
3252 my $doc_full_dirname = &util::filename_cat($archive_dir,$doc_dirname);
3253
3254 my $fldv_full_dirname = &util::filename_cat($doc_full_dirname,$FLDV_HISTORY_DIR);
3255
3256 my $had_error = 0;
3257 my $had_error_mess = undef;
3258
3259 if (-d $fldv_full_dirname) {
3260 my $fldv_filtered_dirs = &FileUtils::readDirectoryFiltered($fldv_full_dirname,undef,"^nminus-\\d+\$");
3261
3262 my @sorted_fldv_filtered_dirs = sort {
3263 my ($a_num) = ($a =~ m/(\d+)$/);
3264 my ($b_num) = ($b =~ m/(\d+)$/);
3265
3266 # sort into descending order
3267 return $b_num <=> $a_num;
3268 } @$fldv_filtered_dirs;
3269
3270 ## shuffle all the nminus-<n> folders down by one
3271
3272 foreach my $nminus_n (@sorted_fldv_filtered_dirs) {
3273
3274 my $nminus_n_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,$nminus_n);
3275 if (-d $nminus_n_full_dir) {
3276
3277 my ($n) = ($nminus_n =~ m/(\d+)$/);
3278 my $new_n = $n + 1;
3279
3280 my $new_full_dir_plus1 = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-$new_n");
3281
3282 my $move_ok = &FileUtils::renameDirectory($nminus_n_full_dir,$new_full_dir_plus1,
3283 { 'strict' => 1 } );
3284
3285 if (!$move_ok) {
3286 $had_error_mess = "Error: Failed to move '$nminus_n' to nminus-$new_n in $fldv_full_dirname\n";
3287 $had_error = 1;
3288 last;
3289 }
3290
3291 }
3292 else {
3293 print STDERR "Warning: skipping $nminus_n_full_dir as it is not a directory\n";
3294 }
3295 }
3296 }
3297 else {
3298 # first time asked to perform the inc-nminus-1 action for this doc,
3299 # and there is not yet even a _fldv directory!
3300 # => make the directory (and that's all that's needed at this stage)
3301 print STDERR "First time file-level document-version history has been applied to this document\n";
3302 print STDERR "Creating $fldv_full_dirname\n";
3303
3304 &FileUtils::makeDirectory($fldv_full_dirname);
3305 }
3306
3307 if (!$had_error) {
3308 ### Now need to copy everything top level in doc dir into nminus-1
3309
3310 my $nminus_1_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-1");
3311
3312 my $copy_ok = &FileUtils::copyFilesRefRecursive([$doc_full_dirname],$nminus_1_full_dir,
3313 { 'strict' => 1, 'exclude_filter_re' => "^$FLDV_HISTORY_DIR\$" } );
3314
3315 if (!$copy_ok) {
3316 $had_error_mess = "Error: Failed to clone document $docid to form 'nminus-1' directory in $fldv_full_dirname";
3317 $had_error = 1;
3318 }
3319 }
3320
3321 if ($had_error) {
3322 $gsdl_cgi->generate_error($had_error_mess);
3323 }
3324 else {
3325 $gsdl_cgi->generate_ok_message("Successfully incremented the File-Level Document-Version History for $docid");
3326 }
3327}
3328
3329
3330############################# REMOVE METADATA ARRAY METHODS ############################
3331
3332
3333sub remove_metadata_array
3334{
3335 my $self = shift @_;
3336
3337 my $where = $self->{'where'};
3338 if(!$where || ($where =~ m/^\s*$/)) {
3339 $self->remove_index_metadata_array(@_); # mimic set_metadata_array for default behaviour
3340 return;
3341 }
3342
3343 my $username = $self->{'username'};
3344 my $collect = $self->{'collect'};
3345 my $gsdl_cgi = $self->{'gsdl_cgi'};
3346
3347 if ($baseaction::authentication_enabled) {
3348 # Ensure the user is allowed to edit this collection
3349 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3350 }
3351
3352 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
3353 # But copied code from set_metadata_array
3354 my $site = $self->{'site'};
3355 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3356 $gsdl_cgi->checked_chdir($collect_dir);
3357
3358 # Make sure the collection isn't locked by someone else
3359 $self->lock_collection($username, $collect);
3360
3361 if($where =~ m/import/) {
3362 $self->_remove_import_metadata_array(@_);
3363 }
3364 if($where =~ m/archives/) {
3365 $self->_remove_archives_metadata_array(@_);
3366 }
3367 if($where =~ m/index/) {
3368 $self->_remove_index_metadata_array(@_);
3369 }
3370 if($where =~ m/live/) {
3371 $self->_remove_live_metadata_array(@_);
3372 }
3373
3374 # Release the lock once it is done
3375 $self->unlock_collection($username, $collect);
3376}
3377
3378sub _remove_index_metadata_array
3379{
3380 my $self = shift @_;
3381
3382 my $collect = $self->{'collect'};
3383 my $gsdl_cgi = $self->{'gsdl_cgi'};
3384
3385 # Obtain the collect dir
3386 my $site = $self->{'site'};
3387 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3388
3389 # look up additional args
3390
3391 my $infodbtype = $self->{'infodbtype'};
3392
3393 my $json_str = $self->{'json'};
3394 my $doc_array = decode_json $json_str;
3395
3396
3397 my $global_status = 0;
3398 my $global_mess = "";
3399
3400 my @all_docids = ();
3401
3402 foreach my $doc_array_rec ( @$doc_array ) {
3403
3404 my $status = -1;
3405 my $docid = $doc_array_rec->{'docid'};
3406
3407 push(@all_docids,$docid);
3408
3409 my $metaname = $doc_array_rec->{'metaname'};
3410 if(defined $metaname) {
3411 # metapos can be undef and if metaval is also undef, metapos=0 will be used
3412 my $metapos = $doc_array_rec->{'metapos'};
3413 my $metavalue = $doc_array_rec->{'metavalue'};
3414 # remove-import-metadata and remove-archives-metadata recognise metamode option
3415 # but not remove-index-metadata (nor remove-live-metadata)
3416 #my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
3417
3418 $status = $self->remove_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
3419 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3420 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3421
3422 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
3423 $metaname = $metatable_rec->{'metaname'};
3424 #my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'}; # metamode not used by remove_index_metadata
3425
3426 #my $metapos = $metatable_rec->{'metapositions'} || $doc_array_rec->{'metapos'} || $self->{'metapos'} || undef; # try most-to-least specific metapos setting, and fallback on undef
3427
3428
3429 my $metapositions = $metatable_rec->{'metapositions'} || undef; # a sub-subarray
3430 if(defined $metapositions) {
3431
3432 my $metavalue = undef;
3433 # Need to have metapositions sorted in *descending* order for
3434 # remove_metadata_array to work. Start deleting from end of metadata list,
3435 # so we don't have to recalculate metapos after each delete at a metapos
3436 @$metapositions = reverse(sort(@$metapositions)); # https://perldoc.perl.org/functions/reverse
3437
3438 foreach my $metapos ( @$metapositions ) { # metapositions is an array
3439 $status = $self->remove_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue); # how do we use metamode in set_meta_entry?
3440
3441 }
3442 } else {
3443 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3444 next unless defined $metavals;
3445 my $metapos = undef;
3446
3447 foreach my $metavalue ( @$metavals ) { # metavals is an array
3448 $status = $self->remove_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue); # how do we use metamode in set_meta_entry?
3449 }
3450 }
3451 }
3452 }
3453
3454 if ($status != 0) {
3455 # Catch error if set infodb entry failed
3456 $global_status = $status;
3457 $global_mess .= "Failed to remove metadata key from index: $docid\n";
3458 $global_mess .= "Exit status: $status\n";
3459 # Need to check the error message $! immediately after an IO call goes wrong
3460 # not after returning from many nested calls
3461 # https://stackoverflow.com/questions/1605195/inappropriate-ioctl-for-device
3462 # $global_mess .= "System Error Message: $!\n";
3463 $global_mess .= "-" x 20;
3464 }
3465 }
3466
3467 if ($global_status != 0) {
3468 $global_mess .= "PATH: $ENV{'PATH'}\n";
3469 $gsdl_cgi->generate_error($global_mess);
3470 }
3471 else {
3472 my $mess = "remove-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3473 $gsdl_cgi->generate_ok_message($mess);
3474 }
3475}
3476
3477sub remove_index_metadata_array
3478{
3479 my $self = shift @_;
3480
3481 my $username = $self->{'username'};
3482 my $collect = $self->{'collect'};
3483 my $gsdl_cgi = $self->{'gsdl_cgi'};
3484
3485 if ($baseaction::authentication_enabled) {
3486 # Ensure the user is allowed to edit this collection
3487 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3488 }
3489
3490 # Obtain the collect dir
3491 my $site = $self->{'site'};
3492 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3493
3494 # Skip this step as lock_collection does a chdir into final destination directory anyway?
3495 $gsdl_cgi->checked_chdir($collect_dir);
3496
3497
3498 # Make sure the collection isn't locked by someone else
3499 $self->lock_collection($username, $collect);
3500
3501 $self->_remove_index_metadata_array(@_);
3502
3503 # Release the lock once it is done
3504 $self->unlock_collection($username, $collect);
3505}
3506
3507# Untested, based on also (probably) untested, experimental _set_live_metadata_array
3508# TODO: How can we delete at a dbkey with a particular value or at a particular metapos?
3509# Same question for sub remove_live_metadata
3510sub _remove_live_metadata_array
3511{
3512 my $self = shift @_;
3513
3514 my $collect = $self->{'collect'};
3515 my $gsdl_cgi = $self->{'gsdl_cgi'};
3516
3517 # Obtain the collect dir
3518 my $site = $self->{'site'};
3519 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3520
3521 # look up additional args
3522 my $infodbtype = $self->{'infodbtype'};
3523 # To people who know $collect_tail please add some comments
3524 # Obtain path to the database
3525 my $collect_tail = $collect;
3526 $collect_tail =~ s/^.*[\/|\\]//;
3527 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3528 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3529
3530
3531 my $json_str = $self->{'json'};
3532 my $doc_array = decode_json $json_str;
3533
3534
3535 my $global_status = 0;
3536 my $global_mess = "";
3537
3538 my @all_docids = ();
3539
3540
3541 foreach my $doc_array_rec ( @$doc_array ) {
3542
3543 my $status = -1;
3544 my $docid = $doc_array_rec->{'docid'};
3545
3546 push(@all_docids,$docid);
3547
3548 my $dbkey = "";
3549 my $metaname = $doc_array_rec->{'metaname'};
3550 if(defined $metaname) {
3551 $dbkey = "$docid.$metaname";
3552
3553 # Remove the key
3554 #my $metavalue = $doc_array_rec->{'metavalue'};
3555 #my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
3556 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3557 $status = system($cmd);
3558
3559 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3560 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3561 foreach my $metatable_rec ( @$metatable ) {
3562 $metaname = $metatable_rec->{'metaname'};
3563 $dbkey = "$docid.$metaname";
3564
3565 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3566 $status = system($cmd);
3567 #my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3568 #foreach my $metavalue ( @$metavals ) {
3569 #my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
3570 #$status = system($cmd);
3571 #}
3572 }
3573
3574 }
3575
3576 if ($status != 0) {
3577 # Catch error if gdbmdel failed
3578 $global_status = $status;
3579 $global_mess .= "Failed to delete metadata key: $dbkey\n"; # $dbkey
3580 $global_mess .= "Exit status: $status\n";
3581 $global_mess .= "System Error Message: $!\n";
3582 $global_mess .= "-" x 20;
3583 }
3584 }
3585
3586 if ($global_status != 0) {
3587 $global_mess .= "PATH: $ENV{'PATH'}\n";
3588 $gsdl_cgi->generate_error($global_mess);
3589 }
3590 else {
3591 my $mess = "remove-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3592 $gsdl_cgi->generate_ok_message($mess);
3593 }
3594}
3595
3596sub remove_live_metadata_array
3597{
3598 my $self = shift @_;
3599
3600 my $username = $self->{'username'};
3601 my $collect = $self->{'collect'};
3602 my $gsdl_cgi = $self->{'gsdl_cgi'};
3603
3604 if ($baseaction::authentication_enabled) {
3605 # Ensure the user is allowed to edit this collection
3606 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3607 }
3608
3609 # Obtain the collect dir
3610 my $site = $self->{'site'};
3611 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3612
3613 $gsdl_cgi->checked_chdir($collect_dir);
3614
3615 # Make sure the collection isn't locked by someone else
3616 $self->lock_collection($username, $collect);
3617
3618 $self->_remove_live_metadata_array(@_);
3619
3620 # Release the lock once it is done
3621 $self->unlock_collection($username, $collect);
3622}
3623
3624
3625sub remove_import_metadata_array
3626{
3627 my $self = shift @_;
3628
3629 my $username = $self->{'username'};
3630 my $collect = $self->{'collect'};
3631 my $gsdl_cgi = $self->{'gsdl_cgi'};
3632
3633 if ($baseaction::authentication_enabled) {
3634 # Ensure the user is allowed to edit this collection
3635 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3636 }
3637
3638 # Obtain the collect dir
3639 my $site = $self->{'site'};
3640 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3641
3642 $gsdl_cgi->checked_chdir($collect_dir);
3643
3644 # Make sure the collection isn't locked by someone else
3645 $self->lock_collection($username, $collect);
3646
3647 $self->_remove_import_metadata_array(@_);
3648
3649 # Release the lock once it is done
3650 $self->unlock_collection($username, $collect);
3651
3652}
3653
3654
3655sub _remove_import_metadata_array
3656{
3657 my $self = shift @_;
3658
3659 my $collect = $self->{'collect'};
3660 my $gsdl_cgi = $self->{'gsdl_cgi'};
3661
3662 # Obtain the collect dir
3663 my $site = $self->{'site'};
3664 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3665
3666 # look up additional args
3667
3668 my $infodbtype = $self->{'infodbtype'};
3669
3670 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3671 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3672
3673 my $json_str = $self->{'json'};
3674 my $doc_array = decode_json $json_str;
3675
3676 my $global_status = 0;
3677 my $global_mess = "";
3678
3679# $gsdl_cgi->generate_error("*** Deliberately causing error to test errorResponse function\n");
3680
3681 my @all_docids = ();
3682
3683 foreach my $doc_array_rec ( @$doc_array )
3684 {
3685 my $status = -1;
3686 my $docid = $doc_array_rec->{'docid'};
3687
3688 my $metadata_xml_filename = undef;
3689
3690 my ($docid_root,$docid_secnum);
3691 if(defined $docid) {
3692 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
3693 # as yet no support for setting or removing subsection metadata in metadata.xml
3694 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
3695 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for deleting import metadata at subsections' level.\n");
3696 next; # skip this docid in for loop
3697 }
3698 }
3699
3700 push(@all_docids,$docid); # docid_root rather
3701
3702 my $metaname = $doc_array_rec->{'metaname'};
3703 if (defined $metaname) {
3704 my $metapos = $doc_array_rec->{'metapos'} || $self->{'metapos'};
3705 my $metavalue = $doc_array_rec->{'metavalue'}; #|| $self->{'metavalue'};
3706 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'} || undef; # see set_import_meta_array for fallback
3707
3708 $metadata_xml_filename = $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode); # at this point, docid_root = docid
3709
3710 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3711 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3712
3713 foreach my $metatable_rec ( @$metatable ) {
3714 $metaname = $metatable_rec->{'metaname'};
3715 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'} || undef;
3716
3717 my $metapositions = $metatable_rec->{'metapositions'}; # a sub-subarray
3718 if(defined $metapositions) {
3719
3720 my $metavalue = undef;
3721
3722 # need metapositions sorted in *descending* order
3723 @$metapositions = reverse(sort(@$metapositions));
3724
3725 foreach my $metapos ( @$metapositions ) {
3726
3727 $metadata_xml_filename = $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode); # at this point, docid_root = docid
3728
3729 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3730 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3731 # metavalue for this metaname will be lost
3732 #$metamode = "accumulate";
3733 #}
3734 }
3735 }
3736 else {
3737 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3738 next unless defined $metavals;
3739 foreach my $metavalue ( @$metavals ) {
3740 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
3741 my $metapos = undef;
3742
3743 $self->remove_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode); # at this point, docid_root = docid
3744
3745 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3746 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3747 # metavalue for this metaname will be lost
3748 #$metamode = "accumulate";
3749 #}
3750 }
3751 }
3752 }
3753 }
3754
3755 # errors would have caused a call to die, so can't report those
3756 if (defined $metadata_xml_filename) {
3757
3758 $global_mess .= "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
3759 }
3760 }
3761
3762
3763 # always a success message, as any failure would have caused a call to die within remove_import_metadata_entry
3764 my $mess = "remove-import-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3765 $mess .= $global_mess . "\n";
3766 $gsdl_cgi->generate_ok_message($mess);
3767}
3768
3769
3770sub _remove_archives_metadata_array
3771{
3772 my $self = shift @_;
3773
3774 my $collect = $self->{'collect'};
3775 my $gsdl_cgi = $self->{'gsdl_cgi'};
3776
3777 # Obtain the collect dir
3778 my $site = $self->{'site'};
3779 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3780
3781 # look up additional args
3782
3783 my $infodbtype = $self->{'infodbtype'};
3784
3785 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3786
3787 my $json_str = $self->{'json'};
3788 my $doc_array = decode_json $json_str;
3789
3790
3791 my $global_status = 0;
3792 my $global_mess = "";
3793
3794 my @all_docids = ();
3795
3796 foreach my $doc_array_rec ( @$doc_array ) {
3797 my $status = -1;
3798 my $full_docid = $doc_array_rec->{'docid'};
3799 my ($docid, $docid_secnum) = ($full_docid =~ m/^(.*?)(\..*)?$/);
3800 # usercomments meta are at root and so docid_secnum will be undef which is acceptable
3801 my $dv = $doc_array_rec->{'dv'};
3802
3803 push(@all_docids,$full_docid);
3804
3805 my $metaname = $doc_array_rec->{'metaname'};
3806 if(defined $metaname) {
3807
3808 my $metapos = $doc_array_rec->{'metapos'};
3809 my $metavalue = $doc_array_rec->{'metavalue'};
3810 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
3811
3812 $status = $self->remove_archives_metadata_entry($gsdl_cgi,$archive_dir,$infodbtype,
3813 $docid, $docid_secnum, $dv, $metaname,$metapos,$metavalue,$metamode);
3814
3815 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
3816 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
3817
3818 foreach my $metatable_rec ( @$metatable ) {
3819 $metaname = $metatable_rec->{'metaname'};
3820 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
3821
3822 my $metapositions = $metatable_rec->{'metapositions'}; # a sub-subarray
3823 if(defined $metapositions) {
3824
3825 my $metavalue = undef;
3826
3827 # need metapositions sorted in *descending* order
3828 @$metapositions = reverse(sort(@$metapositions));
3829
3830 foreach my $metapos ( @$metapositions ) {
3831
3832 $status = $self->remove_archives_metadata_entry($gsdl_cgi, $archive_dir, $infodbtype,
3833 $docid,$docid_secnum,$dv, $metaname,$metapos,$metavalue,$metamode);
3834
3835 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3836 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3837 # metavalue for this metaname will be lost
3838 #$metamode = "accumulate";
3839 #}
3840 }
3841 }
3842 else {
3843 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
3844 next unless defined $metavals;
3845 foreach my $metavalue ( @$metavals ) {
3846 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
3847 my $metapos = undef;
3848
3849 $status = $self->remove_archives_metadata_entry($gsdl_cgi,$archive_dir,$infodbtype,
3850 $docid,$docid_secnum, $dv, $metaname,$metapos,$metavalue,$metamode);
3851 #if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
3852 # need to accumulate subsequent metavals for this metaname, else the just-assigned
3853 # metavalue for this metaname will be lost
3854 #$metamode = "accumulate";
3855 #}
3856 }
3857 }
3858 }
3859 }
3860
3861 if ($status != 0) {
3862 # Catch error if set infodb entry failed
3863 $global_status = $status;
3864 $global_mess .= "Failed to remove archives metadata key: $full_docid\n";
3865 $global_mess .= "Exit status: $status\n";
3866 $global_mess .= "System Error Message: $!\n";
3867 $global_mess .= "-" x 20 . "\n";
3868 }
3869 }
3870
3871 if ($global_status != 0) {
3872 $global_mess .= "PATH: $ENV{'PATH'}\n";
3873 $gsdl_cgi->generate_error($global_mess);
3874 }
3875 else {
3876 my $mess = "remove-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
3877 $gsdl_cgi->generate_ok_message($mess);
3878 }
3879}
3880
3881sub remove_archives_metadata_array
3882{
3883 my $self = shift @_;
3884
3885 my $username = $self->{'username'};
3886 my $collect = $self->{'collect'};
3887 my $gsdl_cgi = $self->{'gsdl_cgi'};
3888
3889 if ($baseaction::authentication_enabled) {
3890 # Ensure the user is allowed to edit this collection
3891 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3892 }
3893
3894 # Obtain the collect dir
3895 my $site = $self->{'site'};
3896 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3897
3898 $gsdl_cgi->checked_chdir($collect_dir);
3899
3900 # Make sure the collection isn't locked by someone else
3901 $self->lock_collection($username, $collect);
3902
3903 $self->_remove_archives_metadata_array(@_);
3904
3905 # Release the lock once it is done
3906 $self->unlock_collection($username, $collect);
3907}
3908
3909
3910##################################### END REMOVE ARRAY METHODS #################
3911
3912# not returning 1; here since this file is conditionally included by metadataction.pm
3913# and not otherwise meant to be used on its own
3914
Note: See TracBrowser for help on using the repository browser.