source: main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm@ 38191

Last change on this file since 38191 was 38191, checked in by anupama, 9 months ago

Moving the cleaning of parameters into more atomic function set-index-metadata-entry, so it's always executed. It doesn't hurt to always do it, and will ensure more consistent results when the subroutine is called directly as it sometimes is from other functions. Committing changes ahead of my work adding remove-metadata-array subroutines.

File size: 113.6 KB
Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34package metadataaction;
35
36use strict;
37
38use cgiactions::baseaction;
39use cgiactions::metadataactioncommon;
40
41use dbutil;
42use ghtml;
43
44use JSON;
45
46# convenient to have shorter version around
47my $FLDV_HISTORY_DIR = $metadataactioncommon::FLDV_HISTORY_DIR;
48
49$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
50{
51 #SET METHODS
52 "set-live-metadata" => {
53 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
57 'compulsory-args' => [ "metaname", "metavalue" ],
58 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "dv", "f" ] },
59
60 "set-index-metadata" => {
61 'compulsory-args' => [ "d", "metaname", "metavalue" ],
62 'optional-args' => [ "metapos", "metamode" ] },
63
64 "set-archives-metadata" => {
65 'compulsory-args' => [ "d", "metaname", "metavalue" ],
66 'optional-args' => [ "dv", "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
67
68 ###### !!!!!
69 # Should there not be a "set-archives-text" also ???!!!!??
70 ###### !!!!!
71
72 "set-archives-assocfile" => {
73 'compulsory-args' => [ "d", "assocname" ],
74 'optional-args' => [ "dv", "fileupload", "filedata" ] ,
75 'help-string' => [ "Because this action uploads a file, this action needs to be POSTed. The 'fileupload' parameter/field is the uploaded file content; 'assocname' specifies the filename within the archives's document's area that it will be saved as" ]
76 },
77
78 "set-import-metadata" => {
79 'compulsory-args' => [ "metaname", "metavalue" ],
80 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
81
82 #SET METHODS (ARRAY)
83 "set-metadata-array" => {
84 'compulsory-args' => [ "where", "json" ],
85 'optional-args' => [ ],
86 'help-string' => [
87 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
88
89 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
90
91# The same examples rewritten for when running the metadata-server.pl script from the commandline:
92
93# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
94
95# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
96
97 "set-archives-metadata-array" => {
98 'compulsory-args' => [ "json" ],
99 'optional-args' => [ ] },
100
101 "set-import-metadata-array" => {
102 'compulsory-args' => [ "json" ],
103 'optional-args' => [ ] },
104
105 "set-index-metadata-array" => {
106 'compulsory-args' => [ "json" ],
107 'optional-args' => [ ] },
108
109 "set-live-metadata-array" => {
110 'compulsory-args' => [ "json" ],
111 'optional-args' => [ ] },
112
113 #REMOVE METHODS
114 "remove-import-metadata" => {
115 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
116 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
117# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
118
119 "remove-archives-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
121 'optional-args' => [ "dv", "metapos", "metavalue", "metamode" ] },
122
123 "remove-live-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ ] },
126
127 "remove-index-metadata" => {
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "metapos", "metavalue" ] },
130
131 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
132 'compulsory-args' => [ "d", "metaname" ],
133 'optional-args' => [ "where", "dv", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
134
135
136 #REMOVE MULTIPLE (REMOVE ALL MATCHING METANAME AT DOCID)
137 "erase-import-metadata" => {
138 'compulsory-args' => [ "d", "metaname" ] },
139
140 "erase-archives-metadata" => {
141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "dv" ] },
143
144 "erase-index-metadata" => {
145 'compulsory-args' => [ "d", "metaname" ] },
146
147 "erase-live-metadata" => {
148 'compulsory-args' => [ "d", "metaname" ] },
149
150 "erase-metadata" => { # generic erase-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
151 'compulsory-args' => [ "d", "metaname" ],
152 'optional-args' => [ "where" ] },
153
154
155 #INSERT METHODS
156 "insert-metadata" => {
157 'compulsory-args' => [ "d", "metaname", "metavalue" ],
158 'optional-args' => [ ] },
159
160
161 #INC METHODS
162 "inc-fldv-nminus1" => {
163 'compulsory-args' => [ "d" ],
164 'optional-args' => [ ] }
165};
166
167sub _set_live_metadata
168{
169 my $self = shift @_;
170
171 my $collect = $self->{'collect'};
172 my $gsdl_cgi = $self->{'gsdl_cgi'};
173 my $infodbtype = $self->{'infodbtype'};
174
175 # Obtain the collect dir
176 my $site = $self->{'site'};
177 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
178
179
180 # look up additional args
181 my $docid = $self->{'d'};
182 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
183 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
184 }
185 my $metavalue = $self->{'metavalue'};
186
187 # Generate the dbkey
188 my $metaname = $self->{'metaname'};
189 my $dbkey = "$docid.$metaname";
190
191 # To people who know $collect_tail please add some comments
192 # Obtain path to the database
193 my $collect_tail = $collect;
194 $collect_tail =~ s/^.*[\/|\\]//;
195 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
196 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
197
198 # Set the new value
199 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
200 my $status = system($cmd);
201 if ($status != 0) {
202 # Catch error if gdbmget failed
203 my $mess = "Failed to set metadata key: $dbkey\n";
204
205 $mess .= "PATH: $ENV{'PATH'}\n";
206 $mess .= "cmd = $cmd\n";
207 $mess .= "Exit status: $status\n";
208 $mess .= "System Error Message: $!\n";
209
210 $gsdl_cgi->generate_error($mess);
211 }
212 else {
213 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
214 }
215
216 #return $status; # in case calling functions have any further use for this
217}
218
219sub set_live_metadata
220{
221 my $self = shift @_;
222
223 my $username = $self->{'username'};
224 my $collect = $self->{'collect'};
225 my $gsdl_cgi = $self->{'gsdl_cgi'};
226
227 if ($baseaction::authentication_enabled) {
228 # Ensure the user is allowed to edit this collection
229 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
230 }
231
232 # Make sure the collection isn't locked by someone else
233 $self->lock_collection($username, $collect);
234
235 $self->_set_live_metadata(@_);
236
237 # Release the lock once it is done
238 $self->unlock_collection($username, $collect);
239}
240
241sub set_index_metadata_entry
242{
243 my $self = shift @_;
244 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
245
246 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
247 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
248
249 # To people who know $collect_tail please add some comments
250 # Obtain path to the database
251 my $collect_tail = $collect;
252 $collect_tail =~ s/^.*[\/|\\]//;
253 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
254 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
255
256# print STDERR "**** infodb file path = $infodb_file_path\n";
257# print STDERR "***** infodb type = $infodbtype\n";
258
259 # Read the docid entry
260 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
261
262 # Set the metadata value
263 if (defined $metapos) {
264 # if metamode=accumulate AND metapos, warn user and then use metapos
265 if (defined $metamode && $metamode eq "accumulate") {
266 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
267 print STDERR "**** Proceeding by using metapos\n";
268 }
269 $doc_rec->{$metaname}->[$metapos] = $metavalue;
270 }
271 elsif (defined $prevmetavalue) {
272 my $array = $doc_rec->{$metaname};
273 my $length = @$array;
274
275 my $found = 0;
276 for (my $i = 0; $i < $length; $i++){
277 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
278 $doc_rec->{$metaname}->[$i] = $metavalue;
279 $found = 1;
280 last;
281 }
282 }
283
284 if($found == 0){
285 print STDERR "**** Warning: could not find $prevmetavalue to replace. Appending (accumulating) replacement value.\n";
286 ##$doc_rec->{$metaname} = [ $metavalue ]; # if prevmetavalue not found, don't overwrite all previous values.
287 # Accumulate is less destructive. But should the correct behaviour be to not do Anything?
288
289 if(defined $doc_rec->{$metaname}) {
290 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
291 } else {
292 $doc_rec->{$metaname} = [ $metavalue ];
293 }
294 }
295 }
296 elsif (defined $metamode && $metamode eq "override") {
297 $doc_rec->{$metaname} = [ $metavalue ];
298 }
299 else { # default for index was to override, but because accumulate is less destructive,
300 # and because accumulate is the default for archives and import, that's the new default for index too
301 if(defined $doc_rec->{$metaname}) {
302 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
303 } else {
304 $doc_rec->{$metaname} = [ $metavalue ];
305 }
306 }
307
308 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
309
310 return $status;
311
312}
313
314sub _set_import_metadata
315{
316 my $self = shift @_;
317
318 my $collect = $self->{'collect'};
319 my $gsdl_cgi = $self->{'gsdl_cgi'};
320 my $infodbtype = $self->{'infodbtype'};
321
322 # Obtain the collect and archive dir
323 my $site = $self->{'site'};
324 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
325 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
326
327 # look up additional args
328 # want either d= or f=
329 my $docid = $self->{'d'};
330 my ($docid_root,$docid_secnum);
331 if(defined $docid) {
332 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
333 # as yet no support for setting subsection metadata in metadata.xml
334 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
335 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
336 return;
337 }
338 }
339
340 my $import_file = $self->{'f'};
341 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
342 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
343 }
344
345 # Get the parameters and set default mode to "accumulate"
346 my $metaname = $self->{'metaname'};
347 my $metavalue = $self->{'metavalue'};
348## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
349 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
350
351 my $metamode = $self->{'metamode'};
352 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
353 # make "accumulate" the default (less destructive, as it won't actually
354 # delete any existing values)
355 $metamode = "accumulate";
356 }
357
358 # adding metapos and prevmetavalue support to import_metadata subroutines
359 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
360 my $prevmetavalue = $self->{'prevmetavalue'};
361 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
362 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
363
364 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
365 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
366
367 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
368 $mess .= " $metaname";
369 $mess .= " = $metavalue";
370 $mess .= " ($metamode)\n";
371
372 $gsdl_cgi->generate_ok_message($mess);
373
374 #return $status; # in case calling functions have any further use for this
375}
376
377# the version of set_archives_meta that doesn't do authentication
378sub _set_archives_metadata
379{
380 my $self = shift @_;
381
382 my $collect = $self->{'collect'};
383 my $gsdl_cgi = $self->{'gsdl_cgi'};
384 my $infodbtype = $self->{'infodbtype'};
385
386 # Obtain the collect and archive dir
387 my $site = $self->{'site'};
388 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
389 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
390
391 # look up additional args
392 my $docid = $self->{'d'};
393 my $dv = $self->{'dv'};
394
395 my $metaname = $self->{'metaname'};
396 my $metavalue = $self->{'metavalue'};
397 my $prevmetavalue = $self->{'prevmetavalue'};
398
399 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
400 # Don't append "|| undef", since if metapos=0 it will then be set to undef
401
402 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
403 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
404
405 my $metamode = $self->{'metamode'};
406 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
407 # make "accumulate" the default (less destructive, as it won't actually
408 # delete any existing values)
409 $metamode = "accumulate";
410 }
411
412 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
413 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
414
415 if ($status == 0) {
416 my $mess = "set-archives-metadata successful: Key[$docid]\n";
417 $mess .= " $metaname";
418 $mess .= "->[$metapos]" if (defined $metapos);
419 $mess .= " = $metavalue";
420 $mess .= " ($metamode)\n";
421
422 $gsdl_cgi->generate_ok_message($mess);
423 }
424 else {
425 my $mess .= "Failed to set archives metadata key: $docid\n";
426 $mess .= "Exit status: $status\n";
427 if(defined $self->{'error_msg'}) {
428 $mess .= "Error Message: $self->{'error_msg'}\n";
429 } else {
430 $mess .= "System Error Message: $!\n";
431 }
432 $mess .= "-" x 20 . "\n";
433
434 $gsdl_cgi->generate_error($mess);
435 }
436
437 #return $status; # in case calling functions have any further use for this
438}
439
440
441# the version of set_index_meta that doesn't do authentication
442sub _set_index_metadata
443{
444 print STDERR "START SET INDEX METADATA\n";
445 my $self = shift @_;
446
447 my $collect = $self->{'collect'};
448 my $gsdl_cgi = $self->{'gsdl_cgi'};
449
450 # Obtain the collect dir
451 my $site = $self->{'site'};
452 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
453
454 # look up additional args
455 my $docid = $self->{'d'};
456 my $metaname = $self->{'metaname'};
457 my $metapos = $self->{'metapos'}; # undef has meaning
458 my $metavalue = $self->{'metavalue'};
459 my $infodbtype = $self->{'infodbtype'};
460 my $metamode = $self->{'metamode'};
461 my $prevmetavalue = $self->{'prevmetavalue'};
462
463 print STDERR "SETTING INDEX METADATA ENTRY\n";
464 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
465 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
466 if ($status != 0) {
467 # Catch error if set infodb entry failed
468 my $mess = "Failed to set metadata key: $docid\n";
469
470 $mess .= "PATH: $ENV{'PATH'}\n";
471 $mess .= "Exit status: $status\n";
472 $mess .= "System Error Message: $!\n";
473
474 $gsdl_cgi->generate_error($mess);
475 }
476 else {
477 my $mess = "set-index-metadata successful: Key[$docid]\n";
478 $mess .= " $metaname";
479 $mess .= "->[$metapos]" if (defined $metapos);
480 $mess .= " = $metavalue\n";
481
482 $gsdl_cgi->generate_ok_message($mess);
483 }
484
485 print STDERR "END SET INDEX METADATA\n";
486 #return $status; # in case calling functions have any further use for this
487}
488
489sub set_index_metadata
490{
491 my $self = shift @_;
492
493 my $username = $self->{'username'};
494 my $collect = $self->{'collect'};
495 my $gsdl_cgi = $self->{'gsdl_cgi'};
496
497 if ($baseaction::authentication_enabled) {
498 # Ensure the user is allowed to edit this collection
499 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
500 }
501
502 # Obtain the collect dir
503 my $site = $self->{'site'};
504 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
505
506 $gsdl_cgi->checked_chdir($collect_dir);
507
508 # Make sure the collection isn't locked by someone else
509 $self->lock_collection($username, $collect);
510
511 $self->_set_index_metadata(@_);
512
513 # Release the lock once it is done
514 $self->unlock_collection($username, $collect);
515}
516
517# call this to set the metadata for a combination of dirs archives, import or index, or live
518# if none specified, defaults to index which was the original behaviour of set_metadata.
519sub set_metadata
520{
521 my $self = shift @_;
522
523 # Testing that not defining a variable, setting it to "" or to " " all return false
524 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
525
526 my $where = $self->{'where'};
527 if(!$where || ($where =~ m/^\s*$/)) {
528 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
529 return;
530 }
531
532 # authenticate and lock collection once, even if processing multiple dirs
533 my $username = $self->{'username'};
534 my $collect = $self->{'collect'};
535 my $gsdl_cgi = $self->{'gsdl_cgi'};
536
537 if ($baseaction::authentication_enabled) {
538 # Ensure the user is allowed to edit this collection
539 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
540 }
541
542 if($where =~ m/index/) {
543 my $site = $self->{'site'};
544 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
545 $gsdl_cgi->checked_chdir($collect_dir);
546 }
547
548 # Make sure the collection isn't locked by someone else
549 $self->lock_collection($username, $collect);
550
551
552 # now at last can set the metadata. $where can specify multiple
553 # $where is of the form: import|archives|index, or a subset thereof
554
555 #my @whichdirs = split('\|', $where);
556
557 # just check whether $where contains import/archives/index/live in turn, and
558 # for each case, process it accordingly
559 if($where =~ m/import/) {
560 $self->_set_import_metadata(@_);
561 }
562
563 if($where =~ m/archives/) {
564
565 # look up docID arg which is optional to set_metadata because it's optional
566 # to set_import, but which is compulsory to set_archives_metadata
567 my $docid = $self->{'d'};
568 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
569 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
570 }
571 # we have a docid, so can set archives meta
572 $self->_set_archives_metadata(@_);
573 }
574
575 if($where =~ m/index/) {
576
577 # look up docID arg which is optional to set_metadata because it's optional
578 # to set_import, but which is compulsory to set_archives_metadata
579 my $docid = $self->{'d'};
580 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
581 $gsdl_cgi->generate_error("No docid (d=...) specified.");
582 }
583 # we have a docid, so can set index meta
584 $self->_set_index_metadata(@_);
585 }
586
587 if($where =~ m/live/) {
588 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
589 }
590
591 # Release the lock once it is done
592 $self->unlock_collection($username, $collect);
593}
594
595sub set_metadata_array
596{
597 my $self = shift @_;
598
599 my $where = $self->{'where'};
600 if(!$where || ($where =~ m/^\s*$/)) {
601 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
602 return;
603 }
604
605 my $username = $self->{'username'};
606 my $collect = $self->{'collect'};
607 my $gsdl_cgi = $self->{'gsdl_cgi'};
608
609 if ($baseaction::authentication_enabled) {
610 # Ensure the user is allowed to edit this collection
611 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
612 }
613
614 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
615 # But including the stmt during this code reorganisation to preserve as-is what used to happen
616 my $site = $self->{'site'};
617 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
618 $gsdl_cgi->checked_chdir($collect_dir);
619
620 # Make sure the collection isn't locked by someone else
621 $self->lock_collection($username, $collect);
622
623 if($where =~ m/import/) {
624 $self->_set_import_metadata_array(@_);
625 }
626 if($where =~ m/archives/) {
627 $self->_set_archives_metadata_array(@_);
628 }
629 if($where =~ m/index/) {
630 $self->_set_index_metadata_array(@_);
631 }
632 if($where =~ m/live/) {
633 $self->_set_live_metadata_array(@_);
634 }
635
636 # Release the lock once it is done
637 $self->unlock_collection($username, $collect);
638}
639
640sub _set_index_metadata_array
641{
642 my $self = shift @_;
643
644 my $collect = $self->{'collect'};
645 my $gsdl_cgi = $self->{'gsdl_cgi'};
646
647 # Obtain the collect dir
648 my $site = $self->{'site'};
649 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
650
651 # look up additional args
652
653 my $infodbtype = $self->{'infodbtype'};
654
655 my $json_str = $self->{'json'};
656 my $doc_array = decode_json $json_str;
657
658
659 my $global_status = 0;
660 my $global_mess = "";
661
662 my @all_docids = ();
663
664 foreach my $doc_array_rec ( @$doc_array ) {
665
666 my $status = -1;
667 my $docid = $doc_array_rec->{'docid'};
668
669 push(@all_docids,$docid);
670
671 my $metaname = $doc_array_rec->{'metaname'};
672 if(defined $metaname) {
673 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
674 my $metavalue = $doc_array_rec->{'metavalue'};
675 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
676
677 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
678 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
679 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
680
681 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
682 $metaname = $metatable_rec->{'metaname'};
683 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
684 my $metapos = undef;
685 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
686
687 foreach my $metavalue ( @$metavals ) { # metavals is an array
688 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
689 if($metamode eq "override") { # now, having overridden the metavalue for the first,
690 # need to accumulate subsequent metavals for this metaname, else the just-assigned
691 # metavalue for this metaname will be lost
692 $metamode = "accumulate";
693 }
694 }
695 }
696 }
697
698 if ($status != 0) {
699 # Catch error if set infodb entry failed
700 $global_status = $status;
701 $global_mess .= "Failed to set metadata key: $docid\n";
702 $global_mess .= "Exit status: $status\n";
703 $global_mess .= "System Error Message: $!\n";
704 $global_mess .= "-" x 20;
705 }
706 }
707
708 if ($global_status != 0) {
709 $global_mess .= "PATH: $ENV{'PATH'}\n";
710 $gsdl_cgi->generate_error($global_mess);
711 }
712 else {
713 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
714 $gsdl_cgi->generate_ok_message($mess);
715 }
716}
717
718sub set_index_metadata_array
719{
720 my $self = shift @_;
721
722 my $username = $self->{'username'};
723 my $collect = $self->{'collect'};
724 my $gsdl_cgi = $self->{'gsdl_cgi'};
725
726 if ($baseaction::authentication_enabled) {
727 # Ensure the user is allowed to edit this collection
728 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
729 }
730
731 # Obtain the collect dir
732 my $site = $self->{'site'};
733 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
734
735 $gsdl_cgi->checked_chdir($collect_dir);
736
737
738 # Make sure the collection isn't locked by someone else
739 $self->lock_collection($username, $collect);
740
741 $self->_set_index_metadata_array(@_);
742
743 # Release the lock once it is done
744 $self->unlock_collection($username, $collect);
745}
746
747# experimental, newly added in and untested
748sub _set_live_metadata_array
749{
750 my $self = shift @_;
751
752 my $collect = $self->{'collect'};
753 my $gsdl_cgi = $self->{'gsdl_cgi'};
754
755 # Obtain the collect dir
756 my $site = $self->{'site'};
757 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
758
759 # look up additional args
760 my $infodbtype = $self->{'infodbtype'};
761 # To people who know $collect_tail please add some comments
762 # Obtain path to the database
763 my $collect_tail = $collect;
764 $collect_tail =~ s/^.*[\/|\\]//;
765 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
766 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
767
768
769 my $json_str = $self->{'json'};
770 my $doc_array = decode_json $json_str;
771
772
773 my $global_status = 0;
774 my $global_mess = "";
775
776 my @all_docids = ();
777
778
779 foreach my $doc_array_rec ( @$doc_array ) {
780
781 my $status = -1;
782 my $docid = $doc_array_rec->{'docid'};
783
784 push(@all_docids,$docid);
785
786 my $metaname = $doc_array_rec->{'metaname'};
787 if(defined $metaname) {
788 my $dbkey = "$docid.$metaname";
789 my $metavalue = $doc_array_rec->{'metavalue'};
790
791 # Set the new value
792 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
793 $status = system($cmd);
794
795 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
796 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
797 foreach my $metatable_rec ( @$metatable ) {
798 $metaname = $metatable_rec->{'metaname'};
799 my $dbkey = "$docid.$metaname";
800
801 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
802 foreach my $metavalue ( @$metavals ) {
803 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
804 $status = system($cmd);
805 }
806 }
807
808 }
809
810 if ($status != 0) {
811 # Catch error if gdbmget failed
812 $global_status = $status;
813 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
814 $global_mess .= "Exit status: $status\n";
815 $global_mess .= "System Error Message: $!\n";
816 $global_mess .= "-" x 20;
817 }
818 }
819
820 if ($global_status != 0) {
821 $global_mess .= "PATH: $ENV{'PATH'}\n";
822 $gsdl_cgi->generate_error($global_mess);
823 }
824 else {
825 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
826 $gsdl_cgi->generate_ok_message($mess);
827 }
828}
829
830sub set_live_metadata_array
831{
832 my $self = shift @_;
833
834 my $username = $self->{'username'};
835 my $collect = $self->{'collect'};
836 my $gsdl_cgi = $self->{'gsdl_cgi'};
837
838 if ($baseaction::authentication_enabled) {
839 # Ensure the user is allowed to edit this collection
840 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
841 }
842
843 # Obtain the collect dir
844 my $site = $self->{'site'};
845 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
846
847 $gsdl_cgi->checked_chdir($collect_dir);
848
849 # Make sure the collection isn't locked by someone else
850 $self->lock_collection($username, $collect);
851
852 $self->_set_live_metadata_array(@_);
853
854 # Release the lock once it is done
855 $self->unlock_collection($username, $collect);
856}
857
858
859sub dxml_metadata
860{
861 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
862 my $metaname = $parser->{'parameters'}->{'metaname'};
863 my $metamode = $parser->{'parameters'}->{'metamode'};
864
865###!!! print STDERR "**** Processing closing </Metadata> tag\n";
866
867 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
868
869 # Find the right metadata tag and checks if we are going to
870 # override it
871 #
872 # Note: This over writes the first metadata block it
873 # encountered. If there are multiple Sections in the doc.xml, it
874 # might not behave as you would expect
875
876 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
877## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n" if (defined $opt_doc_secnum);
878## print STDERR "**** metamode = $metamode\n";
879
880 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
881 {
882 my $name_attr = $attrHash->{'name'};
883 # print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
884 if (($name_attr eq $metaname) && ($metamode eq "override"))
885 {
886 if (!defined $parser->{'parameters'}->{'poscount'})
887 {
888 $parser->{'parameters'}->{'poscount'} = 0;
889 }
890 else
891 {
892 $parser->{'parameters'}->{'poscount'}++;
893 }
894
895 if (defined $parser->{'parameters'}->{'metapos'}) { # if pos defined, ignore any prevmeta if also defined, and just wait for pos match
896
897 if ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) {
898 ###print STDERR "#### got pos match!!\n";
899 # Get the value and override the current value
900 my $metavalue = $parser->{'parameters'}->{'metavalue'};
901 $attrHash->{'_content'} = $metavalue;
902
903 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
904 # we're not setting metamode to override here.
905
906 # Don't want it to wipe out any other pieces of metadata
907 $parser->{'parameters'}->{'metamode'} = "done";
908 }
909 }
910 elsif (defined $parser->{'parameters'}->{'prevmetavalue'}) { # if no pos defined but prevmeta defined, then wait for prevmeta match
911 if ($parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) {
912 ###print STDERR "### prev meta value matches\n";
913 my $metavalue = $parser->{'parameters'}->{'metavalue'};
914 $attrHash->{'_content'} = $metavalue;
915
916 # until we have clearly determined the role of metamode in doc.xml files (we know what it means in import/metadata.xml files)
917 # we're not setting metamode to override here.
918
919 $parser->{'parameters'}->{'metamode'} = "done";
920
921 }
922 }
923 # Note that before commit rev 32076, if neither metapos nor prevmeta defined, then code tried to set pos 0 for that metaname to the new value
924 # However that is not how the code works now (and couldn't get it to work that way yet after rev 32076).
925 # What it does now, and what Dr Bainbridge approved of is that:
926 # If no pos or prevmeta defined, then on override, the metavalue set for the specificed metaname becomes the ONLY instance of that metaname
927 # So any earlier instances of that metaname is removed and replaced by the single new instance.
928 }
929 }
930
931 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
932 return [$tagname => $attrHash];
933}
934
935# This method exists purely for catching invalid section numbers that the client
936# requested to edit. Once the parser has reached the end (the final </Archive> tag),
937# we've seen all the Sections in the doc.xml, and none of their section nums matched
938# if the metamode has not been set to 'done' by then.
939sub dxml_archive
940{
941 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
942 my $metamode = $parser->{'parameters'}->{'metamode'};
943
944 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
945 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
946
947 # print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
948
949 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
950 #print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
951
952 my $metaname = $parser->{'parameters'}->{'metaname'};
953 my $metavalue = $parser->{'parameters'}->{'metavalue'};
954
955 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
956 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
957 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
958 print STDERR "\tin section $opt_doc_secnum.\n";
959 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
960 }
961
962 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
963 return [$tagname => $attrHash];
964}
965
966sub dxml_description
967{
968 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
969 my $metamode = $parser->{'parameters'}->{'metamode'};
970
971 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
972 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
973
974###!!! print STDERR "**** Processing a closing </Description> tag \n";
975 # print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
976
977 # Accumulate the metadata
978
979 # We'll be accumulating metadata at this point if we haven't found and therefore
980 # haven't processed the metadata yet.
981 # For subsections, this means that if we're at a matching subsection, but haven't
982 # found the correct metaname to override in that subsection, we accumulate it as new
983 # meta in the subsection by adding it to the current description.
984 # If there's no subsection info for the metadata, it will accumulate at the top level
985 # section description if we hadn't found a matching metaname to override at this point.
986
987 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
988 # at the toplevel section. In the latter case, it means we want to process meta in the
989 # toplevel section. So the eq check between the values below will work in all cases.
990
991 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
992 # this document has been provided. In that case, the metadata for that opt_doc_secnum
993 # won't get attached/accumulated to any part of the doc, not even its top-level section.
994
995 if ($curr_secnum eq $opt_doc_secnum
996 && ($metamode eq "accumulate" || $metamode eq "override")) {
997 # note, metamode would have been set to done if a metadata operation had already succeeded.
998 # If we get here, we still have the metadata to append.
999 if ($metamode eq "override") {
1000 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
1001 }
1002
1003 # If we get to here and metamode is override, this means there
1004 # was no existing value to overide => treat as an append operation
1005
1006 # Tack a new metadata tag on to the end of the <Metadata>+ block
1007 my $metaname = $parser->{'parameters'}->{'metaname'};
1008 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1009
1010 my $metadata_attr = {
1011 '_content' => $metavalue,
1012 'name' => $metaname,
1013 'mode' => "accumulate"
1014 };
1015
1016 my $append_metadata = [ "Metadata" => $metadata_attr ];
1017 my $description_content = $attrHash->{'_content'};
1018 print "Appending metadata to doc.xml\n";
1019
1020 if (ref($description_content)) {
1021 # got some existing interesting nested content
1022 push(@$description_content, " ", $append_metadata, "\n ");
1023 }
1024 else {
1025 #description_content is most likely a string such as "\n"
1026 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1027 }
1028
1029 $parser->{'parameters'}->{'metamode'} = "done";
1030 }
1031 else {
1032 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1033## print STDERR "**** NOT ACCUMULATE?!? \n";
1034 }
1035 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1036 return [$tagname => $attrHash];
1037}
1038
1039sub edit_xml_file
1040{
1041 my $self = shift @_;
1042 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1043
1044 # use XML::Rules to add it in (read in and out again)
1045 my $parser = XML::Rules->new(start_rules => $start_rules,
1046 rules => $rules,
1047 style => 'filter',
1048 output_encoding => 'utf8' );
1049
1050 my $xml_in = "";
1051 if (!open(MIN,"<$filename")) {
1052 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1053 }
1054 else {
1055 # Read all the text in
1056 my $line;
1057 while (defined ($line=<MIN>)) {
1058 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1059 }
1060 close(MIN);
1061
1062 my $MOUT;
1063 if (!open($MOUT,">$filename")) {
1064 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1065 }
1066 else {
1067 # Matched lines will get handled by the call backs
1068## my $xml_out = "";
1069
1070 binmode($MOUT,":utf8");
1071 $parser->filter($xml_in,$MOUT, $options);
1072
1073# binmode(MOUT,":utf8");
1074# print MOUT $xml_out;
1075 close($MOUT);
1076 }
1077 }
1078
1079 # copy across any custom error information that was stored during parsing
1080 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1081}
1082
1083sub edit_doc_xml
1084{
1085 my $self = shift @_;
1086 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1087
1088 my $info_mess = <<RAWEND;
1089****************************
1090 edit_doc_xml()
1091****************************
1092RAWEND
1093
1094 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1095 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1096 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1097 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1098 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1099 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1100 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1101
1102 $info_mess .= "****************************\n";
1103
1104 $gsdl_cgi->generate_message($info_mess);
1105
1106 # To monitor which section/subsection number we are in
1107 my @start_rules =
1108 ( 'Section' => \&dxml_start_section );
1109
1110 # use XML::Rules to add it in (read in and out again)
1111 # Set the call back functions
1112 my @rules =
1113 ( _default => 'raw',
1114 'Metadata' => \&dxml_metadata,
1115 'Description' => \&dxml_description,
1116 'Archive' => \&dxml_archive); # just for catching errors at end
1117
1118 # Sets the parameters
1119 my $options = { 'metaname' => $metaname,
1120 'metapos' => $metapos,
1121 'metavalue' => $metavalue,
1122 'metamode' => $metamode,
1123 'prevmetavalue' => $prevmetavalue };
1124
1125 if (defined $opt_secnum) {
1126 $options->{'secnum'} = $opt_secnum;
1127 }
1128
1129 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1130}
1131
1132sub set_archives_metadata_entry
1133{
1134 my $self = shift @_;
1135 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $dv, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1136
1137 my $info_mess = <<RAWEND;
1138****************************
1139 set_archives_metadata_entry()
1140****************************
1141RAWEND
1142
1143 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1144 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1145 $info_mess .= " collect = $collect\n" if defined($collect);
1146 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1147 $info_mess .= " docid = $docid\n" if defined($docid);
1148 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1149 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1150 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1151 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1152 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1153
1154 $info_mess .= "****************************\n";
1155
1156 $gsdl_cgi->generate_message($info_mess);
1157
1158 # Obtain the doc.xml path for the specified docID
1159 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1160
1161 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1162 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1163 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1164
1165 # check if request if for file-level doc-version history 'nminus-<n>' version
1166 if (defined $dv && ($dv ne "")) {
1167 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1168
1169 my ($doc_xml_tailname, $doc_xml_dirname) = File::Basename::fileparse($doc_xml_file);
1170 $doc_xml_file = &util::filename_cat($doc_xml_dirname,$FLDV_HISTORY_DIR,$dv,$doc_xml_tailname);
1171 }
1172
1173 # The $doc_xml_file is relative to the archives, and now let's get the full path
1174 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1175
1176 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1177
1178 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1179 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1180 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1181 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1182 print "override mode with no pos or prev value set. Removing all existing values for $metaname, and setting mode to accumulate for the new value.\n";
1183 # remove all values of $metaname metadata
1184 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1185 $metamode = "accumulate";
1186 }
1187 # Edit the doc.xml file with the specified metadata name, value and position.
1188 # TODO: there is a potential problem here as this edit_doc_xml function
1189 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1190 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1191
1192 # dxml_metadata method ignores metapos if metamode anything other than override
1193 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1194 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1195
1196 # return 0; # return 0 for now to indicate no error
1197 return (defined $self->{'error_msg'}) ? 1 : 0;
1198}
1199
1200
1201sub set_archives_metadata
1202{
1203 my $self = shift @_;
1204
1205 my $username = $self->{'username'};
1206 my $collect = $self->{'collect'};
1207 my $gsdl_cgi = $self->{'gsdl_cgi'};
1208
1209 if ($baseaction::authentication_enabled) {
1210 # Ensure the user is allowed to edit this collection
1211 $self->authenticate_user($username, $collect);
1212 }
1213
1214 # Make sure the collection isn't locked by someone else
1215 $self->lock_collection($username, $collect);
1216
1217 $self->_set_archives_metadata(@_);
1218
1219 # Release the lock once it is done
1220 $self->unlock_collection($username, $collect);
1221}
1222
1223sub _set_archives_metadata_array
1224{
1225 my $self = shift @_;
1226
1227 my $collect = $self->{'collect'};
1228 my $gsdl_cgi = $self->{'gsdl_cgi'};
1229
1230 # Obtain the collect dir
1231 my $site = $self->{'site'};
1232 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1233
1234 # look up additional args
1235
1236 my $infodbtype = $self->{'infodbtype'};
1237
1238 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1239
1240 my $json_str = $self->{'json'};
1241 my $doc_array = decode_json $json_str;
1242
1243
1244 my $global_status = 0;
1245 my $global_mess = "";
1246
1247 my @all_docids = ();
1248
1249 foreach my $doc_array_rec ( @$doc_array ) {
1250 my $status = -1;
1251 my $docid = $doc_array_rec->{'docid'};
1252 my $dv = $doc_array_rec->{'dv'};
1253
1254 push(@all_docids,$docid);
1255
1256 my $metaname = $doc_array_rec->{'metaname'};
1257 if(defined $metaname) {
1258
1259 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1260
1261 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1262 my $metavalue = $doc_array_rec->{'metavalue'};
1263 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1264
1265
1266 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1267 # make "accumulate" the default (less destructive, as it won't actually
1268 # delete any existing values)
1269 $metamode = "accumulate";
1270 }
1271
1272 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,
1273 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1274 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1275 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1276
1277 foreach my $metatable_rec ( @$metatable ) {
1278 $metaname = $metatable_rec->{'metaname'};
1279 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1280 my $metapos = undef;
1281 my $prevmetavalue = undef;
1282 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1283
1284 foreach my $metavalue ( @$metavals ) {
1285 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1286 $docid,$dv, $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1287
1288 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1289 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1290 # metavalue for this metaname will be lost
1291 $metamode = "accumulate";
1292 }
1293 }
1294 }
1295 }
1296
1297 if ($status != 0) {
1298 # Catch error if set infodb entry failed
1299 $global_status = $status;
1300 $global_mess .= "Failed to set metadata key: $docid\n";
1301 $global_mess .= "Exit status: $status\n";
1302 $global_mess .= "System Error Message: $!\n";
1303 $global_mess .= "-" x 20 . "\n";
1304 }
1305 }
1306
1307 if ($global_status != 0) {
1308 $global_mess .= "PATH: $ENV{'PATH'}\n";
1309 $gsdl_cgi->generate_error($global_mess);
1310 }
1311 else {
1312 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1313 $gsdl_cgi->generate_ok_message($mess);
1314 }
1315}
1316
1317sub set_archives_metadata_array
1318{
1319 my $self = shift @_;
1320
1321 my $username = $self->{'username'};
1322 my $collect = $self->{'collect'};
1323 my $gsdl_cgi = $self->{'gsdl_cgi'};
1324
1325 if ($baseaction::authentication_enabled) {
1326 # Ensure the user is allowed to edit this collection
1327 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1328 }
1329
1330 # Obtain the collect dir
1331 my $site = $self->{'site'};
1332 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1333
1334 $gsdl_cgi->checked_chdir($collect_dir);
1335
1336 # Make sure the collection isn't locked by someone else
1337 $self->lock_collection($username, $collect);
1338
1339 $self->_set_archives_metadata_array(@_);
1340
1341 # Release the lock once it is done
1342 $self->unlock_collection($username, $collect);
1343}
1344
1345
1346
1347# the version of set_archives_meta that doesn't do authentication
1348sub _set_archives_assocfile
1349{
1350 my $self = shift @_;
1351
1352 my $collect = $self->{'collect'};
1353 my $gsdl_cgi = $self->{'gsdl_cgi'};
1354 my $infodbtype = $self->{'infodbtype'};
1355
1356 # Obtain the collect and archive dir
1357 my $site = $self->{'site'};
1358 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1359 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1360
1361 # look up additional args
1362 my $docid = $self->{'d'};
1363
1364 my $assocname = $self->{'assocname'};
1365
1366 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1367 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1368
1369 my $doc_file = $doc_rec->{'doc-file'}->[0];
1370
1371 # check if request if for file-level doc-version history 'nminus-<n>' version
1372 my $dv = $self->{'dv'};
1373
1374 my ($unused_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1375
1376 my $output_file = $assocname;
1377
1378 if (defined $dv && ($dv ne "")) {
1379 # Need to insert '_fldv_history/nminus-<n>' into output_file
1380
1381 $output_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$output_file);
1382 }
1383 else {
1384 $output_file = &util::filename_cat($doc_dirname,$output_file);
1385 }
1386
1387 my $output_filename = &util::filename_cat($archive_dir, $output_file);
1388
1389 my $filedata = $self->{'filedata'};
1390 my $FIN = $self->{'fileupload'};
1391
1392 if ((!defined $filedata) && (!defined $FIN)) {
1393 $gsdl_cgi->generate_error("set-archives-assocfile: either 'fileupload' or 'filedata' must be set");
1394 }
1395 else {
1396
1397 my $had_error = 0;
1398 my $had_error_mess = undef;
1399
1400 if (defined $filedata) {
1401 if (open(FOUT,">$output_filename")) {
1402 print FOUT $filedata;
1403 close(FOUT);
1404 }
1405 else {
1406 $had_error = 1;
1407 $had_error_mess = "Failed to save filedata to: $output_filename";
1408 print STDERR "Error - $had_error_mess:\n$!\n";
1409 }
1410 }
1411 else {
1412 my $FIN = $self->{'fileupload'};
1413 if (open(FOUT,">$output_filename")) {
1414 binmode(FOUT, ":raw");
1415
1416 while (1) {
1417 my $buffer = "";
1418 my $bytes_read = read($FIN, $buffer, 1024);
1419
1420 if (defined $bytes_read) {
1421 if ($bytes_read>0) {
1422 print FOUT $buffer;
1423 }
1424 last if $bytes_read < 1024;
1425 }
1426 else {
1427 $had_error = 1;
1428 $had_error_mess = "set-archives-assocfile: Failed to open uploaded file for reading: $!\n";
1429 print STDERR "Error - $had_error_mess";
1430 last;
1431 }
1432 }
1433
1434 close($FIN);
1435 close(FOUT);
1436 }
1437 else {
1438 $had_error = 1;
1439 $had_error_mess = "Failed to save file uploaded file as: $output_filename";
1440 print STDERR "Error - $had_error_mess:\n$!\n";
1441 }
1442 }
1443
1444 if (!$had_error) {
1445 my $mess = "set-archives-assocfile successful save uploaded content into 'archives' as: $output_file\n";
1446 $gsdl_cgi->generate_ok_message($mess);
1447 }
1448 else {
1449 $gsdl_cgi->generate_error($had_error_mess);
1450 }
1451 }
1452}
1453
1454
1455sub set_archives_assocfile
1456{
1457 my $self = shift @_;
1458
1459 my $username = $self->{'username'};
1460 my $collect = $self->{'collect'};
1461 my $gsdl_cgi = $self->{'gsdl_cgi'};
1462
1463 if ($baseaction::authentication_enabled) {
1464 # Ensure the user is allowed to edit this collection
1465 $self->authenticate_user($username, $collect);
1466 }
1467
1468
1469 # Make sure the collection isn't locked by someone else
1470 $self->lock_collection($username, $collect);
1471
1472 $self->_set_archives_assocfile(@_);
1473
1474 # Release the lock once it is done
1475 $self->unlock_collection($username, $collect);
1476}
1477
1478
1479sub _remove_archives_metadata
1480{
1481 my $self = shift @_;
1482
1483 my $collect = $self->{'collect'};
1484 my $gsdl_cgi = $self->{'gsdl_cgi'};
1485 my $infodbtype = $self->{'infodbtype'};
1486
1487
1488 # Obtain the collect and archive dir
1489 my $site = $self->{'site'};
1490 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1491
1492 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1493
1494 # look up additional args
1495 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1496
1497 my $metaname = $self->{'metaname'};
1498 my $metapos = $self->{'metapos'};
1499 my $metavalue = $self->{'metavalue'};
1500
1501 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1502 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1503
1504 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1505 # default to deleting the first metavalue for the given metaname
1506 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1507 # seem to get deleted in one single remove_archives_meta action invocation.
1508 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1509 # deleted, both get deleted.
1510 if(!defined $metapos && !defined $metavalue) {
1511 $metapos = 0;
1512 }
1513
1514 my $metamode = $self->{'metamode'};
1515 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1516
1517 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1518 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1519
1520 # This now stores the full pathname
1521 my $doc_file = $doc_rec->{'doc-file'}->[0];
1522
1523 # check if request if for file-level doc-version history 'nminus-<n>' version
1524 my $dv = $self->{'dv'};
1525 if (defined $dv && ($dv ne "")) {
1526 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
1527
1528 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
1529 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
1530 }
1531
1532 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
1533
1534 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1535# my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, undef, $docid_secnum);
1536
1537 if ($status == 0)
1538 {
1539 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1540 $mess .= " $metaname";
1541 $mess .= "->[$metapos]" if (defined $metapos);
1542 $mess .= " ($metavalue)" if (defined $metavalue);
1543 $gsdl_cgi->generate_ok_message($mess);
1544 }
1545 else
1546 {
1547 my $mess .= "Failed to remove archives metadata key: $docid\n";
1548 $mess .= "Exit status: $status\n";
1549 $mess .= "System Error Message: $!\n";
1550 $mess .= "-" x 20 . "\n";
1551
1552 $gsdl_cgi->generate_error($mess);
1553 }
1554
1555 #return $status; # in case calling functions have a use for this
1556}
1557
1558sub remove_archives_metadata
1559{
1560 my $self = shift @_;
1561
1562 my $username = $self->{'username'};
1563 my $collect = $self->{'collect'};
1564 my $gsdl_cgi = $self->{'gsdl_cgi'};
1565
1566 if ($baseaction::authentication_enabled)
1567 {
1568 # Ensure the user is allowed to edit this collection
1569 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1570 }
1571
1572 # Make sure the collection isn't locked by someone else
1573 $self->lock_collection($username, $collect);
1574
1575 $self->_remove_archives_metadata(@_);
1576
1577 # Release the lock once it is done
1578 $self->unlock_collection($username, $collect);
1579}
1580
1581sub remove_from_doc_xml
1582{
1583 my $self = shift @_;
1584 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1585
1586 my @start_rules = ('Section' => \&dxml_start_section);
1587
1588 # Set the call-back functions for the metadata tags
1589 my @rules =
1590 (
1591 _default => 'raw',
1592 'Metadata' => \&rfdxml_metadata
1593 );
1594
1595 my $parser = XML::Rules->new
1596 (
1597 start_rules => \@start_rules,
1598 rules => \@rules,
1599 style => 'filter',
1600 output_encoding => 'utf8',
1601# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1602 stripspaces => 2|0|0 # ineffectual
1603 );
1604
1605 my $status = 0;
1606 my $xml_in = "";
1607 if (!open(MIN,"<$doc_xml_filename"))
1608 {
1609 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1610 $status = 1;
1611 }
1612 else
1613 {
1614 # Read them in
1615 my $line;
1616 while (defined ($line=<MIN>)) {
1617 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1618 }
1619 close(MIN);
1620
1621 # Filter with the call-back functions
1622 my $xml_out = "";
1623
1624 my $MOUT;
1625 if (!open($MOUT,">$doc_xml_filename")) {
1626 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1627 $status = 1;
1628 }
1629 else {
1630 binmode($MOUT,":utf8");
1631 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1632 close($MOUT);
1633 }
1634 }
1635 return $status;
1636}
1637
1638sub rfdxml_metadata
1639{
1640 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1641
1642 # For comparisons, toplevel section is indicated by ""
1643 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1644 my $secid = $parser->{'parameters'}->{'secid'} || "";
1645
1646 if (!($secid eq $curr_sec_num))
1647 {
1648 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1649 return [$tagname => $attrHash];
1650 }
1651
1652 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1653 {
1654 if (!defined $parser->{'parameters'}->{'poscount'})
1655 {
1656 $parser->{'parameters'}->{'poscount'} = 0;
1657 }
1658 else
1659 {
1660 $parser->{'parameters'}->{'poscount'}++;
1661 }
1662
1663 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1664 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
1665 return [];
1666 }
1667
1668 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1669 {
1670 return [];
1671 }
1672
1673 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1674 {
1675 return [];
1676 }
1677 }
1678
1679 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1680 return [$tagname => $attrHash];
1681}
1682
1683sub mxml_metadata
1684{
1685 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1686 my $metaname = $parser->{'parameters'}->{'metaname'};
1687 my $metamode = $parser->{'parameters'}->{'metamode'};
1688
1689 # Report error if we don't see FileName tag before this
1690 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1691
1692 # Don't do anything if we are not in the right FileSet
1693 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1694 if ($file_regexp =~ /\.\*/) {
1695 # Only interested in a file_regexp if it specifies precisely one
1696 # file.
1697 # So, skip anything with a .* in it as it is too general
1698## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1699 return [$tagname => $attrHash];
1700 }
1701 my $src_file = $parser->{'parameters'}->{'src_file'};
1702 if (!($src_file =~ /$file_regexp/)) {
1703 return [$tagname => $attrHash];
1704 }
1705## print STDERR "*** mxl metamode = $metamode\n";
1706
1707 # Find the right metadata tag and checks if we are going to override it
1708 my $name_attr = $attrHash->{'name'};
1709 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1710
1711 # now metadata.xml functions need to keep track of metapos
1712 if (!defined $parser->{'parameters'}->{'poscount'})
1713 {
1714 $parser->{'parameters'}->{'poscount'} = 0;
1715 }
1716 else
1717 {
1718 $parser->{'parameters'}->{'poscount'}++;
1719 }
1720
1721 # If either the metapos or prevmetavalue is set,
1722 # get the value and override the current value
1723 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1724
1725 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1726 {
1727 $attrHash->{'_content'} = $metavalue;
1728
1729 ## print STDERR "**** overriding metadata.xml\n";
1730
1731 # Don't want it to wipe out any other pieces of metadata
1732 $parser->{'parameters'}->{'metamode'} = "done";
1733 }
1734 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1735 {
1736 $attrHash->{'_content'} = $metavalue;
1737 $parser->{'parameters'}->{'metamode'} = "done";
1738 }
1739 }
1740
1741 # mxml_description will process the metadata if metadata is accumulate,
1742 # or if we haven't found the metadata to override
1743
1744 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1745 return [$tagname => $attrHash];
1746}
1747
1748
1749sub mxml_description
1750{
1751 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1752 my $metamode = $parser->{'parameters'}->{'metamode'};
1753
1754 # Failed... Report error if we don't see FileName tag before this
1755 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1756
1757 # Don't do anything if we are not in the right FileSet
1758 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1759 if ($file_regexp =~ m/\.\*/) {
1760 # Only interested in a file_regexp if it specifies precisely one
1761 # file.
1762 # So, skip anything with a .* in it as it is too general
1763 return [$tagname => $attrHash];
1764 }
1765 my $src_file = $parser->{'parameters'}->{'src_file'};
1766
1767 if (!($src_file =~ m/$file_regexp/)) {
1768 return [$tagname => $attrHash];
1769 }
1770
1771 # Accumulate the metadata block to the end of the description block
1772 # Note: This adds metadata block to all description blocks, so if there are
1773 # multiple FileSets, it will add to all of them
1774 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1775
1776 # if metamode was "override" but get to here then it failed to
1777 # find an item to override, in which case it should append its
1778 # value to the end, just like the "accumulate" mode
1779
1780 if ($metamode eq "override") {
1781 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1782 }
1783
1784 # tack a new metadata tag on to the end of the <Metadata>+ block
1785 my $metaname = $parser->{'parameters'}->{'metaname'};
1786 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1787
1788 my $metadata_attr = { '_content' => $metavalue,
1789 'name' => $metaname,
1790 'mode' => "accumulate" };
1791
1792 my $append_metadata = [ "Metadata" => $metadata_attr ];
1793 my $description_content = $attrHash->{'_content'};
1794
1795## print STDERR "*** appending to metadata.xml\n";
1796
1797 # append the new metadata element to the end of the current
1798 # content contained inside this tag
1799 if (ref($description_content) eq "") {
1800 # => string or numeric literal
1801 # this is caused by a <Description> block has no <Metadata> child elements
1802 # => set up an empty array in '_content'
1803 $attrHash->{'_content'} = [ "\n" ];
1804 $description_content = $attrHash->{'_content'};
1805 }
1806
1807 push(@$description_content, " ", $append_metadata, "\n ");
1808 $parser->{'parameters'}->{'metamode'} = "done";
1809 }
1810
1811 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1812 return [$tagname => $attrHash];
1813}
1814
1815
1816sub mxml_fileset
1817{
1818 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1819
1820 # Initilise the current_file
1821 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1822 # FileName tag must come before Description tag
1823 $parser->{'parameters'}->{'current_file'} = "";
1824
1825 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1826 return [$tagname => $attrHash];
1827}
1828
1829sub mxml_directorymetadata
1830{
1831 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1832
1833 # if we haven't processed the metadata when we reach the end of metadata.xml
1834 # it's because there's no particular FileSet element whose FileName matched
1835 # In which case, add a new FileSet for this FileName
1836 my $metamode = $parser->{'parameters'}->{'metamode'};
1837 if($metamode ne "done") {
1838
1839 if ($metamode eq "override") {
1840 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1841 }
1842
1843 # If we get to here and metamode is override, this means there
1844 # was no existing value to overide => treat as an append operation
1845
1846 # Create a new FileSet element and append to DirectoryMetadata
1847 # <FileSet>
1848 # <FileName>src_file</FileName>
1849 # <Description>
1850 # <Metadata mode="" name="">metavalue</Metadata>
1851 # </Description>
1852 # </FileSet>
1853 my $src_file = $parser->{'parameters'}->{'src_file'};
1854 my $metaname = $parser->{'parameters'}->{'metaname'};
1855 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1856 my $metadata_attr = {
1857 '_content' => $metavalue,
1858 'name' => $metaname,
1859 'mode' => "accumulate"
1860 };
1861 my $append_metadata = [ "Metadata" => $metadata_attr ];
1862 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
1863 my $description_element = [ "Description" => $description_attr ];
1864
1865 #_content is not an attribute, it's special and holds the children of this element
1866 # including the textnode value embedded in this element if any.
1867 my $filename_attr = {'_content' => $src_file};
1868 my $filename_element = [ "FileName" => $filename_attr ];
1869
1870 my $fileset_attr = {};
1871 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1872 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1873
1874
1875 # get children of dirmeta, and push the new FileSet element onto it
1876 print "Appending metadata to metadata.xml\n";
1877 my $dirmeta_content = $attrHash->{'_content'};
1878 if (ref($dirmeta_content)) {
1879 # got some existing interesting nested content
1880 #push(@$dirmeta_content, " ", $fileset ,"\n ");
1881 push(@$dirmeta_content, "\t", $fileset ,"\n");
1882 }
1883 else {
1884 #description_content is most likely a string such as "\n"
1885 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
1886 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1887 }
1888
1889 $parser->{'parameters'}->{'metamode'} = "done";
1890 }
1891 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1892 return [$tagname => $attrHash];
1893}
1894
1895
1896sub edit_metadata_xml
1897{
1898 my $self = shift @_;
1899 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1900
1901 # Set the call-back functions for the metadata tags
1902 my @rules =
1903 ( _default => 'raw',
1904 'FileName' => \&mxml_filename,
1905 'Metadata' => \&mxml_metadata,
1906 'Description' => \&mxml_description,
1907 'FileSet' => \&mxml_fileset,
1908 'DirectoryMetadata' => \&mxml_directorymetadata);
1909
1910 # use XML::Rules to add it in (read in and out again)
1911 my $parser = XML::Rules->new(rules => \@rules,
1912 style => 'filter',
1913 output_encoding => 'utf8',
1914 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1915
1916 if (!-e $metadata_xml_filename) {
1917
1918 if (open(MOUT,">$metadata_xml_filename")) {
1919
1920 my $src_file_re = &util::filename_to_regex($src_file);
1921 # shouldn't the following also be in the above utility routine??
1922 # $src_file_re =~ s/\./\\./g;
1923
1924 print MOUT "<?xml version=\"1.0\"?>\n";
1925 print MOUT "<DirectoryMetadata>\n";
1926 print MOUT " <FileSet>\n";
1927 print MOUT " <FileName>$src_file_re</FileName>\n";
1928 print MOUT " <Description>\n";
1929 print MOUT " </Description>\n";
1930 print MOUT " </FileSet>\n";
1931 print MOUT "</DirectoryMetadata>\n";
1932
1933 close(MOUT);
1934 }
1935 else {
1936 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1937 }
1938 }
1939
1940
1941 my $xml_in = "";
1942 if (!open(MIN,"<$metadata_xml_filename")) {
1943 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1944 }
1945 else {
1946 # Read them in
1947 my $line;
1948 while (defined ($line=<MIN>)) {
1949 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1950 }
1951 close(MIN);
1952
1953 # Filter with the call-back functions
1954 my $xml_out = "";
1955
1956 my $MOUT;
1957 if (!open($MOUT,">$metadata_xml_filename")) {
1958 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1959 }
1960 else {
1961 binmode($MOUT,":utf8");
1962
1963 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1964 # At the moment, I will just hack it!
1965 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1966 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1967 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1968 #print MOUT $xml_out;
1969
1970 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1971 metapos => $metapos,
1972 metavalue => $metavalue,
1973 metamode => $metamode,
1974 src_file => $src_file,
1975 prevmetavalue => $prevmetavalue,
1976 current_file => undef} );
1977 close($MOUT);
1978 }
1979 }
1980}
1981
1982
1983sub set_import_metadata
1984{
1985 my $self = shift @_;
1986
1987 my $username = $self->{'username'};
1988 my $collect = $self->{'collect'};
1989 my $gsdl_cgi = $self->{'gsdl_cgi'};
1990
1991 if ($baseaction::authentication_enabled) {
1992 # Ensure the user is allowed to edit this collection
1993 $self->authenticate_user($username, $collect);
1994 }
1995
1996 # Make sure the collection isn't locked by someone else
1997 $self->lock_collection($username, $collect);
1998
1999 $self->_set_import_metadata(@_);
2000
2001 # Release the lock once it is done
2002 $self->unlock_collection($username, $collect);
2003
2004}
2005
2006sub set_import_metadata_array
2007{
2008 my $self = shift @_;
2009
2010 my $username = $self->{'username'};
2011 my $collect = $self->{'collect'};
2012 my $gsdl_cgi = $self->{'gsdl_cgi'};
2013
2014 if ($baseaction::authentication_enabled) {
2015 # Ensure the user is allowed to edit this collection
2016 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2017 }
2018
2019 # Obtain the collect dir
2020 my $site = $self->{'site'};
2021 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2022
2023 $gsdl_cgi->checked_chdir($collect_dir);
2024
2025 # Make sure the collection isn't locked by someone else
2026 $self->lock_collection($username, $collect);
2027
2028 $self->_set_import_metadata_array(@_);
2029
2030 # Release the lock once it is done
2031 $self->unlock_collection($username, $collect);
2032
2033}
2034
2035
2036sub _set_import_metadata_array
2037{
2038 my $self = shift @_;
2039
2040 my $collect = $self->{'collect'};
2041 my $gsdl_cgi = $self->{'gsdl_cgi'};
2042
2043 # Obtain the collect dir
2044 my $site = $self->{'site'};
2045 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2046
2047 # look up additional args
2048
2049 my $infodbtype = $self->{'infodbtype'};
2050
2051 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2052 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2053
2054 my $json_str = $self->{'json'};
2055 my $doc_array = decode_json $json_str;
2056
2057 my $global_status = 0;
2058 my $global_mess = "";
2059
2060 my @all_docids = ();
2061
2062 foreach my $doc_array_rec ( @$doc_array )
2063 {
2064 my $status = -1;
2065 my $docid = $doc_array_rec->{'docid'};
2066
2067 my ($docid_root,$docid_secnum);
2068 if(defined $docid) {
2069 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2070 # as yet no support for setting subsection metadata in metadata.xml
2071 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2072 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2073 next; # skip this docid in for loop
2074 }
2075 }
2076
2077 push(@all_docids,$docid); # docid_root rather
2078
2079 my $metaname = $doc_array_rec->{'metaname'};
2080 if (defined $metaname) {
2081 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2082 my $metavalue = $doc_array_rec->{'metavalue'};
2083 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2084
2085 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2086 # make "accumulate" the default (less destructive, as it won't actually
2087 # delete any existing values)
2088 $metamode = "accumulate";
2089 }
2090
2091 # adding metapos and prevmetavalue support to import_metadata subroutines
2092 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2093 my $prevmetavalue = $self->{'prevmetavalue'};
2094
2095 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2096
2097 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2098 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2099
2100 foreach my $metatable_rec ( @$metatable ) {
2101 $metaname = $metatable_rec->{'metaname'};
2102 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2103 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2104 # make "accumulate" the default (less destructive, as it won't actually
2105 # delete any existing values)
2106 $metamode = "accumulate";
2107 }
2108
2109 # No support for metapos and prevmetavalue in the JSON metatable substructure
2110 my $metapos = undef;
2111 my $prevmetavalue = undef;
2112 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2113
2114 foreach my $metavalue ( @$metavals ) {
2115 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2116
2117 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2118 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2119 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2120 # metavalue for this metaname will be lost
2121 $metamode = "accumulate";
2122 }
2123 }
2124 }
2125 }
2126 }
2127
2128 # always a success message
2129 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2130 $gsdl_cgi->generate_ok_message($mess);
2131}
2132
2133# always returns true (1)
2134sub set_import_metadata_entry
2135{
2136 my $self = shift @_;
2137 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2138
2139 my $info_mess = <<RAWEND;
2140****************************
2141 set_import_metadata_entry()
2142****************************
2143RAWEND
2144
2145 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2146 $info_mess .= " collect = $collect\n" if defined($collect);
2147 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2148 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2149 $info_mess .= " docid = $docid\n" if defined($docid);
2150 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2151 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2152 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2153 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2154 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2155
2156 $info_mess .= "****************************\n";
2157
2158 $gsdl_cgi->generate_message($info_mess);
2159
2160 # import works with metadata.xml which can have inherited metadata
2161 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2162 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2163 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2164 # what they're doing if they provide a metapos.
2165 if(defined $metapos) {
2166 print STDERR "@@@@ WARNING: metapos defined.\n";
2167 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2168 }
2169
2170 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2171 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2172 my $metadata_xml_file;
2173 my $import_filename = undef;
2174
2175 if (defined $docid) {
2176 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2177 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2178
2179 # This now stores the full pathname
2180 $import_filename = $doc_rec->{'src-file'}->[0];
2181 $import_filename = &util::placeholders_to_abspath($import_filename);
2182
2183 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2184 # as the array version of the method doesn't support the -f parameter yet
2185 my $import_file = $self->{'f'};
2186 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2187 }
2188
2189 # figure out correct metadata.xml file [?]
2190 # Assuming the metadata.xml file is next to the source file
2191 # Note: This will not work if it is using the inherited metadata from the parent folder
2192 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2193 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2194
2195 # If we're overriding everything, then $prevmetavalue=undefined and
2196 # $metamode=override combined with $metapos=undefined
2197 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2198 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2199 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2200## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2201 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2202
2203 }
2204
2205 # Edit the metadata.xml
2206 # Modified by Jeffrey from DL Consulting
2207 # Handle the case where there is one metadata.xml file for multiple FileSets
2208 # The XML filter needs to know whether it is in the right FileSet
2209 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2210 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2211 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2212 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2213 #return 0;
2214 return $metadata_xml_filename;
2215}
2216
2217sub _remove_import_metadata
2218{
2219 my $self = shift @_;
2220
2221 my $collect = $self->{'collect'};
2222 my $gsdl_cgi = $self->{'gsdl_cgi'};
2223 my $infodbtype = $self->{'infodbtype'};
2224
2225 # Obtain the collect and archive dir
2226 my $site = $self->{'site'};
2227 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2228
2229 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2230
2231 # look up additional args
2232 my $docid = $self->{'d'};
2233 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2234 {
2235 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2236 }
2237
2238 my $metaname = $self->{'metaname'};
2239 my $metapos = $self->{'metapos'};
2240 my $metavalue = $self->{'metavalue'};
2241
2242 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2243 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2244
2245 if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2246 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2247 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2248 $metapos = 0;
2249 }
2250 my $metamode = $self->{'metamode'};
2251 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2252
2253 # import works with metadata.xml which can have inherited metadata
2254 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2255 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2256 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2257 # what they're doing if they provide a metapos.
2258 if(defined $metapos) {
2259 print STDERR "@@@@ WARNING: metapos defined.\n";
2260 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2261 }
2262
2263 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2264 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2265 my $metadata_xml_file;
2266 my $import_filename = undef;
2267 if (defined $docid)
2268 {
2269 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2270 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2271
2272 # This now stores the full pathname
2273 $import_filename = $doc_rec->{'src-file'}->[0];
2274 $import_filename = &util::placeholders_to_abspath($import_filename);
2275 }
2276
2277 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2278 {
2279 $gsdl_cgi->generate_error("There is no metadata\n");
2280 }
2281
2282 # figure out correct metadata.xml file [?]
2283 # Assuming the metadata.xml file is next to the source file
2284 # Note: This will not work if it is using the inherited metadata from the parent folder
2285 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2286 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2287
2288 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2289
2290 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2291 $mess .= " $metaname";
2292 $mess .= " = $metavalue\n";
2293
2294 $gsdl_cgi->generate_ok_message($mess);
2295
2296 #return $status; # in case calling functions have a use for this
2297}
2298
2299sub remove_import_metadata
2300{
2301 my $self = shift @_;
2302
2303 my $username = $self->{'username'};
2304 my $collect = $self->{'collect'};
2305 my $gsdl_cgi = $self->{'gsdl_cgi'};
2306
2307 if ($baseaction::authentication_enabled) {
2308 # Ensure the user is allowed to edit this collection
2309 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2310 }
2311
2312 # Make sure the collection isn't locked by someone else
2313 $self->lock_collection($username, $collect);
2314
2315 $self->_remove_import_metadata(@_);
2316
2317 # Release the lock once it is done
2318 $self->unlock_collection($username, $collect);
2319
2320}
2321
2322sub remove_from_metadata_xml
2323{
2324 my $self = shift @_;
2325 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2326 # metamode generally has no meaning for removing meta, but is used by set_meta
2327 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2328
2329 # Set the call-back functions for the metadata tags
2330 my @rules =
2331 (
2332 _default => 'raw',
2333 'Metadata' => \&rfmxml_metadata,
2334 'FileName' => \&mxml_filename
2335 );
2336
2337 my $parser = XML::Rules->new
2338 (
2339 rules => \@rules,
2340 style => 'filter',
2341 output_encoding => 'utf8',
2342 #normalisespaces => 1,
2343 stripspaces => 2|0|0 # ineffectual
2344 );
2345
2346 my $xml_in = "";
2347 if (!open(MIN,"<$metadata_xml_filename"))
2348 {
2349 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2350 }
2351 else
2352 {
2353 # Read them in
2354 my $line;
2355 while (defined ($line=<MIN>)) {
2356 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
2357 }
2358 close(MIN);
2359
2360 # Filter with the call-back functions
2361 my $xml_out = "";
2362
2363 my $MOUT;
2364 if (!open($MOUT,">$metadata_xml_filename")) {
2365 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2366 }
2367 else {
2368 binmode($MOUT,":utf8");
2369 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2370 close($MOUT);
2371 }
2372 }
2373}
2374
2375sub rfmxml_metadata
2376{
2377 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2378
2379 # metadata.xml does not handle subsections
2380
2381 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2382 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2383 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2384 {
2385 if (!defined $parser->{'parameters'}->{'poscount'})
2386 {
2387 $parser->{'parameters'}->{'poscount'} = 0;
2388 }
2389 else
2390 {
2391 $parser->{'parameters'}->{'poscount'}++;
2392 }
2393
2394 # if overriding but no metapos, then clear all the meta for this metaname
2395 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2396 return [];
2397 }
2398
2399 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2400 {
2401 return [];
2402 }
2403 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2404 {
2405 return [];
2406 }
2407 }
2408
2409 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2410 return [$tagname => $attrHash];
2411}
2412
2413sub _remove_live_metadata
2414{
2415 my $self = shift @_;
2416
2417 my $collect = $self->{'collect'};
2418 my $gsdl_cgi = $self->{'gsdl_cgi'};
2419 my $infodbtype = $self->{'infodbtype'};
2420
2421 # Obtain the collect dir
2422 my $site = $self->{'site'};
2423 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2424
2425 # look up additional args
2426 my $docid = $self->{'d'};
2427 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2428 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2429 }
2430
2431 # Generate the dbkey
2432 my $metaname = $self->{'metaname'};
2433 my $dbkey = "$docid.$metaname";
2434
2435 # To people who know $collect_tail please add some comments
2436 # Obtain the live gdbm_db path
2437 my $collect_tail = $collect;
2438 $collect_tail =~ s/^.*[\/|\\]//;
2439 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2440 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2441
2442 # Remove the key
2443 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2444 my $status = system($cmd);
2445 if ($status != 0) {
2446 # Catch error if gdbmdel failed
2447 my $mess = "Failed to delete metadata key: $dbkey\n";
2448
2449 $mess .= "PATH: $ENV{'PATH'}\n";
2450 $mess .= "cmd = $cmd\n";
2451 $mess .= "Exit status: $status\n";
2452 $mess .= "System Error Message: $!\n";
2453
2454 $gsdl_cgi->generate_error($mess);
2455 }
2456 else {
2457 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2458 }
2459
2460}
2461
2462sub remove_live_metadata
2463{
2464 my $self = shift @_;
2465
2466 my $username = $self->{'username'};
2467 my $collect = $self->{'collect'};
2468 my $gsdl_cgi = $self->{'gsdl_cgi'};
2469
2470 if ($baseaction::authentication_enabled) {
2471 # Ensure the user is allowed to edit this collection
2472 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2473 }
2474
2475 # Make sure the collection isn't locked by someone else
2476 $self->lock_collection($username, $collect);
2477
2478 $self->_remove_live_metadata(@_);
2479
2480 $self->unlock_collection($username, $collect);
2481}
2482
2483sub remove_metadata
2484{
2485 my $self = shift @_;
2486
2487 my $where = $self->{'where'};
2488 if(!$where || ($where =~ m/^\s*$/)) {
2489 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2490 return;
2491 }
2492
2493 my $username = $self->{'username'};
2494 my $collect = $self->{'collect'};
2495 my $gsdl_cgi = $self->{'gsdl_cgi'};
2496
2497 if ($baseaction::authentication_enabled) {
2498 # Ensure the user is allowed to edit this collection
2499 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2500 }
2501
2502 # Make sure the collection isn't locked by someone else
2503 $self->lock_collection($username, $collect);
2504
2505 # check which directories need to be processed, specified in $where as
2506 # any combination of import|archives|index|live
2507 if($where =~ m/import/) {
2508 $self->_remove_import_metadata(@_);
2509 }
2510 if($where =~ m/archives/) {
2511 $self->_remove_archives_metadata(@_);
2512 }
2513 if($where =~ m/index/) {
2514 $self->_remove_index_metadata(@_);
2515 }
2516
2517 # Release the lock once it is done
2518 $self->unlock_collection($username, $collect);
2519}
2520
2521# the internal version, without authentication
2522sub _remove_index_metadata
2523{
2524 my $self = shift @_;
2525
2526 my $collect = $self->{'collect'};
2527 my $gsdl_cgi = $self->{'gsdl_cgi'};
2528 my $infodbtype = $self->{'infodbtype'};
2529
2530 # Obtain the collect dir
2531 my $site = $self->{'site'};
2532 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2533
2534 # look up additional args
2535 my $docid = $self->{'d'};
2536 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2537 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2538 }
2539 my $metaname = $self->{'metaname'};
2540 my $metapos = $self->{'metapos'};
2541 my $metavalue = $self->{'metavalue'};
2542
2543 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2544 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2545
2546 # To people who know $collect_tail please add some comments
2547 # -> In collection groups, I think collect_tailname is the subcollection name,
2548 # e.g. colgroup-name/col-tail-name
2549 # Obtain the path to the database
2550 my $collect_tail = $collect;
2551 $collect_tail =~ s/^.*[\/|\\]//;
2552 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2553 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2554
2555 # Read the docid entry
2556 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2557
2558 # Check to make sure the key does exist
2559 if (!defined ($doc_rec->{$metaname})) {
2560 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2561 }
2562
2563 # Obtain the specified metadata pos
2564 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2565 if(!defined $metapos && !defined $metavalue) {
2566 $metapos = 0;
2567 }
2568
2569
2570 # consider check key is defined before deleting?
2571 # Loop through the metadata array and ignore the specified position
2572 my $filtered_metadata = [];
2573 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2574 for (my $i=0; $i<$num_metadata_vals; $i++) {
2575 my $metaval = shift(@{$doc_rec->{$metaname}});
2576
2577 if (!defined $metavalue && $i != $metapos) {
2578 push(@$filtered_metadata,$metaval);
2579 }
2580
2581 if(defined $metavalue && !($metavalue eq $metaval))
2582 {
2583 push(@$filtered_metadata,$metaval);
2584 }
2585 }
2586 $doc_rec->{$metaname} = $filtered_metadata;
2587
2588 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2589 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2590
2591 if ($status != 0) {
2592 my $mess = "Failed to set metadata key: $docid\n";
2593
2594 $mess .= "PATH: $ENV{'PATH'}\n";
2595 $mess .= "Exit status: $status\n";
2596 $mess .= "System Error Message: $!\n";
2597
2598 $gsdl_cgi->generate_error($mess);
2599 }
2600 else {
2601 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2602 $mess .= " $metaname";
2603 $mess .= "->[$metapos]" if (defined $metapos);
2604 $mess .= " ($metavalue)" if (defined $metavalue);
2605
2606 $gsdl_cgi->generate_ok_message($mess);
2607 }
2608
2609 #return $status; # in case calling functions have a use for this
2610}
2611
2612sub remove_index_metadata
2613{
2614 my $self = shift @_;
2615
2616 my $username = $self->{'username'};
2617 my $collect = $self->{'collect'};
2618 my $gsdl_cgi = $self->{'gsdl_cgi'};
2619
2620 if ($baseaction::authentication_enabled) {
2621 # Ensure the user is allowed to edit this collection
2622 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2623 }
2624
2625 # Obtain the collect dir
2626 my $site = $self->{'site'};
2627 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2628
2629 # Make sure the collection isn't locked by someone else
2630 $self->lock_collection($username, $collect);
2631
2632 $self->_remove_index_metadata(@_);
2633
2634 # Release the lock once it is done
2635 $self->unlock_collection($username, $collect);
2636}
2637
2638#################################################################################
2639# ERASE META METHODS: ERASE ALL VALUES FOR MATCHING METANAME AT SPECIFIED DOCID #
2640#################################################################################
2641
2642sub erase_metadata
2643{
2644 my $self = shift @_;
2645
2646 my $where = $self->{'where'};
2647 # when $where is unspecified, following behaviour of sub remove_metadata by defaulting to erasing metadata from index
2648 if(!$where || ($where =~ m/^\s*$/)) {
2649 $self->erase_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2650 return;
2651 }
2652
2653 my $username = $self->{'username'};
2654 my $collect = $self->{'collect'};
2655 my $gsdl_cgi = $self->{'gsdl_cgi'};
2656
2657 if ($baseaction::authentication_enabled) {
2658 # Ensure the user is allowed to edit this collection
2659 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2660 }
2661
2662 # Make sure the collection isn't locked by someone else
2663 $self->lock_collection($username, $collect);
2664
2665 # check which directories need to be processed, specified in $where as
2666 # any combination of import|archives|index|live
2667 if($where =~ m/import/) {
2668 $self->_erase_import_metadata(@_);
2669 }
2670 if($where =~ m/archives/) {
2671 $self->_erase_archives_metadata(@_);
2672 }
2673 if($where =~ m/index/) {
2674 $self->_erase_index_metadata(@_);
2675 }
2676
2677 # Release the lock once it is done
2678 $self->unlock_collection($username, $collect);
2679}
2680
2681# the internal version, without authentication
2682sub _erase_index_metadata
2683{
2684 my $self = shift @_;
2685
2686 my $collect = $self->{'collect'};
2687 my $gsdl_cgi = $self->{'gsdl_cgi'};
2688 my $infodbtype = $self->{'infodbtype'};
2689
2690 # Obtain the collect dir
2691 my $site = $self->{'site'};
2692 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2693
2694 # look up additional args
2695 my $docid = $self->{'d'};
2696 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2697 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2698 }
2699 my $metaname = $self->{'metaname'};
2700 my $metapos = undef;
2701 my $metavalue = undef;
2702
2703 # To people who know $collect_tail please add some comments
2704 # -> In collection groups, I think collect_tailname is the subcollection name,
2705 # e.g. colgroup-name/col-tail-name
2706 # Obtain the path to the database
2707 my $collect_tail = $collect;
2708 $collect_tail =~ s/^.*[\/|\\]//;
2709 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2710 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2711
2712 # Read the docid entry
2713 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2714
2715 # Check to make sure the key does exist
2716 if (defined ($doc_rec->{$metaname})) {
2717
2718 $doc_rec->{$metaname} = [];
2719
2720 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2721 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2722
2723 if ($status != 0) {
2724 my $mess = "Failed to erase metadata key: $docid\n";
2725
2726 $mess .= "PATH: $ENV{'PATH'}\n";
2727 $mess .= "Exit status: $status\n";
2728 $mess .= "System Error Message: $!\n";
2729
2730 $gsdl_cgi->generate_error($mess);
2731 }
2732 else {
2733 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2734 $mess .= " $metaname";
2735 $mess .= "->[$metapos]" if (defined $metapos);
2736 $mess .= " ($metavalue)" if (defined $metavalue);
2737
2738 $gsdl_cgi->generate_ok_message($mess);
2739 }
2740 }
2741 else { # no such metaname. Is it really an error?
2742 $gsdl_cgi->generate_ok_message("Can't erase. No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2743 }
2744 #return $status; # in case calling functions have a use for this
2745}
2746
2747sub erase_index_metadata
2748{
2749 my $self = shift @_;
2750
2751 my $username = $self->{'username'};
2752 my $collect = $self->{'collect'};
2753 my $gsdl_cgi = $self->{'gsdl_cgi'};
2754
2755 if ($baseaction::authentication_enabled) {
2756 # Ensure the user is allowed to edit this collection
2757 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2758 }
2759
2760 # Obtain the collect dir
2761 my $site = $self->{'site'};
2762 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2763
2764 # Make sure the collection isn't locked by someone else
2765 $self->lock_collection($username, $collect);
2766
2767 $self->_erase_index_metadata(@_);
2768
2769 # Release the lock once it is done
2770 $self->unlock_collection($username, $collect);
2771}
2772
2773
2774# Given a metaname, removes ALL metadata with that metaname at the provided docid
2775sub _erase_archives_metadata
2776{
2777 my $self = shift @_;
2778
2779 my $collect = $self->{'collect'};
2780 my $gsdl_cgi = $self->{'gsdl_cgi'};
2781 my $infodbtype = $self->{'infodbtype'};
2782
2783 # Obtain the collect and archive dir
2784 my $site = $self->{'site'};
2785 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2786
2787 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2788
2789 # look up additional args
2790 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2791
2792 my $metaname = $self->{'metaname'};
2793
2794 # metapos, metavalue and metamode are never provided and therefore all undefined for erase_archives_metadata
2795 my $metapos = undef;
2796 my $metavalue = undef;
2797 my $metamode = "override"; # when override and no metapos or metaval, all values for metaname will be affected
2798
2799 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2800 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2801
2802 # This now stores the full pathname
2803 my $doc_file = $doc_rec->{'doc-file'}->[0];
2804
2805 # check if request if for file-level doc-version history 'nminus-<n>' version
2806 my $dv = $self->{'dv'};
2807 if (defined $dv && ($dv ne "")) {
2808 # Need to insert '_fldv_history/nminus-<n>' into doc_filename
2809
2810 my ($doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
2811 $doc_file = &util::filename_cat($doc_dirname,$FLDV_HISTORY_DIR,$dv,$doc_tailname);
2812 }
2813
2814 my $doc_filename = &util::filename_cat($archive_dir, $doc_file);
2815
2816 my $status = $self->remove_from_doc_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2817
2818 if ($status == 0)
2819 {
2820 my $mess = "\nerase-archives-metadata successful: \nKey[$docid]\n";
2821 $mess .= " $metaname";
2822 $gsdl_cgi->generate_ok_message($mess);
2823 }
2824 else
2825 {
2826 my $mess .= "Failed to erase archives metadata key: $docid\n";
2827 $mess .= "Exit status: $status\n";
2828 $mess .= "System Error Message: $!\n";
2829 $mess .= "-" x 20 . "\n";
2830
2831 $gsdl_cgi->generate_error($mess);
2832 }
2833
2834 #return $status; # in case calling functions have a use for this
2835}
2836
2837# practically identical to remove_archives_metadata
2838sub erase_archives_metadata
2839{
2840 my $self = shift @_;
2841
2842 my $username = $self->{'username'};
2843 my $collect = $self->{'collect'};
2844 my $gsdl_cgi = $self->{'gsdl_cgi'};
2845
2846 if ($baseaction::authentication_enabled)
2847 {
2848 # Ensure the user is allowed to edit this collection
2849 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2850 }
2851
2852 # Make sure the collection isn't locked by someone else
2853 $self->lock_collection($username, $collect);
2854
2855 $self->_erase_archives_metadata(@_);
2856
2857 # Release the lock once it is done
2858 $self->unlock_collection($username, $collect);
2859}
2860
2861sub _erase_import_metadata
2862{
2863 my $self = shift @_;
2864
2865 my $collect = $self->{'collect'};
2866 my $gsdl_cgi = $self->{'gsdl_cgi'};
2867 my $infodbtype = $self->{'infodbtype'};
2868
2869 # Obtain the collect dir
2870 my $site = $self->{'site'};
2871 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2872
2873 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2874
2875 # look up additional args
2876 my $docid = $self->{'d'};
2877 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2878 {
2879 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2880 }
2881
2882 my $metaname = $self->{'metaname'};
2883 my $metapos = undef;
2884 my $metavalue = undef;
2885 my $metamode = "override";
2886
2887 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2888 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2889 my $metadata_xml_file;
2890 my $import_filename = undef;
2891 if (defined $docid)
2892 {
2893 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2894 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2895
2896 # This now stores the full pathname
2897 $import_filename = $doc_rec->{'src-file'}->[0];
2898 $import_filename = &util::placeholders_to_abspath($import_filename);
2899 }
2900
2901 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2902 {
2903 $gsdl_cgi->generate_error("There is no metadata\n");
2904 }
2905
2906 # figure out correct metadata.xml file [?]
2907 # Assuming the metadata.xml file is next to the source file
2908 # Note: This will not work if it is using the inherited metadata from the parent folder
2909 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2910 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2911
2912 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode=override, means all values of metaname are affected
2913
2914 my $mess = "erase-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2915 $mess .= " $metaname";
2916 $mess .= " = $metavalue\n";
2917
2918 $gsdl_cgi->generate_ok_message($mess);
2919
2920 #return $status; # in case calling functions have a use for this
2921}
2922
2923sub erase_import_metadata
2924{
2925 my $self = shift @_;
2926
2927 my $username = $self->{'username'};
2928 my $collect = $self->{'collect'};
2929 my $gsdl_cgi = $self->{'gsdl_cgi'};
2930
2931 if ($baseaction::authentication_enabled) {
2932 # Ensure the user is allowed to edit this collection
2933 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2934 }
2935
2936 # Make sure the collection isn't locked by someone else
2937 $self->lock_collection($username, $collect);
2938
2939 $self->_erase_import_metadata(@_);
2940
2941 # Release the lock once it is done
2942 $self->unlock_collection($username, $collect);
2943
2944}
2945
2946# removes all meta with matching metaname for the specified docid from the live metadata index
2947sub _erase_live_metadata
2948{
2949 my $self = shift @_;
2950 # At present, _remove_live_metadata doesn't take metaval or metapos either and therefore has the desired effect
2951 $self->_remove_live_metadata(@_);
2952
2953}
2954
2955sub erase_live_metadata
2956{
2957 my $self = shift @_;
2958
2959 my $username = $self->{'username'};
2960 my $collect = $self->{'collect'};
2961 my $gsdl_cgi = $self->{'gsdl_cgi'};
2962
2963 if ($baseaction::authentication_enabled) {
2964 # Ensure the user is allowed to edit this collection
2965 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2966 }
2967
2968 # Make sure the collection isn't locked by someone else
2969 $self->lock_collection($username, $collect);
2970
2971 $self->_erase_live_metadata(@_);
2972
2973 $self->unlock_collection($username, $collect);
2974}
2975
2976
2977# Was trying to reused the codes, but the functions need to be broken
2978# down more before they can be reused, otherwise there will be too
2979# much overhead and duplicate process...
2980sub insert_metadata
2981{
2982 my $self = shift @_;
2983
2984 my $username = $self->{'username'};
2985 my $collect = $self->{'collect'};
2986 my $gsdl_cgi = $self->{'gsdl_cgi'};
2987 my $infodbtype = $self->{'infodbtype'};
2988
2989 # If the import metadata and gdbm database have been updated, we
2990 # need to insert some notification to warn user that the the text
2991 # they see at the moment is not indexed and require a rebuild.
2992 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2993
2994 if ($baseaction::authentication_enabled) {
2995 # Ensure the user is allowed to edit this collection
2996 $self->authenticate_user($username, $collect);
2997 }
2998
2999 # Obtain the collect and archive dir
3000 my $site = $self->{'site'};
3001 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3002 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3003
3004 # Make sure the collection isn't locked by someone else
3005 $self->lock_collection($username, $collect);
3006
3007 # Check additional args
3008 my $docid = $self->{'d'};
3009 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
3010 $gsdl_cgi->generate_error("No document id is specified: d=...");
3011 }
3012 my $metaname = $self->{'metaname'};
3013 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
3014 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3015 }
3016 my $metavalue = $self->{'metavalue'};
3017 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
3018 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3019 }
3020 # make "accumulate" the default (less destructive, as it won't actually
3021 # delete any existing values)
3022 my $metamode = "accumulate";
3023
3024 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3025 my $metapos = undef;
3026 my $prevmetavalue = undef;
3027
3028 #=======================================================================#
3029 # set_import_metadata [START]
3030 #=======================================================================#
3031 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3032 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3033 my $metadata_xml_file;
3034 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3035 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3036
3037 # This now stores the full pathname
3038 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3039 $import_filename = &util::placeholders_to_abspath($import_filename);
3040
3041 # figure out correct metadata.xml file [?]
3042 # Assuming the metadata.xml file is next to the source file
3043 # Note: This will not work if it is using the inherited metadata from the parent folder
3044 my ($import_tailname, $import_dirname)
3045 = File::Basename::fileparse($import_filename);
3046 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3047
3048 # Shane's escape characters
3049 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3050 $metavalue =~ s/\,/&#44;/g;
3051 $metavalue =~ s/\:/&#58;/g;
3052 $metavalue =~ s/\|/&#124;/g;
3053 $metavalue =~ s/\(/&#40;/g;
3054 $metavalue =~ s/\)/&#41;/g;
3055 $metavalue =~ s/\[/&#91;/g;
3056 $metavalue =~ s/\\/&#92;/g;
3057 $metavalue =~ s/\]/&#93;/g;
3058 $metavalue =~ s/\{/&#123;/g;
3059 $metavalue =~ s/\}/&#125;/g;
3060 $metavalue =~ s/\"/&#34;/g;
3061 $metavalue =~ s/\`/&#96;/g;
3062 $metavalue =~ s/\n/_newline_/g;
3063
3064 # Edit the metadata.xml
3065 # Modified by Jeffrey from DL Consulting
3066 # Handle the case where there is one metadata.xml file for multiple FileSets
3067 # The XML filter needs to know whether it is in the right FileSet
3068 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3069 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3070 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3071 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3072 #=======================================================================#
3073 # set_import_metadata [END]
3074 #=======================================================================#
3075
3076
3077 #=======================================================================#
3078 # set_metadata (accumulate version) [START]
3079 #=======================================================================#
3080 # To people who know $collect_tail please add some comments
3081 # Obtain path to the database
3082 my $collect_tail = $collect;
3083 $collect_tail =~ s/^.*[\/|\\]//;
3084 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3085 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3086
3087 # Read the docid entry
3088 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3089
3090 # Protect the quotes
3091 $metavalue =~ s/\"/\\\"/g;
3092
3093 # Adds the pending macro
3094 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3095
3096 # If the metadata doesn't exist, create a new one
3097 if (!defined($doc_rec->{$metaname})){
3098 $doc_rec->{$metaname} = [ $macro_metavalue ];
3099 }
3100 # Else, let's acculumate the values
3101 else {
3102 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3103 }
3104
3105 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3106 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3107
3108 if ($status != 0) {
3109 # Catch error if gdbmget failed
3110 my $mess = "Failed to set metadata key: $docid\n";
3111
3112 $mess .= "PATH: $ENV{'PATH'}\n";
3113 $mess .= "Exit status: $status\n";
3114 $mess .= "System Error Message: $!\n";
3115
3116 $gsdl_cgi->generate_error($mess);
3117 }
3118 else {
3119 my $mess = "insert-metadata successful: Key[$docid]\n";
3120 $mess .= " [In metadata.xml] $metaname";
3121 $mess .= " = $metavalue\n";
3122 $mess .= " [In database] $metaname";
3123 $mess .= " = $macro_metavalue\n";
3124 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3125 $gsdl_cgi->generate_ok_message($mess);
3126 }
3127 #=======================================================================#
3128 # set_metadata (accumulate version) [END]
3129 #=======================================================================#
3130
3131 # Release the lock once it is done
3132 $self->unlock_collection($username, $collect);
3133}
3134
3135
3136
3137
3138sub inc_fldv_nminus1
3139{
3140 my $self = shift @_;
3141
3142 my $username = $self->{'username'};
3143 my $collect = $self->{'collect'};
3144 my $gsdl_cgi = $self->{'gsdl_cgi'};
3145 my $infodbtype = $self->{'infodbtype'};
3146
3147 # Obtain the collect dir
3148 my $site = $self->{'site'};
3149 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3150
3151 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
3152
3153 # look up additional args
3154 my $docid = $self->{'d'};
3155
3156
3157 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3158 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3159
3160 # This now stores the full pathname [is this still true??]
3161 my $doc_file = $doc_rec->{'doc-file'}->[0];
3162
3163 my ($unused_doc_tailname, $doc_dirname) = File::Basename::fileparse($doc_file);
3164 my $doc_full_dirname = &util::filename_cat($archive_dir,$doc_dirname);
3165
3166 my $fldv_full_dirname = &util::filename_cat($doc_full_dirname,$FLDV_HISTORY_DIR);
3167
3168 my $had_error = 0;
3169 my $had_error_mess = undef;
3170
3171 if (-d $fldv_full_dirname) {
3172 my $fldv_filtered_dirs = &FileUtils::readDirectoryFiltered($fldv_full_dirname,undef,"^nminus-\\d+\$");
3173
3174 my @sorted_fldv_filtered_dirs = sort {
3175 my ($a_num) = ($a =~ m/(\d+)$/);
3176 my ($b_num) = ($b =~ m/(\d+)$/);
3177
3178 # sort into descending order
3179 return $b_num <=> $a_num;
3180 } @$fldv_filtered_dirs;
3181
3182 ## shuffle all the nminus-<n> folders down by one
3183
3184 foreach my $nminus_n (@sorted_fldv_filtered_dirs) {
3185
3186 my $nminus_n_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,$nminus_n);
3187 if (-d $nminus_n_full_dir) {
3188
3189 my ($n) = ($nminus_n =~ m/(\d+)$/);
3190 my $new_n = $n + 1;
3191
3192 my $new_full_dir_plus1 = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-$new_n");
3193
3194 my $move_ok = &FileUtils::renameDirectory($nminus_n_full_dir,$new_full_dir_plus1,
3195 { 'strict' => 1 } );
3196
3197 if (!$move_ok) {
3198 $had_error_mess = "Error: Failed to move '$nminus_n' to nminus-$new_n in $fldv_full_dirname\n";
3199 $had_error = 1;
3200 last;
3201 }
3202
3203 }
3204 else {
3205 print STDERR "Warning: skipping $nminus_n_full_dir as it is not a directory\n";
3206 }
3207 }
3208 }
3209 else {
3210 # first time asked to perform the inc-nminus-1 action for this doc,
3211 # and there is not yet even a _fldv directory!
3212 # => make the directory (and that's all that's needed at this stage)
3213 print STDERR "First time file-level document-version history has been applied to this document\n";
3214 print STDERR "Creating $fldv_full_dirname\n";
3215
3216 &FileUtils::makeDirectory($fldv_full_dirname);
3217 }
3218
3219 if (!$had_error) {
3220 ### Now need to copy everything top level in doc dir into nminus-1
3221
3222 my $nminus_1_full_dir = &FileUtils::filenameConcatenate($fldv_full_dirname,"nminus-1");
3223
3224 my $copy_ok = &FileUtils::copyFilesRefRecursive([$doc_full_dirname],$nminus_1_full_dir,
3225 { 'strict' => 1, 'exclude_filter_re' => "^$FLDV_HISTORY_DIR\$" } );
3226
3227 if (!$copy_ok) {
3228 $had_error_mess = "Error: Failed to clone document $docid to form 'nminus-1' directory in $fldv_full_dirname";
3229 $had_error = 1;
3230 }
3231 }
3232
3233 if ($had_error) {
3234 $gsdl_cgi->generate_error($had_error_mess);
3235 }
3236 else {
3237 $gsdl_cgi->generate_ok_message("Successfully incremented the File-Level Document-Version History for $docid");
3238 }
3239}
3240
3241
3242# not returning 1; here since this file is conditionally included by metadataction.pm
3243# and not otherwise meant to be used on its own
3244
Note: See TracBrowser for help on using the repository browser.