source: main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm@ 32076

Last change on this file since 32076 was 32076, checked in by kjdon, 3 years ago

Bugfix (with debugging left in). The set-archive-metadata would overwrite the very first metadata value for the specified tag if metapos not provided even if prevmetavalue was provided. Changed the logic for handling metapos, prevmetavalue when setting archive metadata to fix this.

File size: 93.1 KB
Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34package metadataaction;
35
36use strict;
37
38use cgiactions::baseaction;
39
40use dbutil;
41use ghtml;
42
43use JSON;
44
45
46$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
47{
48 #SET METHODS
49 "set-live-metadata" => {
50 'compulsory-args' => [ "d", "metaname", "metavalue" ],
51 'optional-args' => [ ] },
52
53 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
54 'compulsory-args' => [ "metaname", "metavalue" ],
55 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
56
57 "set-index-metadata" => {
58 'compulsory-args' => [ "d", "metaname", "metavalue" ],
59 'optional-args' => [ "metapos", "metamode" ] },
60
61 "set-archives-metadata" => {
62 'compulsory-args' => [ "d", "metaname", "metavalue" ],
63 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
64
65 "set-import-metadata" => {
66 'compulsory-args' => [ "metaname", "metavalue" ],
67 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
68
69 #SET METHODS (ARRAY)
70 "set-metadata-array" => {
71 'compulsory-args' => [ "where", "json" ],
72 'optional-args' => [ ],
73 'help-string' => [
74 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
75
76 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
77
78# The same examples rewritten for when running the metadata-server.pl script from the commandline:
79
80# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
81
82# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
83
84 "set-archives-metadata-array" => {
85 'compulsory-args' => [ "json" ],
86 'optional-args' => [ ] },
87
88 "set-import-metadata-array" => {
89 'compulsory-args' => [ "json" ],
90 'optional-args' => [ ] },
91
92 "set-index-metadata-array" => {
93 'compulsory-args' => [ "json" ],
94 'optional-args' => [ ] },
95
96 "set-live-metadata-array" => {
97 'compulsory-args' => [ "json" ],
98 'optional-args' => [ ] },
99
100 #REMOVE METHODS
101 "remove-import-metadata" => {
102 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
103 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
104# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
105
106 "remove-archives-metadata" => {
107 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
108 'optional-args' => [ "metapos", "metavalue", "metamode" ] },
109
110 "remove-live-metadata" => {
111 'compulsory-args' => [ "d", "metaname" ],
112 'optional-args' => [ ] },
113
114 "remove-index-metadata" => {
115 'compulsory-args' => [ "d", "metaname" ],
116 'optional-args' => [ "metapos", "metavalue" ] },
117
118 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
119 'compulsory-args' => [ "d", "metaname" ],
120 'optional-args' => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
121
122 #INSERT METHODS
123 "insert-metadata" => {
124 'compulsory-args' => [ "d", "metaname", "metavalue" ],
125 'optional-args' => [ ] }
126};
127
128
129sub _set_live_metadata
130{
131 my $self = shift @_;
132
133 my $collect = $self->{'collect'};
134 my $gsdl_cgi = $self->{'gsdl_cgi'};
135 #my $gsdlhome = $self->{'gsdlhome'};
136 my $infodbtype = $self->{'infodbtype'};
137
138 # Obtain the collect dir
139 my $site = $self->{'site'};
140 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
141 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
142
143
144 # look up additional args
145 my $docid = $self->{'d'};
146 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
147 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
148 }
149 my $metavalue = $self->{'metavalue'};
150
151 # Generate the dbkey
152 my $metaname = $self->{'metaname'};
153 my $dbkey = "$docid.$metaname";
154
155 # To people who know $collect_tail please add some comments
156 # Obtain path to the database
157 my $collect_tail = $collect;
158 $collect_tail =~ s/^.*[\/|\\]//;
159 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
160 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
161
162 # Set the new value
163 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
164 my $status = system($cmd);
165 if ($status != 0) {
166 # Catch error if gdbmget failed
167 my $mess = "Failed to set metadata key: $dbkey\n";
168
169 $mess .= "PATH: $ENV{'PATH'}\n";
170 $mess .= "cmd = $cmd\n";
171 $mess .= "Exit status: $status\n";
172 $mess .= "System Error Message: $!\n";
173
174 $gsdl_cgi->generate_error($mess);
175 }
176 else {
177 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
178 }
179
180 #return $status; # in case calling functions have any further use for this
181}
182
183sub set_live_metadata
184{
185 my $self = shift @_;
186
187 my $username = $self->{'username'};
188 my $collect = $self->{'collect'};
189 my $gsdl_cgi = $self->{'gsdl_cgi'};
190
191 if ($baseaction::authentication_enabled) {
192 # Ensure the user is allowed to edit this collection
193 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
194 }
195
196 # Make sure the collection isn't locked by someone else
197 $self->lock_collection($username, $collect);
198
199 $self->_set_live_metadata(@_);
200
201 # Release the lock once it is done
202 $self->unlock_collection($username, $collect);
203}
204
205sub set_index_metadata_entry
206{
207 print STDERR "1\n";
208 my $self = shift @_;
209 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
210
211 # To people who know $collect_tail please add some comments
212 # Obtain path to the database
213 my $collect_tail = $collect;
214 $collect_tail =~ s/^.*[\/|\\]//;
215 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
216 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
217
218 print STDERR "2\n";
219# print STDERR "**** infodb file path = $infodb_file_path\n";
220# print STDERR "***** infodb type = $infodbtype\n";
221
222 # Read the docid entry
223 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
224
225 # Set the metadata value
226 if (defined $metapos) {
227 print STDERR "3\n";
228 # if metamode=accumulate AND metapos, warn user and then use metapos
229 if (defined $metamode && $metamode eq "accumulate") {
230 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
231 print STDERR "**** Proceeding by using metapos\n";
232 }
233 $doc_rec->{$metaname}->[$metapos] = $metavalue;
234 }
235 elsif (defined $prevmetavalue) {
236 print STDERR "4\n";
237 my $array = $doc_rec->{$metaname};
238 my $length = @$array;
239
240 my $found = 0;
241 for (my $i = 0; $i < $length; $i++){
242 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
243 $doc_rec->{$metaname}->[$i] = $metavalue;
244 $found = 1;
245 last;
246 }
247 }
248
249 if($found == 0){
250 $doc_rec->{$metaname} = [ $metavalue ];
251 }
252 }
253 elsif (defined $metamode && $metamode eq "override") {
254 print STDERR "5\n";
255 $doc_rec->{$metaname} = [ $metavalue ];
256 }
257 else { # default for index was to override, but because accumulate is less destructive,
258 # and because accumulate is the default for archives and import, that's the new default for index too
259 print STDERR "6\n";
260 if(defined $doc_rec->{$metaname}) {
261 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
262 } else {
263 $doc_rec->{$metaname} = [ $metavalue ];
264 }
265 }
266 print STDERR "6\n";
267
268 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
269
270 return $status;
271
272}
273
274sub _set_import_metadata
275{
276 my $self = shift @_;
277
278 my $collect = $self->{'collect'};
279 my $gsdl_cgi = $self->{'gsdl_cgi'};
280 my $infodbtype = $self->{'infodbtype'};
281# my $gsdlhome = $self->{'gsdlhome'};
282
283 # Obtain the collect and archive dir
284 my $site = $self->{'site'};
285 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
286 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
287 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
288
289 # look up additional args
290 # want either d= or f=
291 my $docid = $self->{'d'};
292 my ($docid_root,$docid_secnum);
293 if(defined $docid) {
294 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
295 # as yet no support for setting subsection metadata in metadata.xml
296 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
297 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
298 return;
299 }
300 }
301
302 my $import_file = $self->{'f'};
303 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
304 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
305 }
306
307 # Get the parameters and set default mode to "accumulate"
308 my $metaname = $self->{'metaname'};
309 my $metavalue = $self->{'metavalue'};
310## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
311 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
312
313 my $metamode = $self->{'metamode'};
314 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
315 # make "accumulate" the default (less destructive, as it won't actually
316 # delete any existing values)
317 $metamode = "accumulate";
318 }
319
320 # adding metapos and prevmetavalue support to import_metadata subroutines
321 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
322 my $prevmetavalue = $self->{'prevmetavalue'};
323 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
324 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
325
326 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
327 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
328
329 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
330 $mess .= " $metaname";
331 $mess .= " = $metavalue";
332 $mess .= " ($metamode)\n";
333
334 $gsdl_cgi->generate_ok_message($mess);
335
336 #return $status; # in case calling functions have any further use for this
337}
338
339# the version of set_index_meta that doesn't do authentication
340sub _set_archives_metadata
341{
342 my $self = shift @_;
343
344 my $collect = $self->{'collect'};
345 my $gsdl_cgi = $self->{'gsdl_cgi'};
346 my $infodbtype = $self->{'infodbtype'};
347
348 # Obtain the collect and archive dir
349 my $site = $self->{'site'};
350 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
351 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
352
353 # look up additional args
354 my $docid = $self->{'d'};
355 my $metaname = $self->{'metaname'};
356 my $metavalue = $self->{'metavalue'};
357 my $prevmetavalue = $self->{'prevmetavalue'};
358
359 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
360 # Don't append "|| undef", since if metapos=0 it will then be set to undef
361
362 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
363 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
364
365 my $metamode = $self->{'metamode'};
366 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
367 # make "accumulate" the default (less destructive, as it won't actually
368 # delete any existing values)
369 $metamode = "accumulate";
370 }
371
372 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
373 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
374
375 if ($status == 0) {
376 my $mess = "set-archives-metadata successful: Key[$docid]\n";
377 $mess .= " $metaname";
378 $mess .= "->[$metapos]" if (defined $metapos);
379 $mess .= " = $metavalue";
380 $mess .= " ($metamode)\n";
381
382 $gsdl_cgi->generate_ok_message($mess);
383 }
384 else {
385 my $mess .= "Failed to set archives metadata key: $docid\n";
386 $mess .= "Exit status: $status\n";
387 if(defined $self->{'error_msg'}) {
388 $mess .= "Error Message: $self->{'error_msg'}\n";
389 } else {
390 $mess .= "System Error Message: $!\n";
391 }
392 $mess .= "-" x 20 . "\n";
393
394 $gsdl_cgi->generate_error($mess);
395 }
396
397 #return $status; # in case calling functions have any further use for this
398}
399
400
401# the version of set_index_meta that doesn't do authentication
402sub _set_index_metadata
403{
404 print STDERR "START SET INDEX METADATA\n";
405 my $self = shift @_;
406
407 my $collect = $self->{'collect'};
408 my $gsdl_cgi = $self->{'gsdl_cgi'};
409
410 my $site = $self->{'site'};
411 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
412
413 # look up additional args
414 my $docid = $self->{'d'};
415 my $metaname = $self->{'metaname'};
416 my $metapos = $self->{'metapos'}; # undef has meaning
417 my $metavalue = $self->{'metavalue'};
418 my $infodbtype = $self->{'infodbtype'};
419 my $metamode = $self->{'metamode'};
420 my $prevmetavalue = $self->{'prevmetavalue'};
421
422 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
423 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
424
425 print STDERR "SETTING INDEX METADATA ENTRY\n";
426 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
427 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
428 if ($status != 0) {
429 # Catch error if set infodb entry failed
430 my $mess = "Failed to set metadata key: $docid\n";
431
432 $mess .= "PATH: $ENV{'PATH'}\n";
433 $mess .= "Exit status: $status\n";
434 $mess .= "System Error Message: $!\n";
435
436 $gsdl_cgi->generate_error($mess);
437 }
438 else {
439 my $mess = "set-index-metadata successful: Key[$docid]\n";
440 $mess .= " $metaname";
441 $mess .= "->[$metapos]" if (defined $metapos);
442 $mess .= " = $metavalue\n";
443
444 $gsdl_cgi->generate_ok_message($mess);
445 }
446
447 print STDERR "END SET INDEX METADATA\n";
448 #return $status; # in case calling functions have any further use for this
449}
450
451sub set_index_metadata
452{
453 my $self = shift @_;
454
455 my $username = $self->{'username'};
456 my $collect = $self->{'collect'};
457 my $gsdl_cgi = $self->{'gsdl_cgi'};
458 #my $gsdlhome = $self->{'gsdlhome'};
459
460 if ($baseaction::authentication_enabled) {
461 # Ensure the user is allowed to edit this collection
462 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
463 }
464
465 my $site = $self->{'site'};
466 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
467
468 $gsdl_cgi->checked_chdir($collect_dir);
469
470 # Obtain the collect dir
471 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
472
473 # Make sure the collection isn't locked by someone else
474 $self->lock_collection($username, $collect);
475
476 $self->_set_index_metadata(@_);
477
478 # Release the lock once it is done
479 $self->unlock_collection($username, $collect);
480}
481
482# call this to set the metadata for a combination of dirs archives, import or index, or live
483# if none specified, defaults to index which was the original behaviour of set_metadata.
484sub set_metadata
485{
486 my $self = shift @_;
487
488 # Testing that not defining a variable, setting it to "" or to " " all return false
489 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
490
491 my $where = $self->{'where'};
492 if(!$where || ($where =~ m/^\s*$/)) {
493 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
494 return;
495 }
496
497 # authenticate and lock collection once, even if processing multiple dirs
498 my $username = $self->{'username'};
499 my $collect = $self->{'collect'};
500 my $gsdl_cgi = $self->{'gsdl_cgi'};
501
502 if ($baseaction::authentication_enabled) {
503 # Ensure the user is allowed to edit this collection
504 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
505 }
506
507 if($where =~ m/index/) {
508 my $site = $self->{'site'};
509 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
510 $gsdl_cgi->checked_chdir($collect_dir);
511 }
512
513 # Make sure the collection isn't locked by someone else
514 $self->lock_collection($username, $collect);
515
516
517 # now at last can set the metadata. $where can specify multiple
518 # $where is of the form: import|archives|index, or a subset thereof
519
520 #my @whichdirs = split('\|', $where);
521
522 # just check whether $where contains import/archives/index/live in turn, and
523 # for each case, process it accordingly
524 if($where =~ m/import/) {
525 $self->_set_import_metadata(@_);
526 }
527
528 if($where =~ m/archives/) {
529
530 # look up docID arg which is optional to set_metadata because it's optional
531 # to set_import, but which is compulsory to set_archives_metadata
532 my $docid = $self->{'d'};
533 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
534 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
535 }
536 # we have a docid, so can set archives meta
537 $self->_set_archives_metadata(@_);
538 }
539
540 if($where =~ m/index/) {
541
542 # look up docID arg which is optional to set_metadata because it's optional
543 # to set_import, but which is compulsory to set_archives_metadata
544 my $docid = $self->{'d'};
545 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
546 $gsdl_cgi->generate_error("No docid (d=...) specified.");
547 }
548 # we have a docid, so can set index meta
549 $self->_set_index_metadata(@_);
550 }
551
552 if($where =~ m/live/) {
553 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
554 }
555
556 # Release the lock once it is done
557 $self->unlock_collection($username, $collect);
558}
559
560sub set_metadata_array
561{
562 my $self = shift @_;
563
564 my $where = $self->{'where'};
565 if(!$where || ($where =~ m/^\s*$/)) {
566 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
567 return;
568 }
569
570 my $username = $self->{'username'};
571 my $collect = $self->{'collect'};
572 my $gsdl_cgi = $self->{'gsdl_cgi'};
573
574 if ($baseaction::authentication_enabled) {
575 # Ensure the user is allowed to edit this collection
576 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
577 }
578
579 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
580 # But including the stmt during this code reorganisation to preserve as-is what used to happen
581 my $site = $self->{'site'};
582 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
583 $gsdl_cgi->checked_chdir($collect_dir);
584
585 # Make sure the collection isn't locked by someone else
586 $self->lock_collection($username, $collect);
587
588 if($where =~ m/import/) {
589 $self->_set_import_metadata_array(@_);
590 }
591 if($where =~ m/archives/) {
592 $self->_set_archives_metadata_array(@_);
593 }
594 if($where =~ m/index/) {
595 $self->_set_index_metadata_array(@_);
596 }
597 if($where =~ m/live/) {
598 $self->_set_live_metadata_array(@_);
599 }
600
601 # Release the lock once it is done
602 $self->unlock_collection($username, $collect);
603}
604
605sub _set_index_metadata_array
606{
607 my $self = shift @_;
608
609 my $collect = $self->{'collect'};
610 my $gsdl_cgi = $self->{'gsdl_cgi'};
611 my $site = $self->{'site'};
612 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
613
614
615 # look up additional args
616
617 my $infodbtype = $self->{'infodbtype'};
618
619 my $json_str = $self->{'json'};
620 my $doc_array = decode_json $json_str;
621
622
623 my $global_status = 0;
624 my $global_mess = "";
625
626 my @all_docids = ();
627
628 foreach my $doc_array_rec ( @$doc_array ) {
629
630 my $status = -1;
631 my $docid = $doc_array_rec->{'docid'};
632
633 push(@all_docids,$docid);
634
635 my $metaname = $doc_array_rec->{'metaname'};
636 if(defined $metaname) {
637 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
638 my $metavalue = $doc_array_rec->{'metavalue'};
639 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
640
641 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
642 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
643 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
644
645 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
646 $metaname = $metatable_rec->{'metaname'};
647 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
648 my $metapos = undef;
649 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
650
651 foreach my $metavalue ( @$metavals ) { # metavals is an array
652 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
653 if($metamode eq "override") { # now, having overridden the metavalue for the first,
654 # need to accumulate subsequent metavals for this metaname, else the just-assigned
655 # metavalue for this metaname will be lost
656 $metamode = "accumulate";
657 }
658 }
659 }
660 }
661
662 if ($status != 0) {
663 # Catch error if set infodb entry failed
664 $global_status = $status;
665 $global_mess .= "Failed to set metadata key: $docid\n";
666 $global_mess .= "Exit status: $status\n";
667 $global_mess .= "System Error Message: $!\n";
668 $global_mess .= "-" x 20;
669 }
670 }
671
672 if ($global_status != 0) {
673 $global_mess .= "PATH: $ENV{'PATH'}\n";
674 $gsdl_cgi->generate_error($global_mess);
675 }
676 else {
677 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
678 $gsdl_cgi->generate_ok_message($mess);
679 }
680}
681
682sub set_index_metadata_array
683{
684 my $self = shift @_;
685
686 my $username = $self->{'username'};
687 my $collect = $self->{'collect'};
688 my $gsdl_cgi = $self->{'gsdl_cgi'};
689# my $gsdlhome = $self->{'gsdlhome'};
690
691 if ($baseaction::authentication_enabled) {
692 # Ensure the user is allowed to edit this collection
693 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
694 }
695
696 my $site = $self->{'site'};
697 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
698
699 $gsdl_cgi->checked_chdir($collect_dir);
700
701 # Obtain the collect dir
702 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
703
704 # Make sure the collection isn't locked by someone else
705 $self->lock_collection($username, $collect);
706
707 $self->_set_index_metadata_array(@_);
708
709 # Release the lock once it is done
710 $self->unlock_collection($username, $collect);
711}
712
713# experimental, newly added in and untested
714sub _set_live_metadata_array
715{
716 my $self = shift @_;
717
718 my $collect = $self->{'collect'};
719 my $gsdl_cgi = $self->{'gsdl_cgi'};
720
721 my $site = $self->{'site'};
722 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
723
724
725 # look up additional args
726 my $infodbtype = $self->{'infodbtype'};
727 # To people who know $collect_tail please add some comments
728 # Obtain path to the database
729 my $collect_tail = $collect;
730 $collect_tail =~ s/^.*[\/|\\]//;
731 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
732 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
733
734
735 my $json_str = $self->{'json'};
736 my $doc_array = decode_json $json_str;
737
738
739 my $global_status = 0;
740 my $global_mess = "";
741
742 my @all_docids = ();
743
744
745 foreach my $doc_array_rec ( @$doc_array ) {
746
747 my $status = -1;
748 my $docid = $doc_array_rec->{'docid'};
749
750 push(@all_docids,$docid);
751
752 my $metaname = $doc_array_rec->{'metaname'};
753 if(defined $metaname) {
754 my $dbkey = "$docid.$metaname";
755 my $metavalue = $doc_array_rec->{'metavalue'};
756
757 # Set the new value
758 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
759 $status = system($cmd);
760
761 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
762 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
763 foreach my $metatable_rec ( @$metatable ) {
764 $metaname = $metatable_rec->{'metaname'};
765 my $dbkey = "$docid.$metaname";
766
767 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
768 foreach my $metavalue ( @$metavals ) {
769 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
770 $status = system($cmd);
771 }
772 }
773
774 }
775
776 if ($status != 0) {
777 # Catch error if gdbmget failed
778 $global_status = $status;
779 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
780 $global_mess .= "Exit status: $status\n";
781 $global_mess .= "System Error Message: $!\n";
782 $global_mess .= "-" x 20;
783 }
784 }
785
786 if ($global_status != 0) {
787 $global_mess .= "PATH: $ENV{'PATH'}\n";
788 $gsdl_cgi->generate_error($global_mess);
789 }
790 else {
791 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
792 $gsdl_cgi->generate_ok_message($mess);
793 }
794}
795
796sub set_live_metadata_array
797{
798 my $self = shift @_;
799
800 my $username = $self->{'username'};
801 my $collect = $self->{'collect'};
802 my $gsdl_cgi = $self->{'gsdl_cgi'};
803
804 if ($baseaction::authentication_enabled) {
805 # Ensure the user is allowed to edit this collection
806 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
807 }
808
809 my $site = $self->{'site'};
810 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
811
812 $gsdl_cgi->checked_chdir($collect_dir);
813
814 # Make sure the collection isn't locked by someone else
815 $self->lock_collection($username, $collect);
816
817 $self->_set_live_metadata_array(@_);
818
819 # Release the lock once it is done
820 $self->unlock_collection($username, $collect);
821}
822
823
824sub dxml_metadata
825{
826 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
827 my $metaname = $parser->{'parameters'}->{'metaname'};
828 my $metamode = $parser->{'parameters'}->{'metamode'};
829
830###!!! print STDERR "**** Processing closing </Metadata> tag\n";
831
832 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
833
834 # Find the right metadata tag and checks if we are going to
835 # override it
836 #
837 # Note: This over writes the first metadata block it
838 # encountered. If there are multiple Sections in the doc.xml, it
839 # might not behave as you would expect
840
841 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
842 print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n" if (defined $opt_doc_secnum);
843 print STDERR "**** metamode = $metamode\n";
844
845 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
846 {
847 my $name_attr = $attrHash->{'name'};
848 print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
849 if (($name_attr eq $metaname) && ($metamode eq "override"))
850 {
851 if (!defined $parser->{'parameters'}->{'poscount'})
852 {
853 $parser->{'parameters'}->{'poscount'} = 0;
854 }
855 else
856 {
857 $parser->{'parameters'}->{'poscount'}++;
858 }
859
860 if (defined $parser->{'parameters'}->{'metapos'}) {
861
862 if ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) {
863 print STDERR "#### got pos match!!\n";
864 # Get the value and override the current value
865 my $metavalue = $parser->{'parameters'}->{'metavalue'};
866 $attrHash->{'_content'} = $metavalue;
867
868 # Don't want it to wipe out any other pieces of metadata
869 $parser->{'parameters'}->{'metamode'} = "done";
870 }
871 }
872 elsif (defined $parser->{'parameters'}->{'prevmetavalue'}) {
873 if ($parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) {
874 print STDERR "### prev meta value matches\n";
875 my $metavalue = $parser->{'parameters'}->{'metavalue'};
876 $attrHash->{'_content'} = $metavalue;
877 $parser->{'parameters'}->{'metamode'} = "done";
878
879 }
880 }
881 elsif ($parser->{'parameters'}->{'poscount'} == 0) { # explicit catch all
882 print STDERR "#### got pos match!!\n";
883 # Get the value and override the current value
884 my $metavalue = $parser->{'parameters'}->{'metavalue'};
885 $attrHash->{'_content'} = $metavalue;
886
887 # Don't want it to wipe out any other pieces of metadata
888 $parser->{'parameters'}->{'metamode'} = "done";
889
890
891 }
892
893
894
895 }
896 }
897
898 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
899 return [$tagname => $attrHash];
900}
901
902# This method exists purely for catching invalid section numbers that the client
903# requested to edit. Once the parser has reached the end (the final </Archive> tag),
904# we've seen all the Sections in the doc.xml, and none of their section nums matched
905# if the metamode has not been set to 'done' by then.
906sub dxml_archive
907{
908 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
909 my $metamode = $parser->{'parameters'}->{'metamode'};
910
911 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
912 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
913
914# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
915
916 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
917 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
918
919 my $metaname = $parser->{'parameters'}->{'metaname'};
920 my $metavalue = $parser->{'parameters'}->{'metavalue'};
921
922 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
923 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
924 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
925 print STDERR "\tin section $opt_doc_secnum.\n";
926 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
927 }
928
929 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
930 return [$tagname => $attrHash];
931}
932
933sub dxml_description
934{
935 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
936 my $metamode = $parser->{'parameters'}->{'metamode'};
937
938 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
939 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
940
941###!!! print STDERR "**** Processing a closing </Description> tag \n";
942# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
943
944 # Accumulate the metadata
945
946 # We'll be accumulating metadata at this point if we haven't found and therefore
947 # haven't processed the metadata yet.
948 # For subsections, this means that if we're at a matching subsection, but haven't
949 # found the correct metaname to override in that subsection, we accumulate it as new
950 # meta in the subsection by adding it to the current description.
951 # If there's no subsection info for the metadata, it will accumulate at the top level
952 # section description if we hadn't found a matching metaname to override at this point.
953
954 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
955 # at the toplevel section. In the latter case, it means we want to process meta in the
956 # toplevel section. So the eq check between the values below will work in all cases.
957
958 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
959 # this document has been provided. In that case, the metadata for that opt_doc_secnum
960 # won't get attached/accumulated to any part of the doc, not even its top-level section.
961
962 if ($curr_secnum eq $opt_doc_secnum
963 && ($metamode eq "accumulate" || $metamode eq "override")) {
964 if ($metamode eq "override") {
965 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
966 }
967
968 # If we get to here and metamode is override, this means there
969 # was no existing value to overide => treat as an append operation
970
971 # Tack a new metadata tag on to the end of the <Metadata>+ block
972 my $metaname = $parser->{'parameters'}->{'metaname'};
973 my $metavalue = $parser->{'parameters'}->{'metavalue'};
974
975 my $metadata_attr = {
976 '_content' => $metavalue,
977 'name' => $metaname,
978 'mode' => "accumulate"
979 };
980
981 my $append_metadata = [ "Metadata" => $metadata_attr ];
982 my $description_content = $attrHash->{'_content'};
983
984 print "Appending metadata to doc.xml\n";
985
986 if (ref($description_content)) {
987 # got some existing interesting nested content
988 push(@$description_content, " ", $append_metadata ,"\n ");
989 }
990 else {
991 #description_content is most likely a string such as "\n"
992 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
993 }
994
995 $parser->{'parameters'}->{'metamode'} = "done";
996 }
997 else {
998 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
999## print STDERR "**** NOT ACCUMULATE?!? \n";
1000 }
1001
1002 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1003 return [$tagname => $attrHash];
1004}
1005
1006sub edit_xml_file
1007{
1008 my $self = shift @_;
1009 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1010
1011 # use XML::Rules to add it in (read in and out again)
1012 my $parser = XML::Rules->new(start_rules => $start_rules,
1013 rules => $rules,
1014 style => 'filter',
1015 output_encoding => 'utf8' );
1016
1017 my $xml_in = "";
1018 if (!open(MIN,"<$filename")) {
1019 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1020 }
1021 else {
1022 # Read all the text in
1023 my $line;
1024 while (defined ($line=<MIN>)) {
1025 $xml_in .= $line;
1026 }
1027 close(MIN);
1028
1029 my $MOUT;
1030 if (!open($MOUT,">$filename")) {
1031 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1032 }
1033 else {
1034 # Matched lines will get handled by the call backs
1035## my $xml_out = "";
1036
1037 binmode($MOUT,":utf8");
1038 $parser->filter($xml_in,$MOUT, $options);
1039
1040# binmode(MOUT,":utf8");
1041# print MOUT $xml_out;
1042 close($MOUT);
1043 }
1044 }
1045
1046 # copy across any custom error information that was stored during parsing
1047 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1048}
1049
1050sub edit_doc_xml
1051{
1052 my $self = shift @_;
1053 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1054
1055 my $info_mess = <<RAWEND;
1056****************************
1057 edit_doc_xml()
1058****************************
1059RAWEND
1060
1061 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1062 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1063 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1064 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1065 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1066 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1067 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1068
1069 $info_mess .= "****************************\n";
1070
1071 $gsdl_cgi->generate_message($info_mess);
1072
1073 # To monitor which section/subsection number we are in
1074 my @start_rules =
1075 ( 'Section' => \&dxml_start_section );
1076
1077 # use XML::Rules to add it in (read in and out again)
1078 # Set the call back functions
1079 my @rules =
1080 ( _default => 'raw',
1081 'Metadata' => \&dxml_metadata,
1082 'Description' => \&dxml_description,
1083 'Archive' => \&dxml_archive); # just for catching errors at end
1084
1085 # Sets the parameters
1086 my $options = { 'metaname' => $metaname,
1087 'metapos' => $metapos,
1088 'metavalue' => $metavalue,
1089 'metamode' => $metamode,
1090 'prevmetavalue' => $prevmetavalue };
1091
1092 if (defined $opt_secnum) {
1093 $options->{'secnum'} = $opt_secnum;
1094 }
1095
1096 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1097}
1098
1099sub set_archives_metadata_entry
1100{
1101 my $self = shift @_;
1102 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1103
1104 my $info_mess = <<RAWEND;
1105****************************
1106 set_archives_metadata_entry()
1107****************************
1108RAWEND
1109
1110 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1111 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1112 $info_mess .= " collect = $collect\n" if defined($collect);
1113 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1114 $info_mess .= " docid = $docid\n" if defined($docid);
1115 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1116 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1117 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1118 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1119 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1120
1121 $info_mess .= "****************************\n";
1122
1123 $gsdl_cgi->generate_message($info_mess);
1124
1125 # Obtain the doc.xml path for the specified docID
1126 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1127
1128 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1129 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1130 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1131
1132 # The $doc_xml_file is relative to the archives, and now let's get the full path
1133 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1134 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1135
1136 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1137 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1138 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1139 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1140 # remove all values of $metaname metadata
1141 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1142 }
1143 # Edit the doc.xml file with the specified metadata name, value and position.
1144 # TODO: there is a potential problem here as this edit_doc_xml function
1145 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1146 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1147
1148 # dxml_metadata method ignores metapos if metamode anything other than override
1149 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1150 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1151
1152 # return 0; # return 0 for now to indicate no error
1153 return (defined $self->{'error_msg'}) ? 1 : 0;
1154}
1155
1156
1157sub set_archives_metadata
1158{
1159 my $self = shift @_;
1160
1161 my $username = $self->{'username'};
1162 my $collect = $self->{'collect'};
1163 my $gsdl_cgi = $self->{'gsdl_cgi'};
1164
1165 if ($baseaction::authentication_enabled) {
1166 # Ensure the user is allowed to edit this collection
1167 $self->authenticate_user($username, $collect);
1168 }
1169
1170 # Make sure the collection isn't locked by someone else
1171 $self->lock_collection($username, $collect);
1172
1173 $self->_set_archives_metadata(@_);
1174
1175 # Release the lock once it is done
1176 $self->unlock_collection($username, $collect);
1177}
1178
1179sub _set_archives_metadata_array
1180{
1181 my $self = shift @_;
1182
1183 my $collect = $self->{'collect'};
1184 my $gsdl_cgi = $self->{'gsdl_cgi'};
1185 my $site = $self->{'site'};
1186 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1187
1188 # look up additional args
1189
1190 my $infodbtype = $self->{'infodbtype'};
1191
1192 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1193
1194 my $json_str = $self->{'json'};
1195 my $doc_array = decode_json $json_str;
1196
1197
1198 my $global_status = 0;
1199 my $global_mess = "";
1200
1201 my @all_docids = ();
1202
1203 foreach my $doc_array_rec ( @$doc_array ) {
1204 my $status = -1;
1205 my $docid = $doc_array_rec->{'docid'};
1206
1207 push(@all_docids,$docid);
1208
1209 my $metaname = $doc_array_rec->{'metaname'};
1210 if(defined $metaname) {
1211
1212 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1213
1214 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1215 my $metavalue = $doc_array_rec->{'metavalue'};
1216 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1217
1218
1219 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1220 # make "accumulate" the default (less destructive, as it won't actually
1221 # delete any existing values)
1222 $metamode = "accumulate";
1223 }
1224
1225 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1226 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1227 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1228 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1229
1230 foreach my $metatable_rec ( @$metatable ) {
1231 $metaname = $metatable_rec->{'metaname'};
1232 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1233 my $metapos = undef;
1234 my $prevmetavalue = undef;
1235 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1236
1237 foreach my $metavalue ( @$metavals ) {
1238 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1239 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1240
1241 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1242 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1243 # metavalue for this metaname will be lost
1244 $metamode = "accumulate";
1245 }
1246 }
1247 }
1248 }
1249
1250 if ($status != 0) {
1251 # Catch error if set infodb entry failed
1252 $global_status = $status;
1253 $global_mess .= "Failed to set metadata key: $docid\n";
1254 $global_mess .= "Exit status: $status\n";
1255 $global_mess .= "System Error Message: $!\n";
1256 $global_mess .= "-" x 20 . "\n";
1257 }
1258 }
1259
1260 if ($global_status != 0) {
1261 $global_mess .= "PATH: $ENV{'PATH'}\n";
1262 $gsdl_cgi->generate_error($global_mess);
1263 }
1264 else {
1265 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1266 $gsdl_cgi->generate_ok_message($mess);
1267 }
1268}
1269
1270sub set_archives_metadata_array
1271{
1272 my $self = shift @_;
1273
1274 my $username = $self->{'username'};
1275 my $collect = $self->{'collect'};
1276 my $gsdl_cgi = $self->{'gsdl_cgi'};
1277# my $gsdlhome = $self->{'gsdlhome'};
1278
1279 if ($baseaction::authentication_enabled) {
1280 # Ensure the user is allowed to edit this collection
1281 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1282 }
1283
1284 my $site = $self->{'site'};
1285 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1286
1287 $gsdl_cgi->checked_chdir($collect_dir);
1288
1289 # Obtain the collect dir
1290 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1291
1292 # Make sure the collection isn't locked by someone else
1293 $self->lock_collection($username, $collect);
1294
1295 $self->_set_archives_metadata_array(@_);
1296
1297 # Release the lock once it is done
1298 $self->unlock_collection($username, $collect);
1299}
1300
1301sub _remove_archives_metadata
1302{
1303 my $self = shift @_;
1304
1305 my $collect = $self->{'collect'};
1306 my $gsdl_cgi = $self->{'gsdl_cgi'};
1307# my $gsdlhome = $self->{'gsdlhome'};
1308 my $infodbtype = $self->{'infodbtype'};
1309
1310 my $site = $self->{'site'};
1311
1312 # Obtain the collect and archive dir
1313 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1314
1315 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1316
1317 # look up additional args
1318 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1319
1320 my $metaname = $self->{'metaname'};
1321 my $metapos = $self->{'metapos'};
1322 my $metavalue = $self->{'metavalue'};
1323
1324 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1325 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1326
1327 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1328 # default to deleting the first metavalue for the given metaname
1329 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1330 # seem to get deleted in one single remove_archives_meta action invocation.
1331 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1332 # deleted, both get deleted.
1333 if(!defined $metapos && !defined $metavalue) {
1334 $metapos = 0;
1335 }
1336
1337 my $metamode = $self->{'metamode'};
1338 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1339
1340 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1341 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1342
1343 # This now stores the full pathname
1344 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1345
1346 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1347# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1348
1349 if ($status == 0)
1350 {
1351 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1352 $mess .= " $metaname";
1353 $mess .= "->[$metapos]" if (defined $metapos);
1354 $mess .= " ($metavalue)" if (defined $metavalue);
1355 $gsdl_cgi->generate_ok_message($mess);
1356 }
1357 else
1358 {
1359 my $mess .= "Failed to remove archives metadata key: $docid\n";
1360 $mess .= "Exit status: $status\n";
1361 $mess .= "System Error Message: $!\n";
1362 $mess .= "-" x 20 . "\n";
1363
1364 $gsdl_cgi->generate_error($mess);
1365 }
1366
1367 #return $status; # in case calling functions have a use for this
1368}
1369
1370sub remove_archives_metadata
1371{
1372 my $self = shift @_;
1373
1374 my $username = $self->{'username'};
1375 my $collect = $self->{'collect'};
1376 my $gsdl_cgi = $self->{'gsdl_cgi'};
1377
1378 if ($baseaction::authentication_enabled)
1379 {
1380 # Ensure the user is allowed to edit this collection
1381 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1382 }
1383
1384 # Make sure the collection isn't locked by someone else
1385 $self->lock_collection($username, $collect);
1386
1387 $self->_remove_archives_metadata(@_);
1388
1389 # Release the lock once it is done
1390 $self->unlock_collection($username, $collect);
1391}
1392
1393sub remove_from_doc_xml
1394{
1395 my $self = shift @_;
1396 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1397
1398 my @start_rules = ('Section' => \&dxml_start_section);
1399
1400 # Set the call-back functions for the metadata tags
1401 my @rules =
1402 (
1403 _default => 'raw',
1404 'Metadata' => \&rfdxml_metadata
1405 );
1406
1407 my $parser = XML::Rules->new
1408 (
1409 start_rules => \@start_rules,
1410 rules => \@rules,
1411 style => 'filter',
1412 output_encoding => 'utf8',
1413# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1414 stripspaces => 2|0|0 # ineffectual
1415 );
1416
1417 my $status = 0;
1418 my $xml_in = "";
1419 if (!open(MIN,"<$doc_xml_filename"))
1420 {
1421 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1422 $status = 1;
1423 }
1424 else
1425 {
1426 # Read them in
1427 my $line;
1428 while (defined ($line=<MIN>)) {
1429 $xml_in .= $line;
1430 }
1431 close(MIN);
1432
1433 # Filter with the call-back functions
1434 my $xml_out = "";
1435
1436 my $MOUT;
1437 if (!open($MOUT,">$doc_xml_filename")) {
1438 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1439 $status = 1;
1440 }
1441 else {
1442 binmode($MOUT,":utf8");
1443 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1444 close($MOUT);
1445 }
1446 }
1447 return $status;
1448}
1449
1450sub rfdxml_metadata
1451{
1452 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1453
1454 # For comparisons, toplevel section is indicated by ""
1455 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1456 my $secid = $parser->{'parameters'}->{'secid'} || "";
1457
1458 if (!($secid eq $curr_sec_num))
1459 {
1460 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1461 return [$tagname => $attrHash];
1462 }
1463
1464 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1465 {
1466 if (!defined $parser->{'parameters'}->{'poscount'})
1467 {
1468 $parser->{'parameters'}->{'poscount'} = 0;
1469 }
1470 else
1471 {
1472 $parser->{'parameters'}->{'poscount'}++;
1473 }
1474
1475 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1476 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
1477 return [];
1478 }
1479
1480 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1481 {
1482 return [];
1483 }
1484
1485 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1486 {
1487 return [];
1488 }
1489 }
1490
1491 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1492 return [$tagname => $attrHash];
1493}
1494
1495sub mxml_metadata
1496{
1497 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1498 my $metaname = $parser->{'parameters'}->{'metaname'};
1499 my $metamode = $parser->{'parameters'}->{'metamode'};
1500
1501 # Report error if we don't see FileName tag before this
1502 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1503
1504 # Don't do anything if we are not in the right FileSet
1505 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1506 if ($file_regexp =~ /\.\*/) {
1507 # Only interested in a file_regexp if it specifies precisely one
1508 # file.
1509 # So, skip anything with a .* in it as it is too general
1510## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1511 return [$tagname => $attrHash];
1512 }
1513 my $src_file = $parser->{'parameters'}->{'src_file'};
1514 if (!($src_file =~ /$file_regexp/)) {
1515 return [$tagname => $attrHash];
1516 }
1517## print STDERR "*** mxl metamode = $metamode\n";
1518
1519 # Find the right metadata tag and checks if we are going to override it
1520 my $name_attr = $attrHash->{'name'};
1521 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1522
1523 # now metadata.xml functions need to keep track of metapos
1524 if (!defined $parser->{'parameters'}->{'poscount'})
1525 {
1526 $parser->{'parameters'}->{'poscount'} = 0;
1527 }
1528 else
1529 {
1530 $parser->{'parameters'}->{'poscount'}++;
1531 }
1532
1533 # If either the metapos or prevmetavalue is set,
1534 # get the value and override the current value
1535 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1536
1537 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1538 {
1539 $attrHash->{'_content'} = $metavalue;
1540
1541 ## print STDERR "**** overriding metadata.xml\n";
1542
1543 # Don't want it to wipe out any other pieces of metadata
1544 $parser->{'parameters'}->{'metamode'} = "done";
1545 }
1546 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1547 {
1548 $attrHash->{'_content'} = $metavalue;
1549 $parser->{'parameters'}->{'metamode'} = "done";
1550 }
1551 }
1552
1553 # mxml_description will process the metadata if metadata is accumulate,
1554 # or if we haven't found the metadata to override
1555
1556 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1557 return [$tagname => $attrHash];
1558}
1559
1560
1561sub mxml_description
1562{
1563 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1564 my $metamode = $parser->{'parameters'}->{'metamode'};
1565
1566 # Failed... Report error if we don't see FileName tag before this
1567 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1568
1569 # Don't do anything if we are not in the right FileSet
1570 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1571 if ($file_regexp =~ m/\.\*/) {
1572 # Only interested in a file_regexp if it specifies precisely one
1573 # file.
1574 # So, skip anything with a .* in it as it is too general
1575 return [$tagname => $attrHash];
1576 }
1577 my $src_file = $parser->{'parameters'}->{'src_file'};
1578
1579 if (!($src_file =~ m/$file_regexp/)) {
1580 return [$tagname => $attrHash];
1581 }
1582
1583 # Accumulate the metadata block to the end of the description block
1584 # Note: This adds metadata block to all description blocks, so if there are
1585 # multiple FileSets, it will add to all of them
1586 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1587
1588 # if metamode was "override" but get to here then it failed to
1589 # find an item to override, in which case it should append its
1590 # value to the end, just like the "accumulate" mode
1591
1592 if ($metamode eq "override") {
1593 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1594 }
1595
1596 # tack a new metadata tag on to the end of the <Metadata>+ block
1597 my $metaname = $parser->{'parameters'}->{'metaname'};
1598 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1599
1600 my $metadata_attr = { '_content' => $metavalue,
1601 'name' => $metaname,
1602 'mode' => "accumulate" };
1603
1604 my $append_metadata = [ "Metadata" => $metadata_attr ];
1605 my $description_content = $attrHash->{'_content'};
1606
1607## print STDERR "*** appending to metadata.xml\n";
1608
1609 # append the new metadata element to the end of the current
1610 # content contained inside this tag
1611 if (ref($description_content) eq "") {
1612 # => string or numeric literal
1613 # this is caused by a <Description> block has no <Metadata> child elements
1614 # => set up an empty array in '_content'
1615 $attrHash->{'_content'} = [ "\n" ];
1616 $description_content = $attrHash->{'_content'};
1617 }
1618
1619 push(@$description_content," ", $append_metadata ,"\n ");
1620 $parser->{'parameters'}->{'metamode'} = "done";
1621 }
1622
1623 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1624 return [$tagname => $attrHash];
1625}
1626
1627
1628sub mxml_fileset
1629{
1630 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1631
1632 # Initilise the current_file
1633 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1634 # FileName tag must come before Description tag
1635 $parser->{'parameters'}->{'current_file'} = "";
1636
1637 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1638 return [$tagname => $attrHash];
1639}
1640
1641sub mxml_directorymetadata
1642{
1643 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1644
1645 # if we haven't processed the metadata when we reach the end of metadata.xml
1646 # it's because there's no particular FileSet element whose FileName matched
1647 # In which case, add a new FileSet for this FileName
1648 my $metamode = $parser->{'parameters'}->{'metamode'};
1649 if($metamode ne "done") {
1650
1651 if ($metamode eq "override") {
1652 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1653 }
1654
1655 # If we get to here and metamode is override, this means there
1656 # was no existing value to overide => treat as an append operation
1657
1658 # Create a new FileSet element and append to DirectoryMetadata
1659 # <FileSet>
1660 # <FileName>src_file</FileName>
1661 # <Description>
1662 # <Metadata mode="" name="">metavalue</Metadata>
1663 # </Description>
1664 # </FileSet>
1665 my $src_file = $parser->{'parameters'}->{'src_file'};
1666 my $metaname = $parser->{'parameters'}->{'metaname'};
1667 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1668 my $metadata_attr = {
1669 '_content' => $metavalue,
1670 'name' => $metaname,
1671 'mode' => "accumulate"
1672 };
1673 my $append_metadata = [ "Metadata" => $metadata_attr ];
1674 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
1675 my $description_element = [ "Description" => $description_attr ];
1676
1677 #_content is not an attribute, it's special and holds the children of this element
1678 # including the textnode value embedded in this element if any.
1679 my $filename_attr = {'_content' => $src_file};
1680 my $filename_element = [ "FileName" => $filename_attr ];
1681
1682 my $fileset_attr = {};
1683 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1684 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1685
1686
1687 # get children of dirmeta, and push the new FileSet element onto it
1688 print "Appending metadata to metadata.xml\n";
1689 my $dirmeta_content = $attrHash->{'_content'};
1690 if (ref($dirmeta_content)) {
1691 # got some existing interesting nested content
1692 #push(@$dirmeta_content, " ", $fileset ,"\n ");
1693 push(@$dirmeta_content, "\t", $fileset ,"\n");
1694 }
1695 else {
1696 #description_content is most likely a string such as "\n"
1697 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
1698 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1699 }
1700
1701 $parser->{'parameters'}->{'metamode'} = "done";
1702 }
1703 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1704 return [$tagname => $attrHash];
1705}
1706
1707
1708sub edit_metadata_xml
1709{
1710 my $self = shift @_;
1711 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1712
1713 # Set the call-back functions for the metadata tags
1714 my @rules =
1715 ( _default => 'raw',
1716 'FileName' => \&mxml_filename,
1717 'Metadata' => \&mxml_metadata,
1718 'Description' => \&mxml_description,
1719 'FileSet' => \&mxml_fileset,
1720 'DirectoryMetadata' => \&mxml_directorymetadata);
1721
1722 # use XML::Rules to add it in (read in and out again)
1723 my $parser = XML::Rules->new(rules => \@rules,
1724 style => 'filter',
1725 output_encoding => 'utf8',
1726 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1727
1728 if (!-e $metadata_xml_filename) {
1729
1730 if (open(MOUT,">$metadata_xml_filename")) {
1731
1732 my $src_file_re = &util::filename_to_regex($src_file);
1733 # shouldn't the following also be in the above utility routine??
1734 # $src_file_re =~ s/\./\\./g;
1735
1736 print MOUT "<?xml version=\"1.0\"?>\n";
1737 print MOUT "<DirectoryMetadata>\n";
1738 print MOUT " <FileSet>\n";
1739 print MOUT " <FileName>$src_file_re</FileName>\n";
1740 print MOUT " <Description>\n";
1741 print MOUT " </Description>\n";
1742 print MOUT " </FileSet>\n";
1743 print MOUT "</DirectoryMetadata>\n";
1744
1745 close(MOUT);
1746 }
1747 else {
1748 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1749 }
1750 }
1751
1752
1753 my $xml_in = "";
1754 if (!open(MIN,"<$metadata_xml_filename")) {
1755 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1756 }
1757 else {
1758 # Read them in
1759 my $line;
1760 while (defined ($line=<MIN>)) {
1761 $xml_in .= $line;
1762 }
1763 close(MIN);
1764
1765 # Filter with the call-back functions
1766 my $xml_out = "";
1767
1768 my $MOUT;
1769 if (!open($MOUT,">$metadata_xml_filename")) {
1770 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1771 }
1772 else {
1773 binmode($MOUT,":utf8");
1774
1775 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1776 # At the moment, I will just hack it!
1777 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1778 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1779 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1780 #print MOUT $xml_out;
1781
1782 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1783 metapos => $metapos,
1784 metavalue => $metavalue,
1785 metamode => $metamode,
1786 src_file => $src_file,
1787 prevmetavalue => $prevmetavalue,
1788 current_file => undef} );
1789 close($MOUT);
1790 }
1791 }
1792}
1793
1794
1795sub set_import_metadata
1796{
1797 my $self = shift @_;
1798
1799 my $username = $self->{'username'};
1800 my $collect = $self->{'collect'};
1801 my $gsdl_cgi = $self->{'gsdl_cgi'};
1802
1803 if ($baseaction::authentication_enabled) {
1804 # Ensure the user is allowed to edit this collection
1805 $self->authenticate_user($username, $collect);
1806 }
1807
1808 # Make sure the collection isn't locked by someone else
1809 $self->lock_collection($username, $collect);
1810
1811 $self->_set_import_metadata(@_);
1812
1813 # Release the lock once it is done
1814 $self->unlock_collection($username, $collect);
1815
1816}
1817
1818sub set_import_metadata_array
1819{
1820 my $self = shift @_;
1821
1822 my $username = $self->{'username'};
1823 my $collect = $self->{'collect'};
1824 my $gsdl_cgi = $self->{'gsdl_cgi'};
1825# my $gsdlhome = $self->{'gsdlhome'};
1826
1827 if ($baseaction::authentication_enabled) {
1828 # Ensure the user is allowed to edit this collection
1829 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1830 }
1831
1832 my $site = $self->{'site'};
1833 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1834
1835 $gsdl_cgi->checked_chdir($collect_dir);
1836
1837 # Make sure the collection isn't locked by someone else
1838 $self->lock_collection($username, $collect);
1839
1840 $self->_set_import_metadata_array(@_);
1841
1842 # Release the lock once it is done
1843 $self->unlock_collection($username, $collect);
1844
1845}
1846
1847
1848sub _set_import_metadata_array
1849{
1850 my $self = shift @_;
1851
1852 my $collect = $self->{'collect'};
1853 my $gsdl_cgi = $self->{'gsdl_cgi'};
1854
1855 my $site = $self->{'site'};
1856 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1857
1858 # look up additional args
1859
1860 my $infodbtype = $self->{'infodbtype'};
1861
1862 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1863 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1864
1865 my $json_str = $self->{'json'};
1866 my $doc_array = decode_json $json_str;
1867
1868 my $global_status = 0;
1869 my $global_mess = "";
1870
1871 my @all_docids = ();
1872
1873 foreach my $doc_array_rec ( @$doc_array )
1874 {
1875 my $status = -1;
1876 my $docid = $doc_array_rec->{'docid'};
1877
1878 my ($docid_root,$docid_secnum);
1879 if(defined $docid) {
1880 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1881 # as yet no support for setting subsection metadata in metadata.xml
1882 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1883 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
1884 next; # skip this docid in for loop
1885 }
1886 }
1887
1888 push(@all_docids,$docid); # docid_root rather
1889
1890 my $metaname = $doc_array_rec->{'metaname'};
1891 if (defined $metaname) {
1892 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1893 my $metavalue = $doc_array_rec->{'metavalue'};
1894 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1895
1896 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1897 # make "accumulate" the default (less destructive, as it won't actually
1898 # delete any existing values)
1899 $metamode = "accumulate";
1900 }
1901
1902 # adding metapos and prevmetavalue support to import_metadata subroutines
1903 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1904 my $prevmetavalue = $self->{'prevmetavalue'};
1905
1906 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1907
1908 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1909 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1910
1911 foreach my $metatable_rec ( @$metatable ) {
1912 $metaname = $metatable_rec->{'metaname'};
1913 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1914 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1915 # make "accumulate" the default (less destructive, as it won't actually
1916 # delete any existing values)
1917 $metamode = "accumulate";
1918 }
1919
1920 # No support for metapos and prevmetavalue in the JSON metatable substructure
1921 my $metapos = undef;
1922 my $prevmetavalue = undef;
1923 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1924
1925 foreach my $metavalue ( @$metavals ) {
1926 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1927
1928 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1929 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
1930 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1931 # metavalue for this metaname will be lost
1932 $metamode = "accumulate";
1933 }
1934 }
1935 }
1936 }
1937 }
1938
1939 # always a success message
1940 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1941 $gsdl_cgi->generate_ok_message($mess);
1942}
1943
1944# always returns true (1)
1945sub set_import_metadata_entry
1946{
1947 my $self = shift @_;
1948 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
1949
1950 my $info_mess = <<RAWEND;
1951****************************
1952 set_import_metadata_entry()
1953****************************
1954RAWEND
1955
1956 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1957 $info_mess .= " collect = $collect\n" if defined($collect);
1958 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1959 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
1960 $info_mess .= " docid = $docid\n" if defined($docid);
1961 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1962 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1963 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1964 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1965 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1966
1967 $info_mess .= "****************************\n";
1968
1969 $gsdl_cgi->generate_message($info_mess);
1970
1971 # import works with metadata.xml which can have inherited metadata
1972 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
1973 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
1974 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
1975 # what they're doing if they provide a metapos.
1976 if(defined $metapos) {
1977 print STDERR "@@@@ WARNING: metapos defined.\n";
1978 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
1979 }
1980
1981 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1982 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1983 my $metadata_xml_file;
1984 my $import_filename = undef;
1985
1986 if (defined $docid) {
1987 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1988 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1989
1990 # This now stores the full pathname
1991 $import_filename = $doc_rec->{'src-file'}->[0];
1992 $import_filename = &util::placeholders_to_abspath($import_filename);
1993
1994 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
1995 # as the array version of the method doesn't support the -f parameter yet
1996 my $import_file = $self->{'f'};
1997 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1998 }
1999
2000 # figure out correct metadata.xml file [?]
2001 # Assuming the metadata.xml file is next to the source file
2002 # Note: This will not work if it is using the inherited metadata from the parent folder
2003 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2004 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2005
2006 # If we're overriding everything, then $prevmetavalue=undefined and
2007 # $metamode=override combined with $metapos=undefined
2008 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2009 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2010 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2011## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2012 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2013
2014 }
2015
2016 # Edit the metadata.xml
2017 # Modified by Jeffrey from DL Consulting
2018 # Handle the case where there is one metadata.xml file for multiple FileSets
2019 # The XML filter needs to know whether it is in the right FileSet
2020 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2021 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2022 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2023 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2024 #return 0;
2025 return $metadata_xml_filename;
2026}
2027
2028sub _remove_import_metadata
2029{
2030 my $self = shift @_;
2031
2032 my $collect = $self->{'collect'};
2033 my $gsdl_cgi = $self->{'gsdl_cgi'};
2034# my $gsdlhome = $self->{'gsdlhome'};
2035 my $infodbtype = $self->{'infodbtype'};
2036
2037 # Obtain the collect dir
2038 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2039 my $site = $self->{'site'};
2040 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2041
2042 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2043 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2044
2045 # look up additional args
2046 my $docid = $self->{'d'};
2047 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2048 {
2049 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2050 }
2051
2052 my $metaname = $self->{'metaname'};
2053 my $metapos = $self->{'metapos'};
2054 my $metavalue = $self->{'metavalue'};
2055
2056 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2057 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2058
2059 if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2060 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2061 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2062 $metapos = 0;
2063 }
2064 my $metamode = $self->{'metamode'};
2065 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2066
2067 # import works with metadata.xml which can have inherited metadata
2068 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2069 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2070 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2071 # what they're doing if they provide a metapos.
2072 if(defined $metapos) {
2073 print STDERR "@@@@ WARNING: metapos defined.\n";
2074 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2075 }
2076
2077 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2078 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2079 my $metadata_xml_file;
2080 my $import_filename = undef;
2081 if (defined $docid)
2082 {
2083 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2084 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2085
2086 # This now stores the full pathname
2087 $import_filename = $doc_rec->{'src-file'}->[0];
2088 $import_filename = &util::placeholders_to_abspath($import_filename);
2089 }
2090
2091 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2092 {
2093 $gsdl_cgi->generate_error("There is no metadata\n");
2094 }
2095
2096 # figure out correct metadata.xml file [?]
2097 # Assuming the metadata.xml file is next to the source file
2098 # Note: This will not work if it is using the inherited metadata from the parent folder
2099 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2100 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2101
2102 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2103
2104 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2105 $mess .= " $metaname";
2106 $mess .= " = $metavalue\n";
2107
2108 $gsdl_cgi->generate_ok_message($mess);
2109
2110 #return $status; # in case calling functions have a use for this
2111}
2112
2113sub remove_import_metadata
2114{
2115 my $self = shift @_;
2116
2117 my $username = $self->{'username'};
2118 my $collect = $self->{'collect'};
2119 my $gsdl_cgi = $self->{'gsdl_cgi'};
2120
2121 if ($baseaction::authentication_enabled) {
2122 # Ensure the user is allowed to edit this collection
2123 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2124 }
2125
2126 # Make sure the collection isn't locked by someone else
2127 $self->lock_collection($username, $collect);
2128
2129 $self->_remove_import_metadata(@_);
2130
2131 # Release the lock once it is done
2132 $self->unlock_collection($username, $collect);
2133
2134}
2135
2136sub remove_from_metadata_xml
2137{
2138 my $self = shift @_;
2139 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2140 # metamode generally has no meaning for removing meta, but is used by set_meta
2141 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2142
2143 # Set the call-back functions for the metadata tags
2144 my @rules =
2145 (
2146 _default => 'raw',
2147 'Metadata' => \&rfmxml_metadata,
2148 'FileName' => \&mxml_filename
2149 );
2150
2151 my $parser = XML::Rules->new
2152 (
2153 rules => \@rules,
2154 style => 'filter',
2155 output_encoding => 'utf8',
2156 #normalisespaces => 1,
2157 stripspaces => 2|0|0 # ineffectual
2158 );
2159
2160 my $xml_in = "";
2161 if (!open(MIN,"<$metadata_xml_filename"))
2162 {
2163 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2164 }
2165 else
2166 {
2167 # Read them in
2168 my $line;
2169 while (defined ($line=<MIN>)) {
2170 $xml_in .= $line;
2171 }
2172 close(MIN);
2173
2174 # Filter with the call-back functions
2175 my $xml_out = "";
2176
2177 my $MOUT;
2178 if (!open($MOUT,">$metadata_xml_filename")) {
2179 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2180 }
2181 else {
2182 binmode($MOUT,":utf8");
2183 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2184 close($MOUT);
2185 }
2186 }
2187}
2188
2189sub rfmxml_metadata
2190{
2191 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2192
2193 # metadata.xml does not handle subsections
2194
2195 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2196 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2197 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2198 {
2199 if (!defined $parser->{'parameters'}->{'poscount'})
2200 {
2201 $parser->{'parameters'}->{'poscount'} = 0;
2202 }
2203 else
2204 {
2205 $parser->{'parameters'}->{'poscount'}++;
2206 }
2207
2208 # if overriding but no metapos, then clear all the meta for this metaname
2209 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2210 return [];
2211 }
2212
2213 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2214 {
2215 return [];
2216 }
2217 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2218 {
2219 return [];
2220 }
2221 }
2222
2223 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2224 return [$tagname => $attrHash];
2225}
2226
2227sub _remove_live_metadata
2228{
2229 my $self = shift @_;
2230
2231 my $collect = $self->{'collect'};
2232 my $gsdl_cgi = $self->{'gsdl_cgi'};
2233# my $gsdlhome = $self->{'gsdlhome'};
2234 my $infodbtype = $self->{'infodbtype'};
2235
2236 # Obtain the collect dir
2237 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2238 my $site = $self->{'site'};
2239 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2240
2241
2242 # look up additional args
2243 my $docid = $self->{'d'};
2244 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2245 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2246 }
2247
2248 # Generate the dbkey
2249 my $metaname = $self->{'metaname'};
2250 my $dbkey = "$docid.$metaname";
2251
2252 # To people who know $collect_tail please add some comments
2253 # Obtain the live gdbm_db path
2254 my $collect_tail = $collect;
2255 $collect_tail =~ s/^.*[\/|\\]//;
2256 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2257 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2258
2259 # Remove the key
2260 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2261 my $status = system($cmd);
2262 if ($status != 0) {
2263 # Catch error if gdbmdel failed
2264 my $mess = "Failed to set metadata key: $dbkey\n";
2265
2266 $mess .= "PATH: $ENV{'PATH'}\n";
2267 $mess .= "cmd = $cmd\n";
2268 $mess .= "Exit status: $status\n";
2269 $mess .= "System Error Message: $!\n";
2270
2271 $gsdl_cgi->generate_error($mess);
2272 }
2273 else {
2274 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2275 }
2276
2277}
2278
2279sub remove_live_metadata
2280{
2281 my $self = shift @_;
2282
2283 my $username = $self->{'username'};
2284 my $collect = $self->{'collect'};
2285 my $gsdl_cgi = $self->{'gsdl_cgi'};
2286 my $gsdlhome = $self->{'gsdlhome'};
2287
2288 if ($baseaction::authentication_enabled) {
2289 # Ensure the user is allowed to edit this collection
2290 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2291 }
2292
2293 # Make sure the collection isn't locked by someone else
2294 $self->lock_collection($username, $collect);
2295
2296 $self->_remove_live_metadata(@_);
2297
2298 $self->unlock_collection($username, $collect);
2299}
2300
2301sub remove_metadata
2302{
2303 my $self = shift @_;
2304
2305 my $where = $self->{'where'};
2306 if(!$where || ($where =~ m/^\s*$/)) {
2307 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2308 return;
2309 }
2310
2311 my $username = $self->{'username'};
2312 my $collect = $self->{'collect'};
2313 my $gsdl_cgi = $self->{'gsdl_cgi'};
2314
2315 if ($baseaction::authentication_enabled) {
2316 # Ensure the user is allowed to edit this collection
2317 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2318 }
2319
2320 # Make sure the collection isn't locked by someone else
2321 $self->lock_collection($username, $collect);
2322
2323 # check which directories need to be processed, specified in $where as
2324 # any combination of import|archives|index|live
2325 if($where =~ m/import/) {
2326 $self->_remove_import_metadata(@_);
2327 }
2328 if($where =~ m/archives/) {
2329 $self->_remove_archives_metadata(@_);
2330 }
2331 if($where =~ m/index/) {
2332 $self->_remove_index_metadata(@_);
2333 }
2334
2335 # Release the lock once it is done
2336 $self->unlock_collection($username, $collect);
2337}
2338
2339# the internal version, without authentication
2340sub _remove_index_metadata
2341{
2342 my $self = shift @_;
2343
2344 my $collect = $self->{'collect'};
2345 my $gsdl_cgi = $self->{'gsdl_cgi'};
2346# my $gsdlhome = $self->{'gsdlhome'};
2347 my $infodbtype = $self->{'infodbtype'};
2348
2349 # Obtain the collect dir
2350 my $site = $self->{'site'};
2351 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2352 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2353
2354 # look up additional args
2355 my $docid = $self->{'d'};
2356 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2357 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2358 }
2359 my $metaname = $self->{'metaname'};
2360 my $metapos = $self->{'metapos'};
2361 my $metavalue = $self->{'metavalue'};
2362
2363 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2364 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2365
2366 # To people who know $collect_tail please add some comments
2367 # -> In collection groups, I think collect_tailname is the subcollection name,
2368 # e.g. colgroup-name/col-tail-name
2369 # Obtain the path to the database
2370 my $collect_tail = $collect;
2371 $collect_tail =~ s/^.*[\/|\\]//;
2372 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2373 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2374
2375 # Read the docid entry
2376 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2377
2378 # Check to make sure the key does exist
2379 if (!defined ($doc_rec->{$metaname})) {
2380 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2381 }
2382
2383 # Obtain the specified metadata pos
2384 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2385 if(!defined $metapos && !defined $metavalue) {
2386 $metapos = 0;
2387 }
2388
2389
2390 # consider check key is defined before deleting?
2391 # Loop through the metadata array and ignore the specified position
2392 my $filtered_metadata = [];
2393 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2394 for (my $i=0; $i<$num_metadata_vals; $i++) {
2395 my $metaval = shift(@{$doc_rec->{$metaname}});
2396
2397 if (!defined $metavalue && $i != $metapos) {
2398 push(@$filtered_metadata,$metaval);
2399 }
2400
2401 if(defined $metavalue && !($metavalue eq $metaval))
2402 {
2403 push(@$filtered_metadata,$metaval);
2404 }
2405 }
2406 $doc_rec->{$metaname} = $filtered_metadata;
2407
2408 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2409 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2410
2411 if ($status != 0) {
2412 my $mess = "Failed to set metadata key: $docid\n";
2413
2414 $mess .= "PATH: $ENV{'PATH'}\n";
2415 $mess .= "Exit status: $status\n";
2416 $mess .= "System Error Message: $!\n";
2417
2418 $gsdl_cgi->generate_error($mess);
2419 }
2420 else {
2421 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2422 $mess .= " $metaname";
2423 $mess .= "->[$metapos]" if (defined $metapos);
2424 $mess .= " ($metavalue)" if (defined $metavalue);
2425
2426 $gsdl_cgi->generate_ok_message($mess);
2427 }
2428
2429 #return $status; # in case calling functions have a use for this
2430}
2431
2432sub remove_index_metadata
2433{
2434 my $self = shift @_;
2435
2436 my $username = $self->{'username'};
2437 my $collect = $self->{'collect'};
2438 my $gsdl_cgi = $self->{'gsdl_cgi'};
2439# my $gsdlhome = $self->{'gsdlhome'};
2440
2441 if ($baseaction::authentication_enabled) {
2442 # Ensure the user is allowed to edit this collection
2443 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2444 }
2445
2446 # Obtain the collect dir
2447 my $site = $self->{'site'};
2448 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2449 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2450
2451 # Make sure the collection isn't locked by someone else
2452 $self->lock_collection($username, $collect);
2453
2454 $self->_remove_index_metadata(@_);
2455
2456 # Release the lock once it is done
2457 $self->unlock_collection($username, $collect);
2458}
2459
2460
2461# Was trying to reused the codes, but the functions need to be broken
2462# down more before they can be reused, otherwise there will be too
2463# much overhead and duplicate process...
2464sub insert_metadata
2465{
2466 my $self = shift @_;
2467
2468 my $username = $self->{'username'};
2469 my $collect = $self->{'collect'};
2470 my $gsdl_cgi = $self->{'gsdl_cgi'};
2471 my $gsdlhome = $self->{'gsdlhome'};
2472 my $infodbtype = $self->{'infodbtype'};
2473
2474 # If the import metadata and gdbm database have been updated, we
2475 # need to insert some notification to warn user that the the text
2476 # they see at the moment is not indexed and require a rebuild.
2477 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2478
2479 if ($baseaction::authentication_enabled) {
2480 # Ensure the user is allowed to edit this collection
2481 $self->authenticate_user($username, $collect);
2482 }
2483
2484 # Obtain the collect and archive dir
2485 my $site = $self->{'site'};
2486 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2487 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2488 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2489
2490 # Make sure the collection isn't locked by someone else
2491 $self->lock_collection($username, $collect);
2492
2493 # Check additional args
2494 my $docid = $self->{'d'};
2495 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
2496 $gsdl_cgi->generate_error("No document id is specified: d=...");
2497 }
2498 my $metaname = $self->{'metaname'};
2499 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
2500 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2501 }
2502 my $metavalue = $self->{'metavalue'};
2503 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
2504 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2505 }
2506 # make "accumulate" the default (less destructive, as it won't actually
2507 # delete any existing values)
2508 my $metamode = "accumulate";
2509
2510 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2511 my $metapos = undef;
2512 my $prevmetavalue = undef;
2513
2514 #=======================================================================#
2515 # set_import_metadata [START]
2516 #=======================================================================#
2517 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2518 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2519 my $metadata_xml_file;
2520 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2521 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2522
2523 # This now stores the full pathname
2524 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2525 $import_filename = &util::placeholders_to_abspath($import_filename);
2526
2527 # figure out correct metadata.xml file [?]
2528 # Assuming the metadata.xml file is next to the source file
2529 # Note: This will not work if it is using the inherited metadata from the parent folder
2530 my ($import_tailname, $import_dirname)
2531 = File::Basename::fileparse($import_filename);
2532 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2533
2534 # Shane's escape characters
2535 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2536 $metavalue =~ s/\,/&#44;/g;
2537 $metavalue =~ s/\:/&#58;/g;
2538 $metavalue =~ s/\|/&#124;/g;
2539 $metavalue =~ s/\(/&#40;/g;
2540 $metavalue =~ s/\)/&#41;/g;
2541 $metavalue =~ s/\[/&#91;/g;
2542 $metavalue =~ s/\\/&#92;/g;
2543 $metavalue =~ s/\]/&#93;/g;
2544 $metavalue =~ s/\{/&#123;/g;
2545 $metavalue =~ s/\}/&#125;/g;
2546 $metavalue =~ s/\"/&#34;/g;
2547 $metavalue =~ s/\`/&#96;/g;
2548 $metavalue =~ s/\n/_newline_/g;
2549
2550 # Edit the metadata.xml
2551 # Modified by Jeffrey from DL Consulting
2552 # Handle the case where there is one metadata.xml file for multiple FileSets
2553 # The XML filter needs to know whether it is in the right FileSet
2554 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2555 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2556 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2557 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2558 #=======================================================================#
2559 # set_import_metadata [END]
2560 #=======================================================================#
2561
2562
2563 #=======================================================================#
2564 # set_metadata (accumulate version) [START]
2565 #=======================================================================#
2566 # To people who know $collect_tail please add some comments
2567 # Obtain path to the database
2568 my $collect_tail = $collect;
2569 $collect_tail =~ s/^.*[\/|\\]//;
2570 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2571 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2572
2573 # Read the docid entry
2574 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2575
2576 # Protect the quotes
2577 $metavalue =~ s/\"/\\\"/g;
2578
2579 # Adds the pending macro
2580 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2581
2582 # If the metadata doesn't exist, create a new one
2583 if (!defined($doc_rec->{$metaname})){
2584 $doc_rec->{$metaname} = [ $macro_metavalue ];
2585 }
2586 # Else, let's acculumate the values
2587 else {
2588 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2589 }
2590
2591 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2592 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2593
2594 if ($status != 0) {
2595 # Catch error if gdbmget failed
2596 my $mess = "Failed to set metadata key: $docid\n";
2597
2598 $mess .= "PATH: $ENV{'PATH'}\n";
2599 $mess .= "Exit status: $status\n";
2600 $mess .= "System Error Message: $!\n";
2601
2602 $gsdl_cgi->generate_error($mess);
2603 }
2604 else {
2605 my $mess = "insert-metadata successful: Key[$docid]\n";
2606 $mess .= " [In metadata.xml] $metaname";
2607 $mess .= " = $metavalue\n";
2608 $mess .= " [In database] $metaname";
2609 $mess .= " = $macro_metavalue\n";
2610 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2611 $gsdl_cgi->generate_ok_message($mess);
2612 }
2613 #=======================================================================#
2614 # set_metadata (accumulate version) [END]
2615 #=======================================================================#
2616
2617 # Release the lock once it is done
2618 $self->unlock_collection($username, $collect);
2619}
2620
2621# not returning 1; here since this file is conditionally included by metadataction.pm
2622# and not otherwise meant to be used on its own
2623
Note: See TracBrowser for help on using the repository browser.