source: main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm@ 32841

Last change on this file since 32841 was 32841, checked in by ak19, 5 years ago

Tentative fix to metadata GPS.mapOverlay that we specify ought to be overridden getting set to accumulate instead. I understand the first time that this meta gets sets that since there's no previous value to override that this gets set to accumulate then. But in future instances where this meta value is being changed with metamode=override, I think we should ensure that the meta's (meta)mode goes into doc.xml as override instead of remaining as accumulate. This should in theory work since when the value of a meta is instructed to be changed with metamode=override, all instances of that meta (with the same metaname) gets replaced with a new instance with the new specified value, leaving that one new meta with metamode override and any future changes to that meta's value and (meta)mode will affect only that once instance. I hope. Still would like to discuss this with Dr Bainbridge to make sure there are no side-effects I can't presently foresee.

File size: 94.3 KB
Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34package metadataaction;
35
36use strict;
37
38use cgiactions::baseaction;
39
40use dbutil;
41use ghtml;
42
43use JSON;
44
45
46$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
47{
48 #SET METHODS
49 "set-live-metadata" => {
50 'compulsory-args' => [ "d", "metaname", "metavalue" ],
51 'optional-args' => [ ] },
52
53 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
54 'compulsory-args' => [ "metaname", "metavalue" ],
55 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
56
57 "set-index-metadata" => {
58 'compulsory-args' => [ "d", "metaname", "metavalue" ],
59 'optional-args' => [ "metapos", "metamode" ] },
60
61 "set-archives-metadata" => {
62 'compulsory-args' => [ "d", "metaname", "metavalue" ],
63 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
64
65 "set-import-metadata" => {
66 'compulsory-args' => [ "metaname", "metavalue" ],
67 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
68
69 #SET METHODS (ARRAY)
70 "set-metadata-array" => {
71 'compulsory-args' => [ "where", "json" ],
72 'optional-args' => [ ],
73 'help-string' => [
74 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
75
76 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
77
78# The same examples rewritten for when running the metadata-server.pl script from the commandline:
79
80# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
81
82# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
83
84 "set-archives-metadata-array" => {
85 'compulsory-args' => [ "json" ],
86 'optional-args' => [ ] },
87
88 "set-import-metadata-array" => {
89 'compulsory-args' => [ "json" ],
90 'optional-args' => [ ] },
91
92 "set-index-metadata-array" => {
93 'compulsory-args' => [ "json" ],
94 'optional-args' => [ ] },
95
96 "set-live-metadata-array" => {
97 'compulsory-args' => [ "json" ],
98 'optional-args' => [ ] },
99
100 #REMOVE METHODS
101 "remove-import-metadata" => {
102 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
103 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
104# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
105
106 "remove-archives-metadata" => {
107 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
108 'optional-args' => [ "metapos", "metavalue", "metamode" ] },
109
110 "remove-live-metadata" => {
111 'compulsory-args' => [ "d", "metaname" ],
112 'optional-args' => [ ] },
113
114 "remove-index-metadata" => {
115 'compulsory-args' => [ "d", "metaname" ],
116 'optional-args' => [ "metapos", "metavalue" ] },
117
118 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
119 'compulsory-args' => [ "d", "metaname" ],
120 'optional-args' => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
121
122 #INSERT METHODS
123 "insert-metadata" => {
124 'compulsory-args' => [ "d", "metaname", "metavalue" ],
125 'optional-args' => [ ] }
126};
127
128
129sub _set_live_metadata
130{
131 my $self = shift @_;
132
133 my $collect = $self->{'collect'};
134 my $gsdl_cgi = $self->{'gsdl_cgi'};
135 #my $gsdlhome = $self->{'gsdlhome'};
136 my $infodbtype = $self->{'infodbtype'};
137
138 # Obtain the collect dir
139 my $site = $self->{'site'};
140 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
141 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
142
143
144 # look up additional args
145 my $docid = $self->{'d'};
146 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
147 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
148 }
149 my $metavalue = $self->{'metavalue'};
150
151 # Generate the dbkey
152 my $metaname = $self->{'metaname'};
153 my $dbkey = "$docid.$metaname";
154
155 # To people who know $collect_tail please add some comments
156 # Obtain path to the database
157 my $collect_tail = $collect;
158 $collect_tail =~ s/^.*[\/|\\]//;
159 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
160 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
161
162 # Set the new value
163 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
164 my $status = system($cmd);
165 if ($status != 0) {
166 # Catch error if gdbmget failed
167 my $mess = "Failed to set metadata key: $dbkey\n";
168
169 $mess .= "PATH: $ENV{'PATH'}\n";
170 $mess .= "cmd = $cmd\n";
171 $mess .= "Exit status: $status\n";
172 $mess .= "System Error Message: $!\n";
173
174 $gsdl_cgi->generate_error($mess);
175 }
176 else {
177 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
178 }
179
180 #return $status; # in case calling functions have any further use for this
181}
182
183sub set_live_metadata
184{
185 my $self = shift @_;
186
187 my $username = $self->{'username'};
188 my $collect = $self->{'collect'};
189 my $gsdl_cgi = $self->{'gsdl_cgi'};
190
191 if ($baseaction::authentication_enabled) {
192 # Ensure the user is allowed to edit this collection
193 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
194 }
195
196 # Make sure the collection isn't locked by someone else
197 $self->lock_collection($username, $collect);
198
199 $self->_set_live_metadata(@_);
200
201 # Release the lock once it is done
202 $self->unlock_collection($username, $collect);
203}
204
205sub set_index_metadata_entry
206{
207 my $self = shift @_;
208 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
209
210 # To people who know $collect_tail please add some comments
211 # Obtain path to the database
212 my $collect_tail = $collect;
213 $collect_tail =~ s/^.*[\/|\\]//;
214 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
215 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
216
217# print STDERR "**** infodb file path = $infodb_file_path\n";
218# print STDERR "***** infodb type = $infodbtype\n";
219
220 # Read the docid entry
221 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
222
223 # Set the metadata value
224 if (defined $metapos) {
225 # if metamode=accumulate AND metapos, warn user and then use metapos
226 if (defined $metamode && $metamode eq "accumulate") {
227 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
228 print STDERR "**** Proceeding by using metapos\n";
229 }
230 $doc_rec->{$metaname}->[$metapos] = $metavalue;
231 }
232 elsif (defined $prevmetavalue) {
233 my $array = $doc_rec->{$metaname};
234 my $length = @$array;
235
236 my $found = 0;
237 for (my $i = 0; $i < $length; $i++){
238 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
239 $doc_rec->{$metaname}->[$i] = $metavalue;
240 $found = 1;
241 last;
242 }
243 }
244
245 if($found == 0){
246 $doc_rec->{$metaname} = [ $metavalue ];
247 }
248 }
249 elsif (defined $metamode && $metamode eq "override") {
250 $doc_rec->{$metaname} = [ $metavalue ];
251 }
252 else { # default for index was to override, but because accumulate is less destructive,
253 # and because accumulate is the default for archives and import, that's the new default for index too
254 if(defined $doc_rec->{$metaname}) {
255 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
256 } else {
257 $doc_rec->{$metaname} = [ $metavalue ];
258 }
259 }
260
261 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
262
263 return $status;
264
265}
266
267sub _set_import_metadata
268{
269 my $self = shift @_;
270
271 my $collect = $self->{'collect'};
272 my $gsdl_cgi = $self->{'gsdl_cgi'};
273 my $infodbtype = $self->{'infodbtype'};
274# my $gsdlhome = $self->{'gsdlhome'};
275
276 # Obtain the collect and archive dir
277 my $site = $self->{'site'};
278 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
279 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
280 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
281
282 # look up additional args
283 # want either d= or f=
284 my $docid = $self->{'d'};
285 my ($docid_root,$docid_secnum);
286 if(defined $docid) {
287 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
288 # as yet no support for setting subsection metadata in metadata.xml
289 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
290 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
291 return;
292 }
293 }
294
295 my $import_file = $self->{'f'};
296 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
297 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
298 }
299
300 # Get the parameters and set default mode to "accumulate"
301 my $metaname = $self->{'metaname'};
302 my $metavalue = $self->{'metavalue'};
303## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
304 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
305
306 my $metamode = $self->{'metamode'};
307 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
308 # make "accumulate" the default (less destructive, as it won't actually
309 # delete any existing values)
310 $metamode = "accumulate";
311 }
312
313 # adding metapos and prevmetavalue support to import_metadata subroutines
314 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
315 my $prevmetavalue = $self->{'prevmetavalue'};
316 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
317 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
318
319 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
320 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
321
322 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
323 $mess .= " $metaname";
324 $mess .= " = $metavalue";
325 $mess .= " ($metamode)\n";
326
327 $gsdl_cgi->generate_ok_message($mess);
328
329 #return $status; # in case calling functions have any further use for this
330}
331
332# the version of set_archives_meta that doesn't do authentication
333sub _set_archives_metadata
334{
335 my $self = shift @_;
336
337 my $collect = $self->{'collect'};
338 my $gsdl_cgi = $self->{'gsdl_cgi'};
339 my $infodbtype = $self->{'infodbtype'};
340
341 # Obtain the collect and archive dir
342 my $site = $self->{'site'};
343 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
344 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
345
346 # look up additional args
347 my $docid = $self->{'d'};
348 my $metaname = $self->{'metaname'};
349 my $metavalue = $self->{'metavalue'};
350 my $prevmetavalue = $self->{'prevmetavalue'};
351
352 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
353 # Don't append "|| undef", since if metapos=0 it will then be set to undef
354
355 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
356 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
357
358 my $metamode = $self->{'metamode'};
359 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
360 # make "accumulate" the default (less destructive, as it won't actually
361 # delete any existing values)
362 $metamode = "accumulate";
363 }
364
365 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
366 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
367
368 if ($status == 0) {
369 my $mess = "set-archives-metadata successful: Key[$docid]\n";
370 $mess .= " $metaname";
371 $mess .= "->[$metapos]" if (defined $metapos);
372 $mess .= " = $metavalue";
373 $mess .= " ($metamode)\n";
374
375 $gsdl_cgi->generate_ok_message($mess);
376 }
377 else {
378 my $mess .= "Failed to set archives metadata key: $docid\n";
379 $mess .= "Exit status: $status\n";
380 if(defined $self->{'error_msg'}) {
381 $mess .= "Error Message: $self->{'error_msg'}\n";
382 } else {
383 $mess .= "System Error Message: $!\n";
384 }
385 $mess .= "-" x 20 . "\n";
386
387 $gsdl_cgi->generate_error($mess);
388 }
389
390 #return $status; # in case calling functions have any further use for this
391}
392
393
394# the version of set_index_meta that doesn't do authentication
395sub _set_index_metadata
396{
397 print STDERR "START SET INDEX METADATA\n";
398 my $self = shift @_;
399
400 my $collect = $self->{'collect'};
401 my $gsdl_cgi = $self->{'gsdl_cgi'};
402
403 my $site = $self->{'site'};
404 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
405
406 # look up additional args
407 my $docid = $self->{'d'};
408 my $metaname = $self->{'metaname'};
409 my $metapos = $self->{'metapos'}; # undef has meaning
410 my $metavalue = $self->{'metavalue'};
411 my $infodbtype = $self->{'infodbtype'};
412 my $metamode = $self->{'metamode'};
413 my $prevmetavalue = $self->{'prevmetavalue'};
414
415 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
416 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
417
418 print STDERR "SETTING INDEX METADATA ENTRY\n";
419 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
420 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
421 if ($status != 0) {
422 # Catch error if set infodb entry failed
423 my $mess = "Failed to set metadata key: $docid\n";
424
425 $mess .= "PATH: $ENV{'PATH'}\n";
426 $mess .= "Exit status: $status\n";
427 $mess .= "System Error Message: $!\n";
428
429 $gsdl_cgi->generate_error($mess);
430 }
431 else {
432 my $mess = "set-index-metadata successful: Key[$docid]\n";
433 $mess .= " $metaname";
434 $mess .= "->[$metapos]" if (defined $metapos);
435 $mess .= " = $metavalue\n";
436
437 $gsdl_cgi->generate_ok_message($mess);
438 }
439
440 print STDERR "END SET INDEX METADATA\n";
441 #return $status; # in case calling functions have any further use for this
442}
443
444sub set_index_metadata
445{
446 my $self = shift @_;
447
448 my $username = $self->{'username'};
449 my $collect = $self->{'collect'};
450 my $gsdl_cgi = $self->{'gsdl_cgi'};
451 #my $gsdlhome = $self->{'gsdlhome'};
452
453 if ($baseaction::authentication_enabled) {
454 # Ensure the user is allowed to edit this collection
455 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
456 }
457
458 my $site = $self->{'site'};
459 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
460
461 $gsdl_cgi->checked_chdir($collect_dir);
462
463 # Obtain the collect dir
464 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
465
466 # Make sure the collection isn't locked by someone else
467 $self->lock_collection($username, $collect);
468
469 $self->_set_index_metadata(@_);
470
471 # Release the lock once it is done
472 $self->unlock_collection($username, $collect);
473}
474
475# call this to set the metadata for a combination of dirs archives, import or index, or live
476# if none specified, defaults to index which was the original behaviour of set_metadata.
477sub set_metadata
478{
479 my $self = shift @_;
480
481 # Testing that not defining a variable, setting it to "" or to " " all return false
482 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
483
484 my $where = $self->{'where'};
485 if(!$where || ($where =~ m/^\s*$/)) {
486 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
487 return;
488 }
489
490 # authenticate and lock collection once, even if processing multiple dirs
491 my $username = $self->{'username'};
492 my $collect = $self->{'collect'};
493 my $gsdl_cgi = $self->{'gsdl_cgi'};
494
495 if ($baseaction::authentication_enabled) {
496 # Ensure the user is allowed to edit this collection
497 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
498 }
499
500 if($where =~ m/index/) {
501 my $site = $self->{'site'};
502 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
503 $gsdl_cgi->checked_chdir($collect_dir);
504 }
505
506 # Make sure the collection isn't locked by someone else
507 $self->lock_collection($username, $collect);
508
509
510 # now at last can set the metadata. $where can specify multiple
511 # $where is of the form: import|archives|index, or a subset thereof
512
513 #my @whichdirs = split('\|', $where);
514
515 # just check whether $where contains import/archives/index/live in turn, and
516 # for each case, process it accordingly
517 if($where =~ m/import/) {
518 $self->_set_import_metadata(@_);
519 }
520
521 if($where =~ m/archives/) {
522
523 # look up docID arg which is optional to set_metadata because it's optional
524 # to set_import, but which is compulsory to set_archives_metadata
525 my $docid = $self->{'d'};
526 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
527 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
528 }
529 # we have a docid, so can set archives meta
530 $self->_set_archives_metadata(@_);
531 }
532
533 if($where =~ m/index/) {
534
535 # look up docID arg which is optional to set_metadata because it's optional
536 # to set_import, but which is compulsory to set_archives_metadata
537 my $docid = $self->{'d'};
538 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
539 $gsdl_cgi->generate_error("No docid (d=...) specified.");
540 }
541 # we have a docid, so can set index meta
542 $self->_set_index_metadata(@_);
543 }
544
545 if($where =~ m/live/) {
546 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
547 }
548
549 # Release the lock once it is done
550 $self->unlock_collection($username, $collect);
551}
552
553sub set_metadata_array
554{
555 my $self = shift @_;
556
557 my $where = $self->{'where'};
558 if(!$where || ($where =~ m/^\s*$/)) {
559 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
560 return;
561 }
562
563 my $username = $self->{'username'};
564 my $collect = $self->{'collect'};
565 my $gsdl_cgi = $self->{'gsdl_cgi'};
566
567 if ($baseaction::authentication_enabled) {
568 # Ensure the user is allowed to edit this collection
569 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
570 }
571
572 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
573 # But including the stmt during this code reorganisation to preserve as-is what used to happen
574 my $site = $self->{'site'};
575 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
576 $gsdl_cgi->checked_chdir($collect_dir);
577
578 # Make sure the collection isn't locked by someone else
579 $self->lock_collection($username, $collect);
580
581 if($where =~ m/import/) {
582 $self->_set_import_metadata_array(@_);
583 }
584 if($where =~ m/archives/) {
585 $self->_set_archives_metadata_array(@_);
586 }
587 if($where =~ m/index/) {
588 $self->_set_index_metadata_array(@_);
589 }
590 if($where =~ m/live/) {
591 $self->_set_live_metadata_array(@_);
592 }
593
594 # Release the lock once it is done
595 $self->unlock_collection($username, $collect);
596}
597
598sub _set_index_metadata_array
599{
600 my $self = shift @_;
601
602 my $collect = $self->{'collect'};
603 my $gsdl_cgi = $self->{'gsdl_cgi'};
604 my $site = $self->{'site'};
605 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
606
607
608 # look up additional args
609
610 my $infodbtype = $self->{'infodbtype'};
611
612 my $json_str = $self->{'json'};
613 my $doc_array = decode_json $json_str;
614
615
616 my $global_status = 0;
617 my $global_mess = "";
618
619 my @all_docids = ();
620
621 foreach my $doc_array_rec ( @$doc_array ) {
622
623 my $status = -1;
624 my $docid = $doc_array_rec->{'docid'};
625
626 push(@all_docids,$docid);
627
628 my $metaname = $doc_array_rec->{'metaname'};
629 if(defined $metaname) {
630 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
631 my $metavalue = $doc_array_rec->{'metavalue'};
632 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
633
634 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
635 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
636 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
637
638 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
639 $metaname = $metatable_rec->{'metaname'};
640 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
641 my $metapos = undef;
642 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
643
644 foreach my $metavalue ( @$metavals ) { # metavals is an array
645 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
646 if($metamode eq "override") { # now, having overridden the metavalue for the first,
647 # need to accumulate subsequent metavals for this metaname, else the just-assigned
648 # metavalue for this metaname will be lost
649 $metamode = "accumulate";
650 }
651 }
652 }
653 }
654
655 if ($status != 0) {
656 # Catch error if set infodb entry failed
657 $global_status = $status;
658 $global_mess .= "Failed to set metadata key: $docid\n";
659 $global_mess .= "Exit status: $status\n";
660 $global_mess .= "System Error Message: $!\n";
661 $global_mess .= "-" x 20;
662 }
663 }
664
665 if ($global_status != 0) {
666 $global_mess .= "PATH: $ENV{'PATH'}\n";
667 $gsdl_cgi->generate_error($global_mess);
668 }
669 else {
670 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
671 $gsdl_cgi->generate_ok_message($mess);
672 }
673}
674
675sub set_index_metadata_array
676{
677 my $self = shift @_;
678
679 my $username = $self->{'username'};
680 my $collect = $self->{'collect'};
681 my $gsdl_cgi = $self->{'gsdl_cgi'};
682# my $gsdlhome = $self->{'gsdlhome'};
683
684 if ($baseaction::authentication_enabled) {
685 # Ensure the user is allowed to edit this collection
686 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
687 }
688
689 my $site = $self->{'site'};
690 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
691
692 $gsdl_cgi->checked_chdir($collect_dir);
693
694 # Obtain the collect dir
695 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
696
697 # Make sure the collection isn't locked by someone else
698 $self->lock_collection($username, $collect);
699
700 $self->_set_index_metadata_array(@_);
701
702 # Release the lock once it is done
703 $self->unlock_collection($username, $collect);
704}
705
706# experimental, newly added in and untested
707sub _set_live_metadata_array
708{
709 my $self = shift @_;
710
711 my $collect = $self->{'collect'};
712 my $gsdl_cgi = $self->{'gsdl_cgi'};
713
714 my $site = $self->{'site'};
715 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
716
717
718 # look up additional args
719 my $infodbtype = $self->{'infodbtype'};
720 # To people who know $collect_tail please add some comments
721 # Obtain path to the database
722 my $collect_tail = $collect;
723 $collect_tail =~ s/^.*[\/|\\]//;
724 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
725 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
726
727
728 my $json_str = $self->{'json'};
729 my $doc_array = decode_json $json_str;
730
731
732 my $global_status = 0;
733 my $global_mess = "";
734
735 my @all_docids = ();
736
737
738 foreach my $doc_array_rec ( @$doc_array ) {
739
740 my $status = -1;
741 my $docid = $doc_array_rec->{'docid'};
742
743 push(@all_docids,$docid);
744
745 my $metaname = $doc_array_rec->{'metaname'};
746 if(defined $metaname) {
747 my $dbkey = "$docid.$metaname";
748 my $metavalue = $doc_array_rec->{'metavalue'};
749
750 # Set the new value
751 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
752 $status = system($cmd);
753
754 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
755 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
756 foreach my $metatable_rec ( @$metatable ) {
757 $metaname = $metatable_rec->{'metaname'};
758 my $dbkey = "$docid.$metaname";
759
760 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
761 foreach my $metavalue ( @$metavals ) {
762 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
763 $status = system($cmd);
764 }
765 }
766
767 }
768
769 if ($status != 0) {
770 # Catch error if gdbmget failed
771 $global_status = $status;
772 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
773 $global_mess .= "Exit status: $status\n";
774 $global_mess .= "System Error Message: $!\n";
775 $global_mess .= "-" x 20;
776 }
777 }
778
779 if ($global_status != 0) {
780 $global_mess .= "PATH: $ENV{'PATH'}\n";
781 $gsdl_cgi->generate_error($global_mess);
782 }
783 else {
784 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
785 $gsdl_cgi->generate_ok_message($mess);
786 }
787}
788
789sub set_live_metadata_array
790{
791 my $self = shift @_;
792
793 my $username = $self->{'username'};
794 my $collect = $self->{'collect'};
795 my $gsdl_cgi = $self->{'gsdl_cgi'};
796
797 if ($baseaction::authentication_enabled) {
798 # Ensure the user is allowed to edit this collection
799 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
800 }
801
802 my $site = $self->{'site'};
803 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
804
805 $gsdl_cgi->checked_chdir($collect_dir);
806
807 # Make sure the collection isn't locked by someone else
808 $self->lock_collection($username, $collect);
809
810 $self->_set_live_metadata_array(@_);
811
812 # Release the lock once it is done
813 $self->unlock_collection($username, $collect);
814}
815
816
817sub dxml_metadata
818{
819 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
820 my $metaname = $parser->{'parameters'}->{'metaname'};
821 my $metamode = $parser->{'parameters'}->{'metamode'};
822
823###!!! print STDERR "**** Processing closing </Metadata> tag\n";
824
825 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
826
827 # Find the right metadata tag and checks if we are going to
828 # override it
829 #
830 # Note: This over writes the first metadata block it
831 # encountered. If there are multiple Sections in the doc.xml, it
832 # might not behave as you would expect
833
834 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
835## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n" if (defined $opt_doc_secnum);
836## print STDERR "**** metamode = $metamode\n";
837
838 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
839 {
840 my $name_attr = $attrHash->{'name'};
841 # print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
842 if (($name_attr eq $metaname) && ($metamode eq "override"))
843 {
844 if (!defined $parser->{'parameters'}->{'poscount'})
845 {
846 $parser->{'parameters'}->{'poscount'} = 0;
847 }
848 else
849 {
850 $parser->{'parameters'}->{'poscount'}++;
851 }
852
853 if (defined $parser->{'parameters'}->{'metapos'}) { # if pos defined, ignore any prevmeta if also defined, and just wait for pos match
854
855 if ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) {
856 ###print STDERR "#### got pos match!!\n";
857 # Get the value and override the current value
858 my $metavalue = $parser->{'parameters'}->{'metavalue'};
859 $attrHash->{'_content'} = $metavalue;
860
861 print STDERR "**** WARNING, NOT SURE IF IT'S ALWAYS THE RIGHT TO DO, but setting metamode to override as requested";
862
863 $attrHash->{'mode'} = $metamode; # if it was set to accumulate the first time this meta was added, make sure it is set to override now
864
865 # Don't want it to wipe out any other pieces of metadata
866 $parser->{'parameters'}->{'metamode'} = "done";
867 }
868 }
869 elsif (defined $parser->{'parameters'}->{'prevmetavalue'}) { # if no pos defined but prevmeta defined, then wait for prevmeta match
870 if ($parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) {
871 ###print STDERR "### prev meta value matches\n";
872 my $metavalue = $parser->{'parameters'}->{'metavalue'};
873 $attrHash->{'_content'} = $metavalue;
874
875 print STDERR "**** WARNING, NOT SURE IF IT'S ALWAYS THE RIGHT TO DO, but setting metamode to override as requested";
876 $attrHash->{'mode'} = $metamode; # if it was set to accumulate the first time this meta was added, make sure it is set to override now
877
878 $parser->{'parameters'}->{'metamode'} = "done";
879
880 }
881 }
882 # Note that before commit rev 32076, if neither metapos nor prevmeta defined, then code tried to set pos 0 for that metaname to the new value
883 # However that is not how the code works now (and couldn't get it to work that way yet after rev 32076).
884 # What it does now, and what Dr Bainbridge approved of is that:
885 # If no pos or prevmeta defined, then on override, the metavalue set for the specificed metaname becomes the ONLY instance of that metaname
886 # So any earlier instances of that metaname is removed and replaced by the single new instance.
887 }
888 }
889
890 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
891 return [$tagname => $attrHash];
892}
893
894# This method exists purely for catching invalid section numbers that the client
895# requested to edit. Once the parser has reached the end (the final </Archive> tag),
896# we've seen all the Sections in the doc.xml, and none of their section nums matched
897# if the metamode has not been set to 'done' by then.
898sub dxml_archive
899{
900 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
901 my $metamode = $parser->{'parameters'}->{'metamode'};
902
903 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
904 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
905
906 # print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
907
908 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
909 #print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
910
911 my $metaname = $parser->{'parameters'}->{'metaname'};
912 my $metavalue = $parser->{'parameters'}->{'metavalue'};
913
914 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
915 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
916 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
917 print STDERR "\tin section $opt_doc_secnum.\n";
918 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
919 }
920
921 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
922 return [$tagname => $attrHash];
923}
924
925sub dxml_description
926{
927 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
928 my $metamode = $parser->{'parameters'}->{'metamode'};
929
930 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
931 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
932
933###!!! print STDERR "**** Processing a closing </Description> tag \n";
934 # print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
935
936 # Accumulate the metadata
937
938 # We'll be accumulating metadata at this point if we haven't found and therefore
939 # haven't processed the metadata yet.
940 # For subsections, this means that if we're at a matching subsection, but haven't
941 # found the correct metaname to override in that subsection, we accumulate it as new
942 # meta in the subsection by adding it to the current description.
943 # If there's no subsection info for the metadata, it will accumulate at the top level
944 # section description if we hadn't found a matching metaname to override at this point.
945
946 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
947 # at the toplevel section. In the latter case, it means we want to process meta in the
948 # toplevel section. So the eq check between the values below will work in all cases.
949
950 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
951 # this document has been provided. In that case, the metadata for that opt_doc_secnum
952 # won't get attached/accumulated to any part of the doc, not even its top-level section.
953
954 if ($curr_secnum eq $opt_doc_secnum
955 && ($metamode eq "accumulate" || $metamode eq "override")) {
956 # note, metamode would have been set to done if a metadata operation had already succeeded.
957 # If we get here, we still have the metadata to append.
958 if ($metamode eq "override") {
959 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
960 }
961
962 # If we get to here and metamode is override, this means there
963 # was no existing value to overide => treat as an append operation
964
965 # Tack a new metadata tag on to the end of the <Metadata>+ block
966 my $metaname = $parser->{'parameters'}->{'metaname'};
967 my $metavalue = $parser->{'parameters'}->{'metavalue'};
968
969 my $metadata_attr = {
970 '_content' => $metavalue,
971 'name' => $metaname,
972 'mode' => "accumulate"
973 };
974
975 my $append_metadata = [ "Metadata" => $metadata_attr ];
976 my $description_content = $attrHash->{'_content'};
977 print "Appending metadata to doc.xml\n";
978
979 if (ref($description_content)) {
980 # got some existing interesting nested content
981 push(@$description_content, " ", $append_metadata, "\n ");
982 }
983 else {
984 #description_content is most likely a string such as "\n"
985 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
986 }
987
988 $parser->{'parameters'}->{'metamode'} = "done";
989 }
990 else {
991 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
992## print STDERR "**** NOT ACCUMULATE?!? \n";
993 }
994 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
995 return [$tagname => $attrHash];
996}
997
998sub edit_xml_file
999{
1000 my $self = shift @_;
1001 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1002
1003 # use XML::Rules to add it in (read in and out again)
1004 my $parser = XML::Rules->new(start_rules => $start_rules,
1005 rules => $rules,
1006 style => 'filter',
1007 output_encoding => 'utf8' );
1008
1009 my $xml_in = "";
1010 if (!open(MIN,"<$filename")) {
1011 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1012 }
1013 else {
1014 # Read all the text in
1015 my $line;
1016 while (defined ($line=<MIN>)) {
1017 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1018 }
1019 close(MIN);
1020
1021 my $MOUT;
1022 if (!open($MOUT,">$filename")) {
1023 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1024 }
1025 else {
1026 # Matched lines will get handled by the call backs
1027## my $xml_out = "";
1028
1029 binmode($MOUT,":utf8");
1030 $parser->filter($xml_in,$MOUT, $options);
1031
1032# binmode(MOUT,":utf8");
1033# print MOUT $xml_out;
1034 close($MOUT);
1035 }
1036 }
1037
1038 # copy across any custom error information that was stored during parsing
1039 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1040}
1041
1042sub edit_doc_xml
1043{
1044 my $self = shift @_;
1045 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1046
1047 my $info_mess = <<RAWEND;
1048****************************
1049 edit_doc_xml()
1050****************************
1051RAWEND
1052
1053 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1054 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1055 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1056 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1057 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1058 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1059 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1060
1061 $info_mess .= "****************************\n";
1062
1063 $gsdl_cgi->generate_message($info_mess);
1064
1065 # To monitor which section/subsection number we are in
1066 my @start_rules =
1067 ( 'Section' => \&dxml_start_section );
1068
1069 # use XML::Rules to add it in (read in and out again)
1070 # Set the call back functions
1071 my @rules =
1072 ( _default => 'raw',
1073 'Metadata' => \&dxml_metadata,
1074 'Description' => \&dxml_description,
1075 'Archive' => \&dxml_archive); # just for catching errors at end
1076
1077 # Sets the parameters
1078 my $options = { 'metaname' => $metaname,
1079 'metapos' => $metapos,
1080 'metavalue' => $metavalue,
1081 'metamode' => $metamode,
1082 'prevmetavalue' => $prevmetavalue };
1083
1084 if (defined $opt_secnum) {
1085 $options->{'secnum'} = $opt_secnum;
1086 }
1087
1088 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1089}
1090
1091sub set_archives_metadata_entry
1092{
1093 my $self = shift @_;
1094 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1095
1096 my $info_mess = <<RAWEND;
1097****************************
1098 set_archives_metadata_entry()
1099****************************
1100RAWEND
1101
1102 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1103 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1104 $info_mess .= " collect = $collect\n" if defined($collect);
1105 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1106 $info_mess .= " docid = $docid\n" if defined($docid);
1107 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1108 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1109 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1110 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1111 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1112
1113 $info_mess .= "****************************\n";
1114
1115 $gsdl_cgi->generate_message($info_mess);
1116
1117 # Obtain the doc.xml path for the specified docID
1118 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1119
1120 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1121 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1122 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1123
1124 # The $doc_xml_file is relative to the archives, and now let's get the full path
1125 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1126 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1127
1128 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1129 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1130 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1131 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1132 print "override mode with no pos or prev value set. Removing all existing values for $metaname, and setting mode to accumulate for the new value.\n";
1133 # remove all values of $metaname metadata
1134 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1135 $metamode = "accumulate";
1136 }
1137 # Edit the doc.xml file with the specified metadata name, value and position.
1138 # TODO: there is a potential problem here as this edit_doc_xml function
1139 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1140 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1141
1142 # dxml_metadata method ignores metapos if metamode anything other than override
1143 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1144 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1145
1146 # return 0; # return 0 for now to indicate no error
1147 return (defined $self->{'error_msg'}) ? 1 : 0;
1148}
1149
1150
1151sub set_archives_metadata
1152{
1153 my $self = shift @_;
1154
1155 my $username = $self->{'username'};
1156 my $collect = $self->{'collect'};
1157 my $gsdl_cgi = $self->{'gsdl_cgi'};
1158
1159 if ($baseaction::authentication_enabled) {
1160 # Ensure the user is allowed to edit this collection
1161 $self->authenticate_user($username, $collect);
1162 }
1163
1164 # Make sure the collection isn't locked by someone else
1165 $self->lock_collection($username, $collect);
1166
1167 $self->_set_archives_metadata(@_);
1168
1169 # Release the lock once it is done
1170 $self->unlock_collection($username, $collect);
1171}
1172
1173sub _set_archives_metadata_array
1174{
1175 my $self = shift @_;
1176
1177 my $collect = $self->{'collect'};
1178 my $gsdl_cgi = $self->{'gsdl_cgi'};
1179 my $site = $self->{'site'};
1180 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1181
1182 # look up additional args
1183
1184 my $infodbtype = $self->{'infodbtype'};
1185
1186 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1187
1188 my $json_str = $self->{'json'};
1189 my $doc_array = decode_json $json_str;
1190
1191
1192 my $global_status = 0;
1193 my $global_mess = "";
1194
1195 my @all_docids = ();
1196
1197 foreach my $doc_array_rec ( @$doc_array ) {
1198 my $status = -1;
1199 my $docid = $doc_array_rec->{'docid'};
1200
1201 push(@all_docids,$docid);
1202
1203 my $metaname = $doc_array_rec->{'metaname'};
1204 if(defined $metaname) {
1205
1206 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1207
1208 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1209 my $metavalue = $doc_array_rec->{'metavalue'};
1210 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1211
1212
1213 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1214 # make "accumulate" the default (less destructive, as it won't actually
1215 # delete any existing values)
1216 $metamode = "accumulate";
1217 }
1218
1219 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1220 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1221 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1222 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1223
1224 foreach my $metatable_rec ( @$metatable ) {
1225 $metaname = $metatable_rec->{'metaname'};
1226 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1227 my $metapos = undef;
1228 my $prevmetavalue = undef;
1229 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1230
1231 foreach my $metavalue ( @$metavals ) {
1232 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1233 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1234
1235 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1236 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1237 # metavalue for this metaname will be lost
1238 $metamode = "accumulate";
1239 }
1240 }
1241 }
1242 }
1243
1244 if ($status != 0) {
1245 # Catch error if set infodb entry failed
1246 $global_status = $status;
1247 $global_mess .= "Failed to set metadata key: $docid\n";
1248 $global_mess .= "Exit status: $status\n";
1249 $global_mess .= "System Error Message: $!\n";
1250 $global_mess .= "-" x 20 . "\n";
1251 }
1252 }
1253
1254 if ($global_status != 0) {
1255 $global_mess .= "PATH: $ENV{'PATH'}\n";
1256 $gsdl_cgi->generate_error($global_mess);
1257 }
1258 else {
1259 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1260 $gsdl_cgi->generate_ok_message($mess);
1261 }
1262}
1263
1264sub set_archives_metadata_array
1265{
1266 my $self = shift @_;
1267
1268 my $username = $self->{'username'};
1269 my $collect = $self->{'collect'};
1270 my $gsdl_cgi = $self->{'gsdl_cgi'};
1271# my $gsdlhome = $self->{'gsdlhome'};
1272
1273 if ($baseaction::authentication_enabled) {
1274 # Ensure the user is allowed to edit this collection
1275 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1276 }
1277
1278 my $site = $self->{'site'};
1279 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1280
1281 $gsdl_cgi->checked_chdir($collect_dir);
1282
1283 # Obtain the collect dir
1284 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1285
1286 # Make sure the collection isn't locked by someone else
1287 $self->lock_collection($username, $collect);
1288
1289 $self->_set_archives_metadata_array(@_);
1290
1291 # Release the lock once it is done
1292 $self->unlock_collection($username, $collect);
1293}
1294
1295sub _remove_archives_metadata
1296{
1297 my $self = shift @_;
1298
1299 my $collect = $self->{'collect'};
1300 my $gsdl_cgi = $self->{'gsdl_cgi'};
1301# my $gsdlhome = $self->{'gsdlhome'};
1302 my $infodbtype = $self->{'infodbtype'};
1303
1304 my $site = $self->{'site'};
1305
1306 # Obtain the collect and archive dir
1307 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1308
1309 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1310
1311 # look up additional args
1312 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1313
1314 my $metaname = $self->{'metaname'};
1315 my $metapos = $self->{'metapos'};
1316 my $metavalue = $self->{'metavalue'};
1317
1318 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1319 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1320
1321 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1322 # default to deleting the first metavalue for the given metaname
1323 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1324 # seem to get deleted in one single remove_archives_meta action invocation.
1325 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1326 # deleted, both get deleted.
1327 if(!defined $metapos && !defined $metavalue) {
1328 $metapos = 0;
1329 }
1330
1331 my $metamode = $self->{'metamode'};
1332 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1333
1334 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1335 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1336
1337 # This now stores the full pathname
1338 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1339
1340 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1341# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1342
1343 if ($status == 0)
1344 {
1345 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1346 $mess .= " $metaname";
1347 $mess .= "->[$metapos]" if (defined $metapos);
1348 $mess .= " ($metavalue)" if (defined $metavalue);
1349 $gsdl_cgi->generate_ok_message($mess);
1350 }
1351 else
1352 {
1353 my $mess .= "Failed to remove archives metadata key: $docid\n";
1354 $mess .= "Exit status: $status\n";
1355 $mess .= "System Error Message: $!\n";
1356 $mess .= "-" x 20 . "\n";
1357
1358 $gsdl_cgi->generate_error($mess);
1359 }
1360
1361 #return $status; # in case calling functions have a use for this
1362}
1363
1364sub remove_archives_metadata
1365{
1366 my $self = shift @_;
1367
1368 my $username = $self->{'username'};
1369 my $collect = $self->{'collect'};
1370 my $gsdl_cgi = $self->{'gsdl_cgi'};
1371
1372 if ($baseaction::authentication_enabled)
1373 {
1374 # Ensure the user is allowed to edit this collection
1375 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1376 }
1377
1378 # Make sure the collection isn't locked by someone else
1379 $self->lock_collection($username, $collect);
1380
1381 $self->_remove_archives_metadata(@_);
1382
1383 # Release the lock once it is done
1384 $self->unlock_collection($username, $collect);
1385}
1386
1387sub remove_from_doc_xml
1388{
1389 my $self = shift @_;
1390 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1391
1392 my @start_rules = ('Section' => \&dxml_start_section);
1393
1394 # Set the call-back functions for the metadata tags
1395 my @rules =
1396 (
1397 _default => 'raw',
1398 'Metadata' => \&rfdxml_metadata
1399 );
1400
1401 my $parser = XML::Rules->new
1402 (
1403 start_rules => \@start_rules,
1404 rules => \@rules,
1405 style => 'filter',
1406 output_encoding => 'utf8',
1407# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1408 stripspaces => 2|0|0 # ineffectual
1409 );
1410
1411 my $status = 0;
1412 my $xml_in = "";
1413 if (!open(MIN,"<$doc_xml_filename"))
1414 {
1415 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1416 $status = 1;
1417 }
1418 else
1419 {
1420 # Read them in
1421 my $line;
1422 while (defined ($line=<MIN>)) {
1423 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1424 }
1425 close(MIN);
1426
1427 # Filter with the call-back functions
1428 my $xml_out = "";
1429
1430 my $MOUT;
1431 if (!open($MOUT,">$doc_xml_filename")) {
1432 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1433 $status = 1;
1434 }
1435 else {
1436 binmode($MOUT,":utf8");
1437 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1438 close($MOUT);
1439 }
1440 }
1441 return $status;
1442}
1443
1444sub rfdxml_metadata
1445{
1446 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1447
1448 # For comparisons, toplevel section is indicated by ""
1449 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1450 my $secid = $parser->{'parameters'}->{'secid'} || "";
1451
1452 if (!($secid eq $curr_sec_num))
1453 {
1454 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1455 return [$tagname => $attrHash];
1456 }
1457
1458 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1459 {
1460 if (!defined $parser->{'parameters'}->{'poscount'})
1461 {
1462 $parser->{'parameters'}->{'poscount'} = 0;
1463 }
1464 else
1465 {
1466 $parser->{'parameters'}->{'poscount'}++;
1467 }
1468
1469 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1470 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
1471 return [];
1472 }
1473
1474 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1475 {
1476 return [];
1477 }
1478
1479 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1480 {
1481 return [];
1482 }
1483 }
1484
1485 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1486 return [$tagname => $attrHash];
1487}
1488
1489sub mxml_metadata
1490{
1491 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1492 my $metaname = $parser->{'parameters'}->{'metaname'};
1493 my $metamode = $parser->{'parameters'}->{'metamode'};
1494
1495 # Report error if we don't see FileName tag before this
1496 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1497
1498 # Don't do anything if we are not in the right FileSet
1499 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1500 if ($file_regexp =~ /\.\*/) {
1501 # Only interested in a file_regexp if it specifies precisely one
1502 # file.
1503 # So, skip anything with a .* in it as it is too general
1504## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1505 return [$tagname => $attrHash];
1506 }
1507 my $src_file = $parser->{'parameters'}->{'src_file'};
1508 if (!($src_file =~ /$file_regexp/)) {
1509 return [$tagname => $attrHash];
1510 }
1511## print STDERR "*** mxl metamode = $metamode\n";
1512
1513 # Find the right metadata tag and checks if we are going to override it
1514 my $name_attr = $attrHash->{'name'};
1515 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1516
1517 # now metadata.xml functions need to keep track of metapos
1518 if (!defined $parser->{'parameters'}->{'poscount'})
1519 {
1520 $parser->{'parameters'}->{'poscount'} = 0;
1521 }
1522 else
1523 {
1524 $parser->{'parameters'}->{'poscount'}++;
1525 }
1526
1527 # If either the metapos or prevmetavalue is set,
1528 # get the value and override the current value
1529 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1530
1531 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1532 {
1533 $attrHash->{'_content'} = $metavalue;
1534
1535 ## print STDERR "**** overriding metadata.xml\n";
1536
1537 # Don't want it to wipe out any other pieces of metadata
1538 $parser->{'parameters'}->{'metamode'} = "done";
1539 }
1540 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1541 {
1542 $attrHash->{'_content'} = $metavalue;
1543 $parser->{'parameters'}->{'metamode'} = "done";
1544 }
1545 }
1546
1547 # mxml_description will process the metadata if metadata is accumulate,
1548 # or if we haven't found the metadata to override
1549
1550 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1551 return [$tagname => $attrHash];
1552}
1553
1554
1555sub mxml_description
1556{
1557 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1558 my $metamode = $parser->{'parameters'}->{'metamode'};
1559
1560 # Failed... Report error if we don't see FileName tag before this
1561 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1562
1563 # Don't do anything if we are not in the right FileSet
1564 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1565 if ($file_regexp =~ m/\.\*/) {
1566 # Only interested in a file_regexp if it specifies precisely one
1567 # file.
1568 # So, skip anything with a .* in it as it is too general
1569 return [$tagname => $attrHash];
1570 }
1571 my $src_file = $parser->{'parameters'}->{'src_file'};
1572
1573 if (!($src_file =~ m/$file_regexp/)) {
1574 return [$tagname => $attrHash];
1575 }
1576
1577 # Accumulate the metadata block to the end of the description block
1578 # Note: This adds metadata block to all description blocks, so if there are
1579 # multiple FileSets, it will add to all of them
1580 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1581
1582 # if metamode was "override" but get to here then it failed to
1583 # find an item to override, in which case it should append its
1584 # value to the end, just like the "accumulate" mode
1585
1586 if ($metamode eq "override") {
1587 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1588 }
1589
1590 # tack a new metadata tag on to the end of the <Metadata>+ block
1591 my $metaname = $parser->{'parameters'}->{'metaname'};
1592 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1593
1594 my $metadata_attr = { '_content' => $metavalue,
1595 'name' => $metaname,
1596 'mode' => "accumulate" };
1597
1598 my $append_metadata = [ "Metadata" => $metadata_attr ];
1599 my $description_content = $attrHash->{'_content'};
1600
1601## print STDERR "*** appending to metadata.xml\n";
1602
1603 # append the new metadata element to the end of the current
1604 # content contained inside this tag
1605 if (ref($description_content) eq "") {
1606 # => string or numeric literal
1607 # this is caused by a <Description> block has no <Metadata> child elements
1608 # => set up an empty array in '_content'
1609 $attrHash->{'_content'} = [ "\n" ];
1610 $description_content = $attrHash->{'_content'};
1611 }
1612
1613 push(@$description_content, " ", $append_metadata, "\n ");
1614 $parser->{'parameters'}->{'metamode'} = "done";
1615 }
1616
1617 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1618 return [$tagname => $attrHash];
1619}
1620
1621
1622sub mxml_fileset
1623{
1624 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1625
1626 # Initilise the current_file
1627 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1628 # FileName tag must come before Description tag
1629 $parser->{'parameters'}->{'current_file'} = "";
1630
1631 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1632 return [$tagname => $attrHash];
1633}
1634
1635sub mxml_directorymetadata
1636{
1637 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1638
1639 # if we haven't processed the metadata when we reach the end of metadata.xml
1640 # it's because there's no particular FileSet element whose FileName matched
1641 # In which case, add a new FileSet for this FileName
1642 my $metamode = $parser->{'parameters'}->{'metamode'};
1643 if($metamode ne "done") {
1644
1645 if ($metamode eq "override") {
1646 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1647 }
1648
1649 # If we get to here and metamode is override, this means there
1650 # was no existing value to overide => treat as an append operation
1651
1652 # Create a new FileSet element and append to DirectoryMetadata
1653 # <FileSet>
1654 # <FileName>src_file</FileName>
1655 # <Description>
1656 # <Metadata mode="" name="">metavalue</Metadata>
1657 # </Description>
1658 # </FileSet>
1659 my $src_file = $parser->{'parameters'}->{'src_file'};
1660 my $metaname = $parser->{'parameters'}->{'metaname'};
1661 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1662 my $metadata_attr = {
1663 '_content' => $metavalue,
1664 'name' => $metaname,
1665 'mode' => "accumulate"
1666 };
1667 my $append_metadata = [ "Metadata" => $metadata_attr ];
1668 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
1669 my $description_element = [ "Description" => $description_attr ];
1670
1671 #_content is not an attribute, it's special and holds the children of this element
1672 # including the textnode value embedded in this element if any.
1673 my $filename_attr = {'_content' => $src_file};
1674 my $filename_element = [ "FileName" => $filename_attr ];
1675
1676 my $fileset_attr = {};
1677 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1678 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1679
1680
1681 # get children of dirmeta, and push the new FileSet element onto it
1682 print "Appending metadata to metadata.xml\n";
1683 my $dirmeta_content = $attrHash->{'_content'};
1684 if (ref($dirmeta_content)) {
1685 # got some existing interesting nested content
1686 #push(@$dirmeta_content, " ", $fileset ,"\n ");
1687 push(@$dirmeta_content, "\t", $fileset ,"\n");
1688 }
1689 else {
1690 #description_content is most likely a string such as "\n"
1691 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
1692 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1693 }
1694
1695 $parser->{'parameters'}->{'metamode'} = "done";
1696 }
1697 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1698 return [$tagname => $attrHash];
1699}
1700
1701
1702sub edit_metadata_xml
1703{
1704 my $self = shift @_;
1705 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1706
1707 # Set the call-back functions for the metadata tags
1708 my @rules =
1709 ( _default => 'raw',
1710 'FileName' => \&mxml_filename,
1711 'Metadata' => \&mxml_metadata,
1712 'Description' => \&mxml_description,
1713 'FileSet' => \&mxml_fileset,
1714 'DirectoryMetadata' => \&mxml_directorymetadata);
1715
1716 # use XML::Rules to add it in (read in and out again)
1717 my $parser = XML::Rules->new(rules => \@rules,
1718 style => 'filter',
1719 output_encoding => 'utf8',
1720 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1721
1722 if (!-e $metadata_xml_filename) {
1723
1724 if (open(MOUT,">$metadata_xml_filename")) {
1725
1726 my $src_file_re = &util::filename_to_regex($src_file);
1727 # shouldn't the following also be in the above utility routine??
1728 # $src_file_re =~ s/\./\\./g;
1729
1730 print MOUT "<?xml version=\"1.0\"?>\n";
1731 print MOUT "<DirectoryMetadata>\n";
1732 print MOUT " <FileSet>\n";
1733 print MOUT " <FileName>$src_file_re</FileName>\n";
1734 print MOUT " <Description>\n";
1735 print MOUT " </Description>\n";
1736 print MOUT " </FileSet>\n";
1737 print MOUT "</DirectoryMetadata>\n";
1738
1739 close(MOUT);
1740 }
1741 else {
1742 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1743 }
1744 }
1745
1746
1747 my $xml_in = "";
1748 if (!open(MIN,"<$metadata_xml_filename")) {
1749 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1750 }
1751 else {
1752 # Read them in
1753 my $line;
1754 while (defined ($line=<MIN>)) {
1755 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
1756 }
1757 close(MIN);
1758
1759 # Filter with the call-back functions
1760 my $xml_out = "";
1761
1762 my $MOUT;
1763 if (!open($MOUT,">$metadata_xml_filename")) {
1764 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1765 }
1766 else {
1767 binmode($MOUT,":utf8");
1768
1769 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1770 # At the moment, I will just hack it!
1771 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1772 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1773 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1774 #print MOUT $xml_out;
1775
1776 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1777 metapos => $metapos,
1778 metavalue => $metavalue,
1779 metamode => $metamode,
1780 src_file => $src_file,
1781 prevmetavalue => $prevmetavalue,
1782 current_file => undef} );
1783 close($MOUT);
1784 }
1785 }
1786}
1787
1788
1789sub set_import_metadata
1790{
1791 my $self = shift @_;
1792
1793 my $username = $self->{'username'};
1794 my $collect = $self->{'collect'};
1795 my $gsdl_cgi = $self->{'gsdl_cgi'};
1796
1797 if ($baseaction::authentication_enabled) {
1798 # Ensure the user is allowed to edit this collection
1799 $self->authenticate_user($username, $collect);
1800 }
1801
1802 # Make sure the collection isn't locked by someone else
1803 $self->lock_collection($username, $collect);
1804
1805 $self->_set_import_metadata(@_);
1806
1807 # Release the lock once it is done
1808 $self->unlock_collection($username, $collect);
1809
1810}
1811
1812sub set_import_metadata_array
1813{
1814 my $self = shift @_;
1815
1816 my $username = $self->{'username'};
1817 my $collect = $self->{'collect'};
1818 my $gsdl_cgi = $self->{'gsdl_cgi'};
1819# my $gsdlhome = $self->{'gsdlhome'};
1820
1821 if ($baseaction::authentication_enabled) {
1822 # Ensure the user is allowed to edit this collection
1823 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1824 }
1825
1826 my $site = $self->{'site'};
1827 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1828
1829 $gsdl_cgi->checked_chdir($collect_dir);
1830
1831 # Make sure the collection isn't locked by someone else
1832 $self->lock_collection($username, $collect);
1833
1834 $self->_set_import_metadata_array(@_);
1835
1836 # Release the lock once it is done
1837 $self->unlock_collection($username, $collect);
1838
1839}
1840
1841
1842sub _set_import_metadata_array
1843{
1844 my $self = shift @_;
1845
1846 my $collect = $self->{'collect'};
1847 my $gsdl_cgi = $self->{'gsdl_cgi'};
1848
1849 my $site = $self->{'site'};
1850 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1851
1852 # look up additional args
1853
1854 my $infodbtype = $self->{'infodbtype'};
1855
1856 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1857 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1858
1859 my $json_str = $self->{'json'};
1860 my $doc_array = decode_json $json_str;
1861
1862 my $global_status = 0;
1863 my $global_mess = "";
1864
1865 my @all_docids = ();
1866
1867 foreach my $doc_array_rec ( @$doc_array )
1868 {
1869 my $status = -1;
1870 my $docid = $doc_array_rec->{'docid'};
1871
1872 my ($docid_root,$docid_secnum);
1873 if(defined $docid) {
1874 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1875 # as yet no support for setting subsection metadata in metadata.xml
1876 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1877 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
1878 next; # skip this docid in for loop
1879 }
1880 }
1881
1882 push(@all_docids,$docid); # docid_root rather
1883
1884 my $metaname = $doc_array_rec->{'metaname'};
1885 if (defined $metaname) {
1886 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1887 my $metavalue = $doc_array_rec->{'metavalue'};
1888 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1889
1890 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1891 # make "accumulate" the default (less destructive, as it won't actually
1892 # delete any existing values)
1893 $metamode = "accumulate";
1894 }
1895
1896 # adding metapos and prevmetavalue support to import_metadata subroutines
1897 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1898 my $prevmetavalue = $self->{'prevmetavalue'};
1899
1900 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1901
1902 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1903 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1904
1905 foreach my $metatable_rec ( @$metatable ) {
1906 $metaname = $metatable_rec->{'metaname'};
1907 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1908 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1909 # make "accumulate" the default (less destructive, as it won't actually
1910 # delete any existing values)
1911 $metamode = "accumulate";
1912 }
1913
1914 # No support for metapos and prevmetavalue in the JSON metatable substructure
1915 my $metapos = undef;
1916 my $prevmetavalue = undef;
1917 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1918
1919 foreach my $metavalue ( @$metavals ) {
1920 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1921
1922 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1923 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
1924 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1925 # metavalue for this metaname will be lost
1926 $metamode = "accumulate";
1927 }
1928 }
1929 }
1930 }
1931 }
1932
1933 # always a success message
1934 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1935 $gsdl_cgi->generate_ok_message($mess);
1936}
1937
1938# always returns true (1)
1939sub set_import_metadata_entry
1940{
1941 my $self = shift @_;
1942 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
1943
1944 my $info_mess = <<RAWEND;
1945****************************
1946 set_import_metadata_entry()
1947****************************
1948RAWEND
1949
1950 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1951 $info_mess .= " collect = $collect\n" if defined($collect);
1952 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1953 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
1954 $info_mess .= " docid = $docid\n" if defined($docid);
1955 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1956 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1957 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1958 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1959 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1960
1961 $info_mess .= "****************************\n";
1962
1963 $gsdl_cgi->generate_message($info_mess);
1964
1965 # import works with metadata.xml which can have inherited metadata
1966 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
1967 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
1968 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
1969 # what they're doing if they provide a metapos.
1970 if(defined $metapos) {
1971 print STDERR "@@@@ WARNING: metapos defined.\n";
1972 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
1973 }
1974
1975 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1976 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1977 my $metadata_xml_file;
1978 my $import_filename = undef;
1979
1980 if (defined $docid) {
1981 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1982 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1983
1984 # This now stores the full pathname
1985 $import_filename = $doc_rec->{'src-file'}->[0];
1986 $import_filename = &util::placeholders_to_abspath($import_filename);
1987
1988 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
1989 # as the array version of the method doesn't support the -f parameter yet
1990 my $import_file = $self->{'f'};
1991 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1992 }
1993
1994 # figure out correct metadata.xml file [?]
1995 # Assuming the metadata.xml file is next to the source file
1996 # Note: This will not work if it is using the inherited metadata from the parent folder
1997 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1998 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1999
2000 # If we're overriding everything, then $prevmetavalue=undefined and
2001 # $metamode=override combined with $metapos=undefined
2002 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2003 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2004 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2005## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2006 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2007
2008 }
2009
2010 # Edit the metadata.xml
2011 # Modified by Jeffrey from DL Consulting
2012 # Handle the case where there is one metadata.xml file for multiple FileSets
2013 # The XML filter needs to know whether it is in the right FileSet
2014 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2015 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2016 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2017 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2018 #return 0;
2019 return $metadata_xml_filename;
2020}
2021
2022sub _remove_import_metadata
2023{
2024 my $self = shift @_;
2025
2026 my $collect = $self->{'collect'};
2027 my $gsdl_cgi = $self->{'gsdl_cgi'};
2028# my $gsdlhome = $self->{'gsdlhome'};
2029 my $infodbtype = $self->{'infodbtype'};
2030
2031 # Obtain the collect dir
2032 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2033 my $site = $self->{'site'};
2034 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2035
2036 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2037 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2038
2039 # look up additional args
2040 my $docid = $self->{'d'};
2041 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2042 {
2043 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2044 }
2045
2046 my $metaname = $self->{'metaname'};
2047 my $metapos = $self->{'metapos'};
2048 my $metavalue = $self->{'metavalue'};
2049
2050 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2051 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2052
2053 if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2054 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2055 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2056 $metapos = 0;
2057 }
2058 my $metamode = $self->{'metamode'};
2059 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2060
2061 # import works with metadata.xml which can have inherited metadata
2062 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2063 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2064 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2065 # what they're doing if they provide a metapos.
2066 if(defined $metapos) {
2067 print STDERR "@@@@ WARNING: metapos defined.\n";
2068 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2069 }
2070
2071 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2072 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2073 my $metadata_xml_file;
2074 my $import_filename = undef;
2075 if (defined $docid)
2076 {
2077 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2078 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2079
2080 # This now stores the full pathname
2081 $import_filename = $doc_rec->{'src-file'}->[0];
2082 $import_filename = &util::placeholders_to_abspath($import_filename);
2083 }
2084
2085 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2086 {
2087 $gsdl_cgi->generate_error("There is no metadata\n");
2088 }
2089
2090 # figure out correct metadata.xml file [?]
2091 # Assuming the metadata.xml file is next to the source file
2092 # Note: This will not work if it is using the inherited metadata from the parent folder
2093 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2094 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2095
2096 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2097
2098 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2099 $mess .= " $metaname";
2100 $mess .= " = $metavalue\n";
2101
2102 $gsdl_cgi->generate_ok_message($mess);
2103
2104 #return $status; # in case calling functions have a use for this
2105}
2106
2107sub remove_import_metadata
2108{
2109 my $self = shift @_;
2110
2111 my $username = $self->{'username'};
2112 my $collect = $self->{'collect'};
2113 my $gsdl_cgi = $self->{'gsdl_cgi'};
2114
2115 if ($baseaction::authentication_enabled) {
2116 # Ensure the user is allowed to edit this collection
2117 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2118 }
2119
2120 # Make sure the collection isn't locked by someone else
2121 $self->lock_collection($username, $collect);
2122
2123 $self->_remove_import_metadata(@_);
2124
2125 # Release the lock once it is done
2126 $self->unlock_collection($username, $collect);
2127
2128}
2129
2130sub remove_from_metadata_xml
2131{
2132 my $self = shift @_;
2133 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2134 # metamode generally has no meaning for removing meta, but is used by set_meta
2135 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2136
2137 # Set the call-back functions for the metadata tags
2138 my @rules =
2139 (
2140 _default => 'raw',
2141 'Metadata' => \&rfmxml_metadata,
2142 'FileName' => \&mxml_filename
2143 );
2144
2145 my $parser = XML::Rules->new
2146 (
2147 rules => \@rules,
2148 style => 'filter',
2149 output_encoding => 'utf8',
2150 #normalisespaces => 1,
2151 stripspaces => 2|0|0 # ineffectual
2152 );
2153
2154 my $xml_in = "";
2155 if (!open(MIN,"<$metadata_xml_filename"))
2156 {
2157 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2158 }
2159 else
2160 {
2161 # Read them in
2162 my $line;
2163 while (defined ($line=<MIN>)) {
2164 $xml_in .= $line if($line !~ m/^\s*$/); # preserve all but empty lines
2165 }
2166 close(MIN);
2167
2168 # Filter with the call-back functions
2169 my $xml_out = "";
2170
2171 my $MOUT;
2172 if (!open($MOUT,">$metadata_xml_filename")) {
2173 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2174 }
2175 else {
2176 binmode($MOUT,":utf8");
2177 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2178 close($MOUT);
2179 }
2180 }
2181}
2182
2183sub rfmxml_metadata
2184{
2185 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2186
2187 # metadata.xml does not handle subsections
2188
2189 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2190 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2191 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2192 {
2193 if (!defined $parser->{'parameters'}->{'poscount'})
2194 {
2195 $parser->{'parameters'}->{'poscount'} = 0;
2196 }
2197 else
2198 {
2199 $parser->{'parameters'}->{'poscount'}++;
2200 }
2201
2202 # if overriding but no metapos, then clear all the meta for this metaname
2203 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2204 return [];
2205 }
2206
2207 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2208 {
2209 return [];
2210 }
2211 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2212 {
2213 return [];
2214 }
2215 }
2216
2217 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2218 return [$tagname => $attrHash];
2219}
2220
2221sub _remove_live_metadata
2222{
2223 my $self = shift @_;
2224
2225 my $collect = $self->{'collect'};
2226 my $gsdl_cgi = $self->{'gsdl_cgi'};
2227# my $gsdlhome = $self->{'gsdlhome'};
2228 my $infodbtype = $self->{'infodbtype'};
2229
2230 # Obtain the collect dir
2231 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2232 my $site = $self->{'site'};
2233 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2234
2235
2236 # look up additional args
2237 my $docid = $self->{'d'};
2238 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2239 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2240 }
2241
2242 # Generate the dbkey
2243 my $metaname = $self->{'metaname'};
2244 my $dbkey = "$docid.$metaname";
2245
2246 # To people who know $collect_tail please add some comments
2247 # Obtain the live gdbm_db path
2248 my $collect_tail = $collect;
2249 $collect_tail =~ s/^.*[\/|\\]//;
2250 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2251 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2252
2253 # Remove the key
2254 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2255 my $status = system($cmd);
2256 if ($status != 0) {
2257 # Catch error if gdbmdel failed
2258 my $mess = "Failed to set metadata key: $dbkey\n";
2259
2260 $mess .= "PATH: $ENV{'PATH'}\n";
2261 $mess .= "cmd = $cmd\n";
2262 $mess .= "Exit status: $status\n";
2263 $mess .= "System Error Message: $!\n";
2264
2265 $gsdl_cgi->generate_error($mess);
2266 }
2267 else {
2268 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2269 }
2270
2271}
2272
2273sub remove_live_metadata
2274{
2275 my $self = shift @_;
2276
2277 my $username = $self->{'username'};
2278 my $collect = $self->{'collect'};
2279 my $gsdl_cgi = $self->{'gsdl_cgi'};
2280 my $gsdlhome = $self->{'gsdlhome'};
2281
2282 if ($baseaction::authentication_enabled) {
2283 # Ensure the user is allowed to edit this collection
2284 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2285 }
2286
2287 # Make sure the collection isn't locked by someone else
2288 $self->lock_collection($username, $collect);
2289
2290 $self->_remove_live_metadata(@_);
2291
2292 $self->unlock_collection($username, $collect);
2293}
2294
2295sub remove_metadata
2296{
2297 my $self = shift @_;
2298
2299 my $where = $self->{'where'};
2300 if(!$where || ($where =~ m/^\s*$/)) {
2301 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2302 return;
2303 }
2304
2305 my $username = $self->{'username'};
2306 my $collect = $self->{'collect'};
2307 my $gsdl_cgi = $self->{'gsdl_cgi'};
2308
2309 if ($baseaction::authentication_enabled) {
2310 # Ensure the user is allowed to edit this collection
2311 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2312 }
2313
2314 # Make sure the collection isn't locked by someone else
2315 $self->lock_collection($username, $collect);
2316
2317 # check which directories need to be processed, specified in $where as
2318 # any combination of import|archives|index|live
2319 if($where =~ m/import/) {
2320 $self->_remove_import_metadata(@_);
2321 }
2322 if($where =~ m/archives/) {
2323 $self->_remove_archives_metadata(@_);
2324 }
2325 if($where =~ m/index/) {
2326 $self->_remove_index_metadata(@_);
2327 }
2328
2329 # Release the lock once it is done
2330 $self->unlock_collection($username, $collect);
2331}
2332
2333# the internal version, without authentication
2334sub _remove_index_metadata
2335{
2336 my $self = shift @_;
2337
2338 my $collect = $self->{'collect'};
2339 my $gsdl_cgi = $self->{'gsdl_cgi'};
2340# my $gsdlhome = $self->{'gsdlhome'};
2341 my $infodbtype = $self->{'infodbtype'};
2342
2343 # Obtain the collect dir
2344 my $site = $self->{'site'};
2345 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2346 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2347
2348 # look up additional args
2349 my $docid = $self->{'d'};
2350 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2351 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2352 }
2353 my $metaname = $self->{'metaname'};
2354 my $metapos = $self->{'metapos'};
2355 my $metavalue = $self->{'metavalue'};
2356
2357 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2358 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2359
2360 # To people who know $collect_tail please add some comments
2361 # -> In collection groups, I think collect_tailname is the subcollection name,
2362 # e.g. colgroup-name/col-tail-name
2363 # Obtain the path to the database
2364 my $collect_tail = $collect;
2365 $collect_tail =~ s/^.*[\/|\\]//;
2366 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2367 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2368
2369 # Read the docid entry
2370 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2371
2372 # Check to make sure the key does exist
2373 if (!defined ($doc_rec->{$metaname})) {
2374 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2375 }
2376
2377 # Obtain the specified metadata pos
2378 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2379 if(!defined $metapos && !defined $metavalue) {
2380 $metapos = 0;
2381 }
2382
2383
2384 # consider check key is defined before deleting?
2385 # Loop through the metadata array and ignore the specified position
2386 my $filtered_metadata = [];
2387 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2388 for (my $i=0; $i<$num_metadata_vals; $i++) {
2389 my $metaval = shift(@{$doc_rec->{$metaname}});
2390
2391 if (!defined $metavalue && $i != $metapos) {
2392 push(@$filtered_metadata,$metaval);
2393 }
2394
2395 if(defined $metavalue && !($metavalue eq $metaval))
2396 {
2397 push(@$filtered_metadata,$metaval);
2398 }
2399 }
2400 $doc_rec->{$metaname} = $filtered_metadata;
2401
2402 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2403 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2404
2405 if ($status != 0) {
2406 my $mess = "Failed to set metadata key: $docid\n";
2407
2408 $mess .= "PATH: $ENV{'PATH'}\n";
2409 $mess .= "Exit status: $status\n";
2410 $mess .= "System Error Message: $!\n";
2411
2412 $gsdl_cgi->generate_error($mess);
2413 }
2414 else {
2415 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2416 $mess .= " $metaname";
2417 $mess .= "->[$metapos]" if (defined $metapos);
2418 $mess .= " ($metavalue)" if (defined $metavalue);
2419
2420 $gsdl_cgi->generate_ok_message($mess);
2421 }
2422
2423 #return $status; # in case calling functions have a use for this
2424}
2425
2426sub remove_index_metadata
2427{
2428 my $self = shift @_;
2429
2430 my $username = $self->{'username'};
2431 my $collect = $self->{'collect'};
2432 my $gsdl_cgi = $self->{'gsdl_cgi'};
2433# my $gsdlhome = $self->{'gsdlhome'};
2434
2435 if ($baseaction::authentication_enabled) {
2436 # Ensure the user is allowed to edit this collection
2437 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2438 }
2439
2440 # Obtain the collect dir
2441 my $site = $self->{'site'};
2442 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2443 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2444
2445 # Make sure the collection isn't locked by someone else
2446 $self->lock_collection($username, $collect);
2447
2448 $self->_remove_index_metadata(@_);
2449
2450 # Release the lock once it is done
2451 $self->unlock_collection($username, $collect);
2452}
2453
2454
2455# Was trying to reused the codes, but the functions need to be broken
2456# down more before they can be reused, otherwise there will be too
2457# much overhead and duplicate process...
2458sub insert_metadata
2459{
2460 my $self = shift @_;
2461
2462 my $username = $self->{'username'};
2463 my $collect = $self->{'collect'};
2464 my $gsdl_cgi = $self->{'gsdl_cgi'};
2465 my $gsdlhome = $self->{'gsdlhome'};
2466 my $infodbtype = $self->{'infodbtype'};
2467
2468 # If the import metadata and gdbm database have been updated, we
2469 # need to insert some notification to warn user that the the text
2470 # they see at the moment is not indexed and require a rebuild.
2471 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2472
2473 if ($baseaction::authentication_enabled) {
2474 # Ensure the user is allowed to edit this collection
2475 $self->authenticate_user($username, $collect);
2476 }
2477
2478 # Obtain the collect and archive dir
2479 my $site = $self->{'site'};
2480 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2481 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2482 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2483
2484 # Make sure the collection isn't locked by someone else
2485 $self->lock_collection($username, $collect);
2486
2487 # Check additional args
2488 my $docid = $self->{'d'};
2489 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
2490 $gsdl_cgi->generate_error("No document id is specified: d=...");
2491 }
2492 my $metaname = $self->{'metaname'};
2493 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
2494 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2495 }
2496 my $metavalue = $self->{'metavalue'};
2497 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
2498 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2499 }
2500 # make "accumulate" the default (less destructive, as it won't actually
2501 # delete any existing values)
2502 my $metamode = "accumulate";
2503
2504 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2505 my $metapos = undef;
2506 my $prevmetavalue = undef;
2507
2508 #=======================================================================#
2509 # set_import_metadata [START]
2510 #=======================================================================#
2511 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2512 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2513 my $metadata_xml_file;
2514 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2515 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2516
2517 # This now stores the full pathname
2518 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2519 $import_filename = &util::placeholders_to_abspath($import_filename);
2520
2521 # figure out correct metadata.xml file [?]
2522 # Assuming the metadata.xml file is next to the source file
2523 # Note: This will not work if it is using the inherited metadata from the parent folder
2524 my ($import_tailname, $import_dirname)
2525 = File::Basename::fileparse($import_filename);
2526 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2527
2528 # Shane's escape characters
2529 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2530 $metavalue =~ s/\,/&#44;/g;
2531 $metavalue =~ s/\:/&#58;/g;
2532 $metavalue =~ s/\|/&#124;/g;
2533 $metavalue =~ s/\(/&#40;/g;
2534 $metavalue =~ s/\)/&#41;/g;
2535 $metavalue =~ s/\[/&#91;/g;
2536 $metavalue =~ s/\\/&#92;/g;
2537 $metavalue =~ s/\]/&#93;/g;
2538 $metavalue =~ s/\{/&#123;/g;
2539 $metavalue =~ s/\}/&#125;/g;
2540 $metavalue =~ s/\"/&#34;/g;
2541 $metavalue =~ s/\`/&#96;/g;
2542 $metavalue =~ s/\n/_newline_/g;
2543
2544 # Edit the metadata.xml
2545 # Modified by Jeffrey from DL Consulting
2546 # Handle the case where there is one metadata.xml file for multiple FileSets
2547 # The XML filter needs to know whether it is in the right FileSet
2548 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2549 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2550 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2551 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2552 #=======================================================================#
2553 # set_import_metadata [END]
2554 #=======================================================================#
2555
2556
2557 #=======================================================================#
2558 # set_metadata (accumulate version) [START]
2559 #=======================================================================#
2560 # To people who know $collect_tail please add some comments
2561 # Obtain path to the database
2562 my $collect_tail = $collect;
2563 $collect_tail =~ s/^.*[\/|\\]//;
2564 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2565 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2566
2567 # Read the docid entry
2568 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2569
2570 # Protect the quotes
2571 $metavalue =~ s/\"/\\\"/g;
2572
2573 # Adds the pending macro
2574 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2575
2576 # If the metadata doesn't exist, create a new one
2577 if (!defined($doc_rec->{$metaname})){
2578 $doc_rec->{$metaname} = [ $macro_metavalue ];
2579 }
2580 # Else, let's acculumate the values
2581 else {
2582 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2583 }
2584
2585 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2586 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2587
2588 if ($status != 0) {
2589 # Catch error if gdbmget failed
2590 my $mess = "Failed to set metadata key: $docid\n";
2591
2592 $mess .= "PATH: $ENV{'PATH'}\n";
2593 $mess .= "Exit status: $status\n";
2594 $mess .= "System Error Message: $!\n";
2595
2596 $gsdl_cgi->generate_error($mess);
2597 }
2598 else {
2599 my $mess = "insert-metadata successful: Key[$docid]\n";
2600 $mess .= " [In metadata.xml] $metaname";
2601 $mess .= " = $metavalue\n";
2602 $mess .= " [In database] $metaname";
2603 $mess .= " = $macro_metavalue\n";
2604 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2605 $gsdl_cgi->generate_ok_message($mess);
2606 }
2607 #=======================================================================#
2608 # set_metadata (accumulate version) [END]
2609 #=======================================================================#
2610
2611 # Release the lock once it is done
2612 $self->unlock_collection($username, $collect);
2613}
2614
2615# not returning 1; here since this file is conditionally included by metadataction.pm
2616# and not otherwise meant to be used on its own
2617
Note: See TracBrowser for help on using the repository browser.