source: main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm@ 31602

Last change on this file since 31602 was 31602, checked in by ak19, 7 years ago

Follows Dr Bainbridge's suggestion to prevent URL based calls to set-metadata and remove-meta metadataserver.pl operations. Split metadataaction.pm into modmetadataaction.pm and metadataaction.pm, shifting the methods that modify metadata (set and remove subroutines) into the first. Now GS3 sets an env var that will control whether the meta-modifying subroutines will be available when called. If the env var is set, then metadataaction.pm will include the modmetadataaction.pm file in the begin block. For GS2, it works as before, always including the meta modifying subroutines. Tested on Linux with the GS3 web doc editor vs calling metadataserver.pl to set metadata directly from a URL.

File size: 94.5 KB
RevLine 
[31602]1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
35# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files
36# http://www.perlmonks.org/?node_id=524456 - merging hashes
37
38package metadataaction;
39
40use strict;
41
42use cgiactions::baseaction;
43
44use dbutil;
45use ghtml;
46
47use JSON;
48
49
50BEGIN {
51 print STDERR "In ModMeta\n";
52}
53
54
55$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
56{
57 #SET METHODS
58 "set-live-metadata" => {
59 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60 'optional-args' => [ ] },
61
62 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
63 'compulsory-args' => [ "metaname", "metavalue" ],
64 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
65
66 "set-index-metadata" => {
67 'compulsory-args' => [ "d", "metaname", "metavalue" ],
68 'optional-args' => [ "metapos", "metamode" ] },
69
70 "set-archives-metadata" => {
71 'compulsory-args' => [ "d", "metaname", "metavalue" ],
72 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
73
74 "set-import-metadata" => {
75 'compulsory-args' => [ "metaname", "metavalue" ],
76 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
77
78 #SET METHODS (ARRAY)
79 "set-metadata-array" => {
80 'compulsory-args' => [ "where", "json" ],
81 'optional-args' => [ ],
82 'help-string' => [
83 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
84
85 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
86
87# The same examples rewritten for when running the metadata-server.pl script from the commandline:
88
89# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
90
91# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
92
93 "set-archives-metadata-array" => {
94 'compulsory-args' => [ "json" ],
95 'optional-args' => [ ] },
96
97 "set-import-metadata-array" => {
98 'compulsory-args' => [ "json" ],
99 'optional-args' => [ ] },
100
101 "set-index-metadata-array" => {
102 'compulsory-args' => [ "json" ],
103 'optional-args' => [ ] },
104
105 "set-live-metadata-array" => {
106 'compulsory-args' => [ "json" ],
107 'optional-args' => [ ] },
108
109 #REMOVE METHODS
110 "remove-import-metadata" => {
111 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
112 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
113# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
114
115 "remove-archives-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
117 'optional-args' => [ "metapos", "metavalue", "metamode" ] },
118
119 "remove-live-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ ] },
122
123 "remove-index-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ "metapos", "metavalue" ] },
126
127 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
130
131 #INSERT METHODS
132 "insert-metadata" => {
133 'compulsory-args' => [ "d", "metaname", "metavalue" ],
134 'optional-args' => [ ] }
135};
136
137print STDERR "\nMODMETA_TABLE: " .join(",", keys $metadataaction::modmeta_action_table) ."\n";
138
139
140sub _set_live_metadata
141{
142 my $self = shift @_;
143
144 my $collect = $self->{'collect'};
145 my $gsdl_cgi = $self->{'gsdl_cgi'};
146 #my $gsdlhome = $self->{'gsdlhome'};
147 my $infodbtype = $self->{'infodbtype'};
148
149 # Obtain the collect dir
150 my $site = $self->{'site'};
151 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
152 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
153
154
155 # look up additional args
156 my $docid = $self->{'d'};
157 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
158 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
159 }
160 my $metavalue = $self->{'metavalue'};
161
162 # Generate the dbkey
163 my $metaname = $self->{'metaname'};
164 my $dbkey = "$docid.$metaname";
165
166 # To people who know $collect_tail please add some comments
167 # Obtain path to the database
168 my $collect_tail = $collect;
169 $collect_tail =~ s/^.*[\/|\\]//;
170 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
171 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
172
173 # Set the new value
174 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
175 my $status = system($cmd);
176 if ($status != 0) {
177 # Catch error if gdbmget failed
178 my $mess = "Failed to set metadata key: $dbkey\n";
179
180 $mess .= "PATH: $ENV{'PATH'}\n";
181 $mess .= "cmd = $cmd\n";
182 $mess .= "Exit status: $status\n";
183 $mess .= "System Error Message: $!\n";
184
185 $gsdl_cgi->generate_error($mess);
186 }
187 else {
188 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
189 }
190
191 #return $status; # in case calling functions have any further use for this
192}
193
194sub set_live_metadata
195{
196 my $self = shift @_;
197
198 my $username = $self->{'username'};
199 my $collect = $self->{'collect'};
200 my $gsdl_cgi = $self->{'gsdl_cgi'};
201
202 if ($baseaction::authentication_enabled) {
203 # Ensure the user is allowed to edit this collection
204 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
205 }
206
207 # Make sure the collection isn't locked by someone else
208 $self->lock_collection($username, $collect);
209
210 $self->_set_live_metadata(@_);
211
212 # Release the lock once it is done
213 $self->unlock_collection($username, $collect);
214}
215
216sub set_index_metadata_entry
217{
218 print STDERR "1\n";
219 my $self = shift @_;
220 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
221
222 # To people who know $collect_tail please add some comments
223 # Obtain path to the database
224 my $collect_tail = $collect;
225 $collect_tail =~ s/^.*[\/|\\]//;
226 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
227 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
228
229 print STDERR "2\n";
230# print STDERR "**** infodb file path = $infodb_file_path\n";
231# print STDERR "***** infodb type = $infodbtype\n";
232
233 # Read the docid entry
234 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
235
236 # Set the metadata value
237 if (defined $metapos) {
238 print STDERR "3\n";
239 # if metamode=accumulate AND metapos, warn user and then use metapos
240 if (defined $metamode && $metamode eq "accumulate") {
241 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
242 print STDERR "**** Proceeding by using metapos\n";
243 }
244 $doc_rec->{$metaname}->[$metapos] = $metavalue;
245 }
246 elsif (defined $prevmetavalue) {
247 print STDERR "4\n";
248 my $array = $doc_rec->{$metaname};
249 my $length = @$array;
250
251 my $found = 0;
252 for (my $i = 0; $i < $length; $i++){
253 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
254 $doc_rec->{$metaname}->[$i] = $metavalue;
255 $found = 1;
256 last;
257 }
258 }
259
260 if($found == 0){
261 $doc_rec->{$metaname} = [ $metavalue ];
262 }
263 }
264 elsif (defined $metamode && $metamode eq "override") {
265 print STDERR "5\n";
266 $doc_rec->{$metaname} = [ $metavalue ];
267 }
268 else { # default for index was to override, but because accumulate is less destructive,
269 # and because accumulate is the default for archives and import, that's the new default for index too
270 print STDERR "6\n";
271 if(defined $doc_rec->{$metaname}) {
272 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
273 } else {
274 $doc_rec->{$metaname} = [ $metavalue ];
275 }
276 }
277 print STDERR "6\n";
278
279 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
280
281 return $status;
282
283}
284
285sub _set_import_metadata
286{
287 my $self = shift @_;
288
289 my $collect = $self->{'collect'};
290 my $gsdl_cgi = $self->{'gsdl_cgi'};
291 my $infodbtype = $self->{'infodbtype'};
292# my $gsdlhome = $self->{'gsdlhome'};
293
294 # Obtain the collect and archive dir
295 my $site = $self->{'site'};
296 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
297 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
298 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
299
300 # look up additional args
301 # want either d= or f=
302 my $docid = $self->{'d'};
303 my ($docid_root,$docid_secnum);
304 if(defined $docid) {
305 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
306 # as yet no support for setting subsection metadata in metadata.xml
307 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
308 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
309 return;
310 }
311 }
312
313 my $import_file = $self->{'f'};
314 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
315 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
316 }
317
318 # Get the parameters and set default mode to "accumulate"
319 my $metaname = $self->{'metaname'};
320 my $metavalue = $self->{'metavalue'};
321## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
322 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
323
324 my $metamode = $self->{'metamode'};
325 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
326 # make "accumulate" the default (less destructive, as it won't actually
327 # delete any existing values)
328 $metamode = "accumulate";
329 }
330
331 # adding metapos and prevmetavalue support to import_metadata subroutines
332 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
333 my $prevmetavalue = $self->{'prevmetavalue'};
334 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
335 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
336
337 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
338 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
339
340 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
341 $mess .= " $metaname";
342 $mess .= " = $metavalue";
343 $mess .= " ($metamode)\n";
344
345 $gsdl_cgi->generate_ok_message($mess);
346
347 #return $status; # in case calling functions have any further use for this
348}
349
350# the version of set_index_meta that doesn't do authentication
351sub _set_archives_metadata
352{
353 my $self = shift @_;
354
355 my $collect = $self->{'collect'};
356 my $gsdl_cgi = $self->{'gsdl_cgi'};
357 my $infodbtype = $self->{'infodbtype'};
358
359 # Obtain the collect and archive dir
360 my $site = $self->{'site'};
361 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
362 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
363
364 # look up additional args
365 my $docid = $self->{'d'};
366 my $metaname = $self->{'metaname'};
367 my $metavalue = $self->{'metavalue'};
368 my $prevmetavalue = $self->{'prevmetavalue'};
369
370 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
371 # Don't append "|| undef", since if metapos=0 it will then be set to undef
372
373 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
374 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
375
376 my $metamode = $self->{'metamode'};
377 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
378 # make "accumulate" the default (less destructive, as it won't actually
379 # delete any existing values)
380 $metamode = "accumulate";
381 }
382
383 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
384 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
385
386 if ($status == 0) {
387 my $mess = "set-archives-metadata successful: Key[$docid]\n";
388 $mess .= " $metaname";
389 $mess .= "->[$metapos]" if (defined $metapos);
390 $mess .= " = $metavalue";
391 $mess .= " ($metamode)\n";
392
393 $gsdl_cgi->generate_ok_message($mess);
394 }
395 else {
396 my $mess .= "Failed to set archives metadata key: $docid\n";
397 $mess .= "Exit status: $status\n";
398 if(defined $self->{'error_msg'}) {
399 $mess .= "Error Message: $self->{'error_msg'}\n";
400 } else {
401 $mess .= "System Error Message: $!\n";
402 }
403 $mess .= "-" x 20 . "\n";
404
405 $gsdl_cgi->generate_error($mess);
406 }
407
408 #return $status; # in case calling functions have any further use for this
409}
410
411
412# the version of set_index_meta that doesn't do authentication
413sub _set_index_metadata
414{
415 print STDERR "START SET INDEX METADATA\n";
416 my $self = shift @_;
417
418 my $collect = $self->{'collect'};
419 my $gsdl_cgi = $self->{'gsdl_cgi'};
420
421 my $site = $self->{'site'};
422 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
423
424 # look up additional args
425 my $docid = $self->{'d'};
426 my $metaname = $self->{'metaname'};
427 my $metapos = $self->{'metapos'}; # undef has meaning
428 my $metavalue = $self->{'metavalue'};
429 my $infodbtype = $self->{'infodbtype'};
430 my $metamode = $self->{'metamode'};
431 my $prevmetavalue = $self->{'prevmetavalue'};
432
433 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
434 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
435
436 print STDERR "SETTING INDEX METADATA ENTRY\n";
437 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
438 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
439 if ($status != 0) {
440 # Catch error if set infodb entry failed
441 my $mess = "Failed to set metadata key: $docid\n";
442
443 $mess .= "PATH: $ENV{'PATH'}\n";
444 $mess .= "Exit status: $status\n";
445 $mess .= "System Error Message: $!\n";
446
447 $gsdl_cgi->generate_error($mess);
448 }
449 else {
450 my $mess = "set-index-metadata successful: Key[$docid]\n";
451 $mess .= " $metaname";
452 $mess .= "->[$metapos]" if (defined $metapos);
453 $mess .= " = $metavalue\n";
454
455 $gsdl_cgi->generate_ok_message($mess);
456 }
457
458 print STDERR "END SET INDEX METADATA\n";
459 #return $status; # in case calling functions have any further use for this
460}
461
462sub set_index_metadata
463{
464 my $self = shift @_;
465
466 my $username = $self->{'username'};
467 my $collect = $self->{'collect'};
468 my $gsdl_cgi = $self->{'gsdl_cgi'};
469 #my $gsdlhome = $self->{'gsdlhome'};
470
471 if ($baseaction::authentication_enabled) {
472 # Ensure the user is allowed to edit this collection
473 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
474 }
475
476 my $site = $self->{'site'};
477 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
478
479 $gsdl_cgi->checked_chdir($collect_dir);
480
481 # Obtain the collect dir
482 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
483
484 # Make sure the collection isn't locked by someone else
485 $self->lock_collection($username, $collect);
486
487 $self->_set_index_metadata(@_);
488
489 # Release the lock once it is done
490 $self->unlock_collection($username, $collect);
491}
492
493# call this to set the metadata for a combination of dirs archives, import or index, or live
494# if none specified, defaults to index which was the original behaviour of set_metadata.
495sub set_metadata
496{
497 my $self = shift @_;
498
499 # Testing that not defining a variable, setting it to "" or to " " all return false
500 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
501
502 my $where = $self->{'where'};
503 if(!$where || ($where =~ m/^\s*$/)) {
504 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
505 return;
506 }
507
508 # authenticate and lock collection once, even if processing multiple dirs
509 my $username = $self->{'username'};
510 my $collect = $self->{'collect'};
511 my $gsdl_cgi = $self->{'gsdl_cgi'};
512
513 if ($baseaction::authentication_enabled) {
514 # Ensure the user is allowed to edit this collection
515 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
516 }
517
518 if($where =~ m/index/) {
519 my $site = $self->{'site'};
520 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
521 $gsdl_cgi->checked_chdir($collect_dir);
522 }
523
524 # Make sure the collection isn't locked by someone else
525 $self->lock_collection($username, $collect);
526
527
528 # now at last can set the metadata. $where can specify multiple
529 # $where is of the form: import|archives|index, or a subset thereof
530
531 #my @whichdirs = split('\|', $where);
532
533 # just check whether $where contains import/archives/index/live in turn, and
534 # for each case, process it accordingly
535 if($where =~ m/import/) {
536 $self->_set_import_metadata(@_);
537 }
538
539 if($where =~ m/archives/) {
540
541 # look up docID arg which is optional to set_metadata because it's optional
542 # to set_import, but which is compulsory to set_archives_metadata
543 my $docid = $self->{'d'};
544 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
545 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
546 }
547 # we have a docid, so can set archives meta
548 $self->_set_archives_metadata(@_);
549 }
550
551 if($where =~ m/index/) {
552
553 # look up docID arg which is optional to set_metadata because it's optional
554 # to set_import, but which is compulsory to set_archives_metadata
555 my $docid = $self->{'d'};
556 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
557 $gsdl_cgi->generate_error("No docid (d=...) specified.");
558 }
559 # we have a docid, so can set index meta
560 $self->_set_index_metadata(@_);
561 }
562
563 if($where =~ m/live/) {
564 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
565 }
566
567 # Release the lock once it is done
568 $self->unlock_collection($username, $collect);
569}
570
571sub set_metadata_array
572{
573 my $self = shift @_;
574
575 my $where = $self->{'where'};
576 if(!$where || ($where =~ m/^\s*$/)) {
577 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
578 return;
579 }
580
581 my $username = $self->{'username'};
582 my $collect = $self->{'collect'};
583 my $gsdl_cgi = $self->{'gsdl_cgi'};
584
585 if ($baseaction::authentication_enabled) {
586 # Ensure the user is allowed to edit this collection
587 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
588 }
589
590 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
591 # But including the stmt during this code reorganisation to preserve as-is what used to happen
592 my $site = $self->{'site'};
593 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
594 $gsdl_cgi->checked_chdir($collect_dir);
595
596 # Make sure the collection isn't locked by someone else
597 $self->lock_collection($username, $collect);
598
599 if($where =~ m/import/) {
600 $self->_set_import_metadata_array(@_);
601 }
602 if($where =~ m/archives/) {
603 $self->_set_archives_metadata_array(@_);
604 }
605 if($where =~ m/index/) {
606 $self->_set_index_metadata_array(@_);
607 }
608 if($where =~ m/live/) {
609 $self->_set_live_metadata_array(@_);
610 }
611
612 # Release the lock once it is done
613 $self->unlock_collection($username, $collect);
614}
615
616sub _set_index_metadata_array
617{
618 my $self = shift @_;
619
620 my $collect = $self->{'collect'};
621 my $gsdl_cgi = $self->{'gsdl_cgi'};
622 my $site = $self->{'site'};
623 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
624
625
626 # look up additional args
627
628 my $infodbtype = $self->{'infodbtype'};
629
630 my $json_str = $self->{'json'};
631 my $doc_array = decode_json $json_str;
632
633
634 my $global_status = 0;
635 my $global_mess = "";
636
637 my @all_docids = ();
638
639 foreach my $doc_array_rec ( @$doc_array ) {
640
641 my $status = -1;
642 my $docid = $doc_array_rec->{'docid'};
643
644 push(@all_docids,$docid);
645
646 my $metaname = $doc_array_rec->{'metaname'};
647 if(defined $metaname) {
648 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
649 my $metavalue = $doc_array_rec->{'metavalue'};
650 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
651
652 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
653 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
654 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
655
656 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
657 $metaname = $metatable_rec->{'metaname'};
658 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
659 my $metapos = undef;
660 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
661
662 foreach my $metavalue ( @$metavals ) { # metavals is an array
663 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
664 if($metamode eq "override") { # now, having overridden the metavalue for the first,
665 # need to accumulate subsequent metavals for this metaname, else the just-assigned
666 # metavalue for this metaname will be lost
667 $metamode = "accumulate";
668 }
669 }
670 }
671 }
672
673 if ($status != 0) {
674 # Catch error if set infodb entry failed
675 $global_status = $status;
676 $global_mess .= "Failed to set metadata key: $docid\n";
677 $global_mess .= "Exit status: $status\n";
678 $global_mess .= "System Error Message: $!\n";
679 $global_mess .= "-" x 20;
680 }
681 }
682
683 if ($global_status != 0) {
684 $global_mess .= "PATH: $ENV{'PATH'}\n";
685 $gsdl_cgi->generate_error($global_mess);
686 }
687 else {
688 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
689 $gsdl_cgi->generate_ok_message($mess);
690 }
691}
692
693sub set_index_metadata_array
694{
695 my $self = shift @_;
696
697 my $username = $self->{'username'};
698 my $collect = $self->{'collect'};
699 my $gsdl_cgi = $self->{'gsdl_cgi'};
700# my $gsdlhome = $self->{'gsdlhome'};
701
702 if ($baseaction::authentication_enabled) {
703 # Ensure the user is allowed to edit this collection
704 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
705 }
706
707 my $site = $self->{'site'};
708 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
709
710 $gsdl_cgi->checked_chdir($collect_dir);
711
712 # Obtain the collect dir
713 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
714
715 # Make sure the collection isn't locked by someone else
716 $self->lock_collection($username, $collect);
717
718 $self->_set_index_metadata_array(@_);
719
720 # Release the lock once it is done
721 $self->unlock_collection($username, $collect);
722}
723
724# experimental, newly added in and untested
725sub _set_live_metadata_array
726{
727 my $self = shift @_;
728
729 my $collect = $self->{'collect'};
730 my $gsdl_cgi = $self->{'gsdl_cgi'};
731
732 my $site = $self->{'site'};
733 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
734
735
736 # look up additional args
737 my $infodbtype = $self->{'infodbtype'};
738 # To people who know $collect_tail please add some comments
739 # Obtain path to the database
740 my $collect_tail = $collect;
741 $collect_tail =~ s/^.*[\/|\\]//;
742 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
743 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
744
745
746 my $json_str = $self->{'json'};
747 my $doc_array = decode_json $json_str;
748
749
750 my $global_status = 0;
751 my $global_mess = "";
752
753 my @all_docids = ();
754
755
756 foreach my $doc_array_rec ( @$doc_array ) {
757
758 my $status = -1;
759 my $docid = $doc_array_rec->{'docid'};
760
761 push(@all_docids,$docid);
762
763 my $metaname = $doc_array_rec->{'metaname'};
764 if(defined $metaname) {
765 my $dbkey = "$docid.$metaname";
766 my $metavalue = $doc_array_rec->{'metavalue'};
767
768 # Set the new value
769 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
770 $status = system($cmd);
771
772 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
773 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
774 foreach my $metatable_rec ( @$metatable ) {
775 $metaname = $metatable_rec->{'metaname'};
776 my $dbkey = "$docid.$metaname";
777
778 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
779 foreach my $metavalue ( @$metavals ) {
780 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
781 $status = system($cmd);
782 }
783 }
784
785 }
786
787 if ($status != 0) {
788 # Catch error if gdbmget failed
789 $global_status = $status;
790 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
791 $global_mess .= "Exit status: $status\n";
792 $global_mess .= "System Error Message: $!\n";
793 $global_mess .= "-" x 20;
794 }
795 }
796
797 if ($global_status != 0) {
798 $global_mess .= "PATH: $ENV{'PATH'}\n";
799 $gsdl_cgi->generate_error($global_mess);
800 }
801 else {
802 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
803 $gsdl_cgi->generate_ok_message($mess);
804 }
805}
806
807sub set_live_metadata_array
808{
809 my $self = shift @_;
810
811 my $username = $self->{'username'};
812 my $collect = $self->{'collect'};
813 my $gsdl_cgi = $self->{'gsdl_cgi'};
814
815 if ($baseaction::authentication_enabled) {
816 # Ensure the user is allowed to edit this collection
817 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
818 }
819
820 my $site = $self->{'site'};
821 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
822
823 $gsdl_cgi->checked_chdir($collect_dir);
824
825 # Make sure the collection isn't locked by someone else
826 $self->lock_collection($username, $collect);
827
828 $self->_set_live_metadata_array(@_);
829
830 # Release the lock once it is done
831 $self->unlock_collection($username, $collect);
832}
833
834
835sub dxml_metadata
836{
837 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
838 my $metaname = $parser->{'parameters'}->{'metaname'};
839 my $metamode = $parser->{'parameters'}->{'metamode'};
840
841###!!! print STDERR "**** Processing closing </Metadata> tag\n";
842
843 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
844
845 # Find the right metadata tag and checks if we are going to
846 # override it
847 #
848 # Note: This over writes the first metadata block it
849 # encountered. If there are multiple Sections in the doc.xml, it
850 # might not behave as you would expect
851
852 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
853## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
854## print STDERR "**** metamode = $metamode\n";
855
856 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
857 {
858 my $name_attr = $attrHash->{'name'};
859 # print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
860 if (($name_attr eq $metaname) && ($metamode eq "override"))
861 {
862 if (!defined $parser->{'parameters'}->{'poscount'})
863 {
864 $parser->{'parameters'}->{'poscount'} = 0;
865 }
866 else
867 {
868 $parser->{'parameters'}->{'poscount'}++;
869 }
870
871 if ((defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
872 || (!defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == 0))
873
874 {
875 ##print STDERR "#### got match!!\n";
876 # Get the value and override the current value
877 my $metavalue = $parser->{'parameters'}->{'metavalue'};
878 $attrHash->{'_content'} = $metavalue;
879
880 # Don't want it to wipe out any other pieces of metadata
881 $parser->{'parameters'}->{'metamode'} = "done";
882 }
883 elsif (defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
884 {
885 my $metavalue = $parser->{'parameters'}->{'metavalue'};
886 $attrHash->{'_content'} = $metavalue;
887 $parser->{'parameters'}->{'metamode'} = "done";
888 }
889 }
890 }
891
892 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
893 return [$tagname => $attrHash];
894}
895
896# This method exists purely for catching invalid section numbers that the client
897# requested to edit. Once the parser has reached the end (the final </Archive> tag),
898# we've seen all the Sections in the doc.xml, and none of their section nums matched
899# if the metamode has not been set to 'done' by then.
900sub dxml_archive
901{
902 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
903 my $metamode = $parser->{'parameters'}->{'metamode'};
904
905 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
906 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
907
908# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
909
910 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
911 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
912
913 my $metaname = $parser->{'parameters'}->{'metaname'};
914 my $metavalue = $parser->{'parameters'}->{'metavalue'};
915
916 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
917 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
918 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
919 print STDERR "\tin section $opt_doc_secnum.\n";
920 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
921 }
922
923 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
924 return [$tagname => $attrHash];
925}
926
927sub dxml_description
928{
929 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
930 my $metamode = $parser->{'parameters'}->{'metamode'};
931
932 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
933 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
934
935###!!! print STDERR "**** Processing a closing </Description> tag \n";
936# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
937
938 # Accumulate the metadata
939
940 # We'll be accumulating metadata at this point if we haven't found and therefore
941 # haven't processed the metadata yet.
942 # For subsections, this means that if we're at a matching subsection, but haven't
943 # found the correct metaname to override in that subsection, we accumulate it as new
944 # meta in the subsection by adding it to the current description.
945 # If there's no subsection info for the metadata, it will accumulate at the top level
946 # section description if we hadn't found a matching metaname to override at this point.
947
948 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
949 # at the toplevel section. In the latter case, it means we want to process meta in the
950 # toplevel section. So the eq check between the values below will work in all cases.
951
952 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
953 # this document has been provided. In that case, the metadata for that opt_doc_secnum
954 # won't get attached/accumulated to any part of the doc, not even its top-level section.
955
956 if ($curr_secnum eq $opt_doc_secnum
957 && ($metamode eq "accumulate" || $metamode eq "override")) {
958 if ($metamode eq "override") {
959 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
960 }
961
962 # If we get to here and metamode is override, this means there
963 # was no existing value to overide => treat as an append operation
964
965 # Tack a new metadata tag on to the end of the <Metadata>+ block
966 my $metaname = $parser->{'parameters'}->{'metaname'};
967 my $metavalue = $parser->{'parameters'}->{'metavalue'};
968
969 my $metadata_attr = {
970 '_content' => $metavalue,
971 'name' => $metaname,
972 'mode' => "accumulate"
973 };
974
975 my $append_metadata = [ "Metadata" => $metadata_attr ];
976 my $description_content = $attrHash->{'_content'};
977
978 print "Appending metadata to doc.xml\n";
979
980 if (ref($description_content)) {
981 # got some existing interesting nested content
982 push(@$description_content, " ", $append_metadata ,"\n ");
983 }
984 else {
985 #description_content is most likely a string such as "\n"
986 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
987 }
988
989 $parser->{'parameters'}->{'metamode'} = "done";
990 }
991 else {
992 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
993## print STDERR "**** NOT ACCUMULATE?!? \n";
994 }
995
996 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
997 return [$tagname => $attrHash];
998}
999
1000
1001sub dxml_start_section
1002{
1003 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1004
1005 my $new_depth = scalar(@$contextArray);
1006
1007# print STDERR "**** START SECTION \n";
1008
1009 if ($new_depth == 1) {
1010 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1011 $parser->{'parameters'}->{'curr_section_num'} = "";
1012 }
1013
1014 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1015 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1016
1017 my $new_secnum;
1018
1019 if ($new_depth > $old_depth) {
1020 # child subsection
1021 $new_secnum = "$old_secnum.1";
1022 }
1023 elsif ($new_depth == $old_depth) {
1024 # sibling section => increase it's value by 1
1025 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1026 $tail_num++;
1027 $new_secnum = $old_secnum;
1028 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1029 }
1030 else {
1031 # back up to parent section => lopp off tail
1032 $new_secnum = $old_secnum;
1033 $new_secnum =~ s/\.\d+$//;
1034 }
1035
1036 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1037 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1038
1039 1;
1040}
1041
1042sub edit_xml_file
1043{
1044 my $self = shift @_;
1045 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1046
1047 # use XML::Rules to add it in (read in and out again)
1048 my $parser = XML::Rules->new(start_rules => $start_rules,
1049 rules => $rules,
1050 style => 'filter',
1051 output_encoding => 'utf8' );
1052
1053 my $xml_in = "";
1054 if (!open(MIN,"<$filename")) {
1055 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1056 }
1057 else {
1058 # Read all the text in
1059 my $line;
1060 while (defined ($line=<MIN>)) {
1061 $xml_in .= $line;
1062 }
1063 close(MIN);
1064
1065 my $MOUT;
1066 if (!open($MOUT,">$filename")) {
1067 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1068 }
1069 else {
1070 # Matched lines will get handled by the call backs
1071## my $xml_out = "";
1072
1073 binmode($MOUT,":utf8");
1074 $parser->filter($xml_in,$MOUT, $options);
1075
1076# binmode(MOUT,":utf8");
1077# print MOUT $xml_out;
1078 close($MOUT);
1079 }
1080 }
1081
1082 # copy across any custom error information that was stored during parsing
1083 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1084}
1085
1086sub edit_doc_xml
1087{
1088 my $self = shift @_;
1089 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1090
1091 my $info_mess = <<RAWEND;
1092****************************
1093 edit_doc_xml()
1094****************************
1095RAWEND
1096
1097 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1098 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1099 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1100 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1101 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1102 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1103 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1104
1105 $info_mess .= "****************************\n";
1106
1107 $gsdl_cgi->generate_message($info_mess);
1108
1109 # To monitor which section/subsection number we are in
1110 my @start_rules =
1111 ( 'Section' => \&dxml_start_section );
1112
1113 # use XML::Rules to add it in (read in and out again)
1114 # Set the call back functions
1115 my @rules =
1116 ( _default => 'raw',
1117 'Metadata' => \&dxml_metadata,
1118 'Description' => \&dxml_description,
1119 'Archive' => \&dxml_archive); # just for catching errors at end
1120
1121 # Sets the parameters
1122 my $options = { 'metaname' => $metaname,
1123 'metapos' => $metapos,
1124 'metavalue' => $metavalue,
1125 'metamode' => $metamode,
1126 'prevmetavalue' => $prevmetavalue };
1127
1128 if (defined $opt_secnum) {
1129 $options->{'secnum'} = $opt_secnum;
1130 }
1131
1132 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1133}
1134
1135sub set_archives_metadata_entry
1136{
1137 my $self = shift @_;
1138 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1139
1140 my $info_mess = <<RAWEND;
1141****************************
1142 set_archives_metadata_entry()
1143****************************
1144RAWEND
1145
1146 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1147 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1148 $info_mess .= " collect = $collect\n" if defined($collect);
1149 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1150 $info_mess .= " docid = $docid\n" if defined($docid);
1151 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1152 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1153 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1154 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1155 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1156
1157 $info_mess .= "****************************\n";
1158
1159 $gsdl_cgi->generate_message($info_mess);
1160
1161 # Obtain the doc.xml path for the specified docID
1162 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1163
1164 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1165 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1166 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1167
1168 # The $doc_xml_file is relative to the archives, and now let's get the full path
1169 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1170 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1171
1172 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1173 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1174 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1175 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1176 # remove all values of $metaname metadata
1177 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1178 }
1179 # Edit the doc.xml file with the specified metadata name, value and position.
1180 # TODO: there is a potential problem here as this edit_doc_xml function
1181 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1182 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1183
1184 # dxml_metadata method ignores metapos if metamode anything other than override
1185 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1186 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1187
1188 # return 0; # return 0 for now to indicate no error
1189 return (defined $self->{'error_msg'}) ? 1 : 0;
1190}
1191
1192
1193sub set_archives_metadata
1194{
1195 my $self = shift @_;
1196
1197 my $username = $self->{'username'};
1198 my $collect = $self->{'collect'};
1199 my $gsdl_cgi = $self->{'gsdl_cgi'};
1200
1201 if ($baseaction::authentication_enabled) {
1202 # Ensure the user is allowed to edit this collection
1203 $self->authenticate_user($username, $collect);
1204 }
1205
1206 # Make sure the collection isn't locked by someone else
1207 $self->lock_collection($username, $collect);
1208
1209 $self->_set_archives_metadata(@_);
1210
1211 # Release the lock once it is done
1212 $self->unlock_collection($username, $collect);
1213}
1214
1215sub _set_archives_metadata_array
1216{
1217 my $self = shift @_;
1218
1219 my $collect = $self->{'collect'};
1220 my $gsdl_cgi = $self->{'gsdl_cgi'};
1221 my $site = $self->{'site'};
1222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1223
1224 # look up additional args
1225
1226 my $infodbtype = $self->{'infodbtype'};
1227
1228 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1229
1230 my $json_str = $self->{'json'};
1231 my $doc_array = decode_json $json_str;
1232
1233
1234 my $global_status = 0;
1235 my $global_mess = "";
1236
1237 my @all_docids = ();
1238
1239 foreach my $doc_array_rec ( @$doc_array ) {
1240 my $status = -1;
1241 my $docid = $doc_array_rec->{'docid'};
1242
1243 push(@all_docids,$docid);
1244
1245 my $metaname = $doc_array_rec->{'metaname'};
1246 if(defined $metaname) {
1247
1248 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1249
1250 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1251 my $metavalue = $doc_array_rec->{'metavalue'};
1252 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1253
1254
1255 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1256 # make "accumulate" the default (less destructive, as it won't actually
1257 # delete any existing values)
1258 $metamode = "accumulate";
1259 }
1260
1261 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1262 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1263 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1264 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1265
1266 foreach my $metatable_rec ( @$metatable ) {
1267 $metaname = $metatable_rec->{'metaname'};
1268 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1269 my $metapos = undef;
1270 my $prevmetavalue = undef;
1271 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1272
1273 foreach my $metavalue ( @$metavals ) {
1274 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1275 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1276
1277 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1278 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1279 # metavalue for this metaname will be lost
1280 $metamode = "accumulate";
1281 }
1282 }
1283 }
1284 }
1285
1286 if ($status != 0) {
1287 # Catch error if set infodb entry failed
1288 $global_status = $status;
1289 $global_mess .= "Failed to set metadata key: $docid\n";
1290 $global_mess .= "Exit status: $status\n";
1291 $global_mess .= "System Error Message: $!\n";
1292 $global_mess .= "-" x 20 . "\n";
1293 }
1294 }
1295
1296 if ($global_status != 0) {
1297 $global_mess .= "PATH: $ENV{'PATH'}\n";
1298 $gsdl_cgi->generate_error($global_mess);
1299 }
1300 else {
1301 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1302 $gsdl_cgi->generate_ok_message($mess);
1303 }
1304}
1305
1306sub set_archives_metadata_array
1307{
1308 my $self = shift @_;
1309
1310 my $username = $self->{'username'};
1311 my $collect = $self->{'collect'};
1312 my $gsdl_cgi = $self->{'gsdl_cgi'};
1313# my $gsdlhome = $self->{'gsdlhome'};
1314
1315 if ($baseaction::authentication_enabled) {
1316 # Ensure the user is allowed to edit this collection
1317 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1318 }
1319
1320 my $site = $self->{'site'};
1321 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1322
1323 $gsdl_cgi->checked_chdir($collect_dir);
1324
1325 # Obtain the collect dir
1326 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1327
1328 # Make sure the collection isn't locked by someone else
1329 $self->lock_collection($username, $collect);
1330
1331 $self->_set_archives_metadata_array(@_);
1332
1333 # Release the lock once it is done
1334 $self->unlock_collection($username, $collect);
1335}
1336
1337sub _remove_archives_metadata
1338{
1339 my $self = shift @_;
1340
1341 my $collect = $self->{'collect'};
1342 my $gsdl_cgi = $self->{'gsdl_cgi'};
1343# my $gsdlhome = $self->{'gsdlhome'};
1344 my $infodbtype = $self->{'infodbtype'};
1345
1346 my $site = $self->{'site'};
1347
1348 # Obtain the collect and archive dir
1349 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1350
1351 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1352
1353 # look up additional args
1354 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1355
1356 my $metaname = $self->{'metaname'};
1357 my $metapos = $self->{'metapos'};
1358 my $metavalue = $self->{'metavalue'};
1359
1360 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1361 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1362
1363 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1364 # default to deleting the first metavalue for the given metaname
1365 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1366 # seem to get deleted in one single remove_archives_meta action invocation.
1367 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1368 # deleted, both get deleted.
1369 if(!defined $metapos && !defined $metavalue) {
1370 $metapos = 0;
1371 }
1372
1373 my $metamode = $self->{'metamode'};
1374 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1375
1376 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1377 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1378
1379 # This now stores the full pathname
1380 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1381
1382 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1383# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1384
1385 if ($status == 0)
1386 {
1387 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1388 $mess .= " $metaname";
1389 $mess .= "->[$metapos]" if (defined $metapos);
1390 $mess .= " ($metavalue)" if (defined $metavalue);
1391 $gsdl_cgi->generate_ok_message($mess);
1392 }
1393 else
1394 {
1395 my $mess .= "Failed to remove archives metadata key: $docid\n";
1396 $mess .= "Exit status: $status\n";
1397 $mess .= "System Error Message: $!\n";
1398 $mess .= "-" x 20 . "\n";
1399
1400 $gsdl_cgi->generate_error($mess);
1401 }
1402
1403 #return $status; # in case calling functions have a use for this
1404}
1405
1406sub remove_archives_metadata
1407{
1408 my $self = shift @_;
1409
1410 my $username = $self->{'username'};
1411 my $collect = $self->{'collect'};
1412 my $gsdl_cgi = $self->{'gsdl_cgi'};
1413
1414 if ($baseaction::authentication_enabled)
1415 {
1416 # Ensure the user is allowed to edit this collection
1417 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1418 }
1419
1420 # Make sure the collection isn't locked by someone else
1421 $self->lock_collection($username, $collect);
1422
1423 $self->_remove_archives_metadata(@_);
1424
1425 # Release the lock once it is done
1426 $self->unlock_collection($username, $collect);
1427}
1428
1429sub remove_from_doc_xml
1430{
1431 my $self = shift @_;
1432 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1433
1434 my @start_rules = ('Section' => \&dxml_start_section);
1435
1436 # Set the call-back functions for the metadata tags
1437 my @rules =
1438 (
1439 _default => 'raw',
1440 'Metadata' => \&rfdxml_metadata
1441 );
1442
1443 my $parser = XML::Rules->new
1444 (
1445 start_rules => \@start_rules,
1446 rules => \@rules,
1447 style => 'filter',
1448 output_encoding => 'utf8',
1449# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1450 stripspaces => 2|0|0 # ineffectual
1451 );
1452
1453 my $status = 0;
1454 my $xml_in = "";
1455 if (!open(MIN,"<$doc_xml_filename"))
1456 {
1457 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1458 $status = 1;
1459 }
1460 else
1461 {
1462 # Read them in
1463 my $line;
1464 while (defined ($line=<MIN>)) {
1465 $xml_in .= $line;
1466 }
1467 close(MIN);
1468
1469 # Filter with the call-back functions
1470 my $xml_out = "";
1471
1472 my $MOUT;
1473 if (!open($MOUT,">$doc_xml_filename")) {
1474 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1475 $status = 1;
1476 }
1477 else {
1478 binmode($MOUT,":utf8");
1479 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1480 close($MOUT);
1481 }
1482 }
1483 return $status;
1484}
1485
1486sub rfdxml_metadata
1487{
1488 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1489
1490 # For comparisons, toplevel section is indicated by ""
1491 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1492 my $secid = $parser->{'parameters'}->{'secid'} || "";
1493
1494 if (!($secid eq $curr_sec_num))
1495 {
1496 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1497 return [$tagname => $attrHash];
1498 }
1499
1500 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1501 {
1502 if (!defined $parser->{'parameters'}->{'poscount'})
1503 {
1504 $parser->{'parameters'}->{'poscount'} = 0;
1505 }
1506 else
1507 {
1508 $parser->{'parameters'}->{'poscount'}++;
1509 }
1510
1511 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1512 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
1513 return [];
1514 }
1515
1516 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1517 {
1518 return [];
1519 }
1520
1521 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1522 {
1523 return [];
1524 }
1525 }
1526
1527 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1528 return [$tagname => $attrHash];
1529}
1530
1531sub mxml_metadata
1532{
1533 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1534 my $metaname = $parser->{'parameters'}->{'metaname'};
1535 my $metamode = $parser->{'parameters'}->{'metamode'};
1536
1537 # Report error if we don't see FileName tag before this
1538 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1539
1540 # Don't do anything if we are not in the right FileSet
1541 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1542 if ($file_regexp =~ /\.\*/) {
1543 # Only interested in a file_regexp if it specifies precisely one
1544 # file.
1545 # So, skip anything with a .* in it as it is too general
1546## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1547 return [$tagname => $attrHash];
1548 }
1549 my $src_file = $parser->{'parameters'}->{'src_file'};
1550 if (!($src_file =~ /$file_regexp/)) {
1551 return [$tagname => $attrHash];
1552 }
1553## print STDERR "*** mxl metamode = $metamode\n";
1554
1555 # Find the right metadata tag and checks if we are going to override it
1556 my $name_attr = $attrHash->{'name'};
1557 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1558
1559 # now metadata.xml functions need to keep track of metapos
1560 if (!defined $parser->{'parameters'}->{'poscount'})
1561 {
1562 $parser->{'parameters'}->{'poscount'} = 0;
1563 }
1564 else
1565 {
1566 $parser->{'parameters'}->{'poscount'}++;
1567 }
1568
1569 # If either the metapos or prevmetavalue is set,
1570 # get the value and override the current value
1571 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1572
1573 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1574 {
1575 $attrHash->{'_content'} = $metavalue;
1576
1577 ## print STDERR "**** overriding metadata.xml\n";
1578
1579 # Don't want it to wipe out any other pieces of metadata
1580 $parser->{'parameters'}->{'metamode'} = "done";
1581 }
1582 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1583 {
1584 $attrHash->{'_content'} = $metavalue;
1585 $parser->{'parameters'}->{'metamode'} = "done";
1586 }
1587 }
1588
1589 # mxml_description will process the metadata if metadata is accumulate,
1590 # or if we haven't found the metadata to override
1591
1592 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1593 return [$tagname => $attrHash];
1594}
1595
1596
1597sub mxml_description
1598{
1599 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1600 my $metamode = $parser->{'parameters'}->{'metamode'};
1601
1602 # Failed... Report error if we don't see FileName tag before this
1603 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1604
1605 # Don't do anything if we are not in the right FileSet
1606 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1607 if ($file_regexp =~ m/\.\*/) {
1608 # Only interested in a file_regexp if it specifies precisely one
1609 # file.
1610 # So, skip anything with a .* in it as it is too general
1611 return [$tagname => $attrHash];
1612 }
1613 my $src_file = $parser->{'parameters'}->{'src_file'};
1614
1615 if (!($src_file =~ m/$file_regexp/)) {
1616 return [$tagname => $attrHash];
1617 }
1618
1619 # Accumulate the metadata block to the end of the description block
1620 # Note: This adds metadata block to all description blocks, so if there are
1621 # multiple FileSets, it will add to all of them
1622 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1623
1624 # if metamode was "override" but get to here then it failed to
1625 # find an item to override, in which case it should append its
1626 # value to the end, just like the "accumulate" mode
1627
1628 if ($metamode eq "override") {
1629 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1630 }
1631
1632 # tack a new metadata tag on to the end of the <Metadata>+ block
1633 my $metaname = $parser->{'parameters'}->{'metaname'};
1634 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1635
1636 my $metadata_attr = { '_content' => $metavalue,
1637 'name' => $metaname,
1638 'mode' => "accumulate" };
1639
1640 my $append_metadata = [ "Metadata" => $metadata_attr ];
1641 my $description_content = $attrHash->{'_content'};
1642
1643## print STDERR "*** appending to metadata.xml\n";
1644
1645 # append the new metadata element to the end of the current
1646 # content contained inside this tag
1647 if (ref($description_content) eq "") {
1648 # => string or numeric literal
1649 # this is caused by a <Description> block has no <Metadata> child elements
1650 # => set up an empty array in '_content'
1651 $attrHash->{'_content'} = [ "\n" ];
1652 $description_content = $attrHash->{'_content'};
1653 }
1654
1655 push(@$description_content," ", $append_metadata ,"\n ");
1656 $parser->{'parameters'}->{'metamode'} = "done";
1657 }
1658
1659 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1660 return [$tagname => $attrHash];
1661}
1662
1663
1664sub mxml_filename
1665{
1666 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1667
1668 # Store the filename of the Current Fileset
1669 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1670 # FileName tag must come before Description tag
1671 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1672
1673 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1674 return [$tagname => $attrHash];
1675}
1676
1677
1678sub mxml_fileset
1679{
1680 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1681
1682 # Initilise the current_file
1683 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1684 # FileName tag must come before Description tag
1685 $parser->{'parameters'}->{'current_file'} = "";
1686
1687 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1688 return [$tagname => $attrHash];
1689}
1690
1691sub mxml_directorymetadata
1692{
1693 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1694
1695 # if we haven't processed the metadata when we reach the end of metadata.xml
1696 # it's because there's no particular FileSet element whose FileName matched
1697 # In which case, add a new FileSet for this FileName
1698 my $metamode = $parser->{'parameters'}->{'metamode'};
1699 if($metamode ne "done") {
1700
1701 if ($metamode eq "override") {
1702 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1703 }
1704
1705 # If we get to here and metamode is override, this means there
1706 # was no existing value to overide => treat as an append operation
1707
1708 # Create a new FileSet element and append to DirectoryMetadata
1709 # <FileSet>
1710 # <FileName>src_file</FileName>
1711 # <Description>
1712 # <Metadata mode="" name="">metavalue</Metadata>
1713 # </Description>
1714 # </FileSet>
1715 my $src_file = $parser->{'parameters'}->{'src_file'};
1716 my $metaname = $parser->{'parameters'}->{'metaname'};
1717 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1718 my $metadata_attr = {
1719 '_content' => $metavalue,
1720 'name' => $metaname,
1721 'mode' => "accumulate"
1722 };
1723 my $append_metadata = [ "Metadata" => $metadata_attr ];
1724 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
1725 my $description_element = [ "Description" => $description_attr ];
1726
1727 #_content is not an attribute, it's special and holds the children of this element
1728 # including the textnode value embedded in this element if any.
1729 my $filename_attr = {'_content' => $src_file};
1730 my $filename_element = [ "FileName" => $filename_attr ];
1731
1732 my $fileset_attr = {};
1733 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1734 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1735
1736
1737 # get children of dirmeta, and push the new FileSet element onto it
1738 print "Appending metadata to metadata.xml\n";
1739 my $dirmeta_content = $attrHash->{'_content'};
1740 if (ref($dirmeta_content)) {
1741 # got some existing interesting nested content
1742 #push(@$dirmeta_content, " ", $fileset ,"\n ");
1743 push(@$dirmeta_content, "\t", $fileset ,"\n");
1744 }
1745 else {
1746 #description_content is most likely a string such as "\n"
1747 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
1748 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1749 }
1750
1751 $parser->{'parameters'}->{'metamode'} = "done";
1752 }
1753 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1754 return [$tagname => $attrHash];
1755}
1756
1757
1758sub edit_metadata_xml
1759{
1760 my $self = shift @_;
1761 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1762
1763 # Set the call-back functions for the metadata tags
1764 my @rules =
1765 ( _default => 'raw',
1766 'FileName' => \&mxml_filename,
1767 'Metadata' => \&mxml_metadata,
1768 'Description' => \&mxml_description,
1769 'FileSet' => \&mxml_fileset,
1770 'DirectoryMetadata' => \&mxml_directorymetadata);
1771
1772 # use XML::Rules to add it in (read in and out again)
1773 my $parser = XML::Rules->new(rules => \@rules,
1774 style => 'filter',
1775 output_encoding => 'utf8',
1776 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1777
1778 if (!-e $metadata_xml_filename) {
1779
1780 if (open(MOUT,">$metadata_xml_filename")) {
1781
1782 my $src_file_re = &util::filename_to_regex($src_file);
1783 # shouldn't the following also be in the above utility routine??
1784 # $src_file_re =~ s/\./\\./g;
1785
1786 print MOUT "<?xml version=\"1.0\"?>\n";
1787 print MOUT "<DirectoryMetadata>\n";
1788 print MOUT " <FileSet>\n";
1789 print MOUT " <FileName>$src_file_re</FileName>\n";
1790 print MOUT " <Description>\n";
1791 print MOUT " </Description>\n";
1792 print MOUT " </FileSet>\n";
1793 print MOUT "</DirectoryMetadata>\n";
1794
1795 close(MOUT);
1796 }
1797 else {
1798 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1799 }
1800 }
1801
1802
1803 my $xml_in = "";
1804 if (!open(MIN,"<$metadata_xml_filename")) {
1805 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1806 }
1807 else {
1808 # Read them in
1809 my $line;
1810 while (defined ($line=<MIN>)) {
1811 $xml_in .= $line;
1812 }
1813 close(MIN);
1814
1815 # Filter with the call-back functions
1816 my $xml_out = "";
1817
1818 my $MOUT;
1819 if (!open($MOUT,">$metadata_xml_filename")) {
1820 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1821 }
1822 else {
1823 binmode($MOUT,":utf8");
1824
1825 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1826 # At the moment, I will just hack it!
1827 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1828 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1829 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1830 #print MOUT $xml_out;
1831
1832 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1833 metapos => $metapos,
1834 metavalue => $metavalue,
1835 metamode => $metamode,
1836 src_file => $src_file,
1837 prevmetavalue => $prevmetavalue,
1838 current_file => undef} );
1839 close($MOUT);
1840 }
1841 }
1842}
1843
1844
1845sub set_import_metadata
1846{
1847 my $self = shift @_;
1848
1849 my $username = $self->{'username'};
1850 my $collect = $self->{'collect'};
1851 my $gsdl_cgi = $self->{'gsdl_cgi'};
1852
1853 if ($baseaction::authentication_enabled) {
1854 # Ensure the user is allowed to edit this collection
1855 $self->authenticate_user($username, $collect);
1856 }
1857
1858 # Make sure the collection isn't locked by someone else
1859 $self->lock_collection($username, $collect);
1860
1861 $self->_set_import_metadata(@_);
1862
1863 # Release the lock once it is done
1864 $self->unlock_collection($username, $collect);
1865
1866}
1867
1868sub set_import_metadata_array
1869{
1870 my $self = shift @_;
1871
1872 my $username = $self->{'username'};
1873 my $collect = $self->{'collect'};
1874 my $gsdl_cgi = $self->{'gsdl_cgi'};
1875# my $gsdlhome = $self->{'gsdlhome'};
1876
1877 if ($baseaction::authentication_enabled) {
1878 # Ensure the user is allowed to edit this collection
1879 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1880 }
1881
1882 my $site = $self->{'site'};
1883 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1884
1885 $gsdl_cgi->checked_chdir($collect_dir);
1886
1887 # Make sure the collection isn't locked by someone else
1888 $self->lock_collection($username, $collect);
1889
1890 $self->_set_import_metadata_array(@_);
1891
1892 # Release the lock once it is done
1893 $self->unlock_collection($username, $collect);
1894
1895}
1896
1897
1898sub _set_import_metadata_array
1899{
1900 my $self = shift @_;
1901
1902 my $collect = $self->{'collect'};
1903 my $gsdl_cgi = $self->{'gsdl_cgi'};
1904
1905 my $site = $self->{'site'};
1906 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1907
1908 # look up additional args
1909
1910 my $infodbtype = $self->{'infodbtype'};
1911
1912 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1913 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1914
1915 my $json_str = $self->{'json'};
1916 my $doc_array = decode_json $json_str;
1917
1918 my $global_status = 0;
1919 my $global_mess = "";
1920
1921 my @all_docids = ();
1922
1923 foreach my $doc_array_rec ( @$doc_array )
1924 {
1925 my $status = -1;
1926 my $docid = $doc_array_rec->{'docid'};
1927
1928 my ($docid_root,$docid_secnum);
1929 if(defined $docid) {
1930 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1931 # as yet no support for setting subsection metadata in metadata.xml
1932 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1933 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
1934 next; # skip this docid in for loop
1935 }
1936 }
1937
1938 push(@all_docids,$docid); # docid_root rather
1939
1940 my $metaname = $doc_array_rec->{'metaname'};
1941 if (defined $metaname) {
1942 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1943 my $metavalue = $doc_array_rec->{'metavalue'};
1944 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1945
1946 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1947 # make "accumulate" the default (less destructive, as it won't actually
1948 # delete any existing values)
1949 $metamode = "accumulate";
1950 }
1951
1952 # adding metapos and prevmetavalue support to import_metadata subroutines
1953 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1954 my $prevmetavalue = $self->{'prevmetavalue'};
1955
1956 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1957
1958 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1959 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1960
1961 foreach my $metatable_rec ( @$metatable ) {
1962 $metaname = $metatable_rec->{'metaname'};
1963 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1964 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1965 # make "accumulate" the default (less destructive, as it won't actually
1966 # delete any existing values)
1967 $metamode = "accumulate";
1968 }
1969
1970 # No support for metapos and prevmetavalue in the JSON metatable substructure
1971 my $metapos = undef;
1972 my $prevmetavalue = undef;
1973 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1974
1975 foreach my $metavalue ( @$metavals ) {
1976 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1977
1978 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1979 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
1980 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1981 # metavalue for this metaname will be lost
1982 $metamode = "accumulate";
1983 }
1984 }
1985 }
1986 }
1987 }
1988
1989 # always a success message
1990 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1991 $gsdl_cgi->generate_ok_message($mess);
1992}
1993
1994# always returns true (1)
1995sub set_import_metadata_entry
1996{
1997 my $self = shift @_;
1998 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
1999
2000 my $info_mess = <<RAWEND;
2001****************************
2002 set_import_metadata_entry()
2003****************************
2004RAWEND
2005
2006 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2007 $info_mess .= " collect = $collect\n" if defined($collect);
2008 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2009 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2010 $info_mess .= " docid = $docid\n" if defined($docid);
2011 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2012 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2013 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2014 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2015 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2016
2017 $info_mess .= "****************************\n";
2018
2019 $gsdl_cgi->generate_message($info_mess);
2020
2021 # import works with metadata.xml which can have inherited metadata
2022 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2023 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2024 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2025 # what they're doing if they provide a metapos.
2026 if(defined $metapos) {
2027 print STDERR "@@@@ WARNING: metapos defined.\n";
2028 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2029 }
2030
2031 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2032 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2033 my $metadata_xml_file;
2034 my $import_filename = undef;
2035
2036 if (defined $docid) {
2037 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2038 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2039
2040 # This now stores the full pathname
2041 $import_filename = $doc_rec->{'src-file'}->[0];
2042 $import_filename = &util::placeholders_to_abspath($import_filename);
2043
2044 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2045 # as the array version of the method doesn't support the -f parameter yet
2046 my $import_file = $self->{'f'};
2047 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2048 }
2049
2050 # figure out correct metadata.xml file [?]
2051 # Assuming the metadata.xml file is next to the source file
2052 # Note: This will not work if it is using the inherited metadata from the parent folder
2053 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2054 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2055
2056 # If we're overriding everything, then $prevmetavalue=undefined and
2057 # $metamode=override combined with $metapos=undefined
2058 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2059 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2060 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2061## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2062 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2063
2064 }
2065
2066 # Edit the metadata.xml
2067 # Modified by Jeffrey from DL Consulting
2068 # Handle the case where there is one metadata.xml file for multiple FileSets
2069 # The XML filter needs to know whether it is in the right FileSet
2070 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2071 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2072 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2073 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2074 #return 0;
2075 return $metadata_xml_filename;
2076}
2077
2078sub _remove_import_metadata
2079{
2080 my $self = shift @_;
2081
2082 my $collect = $self->{'collect'};
2083 my $gsdl_cgi = $self->{'gsdl_cgi'};
2084# my $gsdlhome = $self->{'gsdlhome'};
2085 my $infodbtype = $self->{'infodbtype'};
2086
2087 # Obtain the collect dir
2088 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2089 my $site = $self->{'site'};
2090 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2091
2092 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2093 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2094
2095 # look up additional args
2096 my $docid = $self->{'d'};
2097 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2098 {
2099 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2100 }
2101
2102 my $metaname = $self->{'metaname'};
2103 my $metapos = $self->{'metapos'};
2104 my $metavalue = $self->{'metavalue'};
2105
2106 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2107 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2108
2109 if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2110 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2111 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2112 $metapos = 0;
2113 }
2114 my $metamode = $self->{'metamode'};
2115 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2116
2117 # import works with metadata.xml which can have inherited metadata
2118 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2119 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2120 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2121 # what they're doing if they provide a metapos.
2122 if(defined $metapos) {
2123 print STDERR "@@@@ WARNING: metapos defined.\n";
2124 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2125 }
2126
2127 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2128 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2129 my $metadata_xml_file;
2130 my $import_filename = undef;
2131 if (defined $docid)
2132 {
2133 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2134 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2135
2136 # This now stores the full pathname
2137 $import_filename = $doc_rec->{'src-file'}->[0];
2138 $import_filename = &util::placeholders_to_abspath($import_filename);
2139 }
2140
2141 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2142 {
2143 $gsdl_cgi->generate_error("There is no metadata\n");
2144 }
2145
2146 # figure out correct metadata.xml file [?]
2147 # Assuming the metadata.xml file is next to the source file
2148 # Note: This will not work if it is using the inherited metadata from the parent folder
2149 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2150 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2151
2152 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2153
2154 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2155 $mess .= " $metaname";
2156 $mess .= " = $metavalue\n";
2157
2158 $gsdl_cgi->generate_ok_message($mess);
2159
2160 #return $status; # in case calling functions have a use for this
2161}
2162
2163sub remove_import_metadata
2164{
2165 my $self = shift @_;
2166
2167 my $username = $self->{'username'};
2168 my $collect = $self->{'collect'};
2169 my $gsdl_cgi = $self->{'gsdl_cgi'};
2170
2171 if ($baseaction::authentication_enabled) {
2172 # Ensure the user is allowed to edit this collection
2173 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2174 }
2175
2176 # Make sure the collection isn't locked by someone else
2177 $self->lock_collection($username, $collect);
2178
2179 $self->_remove_import_metadata(@_);
2180
2181 # Release the lock once it is done
2182 $self->unlock_collection($username, $collect);
2183
2184}
2185
2186sub remove_from_metadata_xml
2187{
2188 my $self = shift @_;
2189 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2190 # metamode generally has no meaning for removing meta, but is used by set_meta
2191 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2192
2193 # Set the call-back functions for the metadata tags
2194 my @rules =
2195 (
2196 _default => 'raw',
2197 'Metadata' => \&rfmxml_metadata,
2198 'FileName' => \&mxml_filename
2199 );
2200
2201 my $parser = XML::Rules->new
2202 (
2203 rules => \@rules,
2204 style => 'filter',
2205 output_encoding => 'utf8',
2206 #normalisespaces => 1,
2207 stripspaces => 2|0|0 # ineffectual
2208 );
2209
2210 my $xml_in = "";
2211 if (!open(MIN,"<$metadata_xml_filename"))
2212 {
2213 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2214 }
2215 else
2216 {
2217 # Read them in
2218 my $line;
2219 while (defined ($line=<MIN>)) {
2220 $xml_in .= $line;
2221 }
2222 close(MIN);
2223
2224 # Filter with the call-back functions
2225 my $xml_out = "";
2226
2227 my $MOUT;
2228 if (!open($MOUT,">$metadata_xml_filename")) {
2229 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2230 }
2231 else {
2232 binmode($MOUT,":utf8");
2233 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2234 close($MOUT);
2235 }
2236 }
2237}
2238
2239sub rfmxml_metadata
2240{
2241 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2242
2243 # metadata.xml does not handle subsections
2244
2245 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2246 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2247 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2248 {
2249 if (!defined $parser->{'parameters'}->{'poscount'})
2250 {
2251 $parser->{'parameters'}->{'poscount'} = 0;
2252 }
2253 else
2254 {
2255 $parser->{'parameters'}->{'poscount'}++;
2256 }
2257
2258 # if overriding but no metapos, then clear all the meta for this metaname
2259 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2260 return [];
2261 }
2262
2263 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2264 {
2265 return [];
2266 }
2267 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2268 {
2269 return [];
2270 }
2271 }
2272
2273 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2274 return [$tagname => $attrHash];
2275}
2276
2277sub _remove_live_metadata
2278{
2279 my $self = shift @_;
2280
2281 my $collect = $self->{'collect'};
2282 my $gsdl_cgi = $self->{'gsdl_cgi'};
2283# my $gsdlhome = $self->{'gsdlhome'};
2284 my $infodbtype = $self->{'infodbtype'};
2285
2286 # Obtain the collect dir
2287 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2288 my $site = $self->{'site'};
2289 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2290
2291
2292 # look up additional args
2293 my $docid = $self->{'d'};
2294 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2295 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2296 }
2297
2298 # Generate the dbkey
2299 my $metaname = $self->{'metaname'};
2300 my $dbkey = "$docid.$metaname";
2301
2302 # To people who know $collect_tail please add some comments
2303 # Obtain the live gdbm_db path
2304 my $collect_tail = $collect;
2305 $collect_tail =~ s/^.*[\/|\\]//;
2306 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2307 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2308
2309 # Remove the key
2310 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2311 my $status = system($cmd);
2312 if ($status != 0) {
2313 # Catch error if gdbmdel failed
2314 my $mess = "Failed to set metadata key: $dbkey\n";
2315
2316 $mess .= "PATH: $ENV{'PATH'}\n";
2317 $mess .= "cmd = $cmd\n";
2318 $mess .= "Exit status: $status\n";
2319 $mess .= "System Error Message: $!\n";
2320
2321 $gsdl_cgi->generate_error($mess);
2322 }
2323 else {
2324 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2325 }
2326
2327}
2328
2329sub remove_live_metadata
2330{
2331 my $self = shift @_;
2332
2333 my $username = $self->{'username'};
2334 my $collect = $self->{'collect'};
2335 my $gsdl_cgi = $self->{'gsdl_cgi'};
2336 my $gsdlhome = $self->{'gsdlhome'};
2337
2338 if ($baseaction::authentication_enabled) {
2339 # Ensure the user is allowed to edit this collection
2340 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2341 }
2342
2343 # Make sure the collection isn't locked by someone else
2344 $self->lock_collection($username, $collect);
2345
2346 $self->_remove_live_metadata(@_);
2347
2348 $self->unlock_collection($username, $collect);
2349}
2350
2351sub remove_metadata
2352{
2353 my $self = shift @_;
2354
2355 my $where = $self->{'where'};
2356 if(!$where || ($where =~ m/^\s*$/)) {
2357 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2358 return;
2359 }
2360
2361 my $username = $self->{'username'};
2362 my $collect = $self->{'collect'};
2363 my $gsdl_cgi = $self->{'gsdl_cgi'};
2364
2365 if ($baseaction::authentication_enabled) {
2366 # Ensure the user is allowed to edit this collection
2367 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2368 }
2369
2370 # Make sure the collection isn't locked by someone else
2371 $self->lock_collection($username, $collect);
2372
2373 # check which directories need to be processed, specified in $where as
2374 # any combination of import|archives|index|live
2375 if($where =~ m/import/) {
2376 $self->_remove_import_metadata(@_);
2377 }
2378 if($where =~ m/archives/) {
2379 $self->_remove_archives_metadata(@_);
2380 }
2381 if($where =~ m/index/) {
2382 $self->_remove_index_metadata(@_);
2383 }
2384
2385 # Release the lock once it is done
2386 $self->unlock_collection($username, $collect);
2387}
2388
2389# the internal version, without authentication
2390sub _remove_index_metadata
2391{
2392 my $self = shift @_;
2393
2394 my $collect = $self->{'collect'};
2395 my $gsdl_cgi = $self->{'gsdl_cgi'};
2396# my $gsdlhome = $self->{'gsdlhome'};
2397 my $infodbtype = $self->{'infodbtype'};
2398
2399 # Obtain the collect dir
2400 my $site = $self->{'site'};
2401 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2402 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2403
2404 # look up additional args
2405 my $docid = $self->{'d'};
2406 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2407 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2408 }
2409 my $metaname = $self->{'metaname'};
2410 my $metapos = $self->{'metapos'};
2411 my $metavalue = $self->{'metavalue'};
2412
2413 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2414 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2415
2416 # To people who know $collect_tail please add some comments
2417 # -> In collection groups, I think collect_tailname is the subcollection name,
2418 # e.g. colgroup-name/col-tail-name
2419 # Obtain the path to the database
2420 my $collect_tail = $collect;
2421 $collect_tail =~ s/^.*[\/|\\]//;
2422 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2423 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2424
2425 # Read the docid entry
2426 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2427
2428 # Check to make sure the key does exist
2429 if (!defined ($doc_rec->{$metaname})) {
2430 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2431 }
2432
2433 # Obtain the specified metadata pos
2434 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2435 if(!defined $metapos && !defined $metavalue) {
2436 $metapos = 0;
2437 }
2438
2439
2440 # consider check key is defined before deleting?
2441 # Loop through the metadata array and ignore the specified position
2442 my $filtered_metadata = [];
2443 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2444 for (my $i=0; $i<$num_metadata_vals; $i++) {
2445 my $metaval = shift(@{$doc_rec->{$metaname}});
2446
2447 if (!defined $metavalue && $i != $metapos) {
2448 push(@$filtered_metadata,$metaval);
2449 }
2450
2451 if(defined $metavalue && !($metavalue eq $metaval))
2452 {
2453 push(@$filtered_metadata,$metaval);
2454 }
2455 }
2456 $doc_rec->{$metaname} = $filtered_metadata;
2457
2458 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2459 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2460
2461 if ($status != 0) {
2462 my $mess = "Failed to set metadata key: $docid\n";
2463
2464 $mess .= "PATH: $ENV{'PATH'}\n";
2465 $mess .= "Exit status: $status\n";
2466 $mess .= "System Error Message: $!\n";
2467
2468 $gsdl_cgi->generate_error($mess);
2469 }
2470 else {
2471 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2472 $mess .= " $metaname";
2473 $mess .= "->[$metapos]" if (defined $metapos);
2474 $mess .= " ($metavalue)" if (defined $metavalue);
2475
2476 $gsdl_cgi->generate_ok_message($mess);
2477 }
2478
2479 #return $status; # in case calling functions have a use for this
2480}
2481
2482sub remove_index_metadata
2483{
2484 my $self = shift @_;
2485
2486 my $username = $self->{'username'};
2487 my $collect = $self->{'collect'};
2488 my $gsdl_cgi = $self->{'gsdl_cgi'};
2489# my $gsdlhome = $self->{'gsdlhome'};
2490
2491 if ($baseaction::authentication_enabled) {
2492 # Ensure the user is allowed to edit this collection
2493 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2494 }
2495
2496 # Obtain the collect dir
2497 my $site = $self->{'site'};
2498 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2499 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2500
2501 # Make sure the collection isn't locked by someone else
2502 $self->lock_collection($username, $collect);
2503
2504 $self->_remove_index_metadata(@_);
2505
2506 # Release the lock once it is done
2507 $self->unlock_collection($username, $collect);
2508}
2509
2510
2511# Was trying to reused the codes, but the functions need to be broken
2512# down more before they can be reused, otherwise there will be too
2513# much overhead and duplicate process...
2514sub insert_metadata
2515{
2516 my $self = shift @_;
2517
2518 my $username = $self->{'username'};
2519 my $collect = $self->{'collect'};
2520 my $gsdl_cgi = $self->{'gsdl_cgi'};
2521 my $gsdlhome = $self->{'gsdlhome'};
2522 my $infodbtype = $self->{'infodbtype'};
2523
2524 # If the import metadata and gdbm database have been updated, we
2525 # need to insert some notification to warn user that the the text
2526 # they see at the moment is not indexed and require a rebuild.
2527 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2528
2529 if ($baseaction::authentication_enabled) {
2530 # Ensure the user is allowed to edit this collection
2531 $self->authenticate_user($username, $collect);
2532 }
2533
2534 # Obtain the collect and archive dir
2535 my $site = $self->{'site'};
2536 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2537 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2538 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2539
2540 # Make sure the collection isn't locked by someone else
2541 $self->lock_collection($username, $collect);
2542
2543 # Check additional args
2544 my $docid = $self->{'d'};
2545 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
2546 $gsdl_cgi->generate_error("No document id is specified: d=...");
2547 }
2548 my $metaname = $self->{'metaname'};
2549 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
2550 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2551 }
2552 my $metavalue = $self->{'metavalue'};
2553 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
2554 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2555 }
2556 # make "accumulate" the default (less destructive, as it won't actually
2557 # delete any existing values)
2558 my $metamode = "accumulate";
2559
2560 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2561 my $metapos = undef;
2562 my $prevmetavalue = undef;
2563
2564 #=======================================================================#
2565 # set_import_metadata [START]
2566 #=======================================================================#
2567 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2568 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2569 my $metadata_xml_file;
2570 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2571 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2572
2573 # This now stores the full pathname
2574 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2575 $import_filename = &util::placeholders_to_abspath($import_filename);
2576
2577 # figure out correct metadata.xml file [?]
2578 # Assuming the metadata.xml file is next to the source file
2579 # Note: This will not work if it is using the inherited metadata from the parent folder
2580 my ($import_tailname, $import_dirname)
2581 = File::Basename::fileparse($import_filename);
2582 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2583
2584 # Shane's escape characters
2585 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2586 $metavalue =~ s/\,/&#44;/g;
2587 $metavalue =~ s/\:/&#58;/g;
2588 $metavalue =~ s/\|/&#124;/g;
2589 $metavalue =~ s/\(/&#40;/g;
2590 $metavalue =~ s/\)/&#41;/g;
2591 $metavalue =~ s/\[/&#91;/g;
2592 $metavalue =~ s/\\/&#92;/g;
2593 $metavalue =~ s/\]/&#93;/g;
2594 $metavalue =~ s/\{/&#123;/g;
2595 $metavalue =~ s/\}/&#125;/g;
2596 $metavalue =~ s/\"/&#34;/g;
2597 $metavalue =~ s/\`/&#96;/g;
2598 $metavalue =~ s/\n/_newline_/g;
2599
2600 # Edit the metadata.xml
2601 # Modified by Jeffrey from DL Consulting
2602 # Handle the case where there is one metadata.xml file for multiple FileSets
2603 # The XML filter needs to know whether it is in the right FileSet
2604 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2605 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2606 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2607 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2608 #=======================================================================#
2609 # set_import_metadata [END]
2610 #=======================================================================#
2611
2612
2613 #=======================================================================#
2614 # set_metadata (accumulate version) [START]
2615 #=======================================================================#
2616 # To people who know $collect_tail please add some comments
2617 # Obtain path to the database
2618 my $collect_tail = $collect;
2619 $collect_tail =~ s/^.*[\/|\\]//;
2620 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2621 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2622
2623 # Read the docid entry
2624 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2625
2626 # Protect the quotes
2627 $metavalue =~ s/\"/\\\"/g;
2628
2629 # Adds the pending macro
2630 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2631
2632 # If the metadata doesn't exist, create a new one
2633 if (!defined($doc_rec->{$metaname})){
2634 $doc_rec->{$metaname} = [ $macro_metavalue ];
2635 }
2636 # Else, let's acculumate the values
2637 else {
2638 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2639 }
2640
2641 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2642 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2643
2644 if ($status != 0) {
2645 # Catch error if gdbmget failed
2646 my $mess = "Failed to set metadata key: $docid\n";
2647
2648 $mess .= "PATH: $ENV{'PATH'}\n";
2649 $mess .= "Exit status: $status\n";
2650 $mess .= "System Error Message: $!\n";
2651
2652 $gsdl_cgi->generate_error($mess);
2653 }
2654 else {
2655 my $mess = "insert-metadata successful: Key[$docid]\n";
2656 $mess .= " [In metadata.xml] $metaname";
2657 $mess .= " = $metavalue\n";
2658 $mess .= " [In database] $metaname";
2659 $mess .= " = $macro_metavalue\n";
2660 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2661 $gsdl_cgi->generate_ok_message($mess);
2662 }
2663 #=======================================================================#
2664 # set_metadata (accumulate version) [END]
2665 #=======================================================================#
2666
2667 # Release the lock once it is done
2668 $self->unlock_collection($username, $collect);
2669}
2670
2671# not returning 1; here since this file is conditionally included by metadataction.pm
Note: See TracBrowser for help on using the repository browser.