source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27176

Last change on this file since 27176 was 27176, checked in by ak19, 11 years ago

import_meta functions are complete and tested now in this 2nd commit for them which primarily adds metapos and prevmetavalue for all the import_meta subroutines. Further corrections and cleaning up are part of this commit as well, including removing the append flag sent to gdbmset methods called by the live_meta functions, as the live meta subroutines seem to accumulate (append) by default for new metanames else overwrite existing ones if they have matching metanames.

File size: 99.9 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ "metapos" ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos", "metamode" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "where", "json" ],
92 'optional-args' => [ ] },
93
94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
101
102 "set-index-metadata-array" => {
103 'compulsory-args' => [ "json" ],
104 'optional-args' => [ ] },
105
106 "set-live-metadata-array" => {
107 'compulsory-args' => [ "json" ],
108 'optional-args' => [ ] },
109
110 #REMOVE METHODS
111 "remove-import-metadata" => {
112 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
113 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
114
115 "remove-archives-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
117 'optional-args' => [ "metapos", "metavalue" ] },
118
119 "remove-live-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ ] },
122
123 "remove-index-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ "metapos", "metavalue" ] },
126
127 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "where", "metapos", "metavalue" ] },
130
131 #INSERT METHODS
132 "insert-metadata" => {
133 'compulsory-args' => [ "d", "metaname", "metavalue" ],
134 'optional-args' => [ ] }
135};
136
137
138sub new
139{
140 my $class = shift (@_);
141 my ($gsdl_cgi,$iis6_mode) = @_;
142
143 # Treat metavalue specially. To transmit this through a GET request
144 # the Javascript side has url-encoded it, so here we need to decode
145 # it before proceeding
146
147 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
148 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
149 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
150
151 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
152 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
153
154 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
155
156 return bless $self, $class;
157}
158
159
160sub get_live_metadata
161{
162 my $self = shift @_;
163
164 my $username = $self->{'username'};
165 my $collect = $self->{'collect'};
166 my $gsdl_cgi = $self->{'gsdl_cgi'};
167 my $gsdlhome = $self->{'gsdlhome'};
168 my $infodbtype = $self->{'infodbtype'};
169
170 # live metadata gets/saves value scoped (prefixed) by the current usename
171 # so (for now) let's not bother to enforce authentication
172
173 # Obtain the collect dir
174 my $site = $self->{'site'};
175 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
176 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
177
178 # Make sure the collection isn't locked by someone else
179 $self->lock_collection($username, $collect);
180
181 # look up additional args
182 my $docid = $self->{'d'};
183 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
184 $gsdl_cgi->generate_error("No docid (d=...) specified.");
185 }
186
187 # Generate the dbkey
188 my $metaname = $self->{'metaname'};
189 my $dbkey = "$docid.$metaname";
190
191 # To people who know $collect_tail please add some comments
192 # Obtain path to the database
193 my $collect_tail = $collect;
194 $collect_tail =~ s/^.*[\/|\\]//;
195 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
196 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
197
198 # Obtain the content of the key
199 my $cmd = "gdbmget $infodb_file_path $dbkey";
200 if (open(GIN,"$cmd |") == 0) {
201 # Catch error if gdbmget failed
202 my $mess = "Failed to get metadata key: $metaname\n";
203 $mess .= "$!\n";
204
205 $gsdl_cgi->generate_error($mess);
206 }
207 else {
208 binmode(GIN,":utf8");
209 # Read everything in and concatenate them into $metavalue
210 my $metavalue = "";
211 my $line;
212 while (defined ($line=<GIN>)) {
213 $metavalue .= $line;
214 }
215 close(GIN);
216 chomp($metavalue); # Get rid off the tailing newlines
217 $gsdl_cgi->generate_ok_message("$metavalue");
218 }
219
220 # Release the lock once it is done
221 $self->unlock_collection($username, $collect);
222}
223
224# just calls the index version
225sub get_metadata
226{
227 my $self = shift @_;
228 $self->get_index_metadata(@_);
229}
230
231sub get_index_metadata
232{
233 my $self = shift @_;
234
235 my $username = $self->{'username'};
236 my $collect = $self->{'collect'};
237 my $gsdl_cgi = $self->{'gsdl_cgi'};
238 my $gsdlhome = $self->{'gsdlhome'};
239
240 # Authenticate user if it is enabled
241 if ($baseaction::authentication_enabled) {
242 # Ensure the user is allowed to edit this collection
243 &authenticate_user($gsdl_cgi, $username, $collect);
244 }
245
246 # Obtain the collect dir
247 my $site = $self->{'site'};
248 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
249 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
250
251 # Make sure the collection isn't locked by someone else
252 $self->lock_collection($username, $collect);
253
254 # look up additional args
255 my $docid = $self->{'d'};
256 my $metaname = $self->{'metaname'};
257 my $metapos = $self->{'metapos'};
258 my $infodbtype = $self->{'infodbtype'};
259
260 # To people who know $collect_tail please add some comments
261 # Obtain path to the database
262 my $collect_tail = $collect;
263 $collect_tail =~ s/^.*[\/\\]//;
264 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
265 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
266
267 # Read the docid entry
268 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
269
270 # Basically loop through and unescape_html the values
271 foreach my $k (keys %$doc_rec) {
272 my @escaped_v = ();
273 foreach my $v (@{$doc_rec->{$k}}) {
274 my $ev = &ghtml::unescape_html($v);
275 push(@escaped_v, $ev);
276 }
277 $doc_rec->{$k} = \@escaped_v;
278 }
279
280 # Obtain the specified metadata value
281 $metapos = 0 if (!defined $metapos);
282 my $metavalue = $doc_rec->{$metaname}->[$metapos];
283 $gsdl_cgi->generate_ok_message("$metavalue");
284
285 # Release the lock once it is done
286 $self->unlock_collection($username, $collect);
287}
288
289
290sub get_import_metadata
291{
292 my $self = shift @_;
293
294 my $username = $self->{'username'};
295 my $collect = $self->{'collect'};
296 my $gsdl_cgi = $self->{'gsdl_cgi'};
297 my $gsdlhome = $self->{'gsdlhome'};
298
299 # Authenticate user if it is enabled
300 if ($baseaction::authentication_enabled) {
301 # Ensure the user is allowed to edit this collection
302 &authenticate_user($gsdl_cgi, $username, $collect);
303 }
304
305 # Obtain the collect dir
306 my $site = $self->{'site'};
307 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
308 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
309
310 # Make sure the collection isn't locked by someone else
311 $self->lock_collection($username, $collect);
312
313 # look up additional args
314 my $docid = $self->{'d'};
315 my $metaname = $self->{'metaname'};
316 my $metapos = $self->{'metapos'};
317 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
318
319 my $infodbtype = $self->{'infodbtype'};
320 if (!defined $docid)
321 {
322 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
323 }
324
325 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
326 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
327 my $metadata_xml_file;
328 my $import_filename = undef;
329
330
331 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
332 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
333 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
334
335 # This now stores the full pathname
336 $import_filename = $doc_rec->{'src-file'}->[0];
337
338 # figure out correct metadata.xml file [?]
339 # Assuming the metadata.xml file is next to the source file
340 # Note: This will not work if it is using the inherited metadata from the parent folder
341 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
342 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
343
344 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
345
346 # Release the lock once it is done
347 $self->unlock_collection($username, $collect);
348}
349
350sub get_metadata_from_metadata_xml
351{
352 my $self = shift @_;
353 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
354
355 my @rules =
356 (
357 _default => 'raw',
358 'Metadata' => \&gfmxml_metadata,
359 'FileName' => \&mxml_filename
360 );
361
362 my $parser = XML::Rules->new
363 (
364 rules => \@rules,
365 output_encoding => 'utf8'
366 );
367
368 my $xml_in = "";
369 if (!open(MIN,"<$metadata_xml_filename"))
370 {
371 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
372 }
373 else
374 {
375 # Read them in
376 my $line;
377 while (defined ($line=<MIN>)) {
378 $xml_in .= $line;
379 }
380 close(MIN);
381
382 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
383
384 if(defined $parser->{'pad'}->{'metavalue'})
385 {
386 return $parser->{'pad'}->{'metavalue'};
387 }
388 else
389 {
390 return "";
391 }
392 }
393}
394
395sub gfmxml_metadata
396{
397 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
398
399 # no subsection support yet in metadata.xml
400
401 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
402 {
403 if (!defined $parser->{'parameters'}->{'poscount'})
404 {
405 $parser->{'parameters'}->{'poscount'} = 0;
406 }
407 else
408 {
409 $parser->{'parameters'}->{'poscount'}++;
410 }
411
412 # gets the first value by default, since metapos defaults to 0
413 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
414 {
415 if($parser->{'parameters'}->{'metapos'} > 0) {
416 print STDERR "@@@@ WARNING: non-zero metapos.\n";
417 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
418 }
419 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
420 }
421 }
422}
423
424sub get_archives_metadata
425{
426 my $self = shift @_;
427
428 my $username = $self->{'username'};
429 my $collect = $self->{'collect'};
430 my $gsdl_cgi = $self->{'gsdl_cgi'};
431 my $gsdlhome = $self->{'gsdlhome'};
432 my $infodbtype = $self->{'infodbtype'};
433
434 # Authenticate user if it is enabled
435 if ($baseaction::authentication_enabled) {
436 # Ensure the user is allowed to edit this collection
437 &authenticate_user($gsdl_cgi, $username, $collect);
438 }
439
440 # Obtain the collect dir
441 my $site = $self->{'site'};
442 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
443
444 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
445
446 # Make sure the collection isn't locked by someone else
447 $self->lock_collection($username, $collect);
448
449 # look up additional args
450 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
451 $docid_secnum = "" if (!defined $docid_secnum);
452
453 my $metaname = $self->{'metaname'};
454 my $metapos = $self->{'metapos'};
455 $metapos = 0 if (!defined $metapos);
456
457 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
458 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
459
460 # This now stores the full pathname
461 my $doc_filename = $doc_rec->{'doc-file'}->[0];
462
463 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
464
465 # Release the lock once it is done
466 $self->unlock_collection($username, $collect);
467}
468
469sub get_metadata_from_archive_xml
470{
471 my $self = shift @_;
472 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
473
474 my @start_rules = ('Section' => \&dxml_start_section);
475
476 my @rules =
477 (
478 _default => 'raw',
479 'Metadata' => \&gfdxml_metadata
480 );
481
482 my $parser = XML::Rules->new
483 (
484 start_rules => \@start_rules,
485 rules => \@rules,
486 output_encoding => 'utf8'
487 );
488
489 my $xml_in = "";
490 if (!open(MIN,"<$doc_xml_filename"))
491 {
492 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
493 }
494 else
495 {
496 # Read them in
497 my $line;
498 while (defined ($line=<MIN>)) {
499 $xml_in .= $line;
500 }
501 close(MIN);
502
503 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
504
505 if(defined $parser->{'pad'}->{'metavalue'})
506 {
507 return $parser->{'pad'}->{'metavalue'};
508 }
509 else
510 {
511 return "";
512 }
513 }
514}
515
516sub gfdxml_metadata
517{
518 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
519
520 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
521 {
522 return;
523 }
524
525 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
526 {
527 if (!defined $parser->{'parameters'}->{'poscount'})
528 {
529 $parser->{'parameters'}->{'poscount'} = 0;
530 }
531 else
532 {
533 $parser->{'parameters'}->{'poscount'}++;
534 }
535 }
536
537 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
538 {
539 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
540 }
541}
542
543sub _set_live_metadata
544{
545 my $self = shift @_;
546
547 my $collect = $self->{'collect'};
548 my $gsdl_cgi = $self->{'gsdl_cgi'};
549 #my $gsdlhome = $self->{'gsdlhome'};
550 my $infodbtype = $self->{'infodbtype'};
551
552 # Obtain the collect dir
553 my $site = $self->{'site'};
554 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
555 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
556
557
558 # look up additional args
559 my $docid = $self->{'d'};
560 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
561 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
562 }
563 my $metavalue = $self->{'metavalue'};
564
565 # Generate the dbkey
566 my $metaname = $self->{'metaname'};
567 my $dbkey = "$docid.$metaname";
568
569 # To people who know $collect_tail please add some comments
570 # Obtain path to the database
571 my $collect_tail = $collect;
572 $collect_tail =~ s/^.*[\/\\]//;
573 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
574 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
575
576 # Set the new value
577 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
578 my $status = system($cmd);
579 if ($status != 0) {
580 # Catch error if gdbmget failed
581 my $mess = "Failed to set metadata key: $dbkey\n";
582
583 $mess .= "PATH: $ENV{'PATH'}\n";
584 $mess .= "cmd = $cmd\n";
585 $mess .= "Exit status: $status\n";
586 $mess .= "System Error Message: $!\n";
587
588 $gsdl_cgi->generate_error($mess);
589 }
590 else {
591 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
592 }
593
594 #return $status; # in case calling functions have any further use for this
595}
596
597sub set_live_metadata
598{
599 my $self = shift @_;
600
601 my $username = $self->{'username'};
602 my $collect = $self->{'collect'};
603 my $gsdl_cgi = $self->{'gsdl_cgi'};
604
605 if ($baseaction::authentication_enabled) {
606 # Ensure the user is allowed to edit this collection
607 &authenticate_user($gsdl_cgi, $username, $collect);
608 }
609
610 # Make sure the collection isn't locked by someone else
611 $self->lock_collection($username, $collect);
612
613 $self->_set_live_metadata(@_);
614
615 # Release the lock once it is done
616 $self->unlock_collection($username, $collect);
617}
618
619sub set_index_metadata_entry
620{
621 my $self = shift @_;
622 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
623
624 # To people who know $collect_tail please add some comments
625 # Obtain path to the database
626 my $collect_tail = $collect;
627 $collect_tail =~ s/^.*[\/\\]//;
628 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
629 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
630
631# print STDERR "**** infodb file path = $infodb_file_path\n";
632# print STDERR "***** infodb type = $infodbtype\n";
633
634 # Read the docid entry
635 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
636
637 # Set the metadata value
638 if (defined $metapos) {
639 # if metamode=accumulate AND metapos, warn user and then use metapos
640 if (defined $metamode && $metamode eq "accumulate") {
641 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
642 print STDERR "**** Proceeding by using metapos\n";
643 }
644 $doc_rec->{$metaname}->[$metapos] = $metavalue;
645 }
646 elsif (defined $metamode && $metamode eq "override") {
647 $doc_rec->{$metaname} = [ $metavalue ];
648 }
649 else { # default for index was to override, but because accumulate is less destructive,
650 # and because accumulate is the default for archives and import, that's the new default for index too
651 if(defined $doc_rec->{$metaname}) {
652 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
653 } else {
654 $doc_rec->{$metaname} = [ $metavalue ];
655 }
656 }
657
658 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
659
660 return $status;
661
662}
663
664sub _set_import_metadata
665{
666 my $self = shift @_;
667
668 my $collect = $self->{'collect'};
669 my $gsdl_cgi = $self->{'gsdl_cgi'};
670 my $infodbtype = $self->{'infodbtype'};
671# my $gsdlhome = $self->{'gsdlhome'};
672
673 # Obtain the collect and archive dir
674 my $site = $self->{'site'};
675 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
676 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
677 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
678
679 # look up additional args
680 # want either d= or f=
681 my $docid = $self->{'d'};
682 my ($docid_root,$docid_secnum);
683 if(defined $docid) {
684 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
685 # as yet no support for setting subsection metadata in metadata.xml
686 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
687 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
688 return;
689 }
690 }
691
692 my $import_file = $self->{'f'};
693 if ((!defined $docid) && (!defined $import_file)) {
694 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
695 }
696
697 # Get the parameters and set default mode to "accumulate"
698 my $metaname = $self->{'metaname'};
699 my $metavalue = $self->{'metavalue'};
700## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
701 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
702
703 my $metamode = $self->{'metamode'};
704 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
705 # make "accumulate" the default (less destructive, as won't actually
706 # delete any existing values)
707 $metamode = "accumulate";
708 }
709
710 # adding metapos and prevmetavalue support to import_metadata subroutines
711 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
712 my $prevmetavalue = $self->{'prevmetavalue'};
713
714 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
715 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
716
717 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
718 $mess .= " $metaname";
719 $mess .= " = $metavalue";
720 $mess .= " ($metamode)\n";
721
722 $gsdl_cgi->generate_ok_message($mess);
723
724 #return $status; # in case calling functions have any further use for this
725}
726
727# the version of set_index_meta that doesn't do authentication
728sub _set_archives_metadata
729{
730 my $self = shift @_;
731
732 my $collect = $self->{'collect'};
733 my $gsdl_cgi = $self->{'gsdl_cgi'};
734 my $infodbtype = $self->{'infodbtype'};
735
736 # Obtain the collect and archive dir
737 my $site = $self->{'site'};
738 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
739 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
740
741 # look up additional args
742 my $docid = $self->{'d'};
743 my $metaname = $self->{'metaname'};
744 my $metavalue = $self->{'metavalue'};
745 my $prevmetavalue = $self->{'prevmetavalue'};
746
747 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
748 # Don't append "|| undef", since if metapos=0 it will then be set to undef
749
750 my $metamode = $self->{'metamode'};
751 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
752 # make "accumulate" the default (less destructive, as won't actually
753 # delete any existing values)
754 $metamode = "accumulate";
755 }
756
757 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
758 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
759
760 if ($status == 0) {
761 my $mess = "set-archives-metadata successful: Key[$docid]\n";
762 $mess .= " $metaname";
763 $mess .= "->[$metapos]" if (defined $metapos);
764 $mess .= " = $metavalue";
765 $mess .= " ($metamode)\n";
766
767 $gsdl_cgi->generate_ok_message($mess);
768 }
769 else {
770 my $mess .= "Failed to set archives metadata key: $docid\n";
771 $mess .= "Exit status: $status\n";
772 if(defined $self->{'error_msg'}) {
773 $mess .= "Error Message: $self->{'error_msg'}\n";
774 } else {
775 $mess .= "System Error Message: $!\n";
776 }
777 $mess .= "-" x 20 . "\n";
778
779 $gsdl_cgi->generate_error($mess);
780 }
781
782 #return $status; # in case calling functions have any further use for this
783}
784
785
786# the version of set_index_meta that doesn't do authentication
787sub _set_index_metadata
788{
789 my $self = shift @_;
790
791 my $collect = $self->{'collect'};
792 my $gsdl_cgi = $self->{'gsdl_cgi'};
793
794 my $site = $self->{'site'};
795 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
796
797 # look up additional args
798 my $docid = $self->{'d'};
799 my $metaname = $self->{'metaname'};
800 my $metapos = $self->{'metapos'}; # undef has meaning
801 my $metavalue = $self->{'metavalue'};
802 my $infodbtype = $self->{'infodbtype'};
803 my $metamode = $self->{'metamode'};
804
805 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
806
807 if ($status != 0) {
808 # Catch error if set infodb entry failed
809 my $mess = "Failed to set metadata key: $docid\n";
810
811 $mess .= "PATH: $ENV{'PATH'}\n";
812 $mess .= "Exit status: $status\n";
813 $mess .= "System Error Message: $!\n";
814
815 $gsdl_cgi->generate_error($mess);
816 }
817 else {
818 my $mess = "set-index-metadata successful: Key[$docid]\n";
819 $mess .= " $metaname";
820 $mess .= "->[$metapos]" if (defined $metapos);
821 $mess .= " = $metavalue\n";
822
823 $gsdl_cgi->generate_ok_message($mess);
824 }
825
826 #return $status; # in case calling functions have any further use for this
827}
828
829sub set_index_metadata
830{
831 my $self = shift @_;
832
833 my $username = $self->{'username'};
834 my $collect = $self->{'collect'};
835 my $gsdl_cgi = $self->{'gsdl_cgi'};
836 #my $gsdlhome = $self->{'gsdlhome'};
837
838 if ($baseaction::authentication_enabled) {
839 # Ensure the user is allowed to edit this collection
840 &authenticate_user($gsdl_cgi, $username, $collect);
841 }
842
843 my $site = $self->{'site'};
844 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
845
846 $gsdl_cgi->checked_chdir($collect_dir);
847
848 # Obtain the collect dir
849 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
850
851 # Make sure the collection isn't locked by someone else
852 $self->lock_collection($username, $collect);
853
854 $self->_set_index_metadata(@_);
855
856 # Release the lock once it is done
857 $self->unlock_collection($username, $collect);
858}
859
860# call this to set the metadata for a combination of dirs archives, import or index, or live
861# if none specified, defaults to index which was the original behaviour of set_metadata.
862sub set_metadata
863{
864 my $self = shift @_;
865
866 # Testing that not defining a variable, setting it to "" or to " " all return false
867 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
868
869 my $where = $self->{'where'};
870 if(!$where) {
871 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
872 return;
873 }
874
875 # authenticate and lock collection once, even if processing multiple dirs
876 my $username = $self->{'username'};
877 my $collect = $self->{'collect'};
878 my $gsdl_cgi = $self->{'gsdl_cgi'};
879
880 if ($baseaction::authentication_enabled) {
881 # Ensure the user is allowed to edit this collection
882 #&authenticate_user($gsdl_cgi, $username, $collect);
883 $self->authenticate_user($username, $collect);
884 }
885
886 if($where =~ m/index/) {
887 my $site = $self->{'site'};
888 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
889 $gsdl_cgi->checked_chdir($collect_dir);
890 }
891
892 # Make sure the collection isn't locked by someone else
893 $self->lock_collection($username, $collect);
894
895
896 # now at last can set the metadata. $where can specify multiple
897 # $where is of the form: import|archives|index, or a subset thereof
898
899 #my @whichdirs = split('\|', $where);
900
901 # just check whether $where contains import/archives/index/live in turn, and
902 # for each case, process it accordingly
903 if($where =~ m/import/) {
904 $self->_set_import_metadata(@_);
905 }
906
907 if($where =~ m/archives/) {
908
909 # look up docID arg which is optional to set_metadata because it's optional
910 # to set_import, but which is compulsory to set_archives_metadata
911 my $docid = $self->{'d'};
912 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
913 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
914 }
915 # we have a docid, so can set archives meta
916 $self->_set_archives_metadata(@_);
917 }
918
919 if($where =~ m/index/) {
920
921 # look up docID arg which is optional to set_metadata because it's optional
922 # to set_import, but which is compulsory to set_archives_metadata
923 my $docid = $self->{'d'};
924 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
925 $gsdl_cgi->generate_error("No docid (d=...) specified.");
926 }
927 # we have a docid, so can set index meta
928 $self->_set_index_metadata(@_);
929 }
930
931 if($where =~ m/live/) {
932 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
933 }
934
935 # Release the lock once it is done
936 $self->unlock_collection($username, $collect);
937}
938
939sub set_metadata_array
940{
941 my $self = shift @_;
942
943 my $where = $self->{'where'};
944 if(!$where) {
945 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
946 return;
947 }
948
949 my $username = $self->{'username'};
950 my $collect = $self->{'collect'};
951 my $gsdl_cgi = $self->{'gsdl_cgi'};
952
953 if ($baseaction::authentication_enabled) {
954 # Ensure the user is allowed to edit this collection
955 &authenticate_user($gsdl_cgi, $username, $collect);
956 }
957
958 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
959 # But including the stmt during this code reorganisation to preserve as-is what used to happen
960 my $site = $self->{'site'};
961 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
962 $gsdl_cgi->checked_chdir($collect_dir);
963
964 # Make sure the collection isn't locked by someone else
965 $self->lock_collection($username, $collect);
966
967 if($where =~ m/import/) {
968 $self->_set_import_metadata_array(@_);
969 }
970 if($where =~ m/archives/) {
971 $self->_set_archives_metadata_array(@_);
972 }
973 if($where =~ m/index/) {
974 $self->_set_index_metadata_array(@_);
975 }
976 if($where =~ m/live/) {
977 $self->_set_live_metadata_array(@_);
978 }
979
980 # Release the lock once it is done
981 $self->unlock_collection($username, $collect);
982}
983
984sub _set_index_metadata_array
985{
986 my $self = shift @_;
987
988 my $collect = $self->{'collect'};
989 my $gsdl_cgi = $self->{'gsdl_cgi'};
990 my $site = $self->{'site'};
991 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
992
993
994 # look up additional args
995
996 my $infodbtype = $self->{'infodbtype'};
997
998 my $json_str = $self->{'json'};
999 my $doc_array = decode_json $json_str;
1000
1001
1002 my $global_status = 0;
1003 my $global_mess = "";
1004
1005 my @all_docids = ();
1006
1007 foreach my $doc_array_rec ( @$doc_array ) {
1008
1009 my $status = -1;
1010 my $docid = $doc_array_rec->{'docid'};
1011
1012 push(@all_docids,$docid);
1013
1014 my $metaname = $doc_array_rec->{'metaname'};
1015 if(defined $metaname) {
1016 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
1017 my $metavalue = $doc_array_rec->{'metavalue'};
1018 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1019
1020 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1021 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1022 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1023
1024 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1025 $metaname = $metatable_rec->{'metaname'};
1026 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1027 my $metapos = undef;
1028 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1029
1030 foreach my $metavalue ( @$metavals ) { # metavals is an array
1031 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1032 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1033 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1034 # metavalue for this metaname will be lost
1035 $metamode = "accumulate";
1036 }
1037 }
1038 }
1039 }
1040
1041 if ($status != 0) {
1042 # Catch error if set infodb entry failed
1043 $global_status = $status;
1044 $global_mess .= "Failed to set metadata key: $docid\n";
1045 $global_mess .= "Exit status: $status\n";
1046 $global_mess .= "System Error Message: $!\n";
1047 $global_mess .= "-" x 20;
1048 }
1049 }
1050
1051 if ($global_status != 0) {
1052 $global_mess .= "PATH: $ENV{'PATH'}\n";
1053 $gsdl_cgi->generate_error($global_mess);
1054 }
1055 else {
1056 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1057 $gsdl_cgi->generate_ok_message($mess);
1058 }
1059}
1060
1061sub set_index_metadata_array
1062{
1063 my $self = shift @_;
1064
1065 my $username = $self->{'username'};
1066 my $collect = $self->{'collect'};
1067 my $gsdl_cgi = $self->{'gsdl_cgi'};
1068# my $gsdlhome = $self->{'gsdlhome'};
1069
1070 if ($baseaction::authentication_enabled) {
1071 # Ensure the user is allowed to edit this collection
1072 &authenticate_user($gsdl_cgi, $username, $collect);
1073 }
1074
1075 my $site = $self->{'site'};
1076 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1077
1078 $gsdl_cgi->checked_chdir($collect_dir);
1079
1080 # Obtain the collect dir
1081 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1082
1083 # Make sure the collection isn't locked by someone else
1084 $self->lock_collection($username, $collect);
1085
1086 $self->_set_index_metadata_array(@_);
1087
1088 # Release the lock once it is done
1089 $self->unlock_collection($username, $collect);
1090}
1091
1092# experimental, newly added in and untested
1093sub _set_live_metadata_array
1094{
1095 my $self = shift @_;
1096
1097 my $collect = $self->{'collect'};
1098 my $gsdl_cgi = $self->{'gsdl_cgi'};
1099
1100 my $site = $self->{'site'};
1101 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1102
1103
1104 # look up additional args
1105 my $infodbtype = $self->{'infodbtype'};
1106 # To people who know $collect_tail please add some comments
1107 # Obtain path to the database
1108 my $collect_tail = $collect;
1109 $collect_tail =~ s/^.*[\/\\]//;
1110 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1111 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1112
1113
1114 my $json_str = $self->{'json'};
1115 my $doc_array = decode_json $json_str;
1116
1117
1118 my $global_status = 0;
1119 my $global_mess = "";
1120
1121 my @all_docids = ();
1122
1123
1124 foreach my $doc_array_rec ( @$doc_array ) {
1125
1126 my $status = -1;
1127 my $docid = $doc_array_rec->{'docid'};
1128
1129 push(@all_docids,$docid);
1130
1131 my $metaname = $doc_array_rec->{'metaname'};
1132 if(defined $metaname) {
1133 my $dbkey = "$docid.$metaname";
1134 my $metavalue = $doc_array_rec->{'metavalue'};
1135
1136 # Set the new value
1137 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1138 $status = system($cmd);
1139
1140 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1141 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1142 foreach my $metatable_rec ( @$metatable ) {
1143 $metaname = $metatable_rec->{'metaname'};
1144 my $dbkey = "$docid.$metaname";
1145
1146 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1147 foreach my $metavalue ( @$metavals ) {
1148 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1149 $status = system($cmd);
1150 }
1151 }
1152
1153 }
1154
1155 if ($status != 0) {
1156 # Catch error if gdbmget failed
1157 $global_status = $status;
1158 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1159 $global_mess .= "Exit status: $status\n";
1160 $global_mess .= "System Error Message: $!\n";
1161 $global_mess .= "-" x 20;
1162 }
1163 }
1164
1165 if ($global_status != 0) {
1166 $global_mess .= "PATH: $ENV{'PATH'}\n";
1167 $gsdl_cgi->generate_error($global_mess);
1168 }
1169 else {
1170 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1171 $gsdl_cgi->generate_ok_message($mess);
1172 }
1173}
1174
1175sub set_live_metadata_array
1176{
1177 my $self = shift @_;
1178
1179 my $username = $self->{'username'};
1180 my $collect = $self->{'collect'};
1181 my $gsdl_cgi = $self->{'gsdl_cgi'};
1182
1183 if ($baseaction::authentication_enabled) {
1184 # Ensure the user is allowed to edit this collection
1185 &authenticate_user($gsdl_cgi, $username, $collect);
1186 }
1187
1188 my $site = $self->{'site'};
1189 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1190
1191 $gsdl_cgi->checked_chdir($collect_dir);
1192
1193 # Make sure the collection isn't locked by someone else
1194 $self->lock_collection($username, $collect);
1195
1196 $self->_set_live_metadata_array(@_);
1197
1198 # Release the lock once it is done
1199 $self->unlock_collection($username, $collect);
1200}
1201
1202
1203sub dxml_metadata
1204{
1205 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1206 my $metaname = $parser->{'parameters'}->{'metaname'};
1207 my $metamode = $parser->{'parameters'}->{'metamode'};
1208
1209 print STDERR "**** Processing closing </Metadata> tag\n";
1210
1211 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1212
1213 # Find the right metadata tag and checks if we are going to
1214 # override it
1215 #
1216 # Note: This over writes the first metadata block it
1217 # encountered. If there are multiple Sections in the doc.xml, it
1218 # might not behave as you would expect
1219
1220 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1221## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1222## print STDERR "**** metamode = $metamode\n";
1223
1224 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1225 {
1226 my $name_attr = $attrHash->{'name'};
1227 if (($name_attr eq $metaname) && ($metamode eq "override"))
1228 {
1229 if (!defined $parser->{'parameters'}->{'poscount'})
1230 {
1231 $parser->{'parameters'}->{'poscount'} = 0;
1232 }
1233 else
1234 {
1235 $parser->{'parameters'}->{'poscount'}++;
1236 }
1237
1238 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1239 {
1240 ##print STDERR "#### got match!!\n";
1241 # Get the value and override the current value
1242 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1243 $attrHash->{'_content'} = $metavalue;
1244
1245 # Don't want it to wipe out any other pieces of metadata
1246 $parser->{'parameters'}->{'metamode'} = "done";
1247 }
1248 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1249 {
1250 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1251 $attrHash->{'_content'} = $metavalue;
1252 $parser->{'parameters'}->{'metamode'} = "done";
1253 }
1254 }
1255 }
1256
1257 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1258 return [$tagname => $attrHash];
1259}
1260
1261# This method exists purely for catching invalid section numbers that the client
1262# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1263# we've seen all the Sections in the doc.xml, and none of their section nums matched
1264# if the metamode has not been set to 'done' by then.
1265sub dxml_archive
1266{
1267 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1268 my $metamode = $parser->{'parameters'}->{'metamode'};
1269
1270 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1271 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1272
1273# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1274
1275 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1276 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1277
1278 my $metaname = $parser->{'parameters'}->{'metaname'};
1279 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1280
1281 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1282 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1283 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1284 print STDERR "\tin section $opt_doc_secnum.\n";
1285 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1286 }
1287
1288 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1289 return [$tagname => $attrHash];
1290}
1291
1292sub dxml_description
1293{
1294 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1295 my $metamode = $parser->{'parameters'}->{'metamode'};
1296
1297 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1298 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1299
1300 print STDERR "**** Processing a closing </Description> tag \n";
1301# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1302
1303 # Accumulate the metadata
1304
1305 # We'll be accumulating metadata at this point if we haven't found and therefore
1306 # haven't processed the metadata yet.
1307 # For subsections, this means that if we're at a matching subsection, but haven't
1308 # found the correct metaname to override in that subsection, we accumulate it as new
1309 # meta in the subsection by adding it to the current description.
1310 # If there's no subsection info for the metadata, it will accumulate at the top level
1311 # section description if we hadn't found a matching metaname to override at this point.
1312
1313 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1314 # at the toplevel section. In the latter case, it means we want to process meta in the
1315 # toplevel section. So the eq check between the values below will work in all cases.
1316
1317 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1318 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1319 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1320
1321 if ($curr_secnum eq $opt_doc_secnum
1322 && ($metamode eq "accumulate" || $metamode eq "override")) {
1323 if ($metamode eq "override") {
1324 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1325 }
1326
1327 # If we get to here and metamode is override, this means there
1328 # was no existing value to overide => treat as an append operation
1329
1330 # Tack a new metadata tag on to the end of the <Metadata>+ block
1331 my $metaname = $parser->{'parameters'}->{'metaname'};
1332 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1333
1334 my $metadata_attr = {
1335 '_content' => $metavalue,
1336 'name' => $metaname,
1337 'mode' => "accumulate"
1338 };
1339
1340 my $append_metadata = [ "Metadata" => $metadata_attr ];
1341 my $description_content = $attrHash->{'_content'};
1342
1343 print "Appending metadata to doc.xml\n";
1344
1345 if (ref($description_content)) {
1346 # got some existing interesting nested content
1347 push(@$description_content, " ", $append_metadata ,"\n ");
1348 }
1349 else {
1350 #description_content is most likely a string such as "\n"
1351 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1352 }
1353
1354 $parser->{'parameters'}->{'metamode'} = "done";
1355 }
1356 else {
1357 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1358## print STDERR "**** NOT ACCUMULATE?!? \n";
1359 }
1360
1361 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1362 return [$tagname => $attrHash];
1363}
1364
1365
1366sub dxml_start_section
1367{
1368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1369
1370 my $new_depth = scalar(@$contextArray);
1371
1372 print STDERR "**** START SECTION \n";
1373
1374 if ($new_depth == 1) {
1375 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1376 $parser->{'parameters'}->{'curr_section_num'} = "";
1377 }
1378
1379 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1380 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1381
1382 my $new_secnum;
1383
1384 if ($new_depth > $old_depth) {
1385 # child subsection
1386 $new_secnum = "$old_secnum.1";
1387 }
1388 elsif ($new_depth == $old_depth) {
1389 # sibling section => increase it's value by 1
1390 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1391 $tail_num++;
1392 $new_secnum = $old_secnum;
1393 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1394 }
1395 else {
1396 # back up to parent section => lopp off tail
1397 $new_secnum = $old_secnum;
1398 $new_secnum =~ s/\.\d+$//;
1399 }
1400
1401 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1402 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1403
1404 1;
1405}
1406
1407sub edit_xml_file
1408{
1409 my $self = shift @_;
1410 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1411
1412 # use XML::Rules to add it in (read in and out again)
1413 my $parser = XML::Rules->new(start_rules => $start_rules,
1414 rules => $rules,
1415 style => 'filter',
1416 output_encoding => 'utf8' );
1417
1418 my $xml_in = "";
1419 if (!open(MIN,"<$filename")) {
1420 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1421 }
1422 else {
1423 # Read all the text in
1424 my $line;
1425 while (defined ($line=<MIN>)) {
1426 $xml_in .= $line;
1427 }
1428 close(MIN);
1429
1430 my $MOUT;
1431 if (!open($MOUT,">$filename")) {
1432 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1433 }
1434 else {
1435 # Matched lines will get handled by the call backs
1436## my $xml_out = "";
1437
1438 binmode($MOUT,":utf8");
1439 $parser->filter($xml_in,$MOUT, $options);
1440
1441# binmode(MOUT,":utf8");
1442# print MOUT $xml_out;
1443 close($MOUT);
1444 }
1445 }
1446
1447 # copy across any custom error information that was stored during parsing
1448 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1449}
1450
1451sub edit_doc_xml
1452{
1453 my $self = shift @_;
1454 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1455
1456 my $info_mess = <<RAWEND;
1457****************************
1458 edit_doc_xml()
1459****************************
1460doc_xml_filename = $doc_xml_filename
1461metaname = $metaname
1462metavalue = $metavalue
1463metapos = $metapos
1464metamode = $metamode
1465opt_secnum = $opt_secnum
1466prevmetavalue = $prevmetavalue
1467****************************
1468RAWEND
1469
1470 $gsdl_cgi->generate_message($info_mess);
1471
1472 # To monitor which section/subsection number we are in
1473 my @start_rules =
1474 ( 'Section' => \&dxml_start_section );
1475
1476 # use XML::Rules to add it in (read in and out again)
1477 # Set the call back functions
1478 my @rules =
1479 ( _default => 'raw',
1480 'Metadata' => \&dxml_metadata,
1481 'Description' => \&dxml_description,
1482 'Archive' => \&dxml_archive); # just for catching errors at end
1483
1484 # Sets the parameters
1485 my $options = { 'metaname' => $metaname,
1486 'metapos' => $metapos,
1487 'metavalue' => $metavalue,
1488 'metamode' => $metamode,
1489 'prevmetavalue' => $prevmetavalue };
1490
1491 if (defined $opt_secnum) {
1492 $options->{'secnum'} = $opt_secnum;
1493 }
1494
1495 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1496}
1497
1498sub set_archives_metadata_entry
1499{
1500 my $self = shift @_;
1501 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1502
1503 my $info_mess = <<RAWEND;
1504****************************
1505 set_archives_metadata_entry()
1506****************************
1507archive_dir = $archive_dir
1508collect_dir = $collect_dir
1509collect = $collect
1510infodbtype = $infodbtype
1511docid = $docid
1512metaname = $metaname
1513metapos = $metapos
1514metavalue = $metavalue
1515metamode = $metamode
1516prevmetavalue = $prevmetavalue
1517****************************
1518RAWEND
1519
1520 $gsdl_cgi->generate_message($info_mess);
1521
1522 # Obtain the doc.xml path for the specified docID
1523 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1524
1525 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1526 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1527 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1528
1529 # The $doc_xml_file is relative to the archives, and now let's get the full path
1530 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1531 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1532
1533 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1534 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1535 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1536 if(!defined $metapos && $metamode eq "override") {
1537 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1538 }
1539
1540 # Edit the doc.xml file with the specified metadata name, value and position.
1541 # TODO: there is a potential problem here as this edit_doc_xml function
1542 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1543 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1544
1545 # dxml_metadata method ignores metapos if metamode anything other than override
1546 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1547 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1548
1549 # return 0; # return 0 for now to indicate no error
1550 return (defined $self->{'error_msg'}) ? 1 : 0;
1551}
1552
1553
1554sub set_archives_metadata
1555{
1556 my $self = shift @_;
1557
1558 my $username = $self->{'username'};
1559 my $collect = $self->{'collect'};
1560 my $gsdl_cgi = $self->{'gsdl_cgi'};
1561
1562 if ($baseaction::authentication_enabled) {
1563 # Ensure the user is allowed to edit this collection
1564 $self->authenticate_user($username, $collect);
1565 }
1566
1567 # Make sure the collection isn't locked by someone else
1568 $self->lock_collection($username, $collect);
1569
1570 $self->_set_archives_metadata(@_);
1571
1572 # Release the lock once it is done
1573 $self->unlock_collection($username, $collect);
1574}
1575
1576sub _set_archives_metadata_array
1577{
1578 my $self = shift @_;
1579
1580 my $collect = $self->{'collect'};
1581 my $gsdl_cgi = $self->{'gsdl_cgi'};
1582 my $site = $self->{'site'};
1583 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1584
1585 # look up additional args
1586
1587 my $infodbtype = $self->{'infodbtype'};
1588
1589 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1590
1591 my $json_str = $self->{'json'};
1592 my $doc_array = decode_json $json_str;
1593
1594
1595 my $global_status = 0;
1596 my $global_mess = "";
1597
1598 my @all_docids = ();
1599
1600 foreach my $doc_array_rec ( @$doc_array ) {
1601 my $status = -1;
1602 my $docid = $doc_array_rec->{'docid'};
1603
1604 push(@all_docids,$docid);
1605
1606 my $metaname = $doc_array_rec->{'metaname'};
1607 if(defined $metaname) {
1608
1609 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1610
1611 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1612 my $metavalue = $doc_array_rec->{'metavalue'};
1613 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1614
1615
1616 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1617 # make "accumulate" the default (less destructive, as it won't actually
1618 # delete any existing values)
1619 $metamode = "accumulate";
1620 }
1621
1622 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1623 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1624 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1625 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1626
1627 foreach my $metatable_rec ( @$metatable ) {
1628 $metaname = $metatable_rec->{'metaname'};
1629 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1630 my $metapos = undef;
1631 my $prevmetavalue = undef;
1632 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1633
1634 foreach my $metavalue ( @$metavals ) {
1635 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1636 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1637
1638 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1639 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1640 # metavalue for this metaname will be lost
1641 $metamode = "accumulate";
1642 }
1643 }
1644 }
1645 }
1646
1647 if ($status != 0) {
1648 # Catch error if set infodb entry failed
1649 $global_status = $status;
1650 $global_mess .= "Failed to set metadata key: $docid\n";
1651 $global_mess .= "Exit status: $status\n";
1652 $global_mess .= "System Error Message: $!\n";
1653 $global_mess .= "-" x 20 . "\n";
1654 }
1655 }
1656
1657 if ($global_status != 0) {
1658 $global_mess .= "PATH: $ENV{'PATH'}\n";
1659 $gsdl_cgi->generate_error($global_mess);
1660 }
1661 else {
1662 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1663 $gsdl_cgi->generate_ok_message($mess);
1664 }
1665}
1666
1667sub set_archives_metadata_array
1668{
1669 my $self = shift @_;
1670
1671 my $username = $self->{'username'};
1672 my $collect = $self->{'collect'};
1673 my $gsdl_cgi = $self->{'gsdl_cgi'};
1674# my $gsdlhome = $self->{'gsdlhome'};
1675
1676 if ($baseaction::authentication_enabled) {
1677 # Ensure the user is allowed to edit this collection
1678 &authenticate_user($gsdl_cgi, $username, $collect);
1679 }
1680
1681 my $site = $self->{'site'};
1682 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1683
1684 $gsdl_cgi->checked_chdir($collect_dir);
1685
1686 # Obtain the collect dir
1687 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1688
1689 # Make sure the collection isn't locked by someone else
1690 $self->lock_collection($username, $collect);
1691
1692 $self->_set_archives_metadata_array(@_);
1693
1694 # Release the lock once it is done
1695 $self->unlock_collection($username, $collect);
1696}
1697
1698sub _remove_archives_metadata
1699{
1700 my $self = shift @_;
1701
1702 my $collect = $self->{'collect'};
1703 my $gsdl_cgi = $self->{'gsdl_cgi'};
1704# my $gsdlhome = $self->{'gsdlhome'};
1705 my $infodbtype = $self->{'infodbtype'};
1706
1707 my $site = $self->{'site'};
1708
1709 # Obtain the collect and archive dir
1710 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1711
1712 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1713
1714 # look up additional args
1715 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1716
1717 my $metaname = $self->{'metaname'};
1718 my $metapos = $self->{'metapos'};
1719
1720 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1721
1722 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1723 # default to deleting the first metavalue for the given metaname
1724 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1725 # seem to get deleted in one single remove_archives_meta action invocation.
1726 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1727 # deleted, both get deleted.
1728 if(!defined $metapos && !defined $metavalue) {
1729 $metapos = 0;
1730 }
1731
1732 my $metamode = $self->{'metamode'} || undef;
1733
1734 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1735 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1736
1737 # This now stores the full pathname
1738 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1739
1740 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1741# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1742
1743 if ($status == 0)
1744 {
1745 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1746 $mess .= " $metaname";
1747 $mess .= "->[$metapos]" if (defined $metapos);
1748
1749 $gsdl_cgi->generate_ok_message($mess);
1750 }
1751 else
1752 {
1753 my $mess .= "Failed to remove archives metadata key: $docid\n";
1754 $mess .= "Exit status: $status\n";
1755 $mess .= "System Error Message: $!\n";
1756 $mess .= "-" x 20 . "\n";
1757
1758 $gsdl_cgi->generate_error($mess);
1759 }
1760
1761 #return $status; # in case calling functions have a use for this
1762}
1763
1764sub remove_archives_metadata
1765{
1766 my $self = shift @_;
1767
1768 my $username = $self->{'username'};
1769 my $collect = $self->{'collect'};
1770 my $gsdl_cgi = $self->{'gsdl_cgi'};
1771
1772 if ($baseaction::authentication_enabled)
1773 {
1774 # Ensure the user is allowed to edit this collection
1775 &authenticate_user($gsdl_cgi, $username, $collect);
1776 }
1777
1778 # Make sure the collection isn't locked by someone else
1779 $self->lock_collection($username, $collect);
1780
1781 $self->_remove_archives_metadata(@_);
1782
1783 # Release the lock once it is done
1784 $self->unlock_collection($username, $collect);
1785}
1786
1787sub remove_from_doc_xml
1788{
1789 my $self = shift @_;
1790 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1791
1792 my @start_rules = ('Section' => \&dxml_start_section);
1793
1794 # Set the call-back functions for the metadata tags
1795 my @rules =
1796 (
1797 _default => 'raw',
1798 'Metadata' => \&rfdxml_metadata
1799 );
1800
1801 my $parser = XML::Rules->new
1802 (
1803 start_rules => \@start_rules,
1804 rules => \@rules,
1805 style => 'filter',
1806 output_encoding => 'utf8',
1807# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1808 stripspaces => 2|0|0 # ineffectual
1809 );
1810
1811 my $status = 0;
1812 my $xml_in = "";
1813 if (!open(MIN,"<$doc_xml_filename"))
1814 {
1815 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1816 $status = 1;
1817 }
1818 else
1819 {
1820 # Read them in
1821 my $line;
1822 while (defined ($line=<MIN>)) {
1823 $xml_in .= $line;
1824 }
1825 close(MIN);
1826
1827 # Filter with the call-back functions
1828 my $xml_out = "";
1829
1830 my $MOUT;
1831 if (!open($MOUT,">$doc_xml_filename")) {
1832 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1833 $status = 1;
1834 }
1835 else {
1836 binmode($MOUT,":utf8");
1837 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1838 close($MOUT);
1839 }
1840 }
1841 return $status;
1842}
1843
1844sub rfdxml_metadata
1845{
1846 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1847
1848 # For comparisons, toplevel section is indicated by ""
1849 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1850 my $secid = $parser->{'parameters'}->{'secid'} || "";
1851
1852 if (!($secid eq $curr_sec_num))
1853 {
1854 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1855 return [$tagname => $attrHash];
1856 }
1857
1858 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1859 {
1860 if (!defined $parser->{'parameters'}->{'poscount'})
1861 {
1862 $parser->{'parameters'}->{'poscount'} = 0;
1863 }
1864 else
1865 {
1866 $parser->{'parameters'}->{'poscount'}++;
1867 }
1868
1869 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1870 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
1871 return [];
1872 }
1873 }
1874
1875 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1876 {
1877 return [];
1878 }
1879
1880 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1881 {
1882 return [];
1883 }
1884
1885 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1886 return [$tagname => $attrHash];
1887}
1888
1889sub mxml_metadata
1890{
1891 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1892 my $metaname = $parser->{'parameters'}->{'metaname'};
1893 my $metamode = $parser->{'parameters'}->{'metamode'};
1894
1895 # Report error if we don't see FileName tag before this
1896 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1897
1898 # Don't do anything if we are not in the right FileSet
1899 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1900 if ($file_regexp =~ /\.\*/) {
1901 # Only interested in a file_regexp if it specifies precisely one
1902 # file.
1903 # So, skip anything with a .* in it as it is too general
1904## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1905 return [$tagname => $attrHash];
1906 }
1907 my $src_file = $parser->{'parameters'}->{'src_file'};
1908 if (!($src_file =~ /$file_regexp/)) {
1909 return [$tagname => $attrHash];
1910 }
1911## print STDERR "*** mxl metamode = $metamode\n";
1912
1913 # Find the right metadata tag and checks if we are going to override it
1914 my $name_attr = $attrHash->{'name'};
1915 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1916
1917 # now metadata.xml functions need to keep track of metapos
1918 if (!defined $parser->{'parameters'}->{'poscount'})
1919 {
1920 $parser->{'parameters'}->{'poscount'} = 0;
1921 }
1922 else
1923 {
1924 $parser->{'parameters'}->{'poscount'}++;
1925 }
1926
1927 # If either the metapos or prevmetavalue is set,
1928 # get the value and override the current value
1929 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1930
1931 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1932 {
1933 $attrHash->{'_content'} = $metavalue;
1934
1935 ## print STDERR "**** overriding metadata.xml\n";
1936
1937 # Don't want it to wipe out any other pieces of metadata
1938 $parser->{'parameters'}->{'metamode'} = "done";
1939 }
1940 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1941 {
1942 $attrHash->{'_content'} = $metavalue;
1943 $parser->{'parameters'}->{'metamode'} = "done";
1944 }
1945 }
1946
1947 # mxml_description will process the metadata if metadata is accumulate,
1948 # or if we haven't found the metadata to override
1949
1950 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1951 return [$tagname => $attrHash];
1952}
1953
1954
1955sub mxml_description
1956{
1957 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1958 my $metamode = $parser->{'parameters'}->{'metamode'};
1959
1960 # Failed... Report error if we don't see FileName tag before this
1961 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1962
1963 # Don't do anything if we are not in the right FileSet
1964 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1965 if ($file_regexp =~ m/\.\*/) {
1966 # Only interested in a file_regexp if it specifies precisely one
1967 # file.
1968 # So, skip anything with a .* in it as it is too general
1969 return [$tagname => $attrHash];
1970 }
1971 my $src_file = $parser->{'parameters'}->{'src_file'};
1972
1973 if (!($src_file =~ m/$file_regexp/)) {
1974 return [$tagname => $attrHash];
1975 }
1976
1977 # Accumulate the metadata block to the end of the description block
1978 # Note: This adds metadata block to all description blocks, so if there are
1979 # multiple FileSets, it will add to all of them
1980 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1981
1982 # if metamode was "override" but get to here then it failed to
1983 # find an item to override, in which case it should append its
1984 # value to the end, just like the "accumulate" mode
1985
1986 if ($metamode eq "override") {
1987 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1988 }
1989
1990 # tack a new metadata tag on to the end of the <Metadata>+ block
1991 my $metaname = $parser->{'parameters'}->{'metaname'};
1992 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1993
1994 my $metadata_attr = { '_content' => $metavalue,
1995 'name' => $metaname,
1996 'mode' => "accumulate" };
1997
1998 my $append_metadata = [ "Metadata" => $metadata_attr ];
1999 my $description_content = $attrHash->{'_content'};
2000
2001## print STDERR "*** appending to metadata.xml\n";
2002
2003 # append the new metadata element to the end of the current
2004 # content contained inside this tag
2005 if (ref($description_content) eq "") {
2006 # => string or numeric literal
2007 # this is caused by a <Description> block has no <Metadata> child elements
2008 # => set up an empty array in '_content'
2009 $attrHash->{'_content'} = [ "\n" ];
2010 $description_content = $attrHash->{'_content'};
2011 }
2012
2013 push(@$description_content," ", $append_metadata ,"\n ");
2014 $parser->{'parameters'}->{'metamode'} = "done";
2015 }
2016
2017 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2018 return [$tagname => $attrHash];
2019}
2020
2021
2022sub mxml_filename
2023{
2024 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2025
2026 # Store the filename of the Current Fileset
2027 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2028 # FileName tag must come before Description tag
2029 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2030
2031 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2032 return [$tagname => $attrHash];
2033}
2034
2035
2036sub mxml_fileset
2037{
2038 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2039
2040 # Initilise the current_file
2041 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2042 # FileName tag must come before Description tag
2043 $parser->{'parameters'}->{'current_file'} = "";
2044
2045 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2046 return [$tagname => $attrHash];
2047}
2048
2049sub mxml_directorymetadata
2050{
2051 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2052
2053 # if we haven't processed the metadata when we reach the end of metadata.xml
2054 # it's because there's no particular FileSet element whose FileName matched
2055 # In which case, add a new FileSet for this FileName
2056 my $metamode = $parser->{'parameters'}->{'metamode'};
2057 if($metamode ne "done") {
2058
2059 if ($metamode eq "override") {
2060 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2061 }
2062
2063 # If we get to here and metamode is override, this means there
2064 # was no existing value to overide => treat as an append operation
2065
2066 # Create a new FileSet element and append to DirectoryMetadata
2067 # <FileSet>
2068 # <FileName>src_file</FileName>
2069 # <Description>
2070 # <Metadata mode="" name="">metavalue</Metadata>
2071 # </Description>
2072 # </FileSet>
2073 my $src_file = $parser->{'parameters'}->{'src_file'};
2074 my $metaname = $parser->{'parameters'}->{'metaname'};
2075 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2076 my $metadata_attr = {
2077 '_content' => $metavalue,
2078 'name' => $metaname,
2079 'mode' => "accumulate"
2080 };
2081 my $append_metadata = [ "Metadata" => $metadata_attr ];
2082 my $description_attr->{'_content'} = [ "\n\t\t ", $append_metadata, "\n\t\t"];
2083 my $description_element = [ "Description" => $description_attr ];
2084
2085 #_content is not an attribute, it's special and holds the children of this element
2086 # including the textnode value embedded in this element if any.
2087 my $filename_attr = {'_content' => $src_file};
2088 my $filename_element = [ "FileName" => $filename_attr ];
2089
2090 my $fileset_attr = {};
2091 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2092 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2093
2094
2095 # get children of dirmeta, and push the new FileSet element onto it
2096 print "Appending metadata to metadata.xml\n";
2097 my $dirmeta_content = $attrHash->{'_content'};
2098 if (ref($dirmeta_content)) {
2099 # got some existing interesting nested content
2100 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2101 push(@$dirmeta_content, "\t", $fileset ,"\n");
2102 }
2103 else {
2104 #description_content is most likely a string such as "\n"
2105 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2106 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2107 }
2108
2109 $parser->{'parameters'}->{'metamode'} = "done";
2110 }
2111 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2112 return [$tagname => $attrHash];
2113}
2114
2115
2116sub edit_metadata_xml
2117{
2118 my $self = shift @_;
2119 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2120
2121 # Set the call-back functions for the metadata tags
2122 my @rules =
2123 ( _default => 'raw',
2124 'FileName' => \&mxml_filename,
2125 'Metadata' => \&mxml_metadata,
2126 'Description' => \&mxml_description,
2127 'FileSet' => \&mxml_fileset,
2128 'DirectoryMetadata' => \&mxml_directorymetadata);
2129
2130 # use XML::Rules to add it in (read in and out again)
2131 my $parser = XML::Rules->new(rules => \@rules,
2132 style => 'filter',
2133 output_encoding => 'utf8',
2134 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2135
2136 if (!-e $metadata_xml_filename) {
2137
2138 if (open(MOUT,">$metadata_xml_filename")) {
2139
2140 my $src_file_re = &util::filename_to_regex($src_file);
2141 # shouldn't the following also be in the above utility routine??
2142 # $src_file_re =~ s/\./\\./g;
2143
2144 print MOUT "<?xml version=\"1.0\"?>\n";
2145 print MOUT "<DirectoryMetadata>\n";
2146 print MOUT " <FileSet>\n";
2147 print MOUT " <FileName>$src_file_re</FileName>\n";
2148 print MOUT " <Description>\n";
2149 print MOUT " </Description>\n";
2150 print MOUT " </FileSet>\n";
2151 print MOUT "</DirectoryMetadata>\n";
2152
2153 close(MOUT);
2154 }
2155 else {
2156 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2157 }
2158 }
2159
2160
2161 my $xml_in = "";
2162 if (!open(MIN,"<$metadata_xml_filename")) {
2163 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2164 }
2165 else {
2166 # Read them in
2167 my $line;
2168 while (defined ($line=<MIN>)) {
2169 $xml_in .= $line;
2170 }
2171 close(MIN);
2172
2173 # Filter with the call-back functions
2174 my $xml_out = "";
2175
2176 my $MOUT;
2177 if (!open($MOUT,">$metadata_xml_filename")) {
2178 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2179 }
2180 else {
2181 binmode($MOUT,":utf8");
2182
2183 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2184 # At the moment, I will just hack it!
2185 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2186 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2187 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2188 #print MOUT $xml_out;
2189
2190 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2191 metapos => $metapos,
2192 metavalue => $metavalue,
2193 metamode => $metamode,
2194 src_file => $src_file,
2195 prevmetavalue => $prevmetavalue,
2196 current_file => undef} );
2197 close($MOUT);
2198 }
2199 }
2200}
2201
2202
2203sub set_import_metadata
2204{
2205 my $self = shift @_;
2206
2207 my $username = $self->{'username'};
2208 my $collect = $self->{'collect'};
2209 my $gsdl_cgi = $self->{'gsdl_cgi'};
2210
2211 if ($baseaction::authentication_enabled) {
2212 # Ensure the user is allowed to edit this collection
2213 $self->authenticate_user($username, $collect);
2214 }
2215
2216 # Make sure the collection isn't locked by someone else
2217 $self->lock_collection($username, $collect);
2218
2219 $self->_set_import_metadata(@_);
2220
2221 # Release the lock once it is done
2222 $self->unlock_collection($username, $collect);
2223
2224}
2225
2226sub set_import_metadata_array
2227{
2228 my $self = shift @_;
2229
2230 my $username = $self->{'username'};
2231 my $collect = $self->{'collect'};
2232 my $gsdl_cgi = $self->{'gsdl_cgi'};
2233# my $gsdlhome = $self->{'gsdlhome'};
2234
2235 if ($baseaction::authentication_enabled) {
2236 # Ensure the user is allowed to edit this collection
2237 &authenticate_user($gsdl_cgi, $username, $collect);
2238 }
2239
2240 my $site = $self->{'site'};
2241 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2242
2243 $gsdl_cgi->checked_chdir($collect_dir);
2244
2245 # Make sure the collection isn't locked by someone else
2246 $self->lock_collection($username, $collect);
2247
2248 $self->_set_import_metadata_array(@_);
2249
2250 # Release the lock once it is done
2251 $self->unlock_collection($username, $collect);
2252
2253}
2254
2255
2256sub _set_import_metadata_array
2257{
2258 my $self = shift @_;
2259
2260 my $collect = $self->{'collect'};
2261 my $gsdl_cgi = $self->{'gsdl_cgi'};
2262
2263 my $site = $self->{'site'};
2264 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2265
2266 # look up additional args
2267
2268 my $infodbtype = $self->{'infodbtype'};
2269
2270 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2271 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2272
2273 my $json_str = $self->{'json'};
2274 my $doc_array = decode_json $json_str;
2275
2276 my $global_status = 0;
2277 my $global_mess = "";
2278
2279 my @all_docids = ();
2280
2281 foreach my $doc_array_rec ( @$doc_array )
2282 {
2283 my $status = -1;
2284 my $docid = $doc_array_rec->{'docid'};
2285
2286 my ($docid_root,$docid_secnum);
2287 if(defined $docid) {
2288 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2289 # as yet no support for setting subsection metadata in metadata.xml
2290 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2291 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2292 next; # skip this docid in for loop
2293 }
2294 }
2295
2296 push(@all_docids,$docid); # docid_root rather
2297
2298 my $metaname = $doc_array_rec->{'metaname'};
2299 if (defined $metaname) {
2300 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2301 my $metavalue = $doc_array_rec->{'metavalue'};
2302 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2303
2304 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2305 # make "accumulate" the default (less destructive, as won't actually
2306 # delete any existing values)
2307 $metamode = "accumulate";
2308 }
2309
2310 # adding metapos and prevmetavalue support to import_metadata subroutines
2311 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2312 my $prevmetavalue = $self->{'prevmetavalue'};
2313
2314 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2315
2316 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2317 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2318
2319 foreach my $metatable_rec ( @$metatable ) {
2320 $metaname = $metatable_rec->{'metaname'};
2321 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2322 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2323 # make "accumulate" the default (less destructive, as won't actually
2324 # delete any existing values)
2325 $metamode = "accumulate";
2326 }
2327
2328 # No support for metapos and prevmetavalue in the JSON metatable substructure
2329 my $metapos = undef;
2330 my $prevmetavalue = undef;
2331 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2332
2333 foreach my $metavalue ( @$metavals ) {
2334 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2335
2336 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2337 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2338 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2339 # metavalue for this metaname will be lost
2340 $metamode = "accumulate";
2341 }
2342 }
2343 }
2344 }
2345 }
2346
2347 # always a success message
2348 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2349 $gsdl_cgi->generate_ok_message($mess);
2350}
2351
2352# always returns true (1)
2353sub set_import_metadata_entry
2354{
2355 my $self = shift @_;
2356 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2357
2358 my $info_mess = <<RAWEND;
2359****************************
2360 set_import_metadata_entry()
2361****************************
2362collect = $collect
2363collect_dir = $collect_dir
2364infodbtype = $infodbtype
2365arcinfo_doc_filename = $arcinfo_doc_filename
2366docid = $docid
2367metaname = $metaname
2368metapos = $metapos
2369metavalue = $metavalue
2370metamode = $metamode
2371prevmetavalue = $prevmetavalue
2372****************************
2373RAWEND
2374
2375 $gsdl_cgi->generate_message($info_mess);
2376
2377 # import works with metadata.xml which can have inherited metadata
2378 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2379 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2380 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2381 # what they're doing if they provide a metapos.
2382 if(defined $metapos) {
2383 print STDERR "@@@@ WARNING: metapos defined.\n";
2384 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2385 }
2386
2387 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2388 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2389 my $metadata_xml_file;
2390 my $import_filename = undef;
2391
2392 if (defined $docid) {
2393 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2394 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2395
2396 # This now stores the full pathname
2397 $import_filename = $doc_rec->{'src-file'}->[0];
2398 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2399 # as the array version of the method doesn't support the -f parameter yet
2400 my $import_file = $self->{'f'};
2401 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2402 }
2403
2404 # figure out correct metadata.xml file [?]
2405 # Assuming the metadata.xml file is next to the source file
2406 # Note: This will not work if it is using the inherited metadata from the parent folder
2407 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2408 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2409
2410 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2411 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2412 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2413 if(!defined $metapos && $metamode eq "override") {
2414## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2415 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2416
2417 }
2418
2419 # Edit the metadata.xml
2420 # Modified by Jeffrey from DL Consulting
2421 # Handle the case where there is one metadata.xml file for multiple FileSets
2422 # The XML filter needs to know whether it is in the right FileSet
2423 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2424 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2425 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2426 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2427 #return 0;
2428 return $metadata_xml_filename;
2429}
2430
2431sub _remove_import_metadata
2432{
2433 my $self = shift @_;
2434
2435 my $collect = $self->{'collect'};
2436 my $gsdl_cgi = $self->{'gsdl_cgi'};
2437# my $gsdlhome = $self->{'gsdlhome'};
2438 my $infodbtype = $self->{'infodbtype'};
2439
2440 # Obtain the collect dir
2441 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2442 my $site = $self->{'site'};
2443 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2444
2445 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2446 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2447
2448 # look up additional args
2449 my $docid = $self->{'d'};
2450 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2451 {
2452 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2453 }
2454
2455 my $metaname = $self->{'metaname'};
2456 my $metapos = $self->{'metapos'};
2457 my $metavalue = $self->{'metavalue'};
2458 if(defined $metavalue) {
2459 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2460 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2461 $metapos = 0;
2462 }
2463 my $metamode = $self->{'metamode'} || undef;
2464
2465 # import works with metadata.xml which can have inherited metadata
2466 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2467 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2468 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2469 # what they're doing if they provide a metapos.
2470 if(defined $metapos) {
2471 print STDERR "@@@@ WARNING: metapos defined.\n";
2472 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2473 }
2474
2475 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2476 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2477 my $metadata_xml_file;
2478 my $import_filename = undef;
2479 if (defined $docid)
2480 {
2481 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2482 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2483
2484 # This now stores the full pathname
2485 $import_filename = $doc_rec->{'src-file'}->[0];
2486 }
2487
2488 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2489 {
2490 $gsdl_cgi->generate_error("There is no metadata\n");
2491 }
2492
2493 # figure out correct metadata.xml file [?]
2494 # Assuming the metadata.xml file is next to the source file
2495 # Note: This will not work if it is using the inherited metadata from the parent folder
2496 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2497 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2498
2499 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2500
2501 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2502 $mess .= " $metaname";
2503 $mess .= " = $metavalue\n";
2504
2505 $gsdl_cgi->generate_ok_message($mess);
2506
2507 #return $status; # in case calling functions have a use for this
2508}
2509
2510sub remove_import_metadata
2511{
2512 my $self = shift @_;
2513
2514 my $username = $self->{'username'};
2515 my $collect = $self->{'collect'};
2516 my $gsdl_cgi = $self->{'gsdl_cgi'};
2517
2518 if ($baseaction::authentication_enabled) {
2519 # Ensure the user is allowed to edit this collection
2520 &authenticate_user($gsdl_cgi, $username, $collect);
2521 }
2522
2523 # Make sure the collection isn't locked by someone else
2524 $self->lock_collection($username, $collect);
2525
2526 $self->_remove_import_metadata(@_);
2527
2528 # Release the lock once it is done
2529 $self->unlock_collection($username, $collect);
2530
2531}
2532
2533sub remove_from_metadata_xml
2534{
2535 my $self = shift @_;
2536 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2537 # metamode generally has no meaning for removing meta, but is used by set_meta
2538 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2539
2540 # Set the call-back functions for the metadata tags
2541 my @rules =
2542 (
2543 _default => 'raw',
2544 'Metadata' => \&rfmxml_metadata,
2545 'FileName' => \&mxml_filename
2546 );
2547
2548 my $parser = XML::Rules->new
2549 (
2550 rules => \@rules,
2551 style => 'filter',
2552 output_encoding => 'utf8',
2553 #normalisespaces => 1,
2554 stripspaces => 2|0|0 # ineffectual
2555 );
2556
2557 my $xml_in = "";
2558 if (!open(MIN,"<$metadata_xml_filename"))
2559 {
2560 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2561 }
2562 else
2563 {
2564 # Read them in
2565 my $line;
2566 while (defined ($line=<MIN>)) {
2567 $xml_in .= $line;
2568 }
2569 close(MIN);
2570
2571 # Filter with the call-back functions
2572 my $xml_out = "";
2573
2574 my $MOUT;
2575 if (!open($MOUT,">$metadata_xml_filename")) {
2576 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2577 }
2578 else {
2579 binmode($MOUT,":utf8");
2580 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2581 close($MOUT);
2582 }
2583 }
2584}
2585
2586sub rfmxml_metadata
2587{
2588 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2589
2590 # metadata.xml does not handle subsections
2591
2592 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2593 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2594 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2595 {
2596 if (!defined $parser->{'parameters'}->{'poscount'})
2597 {
2598 $parser->{'parameters'}->{'poscount'} = 0;
2599 }
2600 else
2601 {
2602 $parser->{'parameters'}->{'poscount'}++;
2603 }
2604
2605 # if overriding but no metapos, then clear all the meta for this metaname
2606 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2607 return [];
2608 }
2609
2610 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2611 {
2612 return [];
2613 }
2614
2615 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2616 {
2617 return [];
2618 }
2619 }
2620
2621 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2622 return [$tagname => $attrHash];
2623}
2624
2625sub _remove_live_metadata
2626{
2627 my $self = shift @_;
2628
2629 my $collect = $self->{'collect'};
2630 my $gsdl_cgi = $self->{'gsdl_cgi'};
2631# my $gsdlhome = $self->{'gsdlhome'};
2632 my $infodbtype = $self->{'infodbtype'};
2633
2634 # Obtain the collect dir
2635 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2636 my $site = $self->{'site'};
2637 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2638
2639
2640 # look up additional args
2641 my $docid = $self->{'d'};
2642 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2643 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2644 }
2645
2646 # Generate the dbkey
2647 my $metaname = $self->{'metaname'};
2648 my $dbkey = "$docid.$metaname";
2649
2650 # To people who know $collect_tail please add some comments
2651 # Obtain the live gdbm_db path
2652 my $collect_tail = $collect;
2653 $collect_tail =~ s/^.*[\/\\]//;
2654 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2655 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2656
2657 # Remove the key
2658 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2659 my $status = system($cmd);
2660 if ($status != 0) {
2661 # Catch error if gdbmdel failed
2662 my $mess = "Failed to set metadata key: $dbkey\n";
2663
2664 $mess .= "PATH: $ENV{'PATH'}\n";
2665 $mess .= "cmd = $cmd\n";
2666 $mess .= "Exit status: $status\n";
2667 $mess .= "System Error Message: $!\n";
2668
2669 $gsdl_cgi->generate_error($mess);
2670 }
2671 else {
2672 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2673 }
2674
2675}
2676
2677sub remove_live_metadata
2678{
2679 my $self = shift @_;
2680
2681 my $username = $self->{'username'};
2682 my $collect = $self->{'collect'};
2683 my $gsdl_cgi = $self->{'gsdl_cgi'};
2684 my $gsdlhome = $self->{'gsdlhome'};
2685
2686 if ($baseaction::authentication_enabled) {
2687 # Ensure the user is allowed to edit this collection
2688 &authenticate_user($gsdl_cgi, $username, $collect);
2689 }
2690
2691 # Make sure the collection isn't locked by someone else
2692 $self->lock_collection($username, $collect);
2693
2694 $self->_remove_live_metadata(@_);
2695
2696 $self->unlock_collection($username, $collect);
2697}
2698
2699sub remove_metadata
2700{
2701 my $self = shift @_;
2702
2703 my $where = $self->{'where'};
2704 if(!$where) {
2705 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2706 return;
2707 }
2708
2709 my $username = $self->{'username'};
2710 my $collect = $self->{'collect'};
2711 my $gsdl_cgi = $self->{'gsdl_cgi'};
2712
2713 if ($baseaction::authentication_enabled) {
2714 # Ensure the user is allowed to edit this collection
2715 &authenticate_user($gsdl_cgi, $username, $collect);
2716 }
2717
2718 # Make sure the collection isn't locked by someone else
2719 $self->lock_collection($username, $collect);
2720
2721 # check which directories need to be processed, specified in $where as
2722 # any combination of import|archives|index|live
2723 if($where =~ m/import/) {
2724 $self->_remove_import_metadata(@_);
2725 }
2726 if($where =~ m/archives/) {
2727 $self->_remove_archives_metadata(@_);
2728 }
2729 if($where =~ m/index/) {
2730 $self->_remove_index_metadata(@_);
2731 }
2732
2733 # Release the lock once it is done
2734 $self->unlock_collection($username, $collect);
2735}
2736
2737# the internal version, without authentication
2738sub _remove_index_metadata
2739{
2740 my $self = shift @_;
2741
2742 my $collect = $self->{'collect'};
2743 my $gsdl_cgi = $self->{'gsdl_cgi'};
2744# my $gsdlhome = $self->{'gsdlhome'};
2745 my $infodbtype = $self->{'infodbtype'};
2746
2747 # Obtain the collect dir
2748 my $site = $self->{'site'};
2749 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2750 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2751
2752 # look up additional args
2753 my $docid = $self->{'d'};
2754 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2755 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2756 }
2757 my $metaname = $self->{'metaname'};
2758 my $metapos = $self->{'metapos'};
2759 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2760
2761 # To people who know $collect_tail please add some comments
2762 # Obtain the path to the database
2763 my $collect_tail = $collect;
2764 $collect_tail =~ s/^.*[\/\\]//;
2765 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2766 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2767
2768 # Read the docid entry
2769 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2770
2771 # Basically loop through and unescape_html the values
2772 foreach my $k (keys %$doc_rec) {
2773 my @escaped_v = ();
2774 foreach my $v (@{$doc_rec->{$k}}) {
2775 if ($k eq "contains") {
2776 # protect quotes in ".2;".3 etc
2777 $v =~ s/\"/\\\"/g;
2778 push(@escaped_v, $v);
2779 }
2780 else {
2781 my $ev = &ghtml::unescape_html($v);
2782 $ev =~ s/\"/\\\"/g;
2783 push(@escaped_v, $ev);
2784 }
2785 }
2786 $doc_rec->{$k} = \@escaped_v;
2787 }
2788
2789 # Check to make sure the key does exist
2790 if (!defined ($doc_rec->{$metaname})) {
2791 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2792 }
2793
2794 # Obtain the specified metadata pos
2795 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2796 if(!defined $metapos && !defined $metavalue) {
2797 $metapos = 0;
2798 }
2799
2800
2801 # consider check key is defined before deleting?
2802 # Loop through the metadata array and ignore the specified position
2803 my $filtered_metadata = [];
2804 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2805 for (my $i=0; $i<$num_metadata_vals; $i++) {
2806 my $metaval = shift(@{$doc_rec->{$metaname}});
2807
2808 if (!defined $metavalue && $i != $metapos) {
2809 push(@$filtered_metadata,$metaval);
2810 }
2811
2812 if(defined $metavalue && !($metavalue eq $metaval))
2813 {
2814 push(@$filtered_metadata,$metaval);
2815 }
2816 }
2817 $doc_rec->{$metaname} = $filtered_metadata;
2818
2819 # Turn the record back to string
2820 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2821
2822 # Store it back to the database
2823 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2824 my $status = system($cmd);
2825 if ($status != 0) {
2826 my $mess = "Failed to set metadata key: $docid\n";
2827
2828 $mess .= "PATH: $ENV{'PATH'}\n";
2829 $mess .= "cmd = $cmd\n";
2830 $mess .= "Exit status: $status\n";
2831 $mess .= "System Error Message: $!\n";
2832
2833 $gsdl_cgi->generate_error($mess);
2834 }
2835 else {
2836 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2837 $mess .= " $metaname";
2838 $mess .= "->[$metapos]" if (defined $metapos);
2839
2840 $gsdl_cgi->generate_ok_message($mess);
2841 }
2842
2843 #return $status; # in case calling functions have a use for this
2844}
2845
2846sub remove_index_metadata
2847{
2848 my $self = shift @_;
2849
2850 my $username = $self->{'username'};
2851 my $collect = $self->{'collect'};
2852 my $gsdl_cgi = $self->{'gsdl_cgi'};
2853# my $gsdlhome = $self->{'gsdlhome'};
2854
2855 if ($baseaction::authentication_enabled) {
2856 # Ensure the user is allowed to edit this collection
2857 &authenticate_user($gsdl_cgi, $username, $collect);
2858 }
2859
2860 # Obtain the collect dir
2861 my $site = $self->{'site'};
2862 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2863 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2864
2865 # Make sure the collection isn't locked by someone else
2866 $self->lock_collection($username, $collect);
2867
2868 $self->_remove_index_metadata(@_);
2869
2870 # Release the lock once it is done
2871 $self->unlock_collection($username, $collect);
2872}
2873
2874
2875# Was trying to reused the codes, but the functions need to be broken
2876# down more before they can be reused, otherwise there will be too
2877# much overhead and duplicate process...
2878sub insert_metadata
2879{
2880 my $self = shift @_;
2881
2882 my $username = $self->{'username'};
2883 my $collect = $self->{'collect'};
2884 my $gsdl_cgi = $self->{'gsdl_cgi'};
2885 my $gsdlhome = $self->{'gsdlhome'};
2886 my $infodbtype = $self->{'infodbtype'};
2887
2888 # If the import metadata and gdbm database have been updated, we
2889 # need to insert some notification to warn user that the the text
2890 # they see at the moment is not indexed and require a rebuild.
2891 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2892
2893 if ($baseaction::authentication_enabled) {
2894 # Ensure the user is allowed to edit this collection
2895 $self->authenticate_user($username, $collect);
2896 }
2897
2898 # Obtain the collect and archive dir
2899 my $site = $self->{'site'};
2900 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2901 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2902 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2903
2904 # Make sure the collection isn't locked by someone else
2905 $self->lock_collection($username, $collect);
2906
2907 # Check additional args
2908 my $docid = $self->{'d'};
2909 if (!defined($docid)) {
2910 $gsdl_cgi->generate_error("No document id is specified: d=...");
2911 }
2912 my $metaname = $self->{'metaname'};
2913 if (!defined($metaname)) {
2914 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2915 }
2916 my $metavalue = $self->{'metavalue'};
2917 if (!defined($metavalue) || $metavalue eq "") {
2918 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2919 }
2920 # make "accumulate" the default (less destructive, as won't actually
2921 # delete any existing values)
2922 my $metamode = "accumulate";
2923
2924 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2925 my $metapos = undef;
2926 my $prevmetavalue = undef;
2927
2928 #=======================================================================#
2929 # set_import_metadata [START]
2930 #=======================================================================#
2931 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2932 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2933 my $metadata_xml_file;
2934 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2935 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2936
2937 # This now stores the full pathname
2938 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2939
2940 # figure out correct metadata.xml file [?]
2941 # Assuming the metadata.xml file is next to the source file
2942 # Note: This will not work if it is using the inherited metadata from the parent folder
2943 my ($import_tailname, $import_dirname)
2944 = File::Basename::fileparse($import_filename);
2945 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2946
2947 # Shane's escape characters
2948 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2949 $metavalue =~ s/\,/&#44;/g;
2950 $metavalue =~ s/\:/&#58;/g;
2951 $metavalue =~ s/\|/&#124;/g;
2952 $metavalue =~ s/\(/&#40;/g;
2953 $metavalue =~ s/\)/&#41;/g;
2954 $metavalue =~ s/\[/&#91;/g;
2955 $metavalue =~ s/\\/&#92;/g;
2956 $metavalue =~ s/\]/&#93;/g;
2957 $metavalue =~ s/\{/&#123;/g;
2958 $metavalue =~ s/\}/&#125;/g;
2959 $metavalue =~ s/\"/&#34;/g;
2960 $metavalue =~ s/\`/&#96;/g;
2961 $metavalue =~ s/\n/_newline_/g;
2962
2963 # Edit the metadata.xml
2964 # Modified by Jeffrey from DL Consulting
2965 # Handle the case where there is one metadata.xml file for multiple FileSets
2966 # The XML filter needs to know whether it is in the right FileSet
2967 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2968 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2969 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2970 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2971 #=======================================================================#
2972 # set_import_metadata [END]
2973 #=======================================================================#
2974
2975
2976 #=======================================================================#
2977 # set_metadata (accumulate version) [START]
2978 #=======================================================================#
2979 # To people who know $collect_tail please add some comments
2980 # Obtain path to the database
2981 my $collect_tail = $collect;
2982 $collect_tail =~ s/^.*[\/\\]//;
2983 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2984 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2985
2986 # Read the docid entry
2987 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2988
2989 foreach my $k (keys %$doc_rec) {
2990 my @escaped_v = ();
2991 foreach my $v (@{$doc_rec->{$k}}) {
2992 if ($k eq "contains") {
2993 # protect quotes in ".2;".3 etc
2994 $v =~ s/\"/\\\"/g;
2995 push(@escaped_v, $v);
2996 }
2997 else {
2998 my $ev = &ghtml::unescape_html($v);
2999 $ev =~ s/\"/\\\"/g;
3000 push(@escaped_v, $ev);
3001 }
3002 }
3003 $doc_rec->{$k} = \@escaped_v;
3004 }
3005
3006 # Protect the quotes
3007 $metavalue =~ s/\"/\\\"/g;
3008
3009 # Adds the pending macro
3010 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3011
3012 # If the metadata doesn't exist, create a new one
3013 if (!defined($doc_rec->{$metaname})){
3014 $doc_rec->{$metaname} = [ $macro_metavalue ];
3015 }
3016 # Else, let's acculumate the values
3017 else {
3018 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3019 }
3020
3021 # Generate the record string
3022 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
3023
3024 # Store it into GDBM
3025 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
3026 my $status = system($cmd);
3027 if ($status != 0) {
3028 # Catch error if gdbmget failed
3029 my $mess = "Failed to set metadata key: $docid\n";
3030
3031 $mess .= "PATH: $ENV{'PATH'}\n";
3032 $mess .= "cmd = $cmd\n";
3033 $mess .= "Exit status: $status\n";
3034 $mess .= "System Error Message: $!\n";
3035
3036 $gsdl_cgi->generate_error($mess);
3037 }
3038 else {
3039 my $mess = "insert-metadata successful: Key[$docid]\n";
3040 $mess .= " [In metadata.xml] $metaname";
3041 $mess .= " = $metavalue\n";
3042 $mess .= " [In database] $metaname";
3043 $mess .= " = $macro_metavalue\n";
3044 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3045 $gsdl_cgi->generate_ok_message($mess);
3046 }
3047 #=======================================================================#
3048 # set_metadata (accumulate version) [END]
3049 #=======================================================================#
3050
3051 # Release the lock once it is done
3052 $self->unlock_collection($username, $collect);
3053}
3054
30551;
Note: See TracBrowser for help on using the repository browser.