source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27173

Last change on this file since 27173 was 27173, checked in by ak19, 11 years ago

First commit to do with setting and removing import metadata. 1. Now set_import_meta adds a new FileSet into metadata.xml when there's no FileSet for a given filename. 2. As metadata.xml doesn't support subsections, this prints a warning message. 3. empty strings used for toplevel subsections instead of undef, not only to avoid warnings but also because this makes the check for whether a current subsection is equal to the subsection we're looking for easier when the topsection is also a string (as is the case elsewhere). 4. Fixed references to a generate_error_message subroutine of gsdlcgi which ought to be generate_error instead. Second commit aims to add support for metapos and prevmetavalue to the import_metadata subroutines set and remove and maybe get too, to mirror what the equivalent archives_metadata subroutines do.

File size: 93.9 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos", "metamode" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "where", "json" ],
92 'optional-args' => [ ] },
93
94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
101
102 "set-index-metadata-array" => {
103 'compulsory-args' => [ "json" ],
104 'optional-args' => [ ] },
105
106 "set-live-metadata-array" => {
107 'compulsory-args' => [ "json" ],
108 'optional-args' => [ ] },
109
110 #REMOVE METHODS
111 "remove-import-metadata" => {
112 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
113 'optional-args' => [ ] },
114
115 "remove-archives-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
117 'optional-args' => [ "metapos", "metavalue" ] },
118
119 "remove-live-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ ] },
122
123 "remove-index-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ "metapos", "metavalue" ] },
126
127 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "where", "metapos", "metavalue" ] },
130
131 #INSERT METHODS
132 "insert-metadata" => {
133 'compulsory-args' => [ "d", "metaname", "metavalue" ],
134 'optional-args' => [ ] }
135};
136
137
138sub new
139{
140 my $class = shift (@_);
141 my ($gsdl_cgi,$iis6_mode) = @_;
142
143 # Treat metavalue specially. To transmit this through a GET request
144 # the Javascript side has url-encoded it, so here we need to decode
145 # it before proceeding
146
147 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
148 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
149 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
150
151 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
152 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
153
154 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
155
156 return bless $self, $class;
157}
158
159
160sub get_live_metadata
161{
162 my $self = shift @_;
163
164 my $username = $self->{'username'};
165 my $collect = $self->{'collect'};
166 my $gsdl_cgi = $self->{'gsdl_cgi'};
167 my $gsdlhome = $self->{'gsdlhome'};
168 my $infodbtype = $self->{'infodbtype'};
169
170 # live metadata gets/saves value scoped (prefixed) by the current usename
171 # so (for now) let's not bother to enforce authentication
172
173 # Obtain the collect dir
174 my $site = $self->{'site'};
175 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
176 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
177
178 # Make sure the collection isn't locked by someone else
179 $self->lock_collection($username, $collect);
180
181 # look up additional args
182 my $docid = $self->{'d'};
183 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
184 $gsdl_cgi->generate_error("No docid (d=...) specified.");
185 }
186
187 # Generate the dbkey
188 my $metaname = $self->{'metaname'};
189 my $dbkey = "$docid.$metaname";
190
191 # To people who know $collect_tail please add some comments
192 # Obtain path to the database
193 my $collect_tail = $collect;
194 $collect_tail =~ s/^.*[\/|\\]//;
195 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
196 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
197
198 # Obtain the content of the key
199 my $cmd = "gdbmget $infodb_file_path $dbkey";
200 if (open(GIN,"$cmd |") == 0) {
201 # Catch error if gdbmget failed
202 my $mess = "Failed to get metadata key: $metaname\n";
203 $mess .= "$!\n";
204
205 $gsdl_cgi->generate_error($mess);
206 }
207 else {
208 binmode(GIN,":utf8");
209 # Read everything in and concatenate them into $metavalue
210 my $metavalue = "";
211 my $line;
212 while (defined ($line=<GIN>)) {
213 $metavalue .= $line;
214 }
215 close(GIN);
216 chomp($metavalue); # Get rid off the tailing newlines
217 $gsdl_cgi->generate_ok_message("$metavalue");
218 }
219
220 # Release the lock once it is done
221 $self->unlock_collection($username, $collect);
222}
223
224# just calls the index version
225sub get_metadata
226{
227 my $self = shift @_;
228 $self->get_index_metadata(@_);
229}
230
231sub get_index_metadata
232{
233 my $self = shift @_;
234
235 my $username = $self->{'username'};
236 my $collect = $self->{'collect'};
237 my $gsdl_cgi = $self->{'gsdl_cgi'};
238 my $gsdlhome = $self->{'gsdlhome'};
239
240 # Authenticate user if it is enabled
241 if ($baseaction::authentication_enabled) {
242 # Ensure the user is allowed to edit this collection
243 &authenticate_user($gsdl_cgi, $username, $collect);
244 }
245
246 # Obtain the collect dir
247 my $site = $self->{'site'};
248 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
249 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
250
251 # Make sure the collection isn't locked by someone else
252 $self->lock_collection($username, $collect);
253
254 # look up additional args
255 my $docid = $self->{'d'};
256 my $metaname = $self->{'metaname'};
257 my $metapos = $self->{'metapos'};
258 my $infodbtype = $self->{'infodbtype'};
259
260 # To people who know $collect_tail please add some comments
261 # Obtain path to the database
262 my $collect_tail = $collect;
263 $collect_tail =~ s/^.*[\/\\]//;
264 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
265 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
266
267 # Read the docid entry
268 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
269
270 # Basically loop through and unescape_html the values
271 foreach my $k (keys %$doc_rec) {
272 my @escaped_v = ();
273 foreach my $v (@{$doc_rec->{$k}}) {
274 my $ev = &ghtml::unescape_html($v);
275 push(@escaped_v, $ev);
276 }
277 $doc_rec->{$k} = \@escaped_v;
278 }
279
280 # Obtain the specified metadata value
281 $metapos = 0 if (!defined $metapos);
282 my $metavalue = $doc_rec->{$metaname}->[$metapos];
283 $gsdl_cgi->generate_ok_message("$metavalue");
284
285 # Release the lock once it is done
286 $self->unlock_collection($username, $collect);
287}
288
289
290sub get_import_metadata
291{
292 my $self = shift @_;
293
294 my $username = $self->{'username'};
295 my $collect = $self->{'collect'};
296 my $gsdl_cgi = $self->{'gsdl_cgi'};
297 my $gsdlhome = $self->{'gsdlhome'};
298
299 # Authenticate user if it is enabled
300 if ($baseaction::authentication_enabled) {
301 # Ensure the user is allowed to edit this collection
302 &authenticate_user($gsdl_cgi, $username, $collect);
303 }
304
305 # Obtain the collect dir
306 my $site = $self->{'site'};
307 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
308 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
309
310 # Make sure the collection isn't locked by someone else
311 $self->lock_collection($username, $collect);
312
313 # look up additional args
314 my $docid = $self->{'d'};
315 my $metaname = $self->{'metaname'};
316 my $infodbtype = $self->{'infodbtype'};
317 if (!defined $docid)
318 {
319 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
320 }
321
322 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
323 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
324 my $metadata_xml_file;
325 my $import_filename = undef;
326
327
328 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
329 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
330 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
331
332 # This now stores the full pathname
333 $import_filename = $doc_rec->{'src-file'}->[0];
334
335 # figure out correct metadata.xml file [?]
336 # Assuming the metadata.xml file is next to the source file
337 # Note: This will not work if it is using the inherited metadata from the parent folder
338 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
339 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
340
341 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
342
343 # Release the lock once it is done
344 $self->unlock_collection($username, $collect);
345}
346
347sub get_metadata_from_metadata_xml
348{
349 my $self = shift @_;
350 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
351
352 my @rules =
353 (
354 _default => 'raw',
355 'Metadata' => \&gfmxml_metadata,
356 'FileName' => \&mxml_filename
357 );
358
359 my $parser = XML::Rules->new
360 (
361 rules => \@rules,
362 output_encoding => 'utf8'
363 );
364
365 my $xml_in = "";
366 if (!open(MIN,"<$metadata_xml_filename"))
367 {
368 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
369 }
370 else
371 {
372 # Read them in
373 my $line;
374 while (defined ($line=<MIN>)) {
375 $xml_in .= $line;
376 }
377 close(MIN);
378
379 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
380
381 if(defined $parser->{'pad'}->{'metavalue'})
382 {
383 return $parser->{'pad'}->{'metavalue'};
384 }
385 else
386 {
387 return "";
388 }
389 }
390}
391
392sub gfmxml_metadata
393{
394 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
395
396 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
397 {
398 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
399 }
400}
401
402sub get_archives_metadata
403{
404 my $self = shift @_;
405
406 my $username = $self->{'username'};
407 my $collect = $self->{'collect'};
408 my $gsdl_cgi = $self->{'gsdl_cgi'};
409 my $gsdlhome = $self->{'gsdlhome'};
410 my $infodbtype = $self->{'infodbtype'};
411
412 # Authenticate user if it is enabled
413 if ($baseaction::authentication_enabled) {
414 # Ensure the user is allowed to edit this collection
415 &authenticate_user($gsdl_cgi, $username, $collect);
416 }
417
418 # Obtain the collect dir
419 my $site = $self->{'site'};
420 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
421
422 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
423
424 # Make sure the collection isn't locked by someone else
425 $self->lock_collection($username, $collect);
426
427 # look up additional args
428 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
429 $docid_secnum = "" if (!defined $docid_secnum);
430
431 my $metaname = $self->{'metaname'};
432 my $metapos = $self->{'metapos'};
433 $metapos = 0 if (!defined $metapos);
434
435 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
436 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
437
438 # This now stores the full pathname
439 my $doc_filename = $doc_rec->{'doc-file'}->[0];
440
441 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
442
443 # Release the lock once it is done
444 $self->unlock_collection($username, $collect);
445}
446
447sub get_metadata_from_archive_xml
448{
449 my $self = shift @_;
450 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
451
452 my @start_rules = ('Section' => \&dxml_start_section);
453
454 my @rules =
455 (
456 _default => 'raw',
457 'Metadata' => \&gfdxml_metadata
458 );
459
460 my $parser = XML::Rules->new
461 (
462 start_rules => \@start_rules,
463 rules => \@rules,
464 output_encoding => 'utf8'
465 );
466
467 my $xml_in = "";
468 if (!open(MIN,"<$doc_xml_filename"))
469 {
470 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
471 }
472 else
473 {
474 # Read them in
475 my $line;
476 while (defined ($line=<MIN>)) {
477 $xml_in .= $line;
478 }
479 close(MIN);
480
481 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
482
483 if(defined $parser->{'pad'}->{'metavalue'})
484 {
485 return $parser->{'pad'}->{'metavalue'};
486 }
487 else
488 {
489 return "";
490 }
491 }
492}
493
494sub gfdxml_metadata
495{
496 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
497
498 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
499 {
500 return;
501 }
502
503 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
504 {
505 if (!defined $parser->{'parameters'}->{'poscount'})
506 {
507 $parser->{'parameters'}->{'poscount'} = 0;
508 }
509 else
510 {
511 $parser->{'parameters'}->{'poscount'}++;
512 }
513 }
514
515 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
516 {
517 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
518 }
519}
520
521sub _set_live_metadata
522{
523 my $self = shift @_;
524
525 my $collect = $self->{'collect'};
526 my $gsdl_cgi = $self->{'gsdl_cgi'};
527 #my $gsdlhome = $self->{'gsdlhome'};
528 my $infodbtype = $self->{'infodbtype'};
529
530 # Obtain the collect dir
531 my $site = $self->{'site'};
532 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
533 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
534
535
536 # look up additional args
537 my $docid = $self->{'d'};
538 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
539 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
540 }
541 my $metavalue = $self->{'metavalue'};
542 my $metamode = $self->{'metamode'} || "accumulate";
543 my $append_or_not = ($metamode eq "accumulate") ? " \"append\"" : "";
544
545 # Generate the dbkey
546 my $metaname = $self->{'metaname'};
547 my $dbkey = "$docid.$metaname";
548
549 # To people who know $collect_tail please add some comments
550 # Obtain path to the database
551 my $collect_tail = $collect;
552 $collect_tail =~ s/^.*[\/\\]//;
553 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
554 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
555
556 # Set the new value
557 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"$append_or_not";
558 my $status = system($cmd);
559 if ($status != 0) {
560 # Catch error if gdbmget failed
561 my $mess = "Failed to set metadata key: $dbkey\n";
562
563 $mess .= "PATH: $ENV{'PATH'}\n";
564 $mess .= "cmd = $cmd\n";
565 $mess .= "Exit status: $status\n";
566 $mess .= "System Error Message: $!\n";
567
568 $gsdl_cgi->generate_error($mess);
569 }
570 else {
571 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
572 }
573
574 #return $status; # in case calling functions have any further use for this
575}
576
577sub set_live_metadata
578{
579 my $self = shift @_;
580
581 my $username = $self->{'username'};
582 my $collect = $self->{'collect'};
583 my $gsdl_cgi = $self->{'gsdl_cgi'};
584
585 if ($baseaction::authentication_enabled) {
586 # Ensure the user is allowed to edit this collection
587 &authenticate_user($gsdl_cgi, $username, $collect);
588 }
589
590 # Make sure the collection isn't locked by someone else
591 $self->lock_collection($username, $collect);
592
593 $self->_set_live_metadata(@_);
594
595 # Release the lock once it is done
596 $self->unlock_collection($username, $collect);
597}
598
599sub set_index_metadata_entry
600{
601 my $self = shift @_;
602 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
603
604 # To people who know $collect_tail please add some comments
605 # Obtain path to the database
606 my $collect_tail = $collect;
607 $collect_tail =~ s/^.*[\/\\]//;
608 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
609 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
610
611# print STDERR "**** infodb file path = $infodb_file_path\n";
612# print STDERR "***** infodb type = $infodbtype\n";
613
614 # Read the docid entry
615 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
616
617 # Set the metadata value
618 if (defined $metapos) {
619 # if metamode=accumulate AND metapos, warn user and then use metapos
620 if (defined $metamode && $metamode eq "accumulate") {
621 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
622 print STDERR "**** Proceeding by using metapos\n";
623 }
624 $doc_rec->{$metaname}->[$metapos] = $metavalue;
625 }
626 elsif (defined $metamode && $metamode eq "accumulate") {
627 if(defined $doc_rec->{$metaname}) {
628 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
629 } else {
630 $doc_rec->{$metaname} = [ $metavalue ];
631 }
632
633 }
634 else { # default for index was to override
635 $doc_rec->{$metaname} = [ $metavalue ];
636 }
637
638 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
639
640 return $status;
641
642}
643
644sub _set_import_metadata
645{
646 my $self = shift @_;
647
648 my $collect = $self->{'collect'};
649 my $gsdl_cgi = $self->{'gsdl_cgi'};
650 my $infodbtype = $self->{'infodbtype'};
651# my $gsdlhome = $self->{'gsdlhome'};
652
653 # Obtain the collect and archive dir
654 my $site = $self->{'site'};
655 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
656 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
657 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
658
659 # look up additional args
660 # want either d= or f=
661 my $docid = $self->{'d'};
662 my ($docid_root,$docid_secnum);
663 if(defined $docid) {
664 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
665 # as yet no support for setting subsection metadata in metadata.xml
666 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
667 $gsdl_cgi->generate_ok_message("*** Unable to set import metadata for subsections at this stage.\n");
668 return;
669 }
670 }
671
672 my $import_file = $self->{'f'};
673 if ((!defined $docid) && (!defined $import_file)) {
674 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
675 }
676
677 # Get the parameters and set default mode to "accumulate"
678 my $metaname = $self->{'metaname'};
679 my $metavalue = $self->{'metavalue'};
680## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
681 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
682
683 my $metamode = $self->{'metamode'};
684 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
685 # make "accumulate" the default (less destructive, as won't actually
686 # delete any existing values)
687 $metamode = "accumulate";
688 }
689
690 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
691 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
692 my $metadata_xml_file;
693 my $import_filename = undef;
694 if (defined $docid) {
695 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
696 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
697
698 # This now stores the full pathname
699 $import_filename = $doc_rec->{'src-file'}->[0];
700 }
701 else {
702 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
703 }
704
705 # figure out correct metadata.xml file [?]
706 # Assuming the metadata.xml file is next to the source file
707 # Note: This will not work if it is using the inherited metadata from the parent folder
708 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
709 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
710
711 # Edit the metadata.xml
712 # Modified by Jeffrey from DL Consulting
713 # Handle the case where there is one metadata.xml file for multiple FileSets
714 # The XML filter needs to know whether it is in the right FileSet
715 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
716 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
717 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
718 $metaname, $metavalue, $metamode, $import_tailname);
719
720
721 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
722 $mess .= " $metaname";
723 $mess .= " = $metavalue";
724 $mess .= " ($metamode)\n";
725
726 $gsdl_cgi->generate_ok_message($mess);
727
728 #return $status; # in case calling functions have any further use for this
729}
730
731# the version of set_index_meta that doesn't do authentication
732sub _set_archives_metadata
733{
734 my $self = shift @_;
735
736 my $collect = $self->{'collect'};
737 my $gsdl_cgi = $self->{'gsdl_cgi'};
738 my $infodbtype = $self->{'infodbtype'};
739
740 # Obtain the collect and archive dir
741 my $site = $self->{'site'};
742 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
743 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
744
745 # look up additional args
746 my $docid = $self->{'d'};
747 my $metaname = $self->{'metaname'};
748 my $metavalue = $self->{'metavalue'};
749 my $prevmetavalue = $self->{'prevmetavalue'};
750
751 my $metapos = $self->{'metapos'}; # Don't append "|| undef", since if metapos=0 it will then be set to undef
752
753 # Don't set metapos=0 if it's undefined, as doing so will interfere when overriding all values for a metaname
754 # (all values for a metaname should get overridden if metamode=override combined with no user-defined metapos)
755
756 my $metamode = $self->{'metamode'};
757 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
758 # make "accumulate" the default (less destructive, as won't actually
759 # delete any existing values)
760 $metamode = "accumulate";
761 }
762
763 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
764 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
765
766 if ($status == 0) {
767 my $mess = "set-archives-metadata successful: Key[$docid]\n";
768 $mess .= " $metaname";
769 $mess .= "->[$metapos]" if (defined $metapos);
770 $mess .= " = $metavalue";
771 $mess .= " ($metamode)\n";
772
773 $gsdl_cgi->generate_ok_message($mess);
774 }
775 else {
776 my $mess .= "Failed to set archives metadata key: $docid\n";
777 $mess .= "Exit status: $status\n";
778 if(defined $self->{'error_msg'}) {
779 $mess .= "Error Message: $self->{'error_msg'}\n";
780 } else {
781 $mess .= "System Error Message: $!\n";
782 }
783 $mess .= "-" x 20 . "\n";
784
785 $gsdl_cgi->generate_error($mess);
786 }
787
788 #return $status; # in case calling functions have any further use for this
789}
790
791
792# the version of set_index_meta that doesn't do authentication
793sub _set_index_metadata
794{
795 my $self = shift @_;
796
797 my $collect = $self->{'collect'};
798 my $gsdl_cgi = $self->{'gsdl_cgi'};
799
800 my $site = $self->{'site'};
801 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
802
803 # look up additional args
804 my $docid = $self->{'d'};
805 my $metaname = $self->{'metaname'};
806 my $metapos = $self->{'metapos'};
807 my $metavalue = $self->{'metavalue'};
808 my $infodbtype = $self->{'infodbtype'};
809 my $metamode = $self->{'metamode'};
810
811 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
812
813 if ($status != 0) {
814 # Catch error if set infodb entry failed
815 my $mess = "Failed to set metadata key: $docid\n";
816
817 $mess .= "PATH: $ENV{'PATH'}\n";
818 $mess .= "Exit status: $status\n";
819 $mess .= "System Error Message: $!\n";
820
821 $gsdl_cgi->generate_error($mess);
822 }
823 else {
824 my $mess = "set-index-metadata successful: Key[$docid]\n";
825 $mess .= " $metaname";
826 $mess .= "->[$metapos]" if (defined $metapos);
827 $mess .= " = $metavalue\n";
828
829 $gsdl_cgi->generate_ok_message($mess);
830 }
831
832 #return $status; # in case calling functions have any further use for this
833}
834
835sub set_index_metadata
836{
837 my $self = shift @_;
838
839 my $username = $self->{'username'};
840 my $collect = $self->{'collect'};
841 my $gsdl_cgi = $self->{'gsdl_cgi'};
842 #my $gsdlhome = $self->{'gsdlhome'};
843
844 if ($baseaction::authentication_enabled) {
845 # Ensure the user is allowed to edit this collection
846 &authenticate_user($gsdl_cgi, $username, $collect);
847 }
848
849 my $site = $self->{'site'};
850 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
851
852 $gsdl_cgi->checked_chdir($collect_dir);
853
854 # Obtain the collect dir
855 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
856
857 # Make sure the collection isn't locked by someone else
858 $self->lock_collection($username, $collect);
859
860 $self->_set_index_metadata(@_);
861
862 # Release the lock once it is done
863 $self->unlock_collection($username, $collect);
864}
865
866# call this to set the metadata for a combination of dirs archives, import or index, or live
867# if none specified, defaults to index which was the original behaviour of set_metadata.
868sub set_metadata
869{
870 my $self = shift @_;
871
872 # Testing that not defining a variable, setting it to "" or to " " all return false
873 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
874
875 my $where = $self->{'where'};
876 if(!$where) {
877 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
878 return;
879 }
880
881 # authenticate and lock collection once, even if processing multiple dirs
882 my $username = $self->{'username'};
883 my $collect = $self->{'collect'};
884 my $gsdl_cgi = $self->{'gsdl_cgi'};
885
886 if ($baseaction::authentication_enabled) {
887 # Ensure the user is allowed to edit this collection
888 #&authenticate_user($gsdl_cgi, $username, $collect);
889 $self->authenticate_user($username, $collect);
890 }
891
892 if($where =~ m/index/) {
893 my $site = $self->{'site'};
894 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
895 $gsdl_cgi->checked_chdir($collect_dir);
896 }
897
898 # Make sure the collection isn't locked by someone else
899 $self->lock_collection($username, $collect);
900
901
902 # now at last can set the metadata. $where can specify multiple
903 # $where is of the form: import|archives|index, or a subset thereof
904
905 #my @whichdirs = split('\|', $where);
906
907 # just check whether $where contains import/archives/index/live in turn, and
908 # for each case, process it accordingly
909 if($where =~ m/import/) {
910 $self->_set_import_metadata(@_);
911 }
912
913 if($where =~ m/archives/) {
914
915 # look up docID arg which is optional to set_metadata because it's optional
916 # to set_import, but which is compulsory to set_archives_metadata
917 my $docid = $self->{'d'};
918 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
919 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
920 }
921 # we have a docid, so can set archives meta
922 $self->_set_archives_metadata(@_);
923 }
924
925 if($where =~ m/index/) {
926
927 # look up docID arg which is optional to set_metadata because it's optional
928 # to set_import, but which is compulsory to set_archives_metadata
929 my $docid = $self->{'d'};
930 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
931 $gsdl_cgi->generate_error("No docid (d=...) specified.");
932 }
933 # we have a docid, so can set index meta
934 $self->_set_index_metadata(@_);
935 }
936
937 if($where =~ m/live/) {
938 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
939 }
940
941 # Release the lock once it is done
942 $self->unlock_collection($username, $collect);
943}
944
945sub set_metadata_array
946{
947 my $self = shift @_;
948
949 my $where = $self->{'where'};
950 if(!$where) {
951 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
952 return;
953 }
954
955 my $username = $self->{'username'};
956 my $collect = $self->{'collect'};
957 my $gsdl_cgi = $self->{'gsdl_cgi'};
958
959 if ($baseaction::authentication_enabled) {
960 # Ensure the user is allowed to edit this collection
961 &authenticate_user($gsdl_cgi, $username, $collect);
962 }
963
964 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
965 # But including the stmt during this code reorganisation to preserve as-is what used to happen
966 my $site = $self->{'site'};
967 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
968 $gsdl_cgi->checked_chdir($collect_dir);
969
970 # Make sure the collection isn't locked by someone else
971 $self->lock_collection($username, $collect);
972
973 if($where =~ m/import/) {
974 $self->_set_import_metadata_array(@_);
975 }
976 if($where =~ m/archives/) {
977 $self->_set_archives_metadata_array(@_);
978 }
979 if($where =~ m/index/) {
980 $self->_set_index_metadata_array(@_);
981 }
982 if($where =~ m/live/) {
983 $self->_set_live_metadata_array(@_);
984 }
985
986 # Release the lock once it is done
987 $self->unlock_collection($username, $collect);
988}
989
990sub _set_index_metadata_array
991{
992 my $self = shift @_;
993
994 my $collect = $self->{'collect'};
995 my $gsdl_cgi = $self->{'gsdl_cgi'};
996 my $site = $self->{'site'};
997 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
998
999
1000 # look up additional args
1001
1002 my $infodbtype = $self->{'infodbtype'};
1003
1004 my $json_str = $self->{'json'};
1005 my $doc_array = decode_json $json_str;
1006
1007
1008 my $global_status = 0;
1009 my $global_mess = "";
1010
1011 my @all_docids = ();
1012
1013 foreach my $doc_array_rec ( @$doc_array ) {
1014
1015 my $status = -1;
1016 my $docid = $doc_array_rec->{'docid'};
1017
1018 push(@all_docids,$docid);
1019
1020 my $metaname = $doc_array_rec->{'metaname'};
1021 if(defined $metaname) {
1022 my $metapos = $doc_array_rec->{'metapos'};
1023 my $metavalue = $doc_array_rec->{'metavalue'};
1024 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1025
1026 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1027 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1028 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1029
1030 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1031 $metaname = $metatable_rec->{'metaname'};
1032 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1033 my $metapos = undef;
1034 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1035
1036 foreach my $metavalue ( @$metavals ) { # metavals is an array
1037 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1038 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1039 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1040 # metavalue for this metaname will be lost
1041 $metamode = "accumulate";
1042 }
1043 }
1044 }
1045 }
1046
1047 if ($status != 0) {
1048 # Catch error if set infodb entry failed
1049 $global_status = $status;
1050 $global_mess .= "Failed to set metadata key: $docid\n";
1051 $global_mess .= "Exit status: $status\n";
1052 $global_mess .= "System Error Message: $!\n";
1053 $global_mess .= "-" x 20;
1054 }
1055 }
1056
1057 if ($global_status != 0) {
1058 $global_mess .= "PATH: $ENV{'PATH'}\n";
1059 $gsdl_cgi->generate_error($global_mess);
1060 }
1061 else {
1062 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1063 $gsdl_cgi->generate_ok_message($mess);
1064 }
1065}
1066
1067sub set_index_metadata_array
1068{
1069 my $self = shift @_;
1070
1071 my $username = $self->{'username'};
1072 my $collect = $self->{'collect'};
1073 my $gsdl_cgi = $self->{'gsdl_cgi'};
1074# my $gsdlhome = $self->{'gsdlhome'};
1075
1076 if ($baseaction::authentication_enabled) {
1077 # Ensure the user is allowed to edit this collection
1078 &authenticate_user($gsdl_cgi, $username, $collect);
1079 }
1080
1081 my $site = $self->{'site'};
1082 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1083
1084 $gsdl_cgi->checked_chdir($collect_dir);
1085
1086 # Obtain the collect dir
1087 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1088
1089 # Make sure the collection isn't locked by someone else
1090 $self->lock_collection($username, $collect);
1091
1092 $self->_set_index_metadata_array(@_);
1093
1094 # Release the lock once it is done
1095 $self->unlock_collection($username, $collect);
1096}
1097
1098# experimental, newly added in and untested
1099sub _set_live_metadata_array
1100{
1101 my $self = shift @_;
1102
1103 my $collect = $self->{'collect'};
1104 my $gsdl_cgi = $self->{'gsdl_cgi'};
1105
1106 my $site = $self->{'site'};
1107 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1108
1109
1110 # look up additional args
1111 my $infodbtype = $self->{'infodbtype'};
1112 # To people who know $collect_tail please add some comments
1113 # Obtain path to the database
1114 my $collect_tail = $collect;
1115 $collect_tail =~ s/^.*[\/\\]//;
1116 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1117 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1118
1119
1120 my $json_str = $self->{'json'};
1121 my $doc_array = decode_json $json_str;
1122
1123
1124 my $global_status = 0;
1125 my $global_mess = "";
1126
1127 my @all_docids = ();
1128
1129
1130 foreach my $doc_array_rec ( @$doc_array ) {
1131
1132 my $status = -1;
1133 my $docid = $doc_array_rec->{'docid'};
1134
1135 push(@all_docids,$docid);
1136
1137 my $metaname = $doc_array_rec->{'metaname'};
1138 if(defined $metaname) {
1139 my $dbkey = "$docid.$metaname";
1140 my $metavalue = $doc_array_rec->{'metavalue'};
1141 # no metapos for live_meta, so can default metamode to accumulate
1142 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'} || "accumulate";
1143
1144 my $append_or_not = ($metamode eq "accumulate") ? " \"append\"" : "";
1145
1146 # Set the new value
1147 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"$append_or_not";
1148 $status = system($cmd);
1149
1150 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1151 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1152 foreach my $metatable_rec ( @$metatable ) {
1153 $metaname = $metatable_rec->{'metaname'};
1154 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'} || "accumulate";
1155 my $append_or_not = ($metamode eq "accumulate") ? " \"append\"" : "";
1156 my $dbkey = "$docid.$metaname";
1157
1158 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1159 foreach my $metavalue ( @$metavals ) {
1160 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"$append_or_not";
1161 $status = system($cmd);
1162 $append_or_not = " \"append\""; # even if metamode=override, need to accumulate all subsequent
1163 # metavals for this metaname, else the just-assigned metavalue for this metaname will be lost
1164 }
1165 }
1166
1167 }
1168
1169 if ($status != 0) {
1170 # Catch error if gdbmget failed
1171 $global_status = $status;
1172 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1173 $global_mess .= "Exit status: $status\n";
1174 $global_mess .= "System Error Message: $!\n";
1175 $global_mess .= "-" x 20;
1176 }
1177 }
1178
1179 if ($global_status != 0) {
1180 $global_mess .= "PATH: $ENV{'PATH'}\n";
1181 $gsdl_cgi->generate_error($global_mess);
1182 }
1183 else {
1184 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1185 $gsdl_cgi->generate_ok_message($mess);
1186 }
1187}
1188
1189sub set_live_metadata_array
1190{
1191 my $self = shift @_;
1192
1193 my $username = $self->{'username'};
1194 my $collect = $self->{'collect'};
1195 my $gsdl_cgi = $self->{'gsdl_cgi'};
1196
1197 if ($baseaction::authentication_enabled) {
1198 # Ensure the user is allowed to edit this collection
1199 &authenticate_user($gsdl_cgi, $username, $collect);
1200 }
1201
1202 my $site = $self->{'site'};
1203 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1204
1205 $gsdl_cgi->checked_chdir($collect_dir);
1206
1207 # Make sure the collection isn't locked by someone else
1208 $self->lock_collection($username, $collect);
1209
1210 $self->_set_live_metadata_array(@_);
1211
1212 # Release the lock once it is done
1213 $self->unlock_collection($username, $collect);
1214}
1215
1216
1217sub dxml_metadata
1218{
1219 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1220 my $metaname = $parser->{'parameters'}->{'metaname'};
1221 my $metamode = $parser->{'parameters'}->{'metamode'};
1222
1223 print STDERR "**** Processing closing </Metadata> tag\n";
1224
1225 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1226
1227 # Find the right metadata tag and checks if we are going to
1228 # override it
1229 #
1230 # Note: This over writes the first metadata block it
1231 # encountered. If there are multiple Sections in the doc.xml, it
1232 # might not behave as you would expect
1233
1234 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1235## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1236## print STDERR "**** metamode = $metamode\n";
1237
1238 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1239 {
1240 my $name_attr = $attrHash->{'name'};
1241 if (($name_attr eq $metaname) && ($metamode eq "override"))
1242 {
1243 if (!defined $parser->{'parameters'}->{'poscount'})
1244 {
1245 $parser->{'parameters'}->{'poscount'} = 0;
1246 }
1247 else
1248 {
1249 $parser->{'parameters'}->{'poscount'}++;
1250 }
1251
1252 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1253 {
1254 ##print STDERR "#### got match!!\n";
1255 # Get the value and override the current value
1256 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1257 $attrHash->{'_content'} = $metavalue;
1258
1259 # Don't want it to wipe out any other pieces of metadata
1260 $parser->{'parameters'}->{'metamode'} = "done";
1261 }
1262 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1263 {
1264 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1265 $attrHash->{'_content'} = $metavalue;
1266 $parser->{'parameters'}->{'metamode'} = "done";
1267 }
1268 }
1269 }
1270
1271 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1272 return [$tagname => $attrHash];
1273}
1274
1275# This method exists purely for catching invalid section numbers that the client
1276# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1277# we've seen all the Sections in the doc.xml, and none of their section nums matched
1278# if the metamode has not been set to 'done' by then.
1279sub dxml_archive
1280{
1281 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1282 my $metamode = $parser->{'parameters'}->{'metamode'};
1283
1284 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1285 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1286
1287# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1288
1289 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1290 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1291
1292 my $metaname = $parser->{'parameters'}->{'metaname'};
1293 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1294
1295 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1296 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1297 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1298 print STDERR "\tin section $opt_doc_secnum.\n";
1299 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1300 }
1301
1302 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1303 return [$tagname => $attrHash];
1304}
1305
1306sub dxml_description
1307{
1308 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1309 my $metamode = $parser->{'parameters'}->{'metamode'};
1310
1311 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1312 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1313
1314 print STDERR "**** Processing a closing </Description> tag \n";
1315# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1316
1317 # Accumulate the metadata
1318
1319 # We'll be accumulating metadata at this point if we haven't found and therefore
1320 # haven't processed the metadata yet.
1321 # For subsections, this means that if we're at a matching subsection, but haven't
1322 # found the correct metaname to override in that subsection, we accumulate it as new
1323 # meta in the subsection by adding it to the current description.
1324 # If there's no subsection info for the metadata, it will accumulate at the top level
1325 # section description if we hadn't found a matching metaname to override at this point.
1326
1327 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1328 # at the toplevel section. In the latter case, it means we want to process meta in the
1329 # toplevel section. So the eq check between the values below will work in all cases.
1330
1331 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1332 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1333 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1334
1335 if ($curr_secnum eq $opt_doc_secnum
1336 && ($metamode eq "accumulate" || $metamode eq "override")) {
1337 if ($metamode eq "override") {
1338 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1339 }
1340
1341 # If we get to here and metamode is override, this means there
1342 # was no existing value to overide => treat as an append operation
1343
1344 # Tack a new metadata tag on to the end of the <Metadata>+ block
1345 my $metaname = $parser->{'parameters'}->{'metaname'};
1346 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1347
1348 my $metadata_attr = {
1349 '_content' => $metavalue,
1350 'name' => $metaname,
1351 'mode' => "accumulate"
1352 };
1353
1354 my $append_metadata = [ "Metadata" => $metadata_attr ];
1355 my $description_content = $attrHash->{'_content'};
1356
1357 print "Appending metadata to doc.xml\n";
1358
1359 if (ref($description_content)) {
1360 # got some existing interesting nested content
1361 push(@$description_content, " ", $append_metadata ,"\n ");
1362 }
1363 else {
1364 #description_content is most likely a string such as "\n"
1365 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1366 }
1367
1368 $parser->{'parameters'}->{'metamode'} = "done";
1369 }
1370 else {
1371 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1372## print STDERR "**** NOT ACCUMULATE?!? \n";
1373 }
1374
1375 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1376 return [$tagname => $attrHash];
1377}
1378
1379
1380sub dxml_start_section
1381{
1382 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1383
1384 my $new_depth = scalar(@$contextArray);
1385
1386 print STDERR "**** START SECTION \n";
1387
1388 if ($new_depth == 1) {
1389 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1390 $parser->{'parameters'}->{'curr_section_num'} = "";
1391 }
1392
1393 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1394 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1395
1396 my $new_secnum;
1397
1398 if ($new_depth > $old_depth) {
1399 # child subsection
1400 $new_secnum = "$old_secnum.1";
1401 }
1402 elsif ($new_depth == $old_depth) {
1403 # sibling section => increase it's value by 1
1404 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1405 $tail_num++;
1406 $new_secnum = $old_secnum;
1407 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1408 }
1409 else {
1410 # back up to parent section => lopp off tail
1411 $new_secnum = $old_secnum;
1412 $new_secnum =~ s/\.\d+$//;
1413 }
1414
1415 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1416 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1417
1418 1;
1419}
1420
1421sub edit_xml_file
1422{
1423 my $self = shift @_;
1424 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1425
1426 # use XML::Rules to add it in (read in and out again)
1427 my $parser = XML::Rules->new(start_rules => $start_rules,
1428 rules => $rules,
1429 style => 'filter',
1430 output_encoding => 'utf8' );
1431
1432 my $xml_in = "";
1433 if (!open(MIN,"<$filename")) {
1434 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1435 }
1436 else {
1437 # Read all the text in
1438 my $line;
1439 while (defined ($line=<MIN>)) {
1440 $xml_in .= $line;
1441 }
1442 close(MIN);
1443
1444 my $MOUT;
1445 if (!open($MOUT,">$filename")) {
1446 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1447 }
1448 else {
1449 # Matched lines will get handled by the call backs
1450## my $xml_out = "";
1451
1452 binmode($MOUT,":utf8");
1453 $parser->filter($xml_in,$MOUT, $options);
1454
1455# binmode(MOUT,":utf8");
1456# print MOUT $xml_out;
1457 close($MOUT);
1458 }
1459 }
1460
1461 # copy across any custom error information that was stored during parsing
1462 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1463}
1464
1465sub edit_doc_xml
1466{
1467 my $self = shift @_;
1468 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1469
1470 my $info_mess = <<RAWEND;
1471****************************
1472 edit_doc_xml()
1473****************************
1474doc_xml_filename = $doc_xml_filename
1475metaname = $metaname
1476metavalue = $metavalue
1477metapos = $metapos
1478metamode = $metamode
1479opt_secnum = $opt_secnum
1480prevmetavalue = $prevmetavalue
1481****************************
1482RAWEND
1483
1484 $gsdl_cgi->generate_message($info_mess);
1485
1486 # To monitor which section/subsection number we are in
1487 my @start_rules =
1488 ( 'Section' => \&dxml_start_section );
1489
1490 # use XML::Rules to add it in (read in and out again)
1491 # Set the call back functions
1492 my @rules =
1493 ( _default => 'raw',
1494 'Metadata' => \&dxml_metadata,
1495 'Description' => \&dxml_description,
1496 'Archive' => \&dxml_archive); # just for catching errors at end
1497
1498 # Sets the parameters
1499 my $options = { 'metaname' => $metaname,
1500 'metapos' => $metapos,
1501 'metavalue' => $metavalue,
1502 'metamode' => $metamode,
1503 'prevmetavalue' => $prevmetavalue };
1504
1505 if (defined $opt_secnum) {
1506 $options->{'secnum'} = $opt_secnum;
1507 }
1508
1509 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1510}
1511
1512sub set_archives_metadata_entry
1513{
1514 my $self = shift @_;
1515 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1516
1517 my $info_mess = <<RAWEND;
1518****************************
1519 set_archives_metadata_entry()
1520****************************
1521archive_dir = $archive_dir
1522collect_dir = $collect_dir
1523collect = $collect
1524infodbtype = $infodbtype
1525docid = $docid
1526metaname = $metaname
1527metapos = $metapos
1528metavalue = $metavalue
1529metamode = $metamode
1530prevmetavalue = $prevmetavalue
1531****************************
1532RAWEND
1533
1534 $gsdl_cgi->generate_message($info_mess);
1535
1536 # Obtain the doc.xml path for the specified docID
1537 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1538
1539 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1540 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1541 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1542
1543 # The $doc_xml_file is relative to the archives, and now let's get the full path
1544 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1545 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1546
1547 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1548 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1549 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1550 if(!defined $metapos && $metamode eq "override") {
1551 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1552 }
1553
1554 # Edit the doc.xml file with the specified metadata name, value and position.
1555 # TODO: there is a potential problem here as this edit_doc_xml function
1556 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1557 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1558
1559 # dxml_metadata method ignores metapos if metamode anything other than override
1560 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1561 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1562
1563 # return 0; # return 0 for now to indicate no error
1564 return (defined $self->{'error_msg'}) ? 1 : 0;
1565}
1566
1567
1568sub set_archives_metadata
1569{
1570 my $self = shift @_;
1571
1572 my $username = $self->{'username'};
1573 my $collect = $self->{'collect'};
1574 my $gsdl_cgi = $self->{'gsdl_cgi'};
1575
1576 if ($baseaction::authentication_enabled) {
1577 # Ensure the user is allowed to edit this collection
1578 $self->authenticate_user($username, $collect);
1579 }
1580
1581 # Make sure the collection isn't locked by someone else
1582 $self->lock_collection($username, $collect);
1583
1584 $self->_set_archives_metadata(@_);
1585
1586 # Release the lock once it is done
1587 $self->unlock_collection($username, $collect);
1588}
1589
1590sub _set_archives_metadata_array
1591{
1592 my $self = shift @_;
1593
1594 my $collect = $self->{'collect'};
1595 my $gsdl_cgi = $self->{'gsdl_cgi'};
1596 my $site = $self->{'site'};
1597 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1598
1599 # look up additional args
1600
1601 my $infodbtype = $self->{'infodbtype'};
1602
1603 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1604
1605 my $json_str = $self->{'json'};
1606 my $doc_array = decode_json $json_str;
1607
1608
1609 my $global_status = 0;
1610 my $global_mess = "";
1611
1612 my @all_docids = ();
1613
1614 foreach my $doc_array_rec ( @$doc_array ) {
1615 my $status = -1;
1616 my $docid = $doc_array_rec->{'docid'};
1617
1618 push(@all_docids,$docid);
1619
1620 my $metaname = $doc_array_rec->{'metaname'};
1621 if(defined $metaname) {
1622
1623 my $metapos = $doc_array_rec->{'metapos'} || 0;
1624 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1625 my $metavalue = $doc_array_rec->{'metavalue'};
1626 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1627
1628 # Some sanity checks
1629 $metapos = 0 if (!defined $metapos);
1630
1631 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1632 # make "accumulate" the default (less destructive, as it won't actually
1633 # delete any existing values)
1634 $metamode = "accumulate";
1635 }
1636
1637 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1638 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1639 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1640 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1641
1642 foreach my $metatable_rec ( @$metatable ) {
1643 $metaname = $metatable_rec->{'metaname'};
1644 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1645 my $metapos = undef;
1646 my $prevmetavalue = undef;
1647 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1648
1649 foreach my $metavalue ( @$metavals ) {
1650 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1651 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1652
1653 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1654 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1655 # metavalue for this metaname will be lost
1656 $metamode = "accumulate";
1657 }
1658 }
1659 }
1660 }
1661
1662 if ($status != 0) {
1663 # Catch error if set infodb entry failed
1664 $global_status = $status;
1665 $global_mess .= "Failed to set metadata key: $docid\n";
1666 $global_mess .= "Exit status: $status\n";
1667 $global_mess .= "System Error Message: $!\n";
1668 $global_mess .= "-" x 20 . "\n";
1669 }
1670 }
1671
1672 if ($global_status != 0) {
1673 $global_mess .= "PATH: $ENV{'PATH'}\n";
1674 $gsdl_cgi->generate_error($global_mess);
1675 }
1676 else {
1677 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1678 $gsdl_cgi->generate_ok_message($mess);
1679 }
1680}
1681
1682sub set_archives_metadata_array
1683{
1684 my $self = shift @_;
1685
1686 my $username = $self->{'username'};
1687 my $collect = $self->{'collect'};
1688 my $gsdl_cgi = $self->{'gsdl_cgi'};
1689# my $gsdlhome = $self->{'gsdlhome'};
1690
1691 if ($baseaction::authentication_enabled) {
1692 # Ensure the user is allowed to edit this collection
1693 &authenticate_user($gsdl_cgi, $username, $collect);
1694 }
1695
1696 my $site = $self->{'site'};
1697 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1698
1699 $gsdl_cgi->checked_chdir($collect_dir);
1700
1701 # Obtain the collect dir
1702 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1703
1704 # Make sure the collection isn't locked by someone else
1705 $self->lock_collection($username, $collect);
1706
1707 $self->_set_archives_metadata_array(@_);
1708
1709 # Release the lock once it is done
1710 $self->unlock_collection($username, $collect);
1711}
1712
1713sub _remove_archives_metadata
1714{
1715 my $self = shift @_;
1716
1717 my $collect = $self->{'collect'};
1718 my $gsdl_cgi = $self->{'gsdl_cgi'};
1719# my $gsdlhome = $self->{'gsdlhome'};
1720 my $infodbtype = $self->{'infodbtype'};
1721
1722 my $site = $self->{'site'};
1723
1724 # Obtain the collect and archive dir
1725 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1726
1727 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1728
1729 # look up additional args
1730 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1731
1732 my $metaname = $self->{'metaname'};
1733 my $metapos = $self->{'metapos'};
1734
1735 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1736
1737 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1738 # default to deleting the first metavalue for the given metaname
1739 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1740 # seem to get deleted in one single remove_archives_meta action invocation.
1741 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1742 # deleted, both get deleted.
1743 if(!defined $metapos && !defined $metavalue) {
1744 $metapos = 0;
1745 }
1746
1747 my $metamode = $self->{'metamode'} || undef;
1748
1749 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1750 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1751
1752 # This now stores the full pathname
1753 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1754
1755 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1756# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1757
1758 if ($status == 0)
1759 {
1760 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1761 $mess .= " $metaname";
1762 $mess .= "->[$metapos]" if (defined $metapos);
1763
1764 $gsdl_cgi->generate_ok_message($mess);
1765 }
1766 else
1767 {
1768 my $mess .= "Failed to remove archives metadata key: $docid\n";
1769 $mess .= "Exit status: $status\n";
1770 $mess .= "System Error Message: $!\n";
1771 $mess .= "-" x 20 . "\n";
1772
1773 $gsdl_cgi->generate_error($mess);
1774 }
1775
1776 #return $status; # in case calling functions have a use for this
1777}
1778
1779sub remove_archives_metadata
1780{
1781 my $self = shift @_;
1782
1783 my $username = $self->{'username'};
1784 my $collect = $self->{'collect'};
1785 my $gsdl_cgi = $self->{'gsdl_cgi'};
1786
1787 if ($baseaction::authentication_enabled)
1788 {
1789 # Ensure the user is allowed to edit this collection
1790 &authenticate_user($gsdl_cgi, $username, $collect);
1791 }
1792
1793 # Make sure the collection isn't locked by someone else
1794 $self->lock_collection($username, $collect);
1795
1796 $self->_remove_archives_metadata(@_);
1797
1798 # Release the lock once it is done
1799 $self->unlock_collection($username, $collect);
1800}
1801
1802sub remove_from_doc_xml
1803{
1804 my $self = shift @_;
1805 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1806
1807 my @start_rules = ('Section' => \&dxml_start_section);
1808
1809 # Set the call-back functions for the metadata tags
1810 my @rules =
1811 (
1812 _default => 'raw',
1813 'Metadata' => \&rfdxml_metadata
1814 );
1815
1816 my $parser = XML::Rules->new
1817 (
1818 start_rules => \@start_rules,
1819 rules => \@rules,
1820 style => 'filter',
1821 output_encoding => 'utf8',
1822# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1823 stripspaces => 2|0|0 # ineffectual
1824 );
1825
1826 my $status = 0;
1827 my $xml_in = "";
1828 if (!open(MIN,"<$doc_xml_filename"))
1829 {
1830 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1831 $status = 1;
1832 }
1833 else
1834 {
1835 # Read them in
1836 my $line;
1837 while (defined ($line=<MIN>)) {
1838 $xml_in .= $line;
1839 }
1840 close(MIN);
1841
1842 # Filter with the call-back functions
1843 my $xml_out = "";
1844
1845 my $MOUT;
1846 if (!open($MOUT,">$doc_xml_filename")) {
1847 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1848 $status = 1;
1849 }
1850 else {
1851 binmode($MOUT,":utf8");
1852 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1853 close($MOUT);
1854 }
1855 }
1856 return $status;
1857}
1858
1859sub rfdxml_metadata
1860{
1861 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1862
1863 # For comparisons, toplevel section is indicated by ""
1864 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1865 my $secid = $parser->{'parameters'}->{'secid'} || "";
1866
1867 if (!($secid eq $curr_sec_num))
1868 {
1869 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1870 return [$tagname => $attrHash];
1871 }
1872
1873 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1874 {
1875 if (!defined $parser->{'parameters'}->{'poscount'})
1876 {
1877 $parser->{'parameters'}->{'poscount'} = 0;
1878 }
1879 else
1880 {
1881 $parser->{'parameters'}->{'poscount'}++;
1882 }
1883
1884 # if overriding but no metapos, then clear all the meta for this metaname
1885 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
1886 return [];
1887 }
1888 }
1889
1890 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1891 {
1892 return [];
1893 }
1894
1895 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1896 {
1897 return [];
1898 }
1899
1900 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1901 return [$tagname => $attrHash];
1902}
1903
1904sub mxml_metadata
1905{
1906 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1907 my $metaname = $parser->{'parameters'}->{'metaname'};
1908 my $metamode = $parser->{'parameters'}->{'metamode'};
1909
1910 # Report error if we don't see FileName tag before this
1911 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1912
1913 # Don't do anything if we are not in the right FileSet
1914 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1915 if ($file_regexp =~ /\.\*/) {
1916 # Only interested in a file_regexp if it specifies precisely one
1917 # file.
1918 # So, skip anything with a .* in it as it is too general
1919# print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1920 return [$tagname => $attrHash];
1921 }
1922 my $src_file = $parser->{'parameters'}->{'src_file'};
1923 if (!($src_file =~ /$file_regexp/)) {
1924 return [$tagname => $attrHash];
1925 }
1926## print STDERR "*** mxl metamode = $metamode\n";
1927
1928 # Find the right metadata tag and checks if we are going to override it
1929 my $name_attr = $attrHash->{'name'};
1930 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1931 # Get the value and override the current value
1932 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1933 $attrHash->{'_content'} = $metavalue;
1934
1935## print STDERR "**** overriding metadata.xml\n";
1936
1937 # Don't want it to wipe out any other pieces of metadata
1938 $parser->{'parameters'}->{'metamode'} = "done";
1939 }
1940
1941 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1942 return [$tagname => $attrHash];
1943}
1944
1945
1946sub mxml_description
1947{
1948 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1949 my $metamode = $parser->{'parameters'}->{'metamode'};
1950
1951 # Failed... Report error if we don't see FileName tag before this
1952 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1953
1954 # Don't do anything if we are not in the right FileSet
1955 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1956 if ($file_regexp =~ m/\.\*/) {
1957 # Only interested in a file_regexp if it specifies precisely one
1958 # file.
1959 # So, skip anything with a .* in it as it is too general
1960 return [$tagname => $attrHash];
1961 }
1962 my $src_file = $parser->{'parameters'}->{'src_file'};
1963
1964 if (!($src_file =~ m/$file_regexp/)) {
1965 return [$tagname => $attrHash];
1966 }
1967
1968 # Accumulate the metadata block to the end of the description block
1969 # Note: This adds metadata block to all description blocks, so if there are
1970 # multiple FileSets, it will add to all of them
1971 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1972 # if metamode was "override" but get to here then it failed to
1973 # find an item to override, in which case it should append its
1974 # value to the end, just like the "accumulate" mode
1975
1976 # tack a new metadata tag on to the end of the <Metadata>+ block
1977 my $metaname = $parser->{'parameters'}->{'metaname'};
1978 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1979
1980 my $metadata_attr = { '_content' => $metavalue,
1981 'name' => $metaname,
1982 'mode' => "accumulate" };
1983
1984 my $append_metadata = [ "Metadata" => $metadata_attr ];
1985 my $description_content = $attrHash->{'_content'};
1986
1987## print STDERR "*** appending to metadata.xml\n";
1988
1989 # append the new metadata element to the end of the current
1990 # content contained inside this tag
1991 if (ref($description_content) eq "") {
1992 # => string or numeric literal
1993 # this is caused by a <Description> block has no <Metadata> child elements
1994 # => set up an empty array in '_content'
1995 $attrHash->{'_content'} = [ "\n" ];
1996 $description_content = $attrHash->{'_content'};
1997 }
1998
1999 push(@$description_content," ", $append_metadata ,"\n ");
2000 $parser->{'parameters'}->{'metamode'} = "done";
2001 }
2002
2003 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2004 return [$tagname => $attrHash];
2005}
2006
2007
2008sub mxml_filename
2009{
2010 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2011
2012 # Store the filename of the Current Fileset
2013 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2014 # FileName tag must come before Description tag
2015 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2016
2017 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2018 return [$tagname => $attrHash];
2019}
2020
2021
2022sub mxml_fileset
2023{
2024 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2025
2026 # Initilise the current_file
2027 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2028 # FileName tag must come before Description tag
2029 $parser->{'parameters'}->{'current_file'} = "";
2030
2031 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2032 return [$tagname => $attrHash];
2033}
2034
2035sub mxml_directorymetadata
2036{
2037 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2038
2039 # if we haven't processed the metadata when we reach the end of metadata.xml
2040 # it's because there's no particular FileSet element whose FileName matched
2041 # In which case, add a new FileSet for this FileName
2042 my $metamode = $parser->{'parameters'}->{'metamode'};
2043 if($metamode ne "done") {
2044
2045 if ($metamode eq "override") {
2046 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2047 }
2048
2049 # If we get to here and metamode is override, this means there
2050 # was no existing value to overide => treat as an append operation
2051
2052 # Create a new FileSet element and append to DirectoryMetadata
2053 # <FileSet>
2054 # <FileName>src_file</FileName>
2055 # <Description>
2056 # <Metadata mode="" name="">metavalue</Metadata>
2057 # </Description>
2058 # </FileSet>
2059 my $src_file = $parser->{'parameters'}->{'src_file'};
2060 my $metaname = $parser->{'parameters'}->{'metaname'};
2061 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2062 my $metadata_attr = {
2063 '_content' => $metavalue,
2064 'name' => $metaname,
2065 'mode' => "accumulate"
2066 };
2067 my $append_metadata = [ "Metadata" => $metadata_attr ];
2068 my $description_attr->{'_content'} = [ "\n\t\t ", $append_metadata, "\n\t\t"];
2069 my $description_element = [ "Description" => $description_attr ];
2070
2071 #_content is not an attribute, it's special and holds the children of this element
2072 # including the textnode value embedded in this element if any.
2073 my $filename_attr = {'_content' => $src_file};
2074 my $filename_element = [ "FileName" => $filename_attr ];
2075
2076 my $fileset_attr = {};
2077 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2078 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2079
2080
2081 # get children of dirmeta, and push the new FileSet element onto it
2082 print "Appending metadata to metadata.xml\n";
2083 my $dirmeta_content = $attrHash->{'_content'};
2084 if (ref($dirmeta_content)) {
2085 # got some existing interesting nested content
2086 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2087 push(@$dirmeta_content, "\t", $fileset ,"\n");
2088 }
2089 else {
2090 #description_content is most likely a string such as "\n"
2091 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2092 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2093 }
2094
2095 $parser->{'parameters'}->{'metamode'} = "done";
2096 }
2097 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2098 return [$tagname => $attrHash];
2099}
2100
2101
2102sub edit_metadata_xml
2103{
2104 my $self = shift @_;
2105 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
2106
2107 # Set the call-back functions for the metadata tags
2108 my @rules =
2109 ( _default => 'raw',
2110 'FileName' => \&mxml_filename,
2111 'Metadata' => \&mxml_metadata,
2112 'Description' => \&mxml_description,
2113 'FileSet' => \&mxml_fileset,
2114 'DirectoryMetadata' => \&mxml_directorymetadata);
2115
2116 # use XML::Rules to add it in (read in and out again)
2117 my $parser = XML::Rules->new(rules => \@rules,
2118 style => 'filter',
2119 output_encoding => 'utf8',
2120 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2121
2122 if (!-e $metadata_xml_filename) {
2123
2124 if (open(MOUT,">$metadata_xml_filename")) {
2125
2126 my $src_file_re = &util::filename_to_regex($src_file);
2127 # shouldn't the following also be in the above utility routine??
2128 # $src_file_re =~ s/\./\\./g;
2129
2130 print MOUT "<?xml version=\"1.0\"?>\n";
2131 print MOUT "<DirectoryMetadata>\n";
2132 print MOUT " <FileSet>\n";
2133 print MOUT " <FileName>$src_file_re</FileName>\n";
2134 print MOUT " <Description>\n";
2135 print MOUT " </Description>\n";
2136 print MOUT " </FileSet>\n";
2137 print MOUT "</DirectoryMetadata>\n";
2138
2139 close(MOUT);
2140 }
2141 else {
2142 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2143 }
2144 }
2145
2146
2147 my $xml_in = "";
2148 if (!open(MIN,"<$metadata_xml_filename")) {
2149 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2150 }
2151 else {
2152 # Read them in
2153 my $line;
2154 while (defined ($line=<MIN>)) {
2155 $xml_in .= $line;
2156 }
2157 close(MIN);
2158
2159 # Filter with the call-back functions
2160 my $xml_out = "";
2161
2162 my $MOUT;
2163 if (!open($MOUT,">$metadata_xml_filename")) {
2164 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2165 }
2166 else {
2167 binmode($MOUT,":utf8");
2168
2169 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2170 # At the moment, I will just hack it!
2171 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2172 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2173 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2174 #print MOUT $xml_out;
2175
2176 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2177 metavalue => $metavalue,
2178 metamode => $metamode,
2179 src_file => $src_file,
2180 current_file => undef} );
2181 close($MOUT);
2182 }
2183 }
2184}
2185
2186
2187sub set_import_metadata
2188{
2189 my $self = shift @_;
2190
2191 my $username = $self->{'username'};
2192 my $collect = $self->{'collect'};
2193 my $gsdl_cgi = $self->{'gsdl_cgi'};
2194
2195 if ($baseaction::authentication_enabled) {
2196 # Ensure the user is allowed to edit this collection
2197 $self->authenticate_user($username, $collect);
2198 }
2199
2200 # Make sure the collection isn't locked by someone else
2201 $self->lock_collection($username, $collect);
2202
2203 $self->_set_import_metadata(@_);
2204
2205 # Release the lock once it is done
2206 $self->unlock_collection($username, $collect);
2207
2208}
2209
2210sub set_import_metadata_array
2211{
2212 my $self = shift @_;
2213
2214 my $username = $self->{'username'};
2215 my $collect = $self->{'collect'};
2216 my $gsdl_cgi = $self->{'gsdl_cgi'};
2217# my $gsdlhome = $self->{'gsdlhome'};
2218
2219 if ($baseaction::authentication_enabled) {
2220 # Ensure the user is allowed to edit this collection
2221 &authenticate_user($gsdl_cgi, $username, $collect);
2222 }
2223
2224 my $site = $self->{'site'};
2225 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2226
2227 $gsdl_cgi->checked_chdir($collect_dir);
2228
2229 # Make sure the collection isn't locked by someone else
2230 $self->lock_collection($username, $collect);
2231
2232 $self->_set_import_metadata_array(@_);
2233
2234 # Release the lock once it is done
2235 $self->unlock_collection($username, $collect);
2236
2237}
2238
2239
2240sub _set_import_metadata_array
2241{
2242 my $self = shift @_;
2243
2244 my $collect = $self->{'collect'};
2245 my $gsdl_cgi = $self->{'gsdl_cgi'};
2246
2247 my $site = $self->{'site'};
2248 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2249
2250 # look up additional args
2251
2252 my $infodbtype = $self->{'infodbtype'};
2253
2254 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2255 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2256
2257 my $json_str = $self->{'json'};
2258 my $doc_array = decode_json $json_str;
2259
2260 my $global_status = 0;
2261 my $global_mess = "";
2262
2263 my @all_docids = ();
2264
2265 foreach my $doc_array_rec ( @$doc_array )
2266 {
2267 my $status = -1;
2268 my $docid = $doc_array_rec->{'docid'};
2269
2270 push(@all_docids,$docid);
2271
2272 my $metaname = $doc_array_rec->{'metaname'};
2273 if (defined $metaname) {
2274 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2275 my $metavalue = $doc_array_rec->{'metavalue'};
2276
2277 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2278 # make "accumulate" the default (less destructive, as won't actually
2279 # delete any existing values)
2280 $metamode = "accumulate";
2281 }
2282
2283 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metavalue, $metamode);
2284
2285 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2286 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2287
2288 foreach my $metatable_rec ( @$metatable ) {
2289 $metaname = $metatable_rec->{'metaname'};
2290 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2291 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2292 # make "accumulate" the default (less destructive, as won't actually
2293 # delete any existing values)
2294 $metamode = "accumulate";
2295 }
2296 # no metapos for import
2297 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2298
2299 foreach my $metavalue ( @$metavals ) {
2300 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metavalue, $metamode);
2301 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2302 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2303 # metavalue for this metaname will be lost
2304 $metamode = "accumulate";
2305 }
2306 }
2307 }
2308 }
2309 }
2310
2311 # always a success message
2312 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2313 $gsdl_cgi->generate_ok_message($mess);
2314}
2315
2316# always returns true (1)
2317sub set_import_metadata_entry
2318{
2319 my $self = shift @_;
2320 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metavalue, $metamode) = @_;
2321
2322 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2323 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2324 my $metadata_xml_file;
2325 my $import_filename = undef;
2326
2327 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2328
2329 # This now stores the full pathname
2330 $import_filename = $doc_rec->{'src-file'}->[0];
2331
2332 # figure out correct metadata.xml file [?]
2333 # Assuming the metadata.xml file is next to the source file
2334 # Note: This will not work if it is using the inherited metadata from the parent folder
2335 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2336 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2337
2338 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
2339 return 1;
2340}
2341
2342sub _remove_import_metadata
2343{
2344 my $self = shift @_;
2345
2346 my $collect = $self->{'collect'};
2347 my $gsdl_cgi = $self->{'gsdl_cgi'};
2348# my $gsdlhome = $self->{'gsdlhome'};
2349 my $infodbtype = $self->{'infodbtype'};
2350
2351 # Obtain the collect dir
2352 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2353 my $site = $self->{'site'};
2354 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2355
2356 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2357 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2358
2359 # look up additional args
2360 my $docid = $self->{'d'};
2361 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2362 {
2363 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2364 }
2365
2366 my $metaname = $self->{'metaname'};
2367 my $metavalue = $self->{'metavalue'};
2368 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2369
2370 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2371 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2372 my $metadata_xml_file;
2373 my $import_filename = undef;
2374 if (defined $docid)
2375 {
2376 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2377 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2378
2379 # This now stores the full pathname
2380 $import_filename = $doc_rec->{'src-file'}->[0];
2381 }
2382
2383 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2384 {
2385 $gsdl_cgi->generate_error("There is no metadata\n");
2386 }
2387
2388 # figure out correct metadata.xml file [?]
2389 # Assuming the metadata.xml file is next to the source file
2390 # Note: This will not work if it is using the inherited metadata from the parent folder
2391 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2392 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2393
2394 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
2395
2396 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2397 $mess .= " $metaname";
2398 $mess .= " = $metavalue\n";
2399
2400 $gsdl_cgi->generate_ok_message($mess);
2401
2402 #return $status; # in case calling functions have a use for this
2403}
2404
2405sub remove_import_metadata
2406{
2407 my $self = shift @_;
2408
2409 my $username = $self->{'username'};
2410 my $collect = $self->{'collect'};
2411 my $gsdl_cgi = $self->{'gsdl_cgi'};
2412
2413 if ($baseaction::authentication_enabled) {
2414 # Ensure the user is allowed to edit this collection
2415 &authenticate_user($gsdl_cgi, $username, $collect);
2416 }
2417
2418 # Make sure the collection isn't locked by someone else
2419 $self->lock_collection($username, $collect);
2420
2421 $self->_remove_import_metadata(@_);
2422
2423 # Release the lock once it is done
2424 $self->unlock_collection($username, $collect);
2425
2426}
2427
2428sub remove_from_metadata_xml
2429{
2430 my $self = shift @_;
2431 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
2432
2433 # Set the call-back functions for the metadata tags
2434 my @rules =
2435 (
2436 _default => 'raw',
2437 'Metadata' => \&rfmxml_metadata,
2438 'FileName' => \&mxml_filename
2439 );
2440
2441 my $parser = XML::Rules->new
2442 (
2443 rules => \@rules,
2444 style => 'filter',
2445 output_encoding => 'utf8',
2446 #normalisespaces => 1,
2447 stripspaces => 2|0|0 # ineffectual
2448 );
2449
2450 my $xml_in = "";
2451 if (!open(MIN,"<$metadata_xml_filename"))
2452 {
2453 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2454 }
2455 else
2456 {
2457 # Read them in
2458 my $line;
2459 while (defined ($line=<MIN>)) {
2460 $xml_in .= $line;
2461 }
2462 close(MIN);
2463
2464 # Filter with the call-back functions
2465 my $xml_out = "";
2466
2467 my $MOUT;
2468 if (!open($MOUT,">$metadata_xml_filename")) {
2469 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2470 }
2471 else {
2472 binmode($MOUT,":utf8");
2473 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
2474 close($MOUT);
2475 }
2476 }
2477}
2478
2479sub rfmxml_metadata
2480{
2481 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2482
2483 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2484 {
2485 return [];
2486 }
2487
2488 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2489 return [$tagname => $attrHash];
2490}
2491
2492sub _remove_live_metadata
2493{
2494 my $self = shift @_;
2495
2496 my $collect = $self->{'collect'};
2497 my $gsdl_cgi = $self->{'gsdl_cgi'};
2498# my $gsdlhome = $self->{'gsdlhome'};
2499 my $infodbtype = $self->{'infodbtype'};
2500
2501 # Obtain the collect dir
2502 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2503 my $site = $self->{'site'};
2504 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2505
2506
2507 # look up additional args
2508 my $docid = $self->{'d'};
2509 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2510 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2511 }
2512
2513 # Generate the dbkey
2514 my $metaname = $self->{'metaname'};
2515 my $dbkey = "$docid.$metaname";
2516
2517 # To people who know $collect_tail please add some comments
2518 # Obtain the live gdbm_db path
2519 my $collect_tail = $collect;
2520 $collect_tail =~ s/^.*[\/\\]//;
2521 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2522 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2523
2524 # Remove the key
2525 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2526 my $status = system($cmd);
2527 if ($status != 0) {
2528 # Catch error if gdbmdel failed
2529 my $mess = "Failed to set metadata key: $dbkey\n";
2530
2531 $mess .= "PATH: $ENV{'PATH'}\n";
2532 $mess .= "cmd = $cmd\n";
2533 $mess .= "Exit status: $status\n";
2534 $mess .= "System Error Message: $!\n";
2535
2536 $gsdl_cgi->generate_error($mess);
2537 }
2538 else {
2539 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2540 }
2541
2542}
2543
2544sub remove_live_metadata
2545{
2546 my $self = shift @_;
2547
2548 my $username = $self->{'username'};
2549 my $collect = $self->{'collect'};
2550 my $gsdl_cgi = $self->{'gsdl_cgi'};
2551 my $gsdlhome = $self->{'gsdlhome'};
2552
2553 if ($baseaction::authentication_enabled) {
2554 # Ensure the user is allowed to edit this collection
2555 &authenticate_user($gsdl_cgi, $username, $collect);
2556 }
2557
2558 # Make sure the collection isn't locked by someone else
2559 $self->lock_collection($username, $collect);
2560
2561 $self->_remove_live_metadata(@_);
2562
2563 $self->unlock_collection($username, $collect);
2564}
2565
2566sub remove_metadata
2567{
2568 my $self = shift @_;
2569
2570 my $where = $self->{'where'};
2571 if(!$where) {
2572 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2573 return;
2574 }
2575
2576 my $username = $self->{'username'};
2577 my $collect = $self->{'collect'};
2578 my $gsdl_cgi = $self->{'gsdl_cgi'};
2579
2580 if ($baseaction::authentication_enabled) {
2581 # Ensure the user is allowed to edit this collection
2582 &authenticate_user($gsdl_cgi, $username, $collect);
2583 }
2584
2585 # Make sure the collection isn't locked by someone else
2586 $self->lock_collection($username, $collect);
2587
2588 # check which directories need to be processed, specified in $where as
2589 # any combination of import|archives|index|live
2590 if($where =~ m/import/) {
2591 $self->_remove_import_metadata(@_);
2592 }
2593 if($where =~ m/archives/) {
2594 $self->_remove_archives_metadata(@_);
2595 }
2596 if($where =~ m/index/) {
2597 $self->_remove_index_metadata(@_);
2598 }
2599
2600 # Release the lock once it is done
2601 $self->unlock_collection($username, $collect);
2602}
2603
2604# the internal version, without authentication
2605sub _remove_index_metadata
2606{
2607 my $self = shift @_;
2608
2609 my $collect = $self->{'collect'};
2610 my $gsdl_cgi = $self->{'gsdl_cgi'};
2611# my $gsdlhome = $self->{'gsdlhome'};
2612 my $infodbtype = $self->{'infodbtype'};
2613
2614 # Obtain the collect dir
2615 my $site = $self->{'site'};
2616 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2617 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2618
2619 # look up additional args
2620 my $docid = $self->{'d'};
2621 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2622 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2623 }
2624 my $metaname = $self->{'metaname'};
2625 my $metapos = $self->{'metapos'};
2626 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2627
2628 # To people who know $collect_tail please add some comments
2629 # Obtain the path to the database
2630 my $collect_tail = $collect;
2631 $collect_tail =~ s/^.*[\/\\]//;
2632 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2633 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2634
2635 # Read the docid entry
2636 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2637
2638 # Basically loop through and unescape_html the values
2639 foreach my $k (keys %$doc_rec) {
2640 my @escaped_v = ();
2641 foreach my $v (@{$doc_rec->{$k}}) {
2642 if ($k eq "contains") {
2643 # protect quotes in ".2;".3 etc
2644 $v =~ s/\"/\\\"/g;
2645 push(@escaped_v, $v);
2646 }
2647 else {
2648 my $ev = &ghtml::unescape_html($v);
2649 $ev =~ s/\"/\\\"/g;
2650 push(@escaped_v, $ev);
2651 }
2652 }
2653 $doc_rec->{$k} = \@escaped_v;
2654 }
2655
2656 # Check to make sure the key does exist
2657 if (!defined ($doc_rec->{$metaname})) {
2658 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2659 }
2660
2661 # Obtain the specified metadata pos
2662 if(!defined $metapos && !defined $metavalue) { #$metapos = 0 if (!defined $metapos);
2663 $metapos = 0;
2664 }
2665
2666
2667 # consider check key is defined before deleting?
2668 # Loop through the metadata array and ignore the specified position
2669 my $filtered_metadata = [];
2670 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2671 for (my $i=0; $i<$num_metadata_vals; $i++) {
2672 my $metaval = shift(@{$doc_rec->{$metaname}});
2673
2674 if (!defined $metavalue && $i != $metapos) {
2675 push(@$filtered_metadata,$metaval);
2676 }
2677
2678 if(defined $metavalue && !($metavalue eq $metaval))
2679 {
2680 push(@$filtered_metadata,$metaval);
2681 }
2682 }
2683 $doc_rec->{$metaname} = $filtered_metadata;
2684
2685 # Turn the record back to string
2686 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2687
2688 # Store it back to the database
2689 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2690 my $status = system($cmd);
2691 if ($status != 0) {
2692 my $mess = "Failed to set metadata key: $docid\n";
2693
2694 $mess .= "PATH: $ENV{'PATH'}\n";
2695 $mess .= "cmd = $cmd\n";
2696 $mess .= "Exit status: $status\n";
2697 $mess .= "System Error Message: $!\n";
2698
2699 $gsdl_cgi->generate_error($mess);
2700 }
2701 else {
2702 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2703 $mess .= " $metaname";
2704 $mess .= "->[$metapos]" if (defined $metapos);
2705
2706 $gsdl_cgi->generate_ok_message($mess);
2707 }
2708
2709 #return $status; # in case calling functions have a use for this
2710}
2711
2712sub remove_index_metadata
2713{
2714 my $self = shift @_;
2715
2716 my $username = $self->{'username'};
2717 my $collect = $self->{'collect'};
2718 my $gsdl_cgi = $self->{'gsdl_cgi'};
2719# my $gsdlhome = $self->{'gsdlhome'};
2720
2721 if ($baseaction::authentication_enabled) {
2722 # Ensure the user is allowed to edit this collection
2723 &authenticate_user($gsdl_cgi, $username, $collect);
2724 }
2725
2726 # Obtain the collect dir
2727 my $site = $self->{'site'};
2728 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2729 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2730
2731 # Make sure the collection isn't locked by someone else
2732 $self->lock_collection($username, $collect);
2733
2734 $self->_remove_index_metadata(@_);
2735
2736 # Release the lock once it is done
2737 $self->unlock_collection($username, $collect);
2738}
2739
2740
2741# Was trying to reused the codes, but the functions need to be broken
2742# down more before they can be reused, otherwise there will be too
2743# much overhead and duplicate process...
2744sub insert_metadata
2745{
2746 my $self = shift @_;
2747
2748 my $username = $self->{'username'};
2749 my $collect = $self->{'collect'};
2750 my $gsdl_cgi = $self->{'gsdl_cgi'};
2751 my $gsdlhome = $self->{'gsdlhome'};
2752 my $infodbtype = $self->{'infodbtype'};
2753
2754 # If the import metadata and gdbm database have been updated, we
2755 # need to insert some notification to warn user that the the text
2756 # they see at the moment is not indexed and require a rebuild.
2757 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2758
2759 if ($baseaction::authentication_enabled) {
2760 # Ensure the user is allowed to edit this collection
2761 $self->authenticate_user($username, $collect);
2762 }
2763
2764 # Obtain the collect and archive dir
2765 my $site = $self->{'site'};
2766 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2767 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2768 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2769
2770 # Make sure the collection isn't locked by someone else
2771 $self->lock_collection($username, $collect);
2772
2773 # Check additional args
2774 my $docid = $self->{'d'};
2775 if (!defined($docid)) {
2776 $gsdl_cgi->generate_error("No document id is specified: d=...");
2777 }
2778 my $metaname = $self->{'metaname'};
2779 if (!defined($metaname)) {
2780 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2781 }
2782 my $metavalue = $self->{'metavalue'};
2783 if (!defined($metavalue) || $metavalue eq "") {
2784 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2785 }
2786 # make "accumulate" the default (less destructive, as won't actually
2787 # delete any existing values)
2788 my $metamode = "accumulate";
2789
2790 #=======================================================================#
2791 # set_import_metadata [START]
2792 #=======================================================================#
2793 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2794 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2795 my $metadata_xml_file;
2796 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2797 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2798
2799 # This now stores the full pathname
2800 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2801
2802 # figure out correct metadata.xml file [?]
2803 # Assuming the metadata.xml file is next to the source file
2804 # Note: This will not work if it is using the inherited metadata from the parent folder
2805 my ($import_tailname, $import_dirname)
2806 = File::Basename::fileparse($import_filename);
2807 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2808
2809 # Shane's escape characters
2810 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2811 $metavalue =~ s/\,/&#44;/g;
2812 $metavalue =~ s/\:/&#58;/g;
2813 $metavalue =~ s/\|/&#124;/g;
2814 $metavalue =~ s/\(/&#40;/g;
2815 $metavalue =~ s/\)/&#41;/g;
2816 $metavalue =~ s/\[/&#91;/g;
2817 $metavalue =~ s/\\/&#92;/g;
2818 $metavalue =~ s/\]/&#93;/g;
2819 $metavalue =~ s/\{/&#123;/g;
2820 $metavalue =~ s/\}/&#125;/g;
2821 $metavalue =~ s/\"/&#34;/g;
2822 $metavalue =~ s/\`/&#96;/g;
2823 $metavalue =~ s/\n/_newline_/g;
2824
2825 # Edit the metadata.xml
2826 # Modified by Jeffrey from DL Consulting
2827 # Handle the case where there is one metadata.xml file for multiple FileSets
2828 # The XML filter needs to know whether it is in the right FileSet
2829 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2830 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2831 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2832 $metaname, $metavalue, $metamode, $import_tailname);
2833 #=======================================================================#
2834 # set_import_metadata [END]
2835 #=======================================================================#
2836
2837
2838 #=======================================================================#
2839 # set_metadata (accumulate version) [START]
2840 #=======================================================================#
2841 # To people who know $collect_tail please add some comments
2842 # Obtain path to the database
2843 my $collect_tail = $collect;
2844 $collect_tail =~ s/^.*[\/\\]//;
2845 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2846 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2847
2848 # Read the docid entry
2849 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2850
2851 foreach my $k (keys %$doc_rec) {
2852 my @escaped_v = ();
2853 foreach my $v (@{$doc_rec->{$k}}) {
2854 if ($k eq "contains") {
2855 # protect quotes in ".2;".3 etc
2856 $v =~ s/\"/\\\"/g;
2857 push(@escaped_v, $v);
2858 }
2859 else {
2860 my $ev = &ghtml::unescape_html($v);
2861 $ev =~ s/\"/\\\"/g;
2862 push(@escaped_v, $ev);
2863 }
2864 }
2865 $doc_rec->{$k} = \@escaped_v;
2866 }
2867
2868 # Protect the quotes
2869 $metavalue =~ s/\"/\\\"/g;
2870
2871 # Adds the pending macro
2872 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2873
2874 # If the metadata doesn't exist, create a new one
2875 if (!defined($doc_rec->{$metaname})){
2876 $doc_rec->{$metaname} = [ $macro_metavalue ];
2877 }
2878 # Else, let's acculumate the values
2879 else {
2880 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2881 }
2882
2883 # Generate the record string
2884 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2885
2886 # Store it into GDBM
2887 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2888 my $status = system($cmd);
2889 if ($status != 0) {
2890 # Catch error if gdbmget failed
2891 my $mess = "Failed to set metadata key: $docid\n";
2892
2893 $mess .= "PATH: $ENV{'PATH'}\n";
2894 $mess .= "cmd = $cmd\n";
2895 $mess .= "Exit status: $status\n";
2896 $mess .= "System Error Message: $!\n";
2897
2898 $gsdl_cgi->generate_error($mess);
2899 }
2900 else {
2901 my $mess = "insert-metadata successful: Key[$docid]\n";
2902 $mess .= " [In metadata.xml] $metaname";
2903 $mess .= " = $metavalue\n";
2904 $mess .= " [In database] $metaname";
2905 $mess .= " = $macro_metavalue\n";
2906 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2907 $gsdl_cgi->generate_ok_message($mess);
2908 }
2909 #=======================================================================#
2910 # set_metadata (accumulate version) [END]
2911 #=======================================================================#
2912
2913 # Release the lock once it is done
2914 $self->unlock_collection($username, $collect);
2915}
2916
29171;
Note: See TracBrowser for help on using the repository browser.