source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27168

Last change on this file since 27168 was 27168, checked in by ak19, 8 years ago

Second part of major restructuring of metadataaction: 1. the x_metadata_array subroutines now take a more complex JSON table which can now have subtables for each docID and whcih can each specify several metavalues to set for a single metaname using either accumulate or override as the metamode. 2. Added set_live_metadata_array. 3. set_index_metadata_array now does what the old set_metadata_array did, while set_metadata_array is a more generic method that takes a where parameter than can be set to a combination of archives|index|import|live. 4. set_index_metadata_array now takes the metamode parameter too. The complex JSON tables for set_archives_meta_array and set_index_meta_array have been tested, as has been the restructured remove_meta subroutines. But live and import metadata and metadata_array and remove subroutines have yet to be tested.

File size: 90.5 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos", "metamode" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "where", "json" ],
92 'optional-args' => [ ] },
93
94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
101
102 "set-index-metadata-array" => {
103 'compulsory-args' => [ "json" ],
104 'optional-args' => [ ] },
105
106 "set-live-metadata-array" => {
107 'compulsory-args' => [ "json" ],
108 'optional-args' => [ ] },
109
110 #REMOVE METHODS
111 "remove-import-metadata" => {
112 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
113 'optional-args' => [ ] },
114
115 "remove-archives-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
117 'optional-args' => [ "metapos", "metavalue" ] },
118
119 "remove-live-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ ] },
122
123 "remove-index-metadata" => {
124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ "metapos", "metavalue" ] },
126
127 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "where", "metapos", "metavalue" ] },
130
131 #INSERT METHODS
132 "insert-metadata" => {
133 'compulsory-args' => [ "d", "metaname", "metavalue" ],
134 'optional-args' => [ ] }
135};
136
137
138sub new
139{
140 my $class = shift (@_);
141 my ($gsdl_cgi,$iis6_mode) = @_;
142
143 # Treat metavalue specially. To transmit this through a GET request
144 # the Javascript side has url-encoded it, so here we need to decode
145 # it before proceeding
146
147 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
148 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
149 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
150
151 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
152 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
153
154 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
155
156 return bless $self, $class;
157}
158
159
160sub get_live_metadata
161{
162 my $self = shift @_;
163
164 my $username = $self->{'username'};
165 my $collect = $self->{'collect'};
166 my $gsdl_cgi = $self->{'gsdl_cgi'};
167 my $gsdlhome = $self->{'gsdlhome'};
168 my $infodbtype = $self->{'infodbtype'};
169
170 # live metadata gets/saves value scoped (prefixed) by the current usename
171 # so (for now) let's not bother to enforce authentication
172
173 # Obtain the collect dir
174 my $site = $self->{'site'};
175 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
176 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
177
178 # Make sure the collection isn't locked by someone else
179 $self->lock_collection($username, $collect);
180
181 # look up additional args
182 my $docid = $self->{'d'};
183 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
184 $gsdl_cgi->generate_error("No docid (d=...) specified.");
185 }
186
187 # Generate the dbkey
188 my $metaname = $self->{'metaname'};
189 my $dbkey = "$docid.$metaname";
190
191 # To people who know $collect_tail please add some comments
192 # Obtain path to the database
193 my $collect_tail = $collect;
194 $collect_tail =~ s/^.*[\/|\\]//;
195 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
196 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
197
198 # Obtain the content of the key
199 my $cmd = "gdbmget $infodb_file_path $dbkey";
200 if (open(GIN,"$cmd |") == 0) {
201 # Catch error if gdbmget failed
202 my $mess = "Failed to get metadata key: $metaname\n";
203 $mess .= "$!\n";
204
205 $gsdl_cgi->generate_error($mess);
206 }
207 else {
208 binmode(GIN,":utf8");
209 # Read everything in and concatenate them into $metavalue
210 my $metavalue = "";
211 my $line;
212 while (defined ($line=<GIN>)) {
213 $metavalue .= $line;
214 }
215 close(GIN);
216 chomp($metavalue); # Get rid off the tailing newlines
217 $gsdl_cgi->generate_ok_message("$metavalue");
218 }
219
220 # Release the lock once it is done
221 $self->unlock_collection($username, $collect);
222}
223
224# just calls the index version
225sub get_metadata
226{
227 my $self = shift @_;
228 $self->get_index_metadata(@_);
229}
230
231sub get_index_metadata
232{
233 my $self = shift @_;
234
235 my $username = $self->{'username'};
236 my $collect = $self->{'collect'};
237 my $gsdl_cgi = $self->{'gsdl_cgi'};
238 my $gsdlhome = $self->{'gsdlhome'};
239
240 # Authenticate user if it is enabled
241 if ($baseaction::authentication_enabled) {
242 # Ensure the user is allowed to edit this collection
243 &authenticate_user($gsdl_cgi, $username, $collect);
244 }
245
246 # Obtain the collect dir
247 my $site = $self->{'site'};
248 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
249 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
250
251 # Make sure the collection isn't locked by someone else
252 $self->lock_collection($username, $collect);
253
254 # look up additional args
255 my $docid = $self->{'d'};
256 my $metaname = $self->{'metaname'};
257 my $metapos = $self->{'metapos'};
258 my $infodbtype = $self->{'infodbtype'};
259
260 # To people who know $collect_tail please add some comments
261 # Obtain path to the database
262 my $collect_tail = $collect;
263 $collect_tail =~ s/^.*[\/\\]//;
264 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
265 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
266
267 # Read the docid entry
268 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
269
270 # Basically loop through and unescape_html the values
271 foreach my $k (keys %$doc_rec) {
272 my @escaped_v = ();
273 foreach my $v (@{$doc_rec->{$k}}) {
274 my $ev = &ghtml::unescape_html($v);
275 push(@escaped_v, $ev);
276 }
277 $doc_rec->{$k} = \@escaped_v;
278 }
279
280 # Obtain the specified metadata value
281 $metapos = 0 if (!defined $metapos);
282 my $metavalue = $doc_rec->{$metaname}->[$metapos];
283 $gsdl_cgi->generate_ok_message("$metavalue");
284
285 # Release the lock once it is done
286 $self->unlock_collection($username, $collect);
287}
288
289
290sub get_import_metadata
291{
292 my $self = shift @_;
293
294 my $username = $self->{'username'};
295 my $collect = $self->{'collect'};
296 my $gsdl_cgi = $self->{'gsdl_cgi'};
297 my $gsdlhome = $self->{'gsdlhome'};
298
299 # Authenticate user if it is enabled
300 if ($baseaction::authentication_enabled) {
301 # Ensure the user is allowed to edit this collection
302 &authenticate_user($gsdl_cgi, $username, $collect);
303 }
304
305 # Obtain the collect dir
306 my $site = $self->{'site'};
307 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
308 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
309
310 # Make sure the collection isn't locked by someone else
311 $self->lock_collection($username, $collect);
312
313 # look up additional args
314 my $docid = $self->{'d'};
315 my $metaname = $self->{'metaname'};
316 my $infodbtype = $self->{'infodbtype'};
317 if (!defined $docid)
318 {
319 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
320 }
321
322 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
323 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
324 my $metadata_xml_file;
325 my $import_filename = undef;
326
327
328 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
329 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
330 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
331
332 # This now stores the full pathname
333 $import_filename = $doc_rec->{'src-file'}->[0];
334
335 # figure out correct metadata.xml file [?]
336 # Assuming the metadata.xml file is next to the source file
337 # Note: This will not work if it is using the inherited metadata from the parent folder
338 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
339 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
340
341 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
342
343 # Release the lock once it is done
344 $self->unlock_collection($username, $collect);
345}
346
347sub get_metadata_from_metadata_xml
348{
349 my $self = shift @_;
350 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
351
352 my @rules =
353 (
354 _default => 'raw',
355 'Metadata' => \&gfmxml_metadata,
356 'FileName' => \&mxml_filename
357 );
358
359 my $parser = XML::Rules->new
360 (
361 rules => \@rules,
362 output_encoding => 'utf8'
363 );
364
365 my $xml_in = "";
366 if (!open(MIN,"<$metadata_xml_filename"))
367 {
368 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
369 }
370 else
371 {
372 # Read them in
373 my $line;
374 while (defined ($line=<MIN>)) {
375 $xml_in .= $line;
376 }
377 close(MIN);
378
379 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
380
381 if(defined $parser->{'pad'}->{'metavalue'})
382 {
383 return $parser->{'pad'}->{'metavalue'};
384 }
385 else
386 {
387 return "";
388 }
389 }
390}
391
392sub gfmxml_metadata
393{
394 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
395
396 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
397 {
398 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
399 }
400}
401
402sub get_archives_metadata
403{
404 my $self = shift @_;
405
406 my $username = $self->{'username'};
407 my $collect = $self->{'collect'};
408 my $gsdl_cgi = $self->{'gsdl_cgi'};
409 my $gsdlhome = $self->{'gsdlhome'};
410 my $infodbtype = $self->{'infodbtype'};
411
412 # Authenticate user if it is enabled
413 if ($baseaction::authentication_enabled) {
414 # Ensure the user is allowed to edit this collection
415 &authenticate_user($gsdl_cgi, $username, $collect);
416 }
417
418 # Obtain the collect dir
419 my $site = $self->{'site'};
420 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
421
422 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
423
424 # Make sure the collection isn't locked by someone else
425 $self->lock_collection($username, $collect);
426
427 # look up additional args
428 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
429 $docid_secnum = "" if (!defined $docid_secnum);
430
431 my $metaname = $self->{'metaname'};
432 my $metapos = $self->{'metapos'};
433 $metapos = 0 if (!defined $metapos);
434
435 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
436 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
437
438 # This now stores the full pathname
439 my $doc_filename = $doc_rec->{'doc-file'}->[0];
440
441 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
442
443 # Release the lock once it is done
444 $self->unlock_collection($username, $collect);
445}
446
447sub get_metadata_from_archive_xml
448{
449 my $self = shift @_;
450 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
451
452 my @start_rules = ('Section' => \&dxml_start_section);
453
454 my @rules =
455 (
456 _default => 'raw',
457 'Metadata' => \&gfdxml_metadata
458 );
459
460 my $parser = XML::Rules->new
461 (
462 start_rules => \@start_rules,
463 rules => \@rules,
464 output_encoding => 'utf8'
465 );
466
467 my $xml_in = "";
468 if (!open(MIN,"<$doc_xml_filename"))
469 {
470 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
471 }
472 else
473 {
474 # Read them in
475 my $line;
476 while (defined ($line=<MIN>)) {
477 $xml_in .= $line;
478 }
479 close(MIN);
480
481 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
482
483 if(defined $parser->{'pad'}->{'metavalue'})
484 {
485 return $parser->{'pad'}->{'metavalue'};
486 }
487 else
488 {
489 return "";
490 }
491 }
492}
493
494sub gfdxml_metadata
495{
496 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
497
498 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
499 {
500 return;
501 }
502
503 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
504 {
505 if (!defined $parser->{'parameters'}->{'poscount'})
506 {
507 $parser->{'parameters'}->{'poscount'} = 0;
508 }
509 else
510 {
511 $parser->{'parameters'}->{'poscount'}++;
512 }
513 }
514
515 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
516 {
517 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
518 }
519}
520
521sub _set_live_metadata
522{
523 my $self = shift @_;
524
525 my $collect = $self->{'collect'};
526 my $gsdl_cgi = $self->{'gsdl_cgi'};
527 #my $gsdlhome = $self->{'gsdlhome'};
528 my $infodbtype = $self->{'infodbtype'};
529
530 # Obtain the collect dir
531 my $site = $self->{'site'};
532 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
533 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
534
535
536 # look up additional args
537 my $docid = $self->{'d'};
538 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
539 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
540 }
541 my $metavalue = $self->{'metavalue'};
542 my $metamode = $self->{'metamode'} || "accumulate";
543 my $append_or_not = ($metamode eq "accumulate") ? " \"append\"" : "";
544
545 # Generate the dbkey
546 my $metaname = $self->{'metaname'};
547 my $dbkey = "$docid.$metaname";
548
549 # To people who know $collect_tail please add some comments
550 # Obtain path to the database
551 my $collect_tail = $collect;
552 $collect_tail =~ s/^.*[\/\\]//;
553 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
554 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
555
556 # Set the new value
557 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"$append_or_not";
558 my $status = system($cmd);
559 if ($status != 0) {
560 # Catch error if gdbmget failed
561 my $mess = "Failed to set metadata key: $dbkey\n";
562
563 $mess .= "PATH: $ENV{'PATH'}\n";
564 $mess .= "cmd = $cmd\n";
565 $mess .= "Exit status: $status\n";
566 $mess .= "System Error Message: $!\n";
567
568 $gsdl_cgi->generate_error($mess);
569 }
570 else {
571 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
572 }
573
574 #return $status; # in case calling functions have any further use for this
575}
576
577sub set_live_metadata
578{
579 my $self = shift @_;
580
581 my $username = $self->{'username'};
582 my $collect = $self->{'collect'};
583 my $gsdl_cgi = $self->{'gsdl_cgi'};
584
585 if ($baseaction::authentication_enabled) {
586 # Ensure the user is allowed to edit this collection
587 &authenticate_user($gsdl_cgi, $username, $collect);
588 }
589
590 # Make sure the collection isn't locked by someone else
591 $self->lock_collection($username, $collect);
592
593 $self->_set_live_metadata(@_);
594
595 # Release the lock once it is done
596 $self->unlock_collection($username, $collect);
597}
598
599sub set_index_metadata_entry
600{
601 my $self = shift @_;
602 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
603
604 # To people who know $collect_tail please add some comments
605 # Obtain path to the database
606 my $collect_tail = $collect;
607 $collect_tail =~ s/^.*[\/\\]//;
608 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
609 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
610
611# print STDERR "**** infodb file path = $infodb_file_path\n";
612# print STDERR "***** infodb type = $infodbtype\n";
613
614 # Read the docid entry
615 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
616
617 # Set the metadata value
618 if (defined $metapos) {
619 # if metamode=accumulate AND metapos, warn user and then use metapos
620 if (defined $metamode && $metamode eq "accumulate") {
621 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
622 print STDERR "**** Proceeding by using metapos\n";
623 }
624 $doc_rec->{$metaname}->[$metapos] = $metavalue;
625 }
626 elsif (defined $metamode && $metamode eq "accumulate") {
627 if(defined $doc_rec->{$metaname}) {
628 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
629 } else {
630 $doc_rec->{$metaname} = [ $metavalue ];
631 }
632
633 }
634 else { # default for index was to override
635 $doc_rec->{$metaname} = [ $metavalue ];
636 }
637
638 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
639
640 return $status;
641
642}
643
644sub _set_import_metadata
645{
646 my $self = shift @_;
647
648 my $collect = $self->{'collect'};
649 my $gsdl_cgi = $self->{'gsdl_cgi'};
650 my $infodbtype = $self->{'infodbtype'};
651# my $gsdlhome = $self->{'gsdlhome'};
652
653 # Obtain the collect and archive dir
654 my $site = $self->{'site'};
655 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
656 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
657 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
658
659 # look up additional args
660 # want either d= or f=
661 my $docid = $self->{'d'};
662 my $import_file = $self->{'f'};
663 if ((!defined $docid) && (!defined $import_file)) {
664 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
665 }
666
667 # Get the parameters and set default mode to "accumulate"
668 my $metaname = $self->{'metaname'};
669 my $metavalue = $self->{'metavalue'};
670## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
671 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
672
673 my $metamode = $self->{'metamode'};
674 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
675 # make "accumulate" the default (less destructive, as won't actually
676 # delete any existing values)
677 $metamode = "accumulate";
678 }
679
680 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
681 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
682 my $metadata_xml_file;
683 my $import_filename = undef;
684 if (defined $docid) {
685 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
686 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
687
688 # This now stores the full pathname
689 $import_filename = $doc_rec->{'src-file'}->[0];
690 }
691 else {
692 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
693 }
694
695 # figure out correct metadata.xml file [?]
696 # Assuming the metadata.xml file is next to the source file
697 # Note: This will not work if it is using the inherited metadata from the parent folder
698 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
699 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
700
701 # Edit the metadata.xml
702 # Modified by Jeffrey from DL Consulting
703 # Handle the case where there is one metadata.xml file for multiple FileSets
704 # The XML filter needs to know whether it is in the right FileSet
705 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
706 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
707 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
708 $metaname, $metavalue, $metamode, $import_tailname);
709
710
711 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
712 $mess .= " $metaname";
713 $mess .= " = $metavalue";
714 $mess .= " ($metamode)\n";
715
716 $gsdl_cgi->generate_ok_message($mess);
717
718 #return $status; # in case calling functions have any further use for this
719}
720
721# the version of set_index_meta that doesn't do authentication
722sub _set_archives_metadata
723{
724 my $self = shift @_;
725
726 my $collect = $self->{'collect'};
727 my $gsdl_cgi = $self->{'gsdl_cgi'};
728 my $infodbtype = $self->{'infodbtype'};
729
730 # Obtain the collect and archive dir
731 my $site = $self->{'site'};
732 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
733 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
734
735 # look up additional args
736 my $docid = $self->{'d'};
737 my $metaname = $self->{'metaname'};
738 my $metavalue = $self->{'metavalue'};
739 my $prevmetavalue = $self->{'prevmetavalue'};
740
741 my $metapos = $self->{'metapos'}; # Don't append "|| undef", since if metapos=0 it will then be set to undef
742
743 # Don't set metapos=0 if it's undefined, as doing so will interfere when overriding all values for a metaname
744 # (all values for a metaname should get overridden if metamode=override combined with no user-defined metapos)
745
746 my $metamode = $self->{'metamode'};
747 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
748 # make "accumulate" the default (less destructive, as won't actually
749 # delete any existing values)
750 $metamode = "accumulate";
751 }
752
753 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
754 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
755
756 if ($status == 0) {
757 my $mess = "set-archives-metadata successful: Key[$docid]\n";
758 $mess .= " $metaname";
759 $mess .= "->[$metapos]" if (defined $metapos);
760 $mess .= " = $metavalue";
761 $mess .= " ($metamode)\n";
762
763 $gsdl_cgi->generate_ok_message($mess);
764 }
765 else {
766 my $mess .= "Failed to set archives metadata key: $docid\n";
767 $mess .= "Exit status: $status\n";
768 if(defined $self->{'error_msg'}) {
769 $mess .= "Error Message: $self->{'error_msg'}\n";
770 } else {
771 $mess .= "System Error Message: $!\n";
772 }
773 $mess .= "-" x 20 . "\n";
774
775 $gsdl_cgi->generate_error($mess);
776 }
777
778 #return $status; # in case calling functions have any further use for this
779}
780
781
782# the version of set_index_meta that doesn't do authentication
783sub _set_index_metadata
784{
785 my $self = shift @_;
786
787 my $collect = $self->{'collect'};
788 my $gsdl_cgi = $self->{'gsdl_cgi'};
789
790 my $site = $self->{'site'};
791 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
792
793 # look up additional args
794 my $docid = $self->{'d'};
795 my $metaname = $self->{'metaname'};
796 my $metapos = $self->{'metapos'};
797 my $metavalue = $self->{'metavalue'};
798 my $infodbtype = $self->{'infodbtype'};
799 my $metamode = $self->{'metamode'};
800
801 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
802
803 if ($status != 0) {
804 # Catch error if set infodb entry failed
805 my $mess = "Failed to set metadata key: $docid\n";
806
807 $mess .= "PATH: $ENV{'PATH'}\n";
808 $mess .= "Exit status: $status\n";
809 $mess .= "System Error Message: $!\n";
810
811 $gsdl_cgi->generate_error($mess);
812 }
813 else {
814 my $mess = "set-index-metadata successful: Key[$docid]\n";
815 $mess .= " $metaname";
816 $mess .= "->[$metapos]" if (defined $metapos);
817 $mess .= " = $metavalue\n";
818
819 $gsdl_cgi->generate_ok_message($mess);
820 }
821
822 #return $status; # in case calling functions have any further use for this
823}
824
825sub set_index_metadata
826{
827 my $self = shift @_;
828
829 my $username = $self->{'username'};
830 my $collect = $self->{'collect'};
831 my $gsdl_cgi = $self->{'gsdl_cgi'};
832 #my $gsdlhome = $self->{'gsdlhome'};
833
834 if ($baseaction::authentication_enabled) {
835 # Ensure the user is allowed to edit this collection
836 &authenticate_user($gsdl_cgi, $username, $collect);
837 }
838
839 my $site = $self->{'site'};
840 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
841
842 $gsdl_cgi->checked_chdir($collect_dir);
843
844 # Obtain the collect dir
845 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
846
847 # Make sure the collection isn't locked by someone else
848 $self->lock_collection($username, $collect);
849
850 $self->_set_index_metadata(@_);
851
852 # Release the lock once it is done
853 $self->unlock_collection($username, $collect);
854}
855
856# call this to set the metadata for a combination of dirs archives, import or index, or live
857# if none specified, defaults to index which was the original behaviour of set_metadata.
858sub set_metadata
859{
860 my $self = shift @_;
861
862 # Testing that not defining a variable, setting it to "" or to " " all return false
863 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
864
865 my $where = $self->{'where'};
866 if(!$where) {
867 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
868 return;
869 }
870
871 # authenticate and lock collection once, even if processing multiple dirs
872 my $username = $self->{'username'};
873 my $collect = $self->{'collect'};
874 my $gsdl_cgi = $self->{'gsdl_cgi'};
875
876 if ($baseaction::authentication_enabled) {
877 # Ensure the user is allowed to edit this collection
878 #&authenticate_user($gsdl_cgi, $username, $collect);
879 $self->authenticate_user($username, $collect);
880 }
881
882 if($where =~ m/index/) {
883 my $site = $self->{'site'};
884 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
885 $gsdl_cgi->checked_chdir($collect_dir);
886 }
887
888 # Make sure the collection isn't locked by someone else
889 $self->lock_collection($username, $collect);
890
891
892 # now at last can set the metadata. $where can specify multiple
893 # $where is of the form: import|archives|index, or a subset thereof
894
895 #my @whichdirs = split('\|', $where);
896
897 # just check whether $where contains import/archives/index/live in turn, and
898 # for each case, process it accordingly
899 if($where =~ m/import/) {
900 $self->_set_import_metadata(@_);
901 }
902
903 if($where =~ m/archives/) {
904
905 # look up docID arg which is optional to set_metadata because it's optional
906 # to set_import, but which is compulsory to set_archives_metadata
907 my $docid = $self->{'d'};
908 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
909 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
910 }
911 # we have a docid, so can set archives meta
912 $self->_set_archives_metadata(@_);
913 }
914
915 if($where =~ m/index/) {
916
917 # look up docID arg which is optional to set_metadata because it's optional
918 # to set_import, but which is compulsory to set_archives_metadata
919 my $docid = $self->{'d'};
920 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
921 $gsdl_cgi->generate_error("No docid (d=...) specified.");
922 }
923 # we have a docid, so can set index meta
924 $self->_set_index_metadata(@_);
925 }
926
927 if($where =~ m/live/) {
928 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
929 }
930
931 # Release the lock once it is done
932 $self->unlock_collection($username, $collect);
933}
934
935sub set_metadata_array
936{
937 my $self = shift @_;
938
939 my $where = $self->{'where'};
940 if(!$where) {
941 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
942 return;
943 }
944
945 my $username = $self->{'username'};
946 my $collect = $self->{'collect'};
947 my $gsdl_cgi = $self->{'gsdl_cgi'};
948
949 if ($baseaction::authentication_enabled) {
950 # Ensure the user is allowed to edit this collection
951 &authenticate_user($gsdl_cgi, $username, $collect);
952 }
953
954 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
955 # But including the stmt during this code reorganisation to preserve as-is what used to happen
956 my $site = $self->{'site'};
957 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
958 $gsdl_cgi->checked_chdir($collect_dir);
959
960 # Make sure the collection isn't locked by someone else
961 $self->lock_collection($username, $collect);
962
963 if($where =~ m/import/) {
964 $self->_set_import_metadata_array(@_);
965 }
966 if($where =~ m/archives/) {
967 $self->_set_archives_metadata_array(@_);
968 }
969 if($where =~ m/index/) {
970 $self->_set_index_metadata_array(@_);
971 }
972 if($where =~ m/live/) {
973 $self->_set_live_metadata_array(@_);
974 }
975
976 # Release the lock once it is done
977 $self->unlock_collection($username, $collect);
978}
979
980sub _set_index_metadata_array
981{
982 my $self = shift @_;
983
984 my $collect = $self->{'collect'};
985 my $gsdl_cgi = $self->{'gsdl_cgi'};
986 my $site = $self->{'site'};
987 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
988
989
990 # look up additional args
991
992 my $infodbtype = $self->{'infodbtype'};
993
994 my $json_str = $self->{'json'};
995 my $doc_array = decode_json $json_str;
996
997
998 my $global_status = 0;
999 my $global_mess = "";
1000
1001 my @all_docids = ();
1002
1003 foreach my $doc_array_rec ( @$doc_array ) {
1004
1005 my $status = -1;
1006 my $docid = $doc_array_rec->{'docid'};
1007
1008 push(@all_docids,$docid);
1009
1010 my $metaname = $doc_array_rec->{'metaname'};
1011 if(defined $metaname) {
1012 my $metapos = $doc_array_rec->{'metapos'};
1013 my $metavalue = $doc_array_rec->{'metavalue'};
1014 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1015
1016 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1017 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1018 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1019
1020 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1021 $metaname = $metatable_rec->{'metaname'};
1022 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1023 my $metapos = undef;
1024 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1025
1026 foreach my $metavalue ( @$metavals ) { # metavals is an array
1027 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1028 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1029 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1030 # metavalue for this metaname will be lost
1031 $metamode = "accumulate";
1032 }
1033 }
1034 }
1035 }
1036
1037 if ($status != 0) {
1038 # Catch error if set infodb entry failed
1039 $global_status = $status;
1040 $global_mess .= "Failed to set metadata key: $docid\n";
1041 $global_mess .= "Exit status: $status\n";
1042 $global_mess .= "System Error Message: $!\n";
1043 $global_mess .= "-" x 20;
1044 }
1045 }
1046
1047 if ($global_status != 0) {
1048 $global_mess .= "PATH: $ENV{'PATH'}\n";
1049 $gsdl_cgi->generate_error($global_mess);
1050 }
1051 else {
1052 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1053 $gsdl_cgi->generate_ok_message($mess);
1054 }
1055}
1056
1057sub set_index_metadata_array
1058{
1059 my $self = shift @_;
1060
1061 my $username = $self->{'username'};
1062 my $collect = $self->{'collect'};
1063 my $gsdl_cgi = $self->{'gsdl_cgi'};
1064# my $gsdlhome = $self->{'gsdlhome'};
1065
1066 if ($baseaction::authentication_enabled) {
1067 # Ensure the user is allowed to edit this collection
1068 &authenticate_user($gsdl_cgi, $username, $collect);
1069 }
1070
1071 my $site = $self->{'site'};
1072 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1073
1074 $gsdl_cgi->checked_chdir($collect_dir);
1075
1076 # Obtain the collect dir
1077 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1078
1079 # Make sure the collection isn't locked by someone else
1080 $self->lock_collection($username, $collect);
1081
1082 $self->_set_index_metadata_array(@_);
1083
1084 # Release the lock once it is done
1085 $self->unlock_collection($username, $collect);
1086}
1087
1088# experimental, newly added in and untested
1089sub _set_live_metadata_array
1090{
1091 my $self = shift @_;
1092
1093 my $collect = $self->{'collect'};
1094 my $gsdl_cgi = $self->{'gsdl_cgi'};
1095
1096 my $site = $self->{'site'};
1097 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1098
1099
1100 # look up additional args
1101 my $infodbtype = $self->{'infodbtype'};
1102 # To people who know $collect_tail please add some comments
1103 # Obtain path to the database
1104 my $collect_tail = $collect;
1105 $collect_tail =~ s/^.*[\/\\]//;
1106 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1107 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1108
1109
1110 my $json_str = $self->{'json'};
1111 my $doc_array = decode_json $json_str;
1112
1113
1114 my $global_status = 0;
1115 my $global_mess = "";
1116
1117 my @all_docids = ();
1118
1119
1120 foreach my $doc_array_rec ( @$doc_array ) {
1121
1122 my $status = -1;
1123 my $docid = $doc_array_rec->{'docid'};
1124
1125 push(@all_docids,$docid);
1126
1127 my $metaname = $doc_array_rec->{'metaname'};
1128 if(defined $metaname) {
1129 my $dbkey = "$docid.$metaname";
1130 my $metavalue = $doc_array_rec->{'metavalue'};
1131 # no metapos for live_meta, so can default metamode to accumulate
1132 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'} || "accumulate";
1133
1134 my $append_or_not = ($metamode eq "accumulate") ? " \"append\"" : "";
1135
1136 # Set the new value
1137 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"$append_or_not";
1138 $status = system($cmd);
1139
1140 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1141 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1142 foreach my $metatable_rec ( @$metatable ) {
1143 $metaname = $metatable_rec->{'metaname'};
1144 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'} || "accumulate";
1145 my $append_or_not = ($metamode eq "accumulate") ? " \"append\"" : "";
1146 my $dbkey = "$docid.$metaname";
1147
1148 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1149 foreach my $metavalue ( @$metavals ) {
1150 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"$append_or_not";
1151 $status = system($cmd);
1152 $append_or_not = " \"append\""; # even if metamode=override, need to accumulate all subsequent
1153 # metavals for this metaname, else the just-assigned metavalue for this metaname will be lost
1154 }
1155 }
1156
1157 }
1158
1159 if ($status != 0) {
1160 # Catch error if gdbmget failed
1161 $global_status = $status;
1162 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1163 $global_mess .= "Exit status: $status\n";
1164 $global_mess .= "System Error Message: $!\n";
1165 $global_mess .= "-" x 20;
1166 }
1167 }
1168
1169 if ($global_status != 0) {
1170 $global_mess .= "PATH: $ENV{'PATH'}\n";
1171 $gsdl_cgi->generate_error($global_mess);
1172 }
1173 else {
1174 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1175 $gsdl_cgi->generate_ok_message($mess);
1176 }
1177}
1178
1179sub set_live_metadata_array
1180{
1181 my $self = shift @_;
1182
1183 my $username = $self->{'username'};
1184 my $collect = $self->{'collect'};
1185 my $gsdl_cgi = $self->{'gsdl_cgi'};
1186
1187 if ($baseaction::authentication_enabled) {
1188 # Ensure the user is allowed to edit this collection
1189 &authenticate_user($gsdl_cgi, $username, $collect);
1190 }
1191
1192 my $site = $self->{'site'};
1193 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1194
1195 $gsdl_cgi->checked_chdir($collect_dir);
1196
1197 # Make sure the collection isn't locked by someone else
1198 $self->lock_collection($username, $collect);
1199
1200 $self->_set_live_metadata_array(@_);
1201
1202 # Release the lock once it is done
1203 $self->unlock_collection($username, $collect);
1204}
1205
1206
1207sub dxml_metadata
1208{
1209 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1210 my $metaname = $parser->{'parameters'}->{'metaname'};
1211 my $metamode = $parser->{'parameters'}->{'metamode'};
1212
1213 print STDERR "**** Processing closing </Metadata> tag\n";
1214
1215 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1216
1217 # Find the right metadata tag and checks if we are going to
1218 # override it
1219 #
1220 # Note: This over writes the first metadata block it
1221 # encountered. If there are multiple Sections in the doc.xml, it
1222 # might not behave as you would expect
1223
1224 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1225## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1226## print STDERR "**** metamode = $metamode\n";
1227
1228 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1229 {
1230 my $name_attr = $attrHash->{'name'};
1231 if (($name_attr eq $metaname) && ($metamode eq "override"))
1232 {
1233 if (!defined $parser->{'parameters'}->{'poscount'})
1234 {
1235 $parser->{'parameters'}->{'poscount'} = 0;
1236 }
1237 else
1238 {
1239 $parser->{'parameters'}->{'poscount'}++;
1240 }
1241
1242 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1243 {
1244 ##print STDERR "#### got match!!\n";
1245 # Get the value and override the current value
1246 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1247 $attrHash->{'_content'} = $metavalue;
1248
1249 # Don't want it to wipe out any other pieces of metadata
1250 $parser->{'parameters'}->{'metamode'} = "done";
1251 }
1252 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1253 {
1254 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1255 $attrHash->{'_content'} = $metavalue;
1256 $parser->{'parameters'}->{'metamode'} = "done";
1257 }
1258 }
1259 }
1260
1261 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1262 return [$tagname => $attrHash];
1263}
1264
1265# This method exists purely for catching invalid section numbers that the client
1266# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1267# we've seen all the Sections in the doc.xml, and none of their section nums matched
1268# if the metamode has not been set to 'done' by then.
1269sub dxml_archive
1270{
1271 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1272 my $metamode = $parser->{'parameters'}->{'metamode'};
1273
1274 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1275 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1276
1277# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1278
1279 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1280 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1281
1282 my $metaname = $parser->{'parameters'}->{'metaname'};
1283 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1284
1285 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1286 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1287 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1288 print STDERR "\tin section $opt_doc_secnum.\n";
1289 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1290 }
1291
1292 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1293 return [$tagname => $attrHash];
1294}
1295
1296sub dxml_description
1297{
1298 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1299 my $metamode = $parser->{'parameters'}->{'metamode'};
1300
1301 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1302 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1303
1304 print STDERR "**** Processing a closing </Description> tag \n";
1305# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1306
1307 # Accumulate the metadata
1308
1309 # We'll be accumulating metadata at this point if we haven't found and therefore
1310 # haven't processed the metadata yet.
1311 # For subsections, this means that if we're at a matching subsection, but haven't
1312 # found the correct metaname to override in that subsection, we accumulate it as new
1313 # meta in the subsection by adding it to the current description.
1314 # If there's no subsection info for the metadata, it will accumulate at the top level
1315 # section description if we hadn't found a matching metaname to override at this point.
1316
1317 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1318 # at the toplevel section. In the latter case, it means we want to process meta in the
1319 # toplevel section. So the eq check between the values below will work in all cases.
1320
1321 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1322 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1323 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1324
1325 if ($curr_secnum eq $opt_doc_secnum
1326 && ($metamode eq "accumulate" || $metamode eq "override")) {
1327 if ($metamode eq "override") {
1328 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1329 }
1330
1331 # If get to here and metamode is override, the this means there
1332 # was no existing value to overide => treat as an append operation
1333
1334 # Tack a new metadata tag on to the end of the <Metadata>+ block
1335 my $metaname = $parser->{'parameters'}->{'metaname'};
1336 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1337
1338 my $metadata_attr = {
1339 '_content' => $metavalue,
1340 'name' => $metaname,
1341 'mode' => "accumulate"
1342 };
1343
1344 my $append_metadata = [ "Metadata" => $metadata_attr ];
1345 my $description_content = $attrHash->{'_content'};
1346
1347 print "Appending metadata to doc.xml\n";
1348
1349 if (ref($description_content)) {
1350 # got some existing interesting nested content
1351 push(@$description_content, " ", $append_metadata ,"\n ");
1352 }
1353 else {
1354 #description_content is most likely a string such as "\n"
1355 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1356 }
1357
1358 $parser->{'parameters'}->{'metamode'} = "done";
1359 }
1360 else {
1361 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1362## print STDERR "**** NOT ACCUMULATE?!? \n";
1363 }
1364
1365 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1366 return [$tagname => $attrHash];
1367}
1368
1369
1370sub dxml_start_section
1371{
1372 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1373
1374 my $new_depth = scalar(@$contextArray);
1375
1376 print STDERR "**** START SECTION \n";
1377
1378 if ($new_depth == 1) {
1379 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1380 $parser->{'parameters'}->{'curr_section_num'} = "";
1381 }
1382
1383 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1384 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1385
1386 my $new_secnum;
1387
1388 if ($new_depth > $old_depth) {
1389 # child subsection
1390 $new_secnum = "$old_secnum.1";
1391 }
1392 elsif ($new_depth == $old_depth) {
1393 # sibling section => increase it's value by 1
1394 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1395 $tail_num++;
1396 $new_secnum = $old_secnum;
1397 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1398 }
1399 else {
1400 # back up to parent section => lopp off tail
1401 $new_secnum = $old_secnum;
1402 $new_secnum =~ s/\.\d+$//;
1403 }
1404
1405 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1406 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1407
1408 1;
1409}
1410
1411sub edit_xml_file
1412{
1413 my $self = shift @_;
1414 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1415
1416 # use XML::Rules to add it in (read in and out again)
1417 my $parser = XML::Rules->new(start_rules => $start_rules,
1418 rules => $rules,
1419 style => 'filter',
1420 output_encoding => 'utf8' );
1421
1422 my $xml_in = "";
1423 if (!open(MIN,"<$filename")) {
1424 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1425 }
1426 else {
1427 # Read all the text in
1428 my $line;
1429 while (defined ($line=<MIN>)) {
1430 $xml_in .= $line;
1431 }
1432 close(MIN);
1433
1434 my $MOUT;
1435 if (!open($MOUT,">$filename")) {
1436 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1437 }
1438 else {
1439 # Matched lines will get handled by the call backs
1440## my $xml_out = "";
1441
1442 binmode($MOUT,":utf8");
1443 $parser->filter($xml_in,$MOUT, $options);
1444
1445# binmode(MOUT,":utf8");
1446# print MOUT $xml_out;
1447 close($MOUT);
1448 }
1449 }
1450
1451 # copy across any custom error information that was stored during parsing
1452 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1453}
1454
1455sub edit_doc_xml
1456{
1457 my $self = shift @_;
1458 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1459
1460 my $info_mess = <<RAWEND;
1461****************************
1462 edit_doc_xml()
1463****************************
1464doc_xml_filename = $doc_xml_filename
1465metaname = $metaname
1466metavalue = $metavalue
1467metapos = $metapos
1468metamode = $metamode
1469opt_secnum = $opt_secnum
1470prevmetavalue = $prevmetavalue
1471****************************
1472RAWEND
1473
1474 $gsdl_cgi->generate_message($info_mess);
1475
1476 # To monitor which section/subsection number we are in
1477 my @start_rules =
1478 ( 'Section' => \&dxml_start_section );
1479
1480 # use XML::Rules to add it in (read in and out again)
1481 # Set the call back functions
1482 my @rules =
1483 ( _default => 'raw',
1484 'Metadata' => \&dxml_metadata,
1485 'Description' => \&dxml_description,
1486 'Archive' => \&dxml_archive); # just for catching errors at end
1487
1488 # Sets the parameters
1489 my $options = { 'metaname' => $metaname,
1490 'metapos' => $metapos,
1491 'metavalue' => $metavalue,
1492 'metamode' => $metamode,
1493 'prevmetavalue' => $prevmetavalue };
1494
1495 if (defined $opt_secnum) {
1496 $options->{'secnum'} = $opt_secnum;
1497 }
1498
1499 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1500}
1501
1502sub set_archives_metadata_entry
1503{
1504 my $self = shift @_;
1505 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1506
1507 my $info_mess = <<RAWEND;
1508****************************
1509 set_archives_metadata_entry()
1510****************************
1511archive_dir = $archive_dir
1512collect_dir = $collect_dir
1513collect = $collect
1514infodbtype = $infodbtype
1515docid = $docid
1516metaname = $metaname
1517metapos = $metapos
1518metavalue = $metavalue
1519metamode = $metamode
1520prevmetavalue = $prevmetavalue
1521****************************
1522RAWEND
1523
1524 $gsdl_cgi->generate_message($info_mess);
1525
1526 # Obtain the doc.xml path for the specified docID
1527 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1528
1529 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1530 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1531 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1532
1533 # The $doc_xml_file is relative to the archives, and now let's get the full path
1534 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1535 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1536
1537 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1538 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1539 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1540 if(!defined $metapos && $metamode eq "override") {
1541 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1542 }
1543
1544 # Edit the doc.xml file with the specified metadata name, value and position.
1545 # TODO: there is a potential problem here as this edit_doc_xml function
1546 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1547 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1548
1549 # dxml_metadata method ignores metapos if metamode anything other than override
1550 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1551 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1552
1553 # return 0; # return 0 for now to indicate no error
1554 return (defined $self->{'error_msg'}) ? 1 : 0;
1555}
1556
1557
1558sub set_archives_metadata
1559{
1560 my $self = shift @_;
1561
1562 my $username = $self->{'username'};
1563 my $collect = $self->{'collect'};
1564 my $gsdl_cgi = $self->{'gsdl_cgi'};
1565
1566 if ($baseaction::authentication_enabled) {
1567 # Ensure the user is allowed to edit this collection
1568 $self->authenticate_user($username, $collect);
1569 }
1570
1571 # Make sure the collection isn't locked by someone else
1572 $self->lock_collection($username, $collect);
1573
1574 $self->_set_archives_metadata(@_);
1575
1576 # Release the lock once it is done
1577 $self->unlock_collection($username, $collect);
1578}
1579
1580sub _set_archives_metadata_array
1581{
1582 my $self = shift @_;
1583
1584 my $collect = $self->{'collect'};
1585 my $gsdl_cgi = $self->{'gsdl_cgi'};
1586 my $site = $self->{'site'};
1587 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1588
1589 # look up additional args
1590
1591 my $infodbtype = $self->{'infodbtype'};
1592
1593 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1594
1595 my $json_str = $self->{'json'};
1596 my $doc_array = decode_json $json_str;
1597
1598
1599 my $global_status = 0;
1600 my $global_mess = "";
1601
1602 my @all_docids = ();
1603
1604 foreach my $doc_array_rec ( @$doc_array ) {
1605 my $status = -1;
1606 my $docid = $doc_array_rec->{'docid'};
1607
1608 push(@all_docids,$docid);
1609
1610 my $metaname = $doc_array_rec->{'metaname'};
1611 if(defined $metaname) {
1612
1613 my $metapos = $doc_array_rec->{'metapos'} || 0;
1614 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1615 my $metavalue = $doc_array_rec->{'metavalue'};
1616 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1617
1618 # Some sanity checks
1619 $metapos = 0 if (!defined $metapos);
1620
1621 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1622 # make "accumulate" the default (less destructive, as it won't actually
1623 # delete any existing values)
1624 $metamode = "accumulate";
1625 }
1626
1627 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1628 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1629 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1630 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1631
1632 foreach my $metatable_rec ( @$metatable ) {
1633 $metaname = $metatable_rec->{'metaname'};
1634 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1635 my $metapos = undef;
1636 my $prevmetavalue = undef;
1637 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1638
1639 foreach my $metavalue ( @$metavals ) {
1640 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1641 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1642
1643 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1644 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1645 # metavalue for this metaname will be lost
1646 $metamode = "accumulate";
1647 }
1648 }
1649 }
1650 }
1651
1652 if ($status != 0) {
1653 # Catch error if set infodb entry failed
1654 $global_status = $status;
1655 $global_mess .= "Failed to set metadata key: $docid\n";
1656 $global_mess .= "Exit status: $status\n";
1657 $global_mess .= "System Error Message: $!\n";
1658 $global_mess .= "-" x 20 . "\n";
1659 }
1660 }
1661
1662 if ($global_status != 0) {
1663 $global_mess .= "PATH: $ENV{'PATH'}\n";
1664 $gsdl_cgi->generate_error($global_mess);
1665 }
1666 else {
1667 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1668 $gsdl_cgi->generate_ok_message($mess);
1669 }
1670}
1671
1672sub set_archives_metadata_array
1673{
1674 my $self = shift @_;
1675
1676 my $username = $self->{'username'};
1677 my $collect = $self->{'collect'};
1678 my $gsdl_cgi = $self->{'gsdl_cgi'};
1679# my $gsdlhome = $self->{'gsdlhome'};
1680
1681 if ($baseaction::authentication_enabled) {
1682 # Ensure the user is allowed to edit this collection
1683 &authenticate_user($gsdl_cgi, $username, $collect);
1684 }
1685
1686 my $site = $self->{'site'};
1687 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1688
1689 $gsdl_cgi->checked_chdir($collect_dir);
1690
1691 # Obtain the collect dir
1692 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1693
1694 # Make sure the collection isn't locked by someone else
1695 $self->lock_collection($username, $collect);
1696
1697 $self->_set_archives_metadata_array(@_);
1698
1699 # Release the lock once it is done
1700 $self->unlock_collection($username, $collect);
1701}
1702
1703sub _remove_archives_metadata
1704{
1705 my $self = shift @_;
1706
1707 my $collect = $self->{'collect'};
1708 my $gsdl_cgi = $self->{'gsdl_cgi'};
1709# my $gsdlhome = $self->{'gsdlhome'};
1710 my $infodbtype = $self->{'infodbtype'};
1711
1712 my $site = $self->{'site'};
1713
1714 # Obtain the collect and archive dir
1715 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1716
1717 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1718
1719 # look up additional args
1720 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1721
1722 my $metaname = $self->{'metaname'};
1723 my $metapos = $self->{'metapos'};
1724
1725 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1726
1727 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1728 # default to deleting the first metavalue for the given metaname
1729 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1730 # seem to get deleted in one single remove_archives_meta action invocation.
1731 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1732 # deleted, both get deleted.
1733 if(!defined $metapos && !defined $metavalue) {
1734 $metapos = 0;
1735 }
1736
1737 my $metamode = $self->{'metamode'} || undef;
1738
1739 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1740 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1741
1742 # This now stores the full pathname
1743 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1744
1745 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1746# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1747
1748 if ($status == 0)
1749 {
1750 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1751 $mess .= " $metaname";
1752 $mess .= "->[$metapos]" if (defined $metapos);
1753
1754 $gsdl_cgi->generate_ok_message($mess);
1755 }
1756 else
1757 {
1758 my $mess .= "Failed to remove archives metadata key: $docid\n";
1759 $mess .= "Exit status: $status\n";
1760 $mess .= "System Error Message: $!\n";
1761 $mess .= "-" x 20 . "\n";
1762
1763 $gsdl_cgi->generate_error($mess);
1764 }
1765
1766 #return $status; # in case calling functions have a use for this
1767}
1768
1769sub remove_archives_metadata
1770{
1771 my $self = shift @_;
1772
1773 my $username = $self->{'username'};
1774 my $collect = $self->{'collect'};
1775 my $gsdl_cgi = $self->{'gsdl_cgi'};
1776
1777 if ($baseaction::authentication_enabled)
1778 {
1779 # Ensure the user is allowed to edit this collection
1780 &authenticate_user($gsdl_cgi, $username, $collect);
1781 }
1782
1783 # Make sure the collection isn't locked by someone else
1784 $self->lock_collection($username, $collect);
1785
1786 $self->_remove_archives_metadata(@_);
1787
1788 # Release the lock once it is done
1789 $self->unlock_collection($username, $collect);
1790}
1791
1792sub remove_from_doc_xml
1793{
1794 my $self = shift @_;
1795 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1796
1797 my @start_rules = ('Section' => \&dxml_start_section);
1798
1799 # Set the call-back functions for the metadata tags
1800 my @rules =
1801 (
1802 _default => 'raw',
1803 'Metadata' => \&rfdxml_metadata
1804 );
1805
1806 my $parser = XML::Rules->new
1807 (
1808 start_rules => \@start_rules,
1809 rules => \@rules,
1810 style => 'filter',
1811 output_encoding => 'utf8'
1812 );
1813
1814 my $status = 0;
1815 my $xml_in = "";
1816 if (!open(MIN,"<$doc_xml_filename"))
1817 {
1818 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1819 $status = 1;
1820 }
1821 else
1822 {
1823 # Read them in
1824 my $line;
1825 while (defined ($line=<MIN>)) {
1826 $xml_in .= $line;
1827 }
1828 close(MIN);
1829
1830 # Filter with the call-back functions
1831 my $xml_out = "";
1832
1833 my $MOUT;
1834 if (!open($MOUT,">$doc_xml_filename")) {
1835 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1836 $status = 1;
1837 }
1838 else {
1839 binmode($MOUT,":utf8");
1840 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1841 close($MOUT);
1842 }
1843 }
1844 return $status;
1845}
1846
1847sub rfdxml_metadata
1848{
1849 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1850
1851 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1852 {
1853 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1854 return [$tagname => $attrHash];
1855 }
1856
1857 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1858 {
1859 if (!defined $parser->{'parameters'}->{'poscount'})
1860 {
1861 $parser->{'parameters'}->{'poscount'} = 0;
1862 }
1863 else
1864 {
1865 $parser->{'parameters'}->{'poscount'}++;
1866 }
1867
1868 # if overriding but no metapos, then clear all the meta for this metaname
1869 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
1870 return [];
1871 }
1872 }
1873
1874 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1875 {
1876 return [];
1877 }
1878
1879 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1880 {
1881 return [];
1882 }
1883
1884 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1885 return [$tagname => $attrHash];
1886}
1887
1888sub mxml_metadata
1889{
1890 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1891 my $metaname = $parser->{'parameters'}->{'metaname'};
1892 my $metamode = $parser->{'parameters'}->{'metamode'};
1893
1894 # Report error if we don't see FileName tag before this
1895 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1896
1897 # Don't do anything if we are not in the right FileSet
1898 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1899 if ($file_regexp =~ /\.\*/) {
1900 # Only interested in a file_regexp if it specifies precisely one
1901 # file.
1902 # So, skip anything with a .* in it as it is too general
1903 return [$tagname => $attrHash];
1904 }
1905 my $src_file = $parser->{'parameters'}->{'src_file'};
1906 if (!($src_file =~ /$file_regexp/)) {
1907 return [$tagname => $attrHash];
1908 }
1909## print STDERR "*** mxl metamode = $metamode\n";
1910
1911 # Find the right metadata tag and checks if we are going to override it
1912 my $name_attr = $attrHash->{'name'};
1913 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1914 # Get the value and override the current value
1915 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1916 $attrHash->{'_content'} = $metavalue;
1917
1918## print STDERR "**** overrideing metadata.xml\n";
1919
1920 # Don't want it to wipe out any other pieces of metadata
1921 $parser->{'parameters'}->{'metamode'} = "done";
1922 }
1923
1924 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1925 return [$tagname => $attrHash];
1926}
1927
1928
1929sub mxml_description
1930{
1931 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1932 my $metamode = $parser->{'parameters'}->{'metamode'};
1933
1934 # Failed... Report error if we don't see FileName tag before this
1935 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1936
1937 # Don't do anything if we are not in the right FileSet
1938 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1939 if ($file_regexp =~ m/\.\*/) {
1940 # Only interested in a file_regexp if it specifies precisely one
1941 # file.
1942 # So, skip anything with a .* in it as it is too general
1943 return [$tagname => $attrHash];
1944 }
1945 my $src_file = $parser->{'parameters'}->{'src_file'};
1946
1947 if (!($src_file =~ m/$file_regexp/)) {
1948 return [$tagname => $attrHash];
1949 }
1950
1951 # Accumulate the metadata block to the end of the description block
1952 # Note: This adds metadata block to all description blocks, so if there are
1953 # multiple FileSets, it will add to all of them
1954 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1955 # if metamode was "override" but get to here then it failed to
1956 # find an item to override, in which case it should append its
1957 # value to the end, just like the "accumulate" mode
1958
1959 # tack a new metadata tag on to the end of the <Metadata>+ block
1960 my $metaname = $parser->{'parameters'}->{'metaname'};
1961 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1962
1963 my $metadata_attr = { '_content' => $metavalue,
1964 'name' => $metaname,
1965 'mode' => "accumulate" };
1966
1967 my $append_metadata = [ "Metadata" => $metadata_attr ];
1968 my $description_content = $attrHash->{'_content'};
1969
1970## print STDERR "*** appending to metadata.xml\n";
1971
1972 # append the new metadata element to the end of the current
1973 # content contained inside this tag
1974 if (ref($description_content) eq "") {
1975 # => string or numeric literal
1976 # this is caused by a <Description> block has no <Metadata> child elements
1977 # => set up an empty array in '_content'
1978 $attrHash->{'_content'} = [ "\n" ];
1979 $description_content = $attrHash->{'_content'};
1980 }
1981
1982 push(@$description_content," ", $append_metadata ,"\n ");
1983 $parser->{'parameters'}->{'metamode'} = "done";
1984 }
1985
1986 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1987 return [$tagname => $attrHash];
1988}
1989
1990
1991sub mxml_filename
1992{
1993 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1994
1995 # Store the filename of the Current Fileset
1996 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1997 # FileName tag must come before Description tag
1998 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1999
2000 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2001 return [$tagname => $attrHash];
2002}
2003
2004
2005sub mxml_fileset
2006{
2007 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2008
2009 # Initilise the current_file
2010 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2011 # FileName tag must come before Description tag
2012 $parser->{'parameters'}->{'current_file'} = "";
2013
2014 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2015 return [$tagname => $attrHash];
2016}
2017
2018
2019sub edit_metadata_xml
2020{
2021 my $self = shift @_;
2022 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
2023
2024 # Set the call-back functions for the metadata tags
2025 my @rules =
2026 ( _default => 'raw',
2027 'FileName' => \&mxml_filename,
2028 'Metadata' => \&mxml_metadata,
2029 'Description' => \&mxml_description,
2030 'FileSet' => \&mxml_fileset);
2031
2032 # use XML::Rules to add it in (read in and out again)
2033 my $parser = XML::Rules->new(rules => \@rules,
2034 style => 'filter',
2035 output_encoding => 'utf8');
2036
2037 if (!-e $metadata_xml_filename) {
2038
2039 if (open(MOUT,">$metadata_xml_filename")) {
2040
2041 my $src_file_re = &util::filename_to_regex($src_file);
2042 # shouldn't the following also be in the above utility routine??
2043 # $src_file_re =~ s/\./\\./g;
2044
2045 print MOUT "<?xml version=\"1.0\"?>\n";
2046 print MOUT "<DirectoryMetadata>\n";
2047 print MOUT " <FileSet>\n";
2048 print MOUT " <FileName>$src_file_re</FileName>\n";
2049 print MOUT " <Description>\n";
2050 print MOUT " </Description>\n";
2051 print MOUT " </FileSet>\n";
2052 print MOUT "</DirectoryMetadata>\n";
2053
2054 close(MOUT);
2055 }
2056 else {
2057 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2058 }
2059 }
2060
2061
2062 my $xml_in = "";
2063 if (!open(MIN,"<$metadata_xml_filename")) {
2064 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2065 }
2066 else {
2067 # Read them in
2068 my $line;
2069 while (defined ($line=<MIN>)) {
2070 $xml_in .= $line;
2071 }
2072 close(MIN);
2073
2074 # Filter with the call-back functions
2075 my $xml_out = "";
2076
2077 my $MOUT;
2078 if (!open($MOUT,">$metadata_xml_filename")) {
2079 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2080 }
2081 else {
2082 binmode($MOUT,":utf8");
2083
2084 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2085 # At the moment, I will just hack it!
2086 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2087 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2088 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2089 #print MOUT $xml_out;
2090
2091 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2092 metavalue => $metavalue,
2093 metamode => $metamode,
2094 src_file => $src_file,
2095 current_file => undef} );
2096 close($MOUT);
2097 }
2098 }
2099}
2100
2101
2102sub set_import_metadata
2103{
2104 my $self = shift @_;
2105
2106 my $username = $self->{'username'};
2107 my $collect = $self->{'collect'};
2108 my $gsdl_cgi = $self->{'gsdl_cgi'};
2109
2110 if ($baseaction::authentication_enabled) {
2111 # Ensure the user is allowed to edit this collection
2112 $self->authenticate_user($username, $collect);
2113 }
2114
2115 # Make sure the collection isn't locked by someone else
2116 $self->lock_collection($username, $collect);
2117
2118 $self->_set_import_metadata(@_);
2119
2120 # Release the lock once it is done
2121 $self->unlock_collection($username, $collect);
2122
2123}
2124
2125sub set_import_metadata_array
2126{
2127 my $self = shift @_;
2128
2129 my $username = $self->{'username'};
2130 my $collect = $self->{'collect'};
2131 my $gsdl_cgi = $self->{'gsdl_cgi'};
2132# my $gsdlhome = $self->{'gsdlhome'};
2133
2134 if ($baseaction::authentication_enabled) {
2135 # Ensure the user is allowed to edit this collection
2136 &authenticate_user($gsdl_cgi, $username, $collect);
2137 }
2138
2139 my $site = $self->{'site'};
2140 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2141
2142 $gsdl_cgi->checked_chdir($collect_dir);
2143
2144 # Make sure the collection isn't locked by someone else
2145 $self->lock_collection($username, $collect);
2146
2147 $self->_set_import_metadata_array(@_);
2148
2149 # Release the lock once it is done
2150 $self->unlock_collection($username, $collect);
2151
2152}
2153
2154
2155sub _set_import_metadata_array
2156{
2157 my $self = shift @_;
2158
2159 my $collect = $self->{'collect'};
2160 my $gsdl_cgi = $self->{'gsdl_cgi'};
2161
2162 my $site = $self->{'site'};
2163 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2164
2165 # look up additional args
2166
2167 my $infodbtype = $self->{'infodbtype'};
2168
2169 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2170 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2171
2172 my $json_str = $self->{'json'};
2173 my $doc_array = decode_json $json_str;
2174
2175 my $global_status = 0;
2176 my $global_mess = "";
2177
2178 my @all_docids = ();
2179
2180 foreach my $doc_array_rec ( @$doc_array )
2181 {
2182 my $status = -1;
2183 my $docid = $doc_array_rec->{'docid'};
2184
2185 push(@all_docids,$docid);
2186
2187 my $metaname = $doc_array_rec->{'metaname'};
2188 if (defined $metaname) {
2189 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2190 my $metavalue = $doc_array_rec->{'metavalue'};
2191
2192 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2193 # make "accumulate" the default (less destructive, as won't actually
2194 # delete any existing values)
2195 $metamode = "accumulate";
2196 }
2197
2198 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metavalue, $metamode);
2199
2200 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2201 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2202
2203 foreach my $metatable_rec ( @$metatable ) {
2204 $metaname = $metatable_rec->{'metaname'};
2205 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2206 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2207 # make "accumulate" the default (less destructive, as won't actually
2208 # delete any existing values)
2209 $metamode = "accumulate";
2210 }
2211 # no metapos for import
2212 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2213
2214 foreach my $metavalue ( @$metavals ) {
2215 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metavalue, $metamode);
2216 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2217 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2218 # metavalue for this metaname will be lost
2219 $metamode = "accumulate";
2220 }
2221 }
2222 }
2223 }
2224 }
2225
2226 # always a success message
2227 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2228 $gsdl_cgi->generate_ok_message($mess);
2229}
2230
2231# always returns true (1)
2232sub set_import_metadata_entry
2233{
2234 my $self = shift @_;
2235 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metavalue, $metamode) = @_;
2236
2237 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2238 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2239 my $metadata_xml_file;
2240 my $import_filename = undef;
2241
2242 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2243
2244 # This now stores the full pathname
2245 $import_filename = $doc_rec->{'src-file'}->[0];
2246
2247 # figure out correct metadata.xml file [?]
2248 # Assuming the metadata.xml file is next to the source file
2249 # Note: This will not work if it is using the inherited metadata from the parent folder
2250 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2251 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2252
2253 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
2254 return 1;
2255}
2256
2257sub _remove_import_metadata
2258{
2259 my $self = shift @_;
2260
2261 my $collect = $self->{'collect'};
2262 my $gsdl_cgi = $self->{'gsdl_cgi'};
2263# my $gsdlhome = $self->{'gsdlhome'};
2264 my $infodbtype = $self->{'infodbtype'};
2265
2266 # Obtain the collect dir
2267 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2268 my $site = $self->{'site'};
2269 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2270
2271 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2272 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2273
2274 # look up additional args
2275 my $docid = $self->{'d'};
2276 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2277 {
2278 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
2279 }
2280
2281 my $metaname = $self->{'metaname'};
2282 my $metavalue = $self->{'metavalue'};
2283 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2284
2285 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2286 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2287 my $metadata_xml_file;
2288 my $import_filename = undef;
2289 if (defined $docid)
2290 {
2291 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2292 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2293
2294 # This now stores the full pathname
2295 $import_filename = $doc_rec->{'src-file'}->[0];
2296 }
2297
2298 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2299 {
2300 $gsdl_cgi->generate_error_message("There is no metadata\n");
2301 }
2302
2303 # figure out correct metadata.xml file [?]
2304 # Assuming the metadata.xml file is next to the source file
2305 # Note: This will not work if it is using the inherited metadata from the parent folder
2306 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2307 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2308
2309 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
2310
2311 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2312 $mess .= " $metaname";
2313 $mess .= " = $metavalue\n";
2314
2315 $gsdl_cgi->generate_ok_message($mess);
2316
2317 #return $status; # in case calling functions have a use for this
2318}
2319
2320sub remove_import_metadata
2321{
2322 my $self = shift @_;
2323
2324 my $username = $self->{'username'};
2325 my $collect = $self->{'collect'};
2326 my $gsdl_cgi = $self->{'gsdl_cgi'};
2327
2328 if ($baseaction::authentication_enabled) {
2329 # Ensure the user is allowed to edit this collection
2330 &authenticate_user($gsdl_cgi, $username, $collect);
2331 }
2332
2333 # Make sure the collection isn't locked by someone else
2334 $self->lock_collection($username, $collect);
2335
2336 $self->_remove_import_metadata(@_);
2337
2338 # Release the lock once it is done
2339 $self->unlock_collection($username, $collect);
2340
2341}
2342
2343sub remove_from_metadata_xml
2344{
2345 my $self = shift @_;
2346 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
2347
2348 # Set the call-back functions for the metadata tags
2349 my @rules =
2350 (
2351 _default => 'raw',
2352 'Metadata' => \&rfmxml_metadata,
2353 'FileName' => \&mxml_filename
2354 );
2355
2356 my $parser = XML::Rules->new
2357 (
2358 rules => \@rules,
2359 style => 'filter',
2360 output_encoding => 'utf8'
2361 );
2362
2363 my $xml_in = "";
2364 if (!open(MIN,"<$metadata_xml_filename"))
2365 {
2366 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2367 }
2368 else
2369 {
2370 # Read them in
2371 my $line;
2372 while (defined ($line=<MIN>)) {
2373 $xml_in .= $line;
2374 }
2375 close(MIN);
2376
2377 # Filter with the call-back functions
2378 my $xml_out = "";
2379
2380 my $MOUT;
2381 if (!open($MOUT,">$metadata_xml_filename")) {
2382 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2383 }
2384 else {
2385 binmode($MOUT,":utf8");
2386 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
2387 close($MOUT);
2388 }
2389 }
2390}
2391
2392sub rfmxml_metadata
2393{
2394 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2395
2396 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2397 {
2398 return [];
2399 }
2400
2401 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2402 return [$tagname => $attrHash];
2403}
2404
2405sub _remove_live_metadata
2406{
2407 my $self = shift @_;
2408
2409 my $collect = $self->{'collect'};
2410 my $gsdl_cgi = $self->{'gsdl_cgi'};
2411# my $gsdlhome = $self->{'gsdlhome'};
2412 my $infodbtype = $self->{'infodbtype'};
2413
2414 # Obtain the collect dir
2415 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2416 my $site = $self->{'site'};
2417 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2418
2419
2420 # look up additional args
2421 my $docid = $self->{'d'};
2422 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2423 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2424 }
2425
2426 # Generate the dbkey
2427 my $metaname = $self->{'metaname'};
2428 my $dbkey = "$docid.$metaname";
2429
2430 # To people who know $collect_tail please add some comments
2431 # Obtain the live gdbm_db path
2432 my $collect_tail = $collect;
2433 $collect_tail =~ s/^.*[\/\\]//;
2434 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2435 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2436
2437 # Remove the key
2438 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2439 my $status = system($cmd);
2440 if ($status != 0) {
2441 # Catch error if gdbmdel failed
2442 my $mess = "Failed to set metadata key: $dbkey\n";
2443
2444 $mess .= "PATH: $ENV{'PATH'}\n";
2445 $mess .= "cmd = $cmd\n";
2446 $mess .= "Exit status: $status\n";
2447 $mess .= "System Error Message: $!\n";
2448
2449 $gsdl_cgi->generate_error($mess);
2450 }
2451 else {
2452 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2453 }
2454
2455}
2456
2457sub remove_live_metadata
2458{
2459 my $self = shift @_;
2460
2461 my $username = $self->{'username'};
2462 my $collect = $self->{'collect'};
2463 my $gsdl_cgi = $self->{'gsdl_cgi'};
2464 my $gsdlhome = $self->{'gsdlhome'};
2465
2466 if ($baseaction::authentication_enabled) {
2467 # Ensure the user is allowed to edit this collection
2468 &authenticate_user($gsdl_cgi, $username, $collect);
2469 }
2470
2471 # Make sure the collection isn't locked by someone else
2472 $self->lock_collection($username, $collect);
2473
2474 $self->_remove_live_metadata(@_);
2475
2476 $self->unlock_collection($username, $collect);
2477}
2478
2479sub remove_metadata
2480{
2481 my $self = shift @_;
2482
2483 my $where = $self->{'where'};
2484 if(!$where) {
2485 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2486 return;
2487 }
2488
2489 my $username = $self->{'username'};
2490 my $collect = $self->{'collect'};
2491 my $gsdl_cgi = $self->{'gsdl_cgi'};
2492
2493 if ($baseaction::authentication_enabled) {
2494 # Ensure the user is allowed to edit this collection
2495 &authenticate_user($gsdl_cgi, $username, $collect);
2496 }
2497
2498 # Make sure the collection isn't locked by someone else
2499 $self->lock_collection($username, $collect);
2500
2501 # check which directories need to be processed, specified in $where as
2502 # any combination of import|archives|index|live
2503 if($where =~ m/import/) {
2504 $self->_remove_import_metadata(@_);
2505 }
2506 if($where =~ m/archives/) {
2507 $self->_remove_archives_metadata(@_);
2508 }
2509 if($where =~ m/index/) {
2510 $self->_remove_index_metadata(@_);
2511 }
2512
2513 # Release the lock once it is done
2514 $self->unlock_collection($username, $collect);
2515}
2516
2517# the internal version, without authentication
2518sub _remove_index_metadata
2519{
2520 my $self = shift @_;
2521
2522 my $collect = $self->{'collect'};
2523 my $gsdl_cgi = $self->{'gsdl_cgi'};
2524# my $gsdlhome = $self->{'gsdlhome'};
2525 my $infodbtype = $self->{'infodbtype'};
2526
2527 # Obtain the collect dir
2528 my $site = $self->{'site'};
2529 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2530 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2531
2532 # look up additional args
2533 my $docid = $self->{'d'};
2534 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2535 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2536 }
2537 my $metaname = $self->{'metaname'};
2538 my $metapos = $self->{'metapos'};
2539 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2540
2541 # To people who know $collect_tail please add some comments
2542 # Obtain the path to the database
2543 my $collect_tail = $collect;
2544 $collect_tail =~ s/^.*[\/\\]//;
2545 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2546 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2547
2548 # Read the docid entry
2549 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2550
2551 # Basically loop through and unescape_html the values
2552 foreach my $k (keys %$doc_rec) {
2553 my @escaped_v = ();
2554 foreach my $v (@{$doc_rec->{$k}}) {
2555 if ($k eq "contains") {
2556 # protect quotes in ".2;".3 etc
2557 $v =~ s/\"/\\\"/g;
2558 push(@escaped_v, $v);
2559 }
2560 else {
2561 my $ev = &ghtml::unescape_html($v);
2562 $ev =~ s/\"/\\\"/g;
2563 push(@escaped_v, $ev);
2564 }
2565 }
2566 $doc_rec->{$k} = \@escaped_v;
2567 }
2568
2569 # Check to make sure the key does exist
2570 if (!defined ($doc_rec->{$metaname})) {
2571 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2572 }
2573
2574 # Obtain the specified metadata pos
2575 if(!defined $metapos && !defined $metavalue) { #$metapos = 0 if (!defined $metapos);
2576 $metapos = 0;
2577 }
2578
2579
2580 # consider check key is defined before deleting?
2581 # Loop through the metadata array and ignore the specified position
2582 my $filtered_metadata = [];
2583 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2584 for (my $i=0; $i<$num_metadata_vals; $i++) {
2585 my $metaval = shift(@{$doc_rec->{$metaname}});
2586
2587 if (!defined $metavalue && $i != $metapos) {
2588 push(@$filtered_metadata,$metaval);
2589 }
2590
2591 if(defined $metavalue && !($metavalue eq $metaval))
2592 {
2593 push(@$filtered_metadata,$metaval);
2594 }
2595 }
2596 $doc_rec->{$metaname} = $filtered_metadata;
2597
2598 # Turn the record back to string
2599 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2600
2601 # Store it back to the database
2602 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2603 my $status = system($cmd);
2604 if ($status != 0) {
2605 my $mess = "Failed to set metadata key: $docid\n";
2606
2607 $mess .= "PATH: $ENV{'PATH'}\n";
2608 $mess .= "cmd = $cmd\n";
2609 $mess .= "Exit status: $status\n";
2610 $mess .= "System Error Message: $!\n";
2611
2612 $gsdl_cgi->generate_error($mess);
2613 }
2614 else {
2615 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2616 $mess .= " $metaname";
2617 $mess .= "->[$metapos]" if (defined $metapos);
2618
2619 $gsdl_cgi->generate_ok_message($mess);
2620 }
2621
2622 #return $status; # in case calling functions have a use for this
2623}
2624
2625sub remove_index_metadata
2626{
2627 my $self = shift @_;
2628
2629 my $username = $self->{'username'};
2630 my $collect = $self->{'collect'};
2631 my $gsdl_cgi = $self->{'gsdl_cgi'};
2632# my $gsdlhome = $self->{'gsdlhome'};
2633
2634 if ($baseaction::authentication_enabled) {
2635 # Ensure the user is allowed to edit this collection
2636 &authenticate_user($gsdl_cgi, $username, $collect);
2637 }
2638
2639 # Obtain the collect dir
2640 my $site = $self->{'site'};
2641 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2642 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2643
2644 # Make sure the collection isn't locked by someone else
2645 $self->lock_collection($username, $collect);
2646
2647 $self->_remove_index_metadata(@_);
2648
2649 # Release the lock once it is done
2650 $self->unlock_collection($username, $collect);
2651}
2652
2653
2654# Was trying to reused the codes, but the functions need to be broken
2655# down more before they can be reused, otherwise there will be too
2656# much overhead and duplicate process...
2657sub insert_metadata
2658{
2659 my $self = shift @_;
2660
2661 my $username = $self->{'username'};
2662 my $collect = $self->{'collect'};
2663 my $gsdl_cgi = $self->{'gsdl_cgi'};
2664 my $gsdlhome = $self->{'gsdlhome'};
2665 my $infodbtype = $self->{'infodbtype'};
2666
2667 # If the import metadata and gdbm database have been updated, we
2668 # need to insert some notification to warn user that the the text
2669 # they see at the moment is not indexed and require a rebuild.
2670 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2671
2672 if ($baseaction::authentication_enabled) {
2673 # Ensure the user is allowed to edit this collection
2674 $self->authenticate_user($username, $collect);
2675 }
2676
2677 # Obtain the collect and archive dir
2678 my $site = $self->{'site'};
2679 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2680 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2681 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2682
2683 # Make sure the collection isn't locked by someone else
2684 $self->lock_collection($username, $collect);
2685
2686 # Check additional args
2687 my $docid = $self->{'d'};
2688 if (!defined($docid)) {
2689 $gsdl_cgi->generate_error("No document id is specified: d=...");
2690 }
2691 my $metaname = $self->{'metaname'};
2692 if (!defined($metaname)) {
2693 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2694 }
2695 my $metavalue = $self->{'metavalue'};
2696 if (!defined($metavalue) || $metavalue eq "") {
2697 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2698 }
2699 # make "accumulate" the default (less destructive, as won't actually
2700 # delete any existing values)
2701 my $metamode = "accumulate";
2702
2703 #=======================================================================#
2704 # set_import_metadata [START]
2705 #=======================================================================#
2706 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2707 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2708 my $metadata_xml_file;
2709 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2710 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2711
2712 # This now stores the full pathname
2713 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2714
2715 # figure out correct metadata.xml file [?]
2716 # Assuming the metadata.xml file is next to the source file
2717 # Note: This will not work if it is using the inherited metadata from the parent folder
2718 my ($import_tailname, $import_dirname)
2719 = File::Basename::fileparse($import_filename);
2720 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2721
2722 # Shane's escape characters
2723 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2724 $metavalue =~ s/\,/&#44;/g;
2725 $metavalue =~ s/\:/&#58;/g;
2726 $metavalue =~ s/\|/&#124;/g;
2727 $metavalue =~ s/\(/&#40;/g;
2728 $metavalue =~ s/\)/&#41;/g;
2729 $metavalue =~ s/\[/&#91;/g;
2730 $metavalue =~ s/\\/&#92;/g;
2731 $metavalue =~ s/\]/&#93;/g;
2732 $metavalue =~ s/\{/&#123;/g;
2733 $metavalue =~ s/\}/&#125;/g;
2734 $metavalue =~ s/\"/&#34;/g;
2735 $metavalue =~ s/\`/&#96;/g;
2736 $metavalue =~ s/\n/_newline_/g;
2737
2738 # Edit the metadata.xml
2739 # Modified by Jeffrey from DL Consulting
2740 # Handle the case where there is one metadata.xml file for multiple FileSets
2741 # The XML filter needs to know whether it is in the right FileSet
2742 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2743 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2744 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2745 $metaname, $metavalue, $metamode, $import_tailname);
2746 #=======================================================================#
2747 # set_import_metadata [END]
2748 #=======================================================================#
2749
2750
2751 #=======================================================================#
2752 # set_metadata (accumulate version) [START]
2753 #=======================================================================#
2754 # To people who know $collect_tail please add some comments
2755 # Obtain path to the database
2756 my $collect_tail = $collect;
2757 $collect_tail =~ s/^.*[\/\\]//;
2758 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2759 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2760
2761 # Read the docid entry
2762 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2763
2764 foreach my $k (keys %$doc_rec) {
2765 my @escaped_v = ();
2766 foreach my $v (@{$doc_rec->{$k}}) {
2767 if ($k eq "contains") {
2768 # protect quotes in ".2;".3 etc
2769 $v =~ s/\"/\\\"/g;
2770 push(@escaped_v, $v);
2771 }
2772 else {
2773 my $ev = &ghtml::unescape_html($v);
2774 $ev =~ s/\"/\\\"/g;
2775 push(@escaped_v, $ev);
2776 }
2777 }
2778 $doc_rec->{$k} = \@escaped_v;
2779 }
2780
2781 # Protect the quotes
2782 $metavalue =~ s/\"/\\\"/g;
2783
2784 # Adds the pending macro
2785 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2786
2787 # If the metadata doesn't exist, create a new one
2788 if (!defined($doc_rec->{$metaname})){
2789 $doc_rec->{$metaname} = [ $macro_metavalue ];
2790 }
2791 # Else, let's acculumate the values
2792 else {
2793 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2794 }
2795
2796 # Generate the record string
2797 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2798
2799 # Store it into GDBM
2800 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2801 my $status = system($cmd);
2802 if ($status != 0) {
2803 # Catch error if gdbmget failed
2804 my $mess = "Failed to set metadata key: $docid\n";
2805
2806 $mess .= "PATH: $ENV{'PATH'}\n";
2807 $mess .= "cmd = $cmd\n";
2808 $mess .= "Exit status: $status\n";
2809 $mess .= "System Error Message: $!\n";
2810
2811 $gsdl_cgi->generate_error($mess);
2812 }
2813 else {
2814 my $mess = "insert-metadata successful: Key[$docid]\n";
2815 $mess .= " [In metadata.xml] $metaname";
2816 $mess .= " = $metavalue\n";
2817 $mess .= " [In database] $metaname";
2818 $mess .= " = $macro_metavalue\n";
2819 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2820 $gsdl_cgi->generate_ok_message($mess);
2821 }
2822 #=======================================================================#
2823 # set_metadata (accumulate version) [END]
2824 #=======================================================================#
2825
2826 # Release the lock once it is done
2827 $self->unlock_collection($username, $collect);
2828}
2829
28301;
Note: See TracBrowser for help on using the repository browser.