source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27162

Last change on this file since 27162 was 27162, checked in by ak19, 8 years ago

Fixed bug in remove_index_metadata: it was specifically including the metavalue to be deleted in the metadata that was to be retained (the erroneous line was pushing metavalue instead of metaval onto filtered_metadata).

File size: 78.1 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
101
102 #REMOVE METHODS
103 "remove-import-metadata" => {
104 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
105 'optional-args' => [ ] },
106
107 "remove-archives-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 "remove-live-metadata" => {
112 'compulsory-args' => [ "d", "metaname" ],
113 'optional-args' => [ ] },
114
115 "remove-index-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ],
117 'optional-args' => [ "metapos", "metavalue" ] },
118
119 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ "where", "metapos", "metavalue" ] },
122
123 #INSERT METHODS
124 "insert-metadata" => {
125 'compulsory-args' => [ "d", "metaname", "metavalue" ],
126 'optional-args' => [ ] }
127};
128
129
130sub new
131{
132 my $class = shift (@_);
133 my ($gsdl_cgi,$iis6_mode) = @_;
134
135 # Treat metavalue specially. To transmit this through a GET request
136 # the Javascript side has url-encoded it, so here we need to decode
137 # it before proceeding
138
139 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
140 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
141 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
142
143 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
144 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
145
146 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
147
148 return bless $self, $class;
149}
150
151
152sub get_live_metadata
153{
154 my $self = shift @_;
155
156 my $username = $self->{'username'};
157 my $collect = $self->{'collect'};
158 my $gsdl_cgi = $self->{'gsdl_cgi'};
159 my $gsdlhome = $self->{'gsdlhome'};
160 my $infodbtype = $self->{'infodbtype'};
161
162 # live metadata gets/saves value scoped (prefixed) by the current usename
163 # so (for now) let's not bother to enforce authentication
164
165 # Obtain the collect dir
166 my $site = $self->{'site'};
167 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
168 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
169
170 # Make sure the collection isn't locked by someone else
171 $self->lock_collection($username, $collect);
172
173 # look up additional args
174 my $docid = $self->{'d'};
175 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
176 $gsdl_cgi->generate_error("No docid (d=...) specified.");
177 }
178
179 # Generate the dbkey
180 my $metaname = $self->{'metaname'};
181 my $dbkey = "$docid.$metaname";
182
183 # To people who know $collect_tail please add some comments
184 # Obtain path to the database
185 my $collect_tail = $collect;
186 $collect_tail =~ s/^.*[\/|\\]//;
187 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
188 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
189
190 # Obtain the content of the key
191 my $cmd = "gdbmget $infodb_file_path $dbkey";
192 if (open(GIN,"$cmd |") == 0) {
193 # Catch error if gdbmget failed
194 my $mess = "Failed to get metadata key: $metaname\n";
195 $mess .= "$!\n";
196
197 $gsdl_cgi->generate_error($mess);
198 }
199 else {
200 binmode(GIN,":utf8");
201 # Read everything in and concatenate them into $metavalue
202 my $metavalue = "";
203 my $line;
204 while (defined ($line=<GIN>)) {
205 $metavalue .= $line;
206 }
207 close(GIN);
208 chomp($metavalue); # Get rid off the tailing newlines
209 $gsdl_cgi->generate_ok_message("$metavalue");
210 }
211
212 # Release the lock once it is done
213 $self->unlock_collection($username, $collect);
214}
215
216# just calls the index version
217sub get_metadata
218{
219 my $self = shift @_;
220 $self->get_index_metadata(@_);
221}
222
223sub get_index_metadata
224{
225 my $self = shift @_;
226
227 my $username = $self->{'username'};
228 my $collect = $self->{'collect'};
229 my $gsdl_cgi = $self->{'gsdl_cgi'};
230 my $gsdlhome = $self->{'gsdlhome'};
231
232 # Authenticate user if it is enabled
233 if ($baseaction::authentication_enabled) {
234 # Ensure the user is allowed to edit this collection
235 &authenticate_user($gsdl_cgi, $username, $collect);
236 }
237
238 # Obtain the collect dir
239 my $site = $self->{'site'};
240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
241 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
242
243 # Make sure the collection isn't locked by someone else
244 $self->lock_collection($username, $collect);
245
246 # look up additional args
247 my $docid = $self->{'d'};
248 my $metaname = $self->{'metaname'};
249 my $metapos = $self->{'metapos'};
250 my $infodbtype = $self->{'infodbtype'};
251
252 # To people who know $collect_tail please add some comments
253 # Obtain path to the database
254 my $collect_tail = $collect;
255 $collect_tail =~ s/^.*[\/\\]//;
256 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
257 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
258
259 # Read the docid entry
260 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
261
262 # Basically loop through and unescape_html the values
263 foreach my $k (keys %$doc_rec) {
264 my @escaped_v = ();
265 foreach my $v (@{$doc_rec->{$k}}) {
266 my $ev = &ghtml::unescape_html($v);
267 push(@escaped_v, $ev);
268 }
269 $doc_rec->{$k} = \@escaped_v;
270 }
271
272 # Obtain the specified metadata value
273 $metapos = 0 if (!defined $metapos);
274 my $metavalue = $doc_rec->{$metaname}->[$metapos];
275 $gsdl_cgi->generate_ok_message("$metavalue");
276
277 # Release the lock once it is done
278 $self->unlock_collection($username, $collect);
279}
280
281
282sub get_import_metadata
283{
284 my $self = shift @_;
285
286 my $username = $self->{'username'};
287 my $collect = $self->{'collect'};
288 my $gsdl_cgi = $self->{'gsdl_cgi'};
289 my $gsdlhome = $self->{'gsdlhome'};
290
291 # Authenticate user if it is enabled
292 if ($baseaction::authentication_enabled) {
293 # Ensure the user is allowed to edit this collection
294 &authenticate_user($gsdl_cgi, $username, $collect);
295 }
296
297 # Obtain the collect dir
298 my $site = $self->{'site'};
299 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
300 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
301
302 # Make sure the collection isn't locked by someone else
303 $self->lock_collection($username, $collect);
304
305 # look up additional args
306 my $docid = $self->{'d'};
307 my $metaname = $self->{'metaname'};
308 my $infodbtype = $self->{'infodbtype'};
309 if (!defined $docid)
310 {
311 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
312 }
313
314 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
315 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
316 my $metadata_xml_file;
317 my $import_filename = undef;
318
319
320 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
321 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
322 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
323
324 # This now stores the full pathname
325 $import_filename = $doc_rec->{'src-file'}->[0];
326
327 # figure out correct metadata.xml file [?]
328 # Assuming the metadata.xml file is next to the source file
329 # Note: This will not work if it is using the inherited metadata from the parent folder
330 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
331 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
332
333 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
334
335 # Release the lock once it is done
336 $self->unlock_collection($username, $collect);
337}
338
339sub get_metadata_from_metadata_xml
340{
341 my $self = shift @_;
342 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
343
344 my @rules =
345 (
346 _default => 'raw',
347 'Metadata' => \&gfmxml_metadata,
348 'FileName' => \&mxml_filename
349 );
350
351 my $parser = XML::Rules->new
352 (
353 rules => \@rules,
354 output_encoding => 'utf8'
355 );
356
357 my $xml_in = "";
358 if (!open(MIN,"<$metadata_xml_filename"))
359 {
360 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
361 }
362 else
363 {
364 # Read them in
365 my $line;
366 while (defined ($line=<MIN>)) {
367 $xml_in .= $line;
368 }
369 close(MIN);
370
371 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
372
373 if(defined $parser->{'pad'}->{'metavalue'})
374 {
375 return $parser->{'pad'}->{'metavalue'};
376 }
377 else
378 {
379 return "";
380 }
381 }
382}
383
384sub gfmxml_metadata
385{
386 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
387
388 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
389 {
390 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
391 }
392}
393
394sub get_archives_metadata
395{
396 my $self = shift @_;
397
398 my $username = $self->{'username'};
399 my $collect = $self->{'collect'};
400 my $gsdl_cgi = $self->{'gsdl_cgi'};
401 my $gsdlhome = $self->{'gsdlhome'};
402 my $infodbtype = $self->{'infodbtype'};
403
404 # Authenticate user if it is enabled
405 if ($baseaction::authentication_enabled) {
406 # Ensure the user is allowed to edit this collection
407 &authenticate_user($gsdl_cgi, $username, $collect);
408 }
409
410 # Obtain the collect dir
411 my $site = $self->{'site'};
412 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
413
414 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
415
416 # Make sure the collection isn't locked by someone else
417 $self->lock_collection($username, $collect);
418
419 # look up additional args
420 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
421 $docid_secnum = "" if (!defined $docid_secnum);
422
423 my $metaname = $self->{'metaname'};
424 my $metapos = $self->{'metapos'};
425 $metapos = 0 if (!defined $metapos);
426
427 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
428 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
429
430 # This now stores the full pathname
431 my $doc_filename = $doc_rec->{'doc-file'}->[0];
432
433 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
434
435 # Release the lock once it is done
436 $self->unlock_collection($username, $collect);
437}
438
439sub get_metadata_from_archive_xml
440{
441 my $self = shift @_;
442 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
443
444 my @start_rules = ('Section' => \&dxml_start_section);
445
446 my @rules =
447 (
448 _default => 'raw',
449 'Metadata' => \&gfdxml_metadata
450 );
451
452 my $parser = XML::Rules->new
453 (
454 start_rules => \@start_rules,
455 rules => \@rules,
456 output_encoding => 'utf8'
457 );
458
459 my $xml_in = "";
460 if (!open(MIN,"<$doc_xml_filename"))
461 {
462 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
463 }
464 else
465 {
466 # Read them in
467 my $line;
468 while (defined ($line=<MIN>)) {
469 $xml_in .= $line;
470 }
471 close(MIN);
472
473 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
474
475 if(defined $parser->{'pad'}->{'metavalue'})
476 {
477 return $parser->{'pad'}->{'metavalue'};
478 }
479 else
480 {
481 return "";
482 }
483 }
484}
485
486sub gfdxml_metadata
487{
488 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
489
490 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
491 {
492 return;
493 }
494
495 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
496 {
497 if (!defined $parser->{'parameters'}->{'poscount'})
498 {
499 $parser->{'parameters'}->{'poscount'} = 0;
500 }
501 else
502 {
503 $parser->{'parameters'}->{'poscount'}++;
504 }
505 }
506
507 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
508 {
509 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
510 }
511}
512
513sub _set_live_metadata
514{
515 my $self = shift @_;
516
517 my $collect = $self->{'collect'};
518 my $gsdl_cgi = $self->{'gsdl_cgi'};
519 #my $gsdlhome = $self->{'gsdlhome'};
520 my $infodbtype = $self->{'infodbtype'};
521
522 # Obtain the collect dir
523 my $site = $self->{'site'};
524 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
525 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
526
527
528 # look up additional args
529 my $docid = $self->{'d'};
530 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
531 $gsdl_cgi->generate_error("No docid (d=...) specified.");
532 }
533 my $metavalue = $self->{'metavalue'};
534
535
536 # Generate the dbkey
537 my $metaname = $self->{'metaname'};
538 my $dbkey = "$docid.$metaname";
539
540 # To people who know $collect_tail please add some comments
541 # Obtain path to the database
542 my $collect_tail = $collect;
543 $collect_tail =~ s/^.*[\/\\]//;
544 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
545 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
546
547 # Set the new value
548 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
549 my $status = system($cmd);
550 if ($status != 0) {
551 # Catch error if gdbmget failed
552 my $mess = "Failed to set metadata key: $dbkey\n";
553
554 $mess .= "PATH: $ENV{'PATH'}\n";
555 $mess .= "cmd = $cmd\n";
556 $mess .= "Exit status: $status\n";
557 $mess .= "System Error Message: $!\n";
558
559 $gsdl_cgi->generate_error($mess);
560 }
561 else {
562 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
563 }
564
565 #return $status; # in case calling functions have any further use for this
566}
567
568sub set_live_metadata
569{
570 my $self = shift @_;
571
572 my $username = $self->{'username'};
573 my $collect = $self->{'collect'};
574 my $gsdl_cgi = $self->{'gsdl_cgi'};
575
576 if ($baseaction::authentication_enabled) {
577 # Ensure the user is allowed to edit this collection
578 &authenticate_user($gsdl_cgi, $username, $collect);
579 }
580
581 # Make sure the collection isn't locked by someone else
582 $self->lock_collection($username, $collect);
583
584 $self->_set_live_metadata(@_);
585
586 # Release the lock once it is done
587 $self->unlock_collection($username, $collect);
588}
589
590sub set_metadata_entry
591{
592 my $self = shift @_;
593 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
594
595 # To people who know $collect_tail please add some comments
596 # Obtain path to the database
597 my $collect_tail = $collect;
598 $collect_tail =~ s/^.*[\/\\]//;
599 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
600 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
601
602# print STDERR "**** infodb file path = $infodb_file_path\n";
603# print STDERR "***** infodb type = $infodbtype\n";
604
605 # Read the docid entry
606 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
607
608 # Set the metadata value
609 if (defined $metapos) {
610 $doc_rec->{$metaname}->[$metapos] = $metavalue;
611 }
612 else {
613 $doc_rec->{$metaname} = [ $metavalue ];
614 }
615
616 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
617
618 return $status;
619
620}
621
622sub _set_import_metadata
623{
624 my $self = shift @_;
625
626 my $collect = $self->{'collect'};
627 my $gsdl_cgi = $self->{'gsdl_cgi'};
628 my $infodbtype = $self->{'infodbtype'};
629# my $gsdlhome = $self->{'gsdlhome'};
630
631 # Obtain the collect and archive dir
632 my $site = $self->{'site'};
633 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
634 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
635 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
636
637 # look up additional args
638 # want either d= or f=
639 my $docid = $self->{'d'};
640 my $import_file = $self->{'f'};
641 if ((!defined $docid) && (!defined $import_file)) {
642 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
643 }
644
645 # Get the parameters and set default mode to "accumulate"
646 my $metaname = $self->{'metaname'};
647 my $metavalue = $self->{'metavalue'};
648## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
649 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
650
651 my $metamode = $self->{'metamode'};
652 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
653 # make "accumulate" the default (less destructive, as won't actually
654 # delete any existing values)
655 $metamode = "accumulate";
656 }
657
658 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
659 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
660 my $metadata_xml_file;
661 my $import_filename = undef;
662 if (defined $docid) {
663 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
664 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
665
666 # This now stores the full pathname
667 $import_filename = $doc_rec->{'src-file'}->[0];
668 }
669 else {
670 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
671 }
672
673 # figure out correct metadata.xml file [?]
674 # Assuming the metadata.xml file is next to the source file
675 # Note: This will not work if it is using the inherited metadata from the parent folder
676 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
677 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
678
679 # Edit the metadata.xml
680 # Modified by Jeffrey from DL Consulting
681 # Handle the case where there is one metadata.xml file for multiple FileSets
682 # The XML filter needs to know whether it is in the right FileSet
683 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
684 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
685 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
686 $metaname, $metavalue, $metamode, $import_tailname);
687
688
689 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
690 $mess .= " $metaname";
691 $mess .= " = $metavalue";
692 $mess .= " ($metamode)\n";
693
694 $gsdl_cgi->generate_ok_message($mess);
695
696 #return $status; # in case calling functions have any further use for this
697}
698
699# the version of set_index_meta that doesn't do authentication
700sub _set_archives_metadata
701{
702 my $self = shift @_;
703
704 my $collect = $self->{'collect'};
705 my $gsdl_cgi = $self->{'gsdl_cgi'};
706 my $infodbtype = $self->{'infodbtype'};
707
708 # Obtain the collect and archive dir
709 my $site = $self->{'site'};
710 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
711 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
712
713 # look up additional args
714 my $docid = $self->{'d'};
715 my $metaname = $self->{'metaname'};
716 my $metavalue = $self->{'metavalue'};
717 my $prevmetavalue = $self->{'prevmetavalue'};
718
719 my $metapos = $self->{'metapos'};
720 $metapos = 0 if (!defined $metapos);
721
722 my $metamode = $self->{'metamode'};
723 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
724 # make "accumulate" the default (less destructive, as won't actually
725 # delete any existing values)
726 $metamode = "accumulate";
727 }
728
729 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
730 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
731
732 if ($status == 0) {
733 my $mess = "set-archives-metadata successful: Key[$docid]\n";
734 $mess .= " $metaname";
735 $mess .= "->[$metapos]" if (defined $metapos);
736 $mess .= " = $metavalue";
737 $mess .= " ($metamode)\n";
738
739 $gsdl_cgi->generate_ok_message($mess);
740 }
741 else {
742 my $mess .= "Failed to set archives metadata key: $docid\n";
743 $mess .= "Exit status: $status\n";
744 if(defined $self->{'error_msg'}) {
745 $mess .= "Error Message: $self->{'error_msg'}\n";
746 } else {
747 $mess .= "System Error Message: $!\n";
748 }
749 $mess .= "-" x 20 . "\n";
750
751 $gsdl_cgi->generate_error($mess);
752 }
753
754 #return $status; # in case calling functions have any further use for this
755}
756
757
758# the version of set_index_meta that doesn't do authentication
759sub _set_index_metadata
760{
761 my $self = shift @_;
762
763 my $collect = $self->{'collect'};
764 my $gsdl_cgi = $self->{'gsdl_cgi'};
765
766 my $site = $self->{'site'};
767 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
768
769 # look up additional args
770 my $docid = $self->{'d'};
771 my $metaname = $self->{'metaname'};
772 my $metapos = $self->{'metapos'};
773 my $metavalue = $self->{'metavalue'};
774 my $infodbtype = $self->{'infodbtype'};
775
776 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
777
778 if ($status != 0) {
779 # Catch error if set infodb entry failed
780 my $mess = "Failed to set metadata key: $docid\n";
781
782 $mess .= "PATH: $ENV{'PATH'}\n";
783 $mess .= "Exit status: $status\n";
784 $mess .= "System Error Message: $!\n";
785
786 $gsdl_cgi->generate_error($mess);
787 }
788 else {
789 my $mess = "set-index-metadata successful: Key[$docid]\n";
790 $mess .= " $metaname";
791 $mess .= "->[$metapos]" if (defined $metapos);
792 $mess .= " = $metavalue\n";
793
794 $gsdl_cgi->generate_ok_message($mess);
795 }
796
797 #return $status; # in case calling functions have any further use for this
798}
799
800sub set_index_metadata
801{
802 my $self = shift @_;
803
804 my $username = $self->{'username'};
805 my $collect = $self->{'collect'};
806 my $gsdl_cgi = $self->{'gsdl_cgi'};
807 #my $gsdlhome = $self->{'gsdlhome'};
808
809 if ($baseaction::authentication_enabled) {
810 # Ensure the user is allowed to edit this collection
811 &authenticate_user($gsdl_cgi, $username, $collect);
812 }
813
814 my $site = $self->{'site'};
815 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
816
817 $gsdl_cgi->checked_chdir($collect_dir);
818
819 # Obtain the collect dir
820 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
821
822 # Make sure the collection isn't locked by someone else
823 $self->lock_collection($username, $collect);
824
825 $self->_set_index_metadata(@_);
826
827 # Release the lock once it is done
828 $self->unlock_collection($username, $collect);
829}
830
831# call this to set the metadata for a combination of dirs archives, import or index, or live
832# if none specified, defaults to index which was the original behaviour of set_metadata.
833sub set_metadata
834{
835 my $self = shift @_;
836
837 # Testing that not defining a variable, setting it to "" or to " " all return false
838 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
839
840 my $where = $self->{'where'};
841 if(!$where) {
842 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
843 return;
844 }
845
846 # authenticate and lock collection once, even if processing multiple dirs
847 my $username = $self->{'username'};
848 my $collect = $self->{'collect'};
849 my $gsdl_cgi = $self->{'gsdl_cgi'};
850
851 if ($baseaction::authentication_enabled) {
852 # Ensure the user is allowed to edit this collection
853 #&authenticate_user($gsdl_cgi, $username, $collect);
854 $self->authenticate_user($username, $collect);
855 }
856
857 if($where =~ m/index/) {
858 my $site = $self->{'site'};
859 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
860 $gsdl_cgi->checked_chdir($collect_dir);
861 }
862
863 # Make sure the collection isn't locked by someone else
864 $self->lock_collection($username, $collect);
865
866
867 # now at last can set the metadata. $where can specify multiple
868 # $where is of the form: import|archives|index, or a subset thereof
869
870 #my @whichdirs = split('\|', $where);
871
872 # just check whether $where contains import/archives/index/live in turn, and
873 # for each case, process it accordingly
874 if($where =~ m/import/) {
875 $self->_set_import_metadata(@_);
876 }
877
878 if($where =~ m/archives/) {
879
880 # look up docID arg which is optional to set_metadata because it's optional
881 # to set_import, but which is compulsory to set_archives_metadata
882 my $docid = $self->{'d'};
883 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
884 $gsdl_cgi->generate_error("No docid (d=...) specified.");
885 }
886 else { # we have a docid, so can set archives meta
887 $self->_set_archives_metadata(@_);
888 }
889 }
890
891 if($where =~ m/index/) {
892
893 # look up docID arg which is optional to set_metadata because it's optional
894 # to set_import, but which is compulsory to set_archives_metadata
895 my $docid = $self->{'d'};
896 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
897 $gsdl_cgi->generate_error("No docid (d=...) specified.");
898 }
899 else { # we have a docid, so can set index meta
900 $self->_set_index_metadata(@_);
901 }
902 }
903
904 if($where =~ m/live/) {
905 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
906 }
907
908 # Release the lock once it is done
909 $self->unlock_collection($username, $collect);
910}
911
912sub set_metadata_array
913{
914 my $self = shift @_;
915
916 my $username = $self->{'username'};
917 my $collect = $self->{'collect'};
918 my $gsdl_cgi = $self->{'gsdl_cgi'};
919 my $gsdlhome = $self->{'gsdlhome'};
920
921 if ($baseaction::authentication_enabled) {
922 # Ensure the user is allowed to edit this collection
923 &authenticate_user($gsdl_cgi, $username, $collect);
924 }
925
926 my $site = $self->{'site'};
927 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
928
929 $gsdl_cgi->checked_chdir($collect_dir);
930
931 # Obtain the collect dir
932 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
933
934 # Make sure the collection isn't locked by someone else
935 $self->lock_collection($username, $collect);
936
937 # look up additional args
938
939 my $infodbtype = $self->{'infodbtype'};
940
941 my $json_str = $self->{'json'};
942 my $doc_array = decode_json $json_str;
943
944
945 my $global_status = 0;
946 my $global_mess = "";
947
948 my @all_docids = ();
949
950 foreach my $doc_array_rec ( @$doc_array ) {
951
952 my $docid = $doc_array_rec->{'docid'};
953 my $metaname = $doc_array_rec->{'metaname'};
954 my $metapos = $doc_array_rec->{'metapos'};
955 my $metavalue = $doc_array_rec->{'metavalue'};
956
957 push(@all_docids,$docid);
958
959 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
960
961 if ($status != 0) {
962 # Catch error if set infodb entry failed
963 $global_status = $status;
964 $global_mess .= "Failed to set metadata key: $docid\n";
965 $global_mess .= "Exit status: $status\n";
966 $global_mess .= "System Error Message: $!\n";
967 $global_mess .= "-" x 20;
968 }
969 }
970
971 if ($global_status != 0) {
972 $global_mess .= "PATH: $ENV{'PATH'}\n";
973 $gsdl_cgi->generate_error($global_mess);
974 }
975 else {
976 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
977 $gsdl_cgi->generate_ok_message($mess);
978 }
979
980 # Release the lock once it is done
981 $self->unlock_collection($username, $collect);
982}
983
984
985sub dxml_metadata
986{
987 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
988 my $metaname = $parser->{'parameters'}->{'metaname'};
989 my $metamode = $parser->{'parameters'}->{'metamode'};
990
991 print STDERR "**** Processing closing </Metadata> tag\n";
992
993 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
994
995 # Find the right metadata tag and checks if we are going to
996 # override it
997 #
998 # Note: This over writes the first metadata block it
999 # encountered. If there are multiple Sections in the doc.xml, it
1000 # might not behave as you would expect
1001
1002 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1003## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1004## print STDERR "**** metamode = $metamode\n";
1005
1006 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1007 {
1008 my $name_attr = $attrHash->{'name'};
1009 if (($name_attr eq $metaname) && ($metamode eq "override"))
1010 {
1011 if (!defined $parser->{'parameters'}->{'poscount'})
1012 {
1013 $parser->{'parameters'}->{'poscount'} = 0;
1014 }
1015 else
1016 {
1017 $parser->{'parameters'}->{'poscount'}++;
1018 }
1019
1020 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1021 {
1022 ##print STDERR "#### got match!!\n";
1023 # Get the value and override the current value
1024 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1025 $attrHash->{'_content'} = $metavalue;
1026
1027 # Don't want it to wipe out any other pieces of metadata
1028 $parser->{'parameters'}->{'metamode'} = "done";
1029 }
1030 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1031 {
1032 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1033 $attrHash->{'_content'} = $metavalue;
1034 $parser->{'parameters'}->{'metamode'} = "done";
1035 }
1036 }
1037 }
1038
1039 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1040 return [$tagname => $attrHash];
1041}
1042
1043# This method exists purely for catching invalid section numbers that the client
1044# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1045# we've seen all the Sections in the doc.xml, and none of their section nums matched
1046# if the metamode has not been set to 'done' by then.
1047sub dxml_archive
1048{
1049 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1050 my $metamode = $parser->{'parameters'}->{'metamode'};
1051
1052 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1053 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1054
1055# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1056
1057 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1058 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1059
1060 my $metaname = $parser->{'parameters'}->{'metaname'};
1061 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1062
1063 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1064 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1065 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1066 print STDERR "\tin section $opt_doc_secnum.\n";
1067 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1068 }
1069
1070 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1071 return [$tagname => $attrHash];
1072}
1073
1074sub dxml_description
1075{
1076 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1077 my $metamode = $parser->{'parameters'}->{'metamode'};
1078
1079 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1080 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1081
1082 print STDERR "**** Processing a closing </Description> tag \n";
1083# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1084
1085 # Accumulate the metadata
1086
1087 # We'll be accumulating metadata at this point if we haven't found and therefore
1088 # haven't processed the metadata yet.
1089 # For subsections, this means that if we're at a matching subsection, but haven't
1090 # found the correct metaname to override in that subsection, we accumulate it as new
1091 # meta in the subsection by adding it to the current description.
1092 # If there's no subsection info for the metadata, it will accumulate at the top level
1093 # section description if we hadn't found a matching metaname to override at this point.
1094
1095 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1096 # at the toplevel section. In the latter case, it means we want to process meta in the
1097 # toplevel section. So the eq check between the values below will work in all cases.
1098
1099 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1100 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1101 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1102
1103 if ($curr_secnum eq $opt_doc_secnum
1104 && ($metamode eq "accumulate" || $metamode eq "override")) {
1105 if ($metamode eq "override") {
1106 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1107 }
1108
1109 # If get to here and metamode is override, the this means there
1110 # was no existing value to overide => treat as an append operation
1111
1112 # Tack a new metadata tag on to the end of the <Metadata>+ block
1113 my $metaname = $parser->{'parameters'}->{'metaname'};
1114 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1115
1116 my $metadata_attr = {
1117 '_content' => $metavalue,
1118 'name' => $metaname,
1119 'mode' => "accumulate"
1120 };
1121
1122 my $append_metadata = [ "Metadata" => $metadata_attr ];
1123 my $description_content = $attrHash->{'_content'};
1124
1125 print "Appending metadata to doc.xml\n";
1126
1127 if (ref($description_content)) {
1128 # got some existing interesting nested content
1129 push(@$description_content, " ", $append_metadata ,"\n ");
1130 }
1131 else {
1132 #description_content is most likely a string such as "\n"
1133 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1134 }
1135
1136 $parser->{'parameters'}->{'metamode'} = "done";
1137 }
1138 else {
1139 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1140## print STDERR "**** NOT ACCUMULATE?!? \n";
1141 }
1142
1143 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1144 return [$tagname => $attrHash];
1145}
1146
1147
1148sub dxml_start_section
1149{
1150 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1151
1152 my $new_depth = scalar(@$contextArray);
1153
1154 print STDERR "**** START SECTION \n";
1155
1156 if ($new_depth == 1) {
1157 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1158 $parser->{'parameters'}->{'curr_section_num'} = "";
1159 }
1160
1161 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1162 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1163
1164 my $new_secnum;
1165
1166 if ($new_depth > $old_depth) {
1167 # child subsection
1168 $new_secnum = "$old_secnum.1";
1169 }
1170 elsif ($new_depth == $old_depth) {
1171 # sibling section => increase it's value by 1
1172 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1173 $tail_num++;
1174 $new_secnum = $old_secnum;
1175 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1176 }
1177 else {
1178 # back up to parent section => lopp off tail
1179 $new_secnum = $old_secnum;
1180 $new_secnum =~ s/\.\d+$//;
1181 }
1182
1183 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1184 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1185
1186 1;
1187}
1188
1189sub edit_xml_file
1190{
1191 my $self = shift @_;
1192 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1193
1194 # use XML::Rules to add it in (read in and out again)
1195 my $parser = XML::Rules->new(start_rules => $start_rules,
1196 rules => $rules,
1197 style => 'filter',
1198 output_encoding => 'utf8' );
1199
1200 my $xml_in = "";
1201 if (!open(MIN,"<$filename")) {
1202 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1203 }
1204 else {
1205 # Read all the text in
1206 my $line;
1207 while (defined ($line=<MIN>)) {
1208 $xml_in .= $line;
1209 }
1210 close(MIN);
1211
1212 my $MOUT;
1213 if (!open($MOUT,">$filename")) {
1214 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1215 }
1216 else {
1217 # Matched lines will get handled by the call backs
1218## my $xml_out = "";
1219
1220 binmode($MOUT,":utf8");
1221 $parser->filter($xml_in,$MOUT, $options);
1222
1223# binmode(MOUT,":utf8");
1224# print MOUT $xml_out;
1225 close($MOUT);
1226 }
1227 }
1228
1229 # copy across any custom error information that was stored during parsing
1230 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1231}
1232
1233sub edit_doc_xml
1234{
1235 my $self = shift @_;
1236 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1237
1238 my $info_mess = <<RAWEND;
1239****************************
1240 edit_doc_xml()
1241****************************
1242doc_xml_filename = $doc_xml_filename
1243metaname = $metaname
1244metavalue = $metavalue
1245metapos = $metapos
1246metamode = $metamode
1247opt_secnum = $opt_secnum
1248prevmetavalue = $prevmetavalue
1249****************************
1250RAWEND
1251
1252 $gsdl_cgi->generate_message($info_mess);
1253
1254 # To monitor which section/subsection number we are in
1255 my @start_rules =
1256 ( 'Section' => \&dxml_start_section );
1257
1258 # use XML::Rules to add it in (read in and out again)
1259 # Set the call back functions
1260 my @rules =
1261 ( _default => 'raw',
1262 'Metadata' => \&dxml_metadata,
1263 'Description' => \&dxml_description,
1264 'Archive' => \&dxml_archive); # just for catching errors at end
1265
1266 # Sets the parameters
1267 my $options = { 'metaname' => $metaname,
1268 'metapos' => $metapos,
1269 'metavalue' => $metavalue,
1270 'metamode' => $metamode,
1271 'prevmetavalue' => $prevmetavalue };
1272
1273 if (defined $opt_secnum) {
1274 $options->{'secnum'} = $opt_secnum;
1275 }
1276
1277 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1278}
1279
1280sub set_archives_metadata_entry
1281{
1282 my $self = shift @_;
1283 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1284
1285 my $info_mess = <<RAWEND;
1286****************************
1287 set_archives_metadata_entry()
1288****************************
1289archive_dir = $archive_dir
1290collect_dir = $collect_dir
1291collect = $collect
1292infodbtype = $infodbtype
1293docid = $docid
1294metaname = $metaname
1295metapos = $metapos
1296metavalue = $metavalue
1297metamode = $metamode
1298prevmetavalue = $prevmetavalue
1299****************************
1300RAWEND
1301
1302 $gsdl_cgi->generate_message($info_mess);
1303
1304 # Obtain the doc.xml path for the specified docID
1305 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1306
1307 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1308 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1309 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1310
1311 # The $doc_xml_file is relative to the archives, and now let's get the full path
1312 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1313 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1314
1315 # Edit the doc.xml file with the specified metadata name, value and position.
1316 # TODO: there is a potential problem here as this edit_doc_xml function
1317 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1318 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1319
1320 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1321 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1322
1323 # return 0; # return 0 for now to indicate no error
1324 return (defined $self->{'error_msg'}) ? 1 : 0;
1325}
1326
1327
1328sub set_archives_metadata
1329{
1330 my $self = shift @_;
1331
1332 my $username = $self->{'username'};
1333 my $collect = $self->{'collect'};
1334 my $gsdl_cgi = $self->{'gsdl_cgi'};
1335
1336 if ($baseaction::authentication_enabled) {
1337 # Ensure the user is allowed to edit this collection
1338 $self->authenticate_user($username, $collect);
1339 }
1340
1341 # Make sure the collection isn't locked by someone else
1342 $self->lock_collection($username, $collect);
1343
1344 $self->_set_archives_metadata(@_);
1345
1346 # Release the lock once it is done
1347 $self->unlock_collection($username, $collect);
1348}
1349
1350
1351sub set_archives_metadata_array
1352{
1353 my $self = shift @_;
1354
1355 my $username = $self->{'username'};
1356 my $collect = $self->{'collect'};
1357 my $gsdl_cgi = $self->{'gsdl_cgi'};
1358 my $gsdlhome = $self->{'gsdlhome'};
1359
1360 if ($baseaction::authentication_enabled) {
1361 # Ensure the user is allowed to edit this collection
1362 &authenticate_user($gsdl_cgi, $username, $collect);
1363 }
1364
1365 my $site = $self->{'site'};
1366 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1367
1368 $gsdl_cgi->checked_chdir($collect_dir);
1369
1370 # Obtain the collect dir
1371 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1372
1373 # Make sure the collection isn't locked by someone else
1374 $self->lock_collection($username, $collect);
1375
1376 # look up additional args
1377
1378 my $infodbtype = $self->{'infodbtype'};
1379
1380 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1381
1382 my $json_str = $self->{'json'};
1383 my $doc_array = decode_json $json_str;
1384
1385
1386 my $global_status = 0;
1387 my $global_mess = "";
1388
1389 my @all_docids = ();
1390
1391 foreach my $doc_array_rec ( @$doc_array ) {
1392
1393 my $docid = $doc_array_rec->{'docid'};
1394 my $metaname = $doc_array_rec->{'metaname'};
1395 my $metapos = $doc_array_rec->{'metapos'};
1396 my $metamode = $self->{'metamode'};
1397 my $metavalue = $doc_array_rec->{'metavalue'};
1398
1399 # Some sanity checks
1400 $metapos = 0 if (!defined $metapos);
1401
1402 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1403 # make "accumulate" the default (less destructive, as won't actually
1404 # delete any existing values)
1405 $metamode = "accumulate";
1406 }
1407
1408 push(@all_docids,$docid);
1409
1410 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1411 $metaname,$metapos,$metavalue,$metamode);
1412
1413 if ($status != 0) {
1414 # Catch error if set infodb entry failed
1415 $global_status = $status;
1416 $global_mess .= "Failed to set metadata key: $docid\n";
1417 $global_mess .= "Exit status: $status\n";
1418 $global_mess .= "System Error Message: $!\n";
1419 $global_mess .= "-" x 20 . "\n";
1420 }
1421 }
1422
1423 if ($global_status != 0) {
1424 $global_mess .= "PATH: $ENV{'PATH'}\n";
1425 $gsdl_cgi->generate_error($global_mess);
1426 }
1427 else {
1428 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1429 $gsdl_cgi->generate_ok_message($mess);
1430 }
1431
1432 # Release the lock once it is done
1433 $self->unlock_collection($username, $collect);
1434}
1435
1436sub _remove_archives_metadata
1437{
1438 my $self = shift @_;
1439
1440 my $collect = $self->{'collect'};
1441 my $gsdl_cgi = $self->{'gsdl_cgi'};
1442# my $gsdlhome = $self->{'gsdlhome'};
1443 my $infodbtype = $self->{'infodbtype'};
1444
1445 my $site = $self->{'site'};
1446
1447 # Obtain the collect and archive dir
1448 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1449
1450 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1451
1452 # look up additional args
1453 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1454
1455 my $metaname = $self->{'metaname'};
1456 my $metapos = $self->{'metapos'};
1457 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1458
1459 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1460 # default to deleting the first metavalue for the given metaname
1461 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1462 # seem to get deleted in one single remove_archives_meta action invocation.
1463 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1464 # deleted, both get deleted.
1465 if(!defined $metapos && !defined $metavalue) {
1466 $metapos = 0;
1467 }
1468
1469 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1470 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1471
1472 # This now stores the full pathname
1473 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1474
1475 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum);
1476# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1477
1478 if ($status == 0)
1479 {
1480 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1481 $mess .= " $metaname";
1482 $mess .= "->[$metapos]" if (defined $metapos);
1483
1484 $gsdl_cgi->generate_ok_message($mess);
1485 }
1486 else
1487 {
1488 my $mess .= "Failed to remove archives metadata key: $docid\n";
1489 $mess .= "Exit status: $status\n";
1490 $mess .= "System Error Message: $!\n";
1491 $mess .= "-" x 20 . "\n";
1492
1493 $gsdl_cgi->generate_error($mess);
1494 }
1495
1496 #return $status; # in case calling functions have a use for this
1497}
1498
1499sub remove_archives_metadata
1500{
1501 my $self = shift @_;
1502
1503 my $username = $self->{'username'};
1504 my $collect = $self->{'collect'};
1505 my $gsdl_cgi = $self->{'gsdl_cgi'};
1506
1507 if ($baseaction::authentication_enabled)
1508 {
1509 # Ensure the user is allowed to edit this collection
1510 &authenticate_user($gsdl_cgi, $username, $collect);
1511 }
1512
1513 # Make sure the collection isn't locked by someone else
1514 $self->lock_collection($username, $collect);
1515
1516 $self->_remove_archives_metadata(@_);
1517
1518 # Release the lock once it is done
1519 $self->unlock_collection($username, $collect);
1520}
1521
1522sub remove_from_doc_xml
1523{
1524 my $self = shift @_;
1525 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1526
1527 my @start_rules = ('Section' => \&dxml_start_section);
1528
1529 # Set the call-back functions for the metadata tags
1530 my @rules =
1531 (
1532 _default => 'raw',
1533 'Metadata' => \&rfdxml_metadata
1534 );
1535
1536 my $parser = XML::Rules->new
1537 (
1538 start_rules => \@start_rules,
1539 rules => \@rules,
1540 style => 'filter',
1541 output_encoding => 'utf8'
1542 );
1543
1544 my $status = 0;
1545 my $xml_in = "";
1546 if (!open(MIN,"<$doc_xml_filename"))
1547 {
1548 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1549 $status = 1;
1550 }
1551 else
1552 {
1553 # Read them in
1554 my $line;
1555 while (defined ($line=<MIN>)) {
1556 $xml_in .= $line;
1557 }
1558 close(MIN);
1559
1560 # Filter with the call-back functions
1561 my $xml_out = "";
1562
1563 my $MOUT;
1564 if (!open($MOUT,">$doc_xml_filename")) {
1565 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1566 $status = 1;
1567 }
1568 else {
1569 binmode($MOUT,":utf8");
1570 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1571 close($MOUT);
1572 }
1573 }
1574 return $status;
1575}
1576
1577sub rfdxml_metadata
1578{
1579 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1580
1581 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1582 {
1583 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1584 return [$tagname => $attrHash];
1585 }
1586
1587 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1588 {
1589 if (!defined $parser->{'parameters'}->{'poscount'})
1590 {
1591 $parser->{'parameters'}->{'poscount'} = 0;
1592 }
1593 else
1594 {
1595 $parser->{'parameters'}->{'poscount'}++;
1596 }
1597 }
1598
1599 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1600 {
1601 return [];
1602 }
1603
1604 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1605 {
1606 return [];
1607 }
1608
1609 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1610 return [$tagname => $attrHash];
1611}
1612
1613sub mxml_metadata
1614{
1615 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1616 my $metaname = $parser->{'parameters'}->{'metaname'};
1617 my $metamode = $parser->{'parameters'}->{'metamode'};
1618
1619 # Report error if we don't see FileName tag before this
1620 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1621
1622 # Don't do anything if we are not in the right FileSet
1623 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1624 if ($file_regexp =~ /\.\*/) {
1625 # Only interested in a file_regexp if it specifies precisely one
1626 # file.
1627 # So, skip anything with a .* in it as it is too general
1628 return [$tagname => $attrHash];
1629 }
1630 my $src_file = $parser->{'parameters'}->{'src_file'};
1631 if (!($src_file =~ /$file_regexp/)) {
1632 return [$tagname => $attrHash];
1633 }
1634## print STDERR "*** mxl metamode = $metamode\n";
1635
1636 # Find the right metadata tag and checks if we are going to override it
1637 my $name_attr = $attrHash->{'name'};
1638 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1639 # Get the value and override the current value
1640 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1641 $attrHash->{'_content'} = $metavalue;
1642
1643## print STDERR "**** overrideing metadata.xml\n";
1644
1645 # Don't want it to wipe out any other pieces of metadata
1646 $parser->{'parameters'}->{'metamode'} = "done";
1647 }
1648
1649 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1650 return [$tagname => $attrHash];
1651}
1652
1653
1654sub mxml_description
1655{
1656 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1657 my $metamode = $parser->{'parameters'}->{'metamode'};
1658
1659 # Failed... Report error if we don't see FileName tag before this
1660 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1661
1662 # Don't do anything if we are not in the right FileSet
1663 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1664 if ($file_regexp =~ m/\.\*/) {
1665 # Only interested in a file_regexp if it specifies precisely one
1666 # file.
1667 # So, skip anything with a .* in it as it is too general
1668 return [$tagname => $attrHash];
1669 }
1670 my $src_file = $parser->{'parameters'}->{'src_file'};
1671
1672 if (!($src_file =~ m/$file_regexp/)) {
1673 return [$tagname => $attrHash];
1674 }
1675
1676 # Accumulate the metadata block to the end of the description block
1677 # Note: This adds metadata block to all description blocks, so if there are
1678 # multiple FileSets, it will add to all of them
1679 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1680 # if metamode was "override" but get to here then it failed to
1681 # find an item to override, in which case it should append its
1682 # value to the end, just like the "accumulate" mode
1683
1684 # tack a new metadata tag on to the end of the <Metadata>+ block
1685 my $metaname = $parser->{'parameters'}->{'metaname'};
1686 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1687
1688 my $metadata_attr = { '_content' => $metavalue,
1689 'name' => $metaname,
1690 'mode' => "accumulate" };
1691
1692 my $append_metadata = [ "Metadata" => $metadata_attr ];
1693 my $description_content = $attrHash->{'_content'};
1694
1695## print STDERR "*** appending to metadata.xml\n";
1696
1697 # append the new metadata element to the end of the current
1698 # content contained inside this tag
1699 if (ref($description_content) eq "") {
1700 # => string or numeric literal
1701 # this is caused by a <Description> block has no <Metadata> child elements
1702 # => set up an empty array in '_content'
1703 $attrHash->{'_content'} = [ "\n" ];
1704 $description_content = $attrHash->{'_content'};
1705 }
1706
1707 push(@$description_content," ", $append_metadata ,"\n ");
1708 $parser->{'parameters'}->{'metamode'} = "done";
1709 }
1710
1711 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1712 return [$tagname => $attrHash];
1713}
1714
1715
1716sub mxml_filename
1717{
1718 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1719
1720 # Store the filename of the Current Fileset
1721 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1722 # FileName tag must come before Description tag
1723 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1724
1725 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1726 return [$tagname => $attrHash];
1727}
1728
1729
1730sub mxml_fileset
1731{
1732 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1733
1734 # Initilise the current_file
1735 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1736 # FileName tag must come before Description tag
1737 $parser->{'parameters'}->{'current_file'} = "";
1738
1739 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1740 return [$tagname => $attrHash];
1741}
1742
1743
1744sub edit_metadata_xml
1745{
1746 my $self = shift @_;
1747 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1748
1749 # Set the call-back functions for the metadata tags
1750 my @rules =
1751 ( _default => 'raw',
1752 'FileName' => \&mxml_filename,
1753 'Metadata' => \&mxml_metadata,
1754 'Description' => \&mxml_description,
1755 'FileSet' => \&mxml_fileset);
1756
1757 # use XML::Rules to add it in (read in and out again)
1758 my $parser = XML::Rules->new(rules => \@rules,
1759 style => 'filter',
1760 output_encoding => 'utf8');
1761
1762 if (!-e $metadata_xml_filename) {
1763
1764 if (open(MOUT,">$metadata_xml_filename")) {
1765
1766 my $src_file_re = &util::filename_to_regex($src_file);
1767 # shouldn't the following also be in the above utility routine??
1768 # $src_file_re =~ s/\./\\./g;
1769
1770 print MOUT "<?xml version=\"1.0\"?>\n";
1771 print MOUT "<DirectoryMetadata>\n";
1772 print MOUT " <FileSet>\n";
1773 print MOUT " <FileName>$src_file_re</FileName>\n";
1774 print MOUT " <Description>\n";
1775 print MOUT " </Description>\n";
1776 print MOUT " </FileSet>\n";
1777 print MOUT "</DirectoryMetadata>\n";
1778
1779 close(MOUT);
1780 }
1781 else {
1782 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1783 }
1784 }
1785
1786
1787 my $xml_in = "";
1788 if (!open(MIN,"<$metadata_xml_filename")) {
1789 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1790 }
1791 else {
1792 # Read them in
1793 my $line;
1794 while (defined ($line=<MIN>)) {
1795 $xml_in .= $line;
1796 }
1797 close(MIN);
1798
1799 # Filter with the call-back functions
1800 my $xml_out = "";
1801
1802 my $MOUT;
1803 if (!open($MOUT,">$metadata_xml_filename")) {
1804 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1805 }
1806 else {
1807 binmode($MOUT,":utf8");
1808
1809 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1810 # At the moment, I will just hack it!
1811 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1812 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1813 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1814 #print MOUT $xml_out;
1815
1816 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1817 metavalue => $metavalue,
1818 metamode => $metamode,
1819 src_file => $src_file,
1820 current_file => undef} );
1821 close($MOUT);
1822 }
1823 }
1824}
1825
1826
1827sub set_import_metadata
1828{
1829 my $self = shift @_;
1830
1831 my $username = $self->{'username'};
1832 my $collect = $self->{'collect'};
1833 my $gsdl_cgi = $self->{'gsdl_cgi'};
1834
1835 if ($baseaction::authentication_enabled) {
1836 # Ensure the user is allowed to edit this collection
1837 $self->authenticate_user($username, $collect);
1838 }
1839
1840 # Make sure the collection isn't locked by someone else
1841 $self->lock_collection($username, $collect);
1842
1843 $self->_set_import_metadata(@_);
1844
1845 # Release the lock once it is done
1846 $self->unlock_collection($username, $collect);
1847
1848}
1849
1850sub set_import_metadata_array
1851{
1852 my $self = shift @_;
1853
1854 my $username = $self->{'username'};
1855 my $collect = $self->{'collect'};
1856 my $gsdl_cgi = $self->{'gsdl_cgi'};
1857 my $gsdlhome = $self->{'gsdlhome'};
1858
1859 if ($baseaction::authentication_enabled) {
1860 # Ensure the user is allowed to edit this collection
1861 &authenticate_user($gsdl_cgi, $username, $collect);
1862 }
1863
1864 my $site = $self->{'site'};
1865 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1866
1867 $gsdl_cgi->checked_chdir($collect_dir);
1868
1869 # Make sure the collection isn't locked by someone else
1870 $self->lock_collection($username, $collect);
1871
1872 # look up additional args
1873
1874 my $infodbtype = $self->{'infodbtype'};
1875
1876 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1877
1878 my $json_str = $self->{'json'};
1879 my $doc_array = decode_json $json_str;
1880
1881 my $global_status = 0;
1882 my $global_mess = "";
1883
1884 my @all_docids = ();
1885
1886 foreach my $doc_array_rec ( @$doc_array )
1887 {
1888 my $docid = $doc_array_rec->{'docid'};
1889 my $metaname = $doc_array_rec->{'metaname'};
1890 my $metamode = $self->{'metamode'};
1891 my $metavalue = $doc_array_rec->{'metavalue'};
1892
1893 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1894 # make "accumulate" the default (less destructive, as won't actually
1895 # delete any existing values)
1896 $metamode = "accumulate";
1897 }
1898
1899 push(@all_docids,$docid);
1900
1901 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1902 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1903 my $metadata_xml_file;
1904 my $import_filename = undef;
1905
1906 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1907 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1908
1909 # This now stores the full pathname
1910 $import_filename = $doc_rec->{'src-file'}->[0];
1911
1912 # figure out correct metadata.xml file [?]
1913 # Assuming the metadata.xml file is next to the source file
1914 # Note: This will not work if it is using the inherited metadata from the parent folder
1915 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1916 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1917
1918 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1919 }
1920
1921 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1922 $gsdl_cgi->generate_ok_message($mess);
1923
1924 # Release the lock once it is done
1925 $self->unlock_collection($username, $collect);
1926}
1927
1928sub _remove_import_metadata
1929{
1930 my $self = shift @_;
1931
1932 my $collect = $self->{'collect'};
1933 my $gsdl_cgi = $self->{'gsdl_cgi'};
1934 my $gsdlhome = $self->{'gsdlhome'};
1935 my $infodbtype = $self->{'infodbtype'};
1936
1937 # Obtain the collect dir
1938 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1939 my $site = $self->{'site'};
1940 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1941
1942 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1943 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1944
1945 # look up additional args
1946 my $docid = $self->{'d'};
1947 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1948 {
1949 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1950 }
1951
1952 my $metaname = $self->{'metaname'};
1953 my $metavalue = $self->{'metavalue'};
1954 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1955
1956 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1957 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1958 my $metadata_xml_file;
1959 my $import_filename = undef;
1960 if (defined $docid)
1961 {
1962 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1963 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1964
1965 # This now stores the full pathname
1966 $import_filename = $doc_rec->{'src-file'}->[0];
1967 }
1968
1969 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1970 {
1971 $gsdl_cgi->generate_error_message("There is no metadata\n");
1972 }
1973
1974 # figure out correct metadata.xml file [?]
1975 # Assuming the metadata.xml file is next to the source file
1976 # Note: This will not work if it is using the inherited metadata from the parent folder
1977 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1978 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1979
1980 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1981
1982 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1983 $mess .= " $metaname";
1984 $mess .= " = $metavalue\n";
1985
1986 $gsdl_cgi->generate_ok_message($mess);
1987
1988 #return $status; # in case calling functions have a use for this
1989}
1990
1991sub remove_import_metadata
1992{
1993 my $self = shift @_;
1994
1995 my $username = $self->{'username'};
1996 my $collect = $self->{'collect'};
1997 my $gsdl_cgi = $self->{'gsdl_cgi'};
1998
1999 if ($baseaction::authentication_enabled) {
2000 # Ensure the user is allowed to edit this collection
2001 &authenticate_user($gsdl_cgi, $username, $collect);
2002 }
2003
2004 # Make sure the collection isn't locked by someone else
2005 $self->lock_collection($username, $collect);
2006
2007 $self->_remove_import_metadata(@_);
2008
2009 # Release the lock once it is done
2010 $self->unlock_collection($username, $collect);
2011
2012}
2013
2014sub remove_from_metadata_xml
2015{
2016 my $self = shift @_;
2017 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
2018
2019 # Set the call-back functions for the metadata tags
2020 my @rules =
2021 (
2022 _default => 'raw',
2023 'Metadata' => \&rfmxml_metadata,
2024 'FileName' => \&mxml_filename
2025 );
2026
2027 my $parser = XML::Rules->new
2028 (
2029 rules => \@rules,
2030 style => 'filter',
2031 output_encoding => 'utf8'
2032 );
2033
2034 my $xml_in = "";
2035 if (!open(MIN,"<$metadata_xml_filename"))
2036 {
2037 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2038 }
2039 else
2040 {
2041 # Read them in
2042 my $line;
2043 while (defined ($line=<MIN>)) {
2044 $xml_in .= $line;
2045 }
2046 close(MIN);
2047
2048 # Filter with the call-back functions
2049 my $xml_out = "";
2050
2051 my $MOUT;
2052 if (!open($MOUT,">$metadata_xml_filename")) {
2053 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2054 }
2055 else {
2056 binmode($MOUT,":utf8");
2057 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
2058 close($MOUT);
2059 }
2060 }
2061}
2062
2063sub rfmxml_metadata
2064{
2065 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2066
2067 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2068 {
2069 return [];
2070 }
2071
2072 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2073 return [$tagname => $attrHash];
2074}
2075
2076sub _remove_live_metadata
2077{
2078 my $self = shift @_;
2079
2080 my $collect = $self->{'collect'};
2081 my $gsdl_cgi = $self->{'gsdl_cgi'};
2082# my $gsdlhome = $self->{'gsdlhome'};
2083 my $infodbtype = $self->{'infodbtype'};
2084
2085 # Obtain the collect dir
2086 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2087 my $site = $self->{'site'};
2088 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2089
2090
2091 # look up additional args
2092 my $docid = $self->{'d'};
2093 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2094 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2095 }
2096
2097 # Generate the dbkey
2098 my $metaname = $self->{'metaname'};
2099 my $dbkey = "$docid.$metaname";
2100
2101 # To people who know $collect_tail please add some comments
2102 # Obtain the live gdbm_db path
2103 my $collect_tail = $collect;
2104 $collect_tail =~ s/^.*[\/\\]//;
2105 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2106 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2107
2108 # Remove the key
2109 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2110 my $status = system($cmd);
2111 if ($status != 0) {
2112 # Catch error if gdbmdel failed
2113 my $mess = "Failed to set metadata key: $dbkey\n";
2114
2115 $mess .= "PATH: $ENV{'PATH'}\n";
2116 $mess .= "cmd = $cmd\n";
2117 $mess .= "Exit status: $status\n";
2118 $mess .= "System Error Message: $!\n";
2119
2120 $gsdl_cgi->generate_error($mess);
2121 }
2122 else {
2123 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2124 }
2125
2126}
2127
2128sub remove_live_metadata
2129{
2130 my $self = shift @_;
2131
2132 my $username = $self->{'username'};
2133 my $collect = $self->{'collect'};
2134 my $gsdl_cgi = $self->{'gsdl_cgi'};
2135 my $gsdlhome = $self->{'gsdlhome'};
2136
2137 if ($baseaction::authentication_enabled) {
2138 # Ensure the user is allowed to edit this collection
2139 &authenticate_user($gsdl_cgi, $username, $collect);
2140 }
2141
2142 # Make sure the collection isn't locked by someone else
2143 $self->lock_collection($username, $collect);
2144
2145 $self->_remove_live_metadata(@_);
2146
2147 $self->unlock_collection($username, $collect);
2148}
2149
2150sub remove_metadata
2151{
2152 my $self = shift @_;
2153
2154 my $where = $self->{'where'};
2155 if(!$where) {
2156 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2157 return;
2158 }
2159
2160 my $username = $self->{'username'};
2161 my $collect = $self->{'collect'};
2162 my $gsdl_cgi = $self->{'gsdl_cgi'};
2163
2164 if ($baseaction::authentication_enabled) {
2165 # Ensure the user is allowed to edit this collection
2166 &authenticate_user($gsdl_cgi, $username, $collect);
2167 }
2168
2169 # Make sure the collection isn't locked by someone else
2170 $self->lock_collection($username, $collect);
2171
2172 # check which directories need to be processed, specified in $where as
2173 # any combination of import|archives|index|live
2174 if($where =~ m/import/) {
2175 $self->_remove_import_metadata(@_);
2176 }
2177 if($where =~ m/archives/) {
2178 $self->_remove_archives_metadata(@_);
2179 }
2180 if($where =~ m/index/) {
2181 $self->_remove_index_metadata(@_);
2182 }
2183
2184 # Release the lock once it is done
2185 $self->unlock_collection($username, $collect);
2186}
2187
2188# the internal version, without authentication
2189sub _remove_index_metadata
2190{
2191 my $self = shift @_;
2192
2193 my $collect = $self->{'collect'};
2194 my $gsdl_cgi = $self->{'gsdl_cgi'};
2195# my $gsdlhome = $self->{'gsdlhome'};
2196 my $infodbtype = $self->{'infodbtype'};
2197
2198 # Obtain the collect dir
2199 my $site = $self->{'site'};
2200 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2201 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2202
2203 # look up additional args
2204 my $docid = $self->{'d'};
2205 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2206 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2207 }
2208 my $metaname = $self->{'metaname'};
2209 my $metapos = $self->{'metapos'};
2210 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2211
2212 # To people who know $collect_tail please add some comments
2213 # Obtain the path to the database
2214 my $collect_tail = $collect;
2215 $collect_tail =~ s/^.*[\/\\]//;
2216 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2217 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2218
2219 # Read the docid entry
2220 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2221
2222 # Basically loop through and unescape_html the values
2223 foreach my $k (keys %$doc_rec) {
2224 my @escaped_v = ();
2225 foreach my $v (@{$doc_rec->{$k}}) {
2226 if ($k eq "contains") {
2227 # protect quotes in ".2;".3 etc
2228 $v =~ s/\"/\\\"/g;
2229 push(@escaped_v, $v);
2230 }
2231 else {
2232 my $ev = &ghtml::unescape_html($v);
2233 $ev =~ s/\"/\\\"/g;
2234 push(@escaped_v, $ev);
2235 }
2236 }
2237 $doc_rec->{$k} = \@escaped_v;
2238 }
2239
2240 # Check to make sure the key does exist
2241 if (!defined ($doc_rec->{$metaname})) {
2242 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2243 }
2244
2245 # Obtain the specified metadata pos
2246 if(!defined $metapos && !defined $metavalue) { #$metapos = 0 if (!defined $metapos);
2247 $metapos = 0;
2248 }
2249
2250 # consider check key is defined before deleting?
2251 # Loop through the metadata array and ignore the specified position
2252 my $filtered_metadata = [];
2253 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2254 for (my $i=0; $i<$num_metadata_vals; $i++) {
2255 my $metaval = shift(@{$doc_rec->{$metaname}});
2256
2257 if (!defined $metavalue && $i != $metapos) {
2258 push(@$filtered_metadata,$metaval);
2259 }
2260
2261 if(defined $metavalue && !($metavalue eq $metaval))
2262 {
2263 push(@$filtered_metadata,$metaval);
2264 }
2265 }
2266 $doc_rec->{$metaname} = $filtered_metadata;
2267
2268 # Turn the record back to string
2269 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2270
2271 # Store it back to the database
2272 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2273 my $status = system($cmd);
2274 if ($status != 0) {
2275 my $mess = "Failed to set metadata key: $docid\n";
2276
2277 $mess .= "PATH: $ENV{'PATH'}\n";
2278 $mess .= "cmd = $cmd\n";
2279 $mess .= "Exit status: $status\n";
2280 $mess .= "System Error Message: $!\n";
2281
2282 $gsdl_cgi->generate_error($mess);
2283 }
2284 else {
2285 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2286 $mess .= " $metaname";
2287 $mess .= "->[$metapos]" if (defined $metapos);
2288
2289 $gsdl_cgi->generate_ok_message($mess);
2290 }
2291
2292 #return $status; # in case calling functions have a use for this
2293}
2294
2295sub remove_index_metadata
2296{
2297 my $self = shift @_;
2298
2299 my $username = $self->{'username'};
2300 my $collect = $self->{'collect'};
2301 my $gsdl_cgi = $self->{'gsdl_cgi'};
2302# my $gsdlhome = $self->{'gsdlhome'};
2303
2304 if ($baseaction::authentication_enabled) {
2305 # Ensure the user is allowed to edit this collection
2306 &authenticate_user($gsdl_cgi, $username, $collect);
2307 }
2308
2309 # Obtain the collect dir
2310 my $site = $self->{'site'};
2311 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2312 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2313
2314 # Make sure the collection isn't locked by someone else
2315 $self->lock_collection($username, $collect);
2316
2317 $self->_remove_index_metadata(@_);
2318
2319 # Release the lock once it is done
2320 $self->unlock_collection($username, $collect);
2321}
2322
2323
2324# Was trying to reused the codes, but the functions need to be broken
2325# down more before they can be reused, otherwise there will be too
2326# much overhead and duplicate process...
2327sub insert_metadata
2328{
2329 my $self = shift @_;
2330
2331 my $username = $self->{'username'};
2332 my $collect = $self->{'collect'};
2333 my $gsdl_cgi = $self->{'gsdl_cgi'};
2334 my $gsdlhome = $self->{'gsdlhome'};
2335 my $infodbtype = $self->{'infodbtype'};
2336
2337 # If the import metadata and gdbm database have been updated, we
2338 # need to insert some notification to warn user that the the text
2339 # they see at the moment is not indexed and require a rebuild.
2340 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2341
2342 if ($baseaction::authentication_enabled) {
2343 # Ensure the user is allowed to edit this collection
2344 $self->authenticate_user($username, $collect);
2345 }
2346
2347 # Obtain the collect and archive dir
2348 my $site = $self->{'site'};
2349 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2350 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2351 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2352
2353 # Make sure the collection isn't locked by someone else
2354 $self->lock_collection($username, $collect);
2355
2356 # Check additional args
2357 my $docid = $self->{'d'};
2358 if (!defined($docid)) {
2359 $gsdl_cgi->generate_error("No document id is specified: d=...");
2360 }
2361 my $metaname = $self->{'metaname'};
2362 if (!defined($metaname)) {
2363 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2364 }
2365 my $metavalue = $self->{'metavalue'};
2366 if (!defined($metavalue) || $metavalue eq "") {
2367 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2368 }
2369 # make "accumulate" the default (less destructive, as won't actually
2370 # delete any existing values)
2371 my $metamode = "accumulate";
2372
2373 #=======================================================================#
2374 # set_import_metadata [START]
2375 #=======================================================================#
2376 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2377 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2378 my $metadata_xml_file;
2379 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2380 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2381
2382 # This now stores the full pathname
2383 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2384
2385 # figure out correct metadata.xml file [?]
2386 # Assuming the metadata.xml file is next to the source file
2387 # Note: This will not work if it is using the inherited metadata from the parent folder
2388 my ($import_tailname, $import_dirname)
2389 = File::Basename::fileparse($import_filename);
2390 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2391
2392 # Shane's escape characters
2393 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2394 $metavalue =~ s/\,/&#44;/g;
2395 $metavalue =~ s/\:/&#58;/g;
2396 $metavalue =~ s/\|/&#124;/g;
2397 $metavalue =~ s/\(/&#40;/g;
2398 $metavalue =~ s/\)/&#41;/g;
2399 $metavalue =~ s/\[/&#91;/g;
2400 $metavalue =~ s/\\/&#92;/g;
2401 $metavalue =~ s/\]/&#93;/g;
2402 $metavalue =~ s/\{/&#123;/g;
2403 $metavalue =~ s/\}/&#125;/g;
2404 $metavalue =~ s/\"/&#34;/g;
2405 $metavalue =~ s/\`/&#96;/g;
2406 $metavalue =~ s/\n/_newline_/g;
2407
2408 # Edit the metadata.xml
2409 # Modified by Jeffrey from DL Consulting
2410 # Handle the case where there is one metadata.xml file for multiple FileSets
2411 # The XML filter needs to know whether it is in the right FileSet
2412 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2413 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2414 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2415 $metaname, $metavalue, $metamode, $import_tailname);
2416 #=======================================================================#
2417 # set_import_metadata [END]
2418 #=======================================================================#
2419
2420
2421 #=======================================================================#
2422 # set_metadata (accumulate version) [START]
2423 #=======================================================================#
2424 # To people who know $collect_tail please add some comments
2425 # Obtain path to the database
2426 my $collect_tail = $collect;
2427 $collect_tail =~ s/^.*[\/\\]//;
2428 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2429 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2430
2431 # Read the docid entry
2432 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2433
2434 foreach my $k (keys %$doc_rec) {
2435 my @escaped_v = ();
2436 foreach my $v (@{$doc_rec->{$k}}) {
2437 if ($k eq "contains") {
2438 # protect quotes in ".2;".3 etc
2439 $v =~ s/\"/\\\"/g;
2440 push(@escaped_v, $v);
2441 }
2442 else {
2443 my $ev = &ghtml::unescape_html($v);
2444 $ev =~ s/\"/\\\"/g;
2445 push(@escaped_v, $ev);
2446 }
2447 }
2448 $doc_rec->{$k} = \@escaped_v;
2449 }
2450
2451 # Protect the quotes
2452 $metavalue =~ s/\"/\\\"/g;
2453
2454 # Adds the pending macro
2455 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2456
2457 # If the metadata doesn't exist, create a new one
2458 if (!defined($doc_rec->{$metaname})){
2459 $doc_rec->{$metaname} = [ $macro_metavalue ];
2460 }
2461 # Else, let's acculumate the values
2462 else {
2463 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2464 }
2465
2466 # Generate the record string
2467 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2468
2469 # Store it into GDBM
2470 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2471 my $status = system($cmd);
2472 if ($status != 0) {
2473 # Catch error if gdbmget failed
2474 my $mess = "Failed to set metadata key: $docid\n";
2475
2476 $mess .= "PATH: $ENV{'PATH'}\n";
2477 $mess .= "cmd = $cmd\n";
2478 $mess .= "Exit status: $status\n";
2479 $mess .= "System Error Message: $!\n";
2480
2481 $gsdl_cgi->generate_error($mess);
2482 }
2483 else {
2484 my $mess = "insert-metadata successful: Key[$docid]\n";
2485 $mess .= " [In metadata.xml] $metaname";
2486 $mess .= " = $metavalue\n";
2487 $mess .= " [In database] $metaname";
2488 $mess .= " = $macro_metavalue\n";
2489 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2490 $gsdl_cgi->generate_ok_message($mess);
2491 }
2492 #=======================================================================#
2493 # set_metadata (accumulate version) [END]
2494 #=======================================================================#
2495
2496 # Release the lock once it is done
2497 $self->unlock_collection($username, $collect);
2498}
2499
25001;
Note: See TracBrowser for help on using the repository browser.