source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27160

Last change on this file since 27160 was 27160, checked in by ak19, 11 years ago

Committing bugfix discovered during restructuring of code (the restructured code is still being tested). Previously, the remove_archives_meta subroutine was ignoring any metavalue that was provided (and was instead always deleting the first metavalue for a metaname, unless metapos was explicitly set), whereas the rfdxml_metadata parse function clearly contained code to handle deleting a specified metavalue. Now remove_archives_meta checks for metapos and metavalue, and if both/either is set the specified value(s) are deleted. If neither is set, then metapos defaults to 0 as in the past and the first metavalue for the given metaname is deleted.

File size: 78.0 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
101
102 #REMOVE METHODS
103 "remove-import-metadata" => {
104 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
105 'optional-args' => [ ] },
106
107 "remove-archives-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 "remove-live-metadata" => {
112 'compulsory-args' => [ "d", "metaname" ],
113 'optional-args' => [ ] },
114
115 "remove-index-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ],
117 'optional-args' => [ "metapos", "metavalue" ] },
118
119 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ "where", "metapos", "metavalue" ] },
122
123 #INSERT METHODS
124 "insert-metadata" => {
125 'compulsory-args' => [ "d", "metaname", "metavalue" ],
126 'optional-args' => [ ] }
127};
128
129
130sub new
131{
132 my $class = shift (@_);
133 my ($gsdl_cgi,$iis6_mode) = @_;
134
135 # Treat metavalue specially. To transmit this through a GET request
136 # the Javascript side has url-encoded it, so here we need to decode
137 # it before proceeding
138
139 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
140 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
141 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
142
143 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
144 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
145
146 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
147
148 return bless $self, $class;
149}
150
151
152sub get_live_metadata
153{
154 my $self = shift @_;
155
156 my $username = $self->{'username'};
157 my $collect = $self->{'collect'};
158 my $gsdl_cgi = $self->{'gsdl_cgi'};
159 my $gsdlhome = $self->{'gsdlhome'};
160 my $infodbtype = $self->{'infodbtype'};
161
162 # live metadata gets/saves value scoped (prefixed) by the current usename
163 # so (for now) let's not bother to enforce authentication
164
165 # Obtain the collect dir
166 my $site = $self->{'site'};
167 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
168 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
169
170 # Make sure the collection isn't locked by someone else
171 $self->lock_collection($username, $collect);
172
173 # look up additional args
174 my $docid = $self->{'d'};
175 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
176 $gsdl_cgi->generate_error("No docid (d=...) specified.");
177 }
178
179 # Generate the dbkey
180 my $metaname = $self->{'metaname'};
181 my $dbkey = "$docid.$metaname";
182
183 # To people who know $collect_tail please add some comments
184 # Obtain path to the database
185 my $collect_tail = $collect;
186 $collect_tail =~ s/^.*[\/|\\]//;
187 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
188 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
189
190 # Obtain the content of the key
191 my $cmd = "gdbmget $infodb_file_path $dbkey";
192 if (open(GIN,"$cmd |") == 0) {
193 # Catch error if gdbmget failed
194 my $mess = "Failed to get metadata key: $metaname\n";
195 $mess .= "$!\n";
196
197 $gsdl_cgi->generate_error($mess);
198 }
199 else {
200 binmode(GIN,":utf8");
201 # Read everything in and concatenate them into $metavalue
202 my $metavalue = "";
203 my $line;
204 while (defined ($line=<GIN>)) {
205 $metavalue .= $line;
206 }
207 close(GIN);
208 chomp($metavalue); # Get rid off the tailing newlines
209 $gsdl_cgi->generate_ok_message("$metavalue");
210 }
211
212 # Release the lock once it is done
213 $self->unlock_collection($username, $collect);
214}
215
216# just calls the index version
217sub get_metadata
218{
219 my $self = shift @_;
220 $self->get_index_metadata(@_);
221}
222
223sub get_index_metadata
224{
225 my $self = shift @_;
226
227 my $username = $self->{'username'};
228 my $collect = $self->{'collect'};
229 my $gsdl_cgi = $self->{'gsdl_cgi'};
230 my $gsdlhome = $self->{'gsdlhome'};
231
232 # Authenticate user if it is enabled
233 if ($baseaction::authentication_enabled) {
234 # Ensure the user is allowed to edit this collection
235 &authenticate_user($gsdl_cgi, $username, $collect);
236 }
237
238 # Obtain the collect dir
239 my $site = $self->{'site'};
240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
241 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
242
243 # Make sure the collection isn't locked by someone else
244 $self->lock_collection($username, $collect);
245
246 # look up additional args
247 my $docid = $self->{'d'};
248 my $metaname = $self->{'metaname'};
249 my $metapos = $self->{'metapos'};
250 my $infodbtype = $self->{'infodbtype'};
251
252 # To people who know $collect_tail please add some comments
253 # Obtain path to the database
254 my $collect_tail = $collect;
255 $collect_tail =~ s/^.*[\/\\]//;
256 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
257 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
258
259 # Read the docid entry
260 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
261
262 # Basically loop through and unescape_html the values
263 foreach my $k (keys %$doc_rec) {
264 my @escaped_v = ();
265 foreach my $v (@{$doc_rec->{$k}}) {
266 my $ev = &ghtml::unescape_html($v);
267 push(@escaped_v, $ev);
268 }
269 $doc_rec->{$k} = \@escaped_v;
270 }
271
272 # Obtain the specified metadata value
273 $metapos = 0 if (!defined $metapos);
274 my $metavalue = $doc_rec->{$metaname}->[$metapos];
275 $gsdl_cgi->generate_ok_message("$metavalue");
276
277 # Release the lock once it is done
278 $self->unlock_collection($username, $collect);
279}
280
281
282sub get_import_metadata
283{
284 my $self = shift @_;
285
286 my $username = $self->{'username'};
287 my $collect = $self->{'collect'};
288 my $gsdl_cgi = $self->{'gsdl_cgi'};
289 my $gsdlhome = $self->{'gsdlhome'};
290
291 # Authenticate user if it is enabled
292 if ($baseaction::authentication_enabled) {
293 # Ensure the user is allowed to edit this collection
294 &authenticate_user($gsdl_cgi, $username, $collect);
295 }
296
297 # Obtain the collect dir
298 my $site = $self->{'site'};
299 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
300 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
301
302 # Make sure the collection isn't locked by someone else
303 $self->lock_collection($username, $collect);
304
305 # look up additional args
306 my $docid = $self->{'d'};
307 my $metaname = $self->{'metaname'};
308 my $infodbtype = $self->{'infodbtype'};
309 if (!defined $docid)
310 {
311 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
312 }
313
314 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
315 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
316 my $metadata_xml_file;
317 my $import_filename = undef;
318
319
320 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
321 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
322 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
323
324 # This now stores the full pathname
325 $import_filename = $doc_rec->{'src-file'}->[0];
326
327 # figure out correct metadata.xml file [?]
328 # Assuming the metadata.xml file is next to the source file
329 # Note: This will not work if it is using the inherited metadata from the parent folder
330 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
331 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
332
333 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
334
335 # Release the lock once it is done
336 $self->unlock_collection($username, $collect);
337}
338
339sub get_metadata_from_metadata_xml
340{
341 my $self = shift @_;
342 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
343
344 my @rules =
345 (
346 _default => 'raw',
347 'Metadata' => \&gfmxml_metadata,
348 'FileName' => \&mxml_filename
349 );
350
351 my $parser = XML::Rules->new
352 (
353 rules => \@rules,
354 output_encoding => 'utf8'
355 );
356
357 my $xml_in = "";
358 if (!open(MIN,"<$metadata_xml_filename"))
359 {
360 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
361 }
362 else
363 {
364 # Read them in
365 my $line;
366 while (defined ($line=<MIN>)) {
367 $xml_in .= $line;
368 }
369 close(MIN);
370
371 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
372
373 if(defined $parser->{'pad'}->{'metavalue'})
374 {
375 return $parser->{'pad'}->{'metavalue'};
376 }
377 else
378 {
379 return "";
380 }
381 }
382}
383
384sub gfmxml_metadata
385{
386 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
387
388 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
389 {
390 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
391 }
392}
393
394sub get_archives_metadata
395{
396 my $self = shift @_;
397
398 my $username = $self->{'username'};
399 my $collect = $self->{'collect'};
400 my $gsdl_cgi = $self->{'gsdl_cgi'};
401 my $gsdlhome = $self->{'gsdlhome'};
402 my $infodbtype = $self->{'infodbtype'};
403
404 # Authenticate user if it is enabled
405 if ($baseaction::authentication_enabled) {
406 # Ensure the user is allowed to edit this collection
407 &authenticate_user($gsdl_cgi, $username, $collect);
408 }
409
410 # Obtain the collect dir
411 my $site = $self->{'site'};
412 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
413
414 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
415
416 # Make sure the collection isn't locked by someone else
417 $self->lock_collection($username, $collect);
418
419 # look up additional args
420 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
421 $docid_secnum = "" if (!defined $docid_secnum);
422
423 my $metaname = $self->{'metaname'};
424 my $metapos = $self->{'metapos'};
425 $metapos = 0 if (!defined $metapos);
426
427 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
428 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
429
430 # This now stores the full pathname
431 my $doc_filename = $doc_rec->{'doc-file'}->[0];
432
433 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
434
435 # Release the lock once it is done
436 $self->unlock_collection($username, $collect);
437}
438
439sub get_metadata_from_archive_xml
440{
441 my $self = shift @_;
442 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
443
444 my @start_rules = ('Section' => \&dxml_start_section);
445
446 my @rules =
447 (
448 _default => 'raw',
449 'Metadata' => \&gfdxml_metadata
450 );
451
452 my $parser = XML::Rules->new
453 (
454 start_rules => \@start_rules,
455 rules => \@rules,
456 output_encoding => 'utf8'
457 );
458
459 my $xml_in = "";
460 if (!open(MIN,"<$doc_xml_filename"))
461 {
462 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
463 }
464 else
465 {
466 # Read them in
467 my $line;
468 while (defined ($line=<MIN>)) {
469 $xml_in .= $line;
470 }
471 close(MIN);
472
473 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
474
475 if(defined $parser->{'pad'}->{'metavalue'})
476 {
477 return $parser->{'pad'}->{'metavalue'};
478 }
479 else
480 {
481 return "";
482 }
483 }
484}
485
486sub gfdxml_metadata
487{
488 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
489
490 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
491 {
492 return;
493 }
494
495 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
496 {
497 if (!defined $parser->{'parameters'}->{'poscount'})
498 {
499 $parser->{'parameters'}->{'poscount'} = 0;
500 }
501 else
502 {
503 $parser->{'parameters'}->{'poscount'}++;
504 }
505 }
506
507 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
508 {
509 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
510 }
511}
512
513sub _set_live_metadata
514{
515 my $self = shift @_;
516
517 my $collect = $self->{'collect'};
518 my $gsdl_cgi = $self->{'gsdl_cgi'};
519 #my $gsdlhome = $self->{'gsdlhome'};
520 my $infodbtype = $self->{'infodbtype'};
521
522 # Obtain the collect dir
523 my $site = $self->{'site'};
524 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
525 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
526
527
528 # look up additional args
529 my $docid = $self->{'d'};
530 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
531 $gsdl_cgi->generate_error("No docid (d=...) specified.");
532 }
533 my $metavalue = $self->{'metavalue'};
534
535
536 # Generate the dbkey
537 my $metaname = $self->{'metaname'};
538 my $dbkey = "$docid.$metaname";
539
540 # To people who know $collect_tail please add some comments
541 # Obtain path to the database
542 my $collect_tail = $collect;
543 $collect_tail =~ s/^.*[\/\\]//;
544 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
545 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
546
547 # Set the new value
548 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
549 my $status = system($cmd);
550 if ($status != 0) {
551 # Catch error if gdbmget failed
552 my $mess = "Failed to set metadata key: $dbkey\n";
553
554 $mess .= "PATH: $ENV{'PATH'}\n";
555 $mess .= "cmd = $cmd\n";
556 $mess .= "Exit status: $status\n";
557 $mess .= "System Error Message: $!\n";
558
559 $gsdl_cgi->generate_error($mess);
560 }
561 else {
562 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
563 }
564
565 #return $status; # in case calling functions have any further use for this
566}
567
568sub set_live_metadata
569{
570 my $self = shift @_;
571
572 my $username = $self->{'username'};
573 my $collect = $self->{'collect'};
574 my $gsdl_cgi = $self->{'gsdl_cgi'};
575
576 if ($baseaction::authentication_enabled) {
577 # Ensure the user is allowed to edit this collection
578 &authenticate_user($gsdl_cgi, $username, $collect);
579 }
580
581 # Make sure the collection isn't locked by someone else
582 $self->lock_collection($username, $collect);
583
584 $self->_set_live_metadata(@_);
585
586 # Release the lock once it is done
587 $self->unlock_collection($username, $collect);
588}
589
590sub set_metadata_entry
591{
592 my $self = shift @_;
593 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
594
595 # To people who know $collect_tail please add some comments
596 # Obtain path to the database
597 my $collect_tail = $collect;
598 $collect_tail =~ s/^.*[\/\\]//;
599 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
600 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
601
602# print STDERR "**** infodb file path = $infodb_file_path\n";
603# print STDERR "***** infodb type = $infodbtype\n";
604
605 # Read the docid entry
606 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
607
608 # Set the metadata value
609 if (defined $metapos) {
610 $doc_rec->{$metaname}->[$metapos] = $metavalue;
611 }
612 else {
613 $doc_rec->{$metaname} = [ $metavalue ];
614 }
615
616 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
617
618 return $status;
619
620}
621
622sub _set_import_metadata
623{
624 my $self = shift @_;
625
626 my $collect = $self->{'collect'};
627 my $gsdl_cgi = $self->{'gsdl_cgi'};
628 my $infodbtype = $self->{'infodbtype'};
629# my $gsdlhome = $self->{'gsdlhome'};
630
631 # Obtain the collect and archive dir
632 my $site = $self->{'site'};
633 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
634 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
635 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
636
637 # look up additional args
638 # want either d= or f=
639 my $docid = $self->{'d'};
640 my $import_file = $self->{'f'};
641 if ((!defined $docid) && (!defined $import_file)) {
642 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
643 }
644
645 # Get the parameters and set default mode to "accumulate"
646 my $metaname = $self->{'metaname'};
647 my $metavalue = $self->{'metavalue'};
648## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
649 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
650
651 my $metamode = $self->{'metamode'};
652 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
653 # make "accumulate" the default (less destructive, as won't actually
654 # delete any existing values)
655 $metamode = "accumulate";
656 }
657
658 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
659 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
660 my $metadata_xml_file;
661 my $import_filename = undef;
662 if (defined $docid) {
663 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
664 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
665
666 # This now stores the full pathname
667 $import_filename = $doc_rec->{'src-file'}->[0];
668 }
669 else {
670 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
671 }
672
673 # figure out correct metadata.xml file [?]
674 # Assuming the metadata.xml file is next to the source file
675 # Note: This will not work if it is using the inherited metadata from the parent folder
676 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
677 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
678
679 # Edit the metadata.xml
680 # Modified by Jeffrey from DL Consulting
681 # Handle the case where there is one metadata.xml file for multiple FileSets
682 # The XML filter needs to know whether it is in the right FileSet
683 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
684 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
685 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
686 $metaname, $metavalue, $metamode, $import_tailname);
687
688
689 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
690 $mess .= " $metaname";
691 $mess .= " = $metavalue";
692 $mess .= " ($metamode)\n";
693
694 $gsdl_cgi->generate_ok_message($mess);
695
696 #return $status; # in case calling functions have any further use for this
697}
698
699# the version of set_index_meta that doesn't do authentication
700sub _set_archives_metadata
701{
702 my $self = shift @_;
703
704 my $collect = $self->{'collect'};
705 my $gsdl_cgi = $self->{'gsdl_cgi'};
706 my $infodbtype = $self->{'infodbtype'};
707
708 # Obtain the collect and archive dir
709 my $site = $self->{'site'};
710 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
711 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
712
713 # look up additional args
714 my $docid = $self->{'d'};
715 my $metaname = $self->{'metaname'};
716 my $metavalue = $self->{'metavalue'};
717 my $prevmetavalue = $self->{'prevmetavalue'};
718
719 my $metapos = $self->{'metapos'};
720 $metapos = 0 if (!defined $metapos);
721
722 my $metamode = $self->{'metamode'};
723 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
724 # make "accumulate" the default (less destructive, as won't actually
725 # delete any existing values)
726 $metamode = "accumulate";
727 }
728
729 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
730 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
731
732 if ($status == 0) {
733 my $mess = "set-archives-metadata successful: Key[$docid]\n";
734 $mess .= " $metaname";
735 $mess .= "->[$metapos]" if (defined $metapos);
736 $mess .= " = $metavalue";
737 $mess .= " ($metamode)\n";
738
739 $gsdl_cgi->generate_ok_message($mess);
740 }
741 else {
742 my $mess .= "Failed to set archives metadata key: $docid\n";
743 $mess .= "Exit status: $status\n";
744 if(defined $self->{'error_msg'}) {
745 $mess .= "Error Message: $self->{'error_msg'}\n";
746 } else {
747 $mess .= "System Error Message: $!\n";
748 }
749 $mess .= "-" x 20 . "\n";
750
751 $gsdl_cgi->generate_error($mess);
752 }
753
754 #return $status; # in case calling functions have any further use for this
755}
756
757
758# the version of set_index_meta that doesn't do authentication
759sub _set_index_metadata
760{
761 my $self = shift @_;
762
763 my $collect = $self->{'collect'};
764 my $gsdl_cgi = $self->{'gsdl_cgi'};
765
766 my $site = $self->{'site'};
767 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
768
769 # look up additional args
770 my $docid = $self->{'d'};
771 my $metaname = $self->{'metaname'};
772 my $metapos = $self->{'metapos'};
773 my $metavalue = $self->{'metavalue'};
774 my $infodbtype = $self->{'infodbtype'};
775
776 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
777
778 if ($status != 0) {
779 # Catch error if set infodb entry failed
780 my $mess = "Failed to set metadata key: $docid\n";
781
782 $mess .= "PATH: $ENV{'PATH'}\n";
783 $mess .= "Exit status: $status\n";
784 $mess .= "System Error Message: $!\n";
785
786 $gsdl_cgi->generate_error($mess);
787 }
788 else {
789 my $mess = "set-index-metadata successful: Key[$docid]\n";
790 $mess .= " $metaname";
791 $mess .= "->[$metapos]" if (defined $metapos);
792 $mess .= " = $metavalue\n";
793
794 $gsdl_cgi->generate_ok_message($mess);
795 }
796
797 #return $status; # in case calling functions have any further use for this
798}
799
800sub set_index_metadata
801{
802 my $self = shift @_;
803
804 my $username = $self->{'username'};
805 my $collect = $self->{'collect'};
806 my $gsdl_cgi = $self->{'gsdl_cgi'};
807 #my $gsdlhome = $self->{'gsdlhome'};
808
809 if ($baseaction::authentication_enabled) {
810 # Ensure the user is allowed to edit this collection
811 &authenticate_user($gsdl_cgi, $username, $collect);
812 }
813
814 my $site = $self->{'site'};
815 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
816
817 $gsdl_cgi->checked_chdir($collect_dir);
818
819 # Obtain the collect dir
820 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
821
822 # Make sure the collection isn't locked by someone else
823 $self->lock_collection($username, $collect);
824
825 $self->_set_index_metadata(@_);
826
827 # Release the lock once it is done
828 $self->unlock_collection($username, $collect);
829}
830
831# call this to set the metadata for a combination of dirs archives, import or index, or live
832# if none specified, defaults to index which was the original behaviour of set_metadata.
833sub set_metadata
834{
835 my $self = shift @_;
836
837 # Testing that not defining a variable, setting it to "" or to " " all return false
838 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
839
840 my $where = $self->{'where'};
841 if(!$where) {
842 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
843 return;
844 }
845
846 # authenticate and lock collection once, even if processing multiple dirs
847 my $username = $self->{'username'};
848 my $collect = $self->{'collect'};
849 my $gsdl_cgi = $self->{'gsdl_cgi'};
850
851 if ($baseaction::authentication_enabled) {
852 # Ensure the user is allowed to edit this collection
853 #&authenticate_user($gsdl_cgi, $username, $collect);
854 $self->authenticate_user($username, $collect);
855 }
856
857 if($where =~ m/index/) {
858 my $site = $self->{'site'};
859 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
860 $gsdl_cgi->checked_chdir($collect_dir);
861 }
862
863 # Make sure the collection isn't locked by someone else
864 $self->lock_collection($username, $collect);
865
866
867 # now at last can set the metadata. $where can specify multiple
868 # $where is of the form: import|archives|index, or a subset thereof
869
870 #my @whichdirs = split('\|', $where);
871
872 # just check whether $where contains import/archives/index/live in turn, and
873 # for each case, process it accordingly
874 if($where =~ m/import/) {
875 $self->_set_import_metadata(@_);
876 }
877
878 if($where =~ m/archives/) {
879
880 # look up docID arg which is optional to set_metadata because it's optional
881 # to set_import, but which is compulsory to set_archives_metadata
882 my $docid = $self->{'d'};
883 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
884 $gsdl_cgi->generate_error("No docid (d=...) specified.");
885 }
886 else { # we have a docid, so can set archives meta
887 $self->_set_archives_metadata(@_);
888 }
889 }
890
891 if($where =~ m/index/) {
892
893 # look up docID arg which is optional to set_metadata because it's optional
894 # to set_import, but which is compulsory to set_archives_metadata
895 my $docid = $self->{'d'};
896 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
897 $gsdl_cgi->generate_error("No docid (d=...) specified.");
898 }
899 else { # we have a docid, so can set index meta
900 $self->_set_index_metadata(@_);
901 }
902 }
903
904 if($where =~ m/live/) {
905 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
906 }
907
908 # Release the lock once it is done
909 $self->unlock_collection($username, $collect);
910}
911
912sub set_metadata_array
913{
914 my $self = shift @_;
915
916 my $username = $self->{'username'};
917 my $collect = $self->{'collect'};
918 my $gsdl_cgi = $self->{'gsdl_cgi'};
919 my $gsdlhome = $self->{'gsdlhome'};
920
921 if ($baseaction::authentication_enabled) {
922 # Ensure the user is allowed to edit this collection
923 &authenticate_user($gsdl_cgi, $username, $collect);
924 }
925
926 my $site = $self->{'site'};
927 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
928
929 $gsdl_cgi->checked_chdir($collect_dir);
930
931 # Obtain the collect dir
932 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
933
934 # Make sure the collection isn't locked by someone else
935 $self->lock_collection($username, $collect);
936
937 # look up additional args
938
939 my $infodbtype = $self->{'infodbtype'};
940
941 my $json_str = $self->{'json'};
942 my $doc_array = decode_json $json_str;
943
944
945 my $global_status = 0;
946 my $global_mess = "";
947
948 my @all_docids = ();
949
950 foreach my $doc_array_rec ( @$doc_array ) {
951
952 my $docid = $doc_array_rec->{'docid'};
953 my $metaname = $doc_array_rec->{'metaname'};
954 my $metapos = $doc_array_rec->{'metapos'};
955 my $metavalue = $doc_array_rec->{'metavalue'};
956
957 push(@all_docids,$docid);
958
959 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
960
961 if ($status != 0) {
962 # Catch error if set infodb entry failed
963 $global_status = $status;
964 $global_mess .= "Failed to set metadata key: $docid\n";
965 $global_mess .= "Exit status: $status\n";
966 $global_mess .= "System Error Message: $!\n";
967 $global_mess .= "-" x 20;
968 }
969 }
970
971 if ($global_status != 0) {
972 $global_mess .= "PATH: $ENV{'PATH'}\n";
973 $gsdl_cgi->generate_error($global_mess);
974 }
975 else {
976 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
977 $gsdl_cgi->generate_ok_message($mess);
978 }
979
980 # Release the lock once it is done
981 $self->unlock_collection($username, $collect);
982}
983
984
985sub dxml_metadata
986{
987 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
988 my $metaname = $parser->{'parameters'}->{'metaname'};
989 my $metamode = $parser->{'parameters'}->{'metamode'};
990
991 print STDERR "**** Processing closing </Metadata> tag\n";
992
993 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
994
995 # Find the right metadata tag and checks if we are going to
996 # override it
997 #
998 # Note: This over writes the first metadata block it
999 # encountered. If there are multiple Sections in the doc.xml, it
1000 # might not behave as you would expect
1001
1002 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1003## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1004## print STDERR "**** metamode = $metamode\n";
1005
1006 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1007 {
1008 my $name_attr = $attrHash->{'name'};
1009 if (($name_attr eq $metaname) && ($metamode eq "override"))
1010 {
1011 if (!defined $parser->{'parameters'}->{'poscount'})
1012 {
1013 $parser->{'parameters'}->{'poscount'} = 0;
1014 }
1015 else
1016 {
1017 $parser->{'parameters'}->{'poscount'}++;
1018 }
1019
1020 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1021 {
1022 ##print STDERR "#### got match!!\n";
1023 # Get the value and override the current value
1024 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1025 $attrHash->{'_content'} = $metavalue;
1026
1027 # Don't want it to wipe out any other pieces of metadata
1028 $parser->{'parameters'}->{'metamode'} = "done";
1029 }
1030 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1031 {
1032 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1033 $attrHash->{'_content'} = $metavalue;
1034 $parser->{'parameters'}->{'metamode'} = "done";
1035 }
1036 }
1037 }
1038
1039 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1040 return [$tagname => $attrHash];
1041}
1042
1043# This method exists purely for catching invalid section numbers that the client
1044# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1045# we've seen all the Sections in the doc.xml, and none of their section nums matched
1046# if the metamode has not been set to 'done' by then.
1047sub dxml_archive
1048{
1049 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1050 my $metamode = $parser->{'parameters'}->{'metamode'};
1051
1052 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1053 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1054
1055# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1056
1057 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1058 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1059
1060 my $metaname = $parser->{'parameters'}->{'metaname'};
1061 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1062
1063 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1064 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1065 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1066 print STDERR "\tin section $opt_doc_secnum.\n";
1067 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1068 }
1069
1070 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1071 return [$tagname => $attrHash];
1072}
1073
1074sub dxml_description
1075{
1076 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1077 my $metamode = $parser->{'parameters'}->{'metamode'};
1078
1079 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1080 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1081
1082 print STDERR "**** Processing a closing </Description> tag \n";
1083# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1084
1085 # Accumulate the metadata
1086
1087 # We'll be accumulating metadata at this point if we haven't found and therefore
1088 # haven't processed the metadata yet.
1089 # For subsections, this means that if we're at a matching subsection, but haven't
1090 # found the correct metaname to override in that subsection, we accumulate it as new
1091 # meta in the subsection by adding it to the current description.
1092 # If there's no subsection info for the metadata, it will accumulate at the top level
1093 # section description if we hadn't found a matching metaname to override at this point.
1094
1095 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1096 # at the toplevel section. In the latter case, it means we want to process meta in the
1097 # toplevel section. So the eq check between the values below will work in all cases.
1098
1099 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1100 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1101 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1102
1103 if ($curr_secnum eq $opt_doc_secnum
1104 && ($metamode eq "accumulate" || $metamode eq "override")) {
1105 if ($metamode eq "override") {
1106 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1107 }
1108
1109 # If get to here and metamode is override, the this means there
1110 # was no existing value to overide => treat as an append operation
1111
1112 # Tack a new metadata tag on to the end of the <Metadata>+ block
1113 my $metaname = $parser->{'parameters'}->{'metaname'};
1114 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1115
1116 my $metadata_attr = {
1117 '_content' => $metavalue,
1118 'name' => $metaname,
1119 'mode' => "accumulate"
1120 };
1121
1122 my $append_metadata = [ "Metadata" => $metadata_attr ];
1123 my $description_content = $attrHash->{'_content'};
1124
1125 print "Appending metadata to doc.xml\n";
1126
1127 if (ref($description_content)) {
1128 # got some existing interesting nested content
1129 push(@$description_content, " ", $append_metadata ,"\n ");
1130 }
1131 else {
1132 #description_content is most likely a string such as "\n"
1133 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1134 }
1135
1136 $parser->{'parameters'}->{'metamode'} = "done";
1137 }
1138 else {
1139 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1140## print STDERR "**** NOT ACCUMULATE?!? \n";
1141 }
1142
1143 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1144 return [$tagname => $attrHash];
1145}
1146
1147
1148sub dxml_start_section
1149{
1150 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1151
1152 my $new_depth = scalar(@$contextArray);
1153
1154 print STDERR "**** START SECTION \n";
1155
1156 if ($new_depth == 1) {
1157 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1158 $parser->{'parameters'}->{'curr_section_num'} = "";
1159 }
1160
1161 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1162 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1163
1164 my $new_secnum;
1165
1166 if ($new_depth > $old_depth) {
1167 # child subsection
1168 $new_secnum = "$old_secnum.1";
1169 }
1170 elsif ($new_depth == $old_depth) {
1171 # sibling section => increase it's value by 1
1172 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1173 $tail_num++;
1174 $new_secnum = $old_secnum;
1175 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1176 }
1177 else {
1178 # back up to parent section => lopp off tail
1179 $new_secnum = $old_secnum;
1180 $new_secnum =~ s/\.\d+$//;
1181 }
1182
1183 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1184 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1185
1186 1;
1187}
1188
1189sub edit_xml_file
1190{
1191 my $self = shift @_;
1192 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1193
1194 # use XML::Rules to add it in (read in and out again)
1195 my $parser = XML::Rules->new(start_rules => $start_rules,
1196 rules => $rules,
1197 style => 'filter',
1198 output_encoding => 'utf8' );
1199
1200 my $xml_in = "";
1201 if (!open(MIN,"<$filename")) {
1202 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1203 }
1204 else {
1205 # Read all the text in
1206 my $line;
1207 while (defined ($line=<MIN>)) {
1208 $xml_in .= $line;
1209 }
1210 close(MIN);
1211
1212 my $MOUT;
1213 if (!open($MOUT,">$filename")) {
1214 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1215 }
1216 else {
1217 # Matched lines will get handled by the call backs
1218## my $xml_out = "";
1219
1220 binmode($MOUT,":utf8");
1221 $parser->filter($xml_in,$MOUT, $options);
1222
1223# binmode(MOUT,":utf8");
1224# print MOUT $xml_out;
1225 close($MOUT);
1226 }
1227 }
1228
1229 # copy across any custom error information that was stored during parsing
1230 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1231}
1232
1233sub edit_doc_xml
1234{
1235 my $self = shift @_;
1236 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1237
1238 my $info_mess = <<RAWEND;
1239****************************
1240 edit_doc_xml()
1241****************************
1242doc_xml_filename = $doc_xml_filename
1243metaname = $metaname
1244metavalue = $metavalue
1245metapos = $metapos
1246metamode = $metamode
1247opt_secnum = $opt_secnum
1248prevmetavalue = $prevmetavalue
1249****************************
1250RAWEND
1251
1252 $gsdl_cgi->generate_message($info_mess);
1253
1254 # To monitor which section/subsection number we are in
1255 my @start_rules =
1256 ( 'Section' => \&dxml_start_section );
1257
1258 # use XML::Rules to add it in (read in and out again)
1259 # Set the call back functions
1260 my @rules =
1261 ( _default => 'raw',
1262 'Metadata' => \&dxml_metadata,
1263 'Description' => \&dxml_description,
1264 'Archive' => \&dxml_archive); # just for catching errors at end
1265
1266 # Sets the parameters
1267 my $options = { 'metaname' => $metaname,
1268 'metapos' => $metapos,
1269 'metavalue' => $metavalue,
1270 'metamode' => $metamode,
1271 'prevmetavalue' => $prevmetavalue };
1272
1273 if (defined $opt_secnum) {
1274 $options->{'secnum'} = $opt_secnum;
1275 }
1276
1277 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1278}
1279
1280sub set_archives_metadata_entry
1281{
1282 my $self = shift @_;
1283 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1284
1285 my $info_mess = <<RAWEND;
1286****************************
1287 set_archives_metadata_entry()
1288****************************
1289archive_dir = $archive_dir
1290collect_dir = $collect_dir
1291collect = $collect
1292infodbtype = $infodbtype
1293docid = $docid
1294metaname = $metaname
1295metapos = $metapos
1296metavalue = $metavalue
1297metamode = $metamode
1298prevmetavalue = $prevmetavalue
1299****************************
1300RAWEND
1301
1302 $gsdl_cgi->generate_message($info_mess);
1303
1304 # Obtain the doc.xml path for the specified docID
1305 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1306
1307 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1308 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1309 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1310
1311 # The $doc_xml_file is relative to the archives, and now let's get the full path
1312 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1313 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1314
1315 # Edit the doc.xml file with the specified metadata name, value and position.
1316 # TODO: there is a potential problem here as this edit_doc_xml function
1317 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1318 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1319
1320 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1321 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1322
1323 # return 0; # return 0 for now to indicate no error
1324 return (defined $self->{'error_msg'}) ? 1 : 0;
1325}
1326
1327
1328sub set_archives_metadata
1329{
1330 my $self = shift @_;
1331
1332 my $username = $self->{'username'};
1333 my $collect = $self->{'collect'};
1334 my $gsdl_cgi = $self->{'gsdl_cgi'};
1335
1336 if ($baseaction::authentication_enabled) {
1337 # Ensure the user is allowed to edit this collection
1338 $self->authenticate_user($username, $collect);
1339 }
1340
1341 # Make sure the collection isn't locked by someone else
1342 $self->lock_collection($username, $collect);
1343
1344 $self->_set_archives_metadata(@_);
1345
1346 # Release the lock once it is done
1347 $self->unlock_collection($username, $collect);
1348}
1349
1350
1351sub set_archives_metadata_array
1352{
1353 my $self = shift @_;
1354
1355 my $username = $self->{'username'};
1356 my $collect = $self->{'collect'};
1357 my $gsdl_cgi = $self->{'gsdl_cgi'};
1358 my $gsdlhome = $self->{'gsdlhome'};
1359
1360 if ($baseaction::authentication_enabled) {
1361 # Ensure the user is allowed to edit this collection
1362 &authenticate_user($gsdl_cgi, $username, $collect);
1363 }
1364
1365 my $site = $self->{'site'};
1366 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1367
1368 $gsdl_cgi->checked_chdir($collect_dir);
1369
1370 # Obtain the collect dir
1371 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1372
1373 # Make sure the collection isn't locked by someone else
1374 $self->lock_collection($username, $collect);
1375
1376 # look up additional args
1377
1378 my $infodbtype = $self->{'infodbtype'};
1379
1380 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1381
1382 my $json_str = $self->{'json'};
1383 my $doc_array = decode_json $json_str;
1384
1385
1386 my $global_status = 0;
1387 my $global_mess = "";
1388
1389 my @all_docids = ();
1390
1391 foreach my $doc_array_rec ( @$doc_array ) {
1392
1393 my $docid = $doc_array_rec->{'docid'};
1394 my $metaname = $doc_array_rec->{'metaname'};
1395 my $metapos = $doc_array_rec->{'metapos'};
1396 my $metamode = $self->{'metamode'};
1397 my $metavalue = $doc_array_rec->{'metavalue'};
1398
1399 # Some sanity checks
1400 $metapos = 0 if (!defined $metapos);
1401
1402 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1403 # make "accumulate" the default (less destructive, as won't actually
1404 # delete any existing values)
1405 $metamode = "accumulate";
1406 }
1407
1408 push(@all_docids,$docid);
1409
1410 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1411 $metaname,$metapos,$metavalue,$metamode);
1412
1413 if ($status != 0) {
1414 # Catch error if set infodb entry failed
1415 $global_status = $status;
1416 $global_mess .= "Failed to set metadata key: $docid\n";
1417 $global_mess .= "Exit status: $status\n";
1418 $global_mess .= "System Error Message: $!\n";
1419 $global_mess .= "-" x 20 . "\n";
1420 }
1421 }
1422
1423 if ($global_status != 0) {
1424 $global_mess .= "PATH: $ENV{'PATH'}\n";
1425 $gsdl_cgi->generate_error($global_mess);
1426 }
1427 else {
1428 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1429 $gsdl_cgi->generate_ok_message($mess);
1430 }
1431
1432 # Release the lock once it is done
1433 $self->unlock_collection($username, $collect);
1434}
1435
1436sub _remove_archives_metadata
1437{
1438 my $self = shift @_;
1439
1440 my $collect = $self->{'collect'};
1441 my $gsdl_cgi = $self->{'gsdl_cgi'};
1442# my $gsdlhome = $self->{'gsdlhome'};
1443 my $infodbtype = $self->{'infodbtype'};
1444
1445 my $site = $self->{'site'};
1446
1447 # Obtain the collect and archive dir
1448 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1449
1450 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1451
1452 # look up additional args
1453 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1454
1455 my $metaname = $self->{'metaname'};
1456 my $metapos = $self->{'metapos'};
1457 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1458
1459 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1460 # default to deleting the first metavalue for the given metaname
1461 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1462 # seem to get deleted in one single remove_archives_meta action invocation.
1463 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1464 # deleted, both get deleted.
1465 if(!defined $metapos && !defined $metavalue) {
1466 $metapos = 0;
1467 }
1468
1469 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1470 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1471
1472 # This now stores the full pathname
1473 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1474
1475 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum);
1476# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1477
1478 if ($status == 0)
1479 {
1480 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1481 $mess .= " $metaname";
1482 $mess .= "->[$metapos]" if (defined $metapos);
1483
1484 $gsdl_cgi->generate_ok_message($mess);
1485 }
1486 else
1487 {
1488 my $mess .= "Failed to remove archives metadata key: $docid\n";
1489 $mess .= "Exit status: $status\n";
1490 $mess .= "System Error Message: $!\n";
1491 $mess .= "-" x 20 . "\n";
1492
1493 $gsdl_cgi->generate_error($mess);
1494 }
1495
1496 #return $status; # in case calling functions have a use for this
1497}
1498
1499sub remove_archives_metadata
1500{
1501 my $self = shift @_;
1502
1503 my $username = $self->{'username'};
1504 my $collect = $self->{'collect'};
1505 my $gsdl_cgi = $self->{'gsdl_cgi'};
1506
1507 if ($baseaction::authentication_enabled)
1508 {
1509 # Ensure the user is allowed to edit this collection
1510 &authenticate_user($gsdl_cgi, $username, $collect);
1511 }
1512
1513 # Make sure the collection isn't locked by someone else
1514 $self->lock_collection($username, $collect);
1515
1516 $self->_remove_archives_metadata(@_);
1517
1518 # Release the lock once it is done
1519 $self->unlock_collection($username, $collect);
1520}
1521
1522sub remove_from_doc_xml
1523{
1524 my $self = shift @_;
1525 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1526
1527 my @start_rules = ('Section' => \&dxml_start_section);
1528
1529 # Set the call-back functions for the metadata tags
1530 my @rules =
1531 (
1532 _default => 'raw',
1533 'Metadata' => \&rfdxml_metadata
1534 );
1535
1536 my $parser = XML::Rules->new
1537 (
1538 start_rules => \@start_rules,
1539 rules => \@rules,
1540 style => 'filter',
1541 output_encoding => 'utf8'
1542 );
1543
1544 my $status = 0;
1545 my $xml_in = "";
1546 if (!open(MIN,"<$doc_xml_filename"))
1547 {
1548 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1549 $status = 1;
1550 }
1551 else
1552 {
1553 # Read them in
1554 my $line;
1555 while (defined ($line=<MIN>)) {
1556 $xml_in .= $line;
1557 }
1558 close(MIN);
1559
1560 # Filter with the call-back functions
1561 my $xml_out = "";
1562
1563 my $MOUT;
1564 if (!open($MOUT,">$doc_xml_filename")) {
1565 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1566 $status = 1;
1567 }
1568 else {
1569 binmode($MOUT,":utf8");
1570 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1571 close($MOUT);
1572 }
1573 }
1574 return $status;
1575}
1576
1577sub rfdxml_metadata
1578{
1579 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1580
1581 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1582 {
1583 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1584 return [$tagname => $attrHash];
1585 }
1586
1587 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1588 {
1589 if (!defined $parser->{'parameters'}->{'poscount'})
1590 {
1591 $parser->{'parameters'}->{'poscount'} = 0;
1592 }
1593 else
1594 {
1595 $parser->{'parameters'}->{'poscount'}++;
1596 }
1597 }
1598
1599 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1600 {
1601 return [];
1602 }
1603
1604 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1605 {
1606 return [];
1607 }
1608
1609 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1610 return [$tagname => $attrHash];
1611}
1612
1613sub mxml_metadata
1614{
1615 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1616 my $metaname = $parser->{'parameters'}->{'metaname'};
1617 my $metamode = $parser->{'parameters'}->{'metamode'};
1618
1619 # Report error if we don't see FileName tag before this
1620 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1621
1622 # Don't do anything if we are not in the right FileSet
1623 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1624 if ($file_regexp =~ /\.\*/) {
1625 # Only interested in a file_regexp if it specifies precisely one
1626 # file.
1627 # So, skip anything with a .* in it as it is too general
1628 return [$tagname => $attrHash];
1629 }
1630 my $src_file = $parser->{'parameters'}->{'src_file'};
1631 if (!($src_file =~ /$file_regexp/)) {
1632 return [$tagname => $attrHash];
1633 }
1634## print STDERR "*** mxl metamode = $metamode\n";
1635
1636 # Find the right metadata tag and checks if we are going to override it
1637 my $name_attr = $attrHash->{'name'};
1638 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1639 # Get the value and override the current value
1640 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1641 $attrHash->{'_content'} = $metavalue;
1642
1643## print STDERR "**** overrideing metadata.xml\n";
1644
1645 # Don't want it to wipe out any other pieces of metadata
1646 $parser->{'parameters'}->{'metamode'} = "done";
1647 }
1648
1649 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1650 return [$tagname => $attrHash];
1651}
1652
1653
1654sub mxml_description
1655{
1656 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1657 my $metamode = $parser->{'parameters'}->{'metamode'};
1658
1659 # Failed... Report error if we don't see FileName tag before this
1660 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1661
1662 # Don't do anything if we are not in the right FileSet
1663 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1664 if ($file_regexp =~ m/\.\*/) {
1665 # Only interested in a file_regexp if it specifies precisely one
1666 # file.
1667 # So, skip anything with a .* in it as it is too general
1668 return [$tagname => $attrHash];
1669 }
1670 my $src_file = $parser->{'parameters'}->{'src_file'};
1671
1672 if (!($src_file =~ m/$file_regexp/)) {
1673 return [$tagname => $attrHash];
1674 }
1675
1676 # Accumulate the metadata block to the end of the description block
1677 # Note: This adds metadata block to all description blocks, so if there are
1678 # multiple FileSets, it will add to all of them
1679 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1680 # if metamode was "override" but get to here then it failed to
1681 # find an item to override, in which case it should append its
1682 # value to the end, just like the "accumulate" mode
1683
1684 # tack a new metadata tag on to the end of the <Metadata>+ block
1685 my $metaname = $parser->{'parameters'}->{'metaname'};
1686 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1687
1688 my $metadata_attr = { '_content' => $metavalue,
1689 'name' => $metaname,
1690 'mode' => "accumulate" };
1691
1692 my $append_metadata = [ "Metadata" => $metadata_attr ];
1693 my $description_content = $attrHash->{'_content'};
1694
1695## print STDERR "*** appending to metadata.xml\n";
1696
1697 # append the new metadata element to the end of the current
1698 # content contained inside this tag
1699 if (ref($description_content) eq "") {
1700 # => string or numeric literal
1701 # this is caused by a <Description> block has no <Metadata> child elements
1702 # => set up an empty array in '_content'
1703 $attrHash->{'_content'} = [ "\n" ];
1704 $description_content = $attrHash->{'_content'};
1705 }
1706
1707 push(@$description_content," ", $append_metadata ,"\n ");
1708 $parser->{'parameters'}->{'metamode'} = "done";
1709 }
1710
1711 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1712 return [$tagname => $attrHash];
1713}
1714
1715
1716sub mxml_filename
1717{
1718 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1719
1720 # Store the filename of the Current Fileset
1721 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1722 # FileName tag must come before Description tag
1723 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1724
1725 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1726 return [$tagname => $attrHash];
1727}
1728
1729
1730sub mxml_fileset
1731{
1732 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1733
1734 # Initilise the current_file
1735 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1736 # FileName tag must come before Description tag
1737 $parser->{'parameters'}->{'current_file'} = "";
1738
1739 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1740 return [$tagname => $attrHash];
1741}
1742
1743
1744sub edit_metadata_xml
1745{
1746 my $self = shift @_;
1747 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1748
1749 # Set the call-back functions for the metadata tags
1750 my @rules =
1751 ( _default => 'raw',
1752 'FileName' => \&mxml_filename,
1753 'Metadata' => \&mxml_metadata,
1754 'Description' => \&mxml_description,
1755 'FileSet' => \&mxml_fileset);
1756
1757 # use XML::Rules to add it in (read in and out again)
1758 my $parser = XML::Rules->new(rules => \@rules,
1759 style => 'filter',
1760 output_encoding => 'utf8');
1761
1762 if (!-e $metadata_xml_filename) {
1763
1764 if (open(MOUT,">$metadata_xml_filename")) {
1765
1766 my $src_file_re = &util::filename_to_regex($src_file);
1767 # shouldn't the following also be in the above utility routine??
1768 # $src_file_re =~ s/\./\\./g;
1769
1770 print MOUT "<?xml version=\"1.0\"?>\n";
1771 print MOUT "<DirectoryMetadata>\n";
1772 print MOUT " <FileSet>\n";
1773 print MOUT " <FileName>$src_file_re</FileName>\n";
1774 print MOUT " <Description>\n";
1775 print MOUT " </Description>\n";
1776 print MOUT " </FileSet>\n";
1777 print MOUT "</DirectoryMetadata>\n";
1778
1779 close(MOUT);
1780 }
1781 else {
1782 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1783 }
1784 }
1785
1786
1787 my $xml_in = "";
1788 if (!open(MIN,"<$metadata_xml_filename")) {
1789 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1790 }
1791 else {
1792 # Read them in
1793 my $line;
1794 while (defined ($line=<MIN>)) {
1795 $xml_in .= $line;
1796 }
1797 close(MIN);
1798
1799 # Filter with the call-back functions
1800 my $xml_out = "";
1801
1802 my $MOUT;
1803 if (!open($MOUT,">$metadata_xml_filename")) {
1804 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1805 }
1806 else {
1807 binmode($MOUT,":utf8");
1808
1809 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1810 # At the moment, I will just hack it!
1811 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1812 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1813 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1814 #print MOUT $xml_out;
1815
1816 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1817 metavalue => $metavalue,
1818 metamode => $metamode,
1819 src_file => $src_file,
1820 current_file => undef} );
1821 close($MOUT);
1822 }
1823 }
1824}
1825
1826
1827sub set_import_metadata
1828{
1829 my $self = shift @_;
1830
1831 my $username = $self->{'username'};
1832 my $collect = $self->{'collect'};
1833 my $gsdl_cgi = $self->{'gsdl_cgi'};
1834
1835 if ($baseaction::authentication_enabled) {
1836 # Ensure the user is allowed to edit this collection
1837 $self->authenticate_user($username, $collect);
1838 }
1839
1840 # Make sure the collection isn't locked by someone else
1841 $self->lock_collection($username, $collect);
1842
1843 $self->_set_import_metadata(@_);
1844
1845 # Release the lock once it is done
1846 $self->unlock_collection($username, $collect);
1847
1848}
1849
1850sub set_import_metadata_array
1851{
1852 my $self = shift @_;
1853
1854 my $username = $self->{'username'};
1855 my $collect = $self->{'collect'};
1856 my $gsdl_cgi = $self->{'gsdl_cgi'};
1857 my $gsdlhome = $self->{'gsdlhome'};
1858
1859 if ($baseaction::authentication_enabled) {
1860 # Ensure the user is allowed to edit this collection
1861 &authenticate_user($gsdl_cgi, $username, $collect);
1862 }
1863
1864 my $site = $self->{'site'};
1865 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1866
1867 $gsdl_cgi->checked_chdir($collect_dir);
1868
1869 # Make sure the collection isn't locked by someone else
1870 $self->lock_collection($username, $collect);
1871
1872 # look up additional args
1873
1874 my $infodbtype = $self->{'infodbtype'};
1875
1876 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1877
1878 my $json_str = $self->{'json'};
1879 my $doc_array = decode_json $json_str;
1880
1881 my $global_status = 0;
1882 my $global_mess = "";
1883
1884 my @all_docids = ();
1885
1886 foreach my $doc_array_rec ( @$doc_array )
1887 {
1888 my $docid = $doc_array_rec->{'docid'};
1889 my $metaname = $doc_array_rec->{'metaname'};
1890 my $metamode = $self->{'metamode'};
1891 my $metavalue = $doc_array_rec->{'metavalue'};
1892
1893 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1894 # make "accumulate" the default (less destructive, as won't actually
1895 # delete any existing values)
1896 $metamode = "accumulate";
1897 }
1898
1899 push(@all_docids,$docid);
1900
1901 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1902 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1903 my $metadata_xml_file;
1904 my $import_filename = undef;
1905
1906 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1907 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1908
1909 # This now stores the full pathname
1910 $import_filename = $doc_rec->{'src-file'}->[0];
1911
1912 # figure out correct metadata.xml file [?]
1913 # Assuming the metadata.xml file is next to the source file
1914 # Note: This will not work if it is using the inherited metadata from the parent folder
1915 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1916 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1917
1918 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1919 }
1920
1921 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1922 $gsdl_cgi->generate_ok_message($mess);
1923
1924 # Release the lock once it is done
1925 $self->unlock_collection($username, $collect);
1926}
1927
1928sub _remove_import_metadata
1929{
1930 my $self = shift @_;
1931
1932 my $collect = $self->{'collect'};
1933 my $gsdl_cgi = $self->{'gsdl_cgi'};
1934 my $gsdlhome = $self->{'gsdlhome'};
1935 my $infodbtype = $self->{'infodbtype'};
1936
1937 # Obtain the collect dir
1938 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1939 my $site = $self->{'site'};
1940 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1941
1942 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1943 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1944
1945 # look up additional args
1946 my $docid = $self->{'d'};
1947 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1948 {
1949 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1950 }
1951
1952 my $metaname = $self->{'metaname'};
1953 my $metavalue = $self->{'metavalue'};
1954 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1955
1956 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1957 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1958 my $metadata_xml_file;
1959 my $import_filename = undef;
1960 if (defined $docid)
1961 {
1962 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1963 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1964
1965 # This now stores the full pathname
1966 $import_filename = $doc_rec->{'src-file'}->[0];
1967 }
1968
1969 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1970 {
1971 $gsdl_cgi->generate_error_message("There is no metadata\n");
1972 }
1973
1974 # figure out correct metadata.xml file [?]
1975 # Assuming the metadata.xml file is next to the source file
1976 # Note: This will not work if it is using the inherited metadata from the parent folder
1977 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1978 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1979
1980 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1981
1982 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1983 $mess .= " $metaname";
1984 $mess .= " = $metavalue\n";
1985
1986 $gsdl_cgi->generate_ok_message($mess);
1987
1988 #return $status; # in case calling functions have a use for this
1989}
1990
1991sub remove_import_metadata
1992{
1993 my $self = shift @_;
1994
1995 my $username = $self->{'username'};
1996 my $collect = $self->{'collect'};
1997 my $gsdl_cgi = $self->{'gsdl_cgi'};
1998
1999 if ($baseaction::authentication_enabled) {
2000 # Ensure the user is allowed to edit this collection
2001 &authenticate_user($gsdl_cgi, $username, $collect);
2002 }
2003
2004 # Make sure the collection isn't locked by someone else
2005 $self->lock_collection($username, $collect);
2006
2007 $self->_remove_import_metadata(@_);
2008
2009 # Release the lock once it is done
2010 $self->unlock_collection($username, $collect);
2011
2012}
2013
2014sub remove_from_metadata_xml
2015{
2016 my $self = shift @_;
2017 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
2018
2019 # Set the call-back functions for the metadata tags
2020 my @rules =
2021 (
2022 _default => 'raw',
2023 'Metadata' => \&rfmxml_metadata,
2024 'FileName' => \&mxml_filename
2025 );
2026
2027 my $parser = XML::Rules->new
2028 (
2029 rules => \@rules,
2030 style => 'filter',
2031 output_encoding => 'utf8'
2032 );
2033
2034 my $xml_in = "";
2035 if (!open(MIN,"<$metadata_xml_filename"))
2036 {
2037 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2038 }
2039 else
2040 {
2041 # Read them in
2042 my $line;
2043 while (defined ($line=<MIN>)) {
2044 $xml_in .= $line;
2045 }
2046 close(MIN);
2047
2048 # Filter with the call-back functions
2049 my $xml_out = "";
2050
2051 my $MOUT;
2052 if (!open($MOUT,">$metadata_xml_filename")) {
2053 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2054 }
2055 else {
2056 binmode($MOUT,":utf8");
2057 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
2058 close($MOUT);
2059 }
2060 }
2061}
2062
2063sub rfmxml_metadata
2064{
2065 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2066
2067 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2068 {
2069 return [];
2070 }
2071
2072 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2073 return [$tagname => $attrHash];
2074}
2075
2076sub _remove_live_metadata
2077{
2078 my $self = shift @_;
2079
2080 my $collect = $self->{'collect'};
2081 my $gsdl_cgi = $self->{'gsdl_cgi'};
2082# my $gsdlhome = $self->{'gsdlhome'};
2083 my $infodbtype = $self->{'infodbtype'};
2084
2085 # Obtain the collect dir
2086 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2087 my $site = $self->{'site'};
2088 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2089
2090
2091 # look up additional args
2092 my $docid = $self->{'d'};
2093 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2094 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2095 }
2096
2097 # Generate the dbkey
2098 my $metaname = $self->{'metaname'};
2099 my $dbkey = "$docid.$metaname";
2100
2101 # To people who know $collect_tail please add some comments
2102 # Obtain the live gdbm_db path
2103 my $collect_tail = $collect;
2104 $collect_tail =~ s/^.*[\/\\]//;
2105 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2106 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2107
2108 # Remove the key
2109 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2110 my $status = system($cmd);
2111 if ($status != 0) {
2112 # Catch error if gdbmdel failed
2113 my $mess = "Failed to set metadata key: $dbkey\n";
2114
2115 $mess .= "PATH: $ENV{'PATH'}\n";
2116 $mess .= "cmd = $cmd\n";
2117 $mess .= "Exit status: $status\n";
2118 $mess .= "System Error Message: $!\n";
2119
2120 $gsdl_cgi->generate_error($mess);
2121 }
2122 else {
2123 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2124 }
2125
2126}
2127
2128sub remove_live_metadata
2129{
2130 my $self = shift @_;
2131
2132 my $username = $self->{'username'};
2133 my $collect = $self->{'collect'};
2134 my $gsdl_cgi = $self->{'gsdl_cgi'};
2135 my $gsdlhome = $self->{'gsdlhome'};
2136
2137 if ($baseaction::authentication_enabled) {
2138 # Ensure the user is allowed to edit this collection
2139 &authenticate_user($gsdl_cgi, $username, $collect);
2140 }
2141
2142 # Make sure the collection isn't locked by someone else
2143 $self->lock_collection($username, $collect);
2144
2145 $self->_remove_live_metadata(@_);
2146
2147 $self->unlock_collection($username, $collect);
2148}
2149
2150sub remove_metadata
2151{
2152 my $self = shift @_;
2153
2154 my $where = $self->{'where'};
2155 if(!$where) {
2156 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2157 return;
2158 }
2159
2160 my $username = $self->{'username'};
2161 my $collect = $self->{'collect'};
2162 my $gsdl_cgi = $self->{'gsdl_cgi'};
2163
2164 if ($baseaction::authentication_enabled) {
2165 # Ensure the user is allowed to edit this collection
2166 &authenticate_user($gsdl_cgi, $username, $collect);
2167 }
2168
2169 # Make sure the collection isn't locked by someone else
2170 $self->lock_collection($username, $collect);
2171
2172 # check which directories need to be processed, specified in $where as
2173 # any combination of import|archives|index|live
2174 if($where =~ m/import/) {
2175 $self->_remove_import_metadata(@_);
2176 }
2177 if($where =~ m/archives/) {
2178 $self->_remove_archives_metadata(@_);
2179 }
2180 if($where =~ m/index/) {
2181 $self->_remove_index_metadata(@_);
2182 }
2183
2184 # Release the lock once it is done
2185 $self->unlock_collection($username, $collect);
2186}
2187
2188# the internal version, without authentication
2189sub _remove_index_metadata
2190{
2191 my $self = shift @_;
2192
2193 my $collect = $self->{'collect'};
2194 my $gsdl_cgi = $self->{'gsdl_cgi'};
2195# my $gsdlhome = $self->{'gsdlhome'};
2196 my $infodbtype = $self->{'infodbtype'};
2197
2198 # Obtain the collect dir
2199 my $site = $self->{'site'};
2200 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2201 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2202
2203 # look up additional args
2204 my $docid = $self->{'d'};
2205 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2206 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2207 }
2208 my $metaname = $self->{'metaname'};
2209 my $metapos = $self->{'metapos'};
2210 my $metavalue = $self->{'metavalue'};
2211
2212 # To people who know $collect_tail please add some comments
2213 # Obtain the path to the database
2214 my $collect_tail = $collect;
2215 $collect_tail =~ s/^.*[\/\\]//;
2216 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2217 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2218
2219 # Read the docid entry
2220 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2221
2222 # Basically loop through and unescape_html the values
2223 foreach my $k (keys %$doc_rec) {
2224 my @escaped_v = ();
2225 foreach my $v (@{$doc_rec->{$k}}) {
2226 if ($k eq "contains") {
2227 # protect quotes in ".2;".3 etc
2228 $v =~ s/\"/\\\"/g;
2229 push(@escaped_v, $v);
2230 }
2231 else {
2232 my $ev = &ghtml::unescape_html($v);
2233 $ev =~ s/\"/\\\"/g;
2234 push(@escaped_v, $ev);
2235 }
2236 }
2237 $doc_rec->{$k} = \@escaped_v;
2238 }
2239
2240 # Check to make sure the key does exist
2241 if (!defined ($doc_rec->{$metaname})) {
2242 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2243 }
2244
2245 # Obtain the specified metadata pos
2246 $metapos = 0 if (!defined $metapos);
2247
2248 # consider check key is defined before deleting?
2249 # Loop through the metadata array and ignore the specified position
2250 my $filtered_metadata = [];
2251 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2252 for (my $i=0; $i<$num_metadata_vals; $i++) {
2253 my $metaval = shift(@{$doc_rec->{$metaname}});
2254
2255 if (!defined $metavalue && $i != $metapos) {
2256 push(@$filtered_metadata,$metaval)
2257 }
2258
2259 if(defined $metavalue && !($metavalue eq $metaval))
2260 {
2261 push(@$filtered_metadata,$metavalue)
2262 }
2263 }
2264 $doc_rec->{$metaname} = $filtered_metadata;
2265
2266 # Turn the record back to string
2267 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2268
2269 # Store it back to the database
2270 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2271 my $status = system($cmd);
2272 if ($status != 0) {
2273 my $mess = "Failed to set metadata key: $docid\n";
2274
2275 $mess .= "PATH: $ENV{'PATH'}\n";
2276 $mess .= "cmd = $cmd\n";
2277 $mess .= "Exit status: $status\n";
2278 $mess .= "System Error Message: $!\n";
2279
2280 $gsdl_cgi->generate_error($mess);
2281 }
2282 else {
2283 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2284 $mess .= " $metaname";
2285 $mess .= "->[$metapos]" if (defined $metapos);
2286
2287 $gsdl_cgi->generate_ok_message($mess);
2288 }
2289
2290 #return $status; # in case calling functions have a use for this
2291}
2292
2293sub remove_index_metadata
2294{
2295 my $self = shift @_;
2296
2297 my $username = $self->{'username'};
2298 my $collect = $self->{'collect'};
2299 my $gsdl_cgi = $self->{'gsdl_cgi'};
2300# my $gsdlhome = $self->{'gsdlhome'};
2301
2302 if ($baseaction::authentication_enabled) {
2303 # Ensure the user is allowed to edit this collection
2304 &authenticate_user($gsdl_cgi, $username, $collect);
2305 }
2306
2307 # Obtain the collect dir
2308 my $site = $self->{'site'};
2309 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2310 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2311
2312 # Make sure the collection isn't locked by someone else
2313 $self->lock_collection($username, $collect);
2314
2315 $self->_remove_index_metadata(@_);
2316
2317 # Release the lock once it is done
2318 $self->unlock_collection($username, $collect);
2319}
2320
2321
2322# Was trying to reused the codes, but the functions need to be broken
2323# down more before they can be reused, otherwise there will be too
2324# much overhead and duplicate process...
2325sub insert_metadata
2326{
2327 my $self = shift @_;
2328
2329 my $username = $self->{'username'};
2330 my $collect = $self->{'collect'};
2331 my $gsdl_cgi = $self->{'gsdl_cgi'};
2332 my $gsdlhome = $self->{'gsdlhome'};
2333 my $infodbtype = $self->{'infodbtype'};
2334
2335 # If the import metadata and gdbm database have been updated, we
2336 # need to insert some notification to warn user that the the text
2337 # they see at the moment is not indexed and require a rebuild.
2338 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2339
2340 if ($baseaction::authentication_enabled) {
2341 # Ensure the user is allowed to edit this collection
2342 $self->authenticate_user($username, $collect);
2343 }
2344
2345 # Obtain the collect and archive dir
2346 my $site = $self->{'site'};
2347 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2348 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2349 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2350
2351 # Make sure the collection isn't locked by someone else
2352 $self->lock_collection($username, $collect);
2353
2354 # Check additional args
2355 my $docid = $self->{'d'};
2356 if (!defined($docid)) {
2357 $gsdl_cgi->generate_error("No document id is specified: d=...");
2358 }
2359 my $metaname = $self->{'metaname'};
2360 if (!defined($metaname)) {
2361 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2362 }
2363 my $metavalue = $self->{'metavalue'};
2364 if (!defined($metavalue) || $metavalue eq "") {
2365 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2366 }
2367 # make "accumulate" the default (less destructive, as won't actually
2368 # delete any existing values)
2369 my $metamode = "accumulate";
2370
2371 #=======================================================================#
2372 # set_import_metadata [START]
2373 #=======================================================================#
2374 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2375 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2376 my $metadata_xml_file;
2377 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2378 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2379
2380 # This now stores the full pathname
2381 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2382
2383 # figure out correct metadata.xml file [?]
2384 # Assuming the metadata.xml file is next to the source file
2385 # Note: This will not work if it is using the inherited metadata from the parent folder
2386 my ($import_tailname, $import_dirname)
2387 = File::Basename::fileparse($import_filename);
2388 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2389
2390 # Shane's escape characters
2391 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2392 $metavalue =~ s/\,/&#44;/g;
2393 $metavalue =~ s/\:/&#58;/g;
2394 $metavalue =~ s/\|/&#124;/g;
2395 $metavalue =~ s/\(/&#40;/g;
2396 $metavalue =~ s/\)/&#41;/g;
2397 $metavalue =~ s/\[/&#91;/g;
2398 $metavalue =~ s/\\/&#92;/g;
2399 $metavalue =~ s/\]/&#93;/g;
2400 $metavalue =~ s/\{/&#123;/g;
2401 $metavalue =~ s/\}/&#125;/g;
2402 $metavalue =~ s/\"/&#34;/g;
2403 $metavalue =~ s/\`/&#96;/g;
2404 $metavalue =~ s/\n/_newline_/g;
2405
2406 # Edit the metadata.xml
2407 # Modified by Jeffrey from DL Consulting
2408 # Handle the case where there is one metadata.xml file for multiple FileSets
2409 # The XML filter needs to know whether it is in the right FileSet
2410 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2411 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2412 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2413 $metaname, $metavalue, $metamode, $import_tailname);
2414 #=======================================================================#
2415 # set_import_metadata [END]
2416 #=======================================================================#
2417
2418
2419 #=======================================================================#
2420 # set_metadata (accumulate version) [START]
2421 #=======================================================================#
2422 # To people who know $collect_tail please add some comments
2423 # Obtain path to the database
2424 my $collect_tail = $collect;
2425 $collect_tail =~ s/^.*[\/\\]//;
2426 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2427 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2428
2429 # Read the docid entry
2430 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2431
2432 foreach my $k (keys %$doc_rec) {
2433 my @escaped_v = ();
2434 foreach my $v (@{$doc_rec->{$k}}) {
2435 if ($k eq "contains") {
2436 # protect quotes in ".2;".3 etc
2437 $v =~ s/\"/\\\"/g;
2438 push(@escaped_v, $v);
2439 }
2440 else {
2441 my $ev = &ghtml::unescape_html($v);
2442 $ev =~ s/\"/\\\"/g;
2443 push(@escaped_v, $ev);
2444 }
2445 }
2446 $doc_rec->{$k} = \@escaped_v;
2447 }
2448
2449 # Protect the quotes
2450 $metavalue =~ s/\"/\\\"/g;
2451
2452 # Adds the pending macro
2453 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2454
2455 # If the metadata doesn't exist, create a new one
2456 if (!defined($doc_rec->{$metaname})){
2457 $doc_rec->{$metaname} = [ $macro_metavalue ];
2458 }
2459 # Else, let's acculumate the values
2460 else {
2461 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2462 }
2463
2464 # Generate the record string
2465 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2466
2467 # Store it into GDBM
2468 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2469 my $status = system($cmd);
2470 if ($status != 0) {
2471 # Catch error if gdbmget failed
2472 my $mess = "Failed to set metadata key: $docid\n";
2473
2474 $mess .= "PATH: $ENV{'PATH'}\n";
2475 $mess .= "cmd = $cmd\n";
2476 $mess .= "Exit status: $status\n";
2477 $mess .= "System Error Message: $!\n";
2478
2479 $gsdl_cgi->generate_error($mess);
2480 }
2481 else {
2482 my $mess = "insert-metadata successful: Key[$docid]\n";
2483 $mess .= " [In metadata.xml] $metaname";
2484 $mess .= " = $metavalue\n";
2485 $mess .= " [In database] $metaname";
2486 $mess .= " = $macro_metavalue\n";
2487 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2488 $gsdl_cgi->generate_ok_message($mess);
2489 }
2490 #=======================================================================#
2491 # set_metadata (accumulate version) [END]
2492 #=======================================================================#
2493
2494 # Release the lock once it is done
2495 $self->unlock_collection($username, $collect);
2496}
2497
24981;
Note: See TracBrowser for help on using the repository browser.