source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27157

Last change on this file since 27157 was 27157, checked in by ak19, 11 years ago

First pass updating metadataaction.pm to create generic set_meta, remove_meta methods that still do the default of setting or removing index meta, but if the where variable is specified, they will set archives, import, index and/or live meta accordingly. For symmetry, there is now a get_index_meta. This is always called by get_meta since get_meta is not generic.

File size: 77.3 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
101
102 #REMOVE METHODS
103 "remove-import-metadata" => {
104 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
105 'optional-args' => [ ] },
106
107 "remove-archives-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 "remove-live-metadata" => {
112 'compulsory-args' => [ "d", "metaname" ],
113 'optional-args' => [ ] },
114
115 "remove-index-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ],
117 'optional-args' => [ "metapos", "metavalue" ] },
118
119 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ "where", "metapos", "metavalue" ] },
122
123 #INSERT METHODS
124 "insert-metadata" => {
125 'compulsory-args' => [ "d", "metaname", "metavalue" ],
126 'optional-args' => [ ] }
127};
128
129
130sub new
131{
132 my $class = shift (@_);
133 my ($gsdl_cgi,$iis6_mode) = @_;
134
135 # Treat metavalue specially. To transmit this through a GET request
136 # the Javascript side has url-encoded it, so here we need to decode
137 # it before proceeding
138
139 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
140 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
141 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
142
143 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
144 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
145
146 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
147
148 return bless $self, $class;
149}
150
151
152sub get_live_metadata
153{
154 my $self = shift @_;
155
156 my $username = $self->{'username'};
157 my $collect = $self->{'collect'};
158 my $gsdl_cgi = $self->{'gsdl_cgi'};
159 my $gsdlhome = $self->{'gsdlhome'};
160 my $infodbtype = $self->{'infodbtype'};
161
162 # live metadata gets/saves value scoped (prefixed) by the current usename
163 # so (for now) let's not bother to enforce authentication
164
165 # Obtain the collect dir
166 my $site = $self->{'site'};
167 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
168 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
169
170 # Make sure the collection isn't locked by someone else
171 $self->lock_collection($username, $collect);
172
173 # look up additional args
174 my $docid = $self->{'d'};
175 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
176 $gsdl_cgi->generate_error("No docid (d=...) specified.");
177 }
178
179 # Generate the dbkey
180 my $metaname = $self->{'metaname'};
181 my $dbkey = "$docid.$metaname";
182
183 # To people who know $collect_tail please add some comments
184 # Obtain path to the database
185 my $collect_tail = $collect;
186 $collect_tail =~ s/^.*[\/|\\]//;
187 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
188 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
189
190 # Obtain the content of the key
191 my $cmd = "gdbmget $infodb_file_path $dbkey";
192 if (open(GIN,"$cmd |") == 0) {
193 # Catch error if gdbmget failed
194 my $mess = "Failed to get metadata key: $metaname\n";
195 $mess .= "$!\n";
196
197 $gsdl_cgi->generate_error($mess);
198 }
199 else {
200 binmode(GIN,":utf8");
201 # Read everything in and concatenate them into $metavalue
202 my $metavalue = "";
203 my $line;
204 while (defined ($line=<GIN>)) {
205 $metavalue .= $line;
206 }
207 close(GIN);
208 chomp($metavalue); # Get rid off the tailing newlines
209 $gsdl_cgi->generate_ok_message("$metavalue");
210 }
211
212 # Release the lock once it is done
213 $self->unlock_collection($username, $collect);
214}
215
216# just calls the index version
217sub get_metadata
218{
219 my $self = shift @_;
220 $self->get_index_metadata(@_);
221}
222
223sub get_index_metadata
224{
225 my $self = shift @_;
226
227 my $username = $self->{'username'};
228 my $collect = $self->{'collect'};
229 my $gsdl_cgi = $self->{'gsdl_cgi'};
230 my $gsdlhome = $self->{'gsdlhome'};
231
232 # Authenticate user if it is enabled
233 if ($baseaction::authentication_enabled) {
234 # Ensure the user is allowed to edit this collection
235 &authenticate_user($gsdl_cgi, $username, $collect);
236 }
237
238 # Obtain the collect dir
239 my $site = $self->{'site'};
240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
241 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
242
243 # Make sure the collection isn't locked by someone else
244 $self->lock_collection($username, $collect);
245
246 # look up additional args
247 my $docid = $self->{'d'};
248 my $metaname = $self->{'metaname'};
249 my $metapos = $self->{'metapos'};
250 my $infodbtype = $self->{'infodbtype'};
251
252 # To people who know $collect_tail please add some comments
253 # Obtain path to the database
254 my $collect_tail = $collect;
255 $collect_tail =~ s/^.*[\/\\]//;
256 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
257 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
258
259 # Read the docid entry
260 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
261
262 # Basically loop through and unescape_html the values
263 foreach my $k (keys %$doc_rec) {
264 my @escaped_v = ();
265 foreach my $v (@{$doc_rec->{$k}}) {
266 my $ev = &ghtml::unescape_html($v);
267 push(@escaped_v, $ev);
268 }
269 $doc_rec->{$k} = \@escaped_v;
270 }
271
272 # Obtain the specified metadata value
273 $metapos = 0 if (!defined $metapos);
274 my $metavalue = $doc_rec->{$metaname}->[$metapos];
275 $gsdl_cgi->generate_ok_message("$metavalue");
276
277 # Release the lock once it is done
278 $self->unlock_collection($username, $collect);
279}
280
281
282sub get_import_metadata
283{
284 my $self = shift @_;
285
286 my $username = $self->{'username'};
287 my $collect = $self->{'collect'};
288 my $gsdl_cgi = $self->{'gsdl_cgi'};
289 my $gsdlhome = $self->{'gsdlhome'};
290
291 # Authenticate user if it is enabled
292 if ($baseaction::authentication_enabled) {
293 # Ensure the user is allowed to edit this collection
294 &authenticate_user($gsdl_cgi, $username, $collect);
295 }
296
297 # Obtain the collect dir
298 my $site = $self->{'site'};
299 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
300 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
301
302 # Make sure the collection isn't locked by someone else
303 $self->lock_collection($username, $collect);
304
305 # look up additional args
306 my $docid = $self->{'d'};
307 my $metaname = $self->{'metaname'};
308 my $infodbtype = $self->{'infodbtype'};
309 if (!defined $docid)
310 {
311 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
312 }
313
314 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
315 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
316 my $metadata_xml_file;
317 my $import_filename = undef;
318
319
320 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
321 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
322 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
323
324 # This now stores the full pathname
325 $import_filename = $doc_rec->{'src-file'}->[0];
326
327 # figure out correct metadata.xml file [?]
328 # Assuming the metadata.xml file is next to the source file
329 # Note: This will not work if it is using the inherited metadata from the parent folder
330 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
331 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
332
333 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
334
335 # Release the lock once it is done
336 $self->unlock_collection($username, $collect);
337}
338
339sub get_metadata_from_metadata_xml
340{
341 my $self = shift @_;
342 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
343
344 my @rules =
345 (
346 _default => 'raw',
347 'Metadata' => \&gfmxml_metadata,
348 'FileName' => \&mxml_filename
349 );
350
351 my $parser = XML::Rules->new
352 (
353 rules => \@rules,
354 output_encoding => 'utf8'
355 );
356
357 my $xml_in = "";
358 if (!open(MIN,"<$metadata_xml_filename"))
359 {
360 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
361 }
362 else
363 {
364 # Read them in
365 my $line;
366 while (defined ($line=<MIN>)) {
367 $xml_in .= $line;
368 }
369 close(MIN);
370
371 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
372
373 if(defined $parser->{'pad'}->{'metavalue'})
374 {
375 return $parser->{'pad'}->{'metavalue'};
376 }
377 else
378 {
379 return "";
380 }
381 }
382}
383
384sub gfmxml_metadata
385{
386 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
387
388 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
389 {
390 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
391 }
392}
393
394sub get_archives_metadata
395{
396 my $self = shift @_;
397
398 my $username = $self->{'username'};
399 my $collect = $self->{'collect'};
400 my $gsdl_cgi = $self->{'gsdl_cgi'};
401 my $gsdlhome = $self->{'gsdlhome'};
402 my $infodbtype = $self->{'infodbtype'};
403
404 # Authenticate user if it is enabled
405 if ($baseaction::authentication_enabled) {
406 # Ensure the user is allowed to edit this collection
407 &authenticate_user($gsdl_cgi, $username, $collect);
408 }
409
410 # Obtain the collect dir
411 my $site = $self->{'site'};
412 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
413
414 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
415
416 # Make sure the collection isn't locked by someone else
417 $self->lock_collection($username, $collect);
418
419 # look up additional args
420 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
421 $docid_secnum = "" if (!defined $docid_secnum);
422
423 my $metaname = $self->{'metaname'};
424 my $metapos = $self->{'metapos'};
425 $metapos = 0 if (!defined $metapos);
426
427 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
428 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
429
430 # This now stores the full pathname
431 my $doc_filename = $doc_rec->{'doc-file'}->[0];
432
433 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
434
435 # Release the lock once it is done
436 $self->unlock_collection($username, $collect);
437}
438
439sub get_metadata_from_archive_xml
440{
441 my $self = shift @_;
442 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
443
444 my @start_rules = ('Section' => \&dxml_start_section);
445
446 my @rules =
447 (
448 _default => 'raw',
449 'Metadata' => \&gfdxml_metadata
450 );
451
452 my $parser = XML::Rules->new
453 (
454 start_rules => \@start_rules,
455 rules => \@rules,
456 output_encoding => 'utf8'
457 );
458
459 my $xml_in = "";
460 if (!open(MIN,"<$doc_xml_filename"))
461 {
462 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
463 }
464 else
465 {
466 # Read them in
467 my $line;
468 while (defined ($line=<MIN>)) {
469 $xml_in .= $line;
470 }
471 close(MIN);
472
473 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
474
475 if(defined $parser->{'pad'}->{'metavalue'})
476 {
477 return $parser->{'pad'}->{'metavalue'};
478 }
479 else
480 {
481 return "";
482 }
483 }
484}
485
486sub gfdxml_metadata
487{
488 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
489
490 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
491 {
492 return;
493 }
494
495 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
496 {
497 if (!defined $parser->{'parameters'}->{'poscount'})
498 {
499 $parser->{'parameters'}->{'poscount'} = 0;
500 }
501 else
502 {
503 $parser->{'parameters'}->{'poscount'}++;
504 }
505 }
506
507 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
508 {
509 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
510 }
511}
512
513sub _set_live_metadata
514{
515 my $self = shift @_;
516
517 my $collect = $self->{'collect'};
518 my $gsdl_cgi = $self->{'gsdl_cgi'};
519 #my $gsdlhome = $self->{'gsdlhome'};
520 my $infodbtype = $self->{'infodbtype'};
521
522 # Obtain the collect dir
523 my $site = $self->{'site'};
524 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
525 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
526
527
528 # look up additional args
529 my $docid = $self->{'d'};
530 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
531 $gsdl_cgi->generate_error("No docid (d=...) specified.");
532 }
533 my $metavalue = $self->{'metavalue'};
534
535
536 # Generate the dbkey
537 my $metaname = $self->{'metaname'};
538 my $dbkey = "$docid.$metaname";
539
540 # To people who know $collect_tail please add some comments
541 # Obtain path to the database
542 my $collect_tail = $collect;
543 $collect_tail =~ s/^.*[\/\\]//;
544 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
545 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
546
547 # Set the new value
548 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
549 my $status = system($cmd);
550 if ($status != 0) {
551 # Catch error if gdbmget failed
552 my $mess = "Failed to set metadata key: $dbkey\n";
553
554 $mess .= "PATH: $ENV{'PATH'}\n";
555 $mess .= "cmd = $cmd\n";
556 $mess .= "Exit status: $status\n";
557 $mess .= "System Error Message: $!\n";
558
559 $gsdl_cgi->generate_error($mess);
560 }
561 else {
562 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
563 }
564
565 #return $status; # in case calling functions have any further use for this
566}
567
568sub set_live_metadata
569{
570 my $self = shift @_;
571
572 my $username = $self->{'username'};
573 my $collect = $self->{'collect'};
574 my $gsdl_cgi = $self->{'gsdl_cgi'};
575
576 if ($baseaction::authentication_enabled) {
577 # Ensure the user is allowed to edit this collection
578 &authenticate_user($gsdl_cgi, $username, $collect);
579 }
580
581 # Make sure the collection isn't locked by someone else
582 $self->lock_collection($username, $collect);
583
584 $self->_set_live_metadata(@_);
585
586 # Release the lock once it is done
587 $self->unlock_collection($username, $collect);
588}
589
590sub set_metadata_entry
591{
592 my $self = shift @_;
593 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
594
595 # To people who know $collect_tail please add some comments
596 # Obtain path to the database
597 my $collect_tail = $collect;
598 $collect_tail =~ s/^.*[\/\\]//;
599 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
600 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
601
602# print STDERR "**** infodb file path = $infodb_file_path\n";
603# print STDERR "***** infodb type = $infodbtype\n";
604
605 # Read the docid entry
606 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
607
608 # Set the metadata value
609 if (defined $metapos) {
610 $doc_rec->{$metaname}->[$metapos] = $metavalue;
611 }
612 else {
613 $doc_rec->{$metaname} = [ $metavalue ];
614 }
615
616 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
617
618 return $status;
619
620}
621
622sub _set_import_metadata
623{
624 my $self = shift @_;
625
626 my $collect = $self->{'collect'};
627 my $gsdl_cgi = $self->{'gsdl_cgi'};
628 my $infodbtype = $self->{'infodbtype'};
629# my $gsdlhome = $self->{'gsdlhome'};
630
631 # Obtain the collect and archive dir
632 my $site = $self->{'site'};
633 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
634 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
635 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
636
637 # look up additional args
638 # want either d= or f=
639 my $docid = $self->{'d'};
640 my $import_file = $self->{'f'};
641 if ((!defined $docid) && (!defined $import_file)) {
642 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
643 }
644
645 # Get the parameters and set default mode to "accumulate"
646 my $metaname = $self->{'metaname'};
647 my $metavalue = $self->{'metavalue'};
648## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
649 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
650
651 my $metamode = $self->{'metamode'};
652 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
653 # make "accumulate" the default (less destructive, as won't actually
654 # delete any existing values)
655 $metamode = "accumulate";
656 }
657
658 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
659 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
660 my $metadata_xml_file;
661 my $import_filename = undef;
662 if (defined $docid) {
663 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
664 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
665
666 # This now stores the full pathname
667 $import_filename = $doc_rec->{'src-file'}->[0];
668 }
669 else {
670 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
671 }
672
673 # figure out correct metadata.xml file [?]
674 # Assuming the metadata.xml file is next to the source file
675 # Note: This will not work if it is using the inherited metadata from the parent folder
676 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
677 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
678
679 # Edit the metadata.xml
680 # Modified by Jeffrey from DL Consulting
681 # Handle the case where there is one metadata.xml file for multiple FileSets
682 # The XML filter needs to know whether it is in the right FileSet
683 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
684 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
685 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
686 $metaname, $metavalue, $metamode, $import_tailname);
687
688
689 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
690 $mess .= " $metaname";
691 $mess .= " = $metavalue";
692 $mess .= " ($metamode)\n";
693
694 $gsdl_cgi->generate_ok_message($mess);
695
696 #return $status; # in case calling functions have any further use for this
697}
698
699# the version of set_index_meta that doesn't do authentication
700sub _set_archives_metadata
701{
702 my $self = shift @_;
703
704 my $collect = $self->{'collect'};
705 my $gsdl_cgi = $self->{'gsdl_cgi'};
706 my $infodbtype = $self->{'infodbtype'};
707
708 # Obtain the collect and archive dir
709 my $site = $self->{'site'};
710 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
711 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
712
713 # look up additional args
714 my $docid = $self->{'d'};
715 my $metaname = $self->{'metaname'};
716 my $metavalue = $self->{'metavalue'};
717 my $prevmetavalue = $self->{'prevmetavalue'};
718
719 my $metapos = $self->{'metapos'};
720 $metapos = 0 if (!defined $metapos);
721
722 my $metamode = $self->{'metamode'};
723 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
724 # make "accumulate" the default (less destructive, as won't actually
725 # delete any existing values)
726 $metamode = "accumulate";
727 }
728
729 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
730 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
731
732 if ($status == 0) {
733 my $mess = "set-archives-metadata successful: Key[$docid]\n";
734 $mess .= " $metaname";
735 $mess .= "->[$metapos]" if (defined $metapos);
736 $mess .= " = $metavalue";
737 $mess .= " ($metamode)\n";
738
739 $gsdl_cgi->generate_ok_message($mess);
740 }
741 else {
742 my $mess .= "Failed to set archives metadata key: $docid\n";
743 $mess .= "Exit status: $status\n";
744 if(defined $self->{'error_msg'}) {
745 $mess .= "Error Message: $self->{'error_msg'}\n";
746 } else {
747 $mess .= "System Error Message: $!\n";
748 }
749 $mess .= "-" x 20 . "\n";
750
751 $gsdl_cgi->generate_error($mess);
752 }
753
754 #return $status; # in case calling functions have any further use for this
755}
756
757
758# the version of set_index_meta that doesn't do authentication
759sub _set_index_metadata
760{
761 my $self = shift @_;
762
763 my $collect = $self->{'collect'};
764 my $gsdl_cgi = $self->{'gsdl_cgi'};
765
766 my $site = $self->{'site'};
767 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
768
769 # look up additional args
770 my $docid = $self->{'d'};
771 my $metaname = $self->{'metaname'};
772 my $metapos = $self->{'metapos'};
773 my $metavalue = $self->{'metavalue'};
774 my $infodbtype = $self->{'infodbtype'};
775
776 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
777
778 if ($status != 0) {
779 # Catch error if set infodb entry failed
780 my $mess = "Failed to set metadata key: $docid\n";
781
782 $mess .= "PATH: $ENV{'PATH'}\n";
783 $mess .= "Exit status: $status\n";
784 $mess .= "System Error Message: $!\n";
785
786 $gsdl_cgi->generate_error($mess);
787 }
788 else {
789 my $mess = "set-index-metadata successful: Key[$docid]\n";
790 $mess .= " $metaname";
791 $mess .= "->[$metapos]" if (defined $metapos);
792 $mess .= " = $metavalue\n";
793
794 $gsdl_cgi->generate_ok_message($mess);
795 }
796
797 #return $status; # in case calling functions have any further use for this
798}
799
800sub set_index_metadata
801{
802 my $self = shift @_;
803
804 my $username = $self->{'username'};
805 my $collect = $self->{'collect'};
806 my $gsdl_cgi = $self->{'gsdl_cgi'};
807 #my $gsdlhome = $self->{'gsdlhome'};
808
809 if ($baseaction::authentication_enabled) {
810 # Ensure the user is allowed to edit this collection
811 &authenticate_user($gsdl_cgi, $username, $collect);
812 }
813
814 my $site = $self->{'site'};
815 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
816
817 $gsdl_cgi->checked_chdir($collect_dir);
818
819 # Obtain the collect dir
820 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
821
822 # Make sure the collection isn't locked by someone else
823 $self->lock_collection($username, $collect);
824
825 $self->_set_index_metadata(@_);
826
827 # Release the lock once it is done
828 $self->unlock_collection($username, $collect);
829}
830
831# call this to set the metadata for a combination of dirs archives, import or index, or live
832# if none specified, defaults to index which was the original behaviour of set_metadata.
833sub set_metadata
834{
835 my $self = shift @_;
836
837 # Testing that not defining a variable, setting it to "" or to " " all return false
838 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
839
840 my $where = $self->{'where'};
841 if(!$where) {
842 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
843 return;
844 }
845
846 # authenticate and lock collection once, even if processing multiple dirs
847 my $username = $self->{'username'};
848 my $collect = $self->{'collect'};
849 my $gsdl_cgi = $self->{'gsdl_cgi'};
850
851 if ($baseaction::authentication_enabled) {
852 # Ensure the user is allowed to edit this collection
853 #&authenticate_user($gsdl_cgi, $username, $collect);
854 $self->authenticate_user($username, $collect);
855 }
856
857 if($where =~ m/index/) {
858 my $site = $self->{'site'};
859 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
860 $gsdl_cgi->checked_chdir($collect_dir);
861 }
862
863 # Make sure the collection isn't locked by someone else
864 $self->lock_collection($username, $collect);
865
866
867 # now at last can set the metadata. $where can specify multiple
868 # $where is of the form: import|archives|index, or a subset thereof
869
870 #my @whichdirs = split('\|', $where);
871
872 # just check whether $where contains import/archives/index/live in turn, and
873 # for each case, process it accordingly
874 if($where =~ m/import/) {
875 $self->_set_import_metadata(@_);
876 }
877
878 if($where =~ m/archives/) {
879
880 # look up docID arg which is optional to set_metadata because it's optional
881 # to set_import, but which is compulsory to set_archives_metadata
882 my $docid = $self->{'d'};
883 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
884 $gsdl_cgi->generate_error("No docid (d=...) specified.");
885 }
886 else { # we have a docid, so can set archives meta
887 $self->_set_archives_metadata(@_);
888 }
889 }
890
891 if($where =~ m/index/) {
892
893 # look up docID arg which is optional to set_metadata because it's optional
894 # to set_import, but which is compulsory to set_archives_metadata
895 my $docid = $self->{'d'};
896 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
897 $gsdl_cgi->generate_error("No docid (d=...) specified.");
898 }
899 else { # we have a docid, so can set index meta
900 $self->_set_index_metadata(@_);
901 }
902 }
903
904 if($where =~ m/live/) {
905 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
906 }
907
908 # Release the lock once it is done
909 $self->unlock_collection($username, $collect);
910}
911
912sub set_metadata_array
913{
914 my $self = shift @_;
915
916 my $username = $self->{'username'};
917 my $collect = $self->{'collect'};
918 my $gsdl_cgi = $self->{'gsdl_cgi'};
919 my $gsdlhome = $self->{'gsdlhome'};
920
921 if ($baseaction::authentication_enabled) {
922 # Ensure the user is allowed to edit this collection
923 &authenticate_user($gsdl_cgi, $username, $collect);
924 }
925
926 my $site = $self->{'site'};
927 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
928
929 $gsdl_cgi->checked_chdir($collect_dir);
930
931 # Obtain the collect dir
932 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
933
934 # Make sure the collection isn't locked by someone else
935 $self->lock_collection($username, $collect);
936
937 # look up additional args
938
939 my $infodbtype = $self->{'infodbtype'};
940
941 my $json_str = $self->{'json'};
942 my $doc_array = decode_json $json_str;
943
944
945 my $global_status = 0;
946 my $global_mess = "";
947
948 my @all_docids = ();
949
950 foreach my $doc_array_rec ( @$doc_array ) {
951
952 my $docid = $doc_array_rec->{'docid'};
953 my $metaname = $doc_array_rec->{'metaname'};
954 my $metapos = $doc_array_rec->{'metapos'};
955 my $metavalue = $doc_array_rec->{'metavalue'};
956
957 push(@all_docids,$docid);
958
959 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
960
961 if ($status != 0) {
962 # Catch error if set infodb entry failed
963 $global_status = $status;
964 $global_mess .= "Failed to set metadata key: $docid\n";
965 $global_mess .= "Exit status: $status\n";
966 $global_mess .= "System Error Message: $!\n";
967 $global_mess .= "-" x 20;
968 }
969 }
970
971 if ($global_status != 0) {
972 $global_mess .= "PATH: $ENV{'PATH'}\n";
973 $gsdl_cgi->generate_error($global_mess);
974 }
975 else {
976 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
977 $gsdl_cgi->generate_ok_message($mess);
978 }
979
980 # Release the lock once it is done
981 $self->unlock_collection($username, $collect);
982}
983
984
985sub dxml_metadata
986{
987 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
988 my $metaname = $parser->{'parameters'}->{'metaname'};
989 my $metamode = $parser->{'parameters'}->{'metamode'};
990
991 print STDERR "**** Processing closing </Metadata> tag\n";
992
993 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
994
995 # Find the right metadata tag and checks if we are going to
996 # override it
997 #
998 # Note: This over writes the first metadata block it
999 # encountered. If there are multiple Sections in the doc.xml, it
1000 # might not behave as you would expect
1001
1002 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1003## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1004## print STDERR "**** metamode = $metamode\n";
1005
1006 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1007 {
1008 my $name_attr = $attrHash->{'name'};
1009 if (($name_attr eq $metaname) && ($metamode eq "override"))
1010 {
1011 if (!defined $parser->{'parameters'}->{'poscount'})
1012 {
1013 $parser->{'parameters'}->{'poscount'} = 0;
1014 }
1015 else
1016 {
1017 $parser->{'parameters'}->{'poscount'}++;
1018 }
1019
1020 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1021 {
1022 ##print STDERR "#### got match!!\n";
1023 # Get the value and override the current value
1024 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1025 $attrHash->{'_content'} = $metavalue;
1026
1027 # Don't want it to wipe out any other pieces of metadata
1028 $parser->{'parameters'}->{'metamode'} = "done";
1029 }
1030 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1031 {
1032 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1033 $attrHash->{'_content'} = $metavalue;
1034 $parser->{'parameters'}->{'metamode'} = "done";
1035 }
1036 }
1037 }
1038
1039 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1040 return [$tagname => $attrHash];
1041}
1042
1043# This method exists purely for catching invalid section numbers that the client
1044# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1045# we've seen all the Sections in the doc.xml, and none of their section nums matched
1046# if the metamode has not been set to 'done' by then.
1047sub dxml_archive
1048{
1049 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1050 my $metamode = $parser->{'parameters'}->{'metamode'};
1051
1052 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1053 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1054
1055# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1056
1057 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1058 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1059
1060 my $metaname = $parser->{'parameters'}->{'metaname'};
1061 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1062
1063 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1064 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1065 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1066 print STDERR "\tin section $opt_doc_secnum.\n";
1067 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1068 }
1069
1070 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1071 return [$tagname => $attrHash];
1072}
1073
1074sub dxml_description
1075{
1076 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1077 my $metamode = $parser->{'parameters'}->{'metamode'};
1078
1079 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1080 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1081
1082 print STDERR "**** Processing a closing </Description> tag \n";
1083# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1084
1085 # Accumulate the metadata
1086
1087 # We'll be accumulating metadata at this point if we haven't found and therefore
1088 # haven't processed the metadata yet.
1089 # For subsections, this means that if we're at a matching subsection, but haven't
1090 # found the correct metaname to override in that subsection, we accumulate it as new
1091 # meta in the subsection by adding it to the current description.
1092 # If there's no subsection info for the metadata, it will accumulate at the top level
1093 # section description if we hadn't found a matching metaname to override at this point.
1094
1095 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1096 # at the toplevel section. In the latter case, it means we want to process meta in the
1097 # toplevel section. So the eq check between the values below will work in all cases.
1098
1099 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1100 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1101 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1102
1103 if ($curr_secnum eq $opt_doc_secnum
1104 && ($metamode eq "accumulate" || $metamode eq "override")) {
1105 if ($metamode eq "override") {
1106 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1107 }
1108
1109 # If get to here and metamode is override, the this means there
1110 # was no existing value to overide => treat as an append operation
1111
1112 # Tack a new metadata tag on to the end of the <Metadata>+ block
1113 my $metaname = $parser->{'parameters'}->{'metaname'};
1114 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1115
1116 my $metadata_attr = {
1117 '_content' => $metavalue,
1118 'name' => $metaname,
1119 'mode' => "accumulate"
1120 };
1121
1122 my $append_metadata = [ "Metadata" => $metadata_attr ];
1123 my $description_content = $attrHash->{'_content'};
1124
1125 print "Appending metadata to doc.xml\n";
1126
1127 if (ref($description_content)) {
1128 # got some existing interesting nested content
1129 push(@$description_content, " ", $append_metadata ,"\n ");
1130 }
1131 else {
1132 #description_content is most likely a string such as "\n"
1133 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1134 }
1135
1136 $parser->{'parameters'}->{'metamode'} = "done";
1137 }
1138 else {
1139 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1140## print STDERR "**** NOT ACCUMULATE?!? \n";
1141 }
1142
1143 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1144 return [$tagname => $attrHash];
1145}
1146
1147
1148sub dxml_start_section
1149{
1150 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1151
1152 my $new_depth = scalar(@$contextArray);
1153
1154 print STDERR "**** START SECTION \n";
1155
1156 if ($new_depth == 1) {
1157 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1158 $parser->{'parameters'}->{'curr_section_num'} = "";
1159 }
1160
1161 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1162 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1163
1164 my $new_secnum;
1165
1166 if ($new_depth > $old_depth) {
1167 # child subsection
1168 $new_secnum = "$old_secnum.1";
1169 }
1170 elsif ($new_depth == $old_depth) {
1171 # sibling section => increase it's value by 1
1172 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1173 $tail_num++;
1174 $new_secnum = $old_secnum;
1175 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1176 }
1177 else {
1178 # back up to parent section => lopp off tail
1179 $new_secnum = $old_secnum;
1180 $new_secnum =~ s/\.\d+$//;
1181 }
1182
1183 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1184 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1185
1186 1;
1187}
1188
1189sub edit_xml_file
1190{
1191 my $self = shift @_;
1192 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1193
1194 # use XML::Rules to add it in (read in and out again)
1195 my $parser = XML::Rules->new(start_rules => $start_rules,
1196 rules => $rules,
1197 style => 'filter',
1198 output_encoding => 'utf8' );
1199
1200 my $xml_in = "";
1201 if (!open(MIN,"<$filename")) {
1202 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1203 }
1204 else {
1205 # Read all the text in
1206 my $line;
1207 while (defined ($line=<MIN>)) {
1208 $xml_in .= $line;
1209 }
1210 close(MIN);
1211
1212 my $MOUT;
1213 if (!open($MOUT,">$filename")) {
1214 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1215 }
1216 else {
1217 # Matched lines will get handled by the call backs
1218## my $xml_out = "";
1219
1220 binmode($MOUT,":utf8");
1221 $parser->filter($xml_in,$MOUT, $options);
1222
1223# binmode(MOUT,":utf8");
1224# print MOUT $xml_out;
1225 close($MOUT);
1226 }
1227 }
1228
1229 # copy across any custom error information that was stored during parsing
1230 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1231}
1232
1233sub edit_doc_xml
1234{
1235 my $self = shift @_;
1236 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1237
1238 my $info_mess = <<RAWEND;
1239****************************
1240 edit_doc_xml()
1241****************************
1242doc_xml_filename = $doc_xml_filename
1243metaname = $metaname
1244metavalue = $metavalue
1245metapos = $metapos
1246metamode = $metamode
1247opt_secnum = $opt_secnum
1248prevmetavalue = $prevmetavalue
1249****************************
1250RAWEND
1251
1252 $gsdl_cgi->generate_message($info_mess);
1253
1254 # To monitor which section/subsection number we are in
1255 my @start_rules =
1256 ( 'Section' => \&dxml_start_section );
1257
1258 # use XML::Rules to add it in (read in and out again)
1259 # Set the call back functions
1260 my @rules =
1261 ( _default => 'raw',
1262 'Metadata' => \&dxml_metadata,
1263 'Description' => \&dxml_description,
1264 'Archive' => \&dxml_archive); # just for catching errors at end
1265
1266 # Sets the parameters
1267 my $options = { 'metaname' => $metaname,
1268 'metapos' => $metapos,
1269 'metavalue' => $metavalue,
1270 'metamode' => $metamode,
1271 'prevmetavalue' => $prevmetavalue };
1272
1273 if (defined $opt_secnum) {
1274 $options->{'secnum'} = $opt_secnum;
1275 }
1276
1277 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1278}
1279
1280sub set_archives_metadata_entry
1281{
1282 my $self = shift @_;
1283 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1284
1285 my $info_mess = <<RAWEND;
1286****************************
1287 set_archives_metadata_entry()
1288****************************
1289archive_dir = $archive_dir
1290collect_dir = $collect_dir
1291collect = $collect
1292infodbtype = $infodbtype
1293docid = $docid
1294metaname = $metaname
1295metapos = $metapos
1296metavalue = $metavalue
1297metamode = $metamode
1298prevmetavalue = $prevmetavalue
1299****************************
1300RAWEND
1301
1302 $gsdl_cgi->generate_message($info_mess);
1303
1304 # Obtain the doc.xml path for the specified docID
1305 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1306
1307 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1308 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1309 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1310
1311 # The $doc_xml_file is relative to the archives, and now let's get the full path
1312 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1313 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1314
1315 # Edit the doc.xml file with the specified metadata name, value and position.
1316 # TODO: there is a potential problem here as this edit_doc_xml function
1317 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1318 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1319
1320 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1321 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1322
1323 # return 0; # return 0 for now to indicate no error
1324 return (defined $self->{'error_msg'}) ? 1 : 0;
1325}
1326
1327
1328sub set_archives_metadata
1329{
1330 my $self = shift @_;
1331
1332 my $username = $self->{'username'};
1333 my $collect = $self->{'collect'};
1334 my $gsdl_cgi = $self->{'gsdl_cgi'};
1335
1336 if ($baseaction::authentication_enabled) {
1337 # Ensure the user is allowed to edit this collection
1338 $self->authenticate_user($username, $collect);
1339 }
1340
1341 # Make sure the collection isn't locked by someone else
1342 $self->lock_collection($username, $collect);
1343
1344 $self->_set_archives_metadata(@_);
1345
1346 # Release the lock once it is done
1347 $self->unlock_collection($username, $collect);
1348}
1349
1350
1351sub set_archives_metadata_array
1352{
1353 my $self = shift @_;
1354
1355 my $username = $self->{'username'};
1356 my $collect = $self->{'collect'};
1357 my $gsdl_cgi = $self->{'gsdl_cgi'};
1358 my $gsdlhome = $self->{'gsdlhome'};
1359
1360 if ($baseaction::authentication_enabled) {
1361 # Ensure the user is allowed to edit this collection
1362 &authenticate_user($gsdl_cgi, $username, $collect);
1363 }
1364
1365 my $site = $self->{'site'};
1366 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1367
1368 $gsdl_cgi->checked_chdir($collect_dir);
1369
1370 # Obtain the collect dir
1371 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1372
1373 # Make sure the collection isn't locked by someone else
1374 $self->lock_collection($username, $collect);
1375
1376 # look up additional args
1377
1378 my $infodbtype = $self->{'infodbtype'};
1379
1380 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1381
1382 my $json_str = $self->{'json'};
1383 my $doc_array = decode_json $json_str;
1384
1385
1386 my $global_status = 0;
1387 my $global_mess = "";
1388
1389 my @all_docids = ();
1390
1391 foreach my $doc_array_rec ( @$doc_array ) {
1392
1393 my $docid = $doc_array_rec->{'docid'};
1394 my $metaname = $doc_array_rec->{'metaname'};
1395 my $metapos = $doc_array_rec->{'metapos'};
1396 my $metamode = $self->{'metamode'};
1397 my $metavalue = $doc_array_rec->{'metavalue'};
1398
1399 # Some sanity checks
1400 $metapos = 0 if (!defined $metapos);
1401
1402 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1403 # make "accumulate" the default (less destructive, as won't actually
1404 # delete any existing values)
1405 $metamode = "accumulate";
1406 }
1407
1408 push(@all_docids,$docid);
1409
1410 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1411 $metaname,$metapos,$metavalue,$metamode);
1412
1413 if ($status != 0) {
1414 # Catch error if set infodb entry failed
1415 $global_status = $status;
1416 $global_mess .= "Failed to set metadata key: $docid\n";
1417 $global_mess .= "Exit status: $status\n";
1418 $global_mess .= "System Error Message: $!\n";
1419 $global_mess .= "-" x 20 . "\n";
1420 }
1421 }
1422
1423 if ($global_status != 0) {
1424 $global_mess .= "PATH: $ENV{'PATH'}\n";
1425 $gsdl_cgi->generate_error($global_mess);
1426 }
1427 else {
1428 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1429 $gsdl_cgi->generate_ok_message($mess);
1430 }
1431
1432 # Release the lock once it is done
1433 $self->unlock_collection($username, $collect);
1434}
1435
1436sub _remove_archives_metadata
1437{
1438 my $self = shift @_;
1439
1440 my $collect = $self->{'collect'};
1441 my $gsdl_cgi = $self->{'gsdl_cgi'};
1442# my $gsdlhome = $self->{'gsdlhome'};
1443 my $infodbtype = $self->{'infodbtype'};
1444
1445 my $site = $self->{'site'};
1446
1447 # Obtain the collect and archive dir
1448 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1449
1450 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1451
1452 # look up additional args
1453 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1454
1455 my $metaname = $self->{'metaname'};
1456 my $metapos = $self->{'metapos'};
1457 $metapos = 0 if (!defined $metapos);
1458
1459 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1460 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1461
1462 # This now stores the full pathname
1463 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1464
1465 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1466
1467 if ($status == 0)
1468 {
1469 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1470 $mess .= " $metaname";
1471 $mess .= "->[$metapos]" if (defined $metapos);
1472
1473 $gsdl_cgi->generate_ok_message($mess);
1474 }
1475 else
1476 {
1477 my $mess .= "Failed to remove archives metadata key: $docid\n";
1478 $mess .= "Exit status: $status\n";
1479 $mess .= "System Error Message: $!\n";
1480 $mess .= "-" x 20 . "\n";
1481
1482 $gsdl_cgi->generate_error($mess);
1483 }
1484
1485 #return $status; # in case calling functions have a use for this
1486}
1487
1488sub remove_archives_metadata
1489{
1490 my $self = shift @_;
1491
1492 my $username = $self->{'username'};
1493 my $collect = $self->{'collect'};
1494 my $gsdl_cgi = $self->{'gsdl_cgi'};
1495
1496 if ($baseaction::authentication_enabled)
1497 {
1498 # Ensure the user is allowed to edit this collection
1499 &authenticate_user($gsdl_cgi, $username, $collect);
1500 }
1501
1502 # Make sure the collection isn't locked by someone else
1503 $self->lock_collection($username, $collect);
1504
1505 $self->_remove_archives_metadata(@_);
1506
1507 # Release the lock once it is done
1508 $self->unlock_collection($username, $collect);
1509}
1510
1511sub remove_from_doc_xml
1512{
1513 my $self = shift @_;
1514 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1515
1516 my @start_rules = ('Section' => \&dxml_start_section);
1517
1518 # Set the call-back functions for the metadata tags
1519 my @rules =
1520 (
1521 _default => 'raw',
1522 'Metadata' => \&rfdxml_metadata
1523 );
1524
1525 my $parser = XML::Rules->new
1526 (
1527 start_rules => \@start_rules,
1528 rules => \@rules,
1529 style => 'filter',
1530 output_encoding => 'utf8'
1531 );
1532
1533 my $status = 0;
1534 my $xml_in = "";
1535 if (!open(MIN,"<$doc_xml_filename"))
1536 {
1537 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1538 $status = 1;
1539 }
1540 else
1541 {
1542 # Read them in
1543 my $line;
1544 while (defined ($line=<MIN>)) {
1545 $xml_in .= $line;
1546 }
1547 close(MIN);
1548
1549 # Filter with the call-back functions
1550 my $xml_out = "";
1551
1552 my $MOUT;
1553 if (!open($MOUT,">$doc_xml_filename")) {
1554 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1555 $status = 1;
1556 }
1557 else {
1558 binmode($MOUT,":utf8");
1559 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1560 close($MOUT);
1561 }
1562 }
1563 return $status;
1564}
1565
1566sub rfdxml_metadata
1567{
1568 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1569
1570 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1571 {
1572 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1573 return [$tagname => $attrHash];
1574 }
1575
1576 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1577 {
1578 if (!defined $parser->{'parameters'}->{'poscount'})
1579 {
1580 $parser->{'parameters'}->{'poscount'} = 0;
1581 }
1582 else
1583 {
1584 $parser->{'parameters'}->{'poscount'}++;
1585 }
1586 }
1587
1588 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1589 {
1590 return [];
1591 }
1592
1593 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1594 {
1595 return [];
1596 }
1597
1598 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1599 return [$tagname => $attrHash];
1600}
1601
1602sub mxml_metadata
1603{
1604 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1605 my $metaname = $parser->{'parameters'}->{'metaname'};
1606 my $metamode = $parser->{'parameters'}->{'metamode'};
1607
1608 # Report error if we don't see FileName tag before this
1609 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1610
1611 # Don't do anything if we are not in the right FileSet
1612 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1613 if ($file_regexp =~ /\.\*/) {
1614 # Only interested in a file_regexp if it specifies precisely one
1615 # file.
1616 # So, skip anything with a .* in it as it is too general
1617 return [$tagname => $attrHash];
1618 }
1619 my $src_file = $parser->{'parameters'}->{'src_file'};
1620 if (!($src_file =~ /$file_regexp/)) {
1621 return [$tagname => $attrHash];
1622 }
1623## print STDERR "*** mxl metamode = $metamode\n";
1624
1625 # Find the right metadata tag and checks if we are going to override it
1626 my $name_attr = $attrHash->{'name'};
1627 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1628 # Get the value and override the current value
1629 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1630 $attrHash->{'_content'} = $metavalue;
1631
1632## print STDERR "**** overrideing metadata.xml\n";
1633
1634 # Don't want it to wipe out any other pieces of metadata
1635 $parser->{'parameters'}->{'metamode'} = "done";
1636 }
1637
1638 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1639 return [$tagname => $attrHash];
1640}
1641
1642
1643sub mxml_description
1644{
1645 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1646 my $metamode = $parser->{'parameters'}->{'metamode'};
1647
1648 # Failed... Report error if we don't see FileName tag before this
1649 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1650
1651 # Don't do anything if we are not in the right FileSet
1652 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1653 if ($file_regexp =~ m/\.\*/) {
1654 # Only interested in a file_regexp if it specifies precisely one
1655 # file.
1656 # So, skip anything with a .* in it as it is too general
1657 return [$tagname => $attrHash];
1658 }
1659 my $src_file = $parser->{'parameters'}->{'src_file'};
1660
1661 if (!($src_file =~ m/$file_regexp/)) {
1662 return [$tagname => $attrHash];
1663 }
1664
1665 # Accumulate the metadata block to the end of the description block
1666 # Note: This adds metadata block to all description blocks, so if there are
1667 # multiple FileSets, it will add to all of them
1668 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1669 # if metamode was "override" but get to here then it failed to
1670 # find an item to override, in which case it should append its
1671 # value to the end, just like the "accumulate" mode
1672
1673 # tack a new metadata tag on to the end of the <Metadata>+ block
1674 my $metaname = $parser->{'parameters'}->{'metaname'};
1675 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1676
1677 my $metadata_attr = { '_content' => $metavalue,
1678 'name' => $metaname,
1679 'mode' => "accumulate" };
1680
1681 my $append_metadata = [ "Metadata" => $metadata_attr ];
1682 my $description_content = $attrHash->{'_content'};
1683
1684## print STDERR "*** appending to metadata.xml\n";
1685
1686 # append the new metadata element to the end of the current
1687 # content contained inside this tag
1688 if (ref($description_content) eq "") {
1689 # => string or numeric literal
1690 # this is caused by a <Description> block has no <Metadata> child elements
1691 # => set up an empty array in '_content'
1692 $attrHash->{'_content'} = [ "\n" ];
1693 $description_content = $attrHash->{'_content'};
1694 }
1695
1696 push(@$description_content," ", $append_metadata ,"\n ");
1697 $parser->{'parameters'}->{'metamode'} = "done";
1698 }
1699
1700 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1701 return [$tagname => $attrHash];
1702}
1703
1704
1705sub mxml_filename
1706{
1707 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1708
1709 # Store the filename of the Current Fileset
1710 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1711 # FileName tag must come before Description tag
1712 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1713
1714 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1715 return [$tagname => $attrHash];
1716}
1717
1718
1719sub mxml_fileset
1720{
1721 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1722
1723 # Initilise the current_file
1724 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1725 # FileName tag must come before Description tag
1726 $parser->{'parameters'}->{'current_file'} = "";
1727
1728 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1729 return [$tagname => $attrHash];
1730}
1731
1732
1733sub edit_metadata_xml
1734{
1735 my $self = shift @_;
1736 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1737
1738 # Set the call-back functions for the metadata tags
1739 my @rules =
1740 ( _default => 'raw',
1741 'FileName' => \&mxml_filename,
1742 'Metadata' => \&mxml_metadata,
1743 'Description' => \&mxml_description,
1744 'FileSet' => \&mxml_fileset);
1745
1746 # use XML::Rules to add it in (read in and out again)
1747 my $parser = XML::Rules->new(rules => \@rules,
1748 style => 'filter',
1749 output_encoding => 'utf8');
1750
1751 if (!-e $metadata_xml_filename) {
1752
1753 if (open(MOUT,">$metadata_xml_filename")) {
1754
1755 my $src_file_re = &util::filename_to_regex($src_file);
1756 # shouldn't the following also be in the above utility routine??
1757 # $src_file_re =~ s/\./\\./g;
1758
1759 print MOUT "<?xml version=\"1.0\"?>\n";
1760 print MOUT "<DirectoryMetadata>\n";
1761 print MOUT " <FileSet>\n";
1762 print MOUT " <FileName>$src_file_re</FileName>\n";
1763 print MOUT " <Description>\n";
1764 print MOUT " </Description>\n";
1765 print MOUT " </FileSet>\n";
1766 print MOUT "</DirectoryMetadata>\n";
1767
1768 close(MOUT);
1769 }
1770 else {
1771 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1772 }
1773 }
1774
1775
1776 my $xml_in = "";
1777 if (!open(MIN,"<$metadata_xml_filename")) {
1778 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1779 }
1780 else {
1781 # Read them in
1782 my $line;
1783 while (defined ($line=<MIN>)) {
1784 $xml_in .= $line;
1785 }
1786 close(MIN);
1787
1788 # Filter with the call-back functions
1789 my $xml_out = "";
1790
1791 my $MOUT;
1792 if (!open($MOUT,">$metadata_xml_filename")) {
1793 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1794 }
1795 else {
1796 binmode($MOUT,":utf8");
1797
1798 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1799 # At the moment, I will just hack it!
1800 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1801 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1802 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1803 #print MOUT $xml_out;
1804
1805 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1806 metavalue => $metavalue,
1807 metamode => $metamode,
1808 src_file => $src_file,
1809 current_file => undef} );
1810 close($MOUT);
1811 }
1812 }
1813}
1814
1815
1816sub set_import_metadata
1817{
1818 my $self = shift @_;
1819
1820 my $username = $self->{'username'};
1821 my $collect = $self->{'collect'};
1822 my $gsdl_cgi = $self->{'gsdl_cgi'};
1823
1824 if ($baseaction::authentication_enabled) {
1825 # Ensure the user is allowed to edit this collection
1826 $self->authenticate_user($username, $collect);
1827 }
1828
1829 # Make sure the collection isn't locked by someone else
1830 $self->lock_collection($username, $collect);
1831
1832 $self->_set_import_metadata(@_);
1833
1834 # Release the lock once it is done
1835 $self->unlock_collection($username, $collect);
1836
1837}
1838
1839sub set_import_metadata_array
1840{
1841 my $self = shift @_;
1842
1843 my $username = $self->{'username'};
1844 my $collect = $self->{'collect'};
1845 my $gsdl_cgi = $self->{'gsdl_cgi'};
1846 my $gsdlhome = $self->{'gsdlhome'};
1847
1848 if ($baseaction::authentication_enabled) {
1849 # Ensure the user is allowed to edit this collection
1850 &authenticate_user($gsdl_cgi, $username, $collect);
1851 }
1852
1853 my $site = $self->{'site'};
1854 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1855
1856 $gsdl_cgi->checked_chdir($collect_dir);
1857
1858 # Make sure the collection isn't locked by someone else
1859 $self->lock_collection($username, $collect);
1860
1861 # look up additional args
1862
1863 my $infodbtype = $self->{'infodbtype'};
1864
1865 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1866
1867 my $json_str = $self->{'json'};
1868 my $doc_array = decode_json $json_str;
1869
1870 my $global_status = 0;
1871 my $global_mess = "";
1872
1873 my @all_docids = ();
1874
1875 foreach my $doc_array_rec ( @$doc_array )
1876 {
1877 my $docid = $doc_array_rec->{'docid'};
1878 my $metaname = $doc_array_rec->{'metaname'};
1879 my $metamode = $self->{'metamode'};
1880 my $metavalue = $doc_array_rec->{'metavalue'};
1881
1882 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1883 # make "accumulate" the default (less destructive, as won't actually
1884 # delete any existing values)
1885 $metamode = "accumulate";
1886 }
1887
1888 push(@all_docids,$docid);
1889
1890 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1891 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1892 my $metadata_xml_file;
1893 my $import_filename = undef;
1894
1895 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1896 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1897
1898 # This now stores the full pathname
1899 $import_filename = $doc_rec->{'src-file'}->[0];
1900
1901 # figure out correct metadata.xml file [?]
1902 # Assuming the metadata.xml file is next to the source file
1903 # Note: This will not work if it is using the inherited metadata from the parent folder
1904 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1905 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1906
1907 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1908 }
1909
1910 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1911 $gsdl_cgi->generate_ok_message($mess);
1912
1913 # Release the lock once it is done
1914 $self->unlock_collection($username, $collect);
1915}
1916
1917sub _remove_import_metadata
1918{
1919 my $self = shift @_;
1920
1921 my $collect = $self->{'collect'};
1922 my $gsdl_cgi = $self->{'gsdl_cgi'};
1923 my $gsdlhome = $self->{'gsdlhome'};
1924 my $infodbtype = $self->{'infodbtype'};
1925
1926 # Obtain the collect dir
1927 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1928 my $site = $self->{'site'};
1929 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1930
1931 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1932 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1933
1934 # look up additional args
1935 my $docid = $self->{'d'};
1936 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1937 {
1938 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1939 }
1940
1941 my $metaname = $self->{'metaname'};
1942 my $metavalue = $self->{'metavalue'};
1943 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1944
1945 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1946 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1947 my $metadata_xml_file;
1948 my $import_filename = undef;
1949 if (defined $docid)
1950 {
1951 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1952 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1953
1954 # This now stores the full pathname
1955 $import_filename = $doc_rec->{'src-file'}->[0];
1956 }
1957
1958 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1959 {
1960 $gsdl_cgi->generate_error_message("There is no metadata\n");
1961 }
1962
1963 # figure out correct metadata.xml file [?]
1964 # Assuming the metadata.xml file is next to the source file
1965 # Note: This will not work if it is using the inherited metadata from the parent folder
1966 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1967 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1968
1969 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1970
1971 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1972 $mess .= " $metaname";
1973 $mess .= " = $metavalue\n";
1974
1975 $gsdl_cgi->generate_ok_message($mess);
1976
1977 #return $status; # in case calling functions have a use for this
1978}
1979
1980sub remove_import_metadata
1981{
1982 my $self = shift @_;
1983
1984 my $username = $self->{'username'};
1985 my $collect = $self->{'collect'};
1986 my $gsdl_cgi = $self->{'gsdl_cgi'};
1987
1988 if ($baseaction::authentication_enabled) {
1989 # Ensure the user is allowed to edit this collection
1990 &authenticate_user($gsdl_cgi, $username, $collect);
1991 }
1992
1993 # Make sure the collection isn't locked by someone else
1994 $self->lock_collection($username, $collect);
1995
1996 $self->_remove_import_metadata(@_);
1997
1998 # Release the lock once it is done
1999 $self->unlock_collection($username, $collect);
2000
2001}
2002
2003sub remove_from_metadata_xml
2004{
2005 my $self = shift @_;
2006 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
2007
2008 # Set the call-back functions for the metadata tags
2009 my @rules =
2010 (
2011 _default => 'raw',
2012 'Metadata' => \&rfmxml_metadata,
2013 'FileName' => \&mxml_filename
2014 );
2015
2016 my $parser = XML::Rules->new
2017 (
2018 rules => \@rules,
2019 style => 'filter',
2020 output_encoding => 'utf8'
2021 );
2022
2023 my $xml_in = "";
2024 if (!open(MIN,"<$metadata_xml_filename"))
2025 {
2026 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2027 }
2028 else
2029 {
2030 # Read them in
2031 my $line;
2032 while (defined ($line=<MIN>)) {
2033 $xml_in .= $line;
2034 }
2035 close(MIN);
2036
2037 # Filter with the call-back functions
2038 my $xml_out = "";
2039
2040 my $MOUT;
2041 if (!open($MOUT,">$metadata_xml_filename")) {
2042 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2043 }
2044 else {
2045 binmode($MOUT,":utf8");
2046 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
2047 close($MOUT);
2048 }
2049 }
2050}
2051
2052sub rfmxml_metadata
2053{
2054 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2055
2056 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2057 {
2058 return [];
2059 }
2060
2061 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2062 return [$tagname => $attrHash];
2063}
2064
2065sub _remove_live_metadata
2066{
2067 my $self = shift @_;
2068
2069 my $collect = $self->{'collect'};
2070 my $gsdl_cgi = $self->{'gsdl_cgi'};
2071# my $gsdlhome = $self->{'gsdlhome'};
2072 my $infodbtype = $self->{'infodbtype'};
2073
2074 # Obtain the collect dir
2075 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2076 my $site = $self->{'site'};
2077 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2078
2079
2080 # look up additional args
2081 my $docid = $self->{'d'};
2082 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2083 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2084 }
2085
2086 # Generate the dbkey
2087 my $metaname = $self->{'metaname'};
2088 my $dbkey = "$docid.$metaname";
2089
2090 # To people who know $collect_tail please add some comments
2091 # Obtain the live gdbm_db path
2092 my $collect_tail = $collect;
2093 $collect_tail =~ s/^.*[\/\\]//;
2094 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2095 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2096
2097 # Remove the key
2098 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2099 my $status = system($cmd);
2100 if ($status != 0) {
2101 # Catch error if gdbmdel failed
2102 my $mess = "Failed to set metadata key: $dbkey\n";
2103
2104 $mess .= "PATH: $ENV{'PATH'}\n";
2105 $mess .= "cmd = $cmd\n";
2106 $mess .= "Exit status: $status\n";
2107 $mess .= "System Error Message: $!\n";
2108
2109 $gsdl_cgi->generate_error($mess);
2110 }
2111 else {
2112 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2113 }
2114
2115}
2116
2117sub remove_live_metadata
2118{
2119 my $self = shift @_;
2120
2121 my $username = $self->{'username'};
2122 my $collect = $self->{'collect'};
2123 my $gsdl_cgi = $self->{'gsdl_cgi'};
2124 my $gsdlhome = $self->{'gsdlhome'};
2125
2126 if ($baseaction::authentication_enabled) {
2127 # Ensure the user is allowed to edit this collection
2128 &authenticate_user($gsdl_cgi, $username, $collect);
2129 }
2130
2131 # Make sure the collection isn't locked by someone else
2132 $self->lock_collection($username, $collect);
2133
2134 $self->_remove_live_metadata(@_);
2135
2136 $self->unlock_collection($username, $collect);
2137}
2138
2139sub remove_metadata
2140{
2141 my $self = shift @_;
2142
2143 my $where = $self->{'where'};
2144 if(!$where) {
2145 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2146 return;
2147 }
2148
2149 my $username = $self->{'username'};
2150 my $collect = $self->{'collect'};
2151 my $gsdl_cgi = $self->{'gsdl_cgi'};
2152
2153 if ($baseaction::authentication_enabled) {
2154 # Ensure the user is allowed to edit this collection
2155 &authenticate_user($gsdl_cgi, $username, $collect);
2156 }
2157
2158 # Make sure the collection isn't locked by someone else
2159 $self->lock_collection($username, $collect);
2160
2161 # check which directories need to be processed, specified in $where as
2162 # any combination of import|archives|index|live
2163 if($where =~ m/import/) {
2164 $self->_remove_import_metadata(@_);
2165 }
2166 if($where =~ m/archives/) {
2167 $self->_remove_archives_metadata(@_);
2168 }
2169 if($where =~ m/index/) {
2170 $self->_remove_index_metadata(@_);
2171 }
2172
2173 # Release the lock once it is done
2174 $self->unlock_collection($username, $collect);
2175}
2176
2177# the internal version, without authentication
2178sub _remove_index_metadata
2179{
2180 my $self = shift @_;
2181
2182 my $collect = $self->{'collect'};
2183 my $gsdl_cgi = $self->{'gsdl_cgi'};
2184# my $gsdlhome = $self->{'gsdlhome'};
2185 my $infodbtype = $self->{'infodbtype'};
2186
2187 # Obtain the collect dir
2188 my $site = $self->{'site'};
2189 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2190 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2191
2192 # look up additional args
2193 my $docid = $self->{'d'};
2194 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2195 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2196 }
2197 my $metaname = $self->{'metaname'};
2198 my $metapos = $self->{'metapos'};
2199 my $metavalue = $self->{'metavalue'};
2200
2201 # To people who know $collect_tail please add some comments
2202 # Obtain the path to the database
2203 my $collect_tail = $collect;
2204 $collect_tail =~ s/^.*[\/\\]//;
2205 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2206 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2207
2208 # Read the docid entry
2209 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2210
2211 # Basically loop through and unescape_html the values
2212 foreach my $k (keys %$doc_rec) {
2213 my @escaped_v = ();
2214 foreach my $v (@{$doc_rec->{$k}}) {
2215 if ($k eq "contains") {
2216 # protect quotes in ".2;".3 etc
2217 $v =~ s/\"/\\\"/g;
2218 push(@escaped_v, $v);
2219 }
2220 else {
2221 my $ev = &ghtml::unescape_html($v);
2222 $ev =~ s/\"/\\\"/g;
2223 push(@escaped_v, $ev);
2224 }
2225 }
2226 $doc_rec->{$k} = \@escaped_v;
2227 }
2228
2229 # Check to make sure the key does exist
2230 if (!defined ($doc_rec->{$metaname})) {
2231 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2232 }
2233
2234 # Obtain the specified metadata pos
2235 $metapos = 0 if (!defined $metapos);
2236
2237 # consider check key is defined before deleting?
2238 # Loop through the metadata array and ignore the specified position
2239 my $filtered_metadata = [];
2240 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2241 for (my $i=0; $i<$num_metadata_vals; $i++) {
2242 my $metaval = shift(@{$doc_rec->{$metaname}});
2243
2244 if (!defined $metavalue && $i != $metapos) {
2245 push(@$filtered_metadata,$metaval)
2246 }
2247
2248 if(defined $metavalue && !($metavalue eq $metaval))
2249 {
2250 push(@$filtered_metadata,$metavalue)
2251 }
2252 }
2253 $doc_rec->{$metaname} = $filtered_metadata;
2254
2255 # Turn the record back to string
2256 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2257
2258 # Store it back to the database
2259 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2260 my $status = system($cmd);
2261 if ($status != 0) {
2262 my $mess = "Failed to set metadata key: $docid\n";
2263
2264 $mess .= "PATH: $ENV{'PATH'}\n";
2265 $mess .= "cmd = $cmd\n";
2266 $mess .= "Exit status: $status\n";
2267 $mess .= "System Error Message: $!\n";
2268
2269 $gsdl_cgi->generate_error($mess);
2270 }
2271 else {
2272 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2273 $mess .= " $metaname";
2274 $mess .= "->[$metapos]" if (defined $metapos);
2275
2276 $gsdl_cgi->generate_ok_message($mess);
2277 }
2278
2279 #return $status; # in case calling functions have a use for this
2280}
2281
2282sub remove_index_metadata
2283{
2284 my $self = shift @_;
2285
2286 my $username = $self->{'username'};
2287 my $collect = $self->{'collect'};
2288 my $gsdl_cgi = $self->{'gsdl_cgi'};
2289# my $gsdlhome = $self->{'gsdlhome'};
2290
2291 if ($baseaction::authentication_enabled) {
2292 # Ensure the user is allowed to edit this collection
2293 &authenticate_user($gsdl_cgi, $username, $collect);
2294 }
2295
2296 # Obtain the collect dir
2297 my $site = $self->{'site'};
2298 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2299 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2300
2301 # Make sure the collection isn't locked by someone else
2302 $self->lock_collection($username, $collect);
2303
2304 $self->_remove_index_metadata(@_);
2305
2306 # Release the lock once it is done
2307 $self->unlock_collection($username, $collect);
2308}
2309
2310
2311# Was trying to reused the codes, but the functions need to be broken
2312# down more before they can be reused, otherwise there will be too
2313# much overhead and duplicate process...
2314sub insert_metadata
2315{
2316 my $self = shift @_;
2317
2318 my $username = $self->{'username'};
2319 my $collect = $self->{'collect'};
2320 my $gsdl_cgi = $self->{'gsdl_cgi'};
2321 my $gsdlhome = $self->{'gsdlhome'};
2322 my $infodbtype = $self->{'infodbtype'};
2323
2324 # If the import metadata and gdbm database have been updated, we
2325 # need to insert some notification to warn user that the the text
2326 # they see at the moment is not indexed and require a rebuild.
2327 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2328
2329 if ($baseaction::authentication_enabled) {
2330 # Ensure the user is allowed to edit this collection
2331 $self->authenticate_user($username, $collect);
2332 }
2333
2334 # Obtain the collect and archive dir
2335 my $site = $self->{'site'};
2336 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2337 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2338 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2339
2340 # Make sure the collection isn't locked by someone else
2341 $self->lock_collection($username, $collect);
2342
2343 # Check additional args
2344 my $docid = $self->{'d'};
2345 if (!defined($docid)) {
2346 $gsdl_cgi->generate_error("No document id is specified: d=...");
2347 }
2348 my $metaname = $self->{'metaname'};
2349 if (!defined($metaname)) {
2350 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2351 }
2352 my $metavalue = $self->{'metavalue'};
2353 if (!defined($metavalue) || $metavalue eq "") {
2354 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2355 }
2356 # make "accumulate" the default (less destructive, as won't actually
2357 # delete any existing values)
2358 my $metamode = "accumulate";
2359
2360 #=======================================================================#
2361 # set_import_metadata [START]
2362 #=======================================================================#
2363 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2364 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2365 my $metadata_xml_file;
2366 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2367 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2368
2369 # This now stores the full pathname
2370 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2371
2372 # figure out correct metadata.xml file [?]
2373 # Assuming the metadata.xml file is next to the source file
2374 # Note: This will not work if it is using the inherited metadata from the parent folder
2375 my ($import_tailname, $import_dirname)
2376 = File::Basename::fileparse($import_filename);
2377 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2378
2379 # Shane's escape characters
2380 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2381 $metavalue =~ s/\,/&#44;/g;
2382 $metavalue =~ s/\:/&#58;/g;
2383 $metavalue =~ s/\|/&#124;/g;
2384 $metavalue =~ s/\(/&#40;/g;
2385 $metavalue =~ s/\)/&#41;/g;
2386 $metavalue =~ s/\[/&#91;/g;
2387 $metavalue =~ s/\\/&#92;/g;
2388 $metavalue =~ s/\]/&#93;/g;
2389 $metavalue =~ s/\{/&#123;/g;
2390 $metavalue =~ s/\}/&#125;/g;
2391 $metavalue =~ s/\"/&#34;/g;
2392 $metavalue =~ s/\`/&#96;/g;
2393 $metavalue =~ s/\n/_newline_/g;
2394
2395 # Edit the metadata.xml
2396 # Modified by Jeffrey from DL Consulting
2397 # Handle the case where there is one metadata.xml file for multiple FileSets
2398 # The XML filter needs to know whether it is in the right FileSet
2399 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2400 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2401 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2402 $metaname, $metavalue, $metamode, $import_tailname);
2403 #=======================================================================#
2404 # set_import_metadata [END]
2405 #=======================================================================#
2406
2407
2408 #=======================================================================#
2409 # set_metadata (accumulate version) [START]
2410 #=======================================================================#
2411 # To people who know $collect_tail please add some comments
2412 # Obtain path to the database
2413 my $collect_tail = $collect;
2414 $collect_tail =~ s/^.*[\/\\]//;
2415 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2416 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2417
2418 # Read the docid entry
2419 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2420
2421 foreach my $k (keys %$doc_rec) {
2422 my @escaped_v = ();
2423 foreach my $v (@{$doc_rec->{$k}}) {
2424 if ($k eq "contains") {
2425 # protect quotes in ".2;".3 etc
2426 $v =~ s/\"/\\\"/g;
2427 push(@escaped_v, $v);
2428 }
2429 else {
2430 my $ev = &ghtml::unescape_html($v);
2431 $ev =~ s/\"/\\\"/g;
2432 push(@escaped_v, $ev);
2433 }
2434 }
2435 $doc_rec->{$k} = \@escaped_v;
2436 }
2437
2438 # Protect the quotes
2439 $metavalue =~ s/\"/\\\"/g;
2440
2441 # Adds the pending macro
2442 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2443
2444 # If the metadata doesn't exist, create a new one
2445 if (!defined($doc_rec->{$metaname})){
2446 $doc_rec->{$metaname} = [ $macro_metavalue ];
2447 }
2448 # Else, let's acculumate the values
2449 else {
2450 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2451 }
2452
2453 # Generate the record string
2454 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2455
2456 # Store it into GDBM
2457 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2458 my $status = system($cmd);
2459 if ($status != 0) {
2460 # Catch error if gdbmget failed
2461 my $mess = "Failed to set metadata key: $docid\n";
2462
2463 $mess .= "PATH: $ENV{'PATH'}\n";
2464 $mess .= "cmd = $cmd\n";
2465 $mess .= "Exit status: $status\n";
2466 $mess .= "System Error Message: $!\n";
2467
2468 $gsdl_cgi->generate_error($mess);
2469 }
2470 else {
2471 my $mess = "insert-metadata successful: Key[$docid]\n";
2472 $mess .= " [In metadata.xml] $metaname";
2473 $mess .= " = $metavalue\n";
2474 $mess .= " [In database] $metaname";
2475 $mess .= " = $macro_metavalue\n";
2476 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2477 $gsdl_cgi->generate_ok_message($mess);
2478 }
2479 #=======================================================================#
2480 # set_metadata (accumulate version) [END]
2481 #=======================================================================#
2482
2483 # Release the lock once it is done
2484 $self->unlock_collection($username, $collect);
2485}
2486
24871;
Note: See TracBrowser for help on using the repository browser.