source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 25097

Last change on this file since 25097 was 25097, checked in by sjm84, 12 years ago

More changes to metadataaction as well as sorting the keys when printing

File size: 65.7 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
63
64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
68
69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
72
73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
85
86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
102
103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
106
107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129
130 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
131
132 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
133
134 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
135
136 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
137
138 return bless $self, $class;
139}
140
141
142sub get_live_metadata
143{
144 my $self = shift @_;
145
146 my $username = $self->{'username'};
147 my $collect = $self->{'collect'};
148 my $gsdl_cgi = $self->{'gsdl_cgi'};
149 my $gsdlhome = $self->{'gsdlhome'};
150 my $infodbtype = $self->{'infodbtype'};
151
152 # live metadata gets/saves value scoped (prefixed) by the current usename
153 # so (for now) let's not bother to enforce authentication
154
155 # Obtain the collect dir
156 my $site = $self->{'site'};
157 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
158 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
159
160 # Make sure the collection isn't locked by someone else
161 $self->lock_collection($username, $collect);
162
163 # look up additional args
164 my $docid = $self->{'d'};
165 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
166 $gsdl_cgi->generate_error("No docid (d=...) specified.");
167 }
168
169 # Generate the dbkey
170 my $metaname = $self->{'metaname'};
171 my $dbkey = "$docid.$metaname";
172
173 # To people who know $collect_tail please add some comments
174 # Obtain path to the database
175 my $collect_tail = $collect;
176 $collect_tail =~ s/^.*[\/|\\]//;
177 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
178 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
179
180 # Obtain the content of the key
181 my $cmd = "gdbmget $infodb_file_path $dbkey";
182 if (open(GIN,"$cmd |") == 0) {
183 # Catch error if gdbmget failed
184 my $mess = "Failed to get metadata key: $metaname\n";
185 $mess .= "$!\n";
186
187 $gsdl_cgi->generate_error($mess);
188 }
189 else {
190 binmode(GIN,":utf8");
191 # Read everything in and concatenate them into $metavalue
192 my $metavalue = "";
193 my $line;
194 while (defined ($line=<GIN>)) {
195 $metavalue .= $line;
196 }
197 close(GIN);
198 chomp($metavalue); # Get rid off the tailing newlines
199 $gsdl_cgi->generate_ok_message("$metavalue");
200 }
201
202 # Release the lock once it is done
203 $self->unlock_collection($username, $collect);
204}
205
206
207sub get_metadata
208{
209 my $self = shift @_;
210
211 my $username = $self->{'username'};
212 my $collect = $self->{'collect'};
213 my $gsdl_cgi = $self->{'gsdl_cgi'};
214 my $gsdlhome = $self->{'gsdlhome'};
215
216 # Authenticate user if it is enabled
217 if ($baseaction::authentication_enabled) {
218 # Ensure the user is allowed to edit this collection
219 &authenticate_user($gsdl_cgi, $username, $collect);
220 }
221
222 # Obtain the collect dir
223 my $site = $self->{'site'};
224 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
225 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
226
227 # Make sure the collection isn't locked by someone else
228 $self->lock_collection($username, $collect);
229
230 # look up additional args
231 my $docid = $self->{'d'};
232 my $metaname = $self->{'metaname'};
233 my $metapos = $self->{'metapos'};
234 my $infodbtype = $self->{'infodbtype'};
235
236 # To people who know $collect_tail please add some comments
237 # Obtain path to the database
238 my $collect_tail = $collect;
239 $collect_tail =~ s/^.*[\/\\]//;
240 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
241 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
242
243 # Read the docid entry
244 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
245
246 # Basically loop through and unescape_html the values
247 foreach my $k (keys %$doc_rec) {
248 my @escaped_v = ();
249 foreach my $v (@{$doc_rec->{$k}}) {
250 my $ev = &ghtml::unescape_html($v);
251 push(@escaped_v, $ev);
252 }
253 $doc_rec->{$k} = \@escaped_v;
254 }
255
256 # Obtain the specified metadata value
257 $metapos = 0 if (!defined $metapos);
258 my $metavalue = $doc_rec->{$metaname}->[$metapos];
259 $gsdl_cgi->generate_ok_message("$metavalue");
260
261 # Release the lock once it is done
262 $self->unlock_collection($username, $collect);
263}
264
265
266sub get_import_metadata
267{
268 my $self = shift @_;
269
270 my $username = $self->{'username'};
271 my $collect = $self->{'collect'};
272 my $gsdl_cgi = $self->{'gsdl_cgi'};
273 my $gsdlhome = $self->{'gsdlhome'};
274
275 # Authenticate user if it is enabled
276 if ($baseaction::authentication_enabled) {
277 # Ensure the user is allowed to edit this collection
278 &authenticate_user($gsdl_cgi, $username, $collect);
279 }
280
281 # Obtain the collect dir
282 my $site = $self->{'site'};
283 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
284 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
285
286 # Make sure the collection isn't locked by someone else
287 $self->lock_collection($username, $collect);
288
289 # look up additional args
290 my $docid = $self->{'d'};
291 my $metaname = $self->{'metaname'};
292 my $infodbtype = $self->{'infodbtype'};
293 if (!defined $docid)
294 {
295 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
296 }
297
298 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
299 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
300 my $metadata_xml_file;
301 my $import_filename = undef;
302
303
304 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
305 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
306 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
307
308 # This now stores the full pathname
309 $import_filename = $doc_rec->{'src-file'}->[0];
310
311 # figure out correct metadata.xml file [?]
312 # Assuming the metadata.xml file is next to the source file
313 # Note: This will not work if it is using the inherited metadata from the parent folder
314 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
315 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
316
317 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
318
319 # Release the lock once it is done
320 $self->unlock_collection($username, $collect);
321}
322
323sub get_metadata_from_metadata_xml
324{
325 my $self = shift @_;
326 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
327
328 my @rules =
329 (
330 _default => 'raw',
331 'Metadata' => \&gfmxml_metadata,
332 'FileName' => \&mxml_filename
333 );
334
335 my $parser = XML::Rules->new
336 (
337 rules => \@rules,
338 output_encoding => 'utf8'
339 );
340
341 my $xml_in = "";
342 if (!open(MIN,"<$metadata_xml_filename"))
343 {
344 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
345 }
346 else
347 {
348 # Read them in
349 my $line;
350 while (defined ($line=<MIN>)) {
351 $xml_in .= $line;
352 }
353 close(MIN);
354
355 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
356
357 if(defined $parser->{'pad'}->{'metavalue'})
358 {
359 return $parser->{'pad'}->{'metavalue'};
360 }
361 else
362 {
363 return "";
364 }
365 }
366}
367
368sub gfmxml_metadata
369{
370 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
371
372 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
373 {
374 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
375 }
376}
377
378sub get_archives_metadata
379{
380 my $self = shift @_;
381
382 my $username = $self->{'username'};
383 my $collect = $self->{'collect'};
384 my $gsdl_cgi = $self->{'gsdl_cgi'};
385 my $gsdlhome = $self->{'gsdlhome'};
386 my $infodbtype = $self->{'infodbtype'};
387
388 # Authenticate user if it is enabled
389 if ($baseaction::authentication_enabled) {
390 # Ensure the user is allowed to edit this collection
391 &authenticate_user($gsdl_cgi, $username, $collect);
392 }
393
394 # Obtain the collect dir
395 my $site = $self->{'site'};
396 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
397
398 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
399
400 # Make sure the collection isn't locked by someone else
401 $self->lock_collection($username, $collect);
402
403 # look up additional args
404 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
405 $docid_secnum = "" if (!defined $docid_secnum);
406
407 my $metaname = $self->{'metaname'};
408 my $metapos = $self->{'metapos'};
409 $metapos = 0 if (!defined $metapos);
410
411 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
412 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
413
414 # This now stores the full pathname
415 my $doc_filename = $doc_rec->{'doc-file'}->[0];
416
417 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
418
419 # Release the lock once it is done
420 $self->unlock_collection($username, $collect);
421}
422
423sub get_metadata_from_archive_xml
424{
425 my $self = shift @_;
426 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
427
428 my @start_rules = ('Section' => \&dxml_start_section);
429
430 my @rules =
431 (
432 _default => 'raw',
433 'Metadata' => \&gfdxml_metadata
434 );
435
436 my $parser = XML::Rules->new
437 (
438 start_rules => \@start_rules,
439 rules => \@rules,
440 output_encoding => 'utf8'
441 );
442
443 my $xml_in = "";
444 if (!open(MIN,"<$doc_xml_filename"))
445 {
446 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
447 }
448 else
449 {
450 # Read them in
451 my $line;
452 while (defined ($line=<MIN>)) {
453 $xml_in .= $line;
454 }
455 close(MIN);
456
457 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
458
459 if(defined $parser->{'pad'}->{'metavalue'})
460 {
461 return $parser->{'pad'}->{'metavalue'};
462 }
463 else
464 {
465 return "";
466 }
467 }
468}
469
470sub gfdxml_metadata
471{
472 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
473
474 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
475 {
476 return;
477 }
478
479 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
480 {
481 if (!defined $parser->{'parameters'}->{'poscount'})
482 {
483 $parser->{'parameters'}->{'poscount'} = 0;
484 }
485 else
486 {
487 $parser->{'parameters'}->{'poscount'}++;
488 }
489 }
490
491 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
492 {
493 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
494 }
495}
496
497sub set_live_metadata
498{
499 my $self = shift @_;
500
501 my $username = $self->{'username'};
502 my $collect = $self->{'collect'};
503 my $gsdl_cgi = $self->{'gsdl_cgi'};
504 my $gsdlhome = $self->{'gsdlhome'};
505 my $infodbtype = $self->{'infodbtype'};
506
507 if ($baseaction::authentication_enabled) {
508 # Ensure the user is allowed to edit this collection
509 &authenticate_user($gsdl_cgi, $username, $collect);
510 }
511
512 # Obtain the collect dir
513 my $site = $self->{'site'};
514 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
515 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
516
517 # Make sure the collection isn't locked by someone else
518 $self->lock_collection($username, $collect);
519
520 # look up additional args
521 my $docid = $self->{'d'};
522 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
523 $gsdl_cgi->generate_error("No docid (d=...) specified.");
524 }
525 my $metavalue = $self->{'metavalue'};
526
527
528 # Generate the dbkey
529 my $metaname = $self->{'metaname'};
530 my $dbkey = "$docid.$metaname";
531
532 # To people who know $collect_tail please add some comments
533 # Obtain path to the database
534 my $collect_tail = $collect;
535 $collect_tail =~ s/^.*[\/\\]//;
536 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
537 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
538
539 # Set the new value
540 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
541 my $status = system($cmd);
542 if ($status != 0) {
543 # Catch error if gdbmget failed
544 my $mess = "Failed to set metadata key: $dbkey\n";
545
546 $mess .= "PATH: $ENV{'PATH'}\n";
547 $mess .= "cmd = $cmd\n";
548 $mess .= "Exit status: $status\n";
549 $mess .= "System Error Message: $!\n";
550
551 $gsdl_cgi->generate_error($mess);
552 }
553 else {
554 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
555 }
556
557 # Release the lock once it is done
558 $self->unlock_collection($username, $collect);
559}
560
561sub set_metadata_entry
562{
563 my $self = shift @_;
564 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
565
566 # To people who know $collect_tail please add some comments
567 # Obtain path to the database
568 my $collect_tail = $collect;
569 $collect_tail =~ s/^.*[\/\\]//;
570 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
571 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
572
573# print STDERR "**** infodb file path = $infodb_file_path\n";
574# print STDERR "***** infodb type = $infodbtype\n";
575
576 # Read the docid entry
577 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
578
579 # Set the metadata value
580 if (defined $metapos) {
581 $doc_rec->{$metaname}->[$metapos] = $metavalue;
582 }
583 else {
584 $doc_rec->{$metaname} = [ $metavalue ];
585 }
586
587 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
588
589 return $status;
590
591}
592
593sub set_metadata
594{
595 my $self = shift @_;
596
597 my $username = $self->{'username'};
598 my $collect = $self->{'collect'};
599 my $gsdl_cgi = $self->{'gsdl_cgi'};
600 my $gsdlhome = $self->{'gsdlhome'};
601
602 if ($baseaction::authentication_enabled) {
603 # Ensure the user is allowed to edit this collection
604 &authenticate_user($gsdl_cgi, $username, $collect);
605 }
606
607 my $site = $self->{'site'};
608 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610 $gsdl_cgi->checked_chdir($collect_dir);
611
612 # Obtain the collect dir
613 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
614
615 # Make sure the collection isn't locked by someone else
616 $self->lock_collection($username, $collect);
617
618 # look up additional args
619 my $docid = $self->{'d'};
620 my $metaname = $self->{'metaname'};
621 my $metapos = $self->{'metapos'};
622 my $metavalue = $self->{'metavalue'};
623 my $infodbtype = $self->{'infodbtype'};
624
625 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
626
627 if ($status != 0) {
628 # Catch error if set infodb entry failed
629 my $mess = "Failed to set metadata key: $docid\n";
630
631 $mess .= "PATH: $ENV{'PATH'}\n";
632 $mess .= "Exit status: $status\n";
633 $mess .= "System Error Message: $!\n";
634
635 $gsdl_cgi->generate_error($mess);
636 }
637 else {
638 my $mess = "set-metadata successful: Key[$docid]\n";
639 $mess .= " $metaname";
640 $mess .= "->[$metapos]" if (defined $metapos);
641 $mess .= " = $metavalue";
642
643 $gsdl_cgi->generate_ok_message($mess);
644 }
645
646 # Release the lock once it is done
647 $self->unlock_collection($username, $collect);
648}
649
650
651sub set_metadata_array
652{
653 my $self = shift @_;
654
655 my $username = $self->{'username'};
656 my $collect = $self->{'collect'};
657 my $gsdl_cgi = $self->{'gsdl_cgi'};
658 my $gsdlhome = $self->{'gsdlhome'};
659
660 if ($baseaction::authentication_enabled) {
661 # Ensure the user is allowed to edit this collection
662 &authenticate_user($gsdl_cgi, $username, $collect);
663 }
664
665 my $site = $self->{'site'};
666 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
667
668 $gsdl_cgi->checked_chdir($collect_dir);
669
670 # Obtain the collect dir
671 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
672
673 # Make sure the collection isn't locked by someone else
674 $self->lock_collection($username, $collect);
675
676 # look up additional args
677
678 my $infodbtype = $self->{'infodbtype'};
679
680 my $json_str = $self->{'json'};
681 my $doc_array = decode_json $json_str;
682
683
684 my $global_status = 0;
685 my $global_mess = "";
686
687 my @all_docids = ();
688
689 foreach my $doc_array_rec ( @$doc_array ) {
690
691 my $docid = $doc_array_rec->{'docid'};
692 my $metaname = $doc_array_rec->{'metaname'};
693 my $metapos = $doc_array_rec->{'metapos'};
694 my $metavalue = $doc_array_rec->{'metavalue'};
695
696 push(@all_docids,$docid);
697
698 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
699
700 if ($status != 0) {
701 # Catch error if set infodb entry failed
702 $global_status = $status;
703 $global_mess .= "Failed to set metadata key: $docid\n";
704 $global_mess .= "Exit status: $status\n";
705 $global_mess .= "System Error Message: $!\n";
706 $global_mess .= "-" x 20;
707 }
708 }
709
710 if ($global_status != 0) {
711 $global_mess .= "PATH: $ENV{'PATH'}\n";
712 $gsdl_cgi->generate_error($global_mess);
713 }
714 else {
715 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
716 $gsdl_cgi->generate_ok_message($mess);
717 }
718
719 # Release the lock once it is done
720 $self->unlock_collection($username, $collect);
721}
722
723
724sub dxml_metadata
725{
726 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
727 my $metaname = $parser->{'parameters'}->{'metaname'};
728 my $metamode = $parser->{'parameters'}->{'metamode'};
729
730 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731
732 # Find the right metadata tag and checks if we are going to
733 # override it
734 #
735 # Note: This over writes the first metadata block it
736 # encountered. If there are multiple Sections in the doc.xml, it
737 # might not behave as you would expect
738
739 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741## print STDERR "**** metamode = $metamode\n";
742
743 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744 {
745 my $name_attr = $attrHash->{'name'};
746 if (($name_attr eq $metaname) && ($metamode eq "override"))
747 {
748 if (!defined $parser->{'parameters'}->{'poscount'})
749 {
750 $parser->{'parameters'}->{'poscount'} = 0;
751 }
752 else
753 {
754 $parser->{'parameters'}->{'poscount'}++;
755 }
756
757 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758 {
759 ##print STDERR "**** got match!!\n";
760 # Get the value and override the current value
761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
762 $attrHash->{'_content'} = $metavalue;
763
764 # Don't want it to wipe out any other pieces of metadata
765 $parser->{'parameters'}->{'metamode'} = "done";
766 }
767 else if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} == $attrHash->{'_content'})
768 {
769 $attrHash->{'_content'} = $metavalue;
770 $parser->{'parameters'}->{'metamode'} = "done";
771 }
772 }
773 }
774
775 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
776 return [$tagname => $attrHash];
777}
778
779
780sub dxml_description
781{
782 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
783 my $metamode = $parser->{'parameters'}->{'metamode'};
784
785 # Accumulate the metadata
786 # NOTE: This appends new metadata element to all description fields.
787 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
788 if (($metamode eq "accumulate") || ($metamode eq "override")) {
789 # If get to here and metamode is override, the this means there
790 # was no existing value to overide => treat as an append operation
791
792 # Tack a new metadata tag on to the end of the <Metadata>+ block
793 my $metaname = $parser->{'parameters'}->{'metaname'};
794 my $metavalue = $parser->{'parameters'}->{'metavalue'};
795
796 my $metadata_attr = { '_content' => $metavalue,
797 'name' => $metaname,
798 'mode' => "accumulate" };
799
800 my $append_metadata = [ "Metadata" => $metadata_attr ];
801 my $description_content = $attrHash->{'_content'};
802
803## print STDERR "**** appending to doc.xml\n";
804
805 push(@$description_content," ", $append_metadata ,"\n ");
806 $parser->{'parameters'}->{'metamode'} = "done";
807 }
808
809
810 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
811 return [$tagname => $attrHash];
812}
813
814
815sub dxml_start_section
816{
817 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
818
819 my $new_depth = scalar(@$contextArray);
820
821 if ($new_depth == 1) {
822 $parser->{'parameters'}->{'curr_section_depth'} = 1;
823 $parser->{'parameters'}->{'curr_section_num'} = "";
824 }
825
826 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
827 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
828
829 my $new_secnum;
830
831 if ($new_depth > $old_depth) {
832 # child subsection
833 $new_secnum = "$old_secnum.1";
834 }
835 elsif ($new_depth == $old_depth) {
836 # sibling section => increase it's value by 1
837 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
838 $tail_num++;
839 $new_secnum = $old_secnum;
840 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
841 }
842 else {
843 # back up to parent section => lopp off tail
844 $new_secnum = $old_secnum;
845 $new_secnum =~ s/\.\d+$//;
846 }
847
848 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
849 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
850
851 ##print STDERR "*** In Section: $new_secnum\n";
852}
853
854sub edit_xml_file
855{
856 my $self = shift @_;
857 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
858
859 # use XML::Rules to add it in (read in and out again)
860 my $parser = XML::Rules->new(start_rules => $start_rules,
861 rules => $rules,
862 style => 'filter',
863 output_encoding => 'utf8' );
864
865 my $xml_in = "";
866 if (!open(MIN,"<$filename")) {
867 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
868 }
869 else {
870 # Read all the text in
871 my $line;
872 while (defined ($line=<MIN>)) {
873 $xml_in .= $line;
874 }
875 close(MIN);
876
877 my $MOUT;
878 if (!open($MOUT,">$filename")) {
879 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
880 }
881 else {
882 # Matched lines will get handled by the call backs
883## my $xml_out = "";
884
885 binmode($MOUT,":utf8");
886 $parser->filter($xml_in,$MOUT, $options);
887
888# binmode(MOUT,":utf8");
889# print MOUT $xml_out;
890 close($MOUT);
891 }
892 }
893}
894
895sub edit_doc_xml
896{
897 my $self = shift @_;
898 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
899
900 # To monitor which section/subsection number we are in
901 my @start_rules =
902 ( 'Section' => \&dxml_start_section );
903
904 # use XML::Rules to add it in (read in and out again)
905 # Set the call back functions
906 my @rules =
907 ( _default => 'raw',
908 'Metadata' => \&dxml_metadata,
909 'Description' => \&dxml_description);
910
911 # Sets the parameters
912 my $options = { 'metaname' => $metaname,
913 'metapos' => $metapos,
914 'metavalue' => $metavalue,
915 'metamode' => $metamode,
916 'prevmetavalue' => $prevmetavalue };
917
918 if (defined $opt_secnum) {
919 $options->{'secnum'} = $opt_secnum;
920 }
921
922 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
923}
924
925sub set_archives_metadata_entry
926{
927 my $self = shift @_;
928 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
929
930 # Obtain the doc.xml path for the specified docID
931 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
932
933 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
934 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
935 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
936
937 # The $doc_xml_file is relative to the archives, and now let's get the full path
938 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
939 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
940
941 # Edit the doc.xml file with the specified metadata name, value and position.
942 # TODO: there is a potential problem here as this edit_doc_xml function
943 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
944 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
945
946 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
947 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
948
949 return 0; # return 0 for now to indicate no error
950
951}
952
953
954sub set_archives_metadata
955{
956 my $self = shift @_;
957
958 my $username = $self->{'username'};
959 my $collect = $self->{'collect'};
960 my $gsdl_cgi = $self->{'gsdl_cgi'};
961 my $gsdlhome = $self->{'gsdlhome'};
962 my $infodbtype = $self->{'infodbtype'};
963
964 if ($baseaction::authentication_enabled) {
965 # Ensure the user is allowed to edit this collection
966 $self->authenticate_user($username, $collect);
967 }
968
969 my $site = $self->{'site'};
970
971 # Obtain the collect and archive dir
972 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
973
974 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
975
976 # Make sure the collection isn't locked by someone else
977 $self->lock_collection($username, $collect);
978
979 # look up additional args
980 my $docid = $self->{'d'};
981 my $metaname = $self->{'metaname'};
982 my $metavalue = $self->{'metavalue'};
983 my $prevmetavalue = $self->{'prevmetavalue'}
984
985 my $metapos = $self->{'metapos'};
986 $metapos = 0 if (!defined $metapos);
987
988 my $metamode = $self->{'metamode'};
989 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
990 # make "accumulate" the default (less destructive, as won't actually
991 # delete any existing values)
992 $metamode = "accumulate";
993 }
994
995 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
996 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
997
998 # Release the lock once it is done
999 $self->unlock_collection($username, $collect);
1000
1001 if ($status == 0) {
1002 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1003 $mess .= " $metaname";
1004 $mess .= "->[$metapos]" if (defined $metapos);
1005 $mess .= " = $metavalue";
1006 $mess .= " ($metamode)\n";
1007
1008 $gsdl_cgi->generate_ok_message($mess);
1009 }
1010 else {
1011 my $mess .= "Failed to set archives metadata key: $docid\n";
1012 $mess .= "Exit status: $status\n";
1013 $mess .= "System Error Message: $!\n";
1014 $mess .= "-" x 20 . "\n";
1015
1016 $gsdl_cgi->generate_error($mess);
1017 }
1018}
1019
1020
1021sub set_archives_metadata_array
1022{
1023 my $self = shift @_;
1024
1025 my $username = $self->{'username'};
1026 my $collect = $self->{'collect'};
1027 my $gsdl_cgi = $self->{'gsdl_cgi'};
1028 my $gsdlhome = $self->{'gsdlhome'};
1029
1030 if ($baseaction::authentication_enabled) {
1031 # Ensure the user is allowed to edit this collection
1032 &authenticate_user($gsdl_cgi, $username, $collect);
1033 }
1034
1035 my $site = $self->{'site'};
1036 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1037
1038 $gsdl_cgi->checked_chdir($collect_dir);
1039
1040 # Obtain the collect dir
1041 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1042
1043 # Make sure the collection isn't locked by someone else
1044 $self->lock_collection($username, $collect);
1045
1046 # look up additional args
1047
1048 my $infodbtype = $self->{'infodbtype'};
1049
1050 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1051
1052 my $json_str = $self->{'json'};
1053 my $doc_array = decode_json $json_str;
1054
1055
1056 my $global_status = 0;
1057 my $global_mess = "";
1058
1059 my @all_docids = ();
1060
1061 foreach my $doc_array_rec ( @$doc_array ) {
1062
1063 my $docid = $doc_array_rec->{'docid'};
1064 my $metaname = $doc_array_rec->{'metaname'};
1065 my $metapos = $doc_array_rec->{'metapos'};
1066 my $metamode = $self->{'metamode'};
1067 my $metavalue = $doc_array_rec->{'metavalue'};
1068
1069 # Some sanity checks
1070 $metapos = 0 if (!defined $metapos);
1071
1072 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1073 # make "accumulate" the default (less destructive, as won't actually
1074 # delete any existing values)
1075 $metamode = "accumulate";
1076 }
1077
1078 push(@all_docids,$docid);
1079
1080 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1081 $metaname,$metapos,$metavalue,$metamode);
1082
1083 if ($status != 0) {
1084 # Catch error if set infodb entry failed
1085 $global_status = $status;
1086 $global_mess .= "Failed to set metadata key: $docid\n";
1087 $global_mess .= "Exit status: $status\n";
1088 $global_mess .= "System Error Message: $!\n";
1089 $global_mess .= "-" x 20 . "\n";
1090 }
1091 }
1092
1093 if ($global_status != 0) {
1094 $global_mess .= "PATH: $ENV{'PATH'}\n";
1095 $gsdl_cgi->generate_error($global_mess);
1096 }
1097 else {
1098 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1099 $gsdl_cgi->generate_ok_message($mess);
1100 }
1101
1102 # Release the lock once it is done
1103 $self->unlock_collection($username, $collect);
1104}
1105
1106sub remove_archives_metadata
1107{
1108 my $self = shift @_;
1109
1110 my $username = $self->{'username'};
1111 my $collect = $self->{'collect'};
1112 my $gsdl_cgi = $self->{'gsdl_cgi'};
1113 my $gsdlhome = $self->{'gsdlhome'};
1114 my $infodbtype = $self->{'infodbtype'};
1115
1116 if ($baseaction::authentication_enabled)
1117 {
1118 # Ensure the user is allowed to edit this collection
1119 &authenticate_user($gsdl_cgi, $username, $collect);
1120 }
1121
1122 my $site = $self->{'site'};
1123
1124 # Obtain the collect and archive dir
1125 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1126
1127 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1128
1129 # Make sure the collection isn't locked by someone else
1130 $self->lock_collection($username, $collect);
1131
1132 # look up additional args
1133 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1134
1135 my $metaname = $self->{'metaname'};
1136 my $metapos = $self->{'metapos'};
1137 $metapos = 0 if (!defined $metapos);
1138
1139 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1140 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1141
1142 # This now stores the full pathname
1143 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1144
1145 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1146
1147 # Release the lock once it is done
1148 $self->unlock_collection($username, $collect);
1149
1150 if ($status == 0)
1151 {
1152 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1153 $mess .= " $metaname";
1154 $mess .= "->[$metapos]" if (defined $metapos);
1155
1156 $gsdl_cgi->generate_ok_message($mess);
1157 }
1158 else
1159 {
1160 my $mess .= "Failed to remove archives metadata key: $docid\n";
1161 $mess .= "Exit status: $status\n";
1162 $mess .= "System Error Message: $!\n";
1163 $mess .= "-" x 20 . "\n";
1164
1165 $gsdl_cgi->generate_error($mess);
1166 }
1167}
1168
1169sub remove_from_doc_xml
1170{
1171 my $self = shift @_;
1172 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1173
1174 my @start_rules = ('Section' => \&dxml_start_section);
1175
1176 # Set the call-back functions for the metadata tags
1177 my @rules =
1178 (
1179 _default => 'raw',
1180 'Metadata' => \&rfdxml_metadata
1181 );
1182
1183 my $parser = XML::Rules->new
1184 (
1185 start_rules => \@start_rules,
1186 rules => \@rules,
1187 style => 'filter',
1188 output_encoding => 'utf8'
1189 );
1190
1191 my $status = 0;
1192 my $xml_in = "";
1193 if (!open(MIN,"<$doc_xml_filename"))
1194 {
1195 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1196 $status = 1;
1197 }
1198 else
1199 {
1200 # Read them in
1201 my $line;
1202 while (defined ($line=<MIN>)) {
1203 $xml_in .= $line;
1204 }
1205 close(MIN);
1206
1207 # Filter with the call-back functions
1208 my $xml_out = "";
1209
1210 my $MOUT;
1211 if (!open($MOUT,">$doc_xml_filename")) {
1212 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1213 $status = 1;
1214 }
1215 else {
1216 binmode($MOUT,":utf8");
1217 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1218 close($MOUT);
1219 }
1220 }
1221 return $status;
1222}
1223
1224sub rfdxml_metadata
1225{
1226 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1227
1228 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1229 {
1230 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1231 return [$tagname => $attrHash];
1232 }
1233
1234 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1235 {
1236 if (!defined $parser->{'parameters'}->{'poscount'})
1237 {
1238 $parser->{'parameters'}->{'poscount'} = 0;
1239 }
1240 else
1241 {
1242 $parser->{'parameters'}->{'poscount'}++;
1243 }
1244 }
1245
1246 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1247 {
1248 return [];
1249 }
1250
1251 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1252 {
1253 return [];
1254 }
1255
1256 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1257 return [$tagname => $attrHash];
1258}
1259
1260sub mxml_metadata
1261{
1262 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1263 my $metaname = $parser->{'parameters'}->{'metaname'};
1264 my $metamode = $parser->{'parameters'}->{'metamode'};
1265
1266 # Report error if we don't see FileName tag before this
1267 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1268
1269 # Don't do anything if we are not in the right FileSet
1270 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1271 if ($file_regexp =~ /\.\*/) {
1272 # Only interested in a file_regexp if it specifies precisely one
1273 # file.
1274 # So, skip anything with a .* in it as it is too general
1275 return [$tagname => $attrHash];
1276 }
1277 my $src_file = $parser->{'parameters'}->{'src_file'};
1278 if (!($src_file =~ /$file_regexp/)) {
1279 return [$tagname => $attrHash];
1280 }
1281## print STDERR "*** mxl metamode = $metamode\n";
1282
1283 # Find the right metadata tag and checks if we are going to override it
1284 my $name_attr = $attrHash->{'name'};
1285 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1286 # Get the value and override the current value
1287 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1288 $attrHash->{'_content'} = $metavalue;
1289
1290## print STDERR "**** overrideing metadata.xml\n";
1291
1292 # Don't want it to wipe out any other pieces of metadata
1293 $parser->{'parameters'}->{'metamode'} = "done";
1294 }
1295
1296 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1297 return [$tagname => $attrHash];
1298}
1299
1300
1301sub mxml_description
1302{
1303 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1304 my $metamode = $parser->{'parameters'}->{'metamode'};
1305
1306 # Failed... Report error if we don't see FileName tag before this
1307 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1308
1309 # Don't do anything if we are not in the right FileSet
1310 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1311 if ($file_regexp =~ m/\.\*/) {
1312 # Only interested in a file_regexp if it specifies precisely one
1313 # file.
1314 # So, skip anything with a .* in it as it is too general
1315 return [$tagname => $attrHash];
1316 }
1317 my $src_file = $parser->{'parameters'}->{'src_file'};
1318
1319 if (!($src_file =~ m/$file_regexp/)) {
1320 return [$tagname => $attrHash];
1321 }
1322
1323 # Accumulate the metadata block to the end of the description block
1324 # Note: This adds metadata block to all description blocks, so if there are
1325 # multiple FileSets, it will add to all of them
1326 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1327 # if metamode was "override" but get to here then it failed to
1328 # find an item to override, in which case it should append its
1329 # value to the end, just like the "accumulate" mode
1330
1331 # tack a new metadata tag on to the end of the <Metadata>+ block
1332 my $metaname = $parser->{'parameters'}->{'metaname'};
1333 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1334
1335 my $metadata_attr = { '_content' => $metavalue,
1336 'name' => $metaname,
1337 'mode' => "accumulate" };
1338
1339 my $append_metadata = [ "Metadata" => $metadata_attr ];
1340 my $description_content = $attrHash->{'_content'};
1341
1342## print STDERR "*** appending to metadata.xml\n";
1343
1344 # append the new metadata element to the end of the current
1345 # content contained inside this tag
1346 if (ref($description_content) eq "") {
1347 # => string or numeric literal
1348 # this is caused by a <Description> block has no <Metadata> child elements
1349 # => set up an empty array in '_content'
1350 $attrHash->{'_content'} = [ "\n" ];
1351 $description_content = $attrHash->{'_content'};
1352 }
1353
1354 push(@$description_content," ", $append_metadata ,"\n ");
1355 $parser->{'parameters'}->{'metamode'} = "done";
1356 }
1357
1358 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1359 return [$tagname => $attrHash];
1360}
1361
1362
1363sub mxml_filename
1364{
1365 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1366
1367 # Store the filename of the Current Fileset
1368 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1369 # FileName tag must come before Description tag
1370 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1371
1372 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1373 return [$tagname => $attrHash];
1374}
1375
1376
1377sub mxml_fileset
1378{
1379 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1380
1381 # Initilise the current_file
1382 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1383 # FileName tag must come before Description tag
1384 $parser->{'parameters'}->{'current_file'} = "";
1385
1386 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1387 return [$tagname => $attrHash];
1388}
1389
1390
1391sub edit_metadata_xml
1392{
1393 my $self = shift @_;
1394 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1395
1396 # Set the call-back functions for the metadata tags
1397 my @rules =
1398 ( _default => 'raw',
1399 'FileName' => \&mxml_filename,
1400 'Metadata' => \&mxml_metadata,
1401 'Description' => \&mxml_description,
1402 'FileSet' => \&mxml_fileset);
1403
1404 # use XML::Rules to add it in (read in and out again)
1405 my $parser = XML::Rules->new(rules => \@rules,
1406 style => 'filter',
1407 output_encoding => 'utf8');
1408
1409 if (!-e $metadata_xml_filename) {
1410
1411 if (open(MOUT,">$metadata_xml_filename")) {
1412
1413 my $src_file_re = &util::filename_to_regex($src_file);
1414 # shouldn't the following also be in the above utility routine??
1415 # $src_file_re =~ s/\./\\./g;
1416
1417 print MOUT "<?xml version=\"1.0\"?>\n";
1418 print MOUT "<DirectoryMetadata>\n";
1419 print MOUT " <FileSet>\n";
1420 print MOUT " <FileName>$src_file_re</FileName>\n";
1421 print MOUT " <Description>\n";
1422 print MOUT " </Description>\n";
1423 print MOUT " </FileSet>\n";
1424 print MOUT "</DirectoryMetadata>\n";
1425
1426 close(MOUT);
1427 }
1428 else {
1429 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1430 }
1431 }
1432
1433
1434 my $xml_in = "";
1435 if (!open(MIN,"<$metadata_xml_filename")) {
1436 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1437 }
1438 else {
1439 # Read them in
1440 my $line;
1441 while (defined ($line=<MIN>)) {
1442 $xml_in .= $line;
1443 }
1444 close(MIN);
1445
1446 # Filter with the call-back functions
1447 my $xml_out = "";
1448
1449 my $MOUT;
1450 if (!open($MOUT,">$metadata_xml_filename")) {
1451 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1452 }
1453 else {
1454 binmode($MOUT,":utf8");
1455
1456 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1457 # At the moment, I will just hack it!
1458 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1459 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1460 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1461 #print MOUT $xml_out;
1462
1463 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1464 metavalue => $metavalue,
1465 metamode => $metamode,
1466 src_file => $src_file,
1467 current_file => undef} );
1468 close($MOUT);
1469 }
1470 }
1471}
1472
1473
1474sub set_import_metadata
1475{
1476 my $self = shift @_;
1477
1478 my $username = $self->{'username'};
1479 my $collect = $self->{'collect'};
1480 my $gsdl_cgi = $self->{'gsdl_cgi'};
1481 my $gsdlhome = $self->{'gsdlhome'};
1482 my $infodbtype = $self->{'infodbtype'};
1483
1484 if ($baseaction::authentication_enabled) {
1485 # Ensure the user is allowed to edit this collection
1486 $self->authenticate_user($username, $collect);
1487 }
1488
1489
1490 # Obtain the collect and archive dir
1491 my $site = $self->{'site'};
1492 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1493
1494 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1495 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1496
1497 # Make sure the collection isn't locked by someone else
1498 $self->lock_collection($username, $collect);
1499
1500 # look up additional args
1501 # want either d= or f=
1502 my $docid = $self->{'d'};
1503 my $import_file = $self->{'f'};
1504 if ((!defined $docid) && (!defined $import_file)) {
1505 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1506 }
1507
1508 # Get the parameters and set default mode to "accumulate"
1509 my $metaname = $self->{'metaname'};
1510 my $metavalue = $self->{'metavalue'};
1511## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1512 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1513 print STDERR "*** set import meta: val = $metavalue\n";
1514
1515 my $metamode = $self->{'metamode'};
1516 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1517 # make "accumulate" the default (less destructive, as won't actually
1518 # delete any existing values)
1519 $metamode = "accumulate";
1520 }
1521
1522 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1523 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1524 my $metadata_xml_file;
1525 my $import_filename = undef;
1526 if (defined $docid) {
1527 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1528 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1529
1530 # This now stores the full pathname
1531 $import_filename = $doc_rec->{'src-file'}->[0];
1532 }
1533 else {
1534 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1535 }
1536
1537 # figure out correct metadata.xml file [?]
1538 # Assuming the metadata.xml file is next to the source file
1539 # Note: This will not work if it is using the inherited metadata from the parent folder
1540 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1541 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1542
1543 # Edit the metadata.xml
1544 # Modified by Jeffrey from DL Consulting
1545 # Handle the case where there is one metadata.xml file for multiple FileSets
1546 # The XML filter needs to know whether it is in the right FileSet
1547 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1548 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1549 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1550 $metaname, $metavalue, $metamode, $import_tailname);
1551
1552 # Release the lock once it is done
1553 $self->unlock_collection($username, $collect);
1554
1555 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1556 $mess .= " $metaname";
1557 $mess .= " = $metavalue";
1558 $mess .= " ($metamode)\n";
1559
1560 $gsdl_cgi->generate_ok_message($mess);
1561
1562}
1563
1564sub set_import_metadata_array
1565{
1566 my $self = shift @_;
1567
1568 my $username = $self->{'username'};
1569 my $collect = $self->{'collect'};
1570 my $gsdl_cgi = $self->{'gsdl_cgi'};
1571 my $gsdlhome = $self->{'gsdlhome'};
1572
1573 if ($baseaction::authentication_enabled) {
1574 # Ensure the user is allowed to edit this collection
1575 &authenticate_user($gsdl_cgi, $username, $collect);
1576 }
1577
1578 my $site = $self->{'site'};
1579 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1580
1581 $gsdl_cgi->checked_chdir($collect_dir);
1582
1583 # Make sure the collection isn't locked by someone else
1584 $self->lock_collection($username, $collect);
1585
1586 # look up additional args
1587
1588 my $infodbtype = $self->{'infodbtype'};
1589
1590 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1591
1592 my $json_str = $self->{'json'};
1593 my $doc_array = decode_json $json_str;
1594
1595 my $global_status = 0;
1596 my $global_mess = "";
1597
1598 my @all_docids = ();
1599
1600 foreach my $doc_array_rec ( @$doc_array )
1601 {
1602 my $docid = $doc_array_rec->{'docid'};
1603 my $metaname = $doc_array_rec->{'metaname'};
1604 my $metamode = $self->{'metamode'};
1605 my $metavalue = $doc_array_rec->{'metavalue'};
1606
1607 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1608 # make "accumulate" the default (less destructive, as won't actually
1609 # delete any existing values)
1610 $metamode = "accumulate";
1611 }
1612
1613 push(@all_docids,$docid);
1614
1615 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1616 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1617 my $metadata_xml_file;
1618 my $import_filename = undef;
1619
1620 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1621 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1622
1623 # This now stores the full pathname
1624 $import_filename = $doc_rec->{'src-file'}->[0];
1625
1626 # figure out correct metadata.xml file [?]
1627 # Assuming the metadata.xml file is next to the source file
1628 # Note: This will not work if it is using the inherited metadata from the parent folder
1629 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1630 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1631
1632 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1633 }
1634
1635 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1636 $gsdl_cgi->generate_ok_message($mess);
1637
1638 # Release the lock once it is done
1639 $self->unlock_collection($username, $collect);
1640}
1641
1642sub remove_import_metadata
1643{
1644 my $self = shift @_;
1645
1646 my $username = $self->{'username'};
1647 my $collect = $self->{'collect'};
1648 my $gsdl_cgi = $self->{'gsdl_cgi'};
1649
1650 if ($baseaction::authentication_enabled) {
1651 # Ensure the user is allowed to edit this collection
1652 &authenticate_user($gsdl_cgi, $username, $collect);
1653 }
1654
1655 my $gsdlhome = $self->{'gsdlhome'};
1656 my $infodbtype = $self->{'infodbtype'};
1657
1658 # Obtain the collect dir
1659 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1660 my $site = $self->{'site'};
1661 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1662
1663 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1664 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1665
1666 # Make sure the collection isn't locked by someone else
1667 $self->lock_collection($username, $collect);
1668
1669 # look up additional args
1670 my $docid = $self->{'d'};
1671 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1672 {
1673 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1674 }
1675
1676 my $metaname = $self->{'metaname'};
1677 my $metavalue = $self->{'metavalue'};
1678 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1679
1680 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1681 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1682 my $metadata_xml_file;
1683 my $import_filename = undef;
1684 if (defined $docid)
1685 {
1686 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1687 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1688
1689 # This now stores the full pathname
1690 $import_filename = $doc_rec->{'src-file'}->[0];
1691 }
1692
1693 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1694 {
1695 $gsdl_cgi->generate_error_message("There is no metadata\n");
1696 }
1697
1698 # figure out correct metadata.xml file [?]
1699 # Assuming the metadata.xml file is next to the source file
1700 # Note: This will not work if it is using the inherited metadata from the parent folder
1701 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1702 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1703
1704 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1705
1706 # Release the lock once it is done
1707 $self->unlock_collection($username, $collect);
1708
1709 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1710 $mess .= " $metaname";
1711 $mess .= " = $metavalue\n";
1712
1713 $gsdl_cgi->generate_ok_message($mess);
1714}
1715
1716sub remove_from_metadata_xml
1717{
1718 my $self = shift @_;
1719 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1720
1721 # Set the call-back functions for the metadata tags
1722 my @rules =
1723 (
1724 _default => 'raw',
1725 'Metadata' => \&rfmxml_metadata,
1726 'FileName' => \&mxml_filename
1727 );
1728
1729 my $parser = XML::Rules->new
1730 (
1731 rules => \@rules,
1732 style => 'filter',
1733 output_encoding => 'utf8'
1734 );
1735
1736 my $xml_in = "";
1737 if (!open(MIN,"<$metadata_xml_filename"))
1738 {
1739 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1740 }
1741 else
1742 {
1743 # Read them in
1744 my $line;
1745 while (defined ($line=<MIN>)) {
1746 $xml_in .= $line;
1747 }
1748 close(MIN);
1749
1750 # Filter with the call-back functions
1751 my $xml_out = "";
1752
1753 my $MOUT;
1754 if (!open($MOUT,">$metadata_xml_filename")) {
1755 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1756 }
1757 else {
1758 binmode($MOUT,":utf8");
1759 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1760 close($MOUT);
1761 }
1762 }
1763}
1764
1765sub rfmxml_metadata
1766{
1767 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1768
1769 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1770 {
1771 return [];
1772 }
1773
1774 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1775 return [$tagname => $attrHash];
1776}
1777
1778sub remove_live_metadata
1779{
1780 my $self = shift @_;
1781
1782 my $username = $self->{'username'};
1783 my $collect = $self->{'collect'};
1784 my $gsdl_cgi = $self->{'gsdl_cgi'};
1785 my $gsdlhome = $self->{'gsdlhome'};
1786 my $infodbtype = $self->{'infodbtype'};
1787
1788 if ($baseaction::authentication_enabled) {
1789 # Ensure the user is allowed to edit this collection
1790 &authenticate_user($gsdl_cgi, $username, $collect);
1791 }
1792
1793 # Obtain the collect dir
1794 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1795 my $site = $self->{'site'};
1796 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1797
1798 # Make sure the collection isn't locked by someone else
1799 $self->lock_collection($username, $collect);
1800
1801 # look up additional args
1802 my $docid = $self->{'d'};
1803 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1804 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1805 }
1806
1807 # Generate the dbkey
1808 my $metaname = $self->{'metaname'};
1809 my $dbkey = "$docid.$metaname";
1810
1811 # To people who know $collect_tail please add some comments
1812 # Obtain the live gdbm_db path
1813 my $collect_tail = $collect;
1814 $collect_tail =~ s/^.*[\/\\]//;
1815 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1816 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1817
1818 # Remove the key
1819 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1820 my $status = system($cmd);
1821 if ($status != 0) {
1822 # Catch error if gdbmdel failed
1823 my $mess = "Failed to set metadata key: $dbkey\n";
1824
1825 $mess .= "PATH: $ENV{'PATH'}\n";
1826 $mess .= "cmd = $cmd\n";
1827 $mess .= "Exit status: $status\n";
1828 $mess .= "System Error Message: $!\n";
1829
1830 $gsdl_cgi->generate_error($mess);
1831 }
1832 else {
1833 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1834 }
1835
1836}
1837
1838
1839sub remove_metadata
1840{
1841 my $self = shift @_;
1842
1843 my $username = $self->{'username'};
1844 my $collect = $self->{'collect'};
1845 my $gsdl_cgi = $self->{'gsdl_cgi'};
1846 my $gsdlhome = $self->{'gsdlhome'};
1847 my $infodbtype = $self->{'infodbtype'};
1848
1849 if ($baseaction::authentication_enabled) {
1850 # Ensure the user is allowed to edit this collection
1851 &authenticate_user($gsdl_cgi, $username, $collect);
1852 }
1853
1854 # Obtain the collect dir
1855 my $site = $self->{'site'};
1856 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1857 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1858
1859 # Make sure the collection isn't locked by someone else
1860 $self->lock_collection($username, $collect);
1861
1862 # look up additional args
1863 my $docid = $self->{'d'};
1864 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1865 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1866 }
1867 my $metaname = $self->{'metaname'};
1868 my $metapos = $self->{'metapos'};
1869 my $metavalue = $self->{'metavalue'};
1870
1871 # To people who know $collect_tail please add some comments
1872 # Obtain the path to the database
1873 my $collect_tail = $collect;
1874 $collect_tail =~ s/^.*[\/\\]//;
1875 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1876 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1877
1878 # Read the docid entry
1879 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1880
1881 # Basically loop through and unescape_html the values
1882 foreach my $k (keys %$doc_rec) {
1883 my @escaped_v = ();
1884 foreach my $v (@{$doc_rec->{$k}}) {
1885 if ($k eq "contains") {
1886 # protect quotes in ".2;".3 etc
1887 $v =~ s/\"/\\\"/g;
1888 push(@escaped_v, $v);
1889 }
1890 else {
1891 my $ev = &ghtml::unescape_html($v);
1892 $ev =~ s/\"/\\\"/g;
1893 push(@escaped_v, $ev);
1894 }
1895 }
1896 $doc_rec->{$k} = \@escaped_v;
1897 }
1898
1899 # Check to make sure the key does exist
1900 if (!defined ($doc_rec->{$metaname})) {
1901 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1902 }
1903
1904 # Obtain the specified metadata pos
1905 $metapos = 0 if (!defined $metapos);
1906
1907 # consider check key is defined before deleting?
1908 # Loop through the metadata array and ignore the specified position
1909 my $filtered_metadata = [];
1910 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1911 for (my $i=0; $i<$num_metadata_vals; $i++) {
1912 my $metaval = shift(@{$doc_rec->{$metaname}});
1913
1914 if (!defined $metavalue && $i != $metapos) {
1915 push(@$filtered_metadata,$metaval)
1916 }
1917
1918 if(defined $metavalue && !($metavalue eq $metaval))
1919 {
1920 push(@$filtered_metadata,$metavalue)
1921 }
1922 }
1923 $doc_rec->{$metaname} = $filtered_metadata;
1924
1925 # Turn the record back to string
1926 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1927
1928 # Store it back to the database
1929 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1930 my $status = system($cmd);
1931 if ($status != 0) {
1932 my $mess = "Failed to set metadata key: $docid\n";
1933
1934 $mess .= "PATH: $ENV{'PATH'}\n";
1935 $mess .= "cmd = $cmd\n";
1936 $mess .= "Exit status: $status\n";
1937 $mess .= "System Error Message: $!\n";
1938
1939 $gsdl_cgi->generate_error($mess);
1940 }
1941 else {
1942 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1943 $mess .= " $metaname";
1944 $mess .= "->[$metapos]" if (defined $metapos);
1945
1946 $gsdl_cgi->generate_ok_message($mess);
1947 }
1948}
1949
1950
1951# Was trying to reused the codes, but the functions need to be broken
1952# down more before they can be reused, otherwise there will be too
1953# much overhead and duplicate process...
1954sub insert_metadata
1955{
1956 my $self = shift @_;
1957
1958 my $username = $self->{'username'};
1959 my $collect = $self->{'collect'};
1960 my $gsdl_cgi = $self->{'gsdl_cgi'};
1961 my $gsdlhome = $self->{'gsdlhome'};
1962 my $infodbtype = $self->{'infodbtype'};
1963
1964 # If the import metadata and gdbm database have been updated, we
1965 # need to insert some notification to warn user that the the text
1966 # they see at the moment is not indexed and require a rebuild.
1967 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1968
1969 if ($baseaction::authentication_enabled) {
1970 # Ensure the user is allowed to edit this collection
1971 $self->authenticate_user($username, $collect);
1972 }
1973
1974 # Obtain the collect and archive dir
1975 my $site = $self->{'site'};
1976 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1977 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1978 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1979
1980 # Make sure the collection isn't locked by someone else
1981 $self->lock_collection($username, $collect);
1982
1983 # Check additional args
1984 my $docid = $self->{'d'};
1985 if (!defined($docid)) {
1986 $gsdl_cgi->generate_error("No document id is specified: d=...");
1987 }
1988 my $metaname = $self->{'metaname'};
1989 if (!defined($metaname)) {
1990 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1991 }
1992 my $metavalue = $self->{'metavalue'};
1993 if (!defined($metavalue) || $metavalue eq "") {
1994 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1995 }
1996 # make "accumulate" the default (less destructive, as won't actually
1997 # delete any existing values)
1998 my $metamode = "accumulate";
1999
2000 #=======================================================================#
2001 # set_import_metadata [START]
2002 #=======================================================================#
2003 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2004 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2005 my $metadata_xml_file;
2006 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2007 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2008
2009 # This now stores the full pathname
2010 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2011
2012 # figure out correct metadata.xml file [?]
2013 # Assuming the metadata.xml file is next to the source file
2014 # Note: This will not work if it is using the inherited metadata from the parent folder
2015 my ($import_tailname, $import_dirname)
2016 = File::Basename::fileparse($import_filename);
2017 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2018
2019 # Shane's escape characters
2020 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2021 $metavalue =~ s/\,/&#44;/g;
2022 $metavalue =~ s/\:/&#58;/g;
2023 $metavalue =~ s/\|/&#124;/g;
2024 $metavalue =~ s/\(/&#40;/g;
2025 $metavalue =~ s/\)/&#41;/g;
2026 $metavalue =~ s/\[/&#91;/g;
2027 $metavalue =~ s/\\/&#92;/g;
2028 $metavalue =~ s/\]/&#93;/g;
2029 $metavalue =~ s/\{/&#123;/g;
2030 $metavalue =~ s/\}/&#125;/g;
2031 $metavalue =~ s/\"/&#34;/g;
2032 $metavalue =~ s/\`/&#96;/g;
2033 $metavalue =~ s/\n/_newline_/g;
2034
2035 # Edit the metadata.xml
2036 # Modified by Jeffrey from DL Consulting
2037 # Handle the case where there is one metadata.xml file for multiple FileSets
2038 # The XML filter needs to know whether it is in the right FileSet
2039 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2040 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2041 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2042 $metaname, $metavalue, $metamode, $import_tailname);
2043 #=======================================================================#
2044 # set_import_metadata [END]
2045 #=======================================================================#
2046
2047
2048 #=======================================================================#
2049 # set_metadata (accumulate version) [START]
2050 #=======================================================================#
2051 # To people who know $collect_tail please add some comments
2052 # Obtain path to the database
2053 my $collect_tail = $collect;
2054 $collect_tail =~ s/^.*[\/\\]//;
2055 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2056 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2057
2058 # Read the docid entry
2059 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2060
2061 foreach my $k (keys %$doc_rec) {
2062 my @escaped_v = ();
2063 foreach my $v (@{$doc_rec->{$k}}) {
2064 if ($k eq "contains") {
2065 # protect quotes in ".2;".3 etc
2066 $v =~ s/\"/\\\"/g;
2067 push(@escaped_v, $v);
2068 }
2069 else {
2070 my $ev = &ghtml::unescape_html($v);
2071 $ev =~ s/\"/\\\"/g;
2072 push(@escaped_v, $ev);
2073 }
2074 }
2075 $doc_rec->{$k} = \@escaped_v;
2076 }
2077
2078 # Protect the quotes
2079 $metavalue =~ s/\"/\\\"/g;
2080
2081 # Adds the pending macro
2082 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2083
2084 # If the metadata doesn't exist, create a new one
2085 if (!defined($doc_rec->{$metaname})){
2086 $doc_rec->{$metaname} = [ $macro_metavalue ];
2087 }
2088 # Else, let's acculumate the values
2089 else {
2090 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2091 }
2092
2093 # Generate the record string
2094 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2095
2096 # Store it into GDBM
2097 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2098 my $status = system($cmd);
2099 if ($status != 0) {
2100 # Catch error if gdbmget failed
2101 my $mess = "Failed to set metadata key: $docid\n";
2102
2103 $mess .= "PATH: $ENV{'PATH'}\n";
2104 $mess .= "cmd = $cmd\n";
2105 $mess .= "Exit status: $status\n";
2106 $mess .= "System Error Message: $!\n";
2107
2108 $gsdl_cgi->generate_error($mess);
2109 }
2110 else {
2111 my $mess = "insert-metadata successful: Key[$docid]\n";
2112 $mess .= " [In metadata.xml] $metaname";
2113 $mess .= " = $metavalue\n";
2114 $mess .= " [In database] $metaname";
2115 $mess .= " = $macro_metavalue\n";
2116 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2117 $gsdl_cgi->generate_ok_message($mess);
2118 }
2119 #=======================================================================#
2120 # set_metadata (accumulate version) [END]
2121 #=======================================================================#
2122
2123 # Release the lock once it is done
2124 $self->unlock_collection($username, $collect);
2125}
2126
21271;
Note: See TracBrowser for help on using the repository browser.