source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 25102

Last change on this file since 25102 was 25102, checked in by sjm84, 9 years ago

Fixed a few bugs

File size: 66.0 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
63
64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
68
69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
72
73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
85
86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
102
103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
106
107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129
130 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
131
132 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
133
134 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
135
136 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
137
138 return bless $self, $class;
139}
140
141
142sub get_live_metadata
143{
144 my $self = shift @_;
145
146 my $username = $self->{'username'};
147 my $collect = $self->{'collect'};
148 my $gsdl_cgi = $self->{'gsdl_cgi'};
149 my $gsdlhome = $self->{'gsdlhome'};
150 my $infodbtype = $self->{'infodbtype'};
151
152 # live metadata gets/saves value scoped (prefixed) by the current usename
153 # so (for now) let's not bother to enforce authentication
154
155 # Obtain the collect dir
156 my $site = $self->{'site'};
157 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
158 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
159
160 # Make sure the collection isn't locked by someone else
161 $self->lock_collection($username, $collect);
162
163 # look up additional args
164 my $docid = $self->{'d'};
165 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
166 $gsdl_cgi->generate_error("No docid (d=...) specified.");
167 }
168
169 # Generate the dbkey
170 my $metaname = $self->{'metaname'};
171 my $dbkey = "$docid.$metaname";
172
173 # To people who know $collect_tail please add some comments
174 # Obtain path to the database
175 my $collect_tail = $collect;
176 $collect_tail =~ s/^.*[\/|\\]//;
177 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
178 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
179
180 # Obtain the content of the key
181 my $cmd = "gdbmget $infodb_file_path $dbkey";
182 if (open(GIN,"$cmd |") == 0) {
183 # Catch error if gdbmget failed
184 my $mess = "Failed to get metadata key: $metaname\n";
185 $mess .= "$!\n";
186
187 $gsdl_cgi->generate_error($mess);
188 }
189 else {
190 binmode(GIN,":utf8");
191 # Read everything in and concatenate them into $metavalue
192 my $metavalue = "";
193 my $line;
194 while (defined ($line=<GIN>)) {
195 $metavalue .= $line;
196 }
197 close(GIN);
198 chomp($metavalue); # Get rid off the tailing newlines
199 $gsdl_cgi->generate_ok_message("$metavalue");
200 }
201
202 # Release the lock once it is done
203 $self->unlock_collection($username, $collect);
204}
205
206
207sub get_metadata
208{
209 my $self = shift @_;
210
211 my $username = $self->{'username'};
212 my $collect = $self->{'collect'};
213 my $gsdl_cgi = $self->{'gsdl_cgi'};
214 my $gsdlhome = $self->{'gsdlhome'};
215
216 # Authenticate user if it is enabled
217 if ($baseaction::authentication_enabled) {
218 # Ensure the user is allowed to edit this collection
219 &authenticate_user($gsdl_cgi, $username, $collect);
220 }
221
222 # Obtain the collect dir
223 my $site = $self->{'site'};
224 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
225 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
226
227 # Make sure the collection isn't locked by someone else
228 $self->lock_collection($username, $collect);
229
230 # look up additional args
231 my $docid = $self->{'d'};
232 my $metaname = $self->{'metaname'};
233 my $metapos = $self->{'metapos'};
234 my $infodbtype = $self->{'infodbtype'};
235
236 # To people who know $collect_tail please add some comments
237 # Obtain path to the database
238 my $collect_tail = $collect;
239 $collect_tail =~ s/^.*[\/\\]//;
240 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
241 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
242
243 # Read the docid entry
244 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
245
246 # Basically loop through and unescape_html the values
247 foreach my $k (keys %$doc_rec) {
248 my @escaped_v = ();
249 foreach my $v (@{$doc_rec->{$k}}) {
250 my $ev = &ghtml::unescape_html($v);
251 push(@escaped_v, $ev);
252 }
253 $doc_rec->{$k} = \@escaped_v;
254 }
255
256 # Obtain the specified metadata value
257 $metapos = 0 if (!defined $metapos);
258 my $metavalue = $doc_rec->{$metaname}->[$metapos];
259 $gsdl_cgi->generate_ok_message("$metavalue");
260
261 # Release the lock once it is done
262 $self->unlock_collection($username, $collect);
263}
264
265
266sub get_import_metadata
267{
268 my $self = shift @_;
269
270 my $username = $self->{'username'};
271 my $collect = $self->{'collect'};
272 my $gsdl_cgi = $self->{'gsdl_cgi'};
273 my $gsdlhome = $self->{'gsdlhome'};
274
275 # Authenticate user if it is enabled
276 if ($baseaction::authentication_enabled) {
277 # Ensure the user is allowed to edit this collection
278 &authenticate_user($gsdl_cgi, $username, $collect);
279 }
280
281 # Obtain the collect dir
282 my $site = $self->{'site'};
283 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
284 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
285
286 # Make sure the collection isn't locked by someone else
287 $self->lock_collection($username, $collect);
288
289 # look up additional args
290 my $docid = $self->{'d'};
291 my $metaname = $self->{'metaname'};
292 my $infodbtype = $self->{'infodbtype'};
293 if (!defined $docid)
294 {
295 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
296 }
297
298 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
299 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
300 my $metadata_xml_file;
301 my $import_filename = undef;
302
303
304 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
305 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
306 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
307
308 # This now stores the full pathname
309 $import_filename = $doc_rec->{'src-file'}->[0];
310
311 # figure out correct metadata.xml file [?]
312 # Assuming the metadata.xml file is next to the source file
313 # Note: This will not work if it is using the inherited metadata from the parent folder
314 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
315 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
316
317 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
318
319 # Release the lock once it is done
320 $self->unlock_collection($username, $collect);
321}
322
323sub get_metadata_from_metadata_xml
324{
325 my $self = shift @_;
326 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
327
328 my @rules =
329 (
330 _default => 'raw',
331 'Metadata' => \&gfmxml_metadata,
332 'FileName' => \&mxml_filename
333 );
334
335 my $parser = XML::Rules->new
336 (
337 rules => \@rules,
338 output_encoding => 'utf8'
339 );
340
341 my $xml_in = "";
342 if (!open(MIN,"<$metadata_xml_filename"))
343 {
344 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
345 }
346 else
347 {
348 # Read them in
349 my $line;
350 while (defined ($line=<MIN>)) {
351 $xml_in .= $line;
352 }
353 close(MIN);
354
355 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
356
357 if(defined $parser->{'pad'}->{'metavalue'})
358 {
359 return $parser->{'pad'}->{'metavalue'};
360 }
361 else
362 {
363 return "";
364 }
365 }
366}
367
368sub gfmxml_metadata
369{
370 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
371
372 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
373 {
374 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
375 }
376}
377
378sub get_archives_metadata
379{
380 my $self = shift @_;
381
382 my $username = $self->{'username'};
383 my $collect = $self->{'collect'};
384 my $gsdl_cgi = $self->{'gsdl_cgi'};
385 my $gsdlhome = $self->{'gsdlhome'};
386 my $infodbtype = $self->{'infodbtype'};
387
388 # Authenticate user if it is enabled
389 if ($baseaction::authentication_enabled) {
390 # Ensure the user is allowed to edit this collection
391 &authenticate_user($gsdl_cgi, $username, $collect);
392 }
393
394 # Obtain the collect dir
395 my $site = $self->{'site'};
396 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
397
398 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
399
400 # Make sure the collection isn't locked by someone else
401 $self->lock_collection($username, $collect);
402
403 # look up additional args
404 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
405 $docid_secnum = "" if (!defined $docid_secnum);
406
407 my $metaname = $self->{'metaname'};
408 my $metapos = $self->{'metapos'};
409 $metapos = 0 if (!defined $metapos);
410
411 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
412 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
413
414 # This now stores the full pathname
415 my $doc_filename = $doc_rec->{'doc-file'}->[0];
416
417 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
418
419 # Release the lock once it is done
420 $self->unlock_collection($username, $collect);
421}
422
423sub get_metadata_from_archive_xml
424{
425 my $self = shift @_;
426 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
427
428 my @start_rules = ('Section' => \&dxml_start_section);
429
430 my @rules =
431 (
432 _default => 'raw',
433 'Metadata' => \&gfdxml_metadata
434 );
435
436 my $parser = XML::Rules->new
437 (
438 start_rules => \@start_rules,
439 rules => \@rules,
440 output_encoding => 'utf8'
441 );
442
443 my $xml_in = "";
444 if (!open(MIN,"<$doc_xml_filename"))
445 {
446 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
447 }
448 else
449 {
450 # Read them in
451 my $line;
452 while (defined ($line=<MIN>)) {
453 $xml_in .= $line;
454 }
455 close(MIN);
456
457 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
458
459 if(defined $parser->{'pad'}->{'metavalue'})
460 {
461 return $parser->{'pad'}->{'metavalue'};
462 }
463 else
464 {
465 return "";
466 }
467 }
468}
469
470sub gfdxml_metadata
471{
472 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
473
474 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
475 {
476 return;
477 }
478
479 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
480 {
481 if (!defined $parser->{'parameters'}->{'poscount'})
482 {
483 $parser->{'parameters'}->{'poscount'} = 0;
484 }
485 else
486 {
487 $parser->{'parameters'}->{'poscount'}++;
488 }
489 }
490
491 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
492 {
493 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
494 }
495}
496
497sub set_live_metadata
498{
499 my $self = shift @_;
500
501 my $username = $self->{'username'};
502 my $collect = $self->{'collect'};
503 my $gsdl_cgi = $self->{'gsdl_cgi'};
504 my $gsdlhome = $self->{'gsdlhome'};
505 my $infodbtype = $self->{'infodbtype'};
506
507 if ($baseaction::authentication_enabled) {
508 # Ensure the user is allowed to edit this collection
509 &authenticate_user($gsdl_cgi, $username, $collect);
510 }
511
512 # Obtain the collect dir
513 my $site = $self->{'site'};
514 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
515 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
516
517 # Make sure the collection isn't locked by someone else
518 $self->lock_collection($username, $collect);
519
520 # look up additional args
521 my $docid = $self->{'d'};
522 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
523 $gsdl_cgi->generate_error("No docid (d=...) specified.");
524 }
525 my $metavalue = $self->{'metavalue'};
526
527
528 # Generate the dbkey
529 my $metaname = $self->{'metaname'};
530 my $dbkey = "$docid.$metaname";
531
532 # To people who know $collect_tail please add some comments
533 # Obtain path to the database
534 my $collect_tail = $collect;
535 $collect_tail =~ s/^.*[\/\\]//;
536 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
537 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
538
539 # Set the new value
540 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
541 my $status = system($cmd);
542 if ($status != 0) {
543 # Catch error if gdbmget failed
544 my $mess = "Failed to set metadata key: $dbkey\n";
545
546 $mess .= "PATH: $ENV{'PATH'}\n";
547 $mess .= "cmd = $cmd\n";
548 $mess .= "Exit status: $status\n";
549 $mess .= "System Error Message: $!\n";
550
551 $gsdl_cgi->generate_error($mess);
552 }
553 else {
554 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
555 }
556
557 # Release the lock once it is done
558 $self->unlock_collection($username, $collect);
559}
560
561sub set_metadata_entry
562{
563 my $self = shift @_;
564 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
565
566 # To people who know $collect_tail please add some comments
567 # Obtain path to the database
568 my $collect_tail = $collect;
569 $collect_tail =~ s/^.*[\/\\]//;
570 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
571 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
572
573# print STDERR "**** infodb file path = $infodb_file_path\n";
574# print STDERR "***** infodb type = $infodbtype\n";
575
576 # Read the docid entry
577 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
578
579 # Set the metadata value
580 if (defined $metapos) {
581 $doc_rec->{$metaname}->[$metapos] = $metavalue;
582 }
583 else {
584 $doc_rec->{$metaname} = [ $metavalue ];
585 }
586
587 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
588
589 return $status;
590
591}
592
593sub set_metadata
594{
595 my $self = shift @_;
596
597 my $username = $self->{'username'};
598 my $collect = $self->{'collect'};
599 my $gsdl_cgi = $self->{'gsdl_cgi'};
600 my $gsdlhome = $self->{'gsdlhome'};
601
602 if ($baseaction::authentication_enabled) {
603 # Ensure the user is allowed to edit this collection
604 &authenticate_user($gsdl_cgi, $username, $collect);
605 }
606
607 my $site = $self->{'site'};
608 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610 $gsdl_cgi->checked_chdir($collect_dir);
611
612 # Obtain the collect dir
613 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
614
615 # Make sure the collection isn't locked by someone else
616 $self->lock_collection($username, $collect);
617
618 # look up additional args
619 my $docid = $self->{'d'};
620 my $metaname = $self->{'metaname'};
621 my $metapos = $self->{'metapos'};
622 my $metavalue = $self->{'metavalue'};
623 my $infodbtype = $self->{'infodbtype'};
624
625 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
626
627 if ($status != 0) {
628 # Catch error if set infodb entry failed
629 my $mess = "Failed to set metadata key: $docid\n";
630
631 $mess .= "PATH: $ENV{'PATH'}\n";
632 $mess .= "Exit status: $status\n";
633 $mess .= "System Error Message: $!\n";
634
635 $gsdl_cgi->generate_error($mess);
636 }
637 else {
638 my $mess = "set-metadata successful: Key[$docid]\n";
639 $mess .= " $metaname";
640 $mess .= "->[$metapos]" if (defined $metapos);
641 $mess .= " = $metavalue";
642
643 $gsdl_cgi->generate_ok_message($mess);
644 }
645
646 # Release the lock once it is done
647 $self->unlock_collection($username, $collect);
648}
649
650
651sub set_metadata_array
652{
653 my $self = shift @_;
654
655 my $username = $self->{'username'};
656 my $collect = $self->{'collect'};
657 my $gsdl_cgi = $self->{'gsdl_cgi'};
658 my $gsdlhome = $self->{'gsdlhome'};
659
660 if ($baseaction::authentication_enabled) {
661 # Ensure the user is allowed to edit this collection
662 &authenticate_user($gsdl_cgi, $username, $collect);
663 }
664
665 my $site = $self->{'site'};
666 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
667
668 $gsdl_cgi->checked_chdir($collect_dir);
669
670 # Obtain the collect dir
671 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
672
673 # Make sure the collection isn't locked by someone else
674 $self->lock_collection($username, $collect);
675
676 # look up additional args
677
678 my $infodbtype = $self->{'infodbtype'};
679
680 my $json_str = $self->{'json'};
681 my $doc_array = decode_json $json_str;
682
683
684 my $global_status = 0;
685 my $global_mess = "";
686
687 my @all_docids = ();
688
689 foreach my $doc_array_rec ( @$doc_array ) {
690
691 my $docid = $doc_array_rec->{'docid'};
692 my $metaname = $doc_array_rec->{'metaname'};
693 my $metapos = $doc_array_rec->{'metapos'};
694 my $metavalue = $doc_array_rec->{'metavalue'};
695
696 push(@all_docids,$docid);
697
698 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
699
700 if ($status != 0) {
701 # Catch error if set infodb entry failed
702 $global_status = $status;
703 $global_mess .= "Failed to set metadata key: $docid\n";
704 $global_mess .= "Exit status: $status\n";
705 $global_mess .= "System Error Message: $!\n";
706 $global_mess .= "-" x 20;
707 }
708 }
709
710 if ($global_status != 0) {
711 $global_mess .= "PATH: $ENV{'PATH'}\n";
712 $gsdl_cgi->generate_error($global_mess);
713 }
714 else {
715 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
716 $gsdl_cgi->generate_ok_message($mess);
717 }
718
719 # Release the lock once it is done
720 $self->unlock_collection($username, $collect);
721}
722
723
724sub dxml_metadata
725{
726 print STDERR "IN dxml_metadata\n";
727 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
728 my $metaname = $parser->{'parameters'}->{'metaname'};
729 my $metamode = $parser->{'parameters'}->{'metamode'};
730
731 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
732
733 # Find the right metadata tag and checks if we are going to
734 # override it
735 #
736 # Note: This over writes the first metadata block it
737 # encountered. If there are multiple Sections in the doc.xml, it
738 # might not behave as you would expect
739
740 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
741## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
742## print STDERR "**** metamode = $metamode\n";
743
744 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
745 {
746 my $name_attr = $attrHash->{'name'};
747 if (($name_attr eq $metaname) && ($metamode eq "override"))
748 {
749 if (!defined $parser->{'parameters'}->{'poscount'})
750 {
751 $parser->{'parameters'}->{'poscount'} = 0;
752 }
753 else
754 {
755 $parser->{'parameters'}->{'poscount'}++;
756 }
757
758 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
759 {
760 ##print STDERR "**** got match!!\n";
761 # Get the value and override the current value
762 my $metavalue = $parser->{'parameters'}->{'metavalue'};
763 $attrHash->{'_content'} = $metavalue;
764
765 # Don't want it to wipe out any other pieces of metadata
766 $parser->{'parameters'}->{'metamode'} = "done";
767 }
768 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} == $attrHash->{'_content'})
769 {
770 my $metavalue = $parser->{'parameters'}->{'metavalue'};
771 $attrHash->{'_content'} = $metavalue;
772 $parser->{'parameters'}->{'metamode'} = "done";
773 }
774 }
775 }
776
777 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
778 return [$tagname => $attrHash];
779}
780
781
782sub dxml_description
783{
784 print STDERR "IN dxml_description\n";
785 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
786 my $metamode = $parser->{'parameters'}->{'metamode'};
787
788 # Accumulate the metadata
789 # NOTE: This appends new metadata element to all description fields.
790 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
791 if (($metamode eq "accumulate") || ($metamode eq "override")) {
792 # If get to here and metamode is override, the this means there
793 # was no existing value to overide => treat as an append operation
794
795 # Tack a new metadata tag on to the end of the <Metadata>+ block
796 my $metaname = $parser->{'parameters'}->{'metaname'};
797 my $metavalue = $parser->{'parameters'}->{'metavalue'};
798
799 my $metadata_attr = { '_content' => $metavalue,
800 'name' => $metaname,
801 'mode' => "accumulate" };
802
803 my $append_metadata = [ "Metadata" => $metadata_attr ];
804 my $description_content = $attrHash->{'_content'};
805
806## print STDERR "**** appending to doc.xml\n";
807
808 push(@$description_content," ", $append_metadata ,"\n ");
809 $parser->{'parameters'}->{'metamode'} = "done";
810 }
811
812
813 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
814 return [$tagname => $attrHash];
815}
816
817
818sub dxml_start_section
819{
820 print STDERR "IN DXML_START_SECTION\n";
821 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
822
823 my $new_depth = scalar(@$contextArray);
824
825 if ($new_depth == 1) {
826 $parser->{'parameters'}->{'curr_section_depth'} = 1;
827 $parser->{'parameters'}->{'curr_section_num'} = "";
828 }
829
830 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
831 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
832
833 my $new_secnum;
834
835 if ($new_depth > $old_depth) {
836 # child subsection
837 $new_secnum = "$old_secnum.1";
838 }
839 elsif ($new_depth == $old_depth) {
840 # sibling section => increase it's value by 1
841 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
842 $tail_num++;
843 $new_secnum = $old_secnum;
844 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
845 }
846 else {
847 # back up to parent section => lopp off tail
848 $new_secnum = $old_secnum;
849 $new_secnum =~ s/\.\d+$//;
850 }
851
852 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
853 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
854
855 print STDERR "*** In Section: $new_secnum\n";
856}
857
858sub edit_xml_file
859{
860 print STDERR "IN EDIT XML FILE\n";
861 my $self = shift @_;
862 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
863
864 # use XML::Rules to add it in (read in and out again)
865 my $parser = XML::Rules->new(start_rules => $start_rules,
866 rules => $rules,
867 style => 'filter',
868 output_encoding => 'utf8' );
869
870 my $xml_in = "";
871 if (!open(MIN,"<$filename")) {
872 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
873 }
874 else {
875 # Read all the text in
876 my $line;
877 while (defined ($line=<MIN>)) {
878 $xml_in .= $line;
879 }
880 close(MIN);
881
882 my $MOUT;
883 if (!open($MOUT,">$filename")) {
884 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
885 }
886 else {
887 # Matched lines will get handled by the call backs
888## my $xml_out = "";
889
890 binmode($MOUT,":utf8");
891 $parser->filter($xml_in,$MOUT, $options);
892
893# binmode(MOUT,":utf8");
894# print MOUT $xml_out;
895 close($MOUT);
896 }
897 }
898}
899
900sub edit_doc_xml
901{
902 print STDERR "IN EDIT DOC XML\n";
903 my $self = shift @_;
904 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
905
906 # To monitor which section/subsection number we are in
907 my @start_rules =
908 ( 'Section' => \&dxml_start_section );
909
910 # use XML::Rules to add it in (read in and out again)
911 # Set the call back functions
912 my @rules =
913 ( _default => 'raw',
914 'Metadata' => \&dxml_metadata,
915 'Description' => \&dxml_description);
916
917 # Sets the parameters
918 my $options = { 'metaname' => $metaname,
919 'metapos' => $metapos,
920 'metavalue' => $metavalue,
921 'metamode' => $metamode,
922 'prevmetavalue' => $prevmetavalue };
923
924 if (defined $opt_secnum) {
925 $options->{'secnum'} = $opt_secnum;
926 }
927
928 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
929}
930
931sub set_archives_metadata_entry
932{
933 print STDERR "IN SET ARCHIVES ENTRY\n";
934 my $self = shift @_;
935 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
936
937 # Obtain the doc.xml path for the specified docID
938 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
939
940 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
941 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
942 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
943
944 # The $doc_xml_file is relative to the archives, and now let's get the full path
945 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
946 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
947
948 # Edit the doc.xml file with the specified metadata name, value and position.
949 # TODO: there is a potential problem here as this edit_doc_xml function
950 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
951 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
952
953 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
954 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
955
956 return 0; # return 0 for now to indicate no error
957
958}
959
960
961sub set_archives_metadata
962{
963 print STDERR "IN SET ARCHIVES\n";
964 my $self = shift @_;
965
966 my $username = $self->{'username'};
967 my $collect = $self->{'collect'};
968 my $gsdl_cgi = $self->{'gsdl_cgi'};
969 my $gsdlhome = $self->{'gsdlhome'};
970 my $infodbtype = $self->{'infodbtype'};
971
972 if ($baseaction::authentication_enabled) {
973 # Ensure the user is allowed to edit this collection
974 $self->authenticate_user($username, $collect);
975 }
976
977 my $site = $self->{'site'};
978
979 # Obtain the collect and archive dir
980 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
981
982 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
983
984 # Make sure the collection isn't locked by someone else
985 $self->lock_collection($username, $collect);
986
987 # look up additional args
988 my $docid = $self->{'d'};
989 my $metaname = $self->{'metaname'};
990 my $metavalue = $self->{'metavalue'};
991 my $prevmetavalue = $self->{'prevmetavalue'};
992
993 my $metapos = $self->{'metapos'};
994 $metapos = 0 if (!defined $metapos);
995
996 my $metamode = $self->{'metamode'};
997 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
998 # make "accumulate" the default (less destructive, as won't actually
999 # delete any existing values)
1000 $metamode = "accumulate";
1001 }
1002
1003 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1004 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1005
1006 # Release the lock once it is done
1007 $self->unlock_collection($username, $collect);
1008
1009 if ($status == 0) {
1010 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1011 $mess .= " $metaname";
1012 $mess .= "->[$metapos]" if (defined $metapos);
1013 $mess .= " = $metavalue";
1014 $mess .= " ($metamode)\n";
1015
1016 $gsdl_cgi->generate_ok_message($mess);
1017 }
1018 else {
1019 my $mess .= "Failed to set archives metadata key: $docid\n";
1020 $mess .= "Exit status: $status\n";
1021 $mess .= "System Error Message: $!\n";
1022 $mess .= "-" x 20 . "\n";
1023
1024 $gsdl_cgi->generate_error($mess);
1025 }
1026}
1027
1028
1029sub set_archives_metadata_array
1030{
1031 my $self = shift @_;
1032
1033 my $username = $self->{'username'};
1034 my $collect = $self->{'collect'};
1035 my $gsdl_cgi = $self->{'gsdl_cgi'};
1036 my $gsdlhome = $self->{'gsdlhome'};
1037
1038 if ($baseaction::authentication_enabled) {
1039 # Ensure the user is allowed to edit this collection
1040 &authenticate_user($gsdl_cgi, $username, $collect);
1041 }
1042
1043 my $site = $self->{'site'};
1044 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1045
1046 $gsdl_cgi->checked_chdir($collect_dir);
1047
1048 # Obtain the collect dir
1049 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1050
1051 # Make sure the collection isn't locked by someone else
1052 $self->lock_collection($username, $collect);
1053
1054 # look up additional args
1055
1056 my $infodbtype = $self->{'infodbtype'};
1057
1058 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1059
1060 my $json_str = $self->{'json'};
1061 my $doc_array = decode_json $json_str;
1062
1063
1064 my $global_status = 0;
1065 my $global_mess = "";
1066
1067 my @all_docids = ();
1068
1069 foreach my $doc_array_rec ( @$doc_array ) {
1070
1071 my $docid = $doc_array_rec->{'docid'};
1072 my $metaname = $doc_array_rec->{'metaname'};
1073 my $metapos = $doc_array_rec->{'metapos'};
1074 my $metamode = $self->{'metamode'};
1075 my $metavalue = $doc_array_rec->{'metavalue'};
1076
1077 # Some sanity checks
1078 $metapos = 0 if (!defined $metapos);
1079
1080 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1081 # make "accumulate" the default (less destructive, as won't actually
1082 # delete any existing values)
1083 $metamode = "accumulate";
1084 }
1085
1086 push(@all_docids,$docid);
1087
1088 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1089 $metaname,$metapos,$metavalue,$metamode);
1090
1091 if ($status != 0) {
1092 # Catch error if set infodb entry failed
1093 $global_status = $status;
1094 $global_mess .= "Failed to set metadata key: $docid\n";
1095 $global_mess .= "Exit status: $status\n";
1096 $global_mess .= "System Error Message: $!\n";
1097 $global_mess .= "-" x 20 . "\n";
1098 }
1099 }
1100
1101 if ($global_status != 0) {
1102 $global_mess .= "PATH: $ENV{'PATH'}\n";
1103 $gsdl_cgi->generate_error($global_mess);
1104 }
1105 else {
1106 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1107 $gsdl_cgi->generate_ok_message($mess);
1108 }
1109
1110 # Release the lock once it is done
1111 $self->unlock_collection($username, $collect);
1112}
1113
1114sub remove_archives_metadata
1115{
1116 my $self = shift @_;
1117
1118 my $username = $self->{'username'};
1119 my $collect = $self->{'collect'};
1120 my $gsdl_cgi = $self->{'gsdl_cgi'};
1121 my $gsdlhome = $self->{'gsdlhome'};
1122 my $infodbtype = $self->{'infodbtype'};
1123
1124 if ($baseaction::authentication_enabled)
1125 {
1126 # Ensure the user is allowed to edit this collection
1127 &authenticate_user($gsdl_cgi, $username, $collect);
1128 }
1129
1130 my $site = $self->{'site'};
1131
1132 # Obtain the collect and archive dir
1133 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1134
1135 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1136
1137 # Make sure the collection isn't locked by someone else
1138 $self->lock_collection($username, $collect);
1139
1140 # look up additional args
1141 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1142
1143 my $metaname = $self->{'metaname'};
1144 my $metapos = $self->{'metapos'};
1145 $metapos = 0 if (!defined $metapos);
1146
1147 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1148 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1149
1150 # This now stores the full pathname
1151 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1152
1153 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1154
1155 # Release the lock once it is done
1156 $self->unlock_collection($username, $collect);
1157
1158 if ($status == 0)
1159 {
1160 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1161 $mess .= " $metaname";
1162 $mess .= "->[$metapos]" if (defined $metapos);
1163
1164 $gsdl_cgi->generate_ok_message($mess);
1165 }
1166 else
1167 {
1168 my $mess .= "Failed to remove archives metadata key: $docid\n";
1169 $mess .= "Exit status: $status\n";
1170 $mess .= "System Error Message: $!\n";
1171 $mess .= "-" x 20 . "\n";
1172
1173 $gsdl_cgi->generate_error($mess);
1174 }
1175}
1176
1177sub remove_from_doc_xml
1178{
1179 my $self = shift @_;
1180 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1181
1182 my @start_rules = ('Section' => \&dxml_start_section);
1183
1184 # Set the call-back functions for the metadata tags
1185 my @rules =
1186 (
1187 _default => 'raw',
1188 'Metadata' => \&rfdxml_metadata
1189 );
1190
1191 my $parser = XML::Rules->new
1192 (
1193 start_rules => \@start_rules,
1194 rules => \@rules,
1195 style => 'filter',
1196 output_encoding => 'utf8'
1197 );
1198
1199 my $status = 0;
1200 my $xml_in = "";
1201 if (!open(MIN,"<$doc_xml_filename"))
1202 {
1203 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1204 $status = 1;
1205 }
1206 else
1207 {
1208 # Read them in
1209 my $line;
1210 while (defined ($line=<MIN>)) {
1211 $xml_in .= $line;
1212 }
1213 close(MIN);
1214
1215 # Filter with the call-back functions
1216 my $xml_out = "";
1217
1218 my $MOUT;
1219 if (!open($MOUT,">$doc_xml_filename")) {
1220 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1221 $status = 1;
1222 }
1223 else {
1224 binmode($MOUT,":utf8");
1225 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1226 close($MOUT);
1227 }
1228 }
1229 return $status;
1230}
1231
1232sub rfdxml_metadata
1233{
1234 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1235
1236 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1237 {
1238 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1239 return [$tagname => $attrHash];
1240 }
1241
1242 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1243 {
1244 if (!defined $parser->{'parameters'}->{'poscount'})
1245 {
1246 $parser->{'parameters'}->{'poscount'} = 0;
1247 }
1248 else
1249 {
1250 $parser->{'parameters'}->{'poscount'}++;
1251 }
1252 }
1253
1254 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1255 {
1256 return [];
1257 }
1258
1259 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1260 {
1261 return [];
1262 }
1263
1264 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1265 return [$tagname => $attrHash];
1266}
1267
1268sub mxml_metadata
1269{
1270 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1271 my $metaname = $parser->{'parameters'}->{'metaname'};
1272 my $metamode = $parser->{'parameters'}->{'metamode'};
1273
1274 # Report error if we don't see FileName tag before this
1275 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1276
1277 # Don't do anything if we are not in the right FileSet
1278 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1279 if ($file_regexp =~ /\.\*/) {
1280 # Only interested in a file_regexp if it specifies precisely one
1281 # file.
1282 # So, skip anything with a .* in it as it is too general
1283 return [$tagname => $attrHash];
1284 }
1285 my $src_file = $parser->{'parameters'}->{'src_file'};
1286 if (!($src_file =~ /$file_regexp/)) {
1287 return [$tagname => $attrHash];
1288 }
1289## print STDERR "*** mxl metamode = $metamode\n";
1290
1291 # Find the right metadata tag and checks if we are going to override it
1292 my $name_attr = $attrHash->{'name'};
1293 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1294 # Get the value and override the current value
1295 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1296 $attrHash->{'_content'} = $metavalue;
1297
1298## print STDERR "**** overrideing metadata.xml\n";
1299
1300 # Don't want it to wipe out any other pieces of metadata
1301 $parser->{'parameters'}->{'metamode'} = "done";
1302 }
1303
1304 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1305 return [$tagname => $attrHash];
1306}
1307
1308
1309sub mxml_description
1310{
1311 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1312 my $metamode = $parser->{'parameters'}->{'metamode'};
1313
1314 # Failed... Report error if we don't see FileName tag before this
1315 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1316
1317 # Don't do anything if we are not in the right FileSet
1318 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1319 if ($file_regexp =~ m/\.\*/) {
1320 # Only interested in a file_regexp if it specifies precisely one
1321 # file.
1322 # So, skip anything with a .* in it as it is too general
1323 return [$tagname => $attrHash];
1324 }
1325 my $src_file = $parser->{'parameters'}->{'src_file'};
1326
1327 if (!($src_file =~ m/$file_regexp/)) {
1328 return [$tagname => $attrHash];
1329 }
1330
1331 # Accumulate the metadata block to the end of the description block
1332 # Note: This adds metadata block to all description blocks, so if there are
1333 # multiple FileSets, it will add to all of them
1334 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1335 # if metamode was "override" but get to here then it failed to
1336 # find an item to override, in which case it should append its
1337 # value to the end, just like the "accumulate" mode
1338
1339 # tack a new metadata tag on to the end of the <Metadata>+ block
1340 my $metaname = $parser->{'parameters'}->{'metaname'};
1341 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1342
1343 my $metadata_attr = { '_content' => $metavalue,
1344 'name' => $metaname,
1345 'mode' => "accumulate" };
1346
1347 my $append_metadata = [ "Metadata" => $metadata_attr ];
1348 my $description_content = $attrHash->{'_content'};
1349
1350## print STDERR "*** appending to metadata.xml\n";
1351
1352 # append the new metadata element to the end of the current
1353 # content contained inside this tag
1354 if (ref($description_content) eq "") {
1355 # => string or numeric literal
1356 # this is caused by a <Description> block has no <Metadata> child elements
1357 # => set up an empty array in '_content'
1358 $attrHash->{'_content'} = [ "\n" ];
1359 $description_content = $attrHash->{'_content'};
1360 }
1361
1362 push(@$description_content," ", $append_metadata ,"\n ");
1363 $parser->{'parameters'}->{'metamode'} = "done";
1364 }
1365
1366 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1367 return [$tagname => $attrHash];
1368}
1369
1370
1371sub mxml_filename
1372{
1373 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1374
1375 # Store the filename of the Current Fileset
1376 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1377 # FileName tag must come before Description tag
1378 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1379
1380 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1381 return [$tagname => $attrHash];
1382}
1383
1384
1385sub mxml_fileset
1386{
1387 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1388
1389 # Initilise the current_file
1390 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1391 # FileName tag must come before Description tag
1392 $parser->{'parameters'}->{'current_file'} = "";
1393
1394 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1395 return [$tagname => $attrHash];
1396}
1397
1398
1399sub edit_metadata_xml
1400{
1401 my $self = shift @_;
1402 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1403
1404 # Set the call-back functions for the metadata tags
1405 my @rules =
1406 ( _default => 'raw',
1407 'FileName' => \&mxml_filename,
1408 'Metadata' => \&mxml_metadata,
1409 'Description' => \&mxml_description,
1410 'FileSet' => \&mxml_fileset);
1411
1412 # use XML::Rules to add it in (read in and out again)
1413 my $parser = XML::Rules->new(rules => \@rules,
1414 style => 'filter',
1415 output_encoding => 'utf8');
1416
1417 if (!-e $metadata_xml_filename) {
1418
1419 if (open(MOUT,">$metadata_xml_filename")) {
1420
1421 my $src_file_re = &util::filename_to_regex($src_file);
1422 # shouldn't the following also be in the above utility routine??
1423 # $src_file_re =~ s/\./\\./g;
1424
1425 print MOUT "<?xml version=\"1.0\"?>\n";
1426 print MOUT "<DirectoryMetadata>\n";
1427 print MOUT " <FileSet>\n";
1428 print MOUT " <FileName>$src_file_re</FileName>\n";
1429 print MOUT " <Description>\n";
1430 print MOUT " </Description>\n";
1431 print MOUT " </FileSet>\n";
1432 print MOUT "</DirectoryMetadata>\n";
1433
1434 close(MOUT);
1435 }
1436 else {
1437 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1438 }
1439 }
1440
1441
1442 my $xml_in = "";
1443 if (!open(MIN,"<$metadata_xml_filename")) {
1444 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1445 }
1446 else {
1447 # Read them in
1448 my $line;
1449 while (defined ($line=<MIN>)) {
1450 $xml_in .= $line;
1451 }
1452 close(MIN);
1453
1454 # Filter with the call-back functions
1455 my $xml_out = "";
1456
1457 my $MOUT;
1458 if (!open($MOUT,">$metadata_xml_filename")) {
1459 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1460 }
1461 else {
1462 binmode($MOUT,":utf8");
1463
1464 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1465 # At the moment, I will just hack it!
1466 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1467 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1468 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1469 #print MOUT $xml_out;
1470
1471 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1472 metavalue => $metavalue,
1473 metamode => $metamode,
1474 src_file => $src_file,
1475 current_file => undef} );
1476 close($MOUT);
1477 }
1478 }
1479}
1480
1481
1482sub set_import_metadata
1483{
1484 my $self = shift @_;
1485
1486 my $username = $self->{'username'};
1487 my $collect = $self->{'collect'};
1488 my $gsdl_cgi = $self->{'gsdl_cgi'};
1489 my $gsdlhome = $self->{'gsdlhome'};
1490 my $infodbtype = $self->{'infodbtype'};
1491
1492 if ($baseaction::authentication_enabled) {
1493 # Ensure the user is allowed to edit this collection
1494 $self->authenticate_user($username, $collect);
1495 }
1496
1497
1498 # Obtain the collect and archive dir
1499 my $site = $self->{'site'};
1500 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1501
1502 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1503 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1504
1505 # Make sure the collection isn't locked by someone else
1506 $self->lock_collection($username, $collect);
1507
1508 # look up additional args
1509 # want either d= or f=
1510 my $docid = $self->{'d'};
1511 my $import_file = $self->{'f'};
1512 if ((!defined $docid) && (!defined $import_file)) {
1513 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1514 }
1515
1516 # Get the parameters and set default mode to "accumulate"
1517 my $metaname = $self->{'metaname'};
1518 my $metavalue = $self->{'metavalue'};
1519## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1520 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1521 print STDERR "*** set import meta: val = $metavalue\n";
1522
1523 my $metamode = $self->{'metamode'};
1524 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1525 # make "accumulate" the default (less destructive, as won't actually
1526 # delete any existing values)
1527 $metamode = "accumulate";
1528 }
1529
1530 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1531 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1532 my $metadata_xml_file;
1533 my $import_filename = undef;
1534 if (defined $docid) {
1535 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1536 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1537
1538 # This now stores the full pathname
1539 $import_filename = $doc_rec->{'src-file'}->[0];
1540 }
1541 else {
1542 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1543 }
1544
1545 # figure out correct metadata.xml file [?]
1546 # Assuming the metadata.xml file is next to the source file
1547 # Note: This will not work if it is using the inherited metadata from the parent folder
1548 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1549 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1550
1551 # Edit the metadata.xml
1552 # Modified by Jeffrey from DL Consulting
1553 # Handle the case where there is one metadata.xml file for multiple FileSets
1554 # The XML filter needs to know whether it is in the right FileSet
1555 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1556 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1557 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1558 $metaname, $metavalue, $metamode, $import_tailname);
1559
1560 # Release the lock once it is done
1561 $self->unlock_collection($username, $collect);
1562
1563 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1564 $mess .= " $metaname";
1565 $mess .= " = $metavalue";
1566 $mess .= " ($metamode)\n";
1567
1568 $gsdl_cgi->generate_ok_message($mess);
1569
1570}
1571
1572sub set_import_metadata_array
1573{
1574 my $self = shift @_;
1575
1576 my $username = $self->{'username'};
1577 my $collect = $self->{'collect'};
1578 my $gsdl_cgi = $self->{'gsdl_cgi'};
1579 my $gsdlhome = $self->{'gsdlhome'};
1580
1581 if ($baseaction::authentication_enabled) {
1582 # Ensure the user is allowed to edit this collection
1583 &authenticate_user($gsdl_cgi, $username, $collect);
1584 }
1585
1586 my $site = $self->{'site'};
1587 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1588
1589 $gsdl_cgi->checked_chdir($collect_dir);
1590
1591 # Make sure the collection isn't locked by someone else
1592 $self->lock_collection($username, $collect);
1593
1594 # look up additional args
1595
1596 my $infodbtype = $self->{'infodbtype'};
1597
1598 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1599
1600 my $json_str = $self->{'json'};
1601 my $doc_array = decode_json $json_str;
1602
1603 my $global_status = 0;
1604 my $global_mess = "";
1605
1606 my @all_docids = ();
1607
1608 foreach my $doc_array_rec ( @$doc_array )
1609 {
1610 my $docid = $doc_array_rec->{'docid'};
1611 my $metaname = $doc_array_rec->{'metaname'};
1612 my $metamode = $self->{'metamode'};
1613 my $metavalue = $doc_array_rec->{'metavalue'};
1614
1615 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1616 # make "accumulate" the default (less destructive, as won't actually
1617 # delete any existing values)
1618 $metamode = "accumulate";
1619 }
1620
1621 push(@all_docids,$docid);
1622
1623 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1624 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1625 my $metadata_xml_file;
1626 my $import_filename = undef;
1627
1628 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1629 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1630
1631 # This now stores the full pathname
1632 $import_filename = $doc_rec->{'src-file'}->[0];
1633
1634 # figure out correct metadata.xml file [?]
1635 # Assuming the metadata.xml file is next to the source file
1636 # Note: This will not work if it is using the inherited metadata from the parent folder
1637 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1638 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1639
1640 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1641 }
1642
1643 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1644 $gsdl_cgi->generate_ok_message($mess);
1645
1646 # Release the lock once it is done
1647 $self->unlock_collection($username, $collect);
1648}
1649
1650sub remove_import_metadata
1651{
1652 my $self = shift @_;
1653
1654 my $username = $self->{'username'};
1655 my $collect = $self->{'collect'};
1656 my $gsdl_cgi = $self->{'gsdl_cgi'};
1657
1658 if ($baseaction::authentication_enabled) {
1659 # Ensure the user is allowed to edit this collection
1660 &authenticate_user($gsdl_cgi, $username, $collect);
1661 }
1662
1663 my $gsdlhome = $self->{'gsdlhome'};
1664 my $infodbtype = $self->{'infodbtype'};
1665
1666 # Obtain the collect dir
1667 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1668 my $site = $self->{'site'};
1669 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1670
1671 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1672 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1673
1674 # Make sure the collection isn't locked by someone else
1675 $self->lock_collection($username, $collect);
1676
1677 # look up additional args
1678 my $docid = $self->{'d'};
1679 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1680 {
1681 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1682 }
1683
1684 my $metaname = $self->{'metaname'};
1685 my $metavalue = $self->{'metavalue'};
1686 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1687
1688 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1689 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1690 my $metadata_xml_file;
1691 my $import_filename = undef;
1692 if (defined $docid)
1693 {
1694 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1695 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1696
1697 # This now stores the full pathname
1698 $import_filename = $doc_rec->{'src-file'}->[0];
1699 }
1700
1701 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1702 {
1703 $gsdl_cgi->generate_error_message("There is no metadata\n");
1704 }
1705
1706 # figure out correct metadata.xml file [?]
1707 # Assuming the metadata.xml file is next to the source file
1708 # Note: This will not work if it is using the inherited metadata from the parent folder
1709 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1710 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1711
1712 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1713
1714 # Release the lock once it is done
1715 $self->unlock_collection($username, $collect);
1716
1717 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1718 $mess .= " $metaname";
1719 $mess .= " = $metavalue\n";
1720
1721 $gsdl_cgi->generate_ok_message($mess);
1722}
1723
1724sub remove_from_metadata_xml
1725{
1726 my $self = shift @_;
1727 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1728
1729 # Set the call-back functions for the metadata tags
1730 my @rules =
1731 (
1732 _default => 'raw',
1733 'Metadata' => \&rfmxml_metadata,
1734 'FileName' => \&mxml_filename
1735 );
1736
1737 my $parser = XML::Rules->new
1738 (
1739 rules => \@rules,
1740 style => 'filter',
1741 output_encoding => 'utf8'
1742 );
1743
1744 my $xml_in = "";
1745 if (!open(MIN,"<$metadata_xml_filename"))
1746 {
1747 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1748 }
1749 else
1750 {
1751 # Read them in
1752 my $line;
1753 while (defined ($line=<MIN>)) {
1754 $xml_in .= $line;
1755 }
1756 close(MIN);
1757
1758 # Filter with the call-back functions
1759 my $xml_out = "";
1760
1761 my $MOUT;
1762 if (!open($MOUT,">$metadata_xml_filename")) {
1763 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1764 }
1765 else {
1766 binmode($MOUT,":utf8");
1767 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1768 close($MOUT);
1769 }
1770 }
1771}
1772
1773sub rfmxml_metadata
1774{
1775 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1776
1777 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1778 {
1779 return [];
1780 }
1781
1782 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1783 return [$tagname => $attrHash];
1784}
1785
1786sub remove_live_metadata
1787{
1788 my $self = shift @_;
1789
1790 my $username = $self->{'username'};
1791 my $collect = $self->{'collect'};
1792 my $gsdl_cgi = $self->{'gsdl_cgi'};
1793 my $gsdlhome = $self->{'gsdlhome'};
1794 my $infodbtype = $self->{'infodbtype'};
1795
1796 if ($baseaction::authentication_enabled) {
1797 # Ensure the user is allowed to edit this collection
1798 &authenticate_user($gsdl_cgi, $username, $collect);
1799 }
1800
1801 # Obtain the collect dir
1802 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1803 my $site = $self->{'site'};
1804 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1805
1806 # Make sure the collection isn't locked by someone else
1807 $self->lock_collection($username, $collect);
1808
1809 # look up additional args
1810 my $docid = $self->{'d'};
1811 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1812 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1813 }
1814
1815 # Generate the dbkey
1816 my $metaname = $self->{'metaname'};
1817 my $dbkey = "$docid.$metaname";
1818
1819 # To people who know $collect_tail please add some comments
1820 # Obtain the live gdbm_db path
1821 my $collect_tail = $collect;
1822 $collect_tail =~ s/^.*[\/\\]//;
1823 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1824 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1825
1826 # Remove the key
1827 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1828 my $status = system($cmd);
1829 if ($status != 0) {
1830 # Catch error if gdbmdel failed
1831 my $mess = "Failed to set metadata key: $dbkey\n";
1832
1833 $mess .= "PATH: $ENV{'PATH'}\n";
1834 $mess .= "cmd = $cmd\n";
1835 $mess .= "Exit status: $status\n";
1836 $mess .= "System Error Message: $!\n";
1837
1838 $gsdl_cgi->generate_error($mess);
1839 }
1840 else {
1841 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1842 }
1843
1844}
1845
1846
1847sub remove_metadata
1848{
1849 my $self = shift @_;
1850
1851 my $username = $self->{'username'};
1852 my $collect = $self->{'collect'};
1853 my $gsdl_cgi = $self->{'gsdl_cgi'};
1854 my $gsdlhome = $self->{'gsdlhome'};
1855 my $infodbtype = $self->{'infodbtype'};
1856
1857 if ($baseaction::authentication_enabled) {
1858 # Ensure the user is allowed to edit this collection
1859 &authenticate_user($gsdl_cgi, $username, $collect);
1860 }
1861
1862 # Obtain the collect dir
1863 my $site = $self->{'site'};
1864 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1865 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1866
1867 # Make sure the collection isn't locked by someone else
1868 $self->lock_collection($username, $collect);
1869
1870 # look up additional args
1871 my $docid = $self->{'d'};
1872 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1873 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1874 }
1875 my $metaname = $self->{'metaname'};
1876 my $metapos = $self->{'metapos'};
1877 my $metavalue = $self->{'metavalue'};
1878
1879 # To people who know $collect_tail please add some comments
1880 # Obtain the path to the database
1881 my $collect_tail = $collect;
1882 $collect_tail =~ s/^.*[\/\\]//;
1883 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1884 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1885
1886 # Read the docid entry
1887 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1888
1889 # Basically loop through and unescape_html the values
1890 foreach my $k (keys %$doc_rec) {
1891 my @escaped_v = ();
1892 foreach my $v (@{$doc_rec->{$k}}) {
1893 if ($k eq "contains") {
1894 # protect quotes in ".2;".3 etc
1895 $v =~ s/\"/\\\"/g;
1896 push(@escaped_v, $v);
1897 }
1898 else {
1899 my $ev = &ghtml::unescape_html($v);
1900 $ev =~ s/\"/\\\"/g;
1901 push(@escaped_v, $ev);
1902 }
1903 }
1904 $doc_rec->{$k} = \@escaped_v;
1905 }
1906
1907 # Check to make sure the key does exist
1908 if (!defined ($doc_rec->{$metaname})) {
1909 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1910 }
1911
1912 # Obtain the specified metadata pos
1913 $metapos = 0 if (!defined $metapos);
1914
1915 # consider check key is defined before deleting?
1916 # Loop through the metadata array and ignore the specified position
1917 my $filtered_metadata = [];
1918 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1919 for (my $i=0; $i<$num_metadata_vals; $i++) {
1920 my $metaval = shift(@{$doc_rec->{$metaname}});
1921
1922 if (!defined $metavalue && $i != $metapos) {
1923 push(@$filtered_metadata,$metaval)
1924 }
1925
1926 if(defined $metavalue && !($metavalue eq $metaval))
1927 {
1928 push(@$filtered_metadata,$metavalue)
1929 }
1930 }
1931 $doc_rec->{$metaname} = $filtered_metadata;
1932
1933 # Turn the record back to string
1934 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1935
1936 # Store it back to the database
1937 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1938 my $status = system($cmd);
1939 if ($status != 0) {
1940 my $mess = "Failed to set metadata key: $docid\n";
1941
1942 $mess .= "PATH: $ENV{'PATH'}\n";
1943 $mess .= "cmd = $cmd\n";
1944 $mess .= "Exit status: $status\n";
1945 $mess .= "System Error Message: $!\n";
1946
1947 $gsdl_cgi->generate_error($mess);
1948 }
1949 else {
1950 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1951 $mess .= " $metaname";
1952 $mess .= "->[$metapos]" if (defined $metapos);
1953
1954 $gsdl_cgi->generate_ok_message($mess);
1955 }
1956}
1957
1958
1959# Was trying to reused the codes, but the functions need to be broken
1960# down more before they can be reused, otherwise there will be too
1961# much overhead and duplicate process...
1962sub insert_metadata
1963{
1964 my $self = shift @_;
1965
1966 my $username = $self->{'username'};
1967 my $collect = $self->{'collect'};
1968 my $gsdl_cgi = $self->{'gsdl_cgi'};
1969 my $gsdlhome = $self->{'gsdlhome'};
1970 my $infodbtype = $self->{'infodbtype'};
1971
1972 # If the import metadata and gdbm database have been updated, we
1973 # need to insert some notification to warn user that the the text
1974 # they see at the moment is not indexed and require a rebuild.
1975 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1976
1977 if ($baseaction::authentication_enabled) {
1978 # Ensure the user is allowed to edit this collection
1979 $self->authenticate_user($username, $collect);
1980 }
1981
1982 # Obtain the collect and archive dir
1983 my $site = $self->{'site'};
1984 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1985 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1986 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1987
1988 # Make sure the collection isn't locked by someone else
1989 $self->lock_collection($username, $collect);
1990
1991 # Check additional args
1992 my $docid = $self->{'d'};
1993 if (!defined($docid)) {
1994 $gsdl_cgi->generate_error("No document id is specified: d=...");
1995 }
1996 my $metaname = $self->{'metaname'};
1997 if (!defined($metaname)) {
1998 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1999 }
2000 my $metavalue = $self->{'metavalue'};
2001 if (!defined($metavalue) || $metavalue eq "") {
2002 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2003 }
2004 # make "accumulate" the default (less destructive, as won't actually
2005 # delete any existing values)
2006 my $metamode = "accumulate";
2007
2008 #=======================================================================#
2009 # set_import_metadata [START]
2010 #=======================================================================#
2011 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2012 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2013 my $metadata_xml_file;
2014 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2015 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2016
2017 # This now stores the full pathname
2018 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2019
2020 # figure out correct metadata.xml file [?]
2021 # Assuming the metadata.xml file is next to the source file
2022 # Note: This will not work if it is using the inherited metadata from the parent folder
2023 my ($import_tailname, $import_dirname)
2024 = File::Basename::fileparse($import_filename);
2025 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2026
2027 # Shane's escape characters
2028 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2029 $metavalue =~ s/\,/&#44;/g;
2030 $metavalue =~ s/\:/&#58;/g;
2031 $metavalue =~ s/\|/&#124;/g;
2032 $metavalue =~ s/\(/&#40;/g;
2033 $metavalue =~ s/\)/&#41;/g;
2034 $metavalue =~ s/\[/&#91;/g;
2035 $metavalue =~ s/\\/&#92;/g;
2036 $metavalue =~ s/\]/&#93;/g;
2037 $metavalue =~ s/\{/&#123;/g;
2038 $metavalue =~ s/\}/&#125;/g;
2039 $metavalue =~ s/\"/&#34;/g;
2040 $metavalue =~ s/\`/&#96;/g;
2041 $metavalue =~ s/\n/_newline_/g;
2042
2043 # Edit the metadata.xml
2044 # Modified by Jeffrey from DL Consulting
2045 # Handle the case where there is one metadata.xml file for multiple FileSets
2046 # The XML filter needs to know whether it is in the right FileSet
2047 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2048 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2049 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2050 $metaname, $metavalue, $metamode, $import_tailname);
2051 #=======================================================================#
2052 # set_import_metadata [END]
2053 #=======================================================================#
2054
2055
2056 #=======================================================================#
2057 # set_metadata (accumulate version) [START]
2058 #=======================================================================#
2059 # To people who know $collect_tail please add some comments
2060 # Obtain path to the database
2061 my $collect_tail = $collect;
2062 $collect_tail =~ s/^.*[\/\\]//;
2063 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2064 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2065
2066 # Read the docid entry
2067 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2068
2069 foreach my $k (keys %$doc_rec) {
2070 my @escaped_v = ();
2071 foreach my $v (@{$doc_rec->{$k}}) {
2072 if ($k eq "contains") {
2073 # protect quotes in ".2;".3 etc
2074 $v =~ s/\"/\\\"/g;
2075 push(@escaped_v, $v);
2076 }
2077 else {
2078 my $ev = &ghtml::unescape_html($v);
2079 $ev =~ s/\"/\\\"/g;
2080 push(@escaped_v, $ev);
2081 }
2082 }
2083 $doc_rec->{$k} = \@escaped_v;
2084 }
2085
2086 # Protect the quotes
2087 $metavalue =~ s/\"/\\\"/g;
2088
2089 # Adds the pending macro
2090 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2091
2092 # If the metadata doesn't exist, create a new one
2093 if (!defined($doc_rec->{$metaname})){
2094 $doc_rec->{$metaname} = [ $macro_metavalue ];
2095 }
2096 # Else, let's acculumate the values
2097 else {
2098 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2099 }
2100
2101 # Generate the record string
2102 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2103
2104 # Store it into GDBM
2105 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2106 my $status = system($cmd);
2107 if ($status != 0) {
2108 # Catch error if gdbmget failed
2109 my $mess = "Failed to set metadata key: $docid\n";
2110
2111 $mess .= "PATH: $ENV{'PATH'}\n";
2112 $mess .= "cmd = $cmd\n";
2113 $mess .= "Exit status: $status\n";
2114 $mess .= "System Error Message: $!\n";
2115
2116 $gsdl_cgi->generate_error($mess);
2117 }
2118 else {
2119 my $mess = "insert-metadata successful: Key[$docid]\n";
2120 $mess .= " [In metadata.xml] $metaname";
2121 $mess .= " = $metavalue\n";
2122 $mess .= " [In database] $metaname";
2123 $mess .= " = $macro_metavalue\n";
2124 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2125 $gsdl_cgi->generate_ok_message($mess);
2126 }
2127 #=======================================================================#
2128 # set_metadata (accumulate version) [END]
2129 #=======================================================================#
2130
2131 # Release the lock once it is done
2132 $self->unlock_collection($username, $collect);
2133}
2134
21351;
Note: See TracBrowser for help on using the repository browser.