source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 25558

Last change on this file since 25558 was 25558, checked in by sjm84, 12 years ago

Some minor formatting, and also a minor fix with metadata

File size: 65.9 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
63
64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
68
69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
72
73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
85
86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
102
103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
106
107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
130
131 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
132 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
133
134 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
135
136 return bless $self, $class;
137}
138
139
140sub get_live_metadata
141{
142 my $self = shift @_;
143
144 my $username = $self->{'username'};
145 my $collect = $self->{'collect'};
146 my $gsdl_cgi = $self->{'gsdl_cgi'};
147 my $gsdlhome = $self->{'gsdlhome'};
148 my $infodbtype = $self->{'infodbtype'};
149
150 # live metadata gets/saves value scoped (prefixed) by the current usename
151 # so (for now) let's not bother to enforce authentication
152
153 # Obtain the collect dir
154 my $site = $self->{'site'};
155 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
157
158 # Make sure the collection isn't locked by someone else
159 $self->lock_collection($username, $collect);
160
161 # look up additional args
162 my $docid = $self->{'d'};
163 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
164 $gsdl_cgi->generate_error("No docid (d=...) specified.");
165 }
166
167 # Generate the dbkey
168 my $metaname = $self->{'metaname'};
169 my $dbkey = "$docid.$metaname";
170
171 # To people who know $collect_tail please add some comments
172 # Obtain path to the database
173 my $collect_tail = $collect;
174 $collect_tail =~ s/^.*[\/|\\]//;
175 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
176 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
177
178 # Obtain the content of the key
179 my $cmd = "gdbmget $infodb_file_path $dbkey";
180 if (open(GIN,"$cmd |") == 0) {
181 # Catch error if gdbmget failed
182 my $mess = "Failed to get metadata key: $metaname\n";
183 $mess .= "$!\n";
184
185 $gsdl_cgi->generate_error($mess);
186 }
187 else {
188 binmode(GIN,":utf8");
189 # Read everything in and concatenate them into $metavalue
190 my $metavalue = "";
191 my $line;
192 while (defined ($line=<GIN>)) {
193 $metavalue .= $line;
194 }
195 close(GIN);
196 chomp($metavalue); # Get rid off the tailing newlines
197 $gsdl_cgi->generate_ok_message("$metavalue");
198 }
199
200 # Release the lock once it is done
201 $self->unlock_collection($username, $collect);
202}
203
204
205sub get_metadata
206{
207 my $self = shift @_;
208
209 my $username = $self->{'username'};
210 my $collect = $self->{'collect'};
211 my $gsdl_cgi = $self->{'gsdl_cgi'};
212 my $gsdlhome = $self->{'gsdlhome'};
213
214 # Authenticate user if it is enabled
215 if ($baseaction::authentication_enabled) {
216 # Ensure the user is allowed to edit this collection
217 &authenticate_user($gsdl_cgi, $username, $collect);
218 }
219
220 # Obtain the collect dir
221 my $site = $self->{'site'};
222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
223 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
224
225 # Make sure the collection isn't locked by someone else
226 $self->lock_collection($username, $collect);
227
228 # look up additional args
229 my $docid = $self->{'d'};
230 my $metaname = $self->{'metaname'};
231 my $metapos = $self->{'metapos'};
232 my $infodbtype = $self->{'infodbtype'};
233
234 # To people who know $collect_tail please add some comments
235 # Obtain path to the database
236 my $collect_tail = $collect;
237 $collect_tail =~ s/^.*[\/\\]//;
238 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
239 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
240
241 # Read the docid entry
242 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
243
244 # Basically loop through and unescape_html the values
245 foreach my $k (keys %$doc_rec) {
246 my @escaped_v = ();
247 foreach my $v (@{$doc_rec->{$k}}) {
248 my $ev = &ghtml::unescape_html($v);
249 push(@escaped_v, $ev);
250 }
251 $doc_rec->{$k} = \@escaped_v;
252 }
253
254 # Obtain the specified metadata value
255 $metapos = 0 if (!defined $metapos);
256 my $metavalue = $doc_rec->{$metaname}->[$metapos];
257 $gsdl_cgi->generate_ok_message("$metavalue");
258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
261}
262
263
264sub get_import_metadata
265{
266 my $self = shift @_;
267
268 my $username = $self->{'username'};
269 my $collect = $self->{'collect'};
270 my $gsdl_cgi = $self->{'gsdl_cgi'};
271 my $gsdlhome = $self->{'gsdlhome'};
272
273 # Authenticate user if it is enabled
274 if ($baseaction::authentication_enabled) {
275 # Ensure the user is allowed to edit this collection
276 &authenticate_user($gsdl_cgi, $username, $collect);
277 }
278
279 # Obtain the collect dir
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
283
284 # Make sure the collection isn't locked by someone else
285 $self->lock_collection($username, $collect);
286
287 # look up additional args
288 my $docid = $self->{'d'};
289 my $metaname = $self->{'metaname'};
290 my $infodbtype = $self->{'infodbtype'};
291 if (!defined $docid)
292 {
293 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
294 }
295
296 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
297 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
298 my $metadata_xml_file;
299 my $import_filename = undef;
300
301
302 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
303 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
304 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
305
306 # This now stores the full pathname
307 $import_filename = $doc_rec->{'src-file'}->[0];
308
309 # figure out correct metadata.xml file [?]
310 # Assuming the metadata.xml file is next to the source file
311 # Note: This will not work if it is using the inherited metadata from the parent folder
312 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
313 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
314
315 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
316
317 # Release the lock once it is done
318 $self->unlock_collection($username, $collect);
319}
320
321sub get_metadata_from_metadata_xml
322{
323 my $self = shift @_;
324 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
325
326 my @rules =
327 (
328 _default => 'raw',
329 'Metadata' => \&gfmxml_metadata,
330 'FileName' => \&mxml_filename
331 );
332
333 my $parser = XML::Rules->new
334 (
335 rules => \@rules,
336 output_encoding => 'utf8'
337 );
338
339 my $xml_in = "";
340 if (!open(MIN,"<$metadata_xml_filename"))
341 {
342 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
343 }
344 else
345 {
346 # Read them in
347 my $line;
348 while (defined ($line=<MIN>)) {
349 $xml_in .= $line;
350 }
351 close(MIN);
352
353 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
354
355 if(defined $parser->{'pad'}->{'metavalue'})
356 {
357 return $parser->{'pad'}->{'metavalue'};
358 }
359 else
360 {
361 return "";
362 }
363 }
364}
365
366sub gfmxml_metadata
367{
368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
369
370 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
371 {
372 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
373 }
374}
375
376sub get_archives_metadata
377{
378 my $self = shift @_;
379
380 my $username = $self->{'username'};
381 my $collect = $self->{'collect'};
382 my $gsdl_cgi = $self->{'gsdl_cgi'};
383 my $gsdlhome = $self->{'gsdlhome'};
384 my $infodbtype = $self->{'infodbtype'};
385
386 # Authenticate user if it is enabled
387 if ($baseaction::authentication_enabled) {
388 # Ensure the user is allowed to edit this collection
389 &authenticate_user($gsdl_cgi, $username, $collect);
390 }
391
392 # Obtain the collect dir
393 my $site = $self->{'site'};
394 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
395
396 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
397
398 # Make sure the collection isn't locked by someone else
399 $self->lock_collection($username, $collect);
400
401 # look up additional args
402 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
403 $docid_secnum = "" if (!defined $docid_secnum);
404
405 my $metaname = $self->{'metaname'};
406 my $metapos = $self->{'metapos'};
407 $metapos = 0 if (!defined $metapos);
408
409 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
411
412 # This now stores the full pathname
413 my $doc_filename = $doc_rec->{'doc-file'}->[0];
414
415 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
416
417 # Release the lock once it is done
418 $self->unlock_collection($username, $collect);
419}
420
421sub get_metadata_from_archive_xml
422{
423 my $self = shift @_;
424 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
425
426 my @start_rules = ('Section' => \&dxml_start_section);
427
428 my @rules =
429 (
430 _default => 'raw',
431 'Metadata' => \&gfdxml_metadata
432 );
433
434 my $parser = XML::Rules->new
435 (
436 start_rules => \@start_rules,
437 rules => \@rules,
438 output_encoding => 'utf8'
439 );
440
441 my $xml_in = "";
442 if (!open(MIN,"<$doc_xml_filename"))
443 {
444 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
445 }
446 else
447 {
448 # Read them in
449 my $line;
450 while (defined ($line=<MIN>)) {
451 $xml_in .= $line;
452 }
453 close(MIN);
454
455 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
456
457 if(defined $parser->{'pad'}->{'metavalue'})
458 {
459 return $parser->{'pad'}->{'metavalue'};
460 }
461 else
462 {
463 return "";
464 }
465 }
466}
467
468sub gfdxml_metadata
469{
470 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
471
472 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
473 {
474 return;
475 }
476
477 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
478 {
479 if (!defined $parser->{'parameters'}->{'poscount'})
480 {
481 $parser->{'parameters'}->{'poscount'} = 0;
482 }
483 else
484 {
485 $parser->{'parameters'}->{'poscount'}++;
486 }
487 }
488
489 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
490 {
491 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
492 }
493}
494
495sub set_live_metadata
496{
497 my $self = shift @_;
498
499 my $username = $self->{'username'};
500 my $collect = $self->{'collect'};
501 my $gsdl_cgi = $self->{'gsdl_cgi'};
502 my $gsdlhome = $self->{'gsdlhome'};
503 my $infodbtype = $self->{'infodbtype'};
504
505 if ($baseaction::authentication_enabled) {
506 # Ensure the user is allowed to edit this collection
507 &authenticate_user($gsdl_cgi, $username, $collect);
508 }
509
510 # Obtain the collect dir
511 my $site = $self->{'site'};
512 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
513 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
514
515 # Make sure the collection isn't locked by someone else
516 $self->lock_collection($username, $collect);
517
518 # look up additional args
519 my $docid = $self->{'d'};
520 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
521 $gsdl_cgi->generate_error("No docid (d=...) specified.");
522 }
523 my $metavalue = $self->{'metavalue'};
524
525
526 # Generate the dbkey
527 my $metaname = $self->{'metaname'};
528 my $dbkey = "$docid.$metaname";
529
530 # To people who know $collect_tail please add some comments
531 # Obtain path to the database
532 my $collect_tail = $collect;
533 $collect_tail =~ s/^.*[\/\\]//;
534 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
535 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
536
537 # Set the new value
538 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
539 my $status = system($cmd);
540 if ($status != 0) {
541 # Catch error if gdbmget failed
542 my $mess = "Failed to set metadata key: $dbkey\n";
543
544 $mess .= "PATH: $ENV{'PATH'}\n";
545 $mess .= "cmd = $cmd\n";
546 $mess .= "Exit status: $status\n";
547 $mess .= "System Error Message: $!\n";
548
549 $gsdl_cgi->generate_error($mess);
550 }
551 else {
552 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
553 }
554
555 # Release the lock once it is done
556 $self->unlock_collection($username, $collect);
557}
558
559sub set_metadata_entry
560{
561 my $self = shift @_;
562 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
563
564 # To people who know $collect_tail please add some comments
565 # Obtain path to the database
566 my $collect_tail = $collect;
567 $collect_tail =~ s/^.*[\/\\]//;
568 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
569 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
570
571# print STDERR "**** infodb file path = $infodb_file_path\n";
572# print STDERR "***** infodb type = $infodbtype\n";
573
574 # Read the docid entry
575 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
576
577 # Set the metadata value
578 if (defined $metapos) {
579 $doc_rec->{$metaname}->[$metapos] = $metavalue;
580 }
581 else {
582 $doc_rec->{$metaname} = [ $metavalue ];
583 }
584
585 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
586
587 return $status;
588
589}
590
591sub set_metadata
592{
593 my $self = shift @_;
594
595 my $username = $self->{'username'};
596 my $collect = $self->{'collect'};
597 my $gsdl_cgi = $self->{'gsdl_cgi'};
598 my $gsdlhome = $self->{'gsdlhome'};
599
600 if ($baseaction::authentication_enabled) {
601 # Ensure the user is allowed to edit this collection
602 &authenticate_user($gsdl_cgi, $username, $collect);
603 }
604
605 my $site = $self->{'site'};
606 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
607
608 $gsdl_cgi->checked_chdir($collect_dir);
609
610 # Obtain the collect dir
611 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
612
613 # Make sure the collection isn't locked by someone else
614 $self->lock_collection($username, $collect);
615
616 # look up additional args
617 my $docid = $self->{'d'};
618 my $metaname = $self->{'metaname'};
619 my $metapos = $self->{'metapos'};
620 my $metavalue = $self->{'metavalue'};
621 my $infodbtype = $self->{'infodbtype'};
622
623 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
624
625 if ($status != 0) {
626 # Catch error if set infodb entry failed
627 my $mess = "Failed to set metadata key: $docid\n";
628
629 $mess .= "PATH: $ENV{'PATH'}\n";
630 $mess .= "Exit status: $status\n";
631 $mess .= "System Error Message: $!\n";
632
633 $gsdl_cgi->generate_error($mess);
634 }
635 else {
636 my $mess = "set-metadata successful: Key[$docid]\n";
637 $mess .= " $metaname";
638 $mess .= "->[$metapos]" if (defined $metapos);
639 $mess .= " = $metavalue";
640
641 $gsdl_cgi->generate_ok_message($mess);
642 }
643
644 # Release the lock once it is done
645 $self->unlock_collection($username, $collect);
646}
647
648
649sub set_metadata_array
650{
651 my $self = shift @_;
652
653 my $username = $self->{'username'};
654 my $collect = $self->{'collect'};
655 my $gsdl_cgi = $self->{'gsdl_cgi'};
656 my $gsdlhome = $self->{'gsdlhome'};
657
658 if ($baseaction::authentication_enabled) {
659 # Ensure the user is allowed to edit this collection
660 &authenticate_user($gsdl_cgi, $username, $collect);
661 }
662
663 my $site = $self->{'site'};
664 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
665
666 $gsdl_cgi->checked_chdir($collect_dir);
667
668 # Obtain the collect dir
669 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
670
671 # Make sure the collection isn't locked by someone else
672 $self->lock_collection($username, $collect);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $docid = $doc_array_rec->{'docid'};
690 my $metaname = $doc_array_rec->{'metaname'};
691 my $metapos = $doc_array_rec->{'metapos'};
692 my $metavalue = $doc_array_rec->{'metavalue'};
693
694 push(@all_docids,$docid);
695
696 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
697
698 if ($status != 0) {
699 # Catch error if set infodb entry failed
700 $global_status = $status;
701 $global_mess .= "Failed to set metadata key: $docid\n";
702 $global_mess .= "Exit status: $status\n";
703 $global_mess .= "System Error Message: $!\n";
704 $global_mess .= "-" x 20;
705 }
706 }
707
708 if ($global_status != 0) {
709 $global_mess .= "PATH: $ENV{'PATH'}\n";
710 $gsdl_cgi->generate_error($global_mess);
711 }
712 else {
713 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
714 $gsdl_cgi->generate_ok_message($mess);
715 }
716
717 # Release the lock once it is done
718 $self->unlock_collection($username, $collect);
719}
720
721
722sub dxml_metadata
723{
724 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
725 my $metaname = $parser->{'parameters'}->{'metaname'};
726 my $metamode = $parser->{'parameters'}->{'metamode'};
727
728 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
729
730 # Find the right metadata tag and checks if we are going to
731 # override it
732 #
733 # Note: This over writes the first metadata block it
734 # encountered. If there are multiple Sections in the doc.xml, it
735 # might not behave as you would expect
736
737 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
738## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
739## print STDERR "**** metamode = $metamode\n";
740
741 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
742 {
743 my $name_attr = $attrHash->{'name'};
744 if (($name_attr eq $metaname) && ($metamode eq "override"))
745 {
746 if (!defined $parser->{'parameters'}->{'poscount'})
747 {
748 $parser->{'parameters'}->{'poscount'} = 0;
749 }
750 else
751 {
752 $parser->{'parameters'}->{'poscount'}++;
753 }
754
755 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
756 {
757 ##print STDERR "**** got match!!\n";
758 # Get the value and override the current value
759 my $metavalue = $parser->{'parameters'}->{'metavalue'};
760 $attrHash->{'_content'} = $metavalue;
761
762 # Don't want it to wipe out any other pieces of metadata
763 $parser->{'parameters'}->{'metamode'} = "done";
764 }
765 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} == $attrHash->{'_content'})
766 {
767 my $metavalue = $parser->{'parameters'}->{'metavalue'};
768 $attrHash->{'_content'} = $metavalue;
769 $parser->{'parameters'}->{'metamode'} = "done";
770 }
771 }
772 }
773
774 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
775 return [$tagname => $attrHash];
776}
777
778
779sub dxml_description
780{
781 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
782 my $metamode = $parser->{'parameters'}->{'metamode'};
783
784 # Accumulate the metadata
785 # NOTE: This appends new metadata element to all description fields.
786 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
787 if (($metamode eq "accumulate") || ($metamode eq "override")) {
788 # If get to here and metamode is override, the this means there
789 # was no existing value to overide => treat as an append operation
790
791 # Tack a new metadata tag on to the end of the <Metadata>+ block
792 my $metaname = $parser->{'parameters'}->{'metaname'};
793 my $metavalue = $parser->{'parameters'}->{'metavalue'};
794
795 my $metadata_attr = {
796 '_content' => $metavalue,
797 'name' => $metaname,
798 'mode' => "accumulate"
799 };
800
801 my $append_metadata = [ "Metadata" => $metadata_attr ];
802 my $description_content = $attrHash->{'_content'};
803
804 ## print STDERR "**** appending to doc.xml\n";
805
806 if (ref($description_content)) {
807 # got some existing interesting nested content
808 push(@$description_content, " ", $append_metadata ,"\n ");
809 }
810 else {
811 #description_content is most likely a string such as "\n"
812 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
813 }
814
815 $parser->{'parameters'}->{'metamode'} = "done";
816 }
817
818 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
819 return [$tagname => $attrHash];
820}
821
822
823sub dxml_start_section
824{
825 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
826
827 my $new_depth = scalar(@$contextArray);
828
829 if ($new_depth == 1) {
830 $parser->{'parameters'}->{'curr_section_depth'} = 1;
831 $parser->{'parameters'}->{'curr_section_num'} = "";
832 }
833
834 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
835 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
836
837 my $new_secnum;
838
839 if ($new_depth > $old_depth) {
840 # child subsection
841 $new_secnum = "$old_secnum.1";
842 }
843 elsif ($new_depth == $old_depth) {
844 # sibling section => increase it's value by 1
845 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
846 $tail_num++;
847 $new_secnum = $old_secnum;
848 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
849 }
850 else {
851 # back up to parent section => lopp off tail
852 $new_secnum = $old_secnum;
853 $new_secnum =~ s/\.\d+$//;
854 }
855
856 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
857 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
858}
859
860sub edit_xml_file
861{
862 my $self = shift @_;
863 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
864
865 # use XML::Rules to add it in (read in and out again)
866 my $parser = XML::Rules->new(start_rules => $start_rules,
867 rules => $rules,
868 style => 'filter',
869 output_encoding => 'utf8' );
870
871 my $xml_in = "";
872 if (!open(MIN,"<$filename")) {
873 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
874 }
875 else {
876 # Read all the text in
877 my $line;
878 while (defined ($line=<MIN>)) {
879 $xml_in .= $line;
880 }
881 close(MIN);
882
883 my $MOUT;
884 if (!open($MOUT,">$filename")) {
885 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
886 }
887 else {
888 # Matched lines will get handled by the call backs
889## my $xml_out = "";
890
891 binmode($MOUT,":utf8");
892 $parser->filter($xml_in,$MOUT, $options);
893
894# binmode(MOUT,":utf8");
895# print MOUT $xml_out;
896 close($MOUT);
897 }
898 }
899}
900
901sub edit_doc_xml
902{
903 my $self = shift @_;
904 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
905
906 # To monitor which section/subsection number we are in
907 my @start_rules =
908 ( 'Section' => \&dxml_start_section );
909
910 # use XML::Rules to add it in (read in and out again)
911 # Set the call back functions
912 my @rules =
913 ( _default => 'raw',
914 'Metadata' => \&dxml_metadata,
915 'Description' => \&dxml_description);
916
917 # Sets the parameters
918 my $options = { 'metaname' => $metaname,
919 'metapos' => $metapos,
920 'metavalue' => $metavalue,
921 'metamode' => $metamode,
922 'prevmetavalue' => $prevmetavalue };
923
924 if (defined $opt_secnum) {
925 $options->{'secnum'} = $opt_secnum;
926 }
927
928 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
929}
930
931sub set_archives_metadata_entry
932{
933 my $self = shift @_;
934 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
935
936 # Obtain the doc.xml path for the specified docID
937 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
938
939 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
940 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
941 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
942
943 # The $doc_xml_file is relative to the archives, and now let's get the full path
944 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
945 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
946
947 # Edit the doc.xml file with the specified metadata name, value and position.
948 # TODO: there is a potential problem here as this edit_doc_xml function
949 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
950 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
951
952 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
953 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
954
955 return 0; # return 0 for now to indicate no error
956}
957
958
959sub set_archives_metadata
960{
961 my $self = shift @_;
962
963 my $username = $self->{'username'};
964 my $collect = $self->{'collect'};
965 my $gsdl_cgi = $self->{'gsdl_cgi'};
966 my $gsdlhome = $self->{'gsdlhome'};
967 my $infodbtype = $self->{'infodbtype'};
968
969 if ($baseaction::authentication_enabled) {
970 # Ensure the user is allowed to edit this collection
971 $self->authenticate_user($username, $collect);
972 }
973
974 my $site = $self->{'site'};
975
976 # Obtain the collect and archive dir
977 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
978
979 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
980
981 # Make sure the collection isn't locked by someone else
982 $self->lock_collection($username, $collect);
983
984 # look up additional args
985 my $docid = $self->{'d'};
986 my $metaname = $self->{'metaname'};
987 my $metavalue = $self->{'metavalue'};
988 my $prevmetavalue = $self->{'prevmetavalue'};
989
990 my $metapos = $self->{'metapos'};
991 $metapos = 0 if (!defined $metapos);
992
993 my $metamode = $self->{'metamode'};
994 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
995 # make "accumulate" the default (less destructive, as won't actually
996 # delete any existing values)
997 $metamode = "accumulate";
998 }
999
1000 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1001 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1002
1003 # Release the lock once it is done
1004 $self->unlock_collection($username, $collect);
1005
1006 if ($status == 0) {
1007 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1008 $mess .= " $metaname";
1009 $mess .= "->[$metapos]" if (defined $metapos);
1010 $mess .= " = $metavalue";
1011 $mess .= " ($metamode)\n";
1012
1013 $gsdl_cgi->generate_ok_message($mess);
1014 }
1015 else {
1016 my $mess .= "Failed to set archives metadata key: $docid\n";
1017 $mess .= "Exit status: $status\n";
1018 $mess .= "System Error Message: $!\n";
1019 $mess .= "-" x 20 . "\n";
1020
1021 $gsdl_cgi->generate_error($mess);
1022 }
1023}
1024
1025
1026sub set_archives_metadata_array
1027{
1028 my $self = shift @_;
1029
1030 my $username = $self->{'username'};
1031 my $collect = $self->{'collect'};
1032 my $gsdl_cgi = $self->{'gsdl_cgi'};
1033 my $gsdlhome = $self->{'gsdlhome'};
1034
1035 if ($baseaction::authentication_enabled) {
1036 # Ensure the user is allowed to edit this collection
1037 &authenticate_user($gsdl_cgi, $username, $collect);
1038 }
1039
1040 my $site = $self->{'site'};
1041 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1042
1043 $gsdl_cgi->checked_chdir($collect_dir);
1044
1045 # Obtain the collect dir
1046 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1047
1048 # Make sure the collection isn't locked by someone else
1049 $self->lock_collection($username, $collect);
1050
1051 # look up additional args
1052
1053 my $infodbtype = $self->{'infodbtype'};
1054
1055 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1056
1057 my $json_str = $self->{'json'};
1058 my $doc_array = decode_json $json_str;
1059
1060
1061 my $global_status = 0;
1062 my $global_mess = "";
1063
1064 my @all_docids = ();
1065
1066 foreach my $doc_array_rec ( @$doc_array ) {
1067
1068 my $docid = $doc_array_rec->{'docid'};
1069 my $metaname = $doc_array_rec->{'metaname'};
1070 my $metapos = $doc_array_rec->{'metapos'};
1071 my $metamode = $self->{'metamode'};
1072 my $metavalue = $doc_array_rec->{'metavalue'};
1073
1074 # Some sanity checks
1075 $metapos = 0 if (!defined $metapos);
1076
1077 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1078 # make "accumulate" the default (less destructive, as won't actually
1079 # delete any existing values)
1080 $metamode = "accumulate";
1081 }
1082
1083 push(@all_docids,$docid);
1084
1085 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1086 $metaname,$metapos,$metavalue,$metamode);
1087
1088 if ($status != 0) {
1089 # Catch error if set infodb entry failed
1090 $global_status = $status;
1091 $global_mess .= "Failed to set metadata key: $docid\n";
1092 $global_mess .= "Exit status: $status\n";
1093 $global_mess .= "System Error Message: $!\n";
1094 $global_mess .= "-" x 20 . "\n";
1095 }
1096 }
1097
1098 if ($global_status != 0) {
1099 $global_mess .= "PATH: $ENV{'PATH'}\n";
1100 $gsdl_cgi->generate_error($global_mess);
1101 }
1102 else {
1103 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1104 $gsdl_cgi->generate_ok_message($mess);
1105 }
1106
1107 # Release the lock once it is done
1108 $self->unlock_collection($username, $collect);
1109}
1110
1111sub remove_archives_metadata
1112{
1113 my $self = shift @_;
1114
1115 my $username = $self->{'username'};
1116 my $collect = $self->{'collect'};
1117 my $gsdl_cgi = $self->{'gsdl_cgi'};
1118 my $gsdlhome = $self->{'gsdlhome'};
1119 my $infodbtype = $self->{'infodbtype'};
1120
1121 if ($baseaction::authentication_enabled)
1122 {
1123 # Ensure the user is allowed to edit this collection
1124 &authenticate_user($gsdl_cgi, $username, $collect);
1125 }
1126
1127 my $site = $self->{'site'};
1128
1129 # Obtain the collect and archive dir
1130 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1131
1132 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1133
1134 # Make sure the collection isn't locked by someone else
1135 $self->lock_collection($username, $collect);
1136
1137 # look up additional args
1138 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1139
1140 my $metaname = $self->{'metaname'};
1141 my $metapos = $self->{'metapos'};
1142 $metapos = 0 if (!defined $metapos);
1143
1144 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1145 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1146
1147 # This now stores the full pathname
1148 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1149
1150 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1151
1152 # Release the lock once it is done
1153 $self->unlock_collection($username, $collect);
1154
1155 if ($status == 0)
1156 {
1157 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1158 $mess .= " $metaname";
1159 $mess .= "->[$metapos]" if (defined $metapos);
1160
1161 $gsdl_cgi->generate_ok_message($mess);
1162 }
1163 else
1164 {
1165 my $mess .= "Failed to remove archives metadata key: $docid\n";
1166 $mess .= "Exit status: $status\n";
1167 $mess .= "System Error Message: $!\n";
1168 $mess .= "-" x 20 . "\n";
1169
1170 $gsdl_cgi->generate_error($mess);
1171 }
1172}
1173
1174sub remove_from_doc_xml
1175{
1176 my $self = shift @_;
1177 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1178
1179 my @start_rules = ('Section' => \&dxml_start_section);
1180
1181 # Set the call-back functions for the metadata tags
1182 my @rules =
1183 (
1184 _default => 'raw',
1185 'Metadata' => \&rfdxml_metadata
1186 );
1187
1188 my $parser = XML::Rules->new
1189 (
1190 start_rules => \@start_rules,
1191 rules => \@rules,
1192 style => 'filter',
1193 output_encoding => 'utf8'
1194 );
1195
1196 my $status = 0;
1197 my $xml_in = "";
1198 if (!open(MIN,"<$doc_xml_filename"))
1199 {
1200 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1201 $status = 1;
1202 }
1203 else
1204 {
1205 # Read them in
1206 my $line;
1207 while (defined ($line=<MIN>)) {
1208 $xml_in .= $line;
1209 }
1210 close(MIN);
1211
1212 # Filter with the call-back functions
1213 my $xml_out = "";
1214
1215 my $MOUT;
1216 if (!open($MOUT,">$doc_xml_filename")) {
1217 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1218 $status = 1;
1219 }
1220 else {
1221 binmode($MOUT,":utf8");
1222 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1223 close($MOUT);
1224 }
1225 }
1226 return $status;
1227}
1228
1229sub rfdxml_metadata
1230{
1231 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1232
1233 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1234 {
1235 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1236 return [$tagname => $attrHash];
1237 }
1238
1239 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1240 {
1241 if (!defined $parser->{'parameters'}->{'poscount'})
1242 {
1243 $parser->{'parameters'}->{'poscount'} = 0;
1244 }
1245 else
1246 {
1247 $parser->{'parameters'}->{'poscount'}++;
1248 }
1249 }
1250
1251 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1252 {
1253 return [];
1254 }
1255
1256 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1257 {
1258 return [];
1259 }
1260
1261 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1262 return [$tagname => $attrHash];
1263}
1264
1265sub mxml_metadata
1266{
1267 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1268 my $metaname = $parser->{'parameters'}->{'metaname'};
1269 my $metamode = $parser->{'parameters'}->{'metamode'};
1270
1271 # Report error if we don't see FileName tag before this
1272 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1273
1274 # Don't do anything if we are not in the right FileSet
1275 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1276 if ($file_regexp =~ /\.\*/) {
1277 # Only interested in a file_regexp if it specifies precisely one
1278 # file.
1279 # So, skip anything with a .* in it as it is too general
1280 return [$tagname => $attrHash];
1281 }
1282 my $src_file = $parser->{'parameters'}->{'src_file'};
1283 if (!($src_file =~ /$file_regexp/)) {
1284 return [$tagname => $attrHash];
1285 }
1286## print STDERR "*** mxl metamode = $metamode\n";
1287
1288 # Find the right metadata tag and checks if we are going to override it
1289 my $name_attr = $attrHash->{'name'};
1290 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1291 # Get the value and override the current value
1292 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1293 $attrHash->{'_content'} = $metavalue;
1294
1295## print STDERR "**** overrideing metadata.xml\n";
1296
1297 # Don't want it to wipe out any other pieces of metadata
1298 $parser->{'parameters'}->{'metamode'} = "done";
1299 }
1300
1301 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1302 return [$tagname => $attrHash];
1303}
1304
1305
1306sub mxml_description
1307{
1308 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1309 my $metamode = $parser->{'parameters'}->{'metamode'};
1310
1311 # Failed... Report error if we don't see FileName tag before this
1312 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1313
1314 # Don't do anything if we are not in the right FileSet
1315 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1316 if ($file_regexp =~ m/\.\*/) {
1317 # Only interested in a file_regexp if it specifies precisely one
1318 # file.
1319 # So, skip anything with a .* in it as it is too general
1320 return [$tagname => $attrHash];
1321 }
1322 my $src_file = $parser->{'parameters'}->{'src_file'};
1323
1324 if (!($src_file =~ m/$file_regexp/)) {
1325 return [$tagname => $attrHash];
1326 }
1327
1328 # Accumulate the metadata block to the end of the description block
1329 # Note: This adds metadata block to all description blocks, so if there are
1330 # multiple FileSets, it will add to all of them
1331 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1332 # if metamode was "override" but get to here then it failed to
1333 # find an item to override, in which case it should append its
1334 # value to the end, just like the "accumulate" mode
1335
1336 # tack a new metadata tag on to the end of the <Metadata>+ block
1337 my $metaname = $parser->{'parameters'}->{'metaname'};
1338 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1339
1340 my $metadata_attr = { '_content' => $metavalue,
1341 'name' => $metaname,
1342 'mode' => "accumulate" };
1343
1344 my $append_metadata = [ "Metadata" => $metadata_attr ];
1345 my $description_content = $attrHash->{'_content'};
1346
1347## print STDERR "*** appending to metadata.xml\n";
1348
1349 # append the new metadata element to the end of the current
1350 # content contained inside this tag
1351 if (ref($description_content) eq "") {
1352 # => string or numeric literal
1353 # this is caused by a <Description> block has no <Metadata> child elements
1354 # => set up an empty array in '_content'
1355 $attrHash->{'_content'} = [ "\n" ];
1356 $description_content = $attrHash->{'_content'};
1357 }
1358
1359 push(@$description_content," ", $append_metadata ,"\n ");
1360 $parser->{'parameters'}->{'metamode'} = "done";
1361 }
1362
1363 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1364 return [$tagname => $attrHash];
1365}
1366
1367
1368sub mxml_filename
1369{
1370 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1371
1372 # Store the filename of the Current Fileset
1373 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1374 # FileName tag must come before Description tag
1375 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1376
1377 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1378 return [$tagname => $attrHash];
1379}
1380
1381
1382sub mxml_fileset
1383{
1384 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1385
1386 # Initilise the current_file
1387 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1388 # FileName tag must come before Description tag
1389 $parser->{'parameters'}->{'current_file'} = "";
1390
1391 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1392 return [$tagname => $attrHash];
1393}
1394
1395
1396sub edit_metadata_xml
1397{
1398 my $self = shift @_;
1399 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1400
1401 # Set the call-back functions for the metadata tags
1402 my @rules =
1403 ( _default => 'raw',
1404 'FileName' => \&mxml_filename,
1405 'Metadata' => \&mxml_metadata,
1406 'Description' => \&mxml_description,
1407 'FileSet' => \&mxml_fileset);
1408
1409 # use XML::Rules to add it in (read in and out again)
1410 my $parser = XML::Rules->new(rules => \@rules,
1411 style => 'filter',
1412 output_encoding => 'utf8');
1413
1414 if (!-e $metadata_xml_filename) {
1415
1416 if (open(MOUT,">$metadata_xml_filename")) {
1417
1418 my $src_file_re = &util::filename_to_regex($src_file);
1419 # shouldn't the following also be in the above utility routine??
1420 # $src_file_re =~ s/\./\\./g;
1421
1422 print MOUT "<?xml version=\"1.0\"?>\n";
1423 print MOUT "<DirectoryMetadata>\n";
1424 print MOUT " <FileSet>\n";
1425 print MOUT " <FileName>$src_file_re</FileName>\n";
1426 print MOUT " <Description>\n";
1427 print MOUT " </Description>\n";
1428 print MOUT " </FileSet>\n";
1429 print MOUT "</DirectoryMetadata>\n";
1430
1431 close(MOUT);
1432 }
1433 else {
1434 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1435 }
1436 }
1437
1438
1439 my $xml_in = "";
1440 if (!open(MIN,"<$metadata_xml_filename")) {
1441 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1442 }
1443 else {
1444 # Read them in
1445 my $line;
1446 while (defined ($line=<MIN>)) {
1447 $xml_in .= $line;
1448 }
1449 close(MIN);
1450
1451 # Filter with the call-back functions
1452 my $xml_out = "";
1453
1454 my $MOUT;
1455 if (!open($MOUT,">$metadata_xml_filename")) {
1456 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1457 }
1458 else {
1459 binmode($MOUT,":utf8");
1460
1461 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1462 # At the moment, I will just hack it!
1463 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1464 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1465 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1466 #print MOUT $xml_out;
1467
1468 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1469 metavalue => $metavalue,
1470 metamode => $metamode,
1471 src_file => $src_file,
1472 current_file => undef} );
1473 close($MOUT);
1474 }
1475 }
1476}
1477
1478
1479sub set_import_metadata
1480{
1481 my $self = shift @_;
1482
1483 my $username = $self->{'username'};
1484 my $collect = $self->{'collect'};
1485 my $gsdl_cgi = $self->{'gsdl_cgi'};
1486 my $gsdlhome = $self->{'gsdlhome'};
1487 my $infodbtype = $self->{'infodbtype'};
1488
1489 if ($baseaction::authentication_enabled) {
1490 # Ensure the user is allowed to edit this collection
1491 $self->authenticate_user($username, $collect);
1492 }
1493
1494
1495 # Obtain the collect and archive dir
1496 my $site = $self->{'site'};
1497 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1498
1499 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1500 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1501
1502 # Make sure the collection isn't locked by someone else
1503 $self->lock_collection($username, $collect);
1504
1505 # look up additional args
1506 # want either d= or f=
1507 my $docid = $self->{'d'};
1508 my $import_file = $self->{'f'};
1509 if ((!defined $docid) && (!defined $import_file)) {
1510 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1511 }
1512
1513 # Get the parameters and set default mode to "accumulate"
1514 my $metaname = $self->{'metaname'};
1515 my $metavalue = $self->{'metavalue'};
1516## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1517 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1518
1519 my $metamode = $self->{'metamode'};
1520 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1521 # make "accumulate" the default (less destructive, as won't actually
1522 # delete any existing values)
1523 $metamode = "accumulate";
1524 }
1525
1526 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1527 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1528 my $metadata_xml_file;
1529 my $import_filename = undef;
1530 if (defined $docid) {
1531 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1532 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1533
1534 # This now stores the full pathname
1535 $import_filename = $doc_rec->{'src-file'}->[0];
1536 }
1537 else {
1538 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1539 }
1540
1541 # figure out correct metadata.xml file [?]
1542 # Assuming the metadata.xml file is next to the source file
1543 # Note: This will not work if it is using the inherited metadata from the parent folder
1544 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1545 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1546
1547 # Edit the metadata.xml
1548 # Modified by Jeffrey from DL Consulting
1549 # Handle the case where there is one metadata.xml file for multiple FileSets
1550 # The XML filter needs to know whether it is in the right FileSet
1551 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1552 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1553 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1554 $metaname, $metavalue, $metamode, $import_tailname);
1555
1556 # Release the lock once it is done
1557 $self->unlock_collection($username, $collect);
1558
1559 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1560 $mess .= " $metaname";
1561 $mess .= " = $metavalue";
1562 $mess .= " ($metamode)\n";
1563
1564 $gsdl_cgi->generate_ok_message($mess);
1565
1566}
1567
1568sub set_import_metadata_array
1569{
1570 my $self = shift @_;
1571
1572 my $username = $self->{'username'};
1573 my $collect = $self->{'collect'};
1574 my $gsdl_cgi = $self->{'gsdl_cgi'};
1575 my $gsdlhome = $self->{'gsdlhome'};
1576
1577 if ($baseaction::authentication_enabled) {
1578 # Ensure the user is allowed to edit this collection
1579 &authenticate_user($gsdl_cgi, $username, $collect);
1580 }
1581
1582 my $site = $self->{'site'};
1583 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1584
1585 $gsdl_cgi->checked_chdir($collect_dir);
1586
1587 # Make sure the collection isn't locked by someone else
1588 $self->lock_collection($username, $collect);
1589
1590 # look up additional args
1591
1592 my $infodbtype = $self->{'infodbtype'};
1593
1594 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1595
1596 my $json_str = $self->{'json'};
1597 my $doc_array = decode_json $json_str;
1598
1599 my $global_status = 0;
1600 my $global_mess = "";
1601
1602 my @all_docids = ();
1603
1604 foreach my $doc_array_rec ( @$doc_array )
1605 {
1606 my $docid = $doc_array_rec->{'docid'};
1607 my $metaname = $doc_array_rec->{'metaname'};
1608 my $metamode = $self->{'metamode'};
1609 my $metavalue = $doc_array_rec->{'metavalue'};
1610
1611 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1612 # make "accumulate" the default (less destructive, as won't actually
1613 # delete any existing values)
1614 $metamode = "accumulate";
1615 }
1616
1617 push(@all_docids,$docid);
1618
1619 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1620 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1621 my $metadata_xml_file;
1622 my $import_filename = undef;
1623
1624 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1625 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1626
1627 # This now stores the full pathname
1628 $import_filename = $doc_rec->{'src-file'}->[0];
1629
1630 # figure out correct metadata.xml file [?]
1631 # Assuming the metadata.xml file is next to the source file
1632 # Note: This will not work if it is using the inherited metadata from the parent folder
1633 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1634 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1635
1636 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1637 }
1638
1639 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1640 $gsdl_cgi->generate_ok_message($mess);
1641
1642 # Release the lock once it is done
1643 $self->unlock_collection($username, $collect);
1644}
1645
1646sub remove_import_metadata
1647{
1648 my $self = shift @_;
1649
1650 my $username = $self->{'username'};
1651 my $collect = $self->{'collect'};
1652 my $gsdl_cgi = $self->{'gsdl_cgi'};
1653
1654 if ($baseaction::authentication_enabled) {
1655 # Ensure the user is allowed to edit this collection
1656 &authenticate_user($gsdl_cgi, $username, $collect);
1657 }
1658
1659 my $gsdlhome = $self->{'gsdlhome'};
1660 my $infodbtype = $self->{'infodbtype'};
1661
1662 # Obtain the collect dir
1663 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1664 my $site = $self->{'site'};
1665 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1666
1667 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1668 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1669
1670 # Make sure the collection isn't locked by someone else
1671 $self->lock_collection($username, $collect);
1672
1673 # look up additional args
1674 my $docid = $self->{'d'};
1675 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1676 {
1677 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1678 }
1679
1680 my $metaname = $self->{'metaname'};
1681 my $metavalue = $self->{'metavalue'};
1682 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1683
1684 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1685 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1686 my $metadata_xml_file;
1687 my $import_filename = undef;
1688 if (defined $docid)
1689 {
1690 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1691 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1692
1693 # This now stores the full pathname
1694 $import_filename = $doc_rec->{'src-file'}->[0];
1695 }
1696
1697 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1698 {
1699 $gsdl_cgi->generate_error_message("There is no metadata\n");
1700 }
1701
1702 # figure out correct metadata.xml file [?]
1703 # Assuming the metadata.xml file is next to the source file
1704 # Note: This will not work if it is using the inherited metadata from the parent folder
1705 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1706 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1707
1708 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1709
1710 # Release the lock once it is done
1711 $self->unlock_collection($username, $collect);
1712
1713 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1714 $mess .= " $metaname";
1715 $mess .= " = $metavalue\n";
1716
1717 $gsdl_cgi->generate_ok_message($mess);
1718}
1719
1720sub remove_from_metadata_xml
1721{
1722 my $self = shift @_;
1723 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1724
1725 # Set the call-back functions for the metadata tags
1726 my @rules =
1727 (
1728 _default => 'raw',
1729 'Metadata' => \&rfmxml_metadata,
1730 'FileName' => \&mxml_filename
1731 );
1732
1733 my $parser = XML::Rules->new
1734 (
1735 rules => \@rules,
1736 style => 'filter',
1737 output_encoding => 'utf8'
1738 );
1739
1740 my $xml_in = "";
1741 if (!open(MIN,"<$metadata_xml_filename"))
1742 {
1743 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1744 }
1745 else
1746 {
1747 # Read them in
1748 my $line;
1749 while (defined ($line=<MIN>)) {
1750 $xml_in .= $line;
1751 }
1752 close(MIN);
1753
1754 # Filter with the call-back functions
1755 my $xml_out = "";
1756
1757 my $MOUT;
1758 if (!open($MOUT,">$metadata_xml_filename")) {
1759 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1760 }
1761 else {
1762 binmode($MOUT,":utf8");
1763 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1764 close($MOUT);
1765 }
1766 }
1767}
1768
1769sub rfmxml_metadata
1770{
1771 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1772
1773 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1774 {
1775 return [];
1776 }
1777
1778 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1779 return [$tagname => $attrHash];
1780}
1781
1782sub remove_live_metadata
1783{
1784 my $self = shift @_;
1785
1786 my $username = $self->{'username'};
1787 my $collect = $self->{'collect'};
1788 my $gsdl_cgi = $self->{'gsdl_cgi'};
1789 my $gsdlhome = $self->{'gsdlhome'};
1790 my $infodbtype = $self->{'infodbtype'};
1791
1792 if ($baseaction::authentication_enabled) {
1793 # Ensure the user is allowed to edit this collection
1794 &authenticate_user($gsdl_cgi, $username, $collect);
1795 }
1796
1797 # Obtain the collect dir
1798 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1799 my $site = $self->{'site'};
1800 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1801
1802 # Make sure the collection isn't locked by someone else
1803 $self->lock_collection($username, $collect);
1804
1805 # look up additional args
1806 my $docid = $self->{'d'};
1807 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1808 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1809 }
1810
1811 # Generate the dbkey
1812 my $metaname = $self->{'metaname'};
1813 my $dbkey = "$docid.$metaname";
1814
1815 # To people who know $collect_tail please add some comments
1816 # Obtain the live gdbm_db path
1817 my $collect_tail = $collect;
1818 $collect_tail =~ s/^.*[\/\\]//;
1819 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1820 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1821
1822 # Remove the key
1823 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1824 my $status = system($cmd);
1825 if ($status != 0) {
1826 # Catch error if gdbmdel failed
1827 my $mess = "Failed to set metadata key: $dbkey\n";
1828
1829 $mess .= "PATH: $ENV{'PATH'}\n";
1830 $mess .= "cmd = $cmd\n";
1831 $mess .= "Exit status: $status\n";
1832 $mess .= "System Error Message: $!\n";
1833
1834 $gsdl_cgi->generate_error($mess);
1835 }
1836 else {
1837 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1838 }
1839
1840}
1841
1842
1843sub remove_metadata
1844{
1845 my $self = shift @_;
1846
1847 my $username = $self->{'username'};
1848 my $collect = $self->{'collect'};
1849 my $gsdl_cgi = $self->{'gsdl_cgi'};
1850 my $gsdlhome = $self->{'gsdlhome'};
1851 my $infodbtype = $self->{'infodbtype'};
1852
1853 if ($baseaction::authentication_enabled) {
1854 # Ensure the user is allowed to edit this collection
1855 &authenticate_user($gsdl_cgi, $username, $collect);
1856 }
1857
1858 # Obtain the collect dir
1859 my $site = $self->{'site'};
1860 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1861 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1862
1863 # Make sure the collection isn't locked by someone else
1864 $self->lock_collection($username, $collect);
1865
1866 # look up additional args
1867 my $docid = $self->{'d'};
1868 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1869 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1870 }
1871 my $metaname = $self->{'metaname'};
1872 my $metapos = $self->{'metapos'};
1873 my $metavalue = $self->{'metavalue'};
1874
1875 # To people who know $collect_tail please add some comments
1876 # Obtain the path to the database
1877 my $collect_tail = $collect;
1878 $collect_tail =~ s/^.*[\/\\]//;
1879 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1880 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1881
1882 # Read the docid entry
1883 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1884
1885 # Basically loop through and unescape_html the values
1886 foreach my $k (keys %$doc_rec) {
1887 my @escaped_v = ();
1888 foreach my $v (@{$doc_rec->{$k}}) {
1889 if ($k eq "contains") {
1890 # protect quotes in ".2;".3 etc
1891 $v =~ s/\"/\\\"/g;
1892 push(@escaped_v, $v);
1893 }
1894 else {
1895 my $ev = &ghtml::unescape_html($v);
1896 $ev =~ s/\"/\\\"/g;
1897 push(@escaped_v, $ev);
1898 }
1899 }
1900 $doc_rec->{$k} = \@escaped_v;
1901 }
1902
1903 # Check to make sure the key does exist
1904 if (!defined ($doc_rec->{$metaname})) {
1905 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1906 }
1907
1908 # Obtain the specified metadata pos
1909 $metapos = 0 if (!defined $metapos);
1910
1911 # consider check key is defined before deleting?
1912 # Loop through the metadata array and ignore the specified position
1913 my $filtered_metadata = [];
1914 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1915 for (my $i=0; $i<$num_metadata_vals; $i++) {
1916 my $metaval = shift(@{$doc_rec->{$metaname}});
1917
1918 if (!defined $metavalue && $i != $metapos) {
1919 push(@$filtered_metadata,$metaval)
1920 }
1921
1922 if(defined $metavalue && !($metavalue eq $metaval))
1923 {
1924 push(@$filtered_metadata,$metavalue)
1925 }
1926 }
1927 $doc_rec->{$metaname} = $filtered_metadata;
1928
1929 # Turn the record back to string
1930 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1931
1932 # Store it back to the database
1933 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1934 my $status = system($cmd);
1935 if ($status != 0) {
1936 my $mess = "Failed to set metadata key: $docid\n";
1937
1938 $mess .= "PATH: $ENV{'PATH'}\n";
1939 $mess .= "cmd = $cmd\n";
1940 $mess .= "Exit status: $status\n";
1941 $mess .= "System Error Message: $!\n";
1942
1943 $gsdl_cgi->generate_error($mess);
1944 }
1945 else {
1946 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1947 $mess .= " $metaname";
1948 $mess .= "->[$metapos]" if (defined $metapos);
1949
1950 $gsdl_cgi->generate_ok_message($mess);
1951 }
1952}
1953
1954
1955# Was trying to reused the codes, but the functions need to be broken
1956# down more before they can be reused, otherwise there will be too
1957# much overhead and duplicate process...
1958sub insert_metadata
1959{
1960 my $self = shift @_;
1961
1962 my $username = $self->{'username'};
1963 my $collect = $self->{'collect'};
1964 my $gsdl_cgi = $self->{'gsdl_cgi'};
1965 my $gsdlhome = $self->{'gsdlhome'};
1966 my $infodbtype = $self->{'infodbtype'};
1967
1968 # If the import metadata and gdbm database have been updated, we
1969 # need to insert some notification to warn user that the the text
1970 # they see at the moment is not indexed and require a rebuild.
1971 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1972
1973 if ($baseaction::authentication_enabled) {
1974 # Ensure the user is allowed to edit this collection
1975 $self->authenticate_user($username, $collect);
1976 }
1977
1978 # Obtain the collect and archive dir
1979 my $site = $self->{'site'};
1980 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1981 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1982 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1983
1984 # Make sure the collection isn't locked by someone else
1985 $self->lock_collection($username, $collect);
1986
1987 # Check additional args
1988 my $docid = $self->{'d'};
1989 if (!defined($docid)) {
1990 $gsdl_cgi->generate_error("No document id is specified: d=...");
1991 }
1992 my $metaname = $self->{'metaname'};
1993 if (!defined($metaname)) {
1994 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1995 }
1996 my $metavalue = $self->{'metavalue'};
1997 if (!defined($metavalue) || $metavalue eq "") {
1998 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1999 }
2000 # make "accumulate" the default (less destructive, as won't actually
2001 # delete any existing values)
2002 my $metamode = "accumulate";
2003
2004 #=======================================================================#
2005 # set_import_metadata [START]
2006 #=======================================================================#
2007 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2008 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2009 my $metadata_xml_file;
2010 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2011 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2012
2013 # This now stores the full pathname
2014 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2015
2016 # figure out correct metadata.xml file [?]
2017 # Assuming the metadata.xml file is next to the source file
2018 # Note: This will not work if it is using the inherited metadata from the parent folder
2019 my ($import_tailname, $import_dirname)
2020 = File::Basename::fileparse($import_filename);
2021 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2022
2023 # Shane's escape characters
2024 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2025 $metavalue =~ s/\,/&#44;/g;
2026 $metavalue =~ s/\:/&#58;/g;
2027 $metavalue =~ s/\|/&#124;/g;
2028 $metavalue =~ s/\(/&#40;/g;
2029 $metavalue =~ s/\)/&#41;/g;
2030 $metavalue =~ s/\[/&#91;/g;
2031 $metavalue =~ s/\\/&#92;/g;
2032 $metavalue =~ s/\]/&#93;/g;
2033 $metavalue =~ s/\{/&#123;/g;
2034 $metavalue =~ s/\}/&#125;/g;
2035 $metavalue =~ s/\"/&#34;/g;
2036 $metavalue =~ s/\`/&#96;/g;
2037 $metavalue =~ s/\n/_newline_/g;
2038
2039 # Edit the metadata.xml
2040 # Modified by Jeffrey from DL Consulting
2041 # Handle the case where there is one metadata.xml file for multiple FileSets
2042 # The XML filter needs to know whether it is in the right FileSet
2043 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2044 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2045 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2046 $metaname, $metavalue, $metamode, $import_tailname);
2047 #=======================================================================#
2048 # set_import_metadata [END]
2049 #=======================================================================#
2050
2051
2052 #=======================================================================#
2053 # set_metadata (accumulate version) [START]
2054 #=======================================================================#
2055 # To people who know $collect_tail please add some comments
2056 # Obtain path to the database
2057 my $collect_tail = $collect;
2058 $collect_tail =~ s/^.*[\/\\]//;
2059 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2060 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2061
2062 # Read the docid entry
2063 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2064
2065 foreach my $k (keys %$doc_rec) {
2066 my @escaped_v = ();
2067 foreach my $v (@{$doc_rec->{$k}}) {
2068 if ($k eq "contains") {
2069 # protect quotes in ".2;".3 etc
2070 $v =~ s/\"/\\\"/g;
2071 push(@escaped_v, $v);
2072 }
2073 else {
2074 my $ev = &ghtml::unescape_html($v);
2075 $ev =~ s/\"/\\\"/g;
2076 push(@escaped_v, $ev);
2077 }
2078 }
2079 $doc_rec->{$k} = \@escaped_v;
2080 }
2081
2082 # Protect the quotes
2083 $metavalue =~ s/\"/\\\"/g;
2084
2085 # Adds the pending macro
2086 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2087
2088 # If the metadata doesn't exist, create a new one
2089 if (!defined($doc_rec->{$metaname})){
2090 $doc_rec->{$metaname} = [ $macro_metavalue ];
2091 }
2092 # Else, let's acculumate the values
2093 else {
2094 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2095 }
2096
2097 # Generate the record string
2098 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2099
2100 # Store it into GDBM
2101 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2102 my $status = system($cmd);
2103 if ($status != 0) {
2104 # Catch error if gdbmget failed
2105 my $mess = "Failed to set metadata key: $docid\n";
2106
2107 $mess .= "PATH: $ENV{'PATH'}\n";
2108 $mess .= "cmd = $cmd\n";
2109 $mess .= "Exit status: $status\n";
2110 $mess .= "System Error Message: $!\n";
2111
2112 $gsdl_cgi->generate_error($mess);
2113 }
2114 else {
2115 my $mess = "insert-metadata successful: Key[$docid]\n";
2116 $mess .= " [In metadata.xml] $metaname";
2117 $mess .= " = $metavalue\n";
2118 $mess .= " [In database] $metaname";
2119 $mess .= " = $macro_metavalue\n";
2120 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2121 $gsdl_cgi->generate_ok_message($mess);
2122 }
2123 #=======================================================================#
2124 # set_metadata (accumulate version) [END]
2125 #=======================================================================#
2126
2127 # Release the lock once it is done
2128 $self->unlock_collection($username, $collect);
2129}
2130
21311;
Note: See TracBrowser for help on using the repository browser.