source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27112

Last change on this file since 27112 was 27112, checked in by ak19, 11 years ago

Adding in error processing code for when an invalid subsection is entered. Separate commit to allow easy uncommiting of the changes in this revision, so that the bugfix in the previous commit is not accidentally uncommited.

File size: 70.1 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
63
64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
68
69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
72
73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
85
86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
102
103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
106
107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
130
131 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
132 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
133
134 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
135
136 return bless $self, $class;
137}
138
139
140sub get_live_metadata
141{
142 my $self = shift @_;
143
144 my $username = $self->{'username'};
145 my $collect = $self->{'collect'};
146 my $gsdl_cgi = $self->{'gsdl_cgi'};
147 my $gsdlhome = $self->{'gsdlhome'};
148 my $infodbtype = $self->{'infodbtype'};
149
150 # live metadata gets/saves value scoped (prefixed) by the current usename
151 # so (for now) let's not bother to enforce authentication
152
153 # Obtain the collect dir
154 my $site = $self->{'site'};
155 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
157
158 # Make sure the collection isn't locked by someone else
159 $self->lock_collection($username, $collect);
160
161 # look up additional args
162 my $docid = $self->{'d'};
163 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
164 $gsdl_cgi->generate_error("No docid (d=...) specified.");
165 }
166
167 # Generate the dbkey
168 my $metaname = $self->{'metaname'};
169 my $dbkey = "$docid.$metaname";
170
171 # To people who know $collect_tail please add some comments
172 # Obtain path to the database
173 my $collect_tail = $collect;
174 $collect_tail =~ s/^.*[\/|\\]//;
175 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
176 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
177
178 # Obtain the content of the key
179 my $cmd = "gdbmget $infodb_file_path $dbkey";
180 if (open(GIN,"$cmd |") == 0) {
181 # Catch error if gdbmget failed
182 my $mess = "Failed to get metadata key: $metaname\n";
183 $mess .= "$!\n";
184
185 $gsdl_cgi->generate_error($mess);
186 }
187 else {
188 binmode(GIN,":utf8");
189 # Read everything in and concatenate them into $metavalue
190 my $metavalue = "";
191 my $line;
192 while (defined ($line=<GIN>)) {
193 $metavalue .= $line;
194 }
195 close(GIN);
196 chomp($metavalue); # Get rid off the tailing newlines
197 $gsdl_cgi->generate_ok_message("$metavalue");
198 }
199
200 # Release the lock once it is done
201 $self->unlock_collection($username, $collect);
202}
203
204
205sub get_metadata
206{
207 my $self = shift @_;
208
209 my $username = $self->{'username'};
210 my $collect = $self->{'collect'};
211 my $gsdl_cgi = $self->{'gsdl_cgi'};
212 my $gsdlhome = $self->{'gsdlhome'};
213
214 # Authenticate user if it is enabled
215 if ($baseaction::authentication_enabled) {
216 # Ensure the user is allowed to edit this collection
217 &authenticate_user($gsdl_cgi, $username, $collect);
218 }
219
220 # Obtain the collect dir
221 my $site = $self->{'site'};
222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
223 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
224
225 # Make sure the collection isn't locked by someone else
226 $self->lock_collection($username, $collect);
227
228 # look up additional args
229 my $docid = $self->{'d'};
230 my $metaname = $self->{'metaname'};
231 my $metapos = $self->{'metapos'};
232 my $infodbtype = $self->{'infodbtype'};
233
234 # To people who know $collect_tail please add some comments
235 # Obtain path to the database
236 my $collect_tail = $collect;
237 $collect_tail =~ s/^.*[\/\\]//;
238 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
239 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
240
241 # Read the docid entry
242 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
243
244 # Basically loop through and unescape_html the values
245 foreach my $k (keys %$doc_rec) {
246 my @escaped_v = ();
247 foreach my $v (@{$doc_rec->{$k}}) {
248 my $ev = &ghtml::unescape_html($v);
249 push(@escaped_v, $ev);
250 }
251 $doc_rec->{$k} = \@escaped_v;
252 }
253
254 # Obtain the specified metadata value
255 $metapos = 0 if (!defined $metapos);
256 my $metavalue = $doc_rec->{$metaname}->[$metapos];
257 $gsdl_cgi->generate_ok_message("$metavalue");
258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
261}
262
263
264sub get_import_metadata
265{
266 my $self = shift @_;
267
268 my $username = $self->{'username'};
269 my $collect = $self->{'collect'};
270 my $gsdl_cgi = $self->{'gsdl_cgi'};
271 my $gsdlhome = $self->{'gsdlhome'};
272
273 # Authenticate user if it is enabled
274 if ($baseaction::authentication_enabled) {
275 # Ensure the user is allowed to edit this collection
276 &authenticate_user($gsdl_cgi, $username, $collect);
277 }
278
279 # Obtain the collect dir
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
283
284 # Make sure the collection isn't locked by someone else
285 $self->lock_collection($username, $collect);
286
287 # look up additional args
288 my $docid = $self->{'d'};
289 my $metaname = $self->{'metaname'};
290 my $infodbtype = $self->{'infodbtype'};
291 if (!defined $docid)
292 {
293 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
294 }
295
296 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
297 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
298 my $metadata_xml_file;
299 my $import_filename = undef;
300
301
302 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
303 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
304 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
305
306 # This now stores the full pathname
307 $import_filename = $doc_rec->{'src-file'}->[0];
308
309 # figure out correct metadata.xml file [?]
310 # Assuming the metadata.xml file is next to the source file
311 # Note: This will not work if it is using the inherited metadata from the parent folder
312 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
313 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
314
315 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
316
317 # Release the lock once it is done
318 $self->unlock_collection($username, $collect);
319}
320
321sub get_metadata_from_metadata_xml
322{
323 my $self = shift @_;
324 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
325
326 my @rules =
327 (
328 _default => 'raw',
329 'Metadata' => \&gfmxml_metadata,
330 'FileName' => \&mxml_filename
331 );
332
333 my $parser = XML::Rules->new
334 (
335 rules => \@rules,
336 output_encoding => 'utf8'
337 );
338
339 my $xml_in = "";
340 if (!open(MIN,"<$metadata_xml_filename"))
341 {
342 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
343 }
344 else
345 {
346 # Read them in
347 my $line;
348 while (defined ($line=<MIN>)) {
349 $xml_in .= $line;
350 }
351 close(MIN);
352
353 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
354
355 if(defined $parser->{'pad'}->{'metavalue'})
356 {
357 return $parser->{'pad'}->{'metavalue'};
358 }
359 else
360 {
361 return "";
362 }
363 }
364}
365
366sub gfmxml_metadata
367{
368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
369
370 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
371 {
372 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
373 }
374}
375
376sub get_archives_metadata
377{
378 my $self = shift @_;
379
380 my $username = $self->{'username'};
381 my $collect = $self->{'collect'};
382 my $gsdl_cgi = $self->{'gsdl_cgi'};
383 my $gsdlhome = $self->{'gsdlhome'};
384 my $infodbtype = $self->{'infodbtype'};
385
386 # Authenticate user if it is enabled
387 if ($baseaction::authentication_enabled) {
388 # Ensure the user is allowed to edit this collection
389 &authenticate_user($gsdl_cgi, $username, $collect);
390 }
391
392 # Obtain the collect dir
393 my $site = $self->{'site'};
394 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
395
396 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
397
398 # Make sure the collection isn't locked by someone else
399 $self->lock_collection($username, $collect);
400
401 # look up additional args
402 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
403 $docid_secnum = "" if (!defined $docid_secnum);
404
405 my $metaname = $self->{'metaname'};
406 my $metapos = $self->{'metapos'};
407 $metapos = 0 if (!defined $metapos);
408
409 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
411
412 # This now stores the full pathname
413 my $doc_filename = $doc_rec->{'doc-file'}->[0];
414
415 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
416
417 # Release the lock once it is done
418 $self->unlock_collection($username, $collect);
419}
420
421sub get_metadata_from_archive_xml
422{
423 my $self = shift @_;
424 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
425
426 my @start_rules = ('Section' => \&dxml_start_section);
427
428 my @rules =
429 (
430 _default => 'raw',
431 'Metadata' => \&gfdxml_metadata
432 );
433
434 my $parser = XML::Rules->new
435 (
436 start_rules => \@start_rules,
437 rules => \@rules,
438 output_encoding => 'utf8'
439 );
440
441 my $xml_in = "";
442 if (!open(MIN,"<$doc_xml_filename"))
443 {
444 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
445 }
446 else
447 {
448 # Read them in
449 my $line;
450 while (defined ($line=<MIN>)) {
451 $xml_in .= $line;
452 }
453 close(MIN);
454
455 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
456
457 if(defined $parser->{'pad'}->{'metavalue'})
458 {
459 return $parser->{'pad'}->{'metavalue'};
460 }
461 else
462 {
463 return "";
464 }
465 }
466}
467
468sub gfdxml_metadata
469{
470 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
471
472 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
473 {
474 return;
475 }
476
477 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
478 {
479 if (!defined $parser->{'parameters'}->{'poscount'})
480 {
481 $parser->{'parameters'}->{'poscount'} = 0;
482 }
483 else
484 {
485 $parser->{'parameters'}->{'poscount'}++;
486 }
487 }
488
489 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
490 {
491 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
492 }
493}
494
495sub set_live_metadata
496{
497 my $self = shift @_;
498
499 my $username = $self->{'username'};
500 my $collect = $self->{'collect'};
501 my $gsdl_cgi = $self->{'gsdl_cgi'};
502 my $gsdlhome = $self->{'gsdlhome'};
503 my $infodbtype = $self->{'infodbtype'};
504
505 if ($baseaction::authentication_enabled) {
506 # Ensure the user is allowed to edit this collection
507 &authenticate_user($gsdl_cgi, $username, $collect);
508 }
509
510 # Obtain the collect dir
511 my $site = $self->{'site'};
512 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
513 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
514
515 # Make sure the collection isn't locked by someone else
516 $self->lock_collection($username, $collect);
517
518 # look up additional args
519 my $docid = $self->{'d'};
520 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
521 $gsdl_cgi->generate_error("No docid (d=...) specified.");
522 }
523 my $metavalue = $self->{'metavalue'};
524
525
526 # Generate the dbkey
527 my $metaname = $self->{'metaname'};
528 my $dbkey = "$docid.$metaname";
529
530 # To people who know $collect_tail please add some comments
531 # Obtain path to the database
532 my $collect_tail = $collect;
533 $collect_tail =~ s/^.*[\/\\]//;
534 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
535 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
536
537 # Set the new value
538 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
539 my $status = system($cmd);
540 if ($status != 0) {
541 # Catch error if gdbmget failed
542 my $mess = "Failed to set metadata key: $dbkey\n";
543
544 $mess .= "PATH: $ENV{'PATH'}\n";
545 $mess .= "cmd = $cmd\n";
546 $mess .= "Exit status: $status\n";
547 $mess .= "System Error Message: $!\n";
548
549 $gsdl_cgi->generate_error($mess);
550 }
551 else {
552 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
553 }
554
555 # Release the lock once it is done
556 $self->unlock_collection($username, $collect);
557}
558
559sub set_metadata_entry
560{
561 my $self = shift @_;
562 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
563
564 # To people who know $collect_tail please add some comments
565 # Obtain path to the database
566 my $collect_tail = $collect;
567 $collect_tail =~ s/^.*[\/\\]//;
568 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
569 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
570
571# print STDERR "**** infodb file path = $infodb_file_path\n";
572# print STDERR "***** infodb type = $infodbtype\n";
573
574 # Read the docid entry
575 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
576
577 # Set the metadata value
578 if (defined $metapos) {
579 $doc_rec->{$metaname}->[$metapos] = $metavalue;
580 }
581 else {
582 $doc_rec->{$metaname} = [ $metavalue ];
583 }
584
585 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
586
587 return $status;
588
589}
590
591sub set_metadata
592{
593 my $self = shift @_;
594
595 my $username = $self->{'username'};
596 my $collect = $self->{'collect'};
597 my $gsdl_cgi = $self->{'gsdl_cgi'};
598 my $gsdlhome = $self->{'gsdlhome'};
599
600 if ($baseaction::authentication_enabled) {
601 # Ensure the user is allowed to edit this collection
602 &authenticate_user($gsdl_cgi, $username, $collect);
603 }
604
605 my $site = $self->{'site'};
606 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
607
608 $gsdl_cgi->checked_chdir($collect_dir);
609
610 # Obtain the collect dir
611 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
612
613 # Make sure the collection isn't locked by someone else
614 $self->lock_collection($username, $collect);
615
616 # look up additional args
617 my $docid = $self->{'d'};
618 my $metaname = $self->{'metaname'};
619 my $metapos = $self->{'metapos'};
620 my $metavalue = $self->{'metavalue'};
621 my $infodbtype = $self->{'infodbtype'};
622
623 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
624
625 if ($status != 0) {
626 # Catch error if set infodb entry failed
627 my $mess = "Failed to set metadata key: $docid\n";
628
629 $mess .= "PATH: $ENV{'PATH'}\n";
630 $mess .= "Exit status: $status\n";
631 $mess .= "System Error Message: $!\n";
632
633 $gsdl_cgi->generate_error($mess);
634 }
635 else {
636 my $mess = "set-metadata successful: Key[$docid]\n";
637 $mess .= " $metaname";
638 $mess .= "->[$metapos]" if (defined $metapos);
639 $mess .= " = $metavalue";
640
641 $gsdl_cgi->generate_ok_message($mess);
642 }
643
644 # Release the lock once it is done
645 $self->unlock_collection($username, $collect);
646}
647
648
649sub set_metadata_array
650{
651 my $self = shift @_;
652
653 my $username = $self->{'username'};
654 my $collect = $self->{'collect'};
655 my $gsdl_cgi = $self->{'gsdl_cgi'};
656 my $gsdlhome = $self->{'gsdlhome'};
657
658 if ($baseaction::authentication_enabled) {
659 # Ensure the user is allowed to edit this collection
660 &authenticate_user($gsdl_cgi, $username, $collect);
661 }
662
663 my $site = $self->{'site'};
664 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
665
666 $gsdl_cgi->checked_chdir($collect_dir);
667
668 # Obtain the collect dir
669 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
670
671 # Make sure the collection isn't locked by someone else
672 $self->lock_collection($username, $collect);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $docid = $doc_array_rec->{'docid'};
690 my $metaname = $doc_array_rec->{'metaname'};
691 my $metapos = $doc_array_rec->{'metapos'};
692 my $metavalue = $doc_array_rec->{'metavalue'};
693
694 push(@all_docids,$docid);
695
696 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
697
698 if ($status != 0) {
699 # Catch error if set infodb entry failed
700 $global_status = $status;
701 $global_mess .= "Failed to set metadata key: $docid\n";
702 $global_mess .= "Exit status: $status\n";
703 $global_mess .= "System Error Message: $!\n";
704 $global_mess .= "-" x 20;
705 }
706 }
707
708 if ($global_status != 0) {
709 $global_mess .= "PATH: $ENV{'PATH'}\n";
710 $gsdl_cgi->generate_error($global_mess);
711 }
712 else {
713 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
714 $gsdl_cgi->generate_ok_message($mess);
715 }
716
717 # Release the lock once it is done
718 $self->unlock_collection($username, $collect);
719}
720
721
722sub dxml_metadata
723{
724 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
725 my $metaname = $parser->{'parameters'}->{'metaname'};
726 my $metamode = $parser->{'parameters'}->{'metamode'};
727
728 print STDERR "**** Processing closing </Metadata> tag\n";
729
730 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731
732 # Find the right metadata tag and checks if we are going to
733 # override it
734 #
735 # Note: This over writes the first metadata block it
736 # encountered. If there are multiple Sections in the doc.xml, it
737 # might not behave as you would expect
738
739 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741## print STDERR "**** metamode = $metamode\n";
742
743 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744 {
745 my $name_attr = $attrHash->{'name'};
746 if (($name_attr eq $metaname) && ($metamode eq "override"))
747 {
748 if (!defined $parser->{'parameters'}->{'poscount'})
749 {
750 $parser->{'parameters'}->{'poscount'} = 0;
751 }
752 else
753 {
754 $parser->{'parameters'}->{'poscount'}++;
755 }
756
757 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758 {
759 ##print STDERR "#### got match!!\n";
760 # Get the value and override the current value
761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
762 $attrHash->{'_content'} = $metavalue;
763
764 # Don't want it to wipe out any other pieces of metadata
765 $parser->{'parameters'}->{'metamode'} = "done";
766 }
767 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
768 {
769 my $metavalue = $parser->{'parameters'}->{'metavalue'};
770 $attrHash->{'_content'} = $metavalue;
771 $parser->{'parameters'}->{'metamode'} = "done";
772 }
773 }
774 }
775
776 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
777 return [$tagname => $attrHash];
778}
779
780# This method exists purely for catching invalid section numbers that the client
781# requested to edit. Once the parser has reached the end (the final </Archive> tag),
782# we've seen all the Sections in the doc.xml, and none of their section nums matched
783# if the metamode has not been set to 'done' by then.
784sub dxml_archive
785{
786 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
787 my $metamode = $parser->{'parameters'}->{'metamode'};
788
789 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
790 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
791
792# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
793
794 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
795 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
796
797 my $metaname = $parser->{'parameters'}->{'metaname'};
798 my $metavalue = $parser->{'parameters'}->{'metavalue'};
799
800 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
801 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
802 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
803 print STDERR "\tin section $opt_doc_secnum.\n";
804 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
805 }
806
807 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
808 return [$tagname => $attrHash];
809}
810
811sub dxml_description
812{
813 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
814 my $metamode = $parser->{'parameters'}->{'metamode'};
815
816 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
817 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
818
819 print STDERR "**** Processing a closing </Description> tag \n";
820# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
821
822 # Accumulate the metadata
823
824 # We'll be accumulating metadata at this point if we haven't found and therefore
825 # haven't processed the metadata yet.
826 # For subsections, this means that if we're at a matching subsection, but haven't
827 # found the correct metaname to override in that subsection, we accumulate it as new
828 # meta in the subsection by adding it to the current description.
829 # If there's no subsection info for the metadata, it will accumulate at the top level
830 # section description if we hadn't found a matching metaname to override at this point.
831
832 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
833 # at the toplevel section. In the latter case, it means we want to process meta in the
834 # toplevel section. So the eq check between the values below will work in all cases.
835
836 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
837 # this document has been provided. In that case, the metadata for that opt_doc_secnum
838 # won't get attached/accumulated to any part of the doc, not even its top-level section.
839
840 if ($curr_secnum eq $opt_doc_secnum
841 && ($metamode eq "accumulate" || $metamode eq "override")) {
842 if ($metamode eq "override") {
843 print "No metadata value to override. Switching 'metamode' to accumulate\n";
844 }
845
846 # If get to here and metamode is override, the this means there
847 # was no existing value to overide => treat as an append operation
848
849 # Tack a new metadata tag on to the end of the <Metadata>+ block
850 my $metaname = $parser->{'parameters'}->{'metaname'};
851 my $metavalue = $parser->{'parameters'}->{'metavalue'};
852
853 my $metadata_attr = {
854 '_content' => $metavalue,
855 'name' => $metaname,
856 'mode' => "accumulate"
857 };
858
859 my $append_metadata = [ "Metadata" => $metadata_attr ];
860 my $description_content = $attrHash->{'_content'};
861
862 print "Appending metadata to doc.xml\n";
863
864 if (ref($description_content)) {
865 # got some existing interesting nested content
866 push(@$description_content, " ", $append_metadata ,"\n ");
867 }
868 else {
869 #description_content is most likely a string such as "\n"
870 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
871 }
872
873 $parser->{'parameters'}->{'metamode'} = "done";
874 }
875 else {
876 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
877## print STDERR "**** NOT ACCUMULATE?!? \n";
878 }
879
880 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
881 return [$tagname => $attrHash];
882}
883
884
885sub dxml_start_section
886{
887 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
888
889 my $new_depth = scalar(@$contextArray);
890
891 print STDERR "**** START SECTION \n";
892
893 if ($new_depth == 1) {
894 $parser->{'parameters'}->{'curr_section_depth'} = 1;
895 $parser->{'parameters'}->{'curr_section_num'} = "";
896 }
897
898 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
899 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
900
901 my $new_secnum;
902
903 if ($new_depth > $old_depth) {
904 # child subsection
905 $new_secnum = "$old_secnum.1";
906 }
907 elsif ($new_depth == $old_depth) {
908 # sibling section => increase it's value by 1
909 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
910 $tail_num++;
911 $new_secnum = $old_secnum;
912 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
913 }
914 else {
915 # back up to parent section => lopp off tail
916 $new_secnum = $old_secnum;
917 $new_secnum =~ s/\.\d+$//;
918 }
919
920 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
921 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
922
923 1;
924}
925
926sub edit_xml_file
927{
928 my $self = shift @_;
929 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
930
931 # use XML::Rules to add it in (read in and out again)
932 my $parser = XML::Rules->new(start_rules => $start_rules,
933 rules => $rules,
934 style => 'filter',
935 output_encoding => 'utf8' );
936
937 my $xml_in = "";
938 if (!open(MIN,"<$filename")) {
939 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
940 }
941 else {
942 # Read all the text in
943 my $line;
944 while (defined ($line=<MIN>)) {
945 $xml_in .= $line;
946 }
947 close(MIN);
948
949 my $MOUT;
950 if (!open($MOUT,">$filename")) {
951 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
952 }
953 else {
954 # Matched lines will get handled by the call backs
955## my $xml_out = "";
956
957 binmode($MOUT,":utf8");
958 $parser->filter($xml_in,$MOUT, $options);
959
960# binmode(MOUT,":utf8");
961# print MOUT $xml_out;
962 close($MOUT);
963 }
964 }
965
966 # copy across any custom error information that was stored during parsing
967 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
968}
969
970sub edit_doc_xml
971{
972 my $self = shift @_;
973 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
974
975 my $info_mess = <<RAWEND;
976****************************
977 edit_doc_xml()
978****************************
979doc_xml_filename = $doc_xml_filename
980metaname = $metaname
981metavalue = $metavalue
982metapos = $metapos
983metamode = $metamode
984opt_secnum = $opt_secnum
985prevmetavalue = $prevmetavalue
986****************************
987RAWEND
988
989 $gsdl_cgi->generate_message($info_mess);
990
991 # To monitor which section/subsection number we are in
992 my @start_rules =
993 ( 'Section' => \&dxml_start_section );
994
995 # use XML::Rules to add it in (read in and out again)
996 # Set the call back functions
997 my @rules =
998 ( _default => 'raw',
999 'Metadata' => \&dxml_metadata,
1000 'Description' => \&dxml_description,
1001 'Archive' => \&dxml_archive); # just for catching errors at end
1002
1003 # Sets the parameters
1004 my $options = { 'metaname' => $metaname,
1005 'metapos' => $metapos,
1006 'metavalue' => $metavalue,
1007 'metamode' => $metamode,
1008 'prevmetavalue' => $prevmetavalue };
1009
1010 if (defined $opt_secnum) {
1011 $options->{'secnum'} = $opt_secnum;
1012 }
1013
1014 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1015}
1016
1017sub set_archives_metadata_entry
1018{
1019 my $self = shift @_;
1020 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1021
1022 my $info_mess = <<RAWEND;
1023****************************
1024 set_archives_metadata_entry()
1025****************************
1026archive_dir = $archive_dir
1027collect_dir = $collect_dir
1028collect = $collect
1029infodbtype = $infodbtype
1030docid = $docid
1031metaname = $metaname
1032metapos = $metapos
1033metavalue = $metavalue
1034metamode = $metamode
1035prevmetavalue = $prevmetavalue
1036****************************
1037RAWEND
1038
1039 $gsdl_cgi->generate_message($info_mess);
1040
1041 # Obtain the doc.xml path for the specified docID
1042 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1043
1044 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1045 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1046 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1047
1048 # The $doc_xml_file is relative to the archives, and now let's get the full path
1049 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1050 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1051
1052 # Edit the doc.xml file with the specified metadata name, value and position.
1053 # TODO: there is a potential problem here as this edit_doc_xml function
1054 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1055 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1056
1057 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1058 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1059
1060 # return 0; # return 0 for now to indicate no error
1061 return (defined $self->{'error_msg'}) ? 1 : 0;
1062}
1063
1064
1065sub set_archives_metadata
1066{
1067 my $self = shift @_;
1068
1069 my $username = $self->{'username'};
1070 my $collect = $self->{'collect'};
1071 my $gsdl_cgi = $self->{'gsdl_cgi'};
1072 my $gsdlhome = $self->{'gsdlhome'};
1073 my $infodbtype = $self->{'infodbtype'};
1074
1075 if ($baseaction::authentication_enabled) {
1076 # Ensure the user is allowed to edit this collection
1077 $self->authenticate_user($username, $collect);
1078 }
1079
1080 my $site = $self->{'site'};
1081
1082 # Obtain the collect and archive dir
1083 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1084
1085 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1086
1087 # Make sure the collection isn't locked by someone else
1088 $self->lock_collection($username, $collect);
1089
1090 # look up additional args
1091 my $docid = $self->{'d'};
1092 my $metaname = $self->{'metaname'};
1093 my $metavalue = $self->{'metavalue'};
1094 my $prevmetavalue = $self->{'prevmetavalue'};
1095
1096 my $metapos = $self->{'metapos'};
1097 $metapos = 0 if (!defined $metapos);
1098
1099 my $metamode = $self->{'metamode'};
1100 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1101 # make "accumulate" the default (less destructive, as won't actually
1102 # delete any existing values)
1103 $metamode = "accumulate";
1104 }
1105
1106 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1107 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1108
1109 # Release the lock once it is done
1110 $self->unlock_collection($username, $collect);
1111
1112 if ($status == 0) {
1113 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1114 $mess .= " $metaname";
1115 $mess .= "->[$metapos]" if (defined $metapos);
1116 $mess .= " = $metavalue";
1117 $mess .= " ($metamode)\n";
1118
1119 $gsdl_cgi->generate_ok_message($mess);
1120 }
1121 else {
1122 my $mess .= "Failed to set archives metadata key: $docid\n";
1123 $mess .= "Exit status: $status\n";
1124 if(defined $self->{'error_msg'}) {
1125 $mess .= "Error Message: $self->{'error_msg'}\n";
1126 } else {
1127 $mess .= "System Error Message: $!\n";
1128 }
1129 $mess .= "-" x 20 . "\n";
1130
1131 $gsdl_cgi->generate_error($mess);
1132 }
1133}
1134
1135
1136sub set_archives_metadata_array
1137{
1138 my $self = shift @_;
1139
1140 my $username = $self->{'username'};
1141 my $collect = $self->{'collect'};
1142 my $gsdl_cgi = $self->{'gsdl_cgi'};
1143 my $gsdlhome = $self->{'gsdlhome'};
1144
1145 if ($baseaction::authentication_enabled) {
1146 # Ensure the user is allowed to edit this collection
1147 &authenticate_user($gsdl_cgi, $username, $collect);
1148 }
1149
1150 my $site = $self->{'site'};
1151 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1152
1153 $gsdl_cgi->checked_chdir($collect_dir);
1154
1155 # Obtain the collect dir
1156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1157
1158 # Make sure the collection isn't locked by someone else
1159 $self->lock_collection($username, $collect);
1160
1161 # look up additional args
1162
1163 my $infodbtype = $self->{'infodbtype'};
1164
1165 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1166
1167 my $json_str = $self->{'json'};
1168 my $doc_array = decode_json $json_str;
1169
1170
1171 my $global_status = 0;
1172 my $global_mess = "";
1173
1174 my @all_docids = ();
1175
1176 foreach my $doc_array_rec ( @$doc_array ) {
1177
1178 my $docid = $doc_array_rec->{'docid'};
1179 my $metaname = $doc_array_rec->{'metaname'};
1180 my $metapos = $doc_array_rec->{'metapos'};
1181 my $metamode = $self->{'metamode'};
1182 my $metavalue = $doc_array_rec->{'metavalue'};
1183
1184 # Some sanity checks
1185 $metapos = 0 if (!defined $metapos);
1186
1187 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1188 # make "accumulate" the default (less destructive, as won't actually
1189 # delete any existing values)
1190 $metamode = "accumulate";
1191 }
1192
1193 push(@all_docids,$docid);
1194
1195 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1196 $metaname,$metapos,$metavalue,$metamode);
1197
1198 if ($status != 0) {
1199 # Catch error if set infodb entry failed
1200 $global_status = $status;
1201 $global_mess .= "Failed to set metadata key: $docid\n";
1202 $global_mess .= "Exit status: $status\n";
1203 $global_mess .= "System Error Message: $!\n";
1204 $global_mess .= "-" x 20 . "\n";
1205 }
1206 }
1207
1208 if ($global_status != 0) {
1209 $global_mess .= "PATH: $ENV{'PATH'}\n";
1210 $gsdl_cgi->generate_error($global_mess);
1211 }
1212 else {
1213 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1214 $gsdl_cgi->generate_ok_message($mess);
1215 }
1216
1217 # Release the lock once it is done
1218 $self->unlock_collection($username, $collect);
1219}
1220
1221sub remove_archives_metadata
1222{
1223 my $self = shift @_;
1224
1225 my $username = $self->{'username'};
1226 my $collect = $self->{'collect'};
1227 my $gsdl_cgi = $self->{'gsdl_cgi'};
1228 my $gsdlhome = $self->{'gsdlhome'};
1229 my $infodbtype = $self->{'infodbtype'};
1230
1231 if ($baseaction::authentication_enabled)
1232 {
1233 # Ensure the user is allowed to edit this collection
1234 &authenticate_user($gsdl_cgi, $username, $collect);
1235 }
1236
1237 my $site = $self->{'site'};
1238
1239 # Obtain the collect and archive dir
1240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1241
1242 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1243
1244 # Make sure the collection isn't locked by someone else
1245 $self->lock_collection($username, $collect);
1246
1247 # look up additional args
1248 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1249
1250 my $metaname = $self->{'metaname'};
1251 my $metapos = $self->{'metapos'};
1252 $metapos = 0 if (!defined $metapos);
1253
1254 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1255 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1256
1257 # This now stores the full pathname
1258 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1259
1260 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1261
1262 # Release the lock once it is done
1263 $self->unlock_collection($username, $collect);
1264
1265 if ($status == 0)
1266 {
1267 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1268 $mess .= " $metaname";
1269 $mess .= "->[$metapos]" if (defined $metapos);
1270
1271 $gsdl_cgi->generate_ok_message($mess);
1272 }
1273 else
1274 {
1275 my $mess .= "Failed to remove archives metadata key: $docid\n";
1276 $mess .= "Exit status: $status\n";
1277 $mess .= "System Error Message: $!\n";
1278 $mess .= "-" x 20 . "\n";
1279
1280 $gsdl_cgi->generate_error($mess);
1281 }
1282}
1283
1284sub remove_from_doc_xml
1285{
1286 my $self = shift @_;
1287 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1288
1289 my @start_rules = ('Section' => \&dxml_start_section);
1290
1291 # Set the call-back functions for the metadata tags
1292 my @rules =
1293 (
1294 _default => 'raw',
1295 'Metadata' => \&rfdxml_metadata
1296 );
1297
1298 my $parser = XML::Rules->new
1299 (
1300 start_rules => \@start_rules,
1301 rules => \@rules,
1302 style => 'filter',
1303 output_encoding => 'utf8'
1304 );
1305
1306 my $status = 0;
1307 my $xml_in = "";
1308 if (!open(MIN,"<$doc_xml_filename"))
1309 {
1310 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1311 $status = 1;
1312 }
1313 else
1314 {
1315 # Read them in
1316 my $line;
1317 while (defined ($line=<MIN>)) {
1318 $xml_in .= $line;
1319 }
1320 close(MIN);
1321
1322 # Filter with the call-back functions
1323 my $xml_out = "";
1324
1325 my $MOUT;
1326 if (!open($MOUT,">$doc_xml_filename")) {
1327 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1328 $status = 1;
1329 }
1330 else {
1331 binmode($MOUT,":utf8");
1332 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1333 close($MOUT);
1334 }
1335 }
1336 return $status;
1337}
1338
1339sub rfdxml_metadata
1340{
1341 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1342
1343 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1344 {
1345 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1346 return [$tagname => $attrHash];
1347 }
1348
1349 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1350 {
1351 if (!defined $parser->{'parameters'}->{'poscount'})
1352 {
1353 $parser->{'parameters'}->{'poscount'} = 0;
1354 }
1355 else
1356 {
1357 $parser->{'parameters'}->{'poscount'}++;
1358 }
1359 }
1360
1361 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1362 {
1363 return [];
1364 }
1365
1366 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1367 {
1368 return [];
1369 }
1370
1371 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1372 return [$tagname => $attrHash];
1373}
1374
1375sub mxml_metadata
1376{
1377 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1378 my $metaname = $parser->{'parameters'}->{'metaname'};
1379 my $metamode = $parser->{'parameters'}->{'metamode'};
1380
1381 # Report error if we don't see FileName tag before this
1382 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1383
1384 # Don't do anything if we are not in the right FileSet
1385 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1386 if ($file_regexp =~ /\.\*/) {
1387 # Only interested in a file_regexp if it specifies precisely one
1388 # file.
1389 # So, skip anything with a .* in it as it is too general
1390 return [$tagname => $attrHash];
1391 }
1392 my $src_file = $parser->{'parameters'}->{'src_file'};
1393 if (!($src_file =~ /$file_regexp/)) {
1394 return [$tagname => $attrHash];
1395 }
1396## print STDERR "*** mxl metamode = $metamode\n";
1397
1398 # Find the right metadata tag and checks if we are going to override it
1399 my $name_attr = $attrHash->{'name'};
1400 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1401 # Get the value and override the current value
1402 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1403 $attrHash->{'_content'} = $metavalue;
1404
1405## print STDERR "**** overrideing metadata.xml\n";
1406
1407 # Don't want it to wipe out any other pieces of metadata
1408 $parser->{'parameters'}->{'metamode'} = "done";
1409 }
1410
1411 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1412 return [$tagname => $attrHash];
1413}
1414
1415
1416sub mxml_description
1417{
1418 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1419 my $metamode = $parser->{'parameters'}->{'metamode'};
1420
1421 # Failed... Report error if we don't see FileName tag before this
1422 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1423
1424 # Don't do anything if we are not in the right FileSet
1425 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1426 if ($file_regexp =~ m/\.\*/) {
1427 # Only interested in a file_regexp if it specifies precisely one
1428 # file.
1429 # So, skip anything with a .* in it as it is too general
1430 return [$tagname => $attrHash];
1431 }
1432 my $src_file = $parser->{'parameters'}->{'src_file'};
1433
1434 if (!($src_file =~ m/$file_regexp/)) {
1435 return [$tagname => $attrHash];
1436 }
1437
1438 # Accumulate the metadata block to the end of the description block
1439 # Note: This adds metadata block to all description blocks, so if there are
1440 # multiple FileSets, it will add to all of them
1441 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1442 # if metamode was "override" but get to here then it failed to
1443 # find an item to override, in which case it should append its
1444 # value to the end, just like the "accumulate" mode
1445
1446 # tack a new metadata tag on to the end of the <Metadata>+ block
1447 my $metaname = $parser->{'parameters'}->{'metaname'};
1448 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1449
1450 my $metadata_attr = { '_content' => $metavalue,
1451 'name' => $metaname,
1452 'mode' => "accumulate" };
1453
1454 my $append_metadata = [ "Metadata" => $metadata_attr ];
1455 my $description_content = $attrHash->{'_content'};
1456
1457## print STDERR "*** appending to metadata.xml\n";
1458
1459 # append the new metadata element to the end of the current
1460 # content contained inside this tag
1461 if (ref($description_content) eq "") {
1462 # => string or numeric literal
1463 # this is caused by a <Description> block has no <Metadata> child elements
1464 # => set up an empty array in '_content'
1465 $attrHash->{'_content'} = [ "\n" ];
1466 $description_content = $attrHash->{'_content'};
1467 }
1468
1469 push(@$description_content," ", $append_metadata ,"\n ");
1470 $parser->{'parameters'}->{'metamode'} = "done";
1471 }
1472
1473 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1474 return [$tagname => $attrHash];
1475}
1476
1477
1478sub mxml_filename
1479{
1480 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1481
1482 # Store the filename of the Current Fileset
1483 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1484 # FileName tag must come before Description tag
1485 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1486
1487 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1488 return [$tagname => $attrHash];
1489}
1490
1491
1492sub mxml_fileset
1493{
1494 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1495
1496 # Initilise the current_file
1497 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1498 # FileName tag must come before Description tag
1499 $parser->{'parameters'}->{'current_file'} = "";
1500
1501 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1502 return [$tagname => $attrHash];
1503}
1504
1505
1506sub edit_metadata_xml
1507{
1508 my $self = shift @_;
1509 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1510
1511 # Set the call-back functions for the metadata tags
1512 my @rules =
1513 ( _default => 'raw',
1514 'FileName' => \&mxml_filename,
1515 'Metadata' => \&mxml_metadata,
1516 'Description' => \&mxml_description,
1517 'FileSet' => \&mxml_fileset);
1518
1519 # use XML::Rules to add it in (read in and out again)
1520 my $parser = XML::Rules->new(rules => \@rules,
1521 style => 'filter',
1522 output_encoding => 'utf8');
1523
1524 if (!-e $metadata_xml_filename) {
1525
1526 if (open(MOUT,">$metadata_xml_filename")) {
1527
1528 my $src_file_re = &util::filename_to_regex($src_file);
1529 # shouldn't the following also be in the above utility routine??
1530 # $src_file_re =~ s/\./\\./g;
1531
1532 print MOUT "<?xml version=\"1.0\"?>\n";
1533 print MOUT "<DirectoryMetadata>\n";
1534 print MOUT " <FileSet>\n";
1535 print MOUT " <FileName>$src_file_re</FileName>\n";
1536 print MOUT " <Description>\n";
1537 print MOUT " </Description>\n";
1538 print MOUT " </FileSet>\n";
1539 print MOUT "</DirectoryMetadata>\n";
1540
1541 close(MOUT);
1542 }
1543 else {
1544 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1545 }
1546 }
1547
1548
1549 my $xml_in = "";
1550 if (!open(MIN,"<$metadata_xml_filename")) {
1551 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1552 }
1553 else {
1554 # Read them in
1555 my $line;
1556 while (defined ($line=<MIN>)) {
1557 $xml_in .= $line;
1558 }
1559 close(MIN);
1560
1561 # Filter with the call-back functions
1562 my $xml_out = "";
1563
1564 my $MOUT;
1565 if (!open($MOUT,">$metadata_xml_filename")) {
1566 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1567 }
1568 else {
1569 binmode($MOUT,":utf8");
1570
1571 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1572 # At the moment, I will just hack it!
1573 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1574 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1575 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1576 #print MOUT $xml_out;
1577
1578 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1579 metavalue => $metavalue,
1580 metamode => $metamode,
1581 src_file => $src_file,
1582 current_file => undef} );
1583 close($MOUT);
1584 }
1585 }
1586}
1587
1588
1589sub set_import_metadata
1590{
1591 my $self = shift @_;
1592
1593 my $username = $self->{'username'};
1594 my $collect = $self->{'collect'};
1595 my $gsdl_cgi = $self->{'gsdl_cgi'};
1596 my $gsdlhome = $self->{'gsdlhome'};
1597 my $infodbtype = $self->{'infodbtype'};
1598
1599 if ($baseaction::authentication_enabled) {
1600 # Ensure the user is allowed to edit this collection
1601 $self->authenticate_user($username, $collect);
1602 }
1603
1604
1605 # Obtain the collect and archive dir
1606 my $site = $self->{'site'};
1607 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1608
1609 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1610 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1611
1612 # Make sure the collection isn't locked by someone else
1613 $self->lock_collection($username, $collect);
1614
1615 # look up additional args
1616 # want either d= or f=
1617 my $docid = $self->{'d'};
1618 my $import_file = $self->{'f'};
1619 if ((!defined $docid) && (!defined $import_file)) {
1620 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1621 }
1622
1623 # Get the parameters and set default mode to "accumulate"
1624 my $metaname = $self->{'metaname'};
1625 my $metavalue = $self->{'metavalue'};
1626## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1627 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1628
1629 my $metamode = $self->{'metamode'};
1630 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1631 # make "accumulate" the default (less destructive, as won't actually
1632 # delete any existing values)
1633 $metamode = "accumulate";
1634 }
1635
1636 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1637 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1638 my $metadata_xml_file;
1639 my $import_filename = undef;
1640 if (defined $docid) {
1641 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1642 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1643
1644 # This now stores the full pathname
1645 $import_filename = $doc_rec->{'src-file'}->[0];
1646 }
1647 else {
1648 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1649 }
1650
1651 # figure out correct metadata.xml file [?]
1652 # Assuming the metadata.xml file is next to the source file
1653 # Note: This will not work if it is using the inherited metadata from the parent folder
1654 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1655 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1656
1657 # Edit the metadata.xml
1658 # Modified by Jeffrey from DL Consulting
1659 # Handle the case where there is one metadata.xml file for multiple FileSets
1660 # The XML filter needs to know whether it is in the right FileSet
1661 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1662 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1663 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1664 $metaname, $metavalue, $metamode, $import_tailname);
1665
1666 # Release the lock once it is done
1667 $self->unlock_collection($username, $collect);
1668
1669 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1670 $mess .= " $metaname";
1671 $mess .= " = $metavalue";
1672 $mess .= " ($metamode)\n";
1673
1674 $gsdl_cgi->generate_ok_message($mess);
1675
1676}
1677
1678sub set_import_metadata_array
1679{
1680 my $self = shift @_;
1681
1682 my $username = $self->{'username'};
1683 my $collect = $self->{'collect'};
1684 my $gsdl_cgi = $self->{'gsdl_cgi'};
1685 my $gsdlhome = $self->{'gsdlhome'};
1686
1687 if ($baseaction::authentication_enabled) {
1688 # Ensure the user is allowed to edit this collection
1689 &authenticate_user($gsdl_cgi, $username, $collect);
1690 }
1691
1692 my $site = $self->{'site'};
1693 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1694
1695 $gsdl_cgi->checked_chdir($collect_dir);
1696
1697 # Make sure the collection isn't locked by someone else
1698 $self->lock_collection($username, $collect);
1699
1700 # look up additional args
1701
1702 my $infodbtype = $self->{'infodbtype'};
1703
1704 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1705
1706 my $json_str = $self->{'json'};
1707 my $doc_array = decode_json $json_str;
1708
1709 my $global_status = 0;
1710 my $global_mess = "";
1711
1712 my @all_docids = ();
1713
1714 foreach my $doc_array_rec ( @$doc_array )
1715 {
1716 my $docid = $doc_array_rec->{'docid'};
1717 my $metaname = $doc_array_rec->{'metaname'};
1718 my $metamode = $self->{'metamode'};
1719 my $metavalue = $doc_array_rec->{'metavalue'};
1720
1721 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1722 # make "accumulate" the default (less destructive, as won't actually
1723 # delete any existing values)
1724 $metamode = "accumulate";
1725 }
1726
1727 push(@all_docids,$docid);
1728
1729 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1730 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1731 my $metadata_xml_file;
1732 my $import_filename = undef;
1733
1734 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1735 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1736
1737 # This now stores the full pathname
1738 $import_filename = $doc_rec->{'src-file'}->[0];
1739
1740 # figure out correct metadata.xml file [?]
1741 # Assuming the metadata.xml file is next to the source file
1742 # Note: This will not work if it is using the inherited metadata from the parent folder
1743 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1744 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1745
1746 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1747 }
1748
1749 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1750 $gsdl_cgi->generate_ok_message($mess);
1751
1752 # Release the lock once it is done
1753 $self->unlock_collection($username, $collect);
1754}
1755
1756sub remove_import_metadata
1757{
1758 my $self = shift @_;
1759
1760 my $username = $self->{'username'};
1761 my $collect = $self->{'collect'};
1762 my $gsdl_cgi = $self->{'gsdl_cgi'};
1763
1764 if ($baseaction::authentication_enabled) {
1765 # Ensure the user is allowed to edit this collection
1766 &authenticate_user($gsdl_cgi, $username, $collect);
1767 }
1768
1769 my $gsdlhome = $self->{'gsdlhome'};
1770 my $infodbtype = $self->{'infodbtype'};
1771
1772 # Obtain the collect dir
1773 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1774 my $site = $self->{'site'};
1775 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1776
1777 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1778 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1779
1780 # Make sure the collection isn't locked by someone else
1781 $self->lock_collection($username, $collect);
1782
1783 # look up additional args
1784 my $docid = $self->{'d'};
1785 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1786 {
1787 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1788 }
1789
1790 my $metaname = $self->{'metaname'};
1791 my $metavalue = $self->{'metavalue'};
1792 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1793
1794 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1795 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1796 my $metadata_xml_file;
1797 my $import_filename = undef;
1798 if (defined $docid)
1799 {
1800 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1801 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1802
1803 # This now stores the full pathname
1804 $import_filename = $doc_rec->{'src-file'}->[0];
1805 }
1806
1807 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1808 {
1809 $gsdl_cgi->generate_error_message("There is no metadata\n");
1810 }
1811
1812 # figure out correct metadata.xml file [?]
1813 # Assuming the metadata.xml file is next to the source file
1814 # Note: This will not work if it is using the inherited metadata from the parent folder
1815 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1816 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1817
1818 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1819
1820 # Release the lock once it is done
1821 $self->unlock_collection($username, $collect);
1822
1823 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1824 $mess .= " $metaname";
1825 $mess .= " = $metavalue\n";
1826
1827 $gsdl_cgi->generate_ok_message($mess);
1828}
1829
1830sub remove_from_metadata_xml
1831{
1832 my $self = shift @_;
1833 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1834
1835 # Set the call-back functions for the metadata tags
1836 my @rules =
1837 (
1838 _default => 'raw',
1839 'Metadata' => \&rfmxml_metadata,
1840 'FileName' => \&mxml_filename
1841 );
1842
1843 my $parser = XML::Rules->new
1844 (
1845 rules => \@rules,
1846 style => 'filter',
1847 output_encoding => 'utf8'
1848 );
1849
1850 my $xml_in = "";
1851 if (!open(MIN,"<$metadata_xml_filename"))
1852 {
1853 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1854 }
1855 else
1856 {
1857 # Read them in
1858 my $line;
1859 while (defined ($line=<MIN>)) {
1860 $xml_in .= $line;
1861 }
1862 close(MIN);
1863
1864 # Filter with the call-back functions
1865 my $xml_out = "";
1866
1867 my $MOUT;
1868 if (!open($MOUT,">$metadata_xml_filename")) {
1869 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1870 }
1871 else {
1872 binmode($MOUT,":utf8");
1873 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1874 close($MOUT);
1875 }
1876 }
1877}
1878
1879sub rfmxml_metadata
1880{
1881 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1882
1883 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1884 {
1885 return [];
1886 }
1887
1888 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1889 return [$tagname => $attrHash];
1890}
1891
1892sub remove_live_metadata
1893{
1894 my $self = shift @_;
1895
1896 my $username = $self->{'username'};
1897 my $collect = $self->{'collect'};
1898 my $gsdl_cgi = $self->{'gsdl_cgi'};
1899 my $gsdlhome = $self->{'gsdlhome'};
1900 my $infodbtype = $self->{'infodbtype'};
1901
1902 if ($baseaction::authentication_enabled) {
1903 # Ensure the user is allowed to edit this collection
1904 &authenticate_user($gsdl_cgi, $username, $collect);
1905 }
1906
1907 # Obtain the collect dir
1908 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1909 my $site = $self->{'site'};
1910 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1911
1912 # Make sure the collection isn't locked by someone else
1913 $self->lock_collection($username, $collect);
1914
1915 # look up additional args
1916 my $docid = $self->{'d'};
1917 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1918 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1919 }
1920
1921 # Generate the dbkey
1922 my $metaname = $self->{'metaname'};
1923 my $dbkey = "$docid.$metaname";
1924
1925 # To people who know $collect_tail please add some comments
1926 # Obtain the live gdbm_db path
1927 my $collect_tail = $collect;
1928 $collect_tail =~ s/^.*[\/\\]//;
1929 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1930 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1931
1932 # Remove the key
1933 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1934 my $status = system($cmd);
1935 if ($status != 0) {
1936 # Catch error if gdbmdel failed
1937 my $mess = "Failed to set metadata key: $dbkey\n";
1938
1939 $mess .= "PATH: $ENV{'PATH'}\n";
1940 $mess .= "cmd = $cmd\n";
1941 $mess .= "Exit status: $status\n";
1942 $mess .= "System Error Message: $!\n";
1943
1944 $gsdl_cgi->generate_error($mess);
1945 }
1946 else {
1947 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1948 }
1949
1950}
1951
1952
1953sub remove_metadata
1954{
1955 my $self = shift @_;
1956
1957 my $username = $self->{'username'};
1958 my $collect = $self->{'collect'};
1959 my $gsdl_cgi = $self->{'gsdl_cgi'};
1960 my $gsdlhome = $self->{'gsdlhome'};
1961 my $infodbtype = $self->{'infodbtype'};
1962
1963 if ($baseaction::authentication_enabled) {
1964 # Ensure the user is allowed to edit this collection
1965 &authenticate_user($gsdl_cgi, $username, $collect);
1966 }
1967
1968 # Obtain the collect dir
1969 my $site = $self->{'site'};
1970 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1971 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1972
1973 # Make sure the collection isn't locked by someone else
1974 $self->lock_collection($username, $collect);
1975
1976 # look up additional args
1977 my $docid = $self->{'d'};
1978 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1979 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1980 }
1981 my $metaname = $self->{'metaname'};
1982 my $metapos = $self->{'metapos'};
1983 my $metavalue = $self->{'metavalue'};
1984
1985 # To people who know $collect_tail please add some comments
1986 # Obtain the path to the database
1987 my $collect_tail = $collect;
1988 $collect_tail =~ s/^.*[\/\\]//;
1989 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1990 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1991
1992 # Read the docid entry
1993 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1994
1995 # Basically loop through and unescape_html the values
1996 foreach my $k (keys %$doc_rec) {
1997 my @escaped_v = ();
1998 foreach my $v (@{$doc_rec->{$k}}) {
1999 if ($k eq "contains") {
2000 # protect quotes in ".2;".3 etc
2001 $v =~ s/\"/\\\"/g;
2002 push(@escaped_v, $v);
2003 }
2004 else {
2005 my $ev = &ghtml::unescape_html($v);
2006 $ev =~ s/\"/\\\"/g;
2007 push(@escaped_v, $ev);
2008 }
2009 }
2010 $doc_rec->{$k} = \@escaped_v;
2011 }
2012
2013 # Check to make sure the key does exist
2014 if (!defined ($doc_rec->{$metaname})) {
2015 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2016 }
2017
2018 # Obtain the specified metadata pos
2019 $metapos = 0 if (!defined $metapos);
2020
2021 # consider check key is defined before deleting?
2022 # Loop through the metadata array and ignore the specified position
2023 my $filtered_metadata = [];
2024 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2025 for (my $i=0; $i<$num_metadata_vals; $i++) {
2026 my $metaval = shift(@{$doc_rec->{$metaname}});
2027
2028 if (!defined $metavalue && $i != $metapos) {
2029 push(@$filtered_metadata,$metaval)
2030 }
2031
2032 if(defined $metavalue && !($metavalue eq $metaval))
2033 {
2034 push(@$filtered_metadata,$metavalue)
2035 }
2036 }
2037 $doc_rec->{$metaname} = $filtered_metadata;
2038
2039 # Turn the record back to string
2040 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2041
2042 # Store it back to the database
2043 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2044 my $status = system($cmd);
2045 if ($status != 0) {
2046 my $mess = "Failed to set metadata key: $docid\n";
2047
2048 $mess .= "PATH: $ENV{'PATH'}\n";
2049 $mess .= "cmd = $cmd\n";
2050 $mess .= "Exit status: $status\n";
2051 $mess .= "System Error Message: $!\n";
2052
2053 $gsdl_cgi->generate_error($mess);
2054 }
2055 else {
2056 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2057 $mess .= " $metaname";
2058 $mess .= "->[$metapos]" if (defined $metapos);
2059
2060 $gsdl_cgi->generate_ok_message($mess);
2061 }
2062}
2063
2064
2065# Was trying to reused the codes, but the functions need to be broken
2066# down more before they can be reused, otherwise there will be too
2067# much overhead and duplicate process...
2068sub insert_metadata
2069{
2070 my $self = shift @_;
2071
2072 my $username = $self->{'username'};
2073 my $collect = $self->{'collect'};
2074 my $gsdl_cgi = $self->{'gsdl_cgi'};
2075 my $gsdlhome = $self->{'gsdlhome'};
2076 my $infodbtype = $self->{'infodbtype'};
2077
2078 # If the import metadata and gdbm database have been updated, we
2079 # need to insert some notification to warn user that the the text
2080 # they see at the moment is not indexed and require a rebuild.
2081 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2082
2083 if ($baseaction::authentication_enabled) {
2084 # Ensure the user is allowed to edit this collection
2085 $self->authenticate_user($username, $collect);
2086 }
2087
2088 # Obtain the collect and archive dir
2089 my $site = $self->{'site'};
2090 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2091 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2092 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2093
2094 # Make sure the collection isn't locked by someone else
2095 $self->lock_collection($username, $collect);
2096
2097 # Check additional args
2098 my $docid = $self->{'d'};
2099 if (!defined($docid)) {
2100 $gsdl_cgi->generate_error("No document id is specified: d=...");
2101 }
2102 my $metaname = $self->{'metaname'};
2103 if (!defined($metaname)) {
2104 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2105 }
2106 my $metavalue = $self->{'metavalue'};
2107 if (!defined($metavalue) || $metavalue eq "") {
2108 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2109 }
2110 # make "accumulate" the default (less destructive, as won't actually
2111 # delete any existing values)
2112 my $metamode = "accumulate";
2113
2114 #=======================================================================#
2115 # set_import_metadata [START]
2116 #=======================================================================#
2117 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2118 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2119 my $metadata_xml_file;
2120 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2121 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2122
2123 # This now stores the full pathname
2124 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2125
2126 # figure out correct metadata.xml file [?]
2127 # Assuming the metadata.xml file is next to the source file
2128 # Note: This will not work if it is using the inherited metadata from the parent folder
2129 my ($import_tailname, $import_dirname)
2130 = File::Basename::fileparse($import_filename);
2131 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2132
2133 # Shane's escape characters
2134 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2135 $metavalue =~ s/\,/&#44;/g;
2136 $metavalue =~ s/\:/&#58;/g;
2137 $metavalue =~ s/\|/&#124;/g;
2138 $metavalue =~ s/\(/&#40;/g;
2139 $metavalue =~ s/\)/&#41;/g;
2140 $metavalue =~ s/\[/&#91;/g;
2141 $metavalue =~ s/\\/&#92;/g;
2142 $metavalue =~ s/\]/&#93;/g;
2143 $metavalue =~ s/\{/&#123;/g;
2144 $metavalue =~ s/\}/&#125;/g;
2145 $metavalue =~ s/\"/&#34;/g;
2146 $metavalue =~ s/\`/&#96;/g;
2147 $metavalue =~ s/\n/_newline_/g;
2148
2149 # Edit the metadata.xml
2150 # Modified by Jeffrey from DL Consulting
2151 # Handle the case where there is one metadata.xml file for multiple FileSets
2152 # The XML filter needs to know whether it is in the right FileSet
2153 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2154 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2155 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2156 $metaname, $metavalue, $metamode, $import_tailname);
2157 #=======================================================================#
2158 # set_import_metadata [END]
2159 #=======================================================================#
2160
2161
2162 #=======================================================================#
2163 # set_metadata (accumulate version) [START]
2164 #=======================================================================#
2165 # To people who know $collect_tail please add some comments
2166 # Obtain path to the database
2167 my $collect_tail = $collect;
2168 $collect_tail =~ s/^.*[\/\\]//;
2169 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2170 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2171
2172 # Read the docid entry
2173 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2174
2175 foreach my $k (keys %$doc_rec) {
2176 my @escaped_v = ();
2177 foreach my $v (@{$doc_rec->{$k}}) {
2178 if ($k eq "contains") {
2179 # protect quotes in ".2;".3 etc
2180 $v =~ s/\"/\\\"/g;
2181 push(@escaped_v, $v);
2182 }
2183 else {
2184 my $ev = &ghtml::unescape_html($v);
2185 $ev =~ s/\"/\\\"/g;
2186 push(@escaped_v, $ev);
2187 }
2188 }
2189 $doc_rec->{$k} = \@escaped_v;
2190 }
2191
2192 # Protect the quotes
2193 $metavalue =~ s/\"/\\\"/g;
2194
2195 # Adds the pending macro
2196 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2197
2198 # If the metadata doesn't exist, create a new one
2199 if (!defined($doc_rec->{$metaname})){
2200 $doc_rec->{$metaname} = [ $macro_metavalue ];
2201 }
2202 # Else, let's acculumate the values
2203 else {
2204 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2205 }
2206
2207 # Generate the record string
2208 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2209
2210 # Store it into GDBM
2211 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2212 my $status = system($cmd);
2213 if ($status != 0) {
2214 # Catch error if gdbmget failed
2215 my $mess = "Failed to set metadata key: $docid\n";
2216
2217 $mess .= "PATH: $ENV{'PATH'}\n";
2218 $mess .= "cmd = $cmd\n";
2219 $mess .= "Exit status: $status\n";
2220 $mess .= "System Error Message: $!\n";
2221
2222 $gsdl_cgi->generate_error($mess);
2223 }
2224 else {
2225 my $mess = "insert-metadata successful: Key[$docid]\n";
2226 $mess .= " [In metadata.xml] $metaname";
2227 $mess .= " = $metavalue\n";
2228 $mess .= " [In database] $metaname";
2229 $mess .= " = $macro_metavalue\n";
2230 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2231 $gsdl_cgi->generate_ok_message($mess);
2232 }
2233 #=======================================================================#
2234 # set_metadata (accumulate version) [END]
2235 #=======================================================================#
2236
2237 # Release the lock once it is done
2238 $self->unlock_collection($username, $collect);
2239}
2240
22411;
Note: See TracBrowser for help on using the repository browser.