source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27156

Last change on this file since 27156 was 27156, checked in by ak19, 11 years ago

Possible bugfix. Subroutine remove_live_metadata used to lock the collection but not unlock it again, whereas remove_archives_metadata and remove_import_metadata did do a symmetrical lock and unlock.

File size: 70.3 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
63
64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
68
69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
72
73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
85
86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
102
103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
106
107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
130
131 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
132 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
133
134 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
135
136 return bless $self, $class;
137}
138
139
140sub get_live_metadata
141{
142 my $self = shift @_;
143
144 my $username = $self->{'username'};
145 my $collect = $self->{'collect'};
146 my $gsdl_cgi = $self->{'gsdl_cgi'};
147 my $gsdlhome = $self->{'gsdlhome'};
148 my $infodbtype = $self->{'infodbtype'};
149
150 # live metadata gets/saves value scoped (prefixed) by the current usename
151 # so (for now) let's not bother to enforce authentication
152
153 # Obtain the collect dir
154 my $site = $self->{'site'};
155 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
157
158 # Make sure the collection isn't locked by someone else
159 $self->lock_collection($username, $collect);
160
161 # look up additional args
162 my $docid = $self->{'d'};
163 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
164 $gsdl_cgi->generate_error("No docid (d=...) specified.");
165 }
166
167 # Generate the dbkey
168 my $metaname = $self->{'metaname'};
169 my $dbkey = "$docid.$metaname";
170
171 # To people who know $collect_tail please add some comments
172 # Obtain path to the database
173 my $collect_tail = $collect;
174 $collect_tail =~ s/^.*[\/|\\]//;
175 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
176 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
177
178 # Obtain the content of the key
179 my $cmd = "gdbmget $infodb_file_path $dbkey";
180 if (open(GIN,"$cmd |") == 0) {
181 # Catch error if gdbmget failed
182 my $mess = "Failed to get metadata key: $metaname\n";
183 $mess .= "$!\n";
184
185 $gsdl_cgi->generate_error($mess);
186 }
187 else {
188 binmode(GIN,":utf8");
189 # Read everything in and concatenate them into $metavalue
190 my $metavalue = "";
191 my $line;
192 while (defined ($line=<GIN>)) {
193 $metavalue .= $line;
194 }
195 close(GIN);
196 chomp($metavalue); # Get rid off the tailing newlines
197 $gsdl_cgi->generate_ok_message("$metavalue");
198 }
199
200 # Release the lock once it is done
201 $self->unlock_collection($username, $collect);
202}
203
204
205sub get_metadata
206{
207 my $self = shift @_;
208
209 my $username = $self->{'username'};
210 my $collect = $self->{'collect'};
211 my $gsdl_cgi = $self->{'gsdl_cgi'};
212 my $gsdlhome = $self->{'gsdlhome'};
213
214 # Authenticate user if it is enabled
215 if ($baseaction::authentication_enabled) {
216 # Ensure the user is allowed to edit this collection
217 &authenticate_user($gsdl_cgi, $username, $collect);
218 }
219
220 # Obtain the collect dir
221 my $site = $self->{'site'};
222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
223 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
224
225 # Make sure the collection isn't locked by someone else
226 $self->lock_collection($username, $collect);
227
228 # look up additional args
229 my $docid = $self->{'d'};
230 my $metaname = $self->{'metaname'};
231 my $metapos = $self->{'metapos'};
232 my $infodbtype = $self->{'infodbtype'};
233
234 # To people who know $collect_tail please add some comments
235 # Obtain path to the database
236 my $collect_tail = $collect;
237 $collect_tail =~ s/^.*[\/\\]//;
238 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
239 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
240
241 # Read the docid entry
242 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
243
244 # Basically loop through and unescape_html the values
245 foreach my $k (keys %$doc_rec) {
246 my @escaped_v = ();
247 foreach my $v (@{$doc_rec->{$k}}) {
248 my $ev = &ghtml::unescape_html($v);
249 push(@escaped_v, $ev);
250 }
251 $doc_rec->{$k} = \@escaped_v;
252 }
253
254 # Obtain the specified metadata value
255 $metapos = 0 if (!defined $metapos);
256 my $metavalue = $doc_rec->{$metaname}->[$metapos];
257 $gsdl_cgi->generate_ok_message("$metavalue");
258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
261}
262
263
264sub get_import_metadata
265{
266 my $self = shift @_;
267
268 my $username = $self->{'username'};
269 my $collect = $self->{'collect'};
270 my $gsdl_cgi = $self->{'gsdl_cgi'};
271 my $gsdlhome = $self->{'gsdlhome'};
272
273 # Authenticate user if it is enabled
274 if ($baseaction::authentication_enabled) {
275 # Ensure the user is allowed to edit this collection
276 &authenticate_user($gsdl_cgi, $username, $collect);
277 }
278
279 # Obtain the collect dir
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
283
284 # Make sure the collection isn't locked by someone else
285 $self->lock_collection($username, $collect);
286
287 # look up additional args
288 my $docid = $self->{'d'};
289 my $metaname = $self->{'metaname'};
290 my $infodbtype = $self->{'infodbtype'};
291 if (!defined $docid)
292 {
293 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
294 }
295
296 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
297 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
298 my $metadata_xml_file;
299 my $import_filename = undef;
300
301
302 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
303 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
304 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
305
306 # This now stores the full pathname
307 $import_filename = $doc_rec->{'src-file'}->[0];
308
309 # figure out correct metadata.xml file [?]
310 # Assuming the metadata.xml file is next to the source file
311 # Note: This will not work if it is using the inherited metadata from the parent folder
312 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
313 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
314
315 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
316
317 # Release the lock once it is done
318 $self->unlock_collection($username, $collect);
319}
320
321sub get_metadata_from_metadata_xml
322{
323 my $self = shift @_;
324 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
325
326 my @rules =
327 (
328 _default => 'raw',
329 'Metadata' => \&gfmxml_metadata,
330 'FileName' => \&mxml_filename
331 );
332
333 my $parser = XML::Rules->new
334 (
335 rules => \@rules,
336 output_encoding => 'utf8'
337 );
338
339 my $xml_in = "";
340 if (!open(MIN,"<$metadata_xml_filename"))
341 {
342 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
343 }
344 else
345 {
346 # Read them in
347 my $line;
348 while (defined ($line=<MIN>)) {
349 $xml_in .= $line;
350 }
351 close(MIN);
352
353 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
354
355 if(defined $parser->{'pad'}->{'metavalue'})
356 {
357 return $parser->{'pad'}->{'metavalue'};
358 }
359 else
360 {
361 return "";
362 }
363 }
364}
365
366sub gfmxml_metadata
367{
368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
369
370 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
371 {
372 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
373 }
374}
375
376sub get_archives_metadata
377{
378 my $self = shift @_;
379
380 my $username = $self->{'username'};
381 my $collect = $self->{'collect'};
382 my $gsdl_cgi = $self->{'gsdl_cgi'};
383 my $gsdlhome = $self->{'gsdlhome'};
384 my $infodbtype = $self->{'infodbtype'};
385
386 # Authenticate user if it is enabled
387 if ($baseaction::authentication_enabled) {
388 # Ensure the user is allowed to edit this collection
389 &authenticate_user($gsdl_cgi, $username, $collect);
390 }
391
392 # Obtain the collect dir
393 my $site = $self->{'site'};
394 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
395
396 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
397
398 # Make sure the collection isn't locked by someone else
399 $self->lock_collection($username, $collect);
400
401 # look up additional args
402 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
403 $docid_secnum = "" if (!defined $docid_secnum);
404
405 my $metaname = $self->{'metaname'};
406 my $metapos = $self->{'metapos'};
407 $metapos = 0 if (!defined $metapos);
408
409 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
411
412 # This now stores the full pathname
413 my $doc_filename = $doc_rec->{'doc-file'}->[0];
414
415 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
416
417 # Release the lock once it is done
418 $self->unlock_collection($username, $collect);
419}
420
421sub get_metadata_from_archive_xml
422{
423 my $self = shift @_;
424 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
425
426 my @start_rules = ('Section' => \&dxml_start_section);
427
428 my @rules =
429 (
430 _default => 'raw',
431 'Metadata' => \&gfdxml_metadata
432 );
433
434 my $parser = XML::Rules->new
435 (
436 start_rules => \@start_rules,
437 rules => \@rules,
438 output_encoding => 'utf8'
439 );
440
441 my $xml_in = "";
442 if (!open(MIN,"<$doc_xml_filename"))
443 {
444 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
445 }
446 else
447 {
448 # Read them in
449 my $line;
450 while (defined ($line=<MIN>)) {
451 $xml_in .= $line;
452 }
453 close(MIN);
454
455 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
456
457 if(defined $parser->{'pad'}->{'metavalue'})
458 {
459 return $parser->{'pad'}->{'metavalue'};
460 }
461 else
462 {
463 return "";
464 }
465 }
466}
467
468sub gfdxml_metadata
469{
470 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
471
472 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
473 {
474 return;
475 }
476
477 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
478 {
479 if (!defined $parser->{'parameters'}->{'poscount'})
480 {
481 $parser->{'parameters'}->{'poscount'} = 0;
482 }
483 else
484 {
485 $parser->{'parameters'}->{'poscount'}++;
486 }
487 }
488
489 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
490 {
491 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
492 }
493}
494
495sub set_live_metadata
496{
497 my $self = shift @_;
498
499 my $username = $self->{'username'};
500 my $collect = $self->{'collect'};
501 my $gsdl_cgi = $self->{'gsdl_cgi'};
502 my $gsdlhome = $self->{'gsdlhome'};
503 my $infodbtype = $self->{'infodbtype'};
504
505 if ($baseaction::authentication_enabled) {
506 # Ensure the user is allowed to edit this collection
507 &authenticate_user($gsdl_cgi, $username, $collect);
508 }
509
510 # Obtain the collect dir
511 my $site = $self->{'site'};
512 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
513 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
514
515 # Make sure the collection isn't locked by someone else
516 $self->lock_collection($username, $collect);
517
518 # look up additional args
519 my $docid = $self->{'d'};
520 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
521 $gsdl_cgi->generate_error("No docid (d=...) specified.");
522 }
523 my $metavalue = $self->{'metavalue'};
524
525
526 # Generate the dbkey
527 my $metaname = $self->{'metaname'};
528 my $dbkey = "$docid.$metaname";
529
530 # To people who know $collect_tail please add some comments
531 # Obtain path to the database
532 my $collect_tail = $collect;
533 $collect_tail =~ s/^.*[\/\\]//;
534 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
535 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
536
537 # Set the new value
538 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
539 my $status = system($cmd);
540 if ($status != 0) {
541 # Catch error if gdbmget failed
542 my $mess = "Failed to set metadata key: $dbkey\n";
543
544 $mess .= "PATH: $ENV{'PATH'}\n";
545 $mess .= "cmd = $cmd\n";
546 $mess .= "Exit status: $status\n";
547 $mess .= "System Error Message: $!\n";
548
549 $gsdl_cgi->generate_error($mess);
550 }
551 else {
552 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
553 }
554
555 # Release the lock once it is done
556 $self->unlock_collection($username, $collect);
557}
558
559sub set_metadata_entry
560{
561 my $self = shift @_;
562 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
563
564 # To people who know $collect_tail please add some comments
565 # Obtain path to the database
566 my $collect_tail = $collect;
567 $collect_tail =~ s/^.*[\/\\]//;
568 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
569 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
570
571# print STDERR "**** infodb file path = $infodb_file_path\n";
572# print STDERR "***** infodb type = $infodbtype\n";
573
574 # Read the docid entry
575 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
576
577 # Set the metadata value
578 if (defined $metapos) {
579 $doc_rec->{$metaname}->[$metapos] = $metavalue;
580 }
581 else {
582 $doc_rec->{$metaname} = [ $metavalue ];
583 }
584
585 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
586
587 return $status;
588
589}
590
591sub set_metadata
592{
593 my $self = shift @_;
594
595 my $username = $self->{'username'};
596 my $collect = $self->{'collect'};
597 my $gsdl_cgi = $self->{'gsdl_cgi'};
598 my $gsdlhome = $self->{'gsdlhome'};
599
600 if ($baseaction::authentication_enabled) {
601 # Ensure the user is allowed to edit this collection
602 &authenticate_user($gsdl_cgi, $username, $collect);
603 }
604
605 my $site = $self->{'site'};
606 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
607
608 $gsdl_cgi->checked_chdir($collect_dir);
609
610 # Obtain the collect dir
611 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
612
613 # Make sure the collection isn't locked by someone else
614 $self->lock_collection($username, $collect);
615
616 # look up additional args
617 my $docid = $self->{'d'};
618 my $metaname = $self->{'metaname'};
619 my $metapos = $self->{'metapos'};
620 my $metavalue = $self->{'metavalue'};
621 my $infodbtype = $self->{'infodbtype'};
622
623 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
624
625 if ($status != 0) {
626 # Catch error if set infodb entry failed
627 my $mess = "Failed to set metadata key: $docid\n";
628
629 $mess .= "PATH: $ENV{'PATH'}\n";
630 $mess .= "Exit status: $status\n";
631 $mess .= "System Error Message: $!\n";
632
633 $gsdl_cgi->generate_error($mess);
634 }
635 else {
636 my $mess = "set-metadata successful: Key[$docid]\n";
637 $mess .= " $metaname";
638 $mess .= "->[$metapos]" if (defined $metapos);
639 $mess .= " = $metavalue";
640
641 $gsdl_cgi->generate_ok_message($mess);
642 }
643
644 # Release the lock once it is done
645 $self->unlock_collection($username, $collect);
646}
647
648
649sub set_metadata_array
650{
651 my $self = shift @_;
652
653 my $username = $self->{'username'};
654 my $collect = $self->{'collect'};
655 my $gsdl_cgi = $self->{'gsdl_cgi'};
656 my $gsdlhome = $self->{'gsdlhome'};
657
658 if ($baseaction::authentication_enabled) {
659 # Ensure the user is allowed to edit this collection
660 &authenticate_user($gsdl_cgi, $username, $collect);
661 }
662
663 my $site = $self->{'site'};
664 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
665
666 $gsdl_cgi->checked_chdir($collect_dir);
667
668 # Obtain the collect dir
669 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
670
671 # Make sure the collection isn't locked by someone else
672 $self->lock_collection($username, $collect);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $docid = $doc_array_rec->{'docid'};
690 my $metaname = $doc_array_rec->{'metaname'};
691 my $metapos = $doc_array_rec->{'metapos'};
692 my $metavalue = $doc_array_rec->{'metavalue'};
693
694 push(@all_docids,$docid);
695
696 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
697
698 if ($status != 0) {
699 # Catch error if set infodb entry failed
700 $global_status = $status;
701 $global_mess .= "Failed to set metadata key: $docid\n";
702 $global_mess .= "Exit status: $status\n";
703 $global_mess .= "System Error Message: $!\n";
704 $global_mess .= "-" x 20;
705 }
706 }
707
708 if ($global_status != 0) {
709 $global_mess .= "PATH: $ENV{'PATH'}\n";
710 $gsdl_cgi->generate_error($global_mess);
711 }
712 else {
713 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
714 $gsdl_cgi->generate_ok_message($mess);
715 }
716
717 # Release the lock once it is done
718 $self->unlock_collection($username, $collect);
719}
720
721
722sub dxml_metadata
723{
724 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
725 my $metaname = $parser->{'parameters'}->{'metaname'};
726 my $metamode = $parser->{'parameters'}->{'metamode'};
727
728 print STDERR "**** Processing closing </Metadata> tag\n";
729
730 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731
732 # Find the right metadata tag and checks if we are going to
733 # override it
734 #
735 # Note: This over writes the first metadata block it
736 # encountered. If there are multiple Sections in the doc.xml, it
737 # might not behave as you would expect
738
739 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741## print STDERR "**** metamode = $metamode\n";
742
743 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744 {
745 my $name_attr = $attrHash->{'name'};
746 if (($name_attr eq $metaname) && ($metamode eq "override"))
747 {
748 if (!defined $parser->{'parameters'}->{'poscount'})
749 {
750 $parser->{'parameters'}->{'poscount'} = 0;
751 }
752 else
753 {
754 $parser->{'parameters'}->{'poscount'}++;
755 }
756
757 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758 {
759 ##print STDERR "#### got match!!\n";
760 # Get the value and override the current value
761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
762 $attrHash->{'_content'} = $metavalue;
763
764 # Don't want it to wipe out any other pieces of metadata
765 $parser->{'parameters'}->{'metamode'} = "done";
766 }
767 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
768 {
769 my $metavalue = $parser->{'parameters'}->{'metavalue'};
770 $attrHash->{'_content'} = $metavalue;
771 $parser->{'parameters'}->{'metamode'} = "done";
772 }
773 }
774 }
775
776 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
777 return [$tagname => $attrHash];
778}
779
780# This method exists purely for catching invalid section numbers that the client
781# requested to edit. Once the parser has reached the end (the final </Archive> tag),
782# we've seen all the Sections in the doc.xml, and none of their section nums matched
783# if the metamode has not been set to 'done' by then.
784sub dxml_archive
785{
786 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
787 my $metamode = $parser->{'parameters'}->{'metamode'};
788
789 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
790 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
791
792# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
793
794 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
795 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
796
797 my $metaname = $parser->{'parameters'}->{'metaname'};
798 my $metavalue = $parser->{'parameters'}->{'metavalue'};
799
800 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
801 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
802 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
803 print STDERR "\tin section $opt_doc_secnum.\n";
804 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
805 }
806
807 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
808 return [$tagname => $attrHash];
809}
810
811sub dxml_description
812{
813 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
814 my $metamode = $parser->{'parameters'}->{'metamode'};
815
816 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
817 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
818
819 print STDERR "**** Processing a closing </Description> tag \n";
820# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
821
822 # Accumulate the metadata
823
824 # We'll be accumulating metadata at this point if we haven't found and therefore
825 # haven't processed the metadata yet.
826 # For subsections, this means that if we're at a matching subsection, but haven't
827 # found the correct metaname to override in that subsection, we accumulate it as new
828 # meta in the subsection by adding it to the current description.
829 # If there's no subsection info for the metadata, it will accumulate at the top level
830 # section description if we hadn't found a matching metaname to override at this point.
831
832 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
833 # at the toplevel section. In the latter case, it means we want to process meta in the
834 # toplevel section. So the eq check between the values below will work in all cases.
835
836 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
837 # this document has been provided. In that case, the metadata for that opt_doc_secnum
838 # won't get attached/accumulated to any part of the doc, not even its top-level section.
839
840 if ($curr_secnum eq $opt_doc_secnum
841 && ($metamode eq "accumulate" || $metamode eq "override")) {
842 if ($metamode eq "override") {
843 print "No metadata value to override. Switching 'metamode' to accumulate\n";
844 }
845
846 # If get to here and metamode is override, the this means there
847 # was no existing value to overide => treat as an append operation
848
849 # Tack a new metadata tag on to the end of the <Metadata>+ block
850 my $metaname = $parser->{'parameters'}->{'metaname'};
851 my $metavalue = $parser->{'parameters'}->{'metavalue'};
852
853 my $metadata_attr = {
854 '_content' => $metavalue,
855 'name' => $metaname,
856 'mode' => "accumulate"
857 };
858
859 my $append_metadata = [ "Metadata" => $metadata_attr ];
860 my $description_content = $attrHash->{'_content'};
861
862 print "Appending metadata to doc.xml\n";
863
864 if (ref($description_content)) {
865 # got some existing interesting nested content
866 push(@$description_content, " ", $append_metadata ,"\n ");
867 }
868 else {
869 #description_content is most likely a string such as "\n"
870 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
871 }
872
873 $parser->{'parameters'}->{'metamode'} = "done";
874 }
875 else {
876 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
877## print STDERR "**** NOT ACCUMULATE?!? \n";
878 }
879
880 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
881 return [$tagname => $attrHash];
882}
883
884
885sub dxml_start_section
886{
887 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
888
889 my $new_depth = scalar(@$contextArray);
890
891 print STDERR "**** START SECTION \n";
892
893 if ($new_depth == 1) {
894 $parser->{'parameters'}->{'curr_section_depth'} = 1;
895 $parser->{'parameters'}->{'curr_section_num'} = "";
896 }
897
898 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
899 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
900
901 my $new_secnum;
902
903 if ($new_depth > $old_depth) {
904 # child subsection
905 $new_secnum = "$old_secnum.1";
906 }
907 elsif ($new_depth == $old_depth) {
908 # sibling section => increase it's value by 1
909 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
910 $tail_num++;
911 $new_secnum = $old_secnum;
912 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
913 }
914 else {
915 # back up to parent section => lopp off tail
916 $new_secnum = $old_secnum;
917 $new_secnum =~ s/\.\d+$//;
918 }
919
920 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
921 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
922
923 1;
924}
925
926sub edit_xml_file
927{
928 my $self = shift @_;
929 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
930
931 # use XML::Rules to add it in (read in and out again)
932 my $parser = XML::Rules->new(start_rules => $start_rules,
933 rules => $rules,
934 style => 'filter',
935 output_encoding => 'utf8' );
936
937 my $xml_in = "";
938 if (!open(MIN,"<$filename")) {
939 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
940 }
941 else {
942 # Read all the text in
943 my $line;
944 while (defined ($line=<MIN>)) {
945 $xml_in .= $line;
946 }
947 close(MIN);
948
949 my $MOUT;
950 if (!open($MOUT,">$filename")) {
951 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
952 }
953 else {
954 # Matched lines will get handled by the call backs
955## my $xml_out = "";
956
957 binmode($MOUT,":utf8");
958 $parser->filter($xml_in,$MOUT, $options);
959
960# binmode(MOUT,":utf8");
961# print MOUT $xml_out;
962 close($MOUT);
963 }
964 }
965
966 # copy across any custom error information that was stored during parsing
967 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
968}
969
970sub edit_doc_xml
971{
972 my $self = shift @_;
973 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
974
975 my $info_mess = <<RAWEND;
976****************************
977 edit_doc_xml()
978****************************
979doc_xml_filename = $doc_xml_filename
980metaname = $metaname
981metavalue = $metavalue
982metapos = $metapos
983metamode = $metamode
984opt_secnum = $opt_secnum
985prevmetavalue = $prevmetavalue
986****************************
987RAWEND
988
989 $gsdl_cgi->generate_message($info_mess);
990
991 # To monitor which section/subsection number we are in
992 my @start_rules =
993 ( 'Section' => \&dxml_start_section );
994
995 # use XML::Rules to add it in (read in and out again)
996 # Set the call back functions
997 my @rules =
998 ( _default => 'raw',
999 'Metadata' => \&dxml_metadata,
1000 'Description' => \&dxml_description,
1001 'Archive' => \&dxml_archive); # just for catching errors at end
1002
1003 # Sets the parameters
1004 my $options = { 'metaname' => $metaname,
1005 'metapos' => $metapos,
1006 'metavalue' => $metavalue,
1007 'metamode' => $metamode,
1008 'prevmetavalue' => $prevmetavalue };
1009
1010 if (defined $opt_secnum) {
1011 $options->{'secnum'} = $opt_secnum;
1012 }
1013
1014 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1015}
1016
1017sub set_archives_metadata_entry
1018{
1019 my $self = shift @_;
1020 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1021
1022 my $info_mess = <<RAWEND;
1023****************************
1024 set_archives_metadata_entry()
1025****************************
1026archive_dir = $archive_dir
1027collect_dir = $collect_dir
1028collect = $collect
1029infodbtype = $infodbtype
1030docid = $docid
1031metaname = $metaname
1032metapos = $metapos
1033metavalue = $metavalue
1034metamode = $metamode
1035prevmetavalue = $prevmetavalue
1036****************************
1037RAWEND
1038
1039 $gsdl_cgi->generate_message($info_mess);
1040
1041 # Obtain the doc.xml path for the specified docID
1042 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1043
1044 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1045 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1046 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1047
1048 # The $doc_xml_file is relative to the archives, and now let's get the full path
1049 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1050 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1051
1052 # Edit the doc.xml file with the specified metadata name, value and position.
1053 # TODO: there is a potential problem here as this edit_doc_xml function
1054 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1055 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1056
1057 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1058 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1059
1060 # return 0; # return 0 for now to indicate no error
1061 return (defined $self->{'error_msg'}) ? 1 : 0;
1062}
1063
1064
1065sub set_archives_metadata
1066{
1067 my $self = shift @_;
1068
1069 my $username = $self->{'username'};
1070 my $collect = $self->{'collect'};
1071 my $gsdl_cgi = $self->{'gsdl_cgi'};
1072 my $gsdlhome = $self->{'gsdlhome'};
1073 my $infodbtype = $self->{'infodbtype'};
1074
1075 if ($baseaction::authentication_enabled) {
1076 # Ensure the user is allowed to edit this collection
1077 $self->authenticate_user($username, $collect);
1078 }
1079
1080 my $site = $self->{'site'};
1081
1082 # Obtain the collect and archive dir
1083 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1084
1085 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1086
1087 # Make sure the collection isn't locked by someone else
1088 $self->lock_collection($username, $collect);
1089
1090 # look up additional args
1091 my $docid = $self->{'d'};
1092 my $metaname = $self->{'metaname'};
1093 my $metavalue = $self->{'metavalue'};
1094 my $prevmetavalue = $self->{'prevmetavalue'};
1095
1096 my $metapos = $self->{'metapos'};
1097 $metapos = 0 if (!defined $metapos);
1098
1099 my $metamode = $self->{'metamode'};
1100 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1101 # make "accumulate" the default (less destructive, as won't actually
1102 # delete any existing values)
1103 $metamode = "accumulate";
1104 }
1105
1106 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1107 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1108
1109 # Release the lock once it is done
1110 $self->unlock_collection($username, $collect);
1111
1112 if ($status == 0) {
1113 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1114 $mess .= " $metaname";
1115 $mess .= "->[$metapos]" if (defined $metapos);
1116 $mess .= " = $metavalue";
1117 $mess .= " ($metamode)\n";
1118
1119 $gsdl_cgi->generate_ok_message($mess);
1120 }
1121 else {
1122 my $mess .= "Failed to set archives metadata key: $docid\n";
1123 $mess .= "Exit status: $status\n";
1124 if(defined $self->{'error_msg'}) {
1125 $mess .= "Error Message: $self->{'error_msg'}\n";
1126 } else {
1127 $mess .= "System Error Message: $!\n";
1128 }
1129 $mess .= "-" x 20 . "\n";
1130
1131 $gsdl_cgi->generate_error($mess);
1132 }
1133}
1134
1135
1136sub set_archives_metadata_array
1137{
1138 my $self = shift @_;
1139
1140 my $username = $self->{'username'};
1141 my $collect = $self->{'collect'};
1142 my $gsdl_cgi = $self->{'gsdl_cgi'};
1143 my $gsdlhome = $self->{'gsdlhome'};
1144
1145 if ($baseaction::authentication_enabled) {
1146 # Ensure the user is allowed to edit this collection
1147 &authenticate_user($gsdl_cgi, $username, $collect);
1148 }
1149
1150 my $site = $self->{'site'};
1151 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1152
1153 $gsdl_cgi->checked_chdir($collect_dir);
1154
1155 # Obtain the collect dir
1156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1157
1158 # Make sure the collection isn't locked by someone else
1159 $self->lock_collection($username, $collect);
1160
1161 # look up additional args
1162
1163 my $infodbtype = $self->{'infodbtype'};
1164
1165 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1166
1167 my $json_str = $self->{'json'};
1168 my $doc_array = decode_json $json_str;
1169
1170
1171 my $global_status = 0;
1172 my $global_mess = "";
1173
1174 my @all_docids = ();
1175
1176 foreach my $doc_array_rec ( @$doc_array ) {
1177
1178 my $docid = $doc_array_rec->{'docid'};
1179 my $metaname = $doc_array_rec->{'metaname'};
1180 my $metapos = $doc_array_rec->{'metapos'};
1181 my $metamode = $self->{'metamode'};
1182 my $metavalue = $doc_array_rec->{'metavalue'};
1183
1184 # Some sanity checks
1185 $metapos = 0 if (!defined $metapos);
1186
1187 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1188 # make "accumulate" the default (less destructive, as won't actually
1189 # delete any existing values)
1190 $metamode = "accumulate";
1191 }
1192
1193 push(@all_docids,$docid);
1194
1195 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1196 $metaname,$metapos,$metavalue,$metamode);
1197
1198 if ($status != 0) {
1199 # Catch error if set infodb entry failed
1200 $global_status = $status;
1201 $global_mess .= "Failed to set metadata key: $docid\n";
1202 $global_mess .= "Exit status: $status\n";
1203 $global_mess .= "System Error Message: $!\n";
1204 $global_mess .= "-" x 20 . "\n";
1205 }
1206 }
1207
1208 if ($global_status != 0) {
1209 $global_mess .= "PATH: $ENV{'PATH'}\n";
1210 $gsdl_cgi->generate_error($global_mess);
1211 }
1212 else {
1213 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1214 $gsdl_cgi->generate_ok_message($mess);
1215 }
1216
1217 # Release the lock once it is done
1218 $self->unlock_collection($username, $collect);
1219}
1220
1221sub remove_archives_metadata
1222{
1223 my $self = shift @_;
1224
1225 my $username = $self->{'username'};
1226 my $collect = $self->{'collect'};
1227 my $gsdl_cgi = $self->{'gsdl_cgi'};
1228 my $gsdlhome = $self->{'gsdlhome'};
1229 my $infodbtype = $self->{'infodbtype'};
1230
1231 if ($baseaction::authentication_enabled)
1232 {
1233 # Ensure the user is allowed to edit this collection
1234 &authenticate_user($gsdl_cgi, $username, $collect);
1235 }
1236
1237 my $site = $self->{'site'};
1238
1239 # Obtain the collect and archive dir
1240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1241
1242 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1243
1244 # Make sure the collection isn't locked by someone else
1245 $self->lock_collection($username, $collect);
1246
1247 # look up additional args
1248 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1249
1250 my $metaname = $self->{'metaname'};
1251 my $metapos = $self->{'metapos'};
1252 $metapos = 0 if (!defined $metapos);
1253
1254 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1255 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1256
1257 # This now stores the full pathname
1258 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1259
1260 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1261
1262 # Release the lock once it is done
1263 $self->unlock_collection($username, $collect);
1264
1265 if ($status == 0)
1266 {
1267 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1268 $mess .= " $metaname";
1269 $mess .= "->[$metapos]" if (defined $metapos);
1270
1271 $gsdl_cgi->generate_ok_message($mess);
1272 }
1273 else
1274 {
1275 my $mess .= "Failed to remove archives metadata key: $docid\n";
1276 $mess .= "Exit status: $status\n";
1277 $mess .= "System Error Message: $!\n";
1278 $mess .= "-" x 20 . "\n";
1279
1280 $gsdl_cgi->generate_error($mess);
1281 }
1282}
1283
1284sub remove_from_doc_xml
1285{
1286 my $self = shift @_;
1287 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1288
1289 my @start_rules = ('Section' => \&dxml_start_section);
1290
1291 # Set the call-back functions for the metadata tags
1292 my @rules =
1293 (
1294 _default => 'raw',
1295 'Metadata' => \&rfdxml_metadata
1296 );
1297
1298 my $parser = XML::Rules->new
1299 (
1300 start_rules => \@start_rules,
1301 rules => \@rules,
1302 style => 'filter',
1303 output_encoding => 'utf8'
1304 );
1305
1306 my $status = 0;
1307 my $xml_in = "";
1308 if (!open(MIN,"<$doc_xml_filename"))
1309 {
1310 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1311 $status = 1;
1312 }
1313 else
1314 {
1315 # Read them in
1316 my $line;
1317 while (defined ($line=<MIN>)) {
1318 $xml_in .= $line;
1319 }
1320 close(MIN);
1321
1322 # Filter with the call-back functions
1323 my $xml_out = "";
1324
1325 my $MOUT;
1326 if (!open($MOUT,">$doc_xml_filename")) {
1327 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1328 $status = 1;
1329 }
1330 else {
1331 binmode($MOUT,":utf8");
1332 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1333 close($MOUT);
1334 }
1335 }
1336 return $status;
1337}
1338
1339sub rfdxml_metadata
1340{
1341 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1342
1343 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1344 {
1345 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1346 return [$tagname => $attrHash];
1347 }
1348
1349 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1350 {
1351 if (!defined $parser->{'parameters'}->{'poscount'})
1352 {
1353 $parser->{'parameters'}->{'poscount'} = 0;
1354 }
1355 else
1356 {
1357 $parser->{'parameters'}->{'poscount'}++;
1358 }
1359 }
1360
1361 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1362 {
1363 return [];
1364 }
1365
1366 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1367 {
1368 return [];
1369 }
1370
1371 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1372 return [$tagname => $attrHash];
1373}
1374
1375sub mxml_metadata
1376{
1377 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1378 my $metaname = $parser->{'parameters'}->{'metaname'};
1379 my $metamode = $parser->{'parameters'}->{'metamode'};
1380
1381 # Report error if we don't see FileName tag before this
1382 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1383
1384 # Don't do anything if we are not in the right FileSet
1385 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1386 if ($file_regexp =~ /\.\*/) {
1387 # Only interested in a file_regexp if it specifies precisely one
1388 # file.
1389 # So, skip anything with a .* in it as it is too general
1390 return [$tagname => $attrHash];
1391 }
1392 my $src_file = $parser->{'parameters'}->{'src_file'};
1393 if (!($src_file =~ /$file_regexp/)) {
1394 return [$tagname => $attrHash];
1395 }
1396## print STDERR "*** mxl metamode = $metamode\n";
1397
1398 # Find the right metadata tag and checks if we are going to override it
1399 my $name_attr = $attrHash->{'name'};
1400 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1401 # Get the value and override the current value
1402 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1403 $attrHash->{'_content'} = $metavalue;
1404
1405## print STDERR "**** overrideing metadata.xml\n";
1406
1407 # Don't want it to wipe out any other pieces of metadata
1408 $parser->{'parameters'}->{'metamode'} = "done";
1409 }
1410
1411 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1412 return [$tagname => $attrHash];
1413}
1414
1415
1416sub mxml_description
1417{
1418 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1419 my $metamode = $parser->{'parameters'}->{'metamode'};
1420
1421 # Failed... Report error if we don't see FileName tag before this
1422 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1423
1424 # Don't do anything if we are not in the right FileSet
1425 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1426 if ($file_regexp =~ m/\.\*/) {
1427 # Only interested in a file_regexp if it specifies precisely one
1428 # file.
1429 # So, skip anything with a .* in it as it is too general
1430 return [$tagname => $attrHash];
1431 }
1432 my $src_file = $parser->{'parameters'}->{'src_file'};
1433
1434 if (!($src_file =~ m/$file_regexp/)) {
1435 return [$tagname => $attrHash];
1436 }
1437
1438 # Accumulate the metadata block to the end of the description block
1439 # Note: This adds metadata block to all description blocks, so if there are
1440 # multiple FileSets, it will add to all of them
1441 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1442 # if metamode was "override" but get to here then it failed to
1443 # find an item to override, in which case it should append its
1444 # value to the end, just like the "accumulate" mode
1445
1446 # tack a new metadata tag on to the end of the <Metadata>+ block
1447 my $metaname = $parser->{'parameters'}->{'metaname'};
1448 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1449
1450 my $metadata_attr = { '_content' => $metavalue,
1451 'name' => $metaname,
1452 'mode' => "accumulate" };
1453
1454 my $append_metadata = [ "Metadata" => $metadata_attr ];
1455 my $description_content = $attrHash->{'_content'};
1456
1457## print STDERR "*** appending to metadata.xml\n";
1458
1459 # append the new metadata element to the end of the current
1460 # content contained inside this tag
1461 if (ref($description_content) eq "") {
1462 # => string or numeric literal
1463 # this is caused by a <Description> block has no <Metadata> child elements
1464 # => set up an empty array in '_content'
1465 $attrHash->{'_content'} = [ "\n" ];
1466 $description_content = $attrHash->{'_content'};
1467 }
1468
1469 push(@$description_content," ", $append_metadata ,"\n ");
1470 $parser->{'parameters'}->{'metamode'} = "done";
1471 }
1472
1473 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1474 return [$tagname => $attrHash];
1475}
1476
1477
1478sub mxml_filename
1479{
1480 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1481
1482 # Store the filename of the Current Fileset
1483 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1484 # FileName tag must come before Description tag
1485 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1486
1487 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1488 return [$tagname => $attrHash];
1489}
1490
1491
1492sub mxml_fileset
1493{
1494 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1495
1496 # Initilise the current_file
1497 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1498 # FileName tag must come before Description tag
1499 $parser->{'parameters'}->{'current_file'} = "";
1500
1501 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1502 return [$tagname => $attrHash];
1503}
1504
1505
1506sub edit_metadata_xml
1507{
1508 my $self = shift @_;
1509 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1510
1511 # Set the call-back functions for the metadata tags
1512 my @rules =
1513 ( _default => 'raw',
1514 'FileName' => \&mxml_filename,
1515 'Metadata' => \&mxml_metadata,
1516 'Description' => \&mxml_description,
1517 'FileSet' => \&mxml_fileset);
1518
1519 # use XML::Rules to add it in (read in and out again)
1520 my $parser = XML::Rules->new(rules => \@rules,
1521 style => 'filter',
1522 output_encoding => 'utf8');
1523
1524 if (!-e $metadata_xml_filename) {
1525
1526 if (open(MOUT,">$metadata_xml_filename")) {
1527
1528 my $src_file_re = &util::filename_to_regex($src_file);
1529 # shouldn't the following also be in the above utility routine??
1530 # $src_file_re =~ s/\./\\./g;
1531
1532 print MOUT "<?xml version=\"1.0\"?>\n";
1533 print MOUT "<DirectoryMetadata>\n";
1534 print MOUT " <FileSet>\n";
1535 print MOUT " <FileName>$src_file_re</FileName>\n";
1536 print MOUT " <Description>\n";
1537 print MOUT " </Description>\n";
1538 print MOUT " </FileSet>\n";
1539 print MOUT "</DirectoryMetadata>\n";
1540
1541 close(MOUT);
1542 }
1543 else {
1544 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1545 }
1546 }
1547
1548
1549 my $xml_in = "";
1550 if (!open(MIN,"<$metadata_xml_filename")) {
1551 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1552 }
1553 else {
1554 # Read them in
1555 my $line;
1556 while (defined ($line=<MIN>)) {
1557 $xml_in .= $line;
1558 }
1559 close(MIN);
1560
1561 # Filter with the call-back functions
1562 my $xml_out = "";
1563
1564 my $MOUT;
1565 if (!open($MOUT,">$metadata_xml_filename")) {
1566 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1567 }
1568 else {
1569 binmode($MOUT,":utf8");
1570
1571 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1572 # At the moment, I will just hack it!
1573 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1574 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1575 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1576 #print MOUT $xml_out;
1577
1578 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1579 metavalue => $metavalue,
1580 metamode => $metamode,
1581 src_file => $src_file,
1582 current_file => undef} );
1583 close($MOUT);
1584 }
1585 }
1586}
1587
1588
1589sub set_import_metadata
1590{
1591 my $self = shift @_;
1592
1593 my $username = $self->{'username'};
1594 my $collect = $self->{'collect'};
1595 my $gsdl_cgi = $self->{'gsdl_cgi'};
1596 my $gsdlhome = $self->{'gsdlhome'};
1597 my $infodbtype = $self->{'infodbtype'};
1598
1599 if ($baseaction::authentication_enabled) {
1600 # Ensure the user is allowed to edit this collection
1601 $self->authenticate_user($username, $collect);
1602 }
1603
1604
1605 # Obtain the collect and archive dir
1606 my $site = $self->{'site'};
1607 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1608
1609 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1610 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1611
1612 # Make sure the collection isn't locked by someone else
1613 $self->lock_collection($username, $collect);
1614
1615 # look up additional args
1616 # want either d= or f=
1617 my $docid = $self->{'d'};
1618 my $import_file = $self->{'f'};
1619 if ((!defined $docid) && (!defined $import_file)) {
1620 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1621 }
1622
1623 # Get the parameters and set default mode to "accumulate"
1624 my $metaname = $self->{'metaname'};
1625 my $metavalue = $self->{'metavalue'};
1626## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1627 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1628
1629 my $metamode = $self->{'metamode'};
1630 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1631 # make "accumulate" the default (less destructive, as won't actually
1632 # delete any existing values)
1633 $metamode = "accumulate";
1634 }
1635
1636 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1637 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1638 my $metadata_xml_file;
1639 my $import_filename = undef;
1640 if (defined $docid) {
1641 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1642 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1643
1644 # This now stores the full pathname
1645 $import_filename = $doc_rec->{'src-file'}->[0];
1646 }
1647 else {
1648 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1649 }
1650
1651 # figure out correct metadata.xml file [?]
1652 # Assuming the metadata.xml file is next to the source file
1653 # Note: This will not work if it is using the inherited metadata from the parent folder
1654 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1655 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1656
1657 # Edit the metadata.xml
1658 # Modified by Jeffrey from DL Consulting
1659 # Handle the case where there is one metadata.xml file for multiple FileSets
1660 # The XML filter needs to know whether it is in the right FileSet
1661 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1662 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1663 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1664 $metaname, $metavalue, $metamode, $import_tailname);
1665
1666 # Release the lock once it is done
1667 $self->unlock_collection($username, $collect);
1668
1669 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1670 $mess .= " $metaname";
1671 $mess .= " = $metavalue";
1672 $mess .= " ($metamode)\n";
1673
1674 $gsdl_cgi->generate_ok_message($mess);
1675
1676}
1677
1678sub set_import_metadata_array
1679{
1680 my $self = shift @_;
1681
1682 my $username = $self->{'username'};
1683 my $collect = $self->{'collect'};
1684 my $gsdl_cgi = $self->{'gsdl_cgi'};
1685 my $gsdlhome = $self->{'gsdlhome'};
1686
1687 if ($baseaction::authentication_enabled) {
1688 # Ensure the user is allowed to edit this collection
1689 &authenticate_user($gsdl_cgi, $username, $collect);
1690 }
1691
1692 my $site = $self->{'site'};
1693 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1694
1695 $gsdl_cgi->checked_chdir($collect_dir);
1696
1697 # Make sure the collection isn't locked by someone else
1698 $self->lock_collection($username, $collect);
1699
1700 # look up additional args
1701
1702 my $infodbtype = $self->{'infodbtype'};
1703
1704 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1705
1706 my $json_str = $self->{'json'};
1707 my $doc_array = decode_json $json_str;
1708
1709 my $global_status = 0;
1710 my $global_mess = "";
1711
1712 my @all_docids = ();
1713
1714 foreach my $doc_array_rec ( @$doc_array )
1715 {
1716 my $docid = $doc_array_rec->{'docid'};
1717 my $metaname = $doc_array_rec->{'metaname'};
1718 my $metamode = $self->{'metamode'};
1719 my $metavalue = $doc_array_rec->{'metavalue'};
1720
1721 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1722 # make "accumulate" the default (less destructive, as won't actually
1723 # delete any existing values)
1724 $metamode = "accumulate";
1725 }
1726
1727 push(@all_docids,$docid);
1728
1729 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1730 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1731 my $metadata_xml_file;
1732 my $import_filename = undef;
1733
1734 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1735 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1736
1737 # This now stores the full pathname
1738 $import_filename = $doc_rec->{'src-file'}->[0];
1739
1740 # figure out correct metadata.xml file [?]
1741 # Assuming the metadata.xml file is next to the source file
1742 # Note: This will not work if it is using the inherited metadata from the parent folder
1743 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1744 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1745
1746 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1747 }
1748
1749 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1750 $gsdl_cgi->generate_ok_message($mess);
1751
1752 # Release the lock once it is done
1753 $self->unlock_collection($username, $collect);
1754}
1755
1756sub remove_import_metadata
1757{
1758 my $self = shift @_;
1759
1760 my $username = $self->{'username'};
1761 my $collect = $self->{'collect'};
1762 my $gsdl_cgi = $self->{'gsdl_cgi'};
1763
1764 if ($baseaction::authentication_enabled) {
1765 # Ensure the user is allowed to edit this collection
1766 &authenticate_user($gsdl_cgi, $username, $collect);
1767 }
1768
1769 my $gsdlhome = $self->{'gsdlhome'};
1770 my $infodbtype = $self->{'infodbtype'};
1771
1772 # Obtain the collect dir
1773 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1774 my $site = $self->{'site'};
1775 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1776
1777 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1778 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1779
1780 # Make sure the collection isn't locked by someone else
1781 $self->lock_collection($username, $collect);
1782
1783 # look up additional args
1784 my $docid = $self->{'d'};
1785 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1786 {
1787 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1788 }
1789
1790 my $metaname = $self->{'metaname'};
1791 my $metavalue = $self->{'metavalue'};
1792 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1793
1794 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1795 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1796 my $metadata_xml_file;
1797 my $import_filename = undef;
1798 if (defined $docid)
1799 {
1800 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1801 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1802
1803 # This now stores the full pathname
1804 $import_filename = $doc_rec->{'src-file'}->[0];
1805 }
1806
1807 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1808 {
1809 $gsdl_cgi->generate_error_message("There is no metadata\n");
1810 }
1811
1812 # figure out correct metadata.xml file [?]
1813 # Assuming the metadata.xml file is next to the source file
1814 # Note: This will not work if it is using the inherited metadata from the parent folder
1815 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1816 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1817
1818 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1819
1820 # Release the lock once it is done
1821 $self->unlock_collection($username, $collect);
1822
1823 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1824 $mess .= " $metaname";
1825 $mess .= " = $metavalue\n";
1826
1827 $gsdl_cgi->generate_ok_message($mess);
1828}
1829
1830sub remove_from_metadata_xml
1831{
1832 my $self = shift @_;
1833 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1834
1835 # Set the call-back functions for the metadata tags
1836 my @rules =
1837 (
1838 _default => 'raw',
1839 'Metadata' => \&rfmxml_metadata,
1840 'FileName' => \&mxml_filename
1841 );
1842
1843 my $parser = XML::Rules->new
1844 (
1845 rules => \@rules,
1846 style => 'filter',
1847 output_encoding => 'utf8'
1848 );
1849
1850 my $xml_in = "";
1851 if (!open(MIN,"<$metadata_xml_filename"))
1852 {
1853 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1854 }
1855 else
1856 {
1857 # Read them in
1858 my $line;
1859 while (defined ($line=<MIN>)) {
1860 $xml_in .= $line;
1861 }
1862 close(MIN);
1863
1864 # Filter with the call-back functions
1865 my $xml_out = "";
1866
1867 my $MOUT;
1868 if (!open($MOUT,">$metadata_xml_filename")) {
1869 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1870 }
1871 else {
1872 binmode($MOUT,":utf8");
1873 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1874 close($MOUT);
1875 }
1876 }
1877}
1878
1879sub rfmxml_metadata
1880{
1881 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1882
1883 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1884 {
1885 return [];
1886 }
1887
1888 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1889 return [$tagname => $attrHash];
1890}
1891
1892sub remove_live_metadata
1893{
1894 my $self = shift @_;
1895
1896 my $username = $self->{'username'};
1897 my $collect = $self->{'collect'};
1898 my $gsdl_cgi = $self->{'gsdl_cgi'};
1899 my $gsdlhome = $self->{'gsdlhome'};
1900 my $infodbtype = $self->{'infodbtype'};
1901
1902 if ($baseaction::authentication_enabled) {
1903 # Ensure the user is allowed to edit this collection
1904 &authenticate_user($gsdl_cgi, $username, $collect);
1905 }
1906
1907 # Obtain the collect dir
1908 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1909 my $site = $self->{'site'};
1910 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1911
1912 # Make sure the collection isn't locked by someone else
1913 $self->lock_collection($username, $collect);
1914
1915 # look up additional args
1916 my $docid = $self->{'d'};
1917 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1918 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1919 }
1920
1921 # Generate the dbkey
1922 my $metaname = $self->{'metaname'};
1923 my $dbkey = "$docid.$metaname";
1924
1925 # To people who know $collect_tail please add some comments
1926 # Obtain the live gdbm_db path
1927 my $collect_tail = $collect;
1928 $collect_tail =~ s/^.*[\/\\]//;
1929 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1930 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1931
1932 # Remove the key
1933 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1934 my $status = system($cmd);
1935 if ($status != 0) {
1936 # Catch error if gdbmdel failed
1937 my $mess = "Failed to set metadata key: $dbkey\n";
1938
1939 $mess .= "PATH: $ENV{'PATH'}\n";
1940 $mess .= "cmd = $cmd\n";
1941 $mess .= "Exit status: $status\n";
1942 $mess .= "System Error Message: $!\n";
1943
1944 $gsdl_cgi->generate_error($mess);
1945 }
1946 else {
1947 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1948 }
1949
1950 # Release the lock once it is done
1951 $self->unlock_collection($username, $collect);
1952}
1953
1954
1955sub remove_metadata
1956{
1957 my $self = shift @_;
1958
1959 my $username = $self->{'username'};
1960 my $collect = $self->{'collect'};
1961 my $gsdl_cgi = $self->{'gsdl_cgi'};
1962 my $gsdlhome = $self->{'gsdlhome'};
1963 my $infodbtype = $self->{'infodbtype'};
1964
1965 if ($baseaction::authentication_enabled) {
1966 # Ensure the user is allowed to edit this collection
1967 &authenticate_user($gsdl_cgi, $username, $collect);
1968 }
1969
1970 # Obtain the collect dir
1971 my $site = $self->{'site'};
1972 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1973 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1974
1975 # Make sure the collection isn't locked by someone else
1976 $self->lock_collection($username, $collect);
1977
1978 # look up additional args
1979 my $docid = $self->{'d'};
1980 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1981 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1982 }
1983 my $metaname = $self->{'metaname'};
1984 my $metapos = $self->{'metapos'};
1985 my $metavalue = $self->{'metavalue'};
1986
1987 # To people who know $collect_tail please add some comments
1988 # Obtain the path to the database
1989 my $collect_tail = $collect;
1990 $collect_tail =~ s/^.*[\/\\]//;
1991 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1992 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1993
1994 # Read the docid entry
1995 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1996
1997 # Basically loop through and unescape_html the values
1998 foreach my $k (keys %$doc_rec) {
1999 my @escaped_v = ();
2000 foreach my $v (@{$doc_rec->{$k}}) {
2001 if ($k eq "contains") {
2002 # protect quotes in ".2;".3 etc
2003 $v =~ s/\"/\\\"/g;
2004 push(@escaped_v, $v);
2005 }
2006 else {
2007 my $ev = &ghtml::unescape_html($v);
2008 $ev =~ s/\"/\\\"/g;
2009 push(@escaped_v, $ev);
2010 }
2011 }
2012 $doc_rec->{$k} = \@escaped_v;
2013 }
2014
2015 # Check to make sure the key does exist
2016 if (!defined ($doc_rec->{$metaname})) {
2017 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2018 }
2019
2020 # Obtain the specified metadata pos
2021 $metapos = 0 if (!defined $metapos);
2022
2023 # consider check key is defined before deleting?
2024 # Loop through the metadata array and ignore the specified position
2025 my $filtered_metadata = [];
2026 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2027 for (my $i=0; $i<$num_metadata_vals; $i++) {
2028 my $metaval = shift(@{$doc_rec->{$metaname}});
2029
2030 if (!defined $metavalue && $i != $metapos) {
2031 push(@$filtered_metadata,$metaval)
2032 }
2033
2034 if(defined $metavalue && !($metavalue eq $metaval))
2035 {
2036 push(@$filtered_metadata,$metavalue)
2037 }
2038 }
2039 $doc_rec->{$metaname} = $filtered_metadata;
2040
2041 # Turn the record back to string
2042 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2043
2044 # Store it back to the database
2045 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2046 my $status = system($cmd);
2047 if ($status != 0) {
2048 my $mess = "Failed to set metadata key: $docid\n";
2049
2050 $mess .= "PATH: $ENV{'PATH'}\n";
2051 $mess .= "cmd = $cmd\n";
2052 $mess .= "Exit status: $status\n";
2053 $mess .= "System Error Message: $!\n";
2054
2055 $gsdl_cgi->generate_error($mess);
2056 }
2057 else {
2058 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2059 $mess .= " $metaname";
2060 $mess .= "->[$metapos]" if (defined $metapos);
2061
2062 $gsdl_cgi->generate_ok_message($mess);
2063 }
2064
2065 # Release the lock once it is done
2066 $self->unlock_collection($username, $collect);
2067}
2068
2069
2070# Was trying to reused the codes, but the functions need to be broken
2071# down more before they can be reused, otherwise there will be too
2072# much overhead and duplicate process...
2073sub insert_metadata
2074{
2075 my $self = shift @_;
2076
2077 my $username = $self->{'username'};
2078 my $collect = $self->{'collect'};
2079 my $gsdl_cgi = $self->{'gsdl_cgi'};
2080 my $gsdlhome = $self->{'gsdlhome'};
2081 my $infodbtype = $self->{'infodbtype'};
2082
2083 # If the import metadata and gdbm database have been updated, we
2084 # need to insert some notification to warn user that the the text
2085 # they see at the moment is not indexed and require a rebuild.
2086 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2087
2088 if ($baseaction::authentication_enabled) {
2089 # Ensure the user is allowed to edit this collection
2090 $self->authenticate_user($username, $collect);
2091 }
2092
2093 # Obtain the collect and archive dir
2094 my $site = $self->{'site'};
2095 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2096 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2097 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2098
2099 # Make sure the collection isn't locked by someone else
2100 $self->lock_collection($username, $collect);
2101
2102 # Check additional args
2103 my $docid = $self->{'d'};
2104 if (!defined($docid)) {
2105 $gsdl_cgi->generate_error("No document id is specified: d=...");
2106 }
2107 my $metaname = $self->{'metaname'};
2108 if (!defined($metaname)) {
2109 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2110 }
2111 my $metavalue = $self->{'metavalue'};
2112 if (!defined($metavalue) || $metavalue eq "") {
2113 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2114 }
2115 # make "accumulate" the default (less destructive, as won't actually
2116 # delete any existing values)
2117 my $metamode = "accumulate";
2118
2119 #=======================================================================#
2120 # set_import_metadata [START]
2121 #=======================================================================#
2122 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2123 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2124 my $metadata_xml_file;
2125 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2126 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2127
2128 # This now stores the full pathname
2129 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2130
2131 # figure out correct metadata.xml file [?]
2132 # Assuming the metadata.xml file is next to the source file
2133 # Note: This will not work if it is using the inherited metadata from the parent folder
2134 my ($import_tailname, $import_dirname)
2135 = File::Basename::fileparse($import_filename);
2136 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2137
2138 # Shane's escape characters
2139 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2140 $metavalue =~ s/\,/&#44;/g;
2141 $metavalue =~ s/\:/&#58;/g;
2142 $metavalue =~ s/\|/&#124;/g;
2143 $metavalue =~ s/\(/&#40;/g;
2144 $metavalue =~ s/\)/&#41;/g;
2145 $metavalue =~ s/\[/&#91;/g;
2146 $metavalue =~ s/\\/&#92;/g;
2147 $metavalue =~ s/\]/&#93;/g;
2148 $metavalue =~ s/\{/&#123;/g;
2149 $metavalue =~ s/\}/&#125;/g;
2150 $metavalue =~ s/\"/&#34;/g;
2151 $metavalue =~ s/\`/&#96;/g;
2152 $metavalue =~ s/\n/_newline_/g;
2153
2154 # Edit the metadata.xml
2155 # Modified by Jeffrey from DL Consulting
2156 # Handle the case where there is one metadata.xml file for multiple FileSets
2157 # The XML filter needs to know whether it is in the right FileSet
2158 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2159 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2160 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2161 $metaname, $metavalue, $metamode, $import_tailname);
2162 #=======================================================================#
2163 # set_import_metadata [END]
2164 #=======================================================================#
2165
2166
2167 #=======================================================================#
2168 # set_metadata (accumulate version) [START]
2169 #=======================================================================#
2170 # To people who know $collect_tail please add some comments
2171 # Obtain path to the database
2172 my $collect_tail = $collect;
2173 $collect_tail =~ s/^.*[\/\\]//;
2174 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2175 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2176
2177 # Read the docid entry
2178 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2179
2180 foreach my $k (keys %$doc_rec) {
2181 my @escaped_v = ();
2182 foreach my $v (@{$doc_rec->{$k}}) {
2183 if ($k eq "contains") {
2184 # protect quotes in ".2;".3 etc
2185 $v =~ s/\"/\\\"/g;
2186 push(@escaped_v, $v);
2187 }
2188 else {
2189 my $ev = &ghtml::unescape_html($v);
2190 $ev =~ s/\"/\\\"/g;
2191 push(@escaped_v, $ev);
2192 }
2193 }
2194 $doc_rec->{$k} = \@escaped_v;
2195 }
2196
2197 # Protect the quotes
2198 $metavalue =~ s/\"/\\\"/g;
2199
2200 # Adds the pending macro
2201 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2202
2203 # If the metadata doesn't exist, create a new one
2204 if (!defined($doc_rec->{$metaname})){
2205 $doc_rec->{$metaname} = [ $macro_metavalue ];
2206 }
2207 # Else, let's acculumate the values
2208 else {
2209 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2210 }
2211
2212 # Generate the record string
2213 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2214
2215 # Store it into GDBM
2216 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2217 my $status = system($cmd);
2218 if ($status != 0) {
2219 # Catch error if gdbmget failed
2220 my $mess = "Failed to set metadata key: $docid\n";
2221
2222 $mess .= "PATH: $ENV{'PATH'}\n";
2223 $mess .= "cmd = $cmd\n";
2224 $mess .= "Exit status: $status\n";
2225 $mess .= "System Error Message: $!\n";
2226
2227 $gsdl_cgi->generate_error($mess);
2228 }
2229 else {
2230 my $mess = "insert-metadata successful: Key[$docid]\n";
2231 $mess .= " [In metadata.xml] $metaname";
2232 $mess .= " = $metavalue\n";
2233 $mess .= " [In database] $metaname";
2234 $mess .= " = $macro_metavalue\n";
2235 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2236 $gsdl_cgi->generate_ok_message($mess);
2237 }
2238 #=======================================================================#
2239 # set_metadata (accumulate version) [END]
2240 #=======================================================================#
2241
2242 # Release the lock once it is done
2243 $self->unlock_collection($username, $collect);
2244}
2245
22461;
Note: See TracBrowser for help on using the repository browser.