source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 25891

Last change on this file since 25891 was 25891, checked in by sjm84, 12 years ago

Fixed a bug that wiped out doc.xml files if metadata was changed at the root level

File size: 67.3 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
63
64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
68
69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
72
73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
85
86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
102
103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
106
107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
130
131 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
132 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
133
134 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
135
136 return bless $self, $class;
137}
138
139
140sub get_live_metadata
141{
142 my $self = shift @_;
143
144 my $username = $self->{'username'};
145 my $collect = $self->{'collect'};
146 my $gsdl_cgi = $self->{'gsdl_cgi'};
147 my $gsdlhome = $self->{'gsdlhome'};
148 my $infodbtype = $self->{'infodbtype'};
149
150 # live metadata gets/saves value scoped (prefixed) by the current usename
151 # so (for now) let's not bother to enforce authentication
152
153 # Obtain the collect dir
154 my $site = $self->{'site'};
155 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
157
158 # Make sure the collection isn't locked by someone else
159 $self->lock_collection($username, $collect);
160
161 # look up additional args
162 my $docid = $self->{'d'};
163 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
164 $gsdl_cgi->generate_error("No docid (d=...) specified.");
165 }
166
167 # Generate the dbkey
168 my $metaname = $self->{'metaname'};
169 my $dbkey = "$docid.$metaname";
170
171 # To people who know $collect_tail please add some comments
172 # Obtain path to the database
173 my $collect_tail = $collect;
174 $collect_tail =~ s/^.*[\/|\\]//;
175 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
176 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
177
178 # Obtain the content of the key
179 my $cmd = "gdbmget $infodb_file_path $dbkey";
180 if (open(GIN,"$cmd |") == 0) {
181 # Catch error if gdbmget failed
182 my $mess = "Failed to get metadata key: $metaname\n";
183 $mess .= "$!\n";
184
185 $gsdl_cgi->generate_error($mess);
186 }
187 else {
188 binmode(GIN,":utf8");
189 # Read everything in and concatenate them into $metavalue
190 my $metavalue = "";
191 my $line;
192 while (defined ($line=<GIN>)) {
193 $metavalue .= $line;
194 }
195 close(GIN);
196 chomp($metavalue); # Get rid off the tailing newlines
197 $gsdl_cgi->generate_ok_message("$metavalue");
198 }
199
200 # Release the lock once it is done
201 $self->unlock_collection($username, $collect);
202}
203
204
205sub get_metadata
206{
207 my $self = shift @_;
208
209 my $username = $self->{'username'};
210 my $collect = $self->{'collect'};
211 my $gsdl_cgi = $self->{'gsdl_cgi'};
212 my $gsdlhome = $self->{'gsdlhome'};
213
214 # Authenticate user if it is enabled
215 if ($baseaction::authentication_enabled) {
216 # Ensure the user is allowed to edit this collection
217 &authenticate_user($gsdl_cgi, $username, $collect);
218 }
219
220 # Obtain the collect dir
221 my $site = $self->{'site'};
222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
223 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
224
225 # Make sure the collection isn't locked by someone else
226 $self->lock_collection($username, $collect);
227
228 # look up additional args
229 my $docid = $self->{'d'};
230 my $metaname = $self->{'metaname'};
231 my $metapos = $self->{'metapos'};
232 my $infodbtype = $self->{'infodbtype'};
233
234 # To people who know $collect_tail please add some comments
235 # Obtain path to the database
236 my $collect_tail = $collect;
237 $collect_tail =~ s/^.*[\/\\]//;
238 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
239 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
240
241 # Read the docid entry
242 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
243
244 # Basically loop through and unescape_html the values
245 foreach my $k (keys %$doc_rec) {
246 my @escaped_v = ();
247 foreach my $v (@{$doc_rec->{$k}}) {
248 my $ev = &ghtml::unescape_html($v);
249 push(@escaped_v, $ev);
250 }
251 $doc_rec->{$k} = \@escaped_v;
252 }
253
254 # Obtain the specified metadata value
255 $metapos = 0 if (!defined $metapos);
256 my $metavalue = $doc_rec->{$metaname}->[$metapos];
257 $gsdl_cgi->generate_ok_message("$metavalue");
258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
261}
262
263
264sub get_import_metadata
265{
266 my $self = shift @_;
267
268 my $username = $self->{'username'};
269 my $collect = $self->{'collect'};
270 my $gsdl_cgi = $self->{'gsdl_cgi'};
271 my $gsdlhome = $self->{'gsdlhome'};
272
273 # Authenticate user if it is enabled
274 if ($baseaction::authentication_enabled) {
275 # Ensure the user is allowed to edit this collection
276 &authenticate_user($gsdl_cgi, $username, $collect);
277 }
278
279 # Obtain the collect dir
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
283
284 # Make sure the collection isn't locked by someone else
285 $self->lock_collection($username, $collect);
286
287 # look up additional args
288 my $docid = $self->{'d'};
289 my $metaname = $self->{'metaname'};
290 my $infodbtype = $self->{'infodbtype'};
291 if (!defined $docid)
292 {
293 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
294 }
295
296 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
297 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
298 my $metadata_xml_file;
299 my $import_filename = undef;
300
301
302 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
303 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
304 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
305
306 # This now stores the full pathname
307 $import_filename = $doc_rec->{'src-file'}->[0];
308
309 # figure out correct metadata.xml file [?]
310 # Assuming the metadata.xml file is next to the source file
311 # Note: This will not work if it is using the inherited metadata from the parent folder
312 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
313 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
314
315 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
316
317 # Release the lock once it is done
318 $self->unlock_collection($username, $collect);
319}
320
321sub get_metadata_from_metadata_xml
322{
323 my $self = shift @_;
324 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
325
326 my @rules =
327 (
328 _default => 'raw',
329 'Metadata' => \&gfmxml_metadata,
330 'FileName' => \&mxml_filename
331 );
332
333 my $parser = XML::Rules->new
334 (
335 rules => \@rules,
336 output_encoding => 'utf8'
337 );
338
339 my $xml_in = "";
340 if (!open(MIN,"<$metadata_xml_filename"))
341 {
342 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
343 }
344 else
345 {
346 # Read them in
347 my $line;
348 while (defined ($line=<MIN>)) {
349 $xml_in .= $line;
350 }
351 close(MIN);
352
353 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
354
355 if(defined $parser->{'pad'}->{'metavalue'})
356 {
357 return $parser->{'pad'}->{'metavalue'};
358 }
359 else
360 {
361 return "";
362 }
363 }
364}
365
366sub gfmxml_metadata
367{
368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
369
370 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
371 {
372 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
373 }
374}
375
376sub get_archives_metadata
377{
378 my $self = shift @_;
379
380 my $username = $self->{'username'};
381 my $collect = $self->{'collect'};
382 my $gsdl_cgi = $self->{'gsdl_cgi'};
383 my $gsdlhome = $self->{'gsdlhome'};
384 my $infodbtype = $self->{'infodbtype'};
385
386 # Authenticate user if it is enabled
387 if ($baseaction::authentication_enabled) {
388 # Ensure the user is allowed to edit this collection
389 &authenticate_user($gsdl_cgi, $username, $collect);
390 }
391
392 # Obtain the collect dir
393 my $site = $self->{'site'};
394 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
395
396 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
397
398 # Make sure the collection isn't locked by someone else
399 $self->lock_collection($username, $collect);
400
401 # look up additional args
402 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
403 $docid_secnum = "" if (!defined $docid_secnum);
404
405 my $metaname = $self->{'metaname'};
406 my $metapos = $self->{'metapos'};
407 $metapos = 0 if (!defined $metapos);
408
409 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
411
412 # This now stores the full pathname
413 my $doc_filename = $doc_rec->{'doc-file'}->[0];
414
415 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
416
417 # Release the lock once it is done
418 $self->unlock_collection($username, $collect);
419}
420
421sub get_metadata_from_archive_xml
422{
423 my $self = shift @_;
424 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
425
426 my @start_rules = ('Section' => \&dxml_start_section);
427
428 my @rules =
429 (
430 _default => 'raw',
431 'Metadata' => \&gfdxml_metadata
432 );
433
434 my $parser = XML::Rules->new
435 (
436 start_rules => \@start_rules,
437 rules => \@rules,
438 output_encoding => 'utf8'
439 );
440
441 my $xml_in = "";
442 if (!open(MIN,"<$doc_xml_filename"))
443 {
444 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
445 }
446 else
447 {
448 # Read them in
449 my $line;
450 while (defined ($line=<MIN>)) {
451 $xml_in .= $line;
452 }
453 close(MIN);
454
455 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
456
457 if(defined $parser->{'pad'}->{'metavalue'})
458 {
459 return $parser->{'pad'}->{'metavalue'};
460 }
461 else
462 {
463 return "";
464 }
465 }
466}
467
468sub gfdxml_metadata
469{
470 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
471
472 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
473 {
474 return;
475 }
476
477 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
478 {
479 if (!defined $parser->{'parameters'}->{'poscount'})
480 {
481 $parser->{'parameters'}->{'poscount'} = 0;
482 }
483 else
484 {
485 $parser->{'parameters'}->{'poscount'}++;
486 }
487 }
488
489 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
490 {
491 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
492 }
493}
494
495sub set_live_metadata
496{
497 my $self = shift @_;
498
499 my $username = $self->{'username'};
500 my $collect = $self->{'collect'};
501 my $gsdl_cgi = $self->{'gsdl_cgi'};
502 my $gsdlhome = $self->{'gsdlhome'};
503 my $infodbtype = $self->{'infodbtype'};
504
505 if ($baseaction::authentication_enabled) {
506 # Ensure the user is allowed to edit this collection
507 &authenticate_user($gsdl_cgi, $username, $collect);
508 }
509
510 # Obtain the collect dir
511 my $site = $self->{'site'};
512 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
513 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
514
515 # Make sure the collection isn't locked by someone else
516 $self->lock_collection($username, $collect);
517
518 # look up additional args
519 my $docid = $self->{'d'};
520 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
521 $gsdl_cgi->generate_error("No docid (d=...) specified.");
522 }
523 my $metavalue = $self->{'metavalue'};
524
525
526 # Generate the dbkey
527 my $metaname = $self->{'metaname'};
528 my $dbkey = "$docid.$metaname";
529
530 # To people who know $collect_tail please add some comments
531 # Obtain path to the database
532 my $collect_tail = $collect;
533 $collect_tail =~ s/^.*[\/\\]//;
534 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
535 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
536
537 # Set the new value
538 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
539 my $status = system($cmd);
540 if ($status != 0) {
541 # Catch error if gdbmget failed
542 my $mess = "Failed to set metadata key: $dbkey\n";
543
544 $mess .= "PATH: $ENV{'PATH'}\n";
545 $mess .= "cmd = $cmd\n";
546 $mess .= "Exit status: $status\n";
547 $mess .= "System Error Message: $!\n";
548
549 $gsdl_cgi->generate_error($mess);
550 }
551 else {
552 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
553 }
554
555 # Release the lock once it is done
556 $self->unlock_collection($username, $collect);
557}
558
559sub set_metadata_entry
560{
561 my $self = shift @_;
562 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
563
564 # To people who know $collect_tail please add some comments
565 # Obtain path to the database
566 my $collect_tail = $collect;
567 $collect_tail =~ s/^.*[\/\\]//;
568 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
569 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
570
571# print STDERR "**** infodb file path = $infodb_file_path\n";
572# print STDERR "***** infodb type = $infodbtype\n";
573
574 # Read the docid entry
575 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
576
577 # Set the metadata value
578 if (defined $metapos) {
579 $doc_rec->{$metaname}->[$metapos] = $metavalue;
580 }
581 else {
582 $doc_rec->{$metaname} = [ $metavalue ];
583 }
584
585 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
586
587 return $status;
588
589}
590
591sub set_metadata
592{
593 my $self = shift @_;
594
595 my $username = $self->{'username'};
596 my $collect = $self->{'collect'};
597 my $gsdl_cgi = $self->{'gsdl_cgi'};
598 my $gsdlhome = $self->{'gsdlhome'};
599
600 if ($baseaction::authentication_enabled) {
601 # Ensure the user is allowed to edit this collection
602 &authenticate_user($gsdl_cgi, $username, $collect);
603 }
604
605 my $site = $self->{'site'};
606 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
607
608 $gsdl_cgi->checked_chdir($collect_dir);
609
610 # Obtain the collect dir
611 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
612
613 # Make sure the collection isn't locked by someone else
614 $self->lock_collection($username, $collect);
615
616 # look up additional args
617 my $docid = $self->{'d'};
618 my $metaname = $self->{'metaname'};
619 my $metapos = $self->{'metapos'};
620 my $metavalue = $self->{'metavalue'};
621 my $infodbtype = $self->{'infodbtype'};
622
623 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
624
625 if ($status != 0) {
626 # Catch error if set infodb entry failed
627 my $mess = "Failed to set metadata key: $docid\n";
628
629 $mess .= "PATH: $ENV{'PATH'}\n";
630 $mess .= "Exit status: $status\n";
631 $mess .= "System Error Message: $!\n";
632
633 $gsdl_cgi->generate_error($mess);
634 }
635 else {
636 my $mess = "set-metadata successful: Key[$docid]\n";
637 $mess .= " $metaname";
638 $mess .= "->[$metapos]" if (defined $metapos);
639 $mess .= " = $metavalue";
640
641 $gsdl_cgi->generate_ok_message($mess);
642 }
643
644 # Release the lock once it is done
645 $self->unlock_collection($username, $collect);
646}
647
648
649sub set_metadata_array
650{
651 my $self = shift @_;
652
653 my $username = $self->{'username'};
654 my $collect = $self->{'collect'};
655 my $gsdl_cgi = $self->{'gsdl_cgi'};
656 my $gsdlhome = $self->{'gsdlhome'};
657
658 if ($baseaction::authentication_enabled) {
659 # Ensure the user is allowed to edit this collection
660 &authenticate_user($gsdl_cgi, $username, $collect);
661 }
662
663 my $site = $self->{'site'};
664 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
665
666 $gsdl_cgi->checked_chdir($collect_dir);
667
668 # Obtain the collect dir
669 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
670
671 # Make sure the collection isn't locked by someone else
672 $self->lock_collection($username, $collect);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $docid = $doc_array_rec->{'docid'};
690 my $metaname = $doc_array_rec->{'metaname'};
691 my $metapos = $doc_array_rec->{'metapos'};
692 my $metavalue = $doc_array_rec->{'metavalue'};
693
694 push(@all_docids,$docid);
695
696 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
697
698 if ($status != 0) {
699 # Catch error if set infodb entry failed
700 $global_status = $status;
701 $global_mess .= "Failed to set metadata key: $docid\n";
702 $global_mess .= "Exit status: $status\n";
703 $global_mess .= "System Error Message: $!\n";
704 $global_mess .= "-" x 20;
705 }
706 }
707
708 if ($global_status != 0) {
709 $global_mess .= "PATH: $ENV{'PATH'}\n";
710 $gsdl_cgi->generate_error($global_mess);
711 }
712 else {
713 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
714 $gsdl_cgi->generate_ok_message($mess);
715 }
716
717 # Release the lock once it is done
718 $self->unlock_collection($username, $collect);
719}
720
721
722sub dxml_metadata
723{
724 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
725 my $metaname = $parser->{'parameters'}->{'metaname'};
726 my $metamode = $parser->{'parameters'}->{'metamode'};
727
728 print STDERR "**** IN METADATA \n";
729
730 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731
732 # Find the right metadata tag and checks if we are going to
733 # override it
734 #
735 # Note: This over writes the first metadata block it
736 # encountered. If there are multiple Sections in the doc.xml, it
737 # might not behave as you would expect
738
739 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741## print STDERR "**** metamode = $metamode\n";
742
743 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744 {
745 my $name_attr = $attrHash->{'name'};
746 if (($name_attr eq $metaname) && ($metamode eq "override"))
747 {
748 if (!defined $parser->{'parameters'}->{'poscount'})
749 {
750 $parser->{'parameters'}->{'poscount'} = 0;
751 }
752 else
753 {
754 $parser->{'parameters'}->{'poscount'}++;
755 }
756
757 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758 {
759 ##print STDERR "**** got match!!\n";
760 # Get the value and override the current value
761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
762 $attrHash->{'_content'} = $metavalue;
763
764 # Don't want it to wipe out any other pieces of metadata
765 $parser->{'parameters'}->{'metamode'} = "done";
766 }
767 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
768 {
769 my $metavalue = $parser->{'parameters'}->{'metavalue'};
770 $attrHash->{'_content'} = $metavalue;
771 $parser->{'parameters'}->{'metamode'} = "done";
772 }
773 }
774 }
775
776 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
777 return [$tagname => $attrHash];
778}
779
780
781sub dxml_description
782{
783 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
784 my $metamode = $parser->{'parameters'}->{'metamode'};
785
786 print STDERR "**** IN DESCRIPTION! \n";
787
788 # Accumulate the metadata
789 # NOTE: This appends new metadata element to all description fields.
790 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
791 if (($metamode eq "accumulate")) {
792 print STDERR "**** ACCUMULATE! \n";
793 # If get to here and metamode is override, the this means there
794 # was no existing value to overide => treat as an append operation
795
796 # Tack a new metadata tag on to the end of the <Metadata>+ block
797 my $metaname = $parser->{'parameters'}->{'metaname'};
798 my $metavalue = $parser->{'parameters'}->{'metavalue'};
799
800 my $metadata_attr = {
801 '_content' => $metavalue,
802 'name' => $metaname,
803 'mode' => "accumulate"
804 };
805
806 my $append_metadata = [ "Metadata" => $metadata_attr ];
807 my $description_content = $attrHash->{'_content'};
808
809 print STDERR "**** appending to doc.xml\n";
810
811 if (ref($description_content)) {
812 # got some existing interesting nested content
813 push(@$description_content, " ", $append_metadata ,"\n ");
814 }
815 else {
816 #description_content is most likely a string such as "\n"
817 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
818 }
819
820 $parser->{'parameters'}->{'metamode'} = "done";
821 }
822 else {
823 print STDERR "**** NOT ACCUMULATE?!? \n";
824 }
825
826 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
827 return [$tagname => $attrHash];
828}
829
830
831sub dxml_start_section
832{
833 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
834
835 my $new_depth = scalar(@$contextArray);
836
837 print STDERR "**** START SECTION \n";
838
839 if ($new_depth == 1) {
840 $parser->{'parameters'}->{'curr_section_depth'} = 1;
841 $parser->{'parameters'}->{'curr_section_num'} = "";
842 }
843
844 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
845 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
846
847 my $new_secnum;
848
849 if ($new_depth > $old_depth) {
850 # child subsection
851 $new_secnum = "$old_secnum.1";
852 }
853 elsif ($new_depth == $old_depth) {
854 # sibling section => increase it's value by 1
855 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
856 $tail_num++;
857 $new_secnum = $old_secnum;
858 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
859 }
860 else {
861 # back up to parent section => lopp off tail
862 $new_secnum = $old_secnum;
863 $new_secnum =~ s/\.\d+$//;
864 }
865
866 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
867 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
868
869 1;
870}
871
872sub edit_xml_file
873{
874 my $self = shift @_;
875 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
876
877 # use XML::Rules to add it in (read in and out again)
878 my $parser = XML::Rules->new(start_rules => $start_rules,
879 rules => $rules,
880 style => 'filter',
881 output_encoding => 'utf8' );
882
883 my $xml_in = "";
884 if (!open(MIN,"<$filename")) {
885 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
886 }
887 else {
888 # Read all the text in
889 my $line;
890 while (defined ($line=<MIN>)) {
891 $xml_in .= $line;
892 }
893 close(MIN);
894
895 my $MOUT;
896 if (!open($MOUT,">$filename")) {
897 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
898 }
899 else {
900 # Matched lines will get handled by the call backs
901## my $xml_out = "";
902
903 binmode($MOUT,":utf8");
904 $parser->filter($xml_in,$MOUT, $options);
905
906# binmode(MOUT,":utf8");
907# print MOUT $xml_out;
908 close($MOUT);
909 }
910 }
911}
912
913sub edit_doc_xml
914{
915 my $self = shift @_;
916 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
917
918 $gsdl_cgi->generate_ok_message("IN EDIT DOC XML\n****************************\n");
919 $gsdl_cgi->generate_ok_message("doc_xml_filename = $doc_xml_filename\n");
920 $gsdl_cgi->generate_ok_message("metaname = $metaname\n");
921 $gsdl_cgi->generate_ok_message("metavalue = $metavalue\n");
922 $gsdl_cgi->generate_ok_message("metapos = $metapos\n");
923 $gsdl_cgi->generate_ok_message("metamode = $metamode\n");
924 $gsdl_cgi->generate_ok_message("opt_secnum = $opt_secnum\n");
925 $gsdl_cgi->generate_ok_message("prevmetavalue = $prevmetavalue\n");
926
927 # To monitor which section/subsection number we are in
928 my @start_rules =
929 ( 'Section' => \&dxml_start_section );
930
931 # use XML::Rules to add it in (read in and out again)
932 # Set the call back functions
933 my @rules =
934 ( _default => 'raw',
935 'Metadata' => \&dxml_metadata,
936 'Description' => \&dxml_description);
937
938 # Sets the parameters
939 my $options = { 'metaname' => $metaname,
940 'metapos' => $metapos,
941 'metavalue' => $metavalue,
942 'metamode' => $metamode,
943 'prevmetavalue' => $prevmetavalue };
944
945 if (defined $opt_secnum) {
946 $options->{'secnum'} = $opt_secnum;
947 }
948
949 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
950}
951
952sub set_archives_metadata_entry
953{
954 my $self = shift @_;
955 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
956
957 $gsdl_cgi->generate_ok_message("IN SET ARCHIVES METADATA ENTRY\n****************************\n");
958 $gsdl_cgi->generate_ok_message("archive_dir = $archive_dir\n");
959 $gsdl_cgi->generate_ok_message("collect_dir = $collect_dir\n");
960 $gsdl_cgi->generate_ok_message("collect = $collect\n");
961 $gsdl_cgi->generate_ok_message("infodbtype = $infodbtype\n");
962 $gsdl_cgi->generate_ok_message("docid = $docid\n");
963 $gsdl_cgi->generate_ok_message("metaname = $metaname\n");
964 $gsdl_cgi->generate_ok_message("metapos = $metapos\n");
965 $gsdl_cgi->generate_ok_message("metavalue = $metavalue\n");
966 $gsdl_cgi->generate_ok_message("metamode = $metamode\n");
967 $gsdl_cgi->generate_ok_message("prevmetavalue = $prevmetavalue\n");
968
969 # Obtain the doc.xml path for the specified docID
970 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
971
972 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
973 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
974 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
975
976 # The $doc_xml_file is relative to the archives, and now let's get the full path
977 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
978 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
979
980 # Edit the doc.xml file with the specified metadata name, value and position.
981 # TODO: there is a potential problem here as this edit_doc_xml function
982 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
983 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
984
985 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
986 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
987
988 return 0; # return 0 for now to indicate no error
989}
990
991
992sub set_archives_metadata
993{
994 my $self = shift @_;
995
996 my $username = $self->{'username'};
997 my $collect = $self->{'collect'};
998 my $gsdl_cgi = $self->{'gsdl_cgi'};
999 my $gsdlhome = $self->{'gsdlhome'};
1000 my $infodbtype = $self->{'infodbtype'};
1001
1002 if ($baseaction::authentication_enabled) {
1003 # Ensure the user is allowed to edit this collection
1004 $self->authenticate_user($username, $collect);
1005 }
1006
1007 my $site = $self->{'site'};
1008
1009 # Obtain the collect and archive dir
1010 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1011
1012 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1013
1014 # Make sure the collection isn't locked by someone else
1015 $self->lock_collection($username, $collect);
1016
1017 # look up additional args
1018 my $docid = $self->{'d'};
1019 my $metaname = $self->{'metaname'};
1020 my $metavalue = $self->{'metavalue'};
1021 my $prevmetavalue = $self->{'prevmetavalue'};
1022
1023 my $metapos = $self->{'metapos'};
1024 $metapos = 0 if (!defined $metapos);
1025
1026 my $metamode = $self->{'metamode'};
1027 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1028 # make "accumulate" the default (less destructive, as won't actually
1029 # delete any existing values)
1030 $metamode = "accumulate";
1031 }
1032
1033 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1034 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1035
1036 # Release the lock once it is done
1037 $self->unlock_collection($username, $collect);
1038
1039 if ($status == 0) {
1040 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1041 $mess .= " $metaname";
1042 $mess .= "->[$metapos]" if (defined $metapos);
1043 $mess .= " = $metavalue";
1044 $mess .= " ($metamode)\n";
1045
1046 $gsdl_cgi->generate_ok_message($mess);
1047 }
1048 else {
1049 my $mess .= "Failed to set archives metadata key: $docid\n";
1050 $mess .= "Exit status: $status\n";
1051 $mess .= "System Error Message: $!\n";
1052 $mess .= "-" x 20 . "\n";
1053
1054 $gsdl_cgi->generate_error($mess);
1055 }
1056}
1057
1058
1059sub set_archives_metadata_array
1060{
1061 my $self = shift @_;
1062
1063 my $username = $self->{'username'};
1064 my $collect = $self->{'collect'};
1065 my $gsdl_cgi = $self->{'gsdl_cgi'};
1066 my $gsdlhome = $self->{'gsdlhome'};
1067
1068 if ($baseaction::authentication_enabled) {
1069 # Ensure the user is allowed to edit this collection
1070 &authenticate_user($gsdl_cgi, $username, $collect);
1071 }
1072
1073 my $site = $self->{'site'};
1074 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1075
1076 $gsdl_cgi->checked_chdir($collect_dir);
1077
1078 # Obtain the collect dir
1079 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1080
1081 # Make sure the collection isn't locked by someone else
1082 $self->lock_collection($username, $collect);
1083
1084 # look up additional args
1085
1086 my $infodbtype = $self->{'infodbtype'};
1087
1088 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1089
1090 my $json_str = $self->{'json'};
1091 my $doc_array = decode_json $json_str;
1092
1093
1094 my $global_status = 0;
1095 my $global_mess = "";
1096
1097 my @all_docids = ();
1098
1099 foreach my $doc_array_rec ( @$doc_array ) {
1100
1101 my $docid = $doc_array_rec->{'docid'};
1102 my $metaname = $doc_array_rec->{'metaname'};
1103 my $metapos = $doc_array_rec->{'metapos'};
1104 my $metamode = $self->{'metamode'};
1105 my $metavalue = $doc_array_rec->{'metavalue'};
1106
1107 # Some sanity checks
1108 $metapos = 0 if (!defined $metapos);
1109
1110 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1111 # make "accumulate" the default (less destructive, as won't actually
1112 # delete any existing values)
1113 $metamode = "accumulate";
1114 }
1115
1116 push(@all_docids,$docid);
1117
1118 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1119 $metaname,$metapos,$metavalue,$metamode);
1120
1121 if ($status != 0) {
1122 # Catch error if set infodb entry failed
1123 $global_status = $status;
1124 $global_mess .= "Failed to set metadata key: $docid\n";
1125 $global_mess .= "Exit status: $status\n";
1126 $global_mess .= "System Error Message: $!\n";
1127 $global_mess .= "-" x 20 . "\n";
1128 }
1129 }
1130
1131 if ($global_status != 0) {
1132 $global_mess .= "PATH: $ENV{'PATH'}\n";
1133 $gsdl_cgi->generate_error($global_mess);
1134 }
1135 else {
1136 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1137 $gsdl_cgi->generate_ok_message($mess);
1138 }
1139
1140 # Release the lock once it is done
1141 $self->unlock_collection($username, $collect);
1142}
1143
1144sub remove_archives_metadata
1145{
1146 my $self = shift @_;
1147
1148 my $username = $self->{'username'};
1149 my $collect = $self->{'collect'};
1150 my $gsdl_cgi = $self->{'gsdl_cgi'};
1151 my $gsdlhome = $self->{'gsdlhome'};
1152 my $infodbtype = $self->{'infodbtype'};
1153
1154 if ($baseaction::authentication_enabled)
1155 {
1156 # Ensure the user is allowed to edit this collection
1157 &authenticate_user($gsdl_cgi, $username, $collect);
1158 }
1159
1160 my $site = $self->{'site'};
1161
1162 # Obtain the collect and archive dir
1163 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1164
1165 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1166
1167 # Make sure the collection isn't locked by someone else
1168 $self->lock_collection($username, $collect);
1169
1170 # look up additional args
1171 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1172
1173 my $metaname = $self->{'metaname'};
1174 my $metapos = $self->{'metapos'};
1175 $metapos = 0 if (!defined $metapos);
1176
1177 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1178 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1179
1180 # This now stores the full pathname
1181 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1182
1183 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1184
1185 # Release the lock once it is done
1186 $self->unlock_collection($username, $collect);
1187
1188 if ($status == 0)
1189 {
1190 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1191 $mess .= " $metaname";
1192 $mess .= "->[$metapos]" if (defined $metapos);
1193
1194 $gsdl_cgi->generate_ok_message($mess);
1195 }
1196 else
1197 {
1198 my $mess .= "Failed to remove archives metadata key: $docid\n";
1199 $mess .= "Exit status: $status\n";
1200 $mess .= "System Error Message: $!\n";
1201 $mess .= "-" x 20 . "\n";
1202
1203 $gsdl_cgi->generate_error($mess);
1204 }
1205}
1206
1207sub remove_from_doc_xml
1208{
1209 my $self = shift @_;
1210 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1211
1212 my @start_rules = ('Section' => \&dxml_start_section);
1213
1214 # Set the call-back functions for the metadata tags
1215 my @rules =
1216 (
1217 _default => 'raw',
1218 'Metadata' => \&rfdxml_metadata
1219 );
1220
1221 my $parser = XML::Rules->new
1222 (
1223 start_rules => \@start_rules,
1224 rules => \@rules,
1225 style => 'filter',
1226 output_encoding => 'utf8'
1227 );
1228
1229 my $status = 0;
1230 my $xml_in = "";
1231 if (!open(MIN,"<$doc_xml_filename"))
1232 {
1233 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1234 $status = 1;
1235 }
1236 else
1237 {
1238 # Read them in
1239 my $line;
1240 while (defined ($line=<MIN>)) {
1241 $xml_in .= $line;
1242 }
1243 close(MIN);
1244
1245 # Filter with the call-back functions
1246 my $xml_out = "";
1247
1248 my $MOUT;
1249 if (!open($MOUT,">$doc_xml_filename")) {
1250 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1251 $status = 1;
1252 }
1253 else {
1254 binmode($MOUT,":utf8");
1255 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1256 close($MOUT);
1257 }
1258 }
1259 return $status;
1260}
1261
1262sub rfdxml_metadata
1263{
1264 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1265
1266 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1267 {
1268 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1269 return [$tagname => $attrHash];
1270 }
1271
1272 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1273 {
1274 if (!defined $parser->{'parameters'}->{'poscount'})
1275 {
1276 $parser->{'parameters'}->{'poscount'} = 0;
1277 }
1278 else
1279 {
1280 $parser->{'parameters'}->{'poscount'}++;
1281 }
1282 }
1283
1284 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1285 {
1286 return [];
1287 }
1288
1289 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1290 {
1291 return [];
1292 }
1293
1294 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1295 return [$tagname => $attrHash];
1296}
1297
1298sub mxml_metadata
1299{
1300 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1301 my $metaname = $parser->{'parameters'}->{'metaname'};
1302 my $metamode = $parser->{'parameters'}->{'metamode'};
1303
1304 # Report error if we don't see FileName tag before this
1305 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1306
1307 # Don't do anything if we are not in the right FileSet
1308 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1309 if ($file_regexp =~ /\.\*/) {
1310 # Only interested in a file_regexp if it specifies precisely one
1311 # file.
1312 # So, skip anything with a .* in it as it is too general
1313 return [$tagname => $attrHash];
1314 }
1315 my $src_file = $parser->{'parameters'}->{'src_file'};
1316 if (!($src_file =~ /$file_regexp/)) {
1317 return [$tagname => $attrHash];
1318 }
1319## print STDERR "*** mxl metamode = $metamode\n";
1320
1321 # Find the right metadata tag and checks if we are going to override it
1322 my $name_attr = $attrHash->{'name'};
1323 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1324 # Get the value and override the current value
1325 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1326 $attrHash->{'_content'} = $metavalue;
1327
1328## print STDERR "**** overrideing metadata.xml\n";
1329
1330 # Don't want it to wipe out any other pieces of metadata
1331 $parser->{'parameters'}->{'metamode'} = "done";
1332 }
1333
1334 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1335 return [$tagname => $attrHash];
1336}
1337
1338
1339sub mxml_description
1340{
1341 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1342 my $metamode = $parser->{'parameters'}->{'metamode'};
1343
1344 # Failed... Report error if we don't see FileName tag before this
1345 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1346
1347 # Don't do anything if we are not in the right FileSet
1348 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1349 if ($file_regexp =~ m/\.\*/) {
1350 # Only interested in a file_regexp if it specifies precisely one
1351 # file.
1352 # So, skip anything with a .* in it as it is too general
1353 return [$tagname => $attrHash];
1354 }
1355 my $src_file = $parser->{'parameters'}->{'src_file'};
1356
1357 if (!($src_file =~ m/$file_regexp/)) {
1358 return [$tagname => $attrHash];
1359 }
1360
1361 # Accumulate the metadata block to the end of the description block
1362 # Note: This adds metadata block to all description blocks, so if there are
1363 # multiple FileSets, it will add to all of them
1364 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1365 # if metamode was "override" but get to here then it failed to
1366 # find an item to override, in which case it should append its
1367 # value to the end, just like the "accumulate" mode
1368
1369 # tack a new metadata tag on to the end of the <Metadata>+ block
1370 my $metaname = $parser->{'parameters'}->{'metaname'};
1371 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1372
1373 my $metadata_attr = { '_content' => $metavalue,
1374 'name' => $metaname,
1375 'mode' => "accumulate" };
1376
1377 my $append_metadata = [ "Metadata" => $metadata_attr ];
1378 my $description_content = $attrHash->{'_content'};
1379
1380## print STDERR "*** appending to metadata.xml\n";
1381
1382 # append the new metadata element to the end of the current
1383 # content contained inside this tag
1384 if (ref($description_content) eq "") {
1385 # => string or numeric literal
1386 # this is caused by a <Description> block has no <Metadata> child elements
1387 # => set up an empty array in '_content'
1388 $attrHash->{'_content'} = [ "\n" ];
1389 $description_content = $attrHash->{'_content'};
1390 }
1391
1392 push(@$description_content," ", $append_metadata ,"\n ");
1393 $parser->{'parameters'}->{'metamode'} = "done";
1394 }
1395
1396 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1397 return [$tagname => $attrHash];
1398}
1399
1400
1401sub mxml_filename
1402{
1403 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1404
1405 # Store the filename of the Current Fileset
1406 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1407 # FileName tag must come before Description tag
1408 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1409
1410 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1411 return [$tagname => $attrHash];
1412}
1413
1414
1415sub mxml_fileset
1416{
1417 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1418
1419 # Initilise the current_file
1420 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1421 # FileName tag must come before Description tag
1422 $parser->{'parameters'}->{'current_file'} = "";
1423
1424 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1425 return [$tagname => $attrHash];
1426}
1427
1428
1429sub edit_metadata_xml
1430{
1431 my $self = shift @_;
1432 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1433
1434 # Set the call-back functions for the metadata tags
1435 my @rules =
1436 ( _default => 'raw',
1437 'FileName' => \&mxml_filename,
1438 'Metadata' => \&mxml_metadata,
1439 'Description' => \&mxml_description,
1440 'FileSet' => \&mxml_fileset);
1441
1442 # use XML::Rules to add it in (read in and out again)
1443 my $parser = XML::Rules->new(rules => \@rules,
1444 style => 'filter',
1445 output_encoding => 'utf8');
1446
1447 if (!-e $metadata_xml_filename) {
1448
1449 if (open(MOUT,">$metadata_xml_filename")) {
1450
1451 my $src_file_re = &util::filename_to_regex($src_file);
1452 # shouldn't the following also be in the above utility routine??
1453 # $src_file_re =~ s/\./\\./g;
1454
1455 print MOUT "<?xml version=\"1.0\"?>\n";
1456 print MOUT "<DirectoryMetadata>\n";
1457 print MOUT " <FileSet>\n";
1458 print MOUT " <FileName>$src_file_re</FileName>\n";
1459 print MOUT " <Description>\n";
1460 print MOUT " </Description>\n";
1461 print MOUT " </FileSet>\n";
1462 print MOUT "</DirectoryMetadata>\n";
1463
1464 close(MOUT);
1465 }
1466 else {
1467 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1468 }
1469 }
1470
1471
1472 my $xml_in = "";
1473 if (!open(MIN,"<$metadata_xml_filename")) {
1474 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1475 }
1476 else {
1477 # Read them in
1478 my $line;
1479 while (defined ($line=<MIN>)) {
1480 $xml_in .= $line;
1481 }
1482 close(MIN);
1483
1484 # Filter with the call-back functions
1485 my $xml_out = "";
1486
1487 my $MOUT;
1488 if (!open($MOUT,">$metadata_xml_filename")) {
1489 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1490 }
1491 else {
1492 binmode($MOUT,":utf8");
1493
1494 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1495 # At the moment, I will just hack it!
1496 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1497 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1498 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1499 #print MOUT $xml_out;
1500
1501 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1502 metavalue => $metavalue,
1503 metamode => $metamode,
1504 src_file => $src_file,
1505 current_file => undef} );
1506 close($MOUT);
1507 }
1508 }
1509}
1510
1511
1512sub set_import_metadata
1513{
1514 my $self = shift @_;
1515
1516 my $username = $self->{'username'};
1517 my $collect = $self->{'collect'};
1518 my $gsdl_cgi = $self->{'gsdl_cgi'};
1519 my $gsdlhome = $self->{'gsdlhome'};
1520 my $infodbtype = $self->{'infodbtype'};
1521
1522 if ($baseaction::authentication_enabled) {
1523 # Ensure the user is allowed to edit this collection
1524 $self->authenticate_user($username, $collect);
1525 }
1526
1527
1528 # Obtain the collect and archive dir
1529 my $site = $self->{'site'};
1530 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1531
1532 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1533 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1534
1535 # Make sure the collection isn't locked by someone else
1536 $self->lock_collection($username, $collect);
1537
1538 # look up additional args
1539 # want either d= or f=
1540 my $docid = $self->{'d'};
1541 my $import_file = $self->{'f'};
1542 if ((!defined $docid) && (!defined $import_file)) {
1543 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1544 }
1545
1546 # Get the parameters and set default mode to "accumulate"
1547 my $metaname = $self->{'metaname'};
1548 my $metavalue = $self->{'metavalue'};
1549## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1550 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1551
1552 my $metamode = $self->{'metamode'};
1553 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1554 # make "accumulate" the default (less destructive, as won't actually
1555 # delete any existing values)
1556 $metamode = "accumulate";
1557 }
1558
1559 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1560 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1561 my $metadata_xml_file;
1562 my $import_filename = undef;
1563 if (defined $docid) {
1564 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1565 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1566
1567 # This now stores the full pathname
1568 $import_filename = $doc_rec->{'src-file'}->[0];
1569 }
1570 else {
1571 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1572 }
1573
1574 # figure out correct metadata.xml file [?]
1575 # Assuming the metadata.xml file is next to the source file
1576 # Note: This will not work if it is using the inherited metadata from the parent folder
1577 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1578 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1579
1580 # Edit the metadata.xml
1581 # Modified by Jeffrey from DL Consulting
1582 # Handle the case where there is one metadata.xml file for multiple FileSets
1583 # The XML filter needs to know whether it is in the right FileSet
1584 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1585 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1586 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1587 $metaname, $metavalue, $metamode, $import_tailname);
1588
1589 # Release the lock once it is done
1590 $self->unlock_collection($username, $collect);
1591
1592 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1593 $mess .= " $metaname";
1594 $mess .= " = $metavalue";
1595 $mess .= " ($metamode)\n";
1596
1597 $gsdl_cgi->generate_ok_message($mess);
1598
1599}
1600
1601sub set_import_metadata_array
1602{
1603 my $self = shift @_;
1604
1605 my $username = $self->{'username'};
1606 my $collect = $self->{'collect'};
1607 my $gsdl_cgi = $self->{'gsdl_cgi'};
1608 my $gsdlhome = $self->{'gsdlhome'};
1609
1610 if ($baseaction::authentication_enabled) {
1611 # Ensure the user is allowed to edit this collection
1612 &authenticate_user($gsdl_cgi, $username, $collect);
1613 }
1614
1615 my $site = $self->{'site'};
1616 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1617
1618 $gsdl_cgi->checked_chdir($collect_dir);
1619
1620 # Make sure the collection isn't locked by someone else
1621 $self->lock_collection($username, $collect);
1622
1623 # look up additional args
1624
1625 my $infodbtype = $self->{'infodbtype'};
1626
1627 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1628
1629 my $json_str = $self->{'json'};
1630 my $doc_array = decode_json $json_str;
1631
1632 my $global_status = 0;
1633 my $global_mess = "";
1634
1635 my @all_docids = ();
1636
1637 foreach my $doc_array_rec ( @$doc_array )
1638 {
1639 my $docid = $doc_array_rec->{'docid'};
1640 my $metaname = $doc_array_rec->{'metaname'};
1641 my $metamode = $self->{'metamode'};
1642 my $metavalue = $doc_array_rec->{'metavalue'};
1643
1644 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1645 # make "accumulate" the default (less destructive, as won't actually
1646 # delete any existing values)
1647 $metamode = "accumulate";
1648 }
1649
1650 push(@all_docids,$docid);
1651
1652 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1653 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1654 my $metadata_xml_file;
1655 my $import_filename = undef;
1656
1657 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1658 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1659
1660 # This now stores the full pathname
1661 $import_filename = $doc_rec->{'src-file'}->[0];
1662
1663 # figure out correct metadata.xml file [?]
1664 # Assuming the metadata.xml file is next to the source file
1665 # Note: This will not work if it is using the inherited metadata from the parent folder
1666 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1667 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1668
1669 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1670 }
1671
1672 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1673 $gsdl_cgi->generate_ok_message($mess);
1674
1675 # Release the lock once it is done
1676 $self->unlock_collection($username, $collect);
1677}
1678
1679sub remove_import_metadata
1680{
1681 my $self = shift @_;
1682
1683 my $username = $self->{'username'};
1684 my $collect = $self->{'collect'};
1685 my $gsdl_cgi = $self->{'gsdl_cgi'};
1686
1687 if ($baseaction::authentication_enabled) {
1688 # Ensure the user is allowed to edit this collection
1689 &authenticate_user($gsdl_cgi, $username, $collect);
1690 }
1691
1692 my $gsdlhome = $self->{'gsdlhome'};
1693 my $infodbtype = $self->{'infodbtype'};
1694
1695 # Obtain the collect dir
1696 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1697 my $site = $self->{'site'};
1698 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1699
1700 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1701 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1702
1703 # Make sure the collection isn't locked by someone else
1704 $self->lock_collection($username, $collect);
1705
1706 # look up additional args
1707 my $docid = $self->{'d'};
1708 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1709 {
1710 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1711 }
1712
1713 my $metaname = $self->{'metaname'};
1714 my $metavalue = $self->{'metavalue'};
1715 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1716
1717 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1718 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1719 my $metadata_xml_file;
1720 my $import_filename = undef;
1721 if (defined $docid)
1722 {
1723 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1724 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1725
1726 # This now stores the full pathname
1727 $import_filename = $doc_rec->{'src-file'}->[0];
1728 }
1729
1730 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1731 {
1732 $gsdl_cgi->generate_error_message("There is no metadata\n");
1733 }
1734
1735 # figure out correct metadata.xml file [?]
1736 # Assuming the metadata.xml file is next to the source file
1737 # Note: This will not work if it is using the inherited metadata from the parent folder
1738 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1739 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1740
1741 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1742
1743 # Release the lock once it is done
1744 $self->unlock_collection($username, $collect);
1745
1746 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1747 $mess .= " $metaname";
1748 $mess .= " = $metavalue\n";
1749
1750 $gsdl_cgi->generate_ok_message($mess);
1751}
1752
1753sub remove_from_metadata_xml
1754{
1755 my $self = shift @_;
1756 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1757
1758 # Set the call-back functions for the metadata tags
1759 my @rules =
1760 (
1761 _default => 'raw',
1762 'Metadata' => \&rfmxml_metadata,
1763 'FileName' => \&mxml_filename
1764 );
1765
1766 my $parser = XML::Rules->new
1767 (
1768 rules => \@rules,
1769 style => 'filter',
1770 output_encoding => 'utf8'
1771 );
1772
1773 my $xml_in = "";
1774 if (!open(MIN,"<$metadata_xml_filename"))
1775 {
1776 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1777 }
1778 else
1779 {
1780 # Read them in
1781 my $line;
1782 while (defined ($line=<MIN>)) {
1783 $xml_in .= $line;
1784 }
1785 close(MIN);
1786
1787 # Filter with the call-back functions
1788 my $xml_out = "";
1789
1790 my $MOUT;
1791 if (!open($MOUT,">$metadata_xml_filename")) {
1792 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1793 }
1794 else {
1795 binmode($MOUT,":utf8");
1796 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1797 close($MOUT);
1798 }
1799 }
1800}
1801
1802sub rfmxml_metadata
1803{
1804 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1805
1806 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1807 {
1808 return [];
1809 }
1810
1811 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1812 return [$tagname => $attrHash];
1813}
1814
1815sub remove_live_metadata
1816{
1817 my $self = shift @_;
1818
1819 my $username = $self->{'username'};
1820 my $collect = $self->{'collect'};
1821 my $gsdl_cgi = $self->{'gsdl_cgi'};
1822 my $gsdlhome = $self->{'gsdlhome'};
1823 my $infodbtype = $self->{'infodbtype'};
1824
1825 if ($baseaction::authentication_enabled) {
1826 # Ensure the user is allowed to edit this collection
1827 &authenticate_user($gsdl_cgi, $username, $collect);
1828 }
1829
1830 # Obtain the collect dir
1831 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1832 my $site = $self->{'site'};
1833 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1834
1835 # Make sure the collection isn't locked by someone else
1836 $self->lock_collection($username, $collect);
1837
1838 # look up additional args
1839 my $docid = $self->{'d'};
1840 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1841 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1842 }
1843
1844 # Generate the dbkey
1845 my $metaname = $self->{'metaname'};
1846 my $dbkey = "$docid.$metaname";
1847
1848 # To people who know $collect_tail please add some comments
1849 # Obtain the live gdbm_db path
1850 my $collect_tail = $collect;
1851 $collect_tail =~ s/^.*[\/\\]//;
1852 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1853 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1854
1855 # Remove the key
1856 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1857 my $status = system($cmd);
1858 if ($status != 0) {
1859 # Catch error if gdbmdel failed
1860 my $mess = "Failed to set metadata key: $dbkey\n";
1861
1862 $mess .= "PATH: $ENV{'PATH'}\n";
1863 $mess .= "cmd = $cmd\n";
1864 $mess .= "Exit status: $status\n";
1865 $mess .= "System Error Message: $!\n";
1866
1867 $gsdl_cgi->generate_error($mess);
1868 }
1869 else {
1870 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1871 }
1872
1873}
1874
1875
1876sub remove_metadata
1877{
1878 my $self = shift @_;
1879
1880 my $username = $self->{'username'};
1881 my $collect = $self->{'collect'};
1882 my $gsdl_cgi = $self->{'gsdl_cgi'};
1883 my $gsdlhome = $self->{'gsdlhome'};
1884 my $infodbtype = $self->{'infodbtype'};
1885
1886 if ($baseaction::authentication_enabled) {
1887 # Ensure the user is allowed to edit this collection
1888 &authenticate_user($gsdl_cgi, $username, $collect);
1889 }
1890
1891 # Obtain the collect dir
1892 my $site = $self->{'site'};
1893 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1894 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1895
1896 # Make sure the collection isn't locked by someone else
1897 $self->lock_collection($username, $collect);
1898
1899 # look up additional args
1900 my $docid = $self->{'d'};
1901 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1902 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1903 }
1904 my $metaname = $self->{'metaname'};
1905 my $metapos = $self->{'metapos'};
1906 my $metavalue = $self->{'metavalue'};
1907
1908 # To people who know $collect_tail please add some comments
1909 # Obtain the path to the database
1910 my $collect_tail = $collect;
1911 $collect_tail =~ s/^.*[\/\\]//;
1912 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1913 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1914
1915 # Read the docid entry
1916 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1917
1918 # Basically loop through and unescape_html the values
1919 foreach my $k (keys %$doc_rec) {
1920 my @escaped_v = ();
1921 foreach my $v (@{$doc_rec->{$k}}) {
1922 if ($k eq "contains") {
1923 # protect quotes in ".2;".3 etc
1924 $v =~ s/\"/\\\"/g;
1925 push(@escaped_v, $v);
1926 }
1927 else {
1928 my $ev = &ghtml::unescape_html($v);
1929 $ev =~ s/\"/\\\"/g;
1930 push(@escaped_v, $ev);
1931 }
1932 }
1933 $doc_rec->{$k} = \@escaped_v;
1934 }
1935
1936 # Check to make sure the key does exist
1937 if (!defined ($doc_rec->{$metaname})) {
1938 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1939 }
1940
1941 # Obtain the specified metadata pos
1942 $metapos = 0 if (!defined $metapos);
1943
1944 # consider check key is defined before deleting?
1945 # Loop through the metadata array and ignore the specified position
1946 my $filtered_metadata = [];
1947 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1948 for (my $i=0; $i<$num_metadata_vals; $i++) {
1949 my $metaval = shift(@{$doc_rec->{$metaname}});
1950
1951 if (!defined $metavalue && $i != $metapos) {
1952 push(@$filtered_metadata,$metaval)
1953 }
1954
1955 if(defined $metavalue && !($metavalue eq $metaval))
1956 {
1957 push(@$filtered_metadata,$metavalue)
1958 }
1959 }
1960 $doc_rec->{$metaname} = $filtered_metadata;
1961
1962 # Turn the record back to string
1963 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1964
1965 # Store it back to the database
1966 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1967 my $status = system($cmd);
1968 if ($status != 0) {
1969 my $mess = "Failed to set metadata key: $docid\n";
1970
1971 $mess .= "PATH: $ENV{'PATH'}\n";
1972 $mess .= "cmd = $cmd\n";
1973 $mess .= "Exit status: $status\n";
1974 $mess .= "System Error Message: $!\n";
1975
1976 $gsdl_cgi->generate_error($mess);
1977 }
1978 else {
1979 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1980 $mess .= " $metaname";
1981 $mess .= "->[$metapos]" if (defined $metapos);
1982
1983 $gsdl_cgi->generate_ok_message($mess);
1984 }
1985}
1986
1987
1988# Was trying to reused the codes, but the functions need to be broken
1989# down more before they can be reused, otherwise there will be too
1990# much overhead and duplicate process...
1991sub insert_metadata
1992{
1993 my $self = shift @_;
1994
1995 my $username = $self->{'username'};
1996 my $collect = $self->{'collect'};
1997 my $gsdl_cgi = $self->{'gsdl_cgi'};
1998 my $gsdlhome = $self->{'gsdlhome'};
1999 my $infodbtype = $self->{'infodbtype'};
2000
2001 # If the import metadata and gdbm database have been updated, we
2002 # need to insert some notification to warn user that the the text
2003 # they see at the moment is not indexed and require a rebuild.
2004 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2005
2006 if ($baseaction::authentication_enabled) {
2007 # Ensure the user is allowed to edit this collection
2008 $self->authenticate_user($username, $collect);
2009 }
2010
2011 # Obtain the collect and archive dir
2012 my $site = $self->{'site'};
2013 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2014 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2015 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2016
2017 # Make sure the collection isn't locked by someone else
2018 $self->lock_collection($username, $collect);
2019
2020 # Check additional args
2021 my $docid = $self->{'d'};
2022 if (!defined($docid)) {
2023 $gsdl_cgi->generate_error("No document id is specified: d=...");
2024 }
2025 my $metaname = $self->{'metaname'};
2026 if (!defined($metaname)) {
2027 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2028 }
2029 my $metavalue = $self->{'metavalue'};
2030 if (!defined($metavalue) || $metavalue eq "") {
2031 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2032 }
2033 # make "accumulate" the default (less destructive, as won't actually
2034 # delete any existing values)
2035 my $metamode = "accumulate";
2036
2037 #=======================================================================#
2038 # set_import_metadata [START]
2039 #=======================================================================#
2040 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2041 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2042 my $metadata_xml_file;
2043 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2044 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2045
2046 # This now stores the full pathname
2047 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2048
2049 # figure out correct metadata.xml file [?]
2050 # Assuming the metadata.xml file is next to the source file
2051 # Note: This will not work if it is using the inherited metadata from the parent folder
2052 my ($import_tailname, $import_dirname)
2053 = File::Basename::fileparse($import_filename);
2054 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2055
2056 # Shane's escape characters
2057 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2058 $metavalue =~ s/\,/&#44;/g;
2059 $metavalue =~ s/\:/&#58;/g;
2060 $metavalue =~ s/\|/&#124;/g;
2061 $metavalue =~ s/\(/&#40;/g;
2062 $metavalue =~ s/\)/&#41;/g;
2063 $metavalue =~ s/\[/&#91;/g;
2064 $metavalue =~ s/\\/&#92;/g;
2065 $metavalue =~ s/\]/&#93;/g;
2066 $metavalue =~ s/\{/&#123;/g;
2067 $metavalue =~ s/\}/&#125;/g;
2068 $metavalue =~ s/\"/&#34;/g;
2069 $metavalue =~ s/\`/&#96;/g;
2070 $metavalue =~ s/\n/_newline_/g;
2071
2072 # Edit the metadata.xml
2073 # Modified by Jeffrey from DL Consulting
2074 # Handle the case where there is one metadata.xml file for multiple FileSets
2075 # The XML filter needs to know whether it is in the right FileSet
2076 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2077 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2078 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2079 $metaname, $metavalue, $metamode, $import_tailname);
2080 #=======================================================================#
2081 # set_import_metadata [END]
2082 #=======================================================================#
2083
2084
2085 #=======================================================================#
2086 # set_metadata (accumulate version) [START]
2087 #=======================================================================#
2088 # To people who know $collect_tail please add some comments
2089 # Obtain path to the database
2090 my $collect_tail = $collect;
2091 $collect_tail =~ s/^.*[\/\\]//;
2092 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2093 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2094
2095 # Read the docid entry
2096 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2097
2098 foreach my $k (keys %$doc_rec) {
2099 my @escaped_v = ();
2100 foreach my $v (@{$doc_rec->{$k}}) {
2101 if ($k eq "contains") {
2102 # protect quotes in ".2;".3 etc
2103 $v =~ s/\"/\\\"/g;
2104 push(@escaped_v, $v);
2105 }
2106 else {
2107 my $ev = &ghtml::unescape_html($v);
2108 $ev =~ s/\"/\\\"/g;
2109 push(@escaped_v, $ev);
2110 }
2111 }
2112 $doc_rec->{$k} = \@escaped_v;
2113 }
2114
2115 # Protect the quotes
2116 $metavalue =~ s/\"/\\\"/g;
2117
2118 # Adds the pending macro
2119 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2120
2121 # If the metadata doesn't exist, create a new one
2122 if (!defined($doc_rec->{$metaname})){
2123 $doc_rec->{$metaname} = [ $macro_metavalue ];
2124 }
2125 # Else, let's acculumate the values
2126 else {
2127 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2128 }
2129
2130 # Generate the record string
2131 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2132
2133 # Store it into GDBM
2134 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2135 my $status = system($cmd);
2136 if ($status != 0) {
2137 # Catch error if gdbmget failed
2138 my $mess = "Failed to set metadata key: $docid\n";
2139
2140 $mess .= "PATH: $ENV{'PATH'}\n";
2141 $mess .= "cmd = $cmd\n";
2142 $mess .= "Exit status: $status\n";
2143 $mess .= "System Error Message: $!\n";
2144
2145 $gsdl_cgi->generate_error($mess);
2146 }
2147 else {
2148 my $mess = "insert-metadata successful: Key[$docid]\n";
2149 $mess .= " [In metadata.xml] $metaname";
2150 $mess .= " = $metavalue\n";
2151 $mess .= " [In database] $metaname";
2152 $mess .= " = $macro_metavalue\n";
2153 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2154 $gsdl_cgi->generate_ok_message($mess);
2155 }
2156 #=======================================================================#
2157 # set_metadata (accumulate version) [END]
2158 #=======================================================================#
2159
2160 # Release the lock once it is done
2161 $self->unlock_collection($username, $collect);
2162}
2163
21641;
Note: See TracBrowser for help on using the repository browser.