source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27007

Last change on this file since 27007 was 27007, checked in by davidb, 11 years ago

Additional case for storing metadata now handled: that of when the mode is set to 'override' but when no exising metadata exits. The code now spots that no existing value was found and switches to treating it effectively like an "accumulate" request.

Some code type up performed also (tweaks to some debugging output), and the concatenation of strings before printing them out through the GSDLcgi module (before it was calling the GSDLcgi module function each time, which added MIME header information each time).

File size: 67.0 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
63
64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
68
69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
72
73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
85
86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
102
103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
106
107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
130
131 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
132 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
133
134 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
135
136 return bless $self, $class;
137}
138
139
140sub get_live_metadata
141{
142 my $self = shift @_;
143
144 my $username = $self->{'username'};
145 my $collect = $self->{'collect'};
146 my $gsdl_cgi = $self->{'gsdl_cgi'};
147 my $gsdlhome = $self->{'gsdlhome'};
148 my $infodbtype = $self->{'infodbtype'};
149
150 # live metadata gets/saves value scoped (prefixed) by the current usename
151 # so (for now) let's not bother to enforce authentication
152
153 # Obtain the collect dir
154 my $site = $self->{'site'};
155 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
157
158 # Make sure the collection isn't locked by someone else
159 $self->lock_collection($username, $collect);
160
161 # look up additional args
162 my $docid = $self->{'d'};
163 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
164 $gsdl_cgi->generate_error("No docid (d=...) specified.");
165 }
166
167 # Generate the dbkey
168 my $metaname = $self->{'metaname'};
169 my $dbkey = "$docid.$metaname";
170
171 # To people who know $collect_tail please add some comments
172 # Obtain path to the database
173 my $collect_tail = $collect;
174 $collect_tail =~ s/^.*[\/|\\]//;
175 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
176 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
177
178 # Obtain the content of the key
179 my $cmd = "gdbmget $infodb_file_path $dbkey";
180 if (open(GIN,"$cmd |") == 0) {
181 # Catch error if gdbmget failed
182 my $mess = "Failed to get metadata key: $metaname\n";
183 $mess .= "$!\n";
184
185 $gsdl_cgi->generate_error($mess);
186 }
187 else {
188 binmode(GIN,":utf8");
189 # Read everything in and concatenate them into $metavalue
190 my $metavalue = "";
191 my $line;
192 while (defined ($line=<GIN>)) {
193 $metavalue .= $line;
194 }
195 close(GIN);
196 chomp($metavalue); # Get rid off the tailing newlines
197 $gsdl_cgi->generate_ok_message("$metavalue");
198 }
199
200 # Release the lock once it is done
201 $self->unlock_collection($username, $collect);
202}
203
204
205sub get_metadata
206{
207 my $self = shift @_;
208
209 my $username = $self->{'username'};
210 my $collect = $self->{'collect'};
211 my $gsdl_cgi = $self->{'gsdl_cgi'};
212 my $gsdlhome = $self->{'gsdlhome'};
213
214 # Authenticate user if it is enabled
215 if ($baseaction::authentication_enabled) {
216 # Ensure the user is allowed to edit this collection
217 &authenticate_user($gsdl_cgi, $username, $collect);
218 }
219
220 # Obtain the collect dir
221 my $site = $self->{'site'};
222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
223 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
224
225 # Make sure the collection isn't locked by someone else
226 $self->lock_collection($username, $collect);
227
228 # look up additional args
229 my $docid = $self->{'d'};
230 my $metaname = $self->{'metaname'};
231 my $metapos = $self->{'metapos'};
232 my $infodbtype = $self->{'infodbtype'};
233
234 # To people who know $collect_tail please add some comments
235 # Obtain path to the database
236 my $collect_tail = $collect;
237 $collect_tail =~ s/^.*[\/\\]//;
238 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
239 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
240
241 # Read the docid entry
242 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
243
244 # Basically loop through and unescape_html the values
245 foreach my $k (keys %$doc_rec) {
246 my @escaped_v = ();
247 foreach my $v (@{$doc_rec->{$k}}) {
248 my $ev = &ghtml::unescape_html($v);
249 push(@escaped_v, $ev);
250 }
251 $doc_rec->{$k} = \@escaped_v;
252 }
253
254 # Obtain the specified metadata value
255 $metapos = 0 if (!defined $metapos);
256 my $metavalue = $doc_rec->{$metaname}->[$metapos];
257 $gsdl_cgi->generate_ok_message("$metavalue");
258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
261}
262
263
264sub get_import_metadata
265{
266 my $self = shift @_;
267
268 my $username = $self->{'username'};
269 my $collect = $self->{'collect'};
270 my $gsdl_cgi = $self->{'gsdl_cgi'};
271 my $gsdlhome = $self->{'gsdlhome'};
272
273 # Authenticate user if it is enabled
274 if ($baseaction::authentication_enabled) {
275 # Ensure the user is allowed to edit this collection
276 &authenticate_user($gsdl_cgi, $username, $collect);
277 }
278
279 # Obtain the collect dir
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
283
284 # Make sure the collection isn't locked by someone else
285 $self->lock_collection($username, $collect);
286
287 # look up additional args
288 my $docid = $self->{'d'};
289 my $metaname = $self->{'metaname'};
290 my $infodbtype = $self->{'infodbtype'};
291 if (!defined $docid)
292 {
293 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
294 }
295
296 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
297 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
298 my $metadata_xml_file;
299 my $import_filename = undef;
300
301
302 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
303 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
304 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
305
306 # This now stores the full pathname
307 $import_filename = $doc_rec->{'src-file'}->[0];
308
309 # figure out correct metadata.xml file [?]
310 # Assuming the metadata.xml file is next to the source file
311 # Note: This will not work if it is using the inherited metadata from the parent folder
312 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
313 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
314
315 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
316
317 # Release the lock once it is done
318 $self->unlock_collection($username, $collect);
319}
320
321sub get_metadata_from_metadata_xml
322{
323 my $self = shift @_;
324 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
325
326 my @rules =
327 (
328 _default => 'raw',
329 'Metadata' => \&gfmxml_metadata,
330 'FileName' => \&mxml_filename
331 );
332
333 my $parser = XML::Rules->new
334 (
335 rules => \@rules,
336 output_encoding => 'utf8'
337 );
338
339 my $xml_in = "";
340 if (!open(MIN,"<$metadata_xml_filename"))
341 {
342 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
343 }
344 else
345 {
346 # Read them in
347 my $line;
348 while (defined ($line=<MIN>)) {
349 $xml_in .= $line;
350 }
351 close(MIN);
352
353 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
354
355 if(defined $parser->{'pad'}->{'metavalue'})
356 {
357 return $parser->{'pad'}->{'metavalue'};
358 }
359 else
360 {
361 return "";
362 }
363 }
364}
365
366sub gfmxml_metadata
367{
368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
369
370 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
371 {
372 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
373 }
374}
375
376sub get_archives_metadata
377{
378 my $self = shift @_;
379
380 my $username = $self->{'username'};
381 my $collect = $self->{'collect'};
382 my $gsdl_cgi = $self->{'gsdl_cgi'};
383 my $gsdlhome = $self->{'gsdlhome'};
384 my $infodbtype = $self->{'infodbtype'};
385
386 # Authenticate user if it is enabled
387 if ($baseaction::authentication_enabled) {
388 # Ensure the user is allowed to edit this collection
389 &authenticate_user($gsdl_cgi, $username, $collect);
390 }
391
392 # Obtain the collect dir
393 my $site = $self->{'site'};
394 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
395
396 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
397
398 # Make sure the collection isn't locked by someone else
399 $self->lock_collection($username, $collect);
400
401 # look up additional args
402 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
403 $docid_secnum = "" if (!defined $docid_secnum);
404
405 my $metaname = $self->{'metaname'};
406 my $metapos = $self->{'metapos'};
407 $metapos = 0 if (!defined $metapos);
408
409 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
411
412 # This now stores the full pathname
413 my $doc_filename = $doc_rec->{'doc-file'}->[0];
414
415 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
416
417 # Release the lock once it is done
418 $self->unlock_collection($username, $collect);
419}
420
421sub get_metadata_from_archive_xml
422{
423 my $self = shift @_;
424 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
425
426 my @start_rules = ('Section' => \&dxml_start_section);
427
428 my @rules =
429 (
430 _default => 'raw',
431 'Metadata' => \&gfdxml_metadata
432 );
433
434 my $parser = XML::Rules->new
435 (
436 start_rules => \@start_rules,
437 rules => \@rules,
438 output_encoding => 'utf8'
439 );
440
441 my $xml_in = "";
442 if (!open(MIN,"<$doc_xml_filename"))
443 {
444 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
445 }
446 else
447 {
448 # Read them in
449 my $line;
450 while (defined ($line=<MIN>)) {
451 $xml_in .= $line;
452 }
453 close(MIN);
454
455 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
456
457 if(defined $parser->{'pad'}->{'metavalue'})
458 {
459 return $parser->{'pad'}->{'metavalue'};
460 }
461 else
462 {
463 return "";
464 }
465 }
466}
467
468sub gfdxml_metadata
469{
470 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
471
472 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
473 {
474 return;
475 }
476
477 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
478 {
479 if (!defined $parser->{'parameters'}->{'poscount'})
480 {
481 $parser->{'parameters'}->{'poscount'} = 0;
482 }
483 else
484 {
485 $parser->{'parameters'}->{'poscount'}++;
486 }
487 }
488
489 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
490 {
491 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
492 }
493}
494
495sub set_live_metadata
496{
497 my $self = shift @_;
498
499 my $username = $self->{'username'};
500 my $collect = $self->{'collect'};
501 my $gsdl_cgi = $self->{'gsdl_cgi'};
502 my $gsdlhome = $self->{'gsdlhome'};
503 my $infodbtype = $self->{'infodbtype'};
504
505 if ($baseaction::authentication_enabled) {
506 # Ensure the user is allowed to edit this collection
507 &authenticate_user($gsdl_cgi, $username, $collect);
508 }
509
510 # Obtain the collect dir
511 my $site = $self->{'site'};
512 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
513 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
514
515 # Make sure the collection isn't locked by someone else
516 $self->lock_collection($username, $collect);
517
518 # look up additional args
519 my $docid = $self->{'d'};
520 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
521 $gsdl_cgi->generate_error("No docid (d=...) specified.");
522 }
523 my $metavalue = $self->{'metavalue'};
524
525
526 # Generate the dbkey
527 my $metaname = $self->{'metaname'};
528 my $dbkey = "$docid.$metaname";
529
530 # To people who know $collect_tail please add some comments
531 # Obtain path to the database
532 my $collect_tail = $collect;
533 $collect_tail =~ s/^.*[\/\\]//;
534 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
535 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
536
537 # Set the new value
538 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
539 my $status = system($cmd);
540 if ($status != 0) {
541 # Catch error if gdbmget failed
542 my $mess = "Failed to set metadata key: $dbkey\n";
543
544 $mess .= "PATH: $ENV{'PATH'}\n";
545 $mess .= "cmd = $cmd\n";
546 $mess .= "Exit status: $status\n";
547 $mess .= "System Error Message: $!\n";
548
549 $gsdl_cgi->generate_error($mess);
550 }
551 else {
552 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
553 }
554
555 # Release the lock once it is done
556 $self->unlock_collection($username, $collect);
557}
558
559sub set_metadata_entry
560{
561 my $self = shift @_;
562 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
563
564 # To people who know $collect_tail please add some comments
565 # Obtain path to the database
566 my $collect_tail = $collect;
567 $collect_tail =~ s/^.*[\/\\]//;
568 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
569 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
570
571# print STDERR "**** infodb file path = $infodb_file_path\n";
572# print STDERR "***** infodb type = $infodbtype\n";
573
574 # Read the docid entry
575 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
576
577 # Set the metadata value
578 if (defined $metapos) {
579 $doc_rec->{$metaname}->[$metapos] = $metavalue;
580 }
581 else {
582 $doc_rec->{$metaname} = [ $metavalue ];
583 }
584
585 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
586
587 return $status;
588
589}
590
591sub set_metadata
592{
593 my $self = shift @_;
594
595 my $username = $self->{'username'};
596 my $collect = $self->{'collect'};
597 my $gsdl_cgi = $self->{'gsdl_cgi'};
598 my $gsdlhome = $self->{'gsdlhome'};
599
600 if ($baseaction::authentication_enabled) {
601 # Ensure the user is allowed to edit this collection
602 &authenticate_user($gsdl_cgi, $username, $collect);
603 }
604
605 my $site = $self->{'site'};
606 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
607
608 $gsdl_cgi->checked_chdir($collect_dir);
609
610 # Obtain the collect dir
611 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
612
613 # Make sure the collection isn't locked by someone else
614 $self->lock_collection($username, $collect);
615
616 # look up additional args
617 my $docid = $self->{'d'};
618 my $metaname = $self->{'metaname'};
619 my $metapos = $self->{'metapos'};
620 my $metavalue = $self->{'metavalue'};
621 my $infodbtype = $self->{'infodbtype'};
622
623 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
624
625 if ($status != 0) {
626 # Catch error if set infodb entry failed
627 my $mess = "Failed to set metadata key: $docid\n";
628
629 $mess .= "PATH: $ENV{'PATH'}\n";
630 $mess .= "Exit status: $status\n";
631 $mess .= "System Error Message: $!\n";
632
633 $gsdl_cgi->generate_error($mess);
634 }
635 else {
636 my $mess = "set-metadata successful: Key[$docid]\n";
637 $mess .= " $metaname";
638 $mess .= "->[$metapos]" if (defined $metapos);
639 $mess .= " = $metavalue";
640
641 $gsdl_cgi->generate_ok_message($mess);
642 }
643
644 # Release the lock once it is done
645 $self->unlock_collection($username, $collect);
646}
647
648
649sub set_metadata_array
650{
651 my $self = shift @_;
652
653 my $username = $self->{'username'};
654 my $collect = $self->{'collect'};
655 my $gsdl_cgi = $self->{'gsdl_cgi'};
656 my $gsdlhome = $self->{'gsdlhome'};
657
658 if ($baseaction::authentication_enabled) {
659 # Ensure the user is allowed to edit this collection
660 &authenticate_user($gsdl_cgi, $username, $collect);
661 }
662
663 my $site = $self->{'site'};
664 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
665
666 $gsdl_cgi->checked_chdir($collect_dir);
667
668 # Obtain the collect dir
669 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
670
671 # Make sure the collection isn't locked by someone else
672 $self->lock_collection($username, $collect);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $docid = $doc_array_rec->{'docid'};
690 my $metaname = $doc_array_rec->{'metaname'};
691 my $metapos = $doc_array_rec->{'metapos'};
692 my $metavalue = $doc_array_rec->{'metavalue'};
693
694 push(@all_docids,$docid);
695
696 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
697
698 if ($status != 0) {
699 # Catch error if set infodb entry failed
700 $global_status = $status;
701 $global_mess .= "Failed to set metadata key: $docid\n";
702 $global_mess .= "Exit status: $status\n";
703 $global_mess .= "System Error Message: $!\n";
704 $global_mess .= "-" x 20;
705 }
706 }
707
708 if ($global_status != 0) {
709 $global_mess .= "PATH: $ENV{'PATH'}\n";
710 $gsdl_cgi->generate_error($global_mess);
711 }
712 else {
713 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
714 $gsdl_cgi->generate_ok_message($mess);
715 }
716
717 # Release the lock once it is done
718 $self->unlock_collection($username, $collect);
719}
720
721
722sub dxml_metadata
723{
724 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
725 my $metaname = $parser->{'parameters'}->{'metaname'};
726 my $metamode = $parser->{'parameters'}->{'metamode'};
727
728 print STDERR "**** Processing closing </Metadata> tag\n";
729
730 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731
732 # Find the right metadata tag and checks if we are going to
733 # override it
734 #
735 # Note: This over writes the first metadata block it
736 # encountered. If there are multiple Sections in the doc.xml, it
737 # might not behave as you would expect
738
739 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741## print STDERR "**** metamode = $metamode\n";
742
743 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744 {
745 my $name_attr = $attrHash->{'name'};
746 if (($name_attr eq $metaname) && ($metamode eq "override"))
747 {
748 if (!defined $parser->{'parameters'}->{'poscount'})
749 {
750 $parser->{'parameters'}->{'poscount'} = 0;
751 }
752 else
753 {
754 $parser->{'parameters'}->{'poscount'}++;
755 }
756
757 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758 {
759 ##print STDERR "**** got match!!\n";
760 # Get the value and override the current value
761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
762 $attrHash->{'_content'} = $metavalue;
763
764 # Don't want it to wipe out any other pieces of metadata
765 $parser->{'parameters'}->{'metamode'} = "done";
766 }
767 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
768 {
769 my $metavalue = $parser->{'parameters'}->{'metavalue'};
770 $attrHash->{'_content'} = $metavalue;
771 $parser->{'parameters'}->{'metamode'} = "done";
772 }
773 }
774 }
775
776 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
777 return [$tagname => $attrHash];
778}
779
780
781sub dxml_description
782{
783 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
784 my $metamode = $parser->{'parameters'}->{'metamode'};
785
786 print STDERR "**** Processing a closing </Description> tag \n";
787
788 # Accumulate the metadata
789
790 if (($metamode eq "accumulate") || ($metamode eq "override")) {
791 if ($metamode eq "override") {
792 print "No metadata value to override. Switching 'metamode' to accumulate\n";
793 }
794
795 # If get to here and metamode is override, the this means there
796 # was no existing value to overide => treat as an append operation
797
798 # Tack a new metadata tag on to the end of the <Metadata>+ block
799 my $metaname = $parser->{'parameters'}->{'metaname'};
800 my $metavalue = $parser->{'parameters'}->{'metavalue'};
801
802 my $metadata_attr = {
803 '_content' => $metavalue,
804 'name' => $metaname,
805 'mode' => "accumulate"
806 };
807
808 my $append_metadata = [ "Metadata" => $metadata_attr ];
809 my $description_content = $attrHash->{'_content'};
810
811 print "Appending metadata to doc.xml\n";
812
813 if (ref($description_content)) {
814 # got some existing interesting nested content
815 push(@$description_content, " ", $append_metadata ,"\n ");
816 }
817 else {
818 #description_content is most likely a string such as "\n"
819 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
820 }
821
822 $parser->{'parameters'}->{'metamode'} = "done";
823 }
824 else {
825 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
826## print STDERR "**** NOT ACCUMULATE?!? \n";
827 }
828
829 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
830 return [$tagname => $attrHash];
831}
832
833
834sub dxml_start_section
835{
836 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
837
838 my $new_depth = scalar(@$contextArray);
839
840 print STDERR "**** START SECTION \n";
841
842 if ($new_depth == 1) {
843 $parser->{'parameters'}->{'curr_section_depth'} = 1;
844 $parser->{'parameters'}->{'curr_section_num'} = "";
845 }
846
847 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
848 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
849
850 my $new_secnum;
851
852 if ($new_depth > $old_depth) {
853 # child subsection
854 $new_secnum = "$old_secnum.1";
855 }
856 elsif ($new_depth == $old_depth) {
857 # sibling section => increase it's value by 1
858 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
859 $tail_num++;
860 $new_secnum = $old_secnum;
861 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
862 }
863 else {
864 # back up to parent section => lopp off tail
865 $new_secnum = $old_secnum;
866 $new_secnum =~ s/\.\d+$//;
867 }
868
869 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
870 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
871
872 1;
873}
874
875sub edit_xml_file
876{
877 my $self = shift @_;
878 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
879
880 # use XML::Rules to add it in (read in and out again)
881 my $parser = XML::Rules->new(start_rules => $start_rules,
882 rules => $rules,
883 style => 'filter',
884 output_encoding => 'utf8' );
885
886 my $xml_in = "";
887 if (!open(MIN,"<$filename")) {
888 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
889 }
890 else {
891 # Read all the text in
892 my $line;
893 while (defined ($line=<MIN>)) {
894 $xml_in .= $line;
895 }
896 close(MIN);
897
898 my $MOUT;
899 if (!open($MOUT,">$filename")) {
900 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
901 }
902 else {
903 # Matched lines will get handled by the call backs
904## my $xml_out = "";
905
906 binmode($MOUT,":utf8");
907 $parser->filter($xml_in,$MOUT, $options);
908
909# binmode(MOUT,":utf8");
910# print MOUT $xml_out;
911 close($MOUT);
912 }
913 }
914}
915
916sub edit_doc_xml
917{
918 my $self = shift @_;
919 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
920
921 my $info_mess = <<RAWEND;
922****************************
923 edit_doc_xml()
924****************************
925doc_xml_filename = $doc_xml_filename
926metaname = $metaname
927metavalue = $metavalue
928metapos = $metapos
929metamode = $metamode
930opt_secnum = $opt_secnum
931prevmetavalue = $prevmetavalue
932****************************
933RAWEND
934
935 $gsdl_cgi->generate_message($info_mess);
936
937 # To monitor which section/subsection number we are in
938 my @start_rules =
939 ( 'Section' => \&dxml_start_section );
940
941 # use XML::Rules to add it in (read in and out again)
942 # Set the call back functions
943 my @rules =
944 ( _default => 'raw',
945 'Metadata' => \&dxml_metadata,
946 'Description' => \&dxml_description);
947
948 # Sets the parameters
949 my $options = { 'metaname' => $metaname,
950 'metapos' => $metapos,
951 'metavalue' => $metavalue,
952 'metamode' => $metamode,
953 'prevmetavalue' => $prevmetavalue };
954
955 if (defined $opt_secnum) {
956 $options->{'secnum'} = $opt_secnum;
957 }
958
959 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
960}
961
962sub set_archives_metadata_entry
963{
964 my $self = shift @_;
965 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
966
967 my $info_mess = <<RAWEND;
968****************************
969 set_archives_metadata_entry()
970****************************
971archive_dir = $archive_dir
972collect_dir = $collect_dir
973collect = $collect
974infodbtype = $infodbtype
975docid = $docid
976metaname = $metaname
977metapos = $metapos
978metavalue = $metavalue
979metamode = $metamode
980prevmetavalue = $prevmetavalue
981****************************
982RAWEND
983
984 $gsdl_cgi->generate_message($info_mess);
985
986 # Obtain the doc.xml path for the specified docID
987 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
988
989 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
990 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
991 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
992
993 # The $doc_xml_file is relative to the archives, and now let's get the full path
994 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
995 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
996
997 # Edit the doc.xml file with the specified metadata name, value and position.
998 # TODO: there is a potential problem here as this edit_doc_xml function
999 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1000 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1001
1002 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1003 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1004
1005 return 0; # return 0 for now to indicate no error
1006}
1007
1008
1009sub set_archives_metadata
1010{
1011 my $self = shift @_;
1012
1013 my $username = $self->{'username'};
1014 my $collect = $self->{'collect'};
1015 my $gsdl_cgi = $self->{'gsdl_cgi'};
1016 my $gsdlhome = $self->{'gsdlhome'};
1017 my $infodbtype = $self->{'infodbtype'};
1018
1019 if ($baseaction::authentication_enabled) {
1020 # Ensure the user is allowed to edit this collection
1021 $self->authenticate_user($username, $collect);
1022 }
1023
1024 my $site = $self->{'site'};
1025
1026 # Obtain the collect and archive dir
1027 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1028
1029 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1030
1031 # Make sure the collection isn't locked by someone else
1032 $self->lock_collection($username, $collect);
1033
1034 # look up additional args
1035 my $docid = $self->{'d'};
1036 my $metaname = $self->{'metaname'};
1037 my $metavalue = $self->{'metavalue'};
1038 my $prevmetavalue = $self->{'prevmetavalue'};
1039
1040 my $metapos = $self->{'metapos'};
1041 $metapos = 0 if (!defined $metapos);
1042
1043 my $metamode = $self->{'metamode'};
1044 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1045 # make "accumulate" the default (less destructive, as won't actually
1046 # delete any existing values)
1047 $metamode = "accumulate";
1048 }
1049
1050 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1051 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1052
1053 # Release the lock once it is done
1054 $self->unlock_collection($username, $collect);
1055
1056 if ($status == 0) {
1057 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1058 $mess .= " $metaname";
1059 $mess .= "->[$metapos]" if (defined $metapos);
1060 $mess .= " = $metavalue";
1061 $mess .= " ($metamode)\n";
1062
1063 $gsdl_cgi->generate_ok_message($mess);
1064 }
1065 else {
1066 my $mess .= "Failed to set archives metadata key: $docid\n";
1067 $mess .= "Exit status: $status\n";
1068 $mess .= "System Error Message: $!\n";
1069 $mess .= "-" x 20 . "\n";
1070
1071 $gsdl_cgi->generate_error($mess);
1072 }
1073}
1074
1075
1076sub set_archives_metadata_array
1077{
1078 my $self = shift @_;
1079
1080 my $username = $self->{'username'};
1081 my $collect = $self->{'collect'};
1082 my $gsdl_cgi = $self->{'gsdl_cgi'};
1083 my $gsdlhome = $self->{'gsdlhome'};
1084
1085 if ($baseaction::authentication_enabled) {
1086 # Ensure the user is allowed to edit this collection
1087 &authenticate_user($gsdl_cgi, $username, $collect);
1088 }
1089
1090 my $site = $self->{'site'};
1091 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1092
1093 $gsdl_cgi->checked_chdir($collect_dir);
1094
1095 # Obtain the collect dir
1096 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1097
1098 # Make sure the collection isn't locked by someone else
1099 $self->lock_collection($username, $collect);
1100
1101 # look up additional args
1102
1103 my $infodbtype = $self->{'infodbtype'};
1104
1105 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1106
1107 my $json_str = $self->{'json'};
1108 my $doc_array = decode_json $json_str;
1109
1110
1111 my $global_status = 0;
1112 my $global_mess = "";
1113
1114 my @all_docids = ();
1115
1116 foreach my $doc_array_rec ( @$doc_array ) {
1117
1118 my $docid = $doc_array_rec->{'docid'};
1119 my $metaname = $doc_array_rec->{'metaname'};
1120 my $metapos = $doc_array_rec->{'metapos'};
1121 my $metamode = $self->{'metamode'};
1122 my $metavalue = $doc_array_rec->{'metavalue'};
1123
1124 # Some sanity checks
1125 $metapos = 0 if (!defined $metapos);
1126
1127 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1128 # make "accumulate" the default (less destructive, as won't actually
1129 # delete any existing values)
1130 $metamode = "accumulate";
1131 }
1132
1133 push(@all_docids,$docid);
1134
1135 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1136 $metaname,$metapos,$metavalue,$metamode);
1137
1138 if ($status != 0) {
1139 # Catch error if set infodb entry failed
1140 $global_status = $status;
1141 $global_mess .= "Failed to set metadata key: $docid\n";
1142 $global_mess .= "Exit status: $status\n";
1143 $global_mess .= "System Error Message: $!\n";
1144 $global_mess .= "-" x 20 . "\n";
1145 }
1146 }
1147
1148 if ($global_status != 0) {
1149 $global_mess .= "PATH: $ENV{'PATH'}\n";
1150 $gsdl_cgi->generate_error($global_mess);
1151 }
1152 else {
1153 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1154 $gsdl_cgi->generate_ok_message($mess);
1155 }
1156
1157 # Release the lock once it is done
1158 $self->unlock_collection($username, $collect);
1159}
1160
1161sub remove_archives_metadata
1162{
1163 my $self = shift @_;
1164
1165 my $username = $self->{'username'};
1166 my $collect = $self->{'collect'};
1167 my $gsdl_cgi = $self->{'gsdl_cgi'};
1168 my $gsdlhome = $self->{'gsdlhome'};
1169 my $infodbtype = $self->{'infodbtype'};
1170
1171 if ($baseaction::authentication_enabled)
1172 {
1173 # Ensure the user is allowed to edit this collection
1174 &authenticate_user($gsdl_cgi, $username, $collect);
1175 }
1176
1177 my $site = $self->{'site'};
1178
1179 # Obtain the collect and archive dir
1180 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1181
1182 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1183
1184 # Make sure the collection isn't locked by someone else
1185 $self->lock_collection($username, $collect);
1186
1187 # look up additional args
1188 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1189
1190 my $metaname = $self->{'metaname'};
1191 my $metapos = $self->{'metapos'};
1192 $metapos = 0 if (!defined $metapos);
1193
1194 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1195 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1196
1197 # This now stores the full pathname
1198 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1199
1200 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1201
1202 # Release the lock once it is done
1203 $self->unlock_collection($username, $collect);
1204
1205 if ($status == 0)
1206 {
1207 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1208 $mess .= " $metaname";
1209 $mess .= "->[$metapos]" if (defined $metapos);
1210
1211 $gsdl_cgi->generate_ok_message($mess);
1212 }
1213 else
1214 {
1215 my $mess .= "Failed to remove archives metadata key: $docid\n";
1216 $mess .= "Exit status: $status\n";
1217 $mess .= "System Error Message: $!\n";
1218 $mess .= "-" x 20 . "\n";
1219
1220 $gsdl_cgi->generate_error($mess);
1221 }
1222}
1223
1224sub remove_from_doc_xml
1225{
1226 my $self = shift @_;
1227 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
1228
1229 my @start_rules = ('Section' => \&dxml_start_section);
1230
1231 # Set the call-back functions for the metadata tags
1232 my @rules =
1233 (
1234 _default => 'raw',
1235 'Metadata' => \&rfdxml_metadata
1236 );
1237
1238 my $parser = XML::Rules->new
1239 (
1240 start_rules => \@start_rules,
1241 rules => \@rules,
1242 style => 'filter',
1243 output_encoding => 'utf8'
1244 );
1245
1246 my $status = 0;
1247 my $xml_in = "";
1248 if (!open(MIN,"<$doc_xml_filename"))
1249 {
1250 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1251 $status = 1;
1252 }
1253 else
1254 {
1255 # Read them in
1256 my $line;
1257 while (defined ($line=<MIN>)) {
1258 $xml_in .= $line;
1259 }
1260 close(MIN);
1261
1262 # Filter with the call-back functions
1263 my $xml_out = "";
1264
1265 my $MOUT;
1266 if (!open($MOUT,">$doc_xml_filename")) {
1267 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1268 $status = 1;
1269 }
1270 else {
1271 binmode($MOUT,":utf8");
1272 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
1273 close($MOUT);
1274 }
1275 }
1276 return $status;
1277}
1278
1279sub rfdxml_metadata
1280{
1281 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1282
1283 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1284 {
1285 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1286 return [$tagname => $attrHash];
1287 }
1288
1289 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1290 {
1291 if (!defined $parser->{'parameters'}->{'poscount'})
1292 {
1293 $parser->{'parameters'}->{'poscount'} = 0;
1294 }
1295 else
1296 {
1297 $parser->{'parameters'}->{'poscount'}++;
1298 }
1299 }
1300
1301 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1302 {
1303 return [];
1304 }
1305
1306 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1307 {
1308 return [];
1309 }
1310
1311 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1312 return [$tagname => $attrHash];
1313}
1314
1315sub mxml_metadata
1316{
1317 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1318 my $metaname = $parser->{'parameters'}->{'metaname'};
1319 my $metamode = $parser->{'parameters'}->{'metamode'};
1320
1321 # Report error if we don't see FileName tag before this
1322 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1323
1324 # Don't do anything if we are not in the right FileSet
1325 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1326 if ($file_regexp =~ /\.\*/) {
1327 # Only interested in a file_regexp if it specifies precisely one
1328 # file.
1329 # So, skip anything with a .* in it as it is too general
1330 return [$tagname => $attrHash];
1331 }
1332 my $src_file = $parser->{'parameters'}->{'src_file'};
1333 if (!($src_file =~ /$file_regexp/)) {
1334 return [$tagname => $attrHash];
1335 }
1336## print STDERR "*** mxl metamode = $metamode\n";
1337
1338 # Find the right metadata tag and checks if we are going to override it
1339 my $name_attr = $attrHash->{'name'};
1340 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1341 # Get the value and override the current value
1342 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1343 $attrHash->{'_content'} = $metavalue;
1344
1345## print STDERR "**** overrideing metadata.xml\n";
1346
1347 # Don't want it to wipe out any other pieces of metadata
1348 $parser->{'parameters'}->{'metamode'} = "done";
1349 }
1350
1351 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1352 return [$tagname => $attrHash];
1353}
1354
1355
1356sub mxml_description
1357{
1358 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1359 my $metamode = $parser->{'parameters'}->{'metamode'};
1360
1361 # Failed... Report error if we don't see FileName tag before this
1362 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1363
1364 # Don't do anything if we are not in the right FileSet
1365 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1366 if ($file_regexp =~ m/\.\*/) {
1367 # Only interested in a file_regexp if it specifies precisely one
1368 # file.
1369 # So, skip anything with a .* in it as it is too general
1370 return [$tagname => $attrHash];
1371 }
1372 my $src_file = $parser->{'parameters'}->{'src_file'};
1373
1374 if (!($src_file =~ m/$file_regexp/)) {
1375 return [$tagname => $attrHash];
1376 }
1377
1378 # Accumulate the metadata block to the end of the description block
1379 # Note: This adds metadata block to all description blocks, so if there are
1380 # multiple FileSets, it will add to all of them
1381 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1382 # if metamode was "override" but get to here then it failed to
1383 # find an item to override, in which case it should append its
1384 # value to the end, just like the "accumulate" mode
1385
1386 # tack a new metadata tag on to the end of the <Metadata>+ block
1387 my $metaname = $parser->{'parameters'}->{'metaname'};
1388 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1389
1390 my $metadata_attr = { '_content' => $metavalue,
1391 'name' => $metaname,
1392 'mode' => "accumulate" };
1393
1394 my $append_metadata = [ "Metadata" => $metadata_attr ];
1395 my $description_content = $attrHash->{'_content'};
1396
1397## print STDERR "*** appending to metadata.xml\n";
1398
1399 # append the new metadata element to the end of the current
1400 # content contained inside this tag
1401 if (ref($description_content) eq "") {
1402 # => string or numeric literal
1403 # this is caused by a <Description> block has no <Metadata> child elements
1404 # => set up an empty array in '_content'
1405 $attrHash->{'_content'} = [ "\n" ];
1406 $description_content = $attrHash->{'_content'};
1407 }
1408
1409 push(@$description_content," ", $append_metadata ,"\n ");
1410 $parser->{'parameters'}->{'metamode'} = "done";
1411 }
1412
1413 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1414 return [$tagname => $attrHash];
1415}
1416
1417
1418sub mxml_filename
1419{
1420 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1421
1422 # Store the filename of the Current Fileset
1423 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1424 # FileName tag must come before Description tag
1425 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1426
1427 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1428 return [$tagname => $attrHash];
1429}
1430
1431
1432sub mxml_fileset
1433{
1434 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1435
1436 # Initilise the current_file
1437 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1438 # FileName tag must come before Description tag
1439 $parser->{'parameters'}->{'current_file'} = "";
1440
1441 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1442 return [$tagname => $attrHash];
1443}
1444
1445
1446sub edit_metadata_xml
1447{
1448 my $self = shift @_;
1449 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1450
1451 # Set the call-back functions for the metadata tags
1452 my @rules =
1453 ( _default => 'raw',
1454 'FileName' => \&mxml_filename,
1455 'Metadata' => \&mxml_metadata,
1456 'Description' => \&mxml_description,
1457 'FileSet' => \&mxml_fileset);
1458
1459 # use XML::Rules to add it in (read in and out again)
1460 my $parser = XML::Rules->new(rules => \@rules,
1461 style => 'filter',
1462 output_encoding => 'utf8');
1463
1464 if (!-e $metadata_xml_filename) {
1465
1466 if (open(MOUT,">$metadata_xml_filename")) {
1467
1468 my $src_file_re = &util::filename_to_regex($src_file);
1469 # shouldn't the following also be in the above utility routine??
1470 # $src_file_re =~ s/\./\\./g;
1471
1472 print MOUT "<?xml version=\"1.0\"?>\n";
1473 print MOUT "<DirectoryMetadata>\n";
1474 print MOUT " <FileSet>\n";
1475 print MOUT " <FileName>$src_file_re</FileName>\n";
1476 print MOUT " <Description>\n";
1477 print MOUT " </Description>\n";
1478 print MOUT " </FileSet>\n";
1479 print MOUT "</DirectoryMetadata>\n";
1480
1481 close(MOUT);
1482 }
1483 else {
1484 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1485 }
1486 }
1487
1488
1489 my $xml_in = "";
1490 if (!open(MIN,"<$metadata_xml_filename")) {
1491 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1492 }
1493 else {
1494 # Read them in
1495 my $line;
1496 while (defined ($line=<MIN>)) {
1497 $xml_in .= $line;
1498 }
1499 close(MIN);
1500
1501 # Filter with the call-back functions
1502 my $xml_out = "";
1503
1504 my $MOUT;
1505 if (!open($MOUT,">$metadata_xml_filename")) {
1506 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1507 }
1508 else {
1509 binmode($MOUT,":utf8");
1510
1511 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1512 # At the moment, I will just hack it!
1513 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1514 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1515 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1516 #print MOUT $xml_out;
1517
1518 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1519 metavalue => $metavalue,
1520 metamode => $metamode,
1521 src_file => $src_file,
1522 current_file => undef} );
1523 close($MOUT);
1524 }
1525 }
1526}
1527
1528
1529sub set_import_metadata
1530{
1531 my $self = shift @_;
1532
1533 my $username = $self->{'username'};
1534 my $collect = $self->{'collect'};
1535 my $gsdl_cgi = $self->{'gsdl_cgi'};
1536 my $gsdlhome = $self->{'gsdlhome'};
1537 my $infodbtype = $self->{'infodbtype'};
1538
1539 if ($baseaction::authentication_enabled) {
1540 # Ensure the user is allowed to edit this collection
1541 $self->authenticate_user($username, $collect);
1542 }
1543
1544
1545 # Obtain the collect and archive dir
1546 my $site = $self->{'site'};
1547 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1548
1549 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1550 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1551
1552 # Make sure the collection isn't locked by someone else
1553 $self->lock_collection($username, $collect);
1554
1555 # look up additional args
1556 # want either d= or f=
1557 my $docid = $self->{'d'};
1558 my $import_file = $self->{'f'};
1559 if ((!defined $docid) && (!defined $import_file)) {
1560 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1561 }
1562
1563 # Get the parameters and set default mode to "accumulate"
1564 my $metaname = $self->{'metaname'};
1565 my $metavalue = $self->{'metavalue'};
1566## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1567 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1568
1569 my $metamode = $self->{'metamode'};
1570 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1571 # make "accumulate" the default (less destructive, as won't actually
1572 # delete any existing values)
1573 $metamode = "accumulate";
1574 }
1575
1576 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1577 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1578 my $metadata_xml_file;
1579 my $import_filename = undef;
1580 if (defined $docid) {
1581 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1582 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1583
1584 # This now stores the full pathname
1585 $import_filename = $doc_rec->{'src-file'}->[0];
1586 }
1587 else {
1588 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1589 }
1590
1591 # figure out correct metadata.xml file [?]
1592 # Assuming the metadata.xml file is next to the source file
1593 # Note: This will not work if it is using the inherited metadata from the parent folder
1594 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1595 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1596
1597 # Edit the metadata.xml
1598 # Modified by Jeffrey from DL Consulting
1599 # Handle the case where there is one metadata.xml file for multiple FileSets
1600 # The XML filter needs to know whether it is in the right FileSet
1601 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1602 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1603 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1604 $metaname, $metavalue, $metamode, $import_tailname);
1605
1606 # Release the lock once it is done
1607 $self->unlock_collection($username, $collect);
1608
1609 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1610 $mess .= " $metaname";
1611 $mess .= " = $metavalue";
1612 $mess .= " ($metamode)\n";
1613
1614 $gsdl_cgi->generate_ok_message($mess);
1615
1616}
1617
1618sub set_import_metadata_array
1619{
1620 my $self = shift @_;
1621
1622 my $username = $self->{'username'};
1623 my $collect = $self->{'collect'};
1624 my $gsdl_cgi = $self->{'gsdl_cgi'};
1625 my $gsdlhome = $self->{'gsdlhome'};
1626
1627 if ($baseaction::authentication_enabled) {
1628 # Ensure the user is allowed to edit this collection
1629 &authenticate_user($gsdl_cgi, $username, $collect);
1630 }
1631
1632 my $site = $self->{'site'};
1633 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1634
1635 $gsdl_cgi->checked_chdir($collect_dir);
1636
1637 # Make sure the collection isn't locked by someone else
1638 $self->lock_collection($username, $collect);
1639
1640 # look up additional args
1641
1642 my $infodbtype = $self->{'infodbtype'};
1643
1644 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1645
1646 my $json_str = $self->{'json'};
1647 my $doc_array = decode_json $json_str;
1648
1649 my $global_status = 0;
1650 my $global_mess = "";
1651
1652 my @all_docids = ();
1653
1654 foreach my $doc_array_rec ( @$doc_array )
1655 {
1656 my $docid = $doc_array_rec->{'docid'};
1657 my $metaname = $doc_array_rec->{'metaname'};
1658 my $metamode = $self->{'metamode'};
1659 my $metavalue = $doc_array_rec->{'metavalue'};
1660
1661 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1662 # make "accumulate" the default (less destructive, as won't actually
1663 # delete any existing values)
1664 $metamode = "accumulate";
1665 }
1666
1667 push(@all_docids,$docid);
1668
1669 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1670 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1671 my $metadata_xml_file;
1672 my $import_filename = undef;
1673
1674 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1675 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1676
1677 # This now stores the full pathname
1678 $import_filename = $doc_rec->{'src-file'}->[0];
1679
1680 # figure out correct metadata.xml file [?]
1681 # Assuming the metadata.xml file is next to the source file
1682 # Note: This will not work if it is using the inherited metadata from the parent folder
1683 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1684 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1685
1686 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1687 }
1688
1689 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1690 $gsdl_cgi->generate_ok_message($mess);
1691
1692 # Release the lock once it is done
1693 $self->unlock_collection($username, $collect);
1694}
1695
1696sub remove_import_metadata
1697{
1698 my $self = shift @_;
1699
1700 my $username = $self->{'username'};
1701 my $collect = $self->{'collect'};
1702 my $gsdl_cgi = $self->{'gsdl_cgi'};
1703
1704 if ($baseaction::authentication_enabled) {
1705 # Ensure the user is allowed to edit this collection
1706 &authenticate_user($gsdl_cgi, $username, $collect);
1707 }
1708
1709 my $gsdlhome = $self->{'gsdlhome'};
1710 my $infodbtype = $self->{'infodbtype'};
1711
1712 # Obtain the collect dir
1713 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1714 my $site = $self->{'site'};
1715 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1716
1717 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1718 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1719
1720 # Make sure the collection isn't locked by someone else
1721 $self->lock_collection($username, $collect);
1722
1723 # look up additional args
1724 my $docid = $self->{'d'};
1725 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1726 {
1727 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1728 }
1729
1730 my $metaname = $self->{'metaname'};
1731 my $metavalue = $self->{'metavalue'};
1732 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1733
1734 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1735 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1736 my $metadata_xml_file;
1737 my $import_filename = undef;
1738 if (defined $docid)
1739 {
1740 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1741 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1742
1743 # This now stores the full pathname
1744 $import_filename = $doc_rec->{'src-file'}->[0];
1745 }
1746
1747 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1748 {
1749 $gsdl_cgi->generate_error_message("There is no metadata\n");
1750 }
1751
1752 # figure out correct metadata.xml file [?]
1753 # Assuming the metadata.xml file is next to the source file
1754 # Note: This will not work if it is using the inherited metadata from the parent folder
1755 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1756 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1757
1758 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1759
1760 # Release the lock once it is done
1761 $self->unlock_collection($username, $collect);
1762
1763 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1764 $mess .= " $metaname";
1765 $mess .= " = $metavalue\n";
1766
1767 $gsdl_cgi->generate_ok_message($mess);
1768}
1769
1770sub remove_from_metadata_xml
1771{
1772 my $self = shift @_;
1773 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1774
1775 # Set the call-back functions for the metadata tags
1776 my @rules =
1777 (
1778 _default => 'raw',
1779 'Metadata' => \&rfmxml_metadata,
1780 'FileName' => \&mxml_filename
1781 );
1782
1783 my $parser = XML::Rules->new
1784 (
1785 rules => \@rules,
1786 style => 'filter',
1787 output_encoding => 'utf8'
1788 );
1789
1790 my $xml_in = "";
1791 if (!open(MIN,"<$metadata_xml_filename"))
1792 {
1793 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1794 }
1795 else
1796 {
1797 # Read them in
1798 my $line;
1799 while (defined ($line=<MIN>)) {
1800 $xml_in .= $line;
1801 }
1802 close(MIN);
1803
1804 # Filter with the call-back functions
1805 my $xml_out = "";
1806
1807 my $MOUT;
1808 if (!open($MOUT,">$metadata_xml_filename")) {
1809 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1810 }
1811 else {
1812 binmode($MOUT,":utf8");
1813 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1814 close($MOUT);
1815 }
1816 }
1817}
1818
1819sub rfmxml_metadata
1820{
1821 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1822
1823 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1824 {
1825 return [];
1826 }
1827
1828 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1829 return [$tagname => $attrHash];
1830}
1831
1832sub remove_live_metadata
1833{
1834 my $self = shift @_;
1835
1836 my $username = $self->{'username'};
1837 my $collect = $self->{'collect'};
1838 my $gsdl_cgi = $self->{'gsdl_cgi'};
1839 my $gsdlhome = $self->{'gsdlhome'};
1840 my $infodbtype = $self->{'infodbtype'};
1841
1842 if ($baseaction::authentication_enabled) {
1843 # Ensure the user is allowed to edit this collection
1844 &authenticate_user($gsdl_cgi, $username, $collect);
1845 }
1846
1847 # Obtain the collect dir
1848 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1849 my $site = $self->{'site'};
1850 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1851
1852 # Make sure the collection isn't locked by someone else
1853 $self->lock_collection($username, $collect);
1854
1855 # look up additional args
1856 my $docid = $self->{'d'};
1857 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1858 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1859 }
1860
1861 # Generate the dbkey
1862 my $metaname = $self->{'metaname'};
1863 my $dbkey = "$docid.$metaname";
1864
1865 # To people who know $collect_tail please add some comments
1866 # Obtain the live gdbm_db path
1867 my $collect_tail = $collect;
1868 $collect_tail =~ s/^.*[\/\\]//;
1869 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1870 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1871
1872 # Remove the key
1873 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1874 my $status = system($cmd);
1875 if ($status != 0) {
1876 # Catch error if gdbmdel failed
1877 my $mess = "Failed to set metadata key: $dbkey\n";
1878
1879 $mess .= "PATH: $ENV{'PATH'}\n";
1880 $mess .= "cmd = $cmd\n";
1881 $mess .= "Exit status: $status\n";
1882 $mess .= "System Error Message: $!\n";
1883
1884 $gsdl_cgi->generate_error($mess);
1885 }
1886 else {
1887 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1888 }
1889
1890}
1891
1892
1893sub remove_metadata
1894{
1895 my $self = shift @_;
1896
1897 my $username = $self->{'username'};
1898 my $collect = $self->{'collect'};
1899 my $gsdl_cgi = $self->{'gsdl_cgi'};
1900 my $gsdlhome = $self->{'gsdlhome'};
1901 my $infodbtype = $self->{'infodbtype'};
1902
1903 if ($baseaction::authentication_enabled) {
1904 # Ensure the user is allowed to edit this collection
1905 &authenticate_user($gsdl_cgi, $username, $collect);
1906 }
1907
1908 # Obtain the collect dir
1909 my $site = $self->{'site'};
1910 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1911 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1912
1913 # Make sure the collection isn't locked by someone else
1914 $self->lock_collection($username, $collect);
1915
1916 # look up additional args
1917 my $docid = $self->{'d'};
1918 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1919 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1920 }
1921 my $metaname = $self->{'metaname'};
1922 my $metapos = $self->{'metapos'};
1923 my $metavalue = $self->{'metavalue'};
1924
1925 # To people who know $collect_tail please add some comments
1926 # Obtain the path to the database
1927 my $collect_tail = $collect;
1928 $collect_tail =~ s/^.*[\/\\]//;
1929 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1930 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1931
1932 # Read the docid entry
1933 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1934
1935 # Basically loop through and unescape_html the values
1936 foreach my $k (keys %$doc_rec) {
1937 my @escaped_v = ();
1938 foreach my $v (@{$doc_rec->{$k}}) {
1939 if ($k eq "contains") {
1940 # protect quotes in ".2;".3 etc
1941 $v =~ s/\"/\\\"/g;
1942 push(@escaped_v, $v);
1943 }
1944 else {
1945 my $ev = &ghtml::unescape_html($v);
1946 $ev =~ s/\"/\\\"/g;
1947 push(@escaped_v, $ev);
1948 }
1949 }
1950 $doc_rec->{$k} = \@escaped_v;
1951 }
1952
1953 # Check to make sure the key does exist
1954 if (!defined ($doc_rec->{$metaname})) {
1955 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1956 }
1957
1958 # Obtain the specified metadata pos
1959 $metapos = 0 if (!defined $metapos);
1960
1961 # consider check key is defined before deleting?
1962 # Loop through the metadata array and ignore the specified position
1963 my $filtered_metadata = [];
1964 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1965 for (my $i=0; $i<$num_metadata_vals; $i++) {
1966 my $metaval = shift(@{$doc_rec->{$metaname}});
1967
1968 if (!defined $metavalue && $i != $metapos) {
1969 push(@$filtered_metadata,$metaval)
1970 }
1971
1972 if(defined $metavalue && !($metavalue eq $metaval))
1973 {
1974 push(@$filtered_metadata,$metavalue)
1975 }
1976 }
1977 $doc_rec->{$metaname} = $filtered_metadata;
1978
1979 # Turn the record back to string
1980 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1981
1982 # Store it back to the database
1983 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1984 my $status = system($cmd);
1985 if ($status != 0) {
1986 my $mess = "Failed to set metadata key: $docid\n";
1987
1988 $mess .= "PATH: $ENV{'PATH'}\n";
1989 $mess .= "cmd = $cmd\n";
1990 $mess .= "Exit status: $status\n";
1991 $mess .= "System Error Message: $!\n";
1992
1993 $gsdl_cgi->generate_error($mess);
1994 }
1995 else {
1996 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1997 $mess .= " $metaname";
1998 $mess .= "->[$metapos]" if (defined $metapos);
1999
2000 $gsdl_cgi->generate_ok_message($mess);
2001 }
2002}
2003
2004
2005# Was trying to reused the codes, but the functions need to be broken
2006# down more before they can be reused, otherwise there will be too
2007# much overhead and duplicate process...
2008sub insert_metadata
2009{
2010 my $self = shift @_;
2011
2012 my $username = $self->{'username'};
2013 my $collect = $self->{'collect'};
2014 my $gsdl_cgi = $self->{'gsdl_cgi'};
2015 my $gsdlhome = $self->{'gsdlhome'};
2016 my $infodbtype = $self->{'infodbtype'};
2017
2018 # If the import metadata and gdbm database have been updated, we
2019 # need to insert some notification to warn user that the the text
2020 # they see at the moment is not indexed and require a rebuild.
2021 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2022
2023 if ($baseaction::authentication_enabled) {
2024 # Ensure the user is allowed to edit this collection
2025 $self->authenticate_user($username, $collect);
2026 }
2027
2028 # Obtain the collect and archive dir
2029 my $site = $self->{'site'};
2030 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2031 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2032 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2033
2034 # Make sure the collection isn't locked by someone else
2035 $self->lock_collection($username, $collect);
2036
2037 # Check additional args
2038 my $docid = $self->{'d'};
2039 if (!defined($docid)) {
2040 $gsdl_cgi->generate_error("No document id is specified: d=...");
2041 }
2042 my $metaname = $self->{'metaname'};
2043 if (!defined($metaname)) {
2044 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2045 }
2046 my $metavalue = $self->{'metavalue'};
2047 if (!defined($metavalue) || $metavalue eq "") {
2048 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2049 }
2050 # make "accumulate" the default (less destructive, as won't actually
2051 # delete any existing values)
2052 my $metamode = "accumulate";
2053
2054 #=======================================================================#
2055 # set_import_metadata [START]
2056 #=======================================================================#
2057 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2058 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2059 my $metadata_xml_file;
2060 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2061 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2062
2063 # This now stores the full pathname
2064 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2065
2066 # figure out correct metadata.xml file [?]
2067 # Assuming the metadata.xml file is next to the source file
2068 # Note: This will not work if it is using the inherited metadata from the parent folder
2069 my ($import_tailname, $import_dirname)
2070 = File::Basename::fileparse($import_filename);
2071 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2072
2073 # Shane's escape characters
2074 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2075 $metavalue =~ s/\,/&#44;/g;
2076 $metavalue =~ s/\:/&#58;/g;
2077 $metavalue =~ s/\|/&#124;/g;
2078 $metavalue =~ s/\(/&#40;/g;
2079 $metavalue =~ s/\)/&#41;/g;
2080 $metavalue =~ s/\[/&#91;/g;
2081 $metavalue =~ s/\\/&#92;/g;
2082 $metavalue =~ s/\]/&#93;/g;
2083 $metavalue =~ s/\{/&#123;/g;
2084 $metavalue =~ s/\}/&#125;/g;
2085 $metavalue =~ s/\"/&#34;/g;
2086 $metavalue =~ s/\`/&#96;/g;
2087 $metavalue =~ s/\n/_newline_/g;
2088
2089 # Edit the metadata.xml
2090 # Modified by Jeffrey from DL Consulting
2091 # Handle the case where there is one metadata.xml file for multiple FileSets
2092 # The XML filter needs to know whether it is in the right FileSet
2093 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2094 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2095 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2096 $metaname, $metavalue, $metamode, $import_tailname);
2097 #=======================================================================#
2098 # set_import_metadata [END]
2099 #=======================================================================#
2100
2101
2102 #=======================================================================#
2103 # set_metadata (accumulate version) [START]
2104 #=======================================================================#
2105 # To people who know $collect_tail please add some comments
2106 # Obtain path to the database
2107 my $collect_tail = $collect;
2108 $collect_tail =~ s/^.*[\/\\]//;
2109 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2110 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2111
2112 # Read the docid entry
2113 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2114
2115 foreach my $k (keys %$doc_rec) {
2116 my @escaped_v = ();
2117 foreach my $v (@{$doc_rec->{$k}}) {
2118 if ($k eq "contains") {
2119 # protect quotes in ".2;".3 etc
2120 $v =~ s/\"/\\\"/g;
2121 push(@escaped_v, $v);
2122 }
2123 else {
2124 my $ev = &ghtml::unescape_html($v);
2125 $ev =~ s/\"/\\\"/g;
2126 push(@escaped_v, $ev);
2127 }
2128 }
2129 $doc_rec->{$k} = \@escaped_v;
2130 }
2131
2132 # Protect the quotes
2133 $metavalue =~ s/\"/\\\"/g;
2134
2135 # Adds the pending macro
2136 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2137
2138 # If the metadata doesn't exist, create a new one
2139 if (!defined($doc_rec->{$metaname})){
2140 $doc_rec->{$metaname} = [ $macro_metavalue ];
2141 }
2142 # Else, let's acculumate the values
2143 else {
2144 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2145 }
2146
2147 # Generate the record string
2148 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2149
2150 # Store it into GDBM
2151 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2152 my $status = system($cmd);
2153 if ($status != 0) {
2154 # Catch error if gdbmget failed
2155 my $mess = "Failed to set metadata key: $docid\n";
2156
2157 $mess .= "PATH: $ENV{'PATH'}\n";
2158 $mess .= "cmd = $cmd\n";
2159 $mess .= "Exit status: $status\n";
2160 $mess .= "System Error Message: $!\n";
2161
2162 $gsdl_cgi->generate_error($mess);
2163 }
2164 else {
2165 my $mess = "insert-metadata successful: Key[$docid]\n";
2166 $mess .= " [In metadata.xml] $metaname";
2167 $mess .= " = $metavalue\n";
2168 $mess .= " [In database] $metaname";
2169 $mess .= " = $macro_metavalue\n";
2170 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2171 $gsdl_cgi->generate_ok_message($mess);
2172 }
2173 #=======================================================================#
2174 # set_metadata (accumulate version) [END]
2175 #=======================================================================#
2176
2177 # Release the lock once it is done
2178 $self->unlock_collection($username, $collect);
2179}
2180
21811;
Note: See TracBrowser for help on using the repository browser.