source: gs2-extensions/parallel-building/trunk/src/perllib/cgiactions/metadataaction.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 46.7 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43
44@metadataaction::ISA = ('baseaction');
45
46
47my $action_table =
48{
49 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [] },
51
52 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
53 'optional-args' => [ "metapos" ] },
54
55 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
56 'optional-args' => [ ] },
57
58 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
59 'optional-args' => [ "metapos" ] },
60
61 "set-metadata-array" => { 'compulsory-args' => [ "json" ],
62 'optional-args' => [ ] },
63
64 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
65 'optional-args' => [ "metapos", "metamode" ]
66 # metamode can be "accumulate", "override",
67 },
68
69 "set-archives-metadata-array" => { 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "metamode" ]
71 },
72
73 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
74 'optional-args' => [ "d", "f", "metamode" ]
75 # metamode can be "accumulate", "override", or "unique-id"
76 },
77
78
79 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
80 'optional-args' => [ ] },
81
82 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
83 'optional-args' => [ "metapos" ] },
84
85 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
86 'optional-args' => [ ]
87 }
88};
89
90
91sub new
92{
93 my $class = shift (@_);
94 my ($gsdl_cgi,$iis6_mode) = @_;
95
96 # Treat metavalue specially. To transmit this through a GET request
97 # the Javascript side has url-encoded it, so here we need to decode
98 # it before proceeding
99
100 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
101 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
102
103 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
104
105 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
106
107 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
108
109 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
110
111 return bless $self, $class;
112}
113
114
115sub get_live_metadata
116{
117 my $self = shift @_;
118
119 my $username = $self->{'username'};
120 my $collect = $self->{'collect'};
121 my $gsdl_cgi = $self->{'gsdl_cgi'};
122 my $gsdlhome = $self->{'gsdlhome'};
123 my $infodbtype = $self->{'infodbtype'};
124
125 # live metadata gets/saves value scoped (prefixed) by the current usename
126 # so (for now) let's not bother to enforce authentication
127
128 # Obtain the collect dir
129 my $site = $self->{'site'};
130 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
131 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
132
133 # Make sure the collection isn't locked by someone else
134 $self->lock_collection($username, $collect);
135
136 # look up additional args
137 my $docid = $self->{'d'};
138 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
139 $gsdl_cgi->generate_error("No docid (d=...) specified.");
140 }
141
142 # Generate the dbkey
143 my $metaname = $self->{'metaname'};
144 my $dbkey = "$docid.$metaname";
145
146 # To people who know $collect_tail please add some comments
147 # Obtain path to the database
148 my $collect_tail = $collect;
149 $collect_tail =~ s/^.*[\/|\\]//;
150 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
151 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
152
153 # Obtain the content of the key
154 my $cmd = "gdbmget $infodb_file_path $dbkey";
155 if (open(GIN,"$cmd |") == 0) {
156 # Catch error if gdbmget failed
157 my $mess = "Failed to get metadata key: $metaname\n";
158 $mess .= "$!\n";
159
160 $gsdl_cgi->generate_error($mess);
161 }
162 else {
163 binmode(GIN,":utf8");
164 # Read everything in and concatenate them into $metavalue
165 my $metavalue = "";
166 my $line;
167 while (defined ($line=<GIN>)) {
168 $metavalue .= $line;
169 }
170 close(GIN);
171 chomp($metavalue); # Get rid off the tailing newlines
172 $gsdl_cgi->generate_ok_message("$metavalue");
173 }
174
175 # Release the lock once it is done
176 $self->unlock_collection($username, $collect);
177}
178
179
180sub get_metadata
181{
182 my $self = shift @_;
183
184 my $username = $self->{'username'};
185 my $collect = $self->{'collect'};
186 my $gsdl_cgi = $self->{'gsdl_cgi'};
187 my $gsdlhome = $self->{'gsdlhome'};
188
189 # Authenticate user if it is enabled
190 if ($baseaction::authentication_enabled) {
191 # Ensure the user is allowed to edit this collection
192 &authenticate_user($gsdl_cgi, $username, $collect);
193 }
194
195 # Obtain the collect dir
196 my $site = $self->{'site'};
197 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
198 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
199
200 # Make sure the collection isn't locked by someone else
201 $self->lock_collection($username, $collect);
202
203 # look up additional args
204 my $docid = $self->{'d'};
205 my $metaname = $self->{'metaname'};
206 my $metapos = $self->{'metapos'};
207 my $infodbtype = $self->{'infodbtype'};
208
209 # To people who know $collect_tail please add some comments
210 # Obtain path to the database
211 my $collect_tail = $collect;
212 $collect_tail =~ s/^.*[\/\\]//;
213 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
214 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
215
216 # Read the docid entry
217 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
218
219 # Basically loop through and unescape_html the values
220 foreach my $k (keys %$doc_rec) {
221 my @escaped_v = ();
222 foreach my $v (@{$doc_rec->{$k}}) {
223 my $ev = &ghtml::unescape_html($v);
224 push(@escaped_v, $ev);
225 }
226 $doc_rec->{$k} = \@escaped_v;
227 }
228
229 # Obtain the specified metadata value
230 $metapos = 0 if (!defined $metapos);
231 my $metavalue = $doc_rec->{$metaname}->[$metapos];
232 $gsdl_cgi->generate_ok_message("$metavalue");
233
234 # Release the lock once it is done
235 $self->unlock_collection($username, $collect);
236}
237
238
239sub set_live_metadata
240{
241 my $self = shift @_;
242
243 my $username = $self->{'username'};
244 my $collect = $self->{'collect'};
245 my $gsdl_cgi = $self->{'gsdl_cgi'};
246 my $gsdlhome = $self->{'gsdlhome'};
247 my $infodbtype = $self->{'infodbtype'};
248
249 if ($baseaction::authentication_enabled) {
250 # Ensure the user is allowed to edit this collection
251 &authenticate_user($gsdl_cgi, $username, $collect);
252 }
253
254 # Obtain the collect dir
255 my $site = $self->{'site'};
256 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
257 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
258
259 # Make sure the collection isn't locked by someone else
260 $self->lock_collection($username, $collect);
261
262 # look up additional args
263 my $docid = $self->{'d'};
264 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
265 $gsdl_cgi->generate_error("No docid (d=...) specified.");
266 }
267 my $metavalue = $self->{'metavalue'};
268
269
270 # Generate the dbkey
271 my $metaname = $self->{'metaname'};
272 my $dbkey = "$docid.$metaname";
273
274 # To people who know $collect_tail please add some comments
275 # Obtain path to the database
276 my $collect_tail = $collect;
277 $collect_tail =~ s/^.*[\/\\]//;
278 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
279 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
280
281 # Set the new value
282 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
283 my $status = system($cmd);
284 if ($status != 0) {
285 # Catch error if gdbmget failed
286 my $mess = "Failed to set metadata key: $dbkey\n";
287
288 $mess .= "PATH: $ENV{'PATH'}\n";
289 $mess .= "cmd = $cmd\n";
290 $mess .= "Exit status: $status\n";
291 $mess .= "System Error Message: $!\n";
292
293 $gsdl_cgi->generate_error($mess);
294 }
295 else {
296 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
297 }
298
299 # Release the lock once it is done
300 $self->unlock_collection($username, $collect);
301}
302
303sub set_metadata_entry
304{
305 my $self = shift @_;
306 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
307
308 # To people who know $collect_tail please add some comments
309 # Obtain path to the database
310 my $collect_tail = $collect;
311 $collect_tail =~ s/^.*[\/\\]//;
312 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
313 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
314
315# print STDERR "**** infodb file path = $infodb_file_path\n";
316# print STDERR "***** infodb type = $infodbtype\n";
317
318 # Read the docid entry
319 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
320
321 # Set the metadata value
322 if (defined $metapos) {
323 $doc_rec->{$metaname}->[$metapos] = $metavalue;
324 }
325 else {
326 $doc_rec->{$metaname} = [ $metavalue ];
327 }
328
329 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
330
331 return $status;
332
333}
334
335sub set_metadata
336{
337 my $self = shift @_;
338
339 my $username = $self->{'username'};
340 my $collect = $self->{'collect'};
341 my $gsdl_cgi = $self->{'gsdl_cgi'};
342 my $gsdlhome = $self->{'gsdlhome'};
343
344 if ($baseaction::authentication_enabled) {
345 # Ensure the user is allowed to edit this collection
346 &authenticate_user($gsdl_cgi, $username, $collect);
347 }
348
349 my $site = $self->{'site'};
350 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
351
352 $gsdl_cgi->checked_chdir($collect_dir);
353
354 # Obtain the collect dir
355 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
356
357 # Make sure the collection isn't locked by someone else
358 $self->lock_collection($username, $collect);
359
360 # look up additional args
361 my $docid = $self->{'d'};
362 my $metaname = $self->{'metaname'};
363 my $metapos = $self->{'metapos'};
364 my $metavalue = $self->{'metavalue'};
365 my $infodbtype = $self->{'infodbtype'};
366
367 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
368
369 if ($status != 0) {
370 # Catch error if set infodb entry failed
371 my $mess = "Failed to set metadata key: $docid\n";
372
373 $mess .= "PATH: $ENV{'PATH'}\n";
374 $mess .= "Exit status: $status\n";
375 $mess .= "System Error Message: $!\n";
376
377 $gsdl_cgi->generate_error($mess);
378 }
379 else {
380 my $mess = "set-metadata successful: Key[$docid]\n";
381 $mess .= " $metaname";
382 $mess .= "->[$metapos]" if (defined $metapos);
383 $mess .= " = $metavalue";
384
385 $gsdl_cgi->generate_ok_message($mess);
386 }
387
388 # Release the lock once it is done
389 $self->unlock_collection($username, $collect);
390}
391
392
393
394
395sub set_metadata_array
396{
397 my $self = shift @_;
398
399 my $username = $self->{'username'};
400 my $collect = $self->{'collect'};
401 my $gsdl_cgi = $self->{'gsdl_cgi'};
402 my $gsdlhome = $self->{'gsdlhome'};
403
404 if ($baseaction::authentication_enabled) {
405 # Ensure the user is allowed to edit this collection
406 &authenticate_user($gsdl_cgi, $username, $collect);
407 }
408
409 my $site = $self->{'site'};
410 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
411
412 $gsdl_cgi->checked_chdir($collect_dir);
413
414 # Obtain the collect dir
415 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
416
417 # Make sure the collection isn't locked by someone else
418 $self->lock_collection($username, $collect);
419
420 # look up additional args
421
422 my $infodbtype = $self->{'infodbtype'};
423
424 my $json_str = $self->{'json'};
425 my $doc_array = decode_json $json_str;
426
427
428 my $global_status = 0;
429 my $global_mess = "";
430
431 my @all_docids = ();
432
433 foreach my $doc_array_rec ( @$doc_array ) {
434
435 my $docid = $doc_array_rec->{'docid'};
436 my $metaname = $doc_array_rec->{'metaname'};
437 my $metapos = $doc_array_rec->{'metapos'};
438 my $metavalue = $doc_array_rec->{'metavalue'};
439
440 push(@all_docids,$docid);
441
442 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
443
444 if ($status != 0) {
445 # Catch error if set infodb entry failed
446 $global_status = $status;
447 $global_mess .= "Failed to set metadata key: $docid\n";
448 $global_mess .= "Exit status: $status\n";
449 $global_mess .= "System Error Message: $!\n";
450 $global_mess .= "-" x 20;
451 }
452 }
453
454 if ($global_status != 0) {
455 $global_mess .= "PATH: $ENV{'PATH'}\n";
456 $gsdl_cgi->generate_error($global_mess);
457 }
458 else {
459 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
460 $gsdl_cgi->generate_ok_message($mess);
461 }
462
463 # Release the lock once it is done
464 $self->unlock_collection($username, $collect);
465}
466
467
468
469sub dxml_metadata
470{
471 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
472 my $metaname = $parser->{'parameters'}->{'metaname'};
473 my $metamode = $parser->{'parameters'}->{'metamode'};
474
475 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
476
477 # Find the right metadata tag and checks if we are going to
478 # override it
479 #
480 # Note: This over writes the first metadata block it
481 # encountered. If there are multiple Sections in the doc.xml, it
482 # might not behave as you would expect
483
484 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
485## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
486## print STDERR "**** metamode = $metamode\n";
487
488 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
489 my $name_attr = $attrHash->{'name'};
490 if (($name_attr eq $metaname) && ($metamode eq "override")) {
491## print STDERR "**** got match!!\n";
492 # Get the value and override the current value
493 my $metavalue = $parser->{'parameters'}->{'metavalue'};
494 $attrHash->{'_content'} = $metavalue;
495
496 # Don't want it to wipe out any other pieces of metadata
497 $parser->{'parameters'}->{'metamode'} = "done";
498 }
499 }
500
501 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
502 return [$tagname => $attrHash];
503}
504
505
506sub dxml_description
507{
508 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
509 my $metamode = $parser->{'parameters'}->{'metamode'};
510
511 # Accumulate the metadata
512 # NOTE: This appends new metadata element to all description fields.
513 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
514 if (($metamode eq "accumulate") || ($metamode eq "override")) {
515 # If get to here and metamode is override, the this means there
516 # was no existing value to overide => treat as an append operation
517
518 # Tack a new metadata tag on to the end of the <Metadata>+ block
519 my $metaname = $parser->{'parameters'}->{'metaname'};
520 my $metavalue = $parser->{'parameters'}->{'metavalue'};
521
522 my $metadata_attr = { '_content' => $metavalue,
523 'name' => $metaname,
524 'mode' => "accumulate" };
525
526 my $append_metadata = [ "Metadata" => $metadata_attr ];
527 my $description_content = $attrHash->{'_content'};
528
529## print STDERR "**** appending to doc.xml\n";
530
531 push(@$description_content," ", $append_metadata ,"\n ");
532 $parser->{'parameters'}->{'metamode'} = "done";
533 }
534
535
536 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
537 return [$tagname => $attrHash];
538}
539
540
541
542sub dxml_start_section
543{
544 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
545
546 my $new_depth = scalar(@$contextArray);
547
548 if ($new_depth == 1) {
549 $parser->{'parameters'}->{'curr_section_depth'} = 1;
550 $parser->{'parameters'}->{'curr_section_num'} = "";
551 }
552
553 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
554 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
555
556 my $new_secnum;
557
558 if ($new_depth > $old_depth) {
559 # child subsection
560 $new_secnum = "$old_secnum.1";
561 }
562 elsif ($new_depth == $old_depth) {
563 # sibling section => increase it's value by 1
564 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
565 $tail_num++;
566 $new_secnum = $old_secnum;
567 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
568 }
569 else {
570 # back up to parent section => lopp off tail
571 $new_secnum = $old_secnum;
572 $new_secnum =~ s/\.\d+$//;
573 }
574
575 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
576 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
577
578 print STDERR "*** In Section: $new_secnum\n";
579}
580
581sub edit_xml_file
582{
583 my $self = shift @_;
584 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
585
586 # use XML::Rules to add it in (read in and out again)
587 my $parser = XML::Rules->new(start_rules => $start_rules,
588 rules => $rules,
589 style => 'filter',
590 output_encoding => 'utf8' );
591
592 my $xml_in = "";
593 if (!open(MIN,"<$filename")) {
594 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
595 }
596 else {
597 # Read all the text in
598 my $line;
599 while (defined ($line=<MIN>)) {
600 $xml_in .= $line;
601 }
602 close(MIN);
603
604 my $MOUT;
605 if (!open($MOUT,">$filename")) {
606 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
607 }
608 else {
609 # Matched lines will get handled by the call backs
610## my $xml_out = "";
611
612 binmode($MOUT,":utf8");
613 $parser->filter($xml_in,$MOUT, $options);
614
615# binmode(MOUT,":utf8");
616# print MOUT $xml_out;
617 close($MOUT);
618 }
619 }
620}
621
622
623sub edit_doc_xml
624{
625 my $self = shift @_;
626 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
627
628 # To monitor which section/subsection number we are in
629 my @start_rules =
630 ( 'Section' => \&dxml_start_section );
631
632 # use XML::Rules to add it in (read in and out again)
633 # Set the call back functions
634 my @rules =
635 ( _default => 'raw',
636 'Metadata' => \&dxml_metadata,
637 'Description' => \&dxml_description);
638
639 # Sets the parameters
640 my $options = { 'metaname' => $metaname,
641 'metapos' => $metapos,
642 'metavalue' => $metavalue,
643 'metamode' => $metamode };
644
645 if (defined $opt_secnum) {
646 $options->{'secnum'} = $opt_secnum;
647 }
648
649 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
650}
651
652sub set_archives_metadata_entry
653{
654 my $self = shift @_;
655 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
656
657 # Obtain the doc.xml path for the specified docID
658 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
659
660 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
661 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
662 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
663
664 # The $doc_xml_file is relative to the archives, and now let's get the full path
665 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
666 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
667
668 # Edit the doc.xml file with the specified metadata name, value and position.
669 # TODO: there is a potential problem here as this edit_doc_xml function
670 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
671 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
672
673 print STDERR "** away to call edit_doc_xml\n";
674
675 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
676 $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
677
678 print STDERR "*** finished edit_doc_xml\n";
679
680 return 0; # return 0 for now to indicate no error
681
682}
683
684
685sub set_archives_metadata
686{
687 my $self = shift @_;
688
689 my $username = $self->{'username'};
690 my $collect = $self->{'collect'};
691 my $gsdl_cgi = $self->{'gsdl_cgi'};
692 my $gsdlhome = $self->{'gsdlhome'};
693 my $infodbtype = $self->{'infodbtype'};
694
695
696
697 if ($baseaction::authentication_enabled) {
698 # Ensure the user is allowed to edit this collection
699 $self->authenticate_user($username, $collect);
700 }
701
702 my $site = $self->{'site'};
703
704 # Obtain the collect and archive dir
705 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
706
707 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
708
709 # Make sure the collection isn't locked by someone else
710 $self->lock_collection($username, $collect);
711
712 # look up additional args
713 my $docid = $self->{'d'};
714 my $metaname = $self->{'metaname'};
715 my $metavalue = $self->{'metavalue'};
716
717 my $metapos = $self->{'metapos'};
718 $metapos = 0 if (!defined $metapos);
719
720 my $metamode = $self->{'metamode'};
721 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
722 # make "accumulate" the default (less destructive, as won't actually
723 # delete any existing values)
724 $metamode = "accumulate";
725 }
726
727 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
728 $metaname,$metapos,$metavalue,$metamode);
729
730 # Release the lock once it is done
731 $self->unlock_collection($username, $collect);
732
733 if ($status == 0) {
734 my $mess = "set-archives-metadata successful: Key[$docid]\n";
735 $mess .= " $metaname";
736 $mess .= "->[$metapos]" if (defined $metapos);
737 $mess .= " = $metavalue";
738 $mess .= " ($metamode)\n";
739
740 $gsdl_cgi->generate_ok_message($mess);
741 }
742 else {
743 my $mess .= "Failed to set archives metadata key: $docid\n";
744 $mess .= "Exit status: $status\n";
745 $mess .= "System Error Message: $!\n";
746 $mess .= "-" x 20 . "\n";
747
748 $gsdl_cgi->generate_error($mess);
749 }
750}
751
752
753sub set_archives_metadata_array
754{
755 my $self = shift @_;
756
757 my $username = $self->{'username'};
758 my $collect = $self->{'collect'};
759 my $gsdl_cgi = $self->{'gsdl_cgi'};
760 my $gsdlhome = $self->{'gsdlhome'};
761
762 if ($baseaction::authentication_enabled) {
763 # Ensure the user is allowed to edit this collection
764 &authenticate_user($gsdl_cgi, $username, $collect);
765 }
766
767 my $site = $self->{'site'};
768 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
769
770 $gsdl_cgi->checked_chdir($collect_dir);
771
772 # Obtain the collect dir
773 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
774
775 # Make sure the collection isn't locked by someone else
776 $self->lock_collection($username, $collect);
777
778 # look up additional args
779
780 my $infodbtype = $self->{'infodbtype'};
781
782 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
783
784 my $json_str = $self->{'json'};
785 my $doc_array = decode_json $json_str;
786
787
788 my $global_status = 0;
789 my $global_mess = "";
790
791 my @all_docids = ();
792
793 foreach my $doc_array_rec ( @$doc_array ) {
794
795 my $docid = $doc_array_rec->{'docid'};
796 my $metaname = $doc_array_rec->{'metaname'};
797 my $metapos = $doc_array_rec->{'metapos'};
798 my $metamode = $self->{'metamode'};
799 my $metavalue = $doc_array_rec->{'metavalue'};
800
801 # Some sanity checks
802 $metapos = 0 if (!defined $metapos);
803
804 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
805 # make "accumulate" the default (less destructive, as won't actually
806 # delete any existing values)
807 $metamode = "accumulate";
808 }
809
810 push(@all_docids,$docid);
811
812 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
813 $metaname,$metapos,$metavalue,$metamode);
814
815 if ($status != 0) {
816 # Catch error if set infodb entry failed
817 $global_status = $status;
818 $global_mess .= "Failed to set metadata key: $docid\n";
819 $global_mess .= "Exit status: $status\n";
820 $global_mess .= "System Error Message: $!\n";
821 $global_mess .= "-" x 20 . "\n";
822 }
823 }
824
825 if ($global_status != 0) {
826 $global_mess .= "PATH: $ENV{'PATH'}\n";
827 $gsdl_cgi->generate_error($global_mess);
828 }
829 else {
830 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
831 $gsdl_cgi->generate_ok_message($mess);
832 }
833
834 # Release the lock once it is done
835 $self->unlock_collection($username, $collect);
836}
837
838
839sub mxml_metadata
840{
841 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
842 my $metaname = $parser->{'parameters'}->{'metaname'};
843 my $metamode = $parser->{'parameters'}->{'metamode'};
844
845 # Report error if we don't see FileName tag before this
846 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
847
848 # Don't do anything if we are not in the right FileSet
849 my $file_regexp = $parser->{'parameters'}->{'current_file'};
850 if ($file_regexp =~ /\.\*/) {
851 # Only interested in a file_regexp if it specifies precisely one
852 # file.
853 # So, skip anything with a .* in it as it is too general
854 return [$tagname => $attrHash];
855 }
856 my $src_file = $parser->{'parameters'}->{'src_file'};
857 if (!($src_file =~ /$file_regexp/)) {
858 return [$tagname => $attrHash];
859 }
860## print STDERR "*** mxl metamode = $metamode\n";
861
862 # Find the right metadata tag and checks if we are going to override it
863 my $name_attr = $attrHash->{'name'};
864 if (($name_attr eq $metaname) && ($metamode eq "override")) {
865 # Get the value and override the current value
866 my $metavalue = $parser->{'parameters'}->{'metavalue'};
867 $attrHash->{'_content'} = $metavalue;
868
869## print STDERR "**** overrideing metadata.xml\n";
870
871 # Don't want it to wipe out any other pieces of metadata
872 $parser->{'parameters'}->{'metamode'} = "done";
873 }
874
875 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
876 return [$tagname => $attrHash];
877}
878
879
880sub mxml_description
881{
882 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
883 my $metamode = $parser->{'parameters'}->{'metamode'};
884
885 # Failed... Report error if we don't see FileName tag before this
886 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
887
888 # Don't do anything if we are not in the right FileSet
889 my $file_regexp = $parser->{'parameters'}->{'current_file'};
890 if ($file_regexp =~ /\.\*/) {
891 # Only interested in a file_regexp if it specifies precisely one
892 # file.
893 # So, skip anything with a .* in it as it is too general
894 return [$tagname => $attrHash];
895 }
896 my $src_file = $parser->{'parameters'}->{'src_file'};
897 if (!($src_file =~ /$file_regexp/)) {
898 return [$tagname => $attrHash];
899 }
900
901 # Accumulate the metadata block to the end of the description block
902 # Note: This adds metadata block to all description blocks, so if there are
903 # multiple FileSets, it will add to all of them
904 if (($metamode eq "accumulate") || ($metamode eq "override")) {
905 # if metamode was "override" but get to here then it failed to
906 # find an item to override, in which case it should append its
907 # value to the end, just like the "accumulate" mode
908
909 # tack a new metadata tag on to the end of the <Metadata>+ block
910 my $metaname = $parser->{'parameters'}->{'metaname'};
911 my $metavalue = $parser->{'parameters'}->{'metavalue'};
912
913 my $metadata_attr = { '_content' => $metavalue,
914 'name' => $metaname,
915 'mode' => "accumulate" };
916
917 my $append_metadata = [ "Metadata" => $metadata_attr ];
918 my $description_content = $attrHash->{'_content'};
919
920## print STDERR "*** appending to metadata.xml\n";
921
922 # append the new metadata element to the end of the current
923 # content contained inside this tag
924 push(@$description_content," ", $append_metadata ,"\n ");
925
926 $parser->{'parameters'}->{'metamode'} = "done";
927 }
928
929 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
930 return [$tagname => $attrHash];
931}
932
933
934sub mxml_filename
935{
936 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
937
938 # Store the filename of the Current Fileset
939 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
940 # FileName tag must come before Description tag
941 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
942
943 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
944 return [$tagname => $attrHash];
945}
946
947
948sub mxml_fileset
949{
950 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
951
952 # Initilise the current_file
953 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
954 # FileName tag must come before Description tag
955 $parser->{'parameters'}->{'current_file'} = "";
956
957 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
958 return [$tagname => $attrHash];
959}
960
961
962sub edit_metadata_xml
963{
964 my $self = shift @_;
965 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
966
967 # Set the call-back functions for the metadata tags
968 my @rules =
969 ( _default => 'raw',
970 'FileName' => \&mxml_filename,
971 'Metadata' => \&mxml_metadata,
972 'Description' => \&mxml_description,
973 'FileSet' => \&mxml_fileset);
974
975 # use XML::Rules to add it in (read in and out again)
976 my $parser = XML::Rules->new(rules => \@rules,
977 style => 'filter',
978 output_encoding => 'utf8');
979
980 my $xml_in = "";
981 if (!open(MIN,"<$metadata_xml_filename")) {
982 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
983 }
984 else {
985 # Read them in
986 my $line;
987 while (defined ($line=<MIN>)) {
988 $xml_in .= $line;
989 }
990 close(MIN);
991
992 # Filter with the call-back functions
993 my $xml_out = "";
994
995 my $MOUT;
996 if (!open($MOUT,">$metadata_xml_filename")) {
997 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
998 }
999 else {
1000 binmode($MOUT,":utf8");
1001
1002 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1003 # At the moment, I will just hack it!
1004 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1005 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1006 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1007 #print MOUT $xml_out;
1008
1009 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1010 metavalue => $metavalue,
1011 metamode => $metamode,
1012 src_file => $src_file,
1013 current_file => undef} );
1014 close($MOUT);
1015 }
1016 }
1017}
1018
1019
1020sub set_import_metadata
1021{
1022 my $self = shift @_;
1023
1024 my $username = $self->{'username'};
1025 my $collect = $self->{'collect'};
1026 my $gsdl_cgi = $self->{'gsdl_cgi'};
1027 my $gsdlhome = $self->{'gsdlhome'};
1028 my $infodbtype = $self->{'infodbtype'};
1029
1030 if ($baseaction::authentication_enabled) {
1031 # Ensure the user is allowed to edit this collection
1032 $self->authenticate_user($username, $collect);
1033 }
1034
1035
1036 # Obtain the collect and archive dir
1037 my $site = $self->{'site'};
1038 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1039
1040 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1041 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1042
1043 # Make sure the collection isn't locked by someone else
1044 $self->lock_collection($username, $collect);
1045
1046 # look up additional args
1047 # want either d= or f=
1048 my $docid = $self->{'d'};
1049 my $import_file = $self->{'f'};
1050 if ((!defined $docid) && (!defined $import_file)) {
1051 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1052 }
1053
1054 # Get the parameters and set default mode to "accumulate"
1055 my $metaname = $self->{'metaname'};
1056 my $metavalue = $self->{'metavalue'};
1057## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1058 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1059 print STDERR "*** set import meta: val = $metavalue\n";
1060
1061 my $metamode = $self->{'metamode'};
1062 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1063 # make "accumulate" the default (less destructive, as won't actually
1064 # delete any existing values)
1065 $metamode = "accumulate";
1066 }
1067
1068 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1069 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1070 my $metadata_xml_file;
1071 my $import_filename = undef;
1072 if (defined $docid) {
1073 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1074 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1075
1076 # This now stores the full pathname
1077 $import_filename = $doc_rec->{'src-file'}->[0];
1078 }
1079 else {
1080 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1081 }
1082
1083 # figure out correct metadata.xml file [?]
1084 # Assuming the metadata.xml file is next to the source file
1085 # Note: This will not work if it is using the inherited metadata from the parent folder
1086 my ($import_tailname, $import_dirname)
1087 = File::Basename::fileparse($import_filename);
1088 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1089
1090 # Edit the metadata.xml
1091 # Modified by Jeffrey from DL Consulting
1092 # Handle the case where there is one metadata.xml file for multiple FileSets
1093 # The XML filter needs to know whether it is in the right FileSet
1094 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1095 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1096 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1097 $metaname, $metavalue, $metamode, $import_tailname);
1098
1099 # Release the lock once it is done
1100 $self->unlock_collection($username, $collect);
1101
1102 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1103 $mess .= " $metaname";
1104 $mess .= " = $metavalue";
1105 $mess .= " ($metamode)\n";
1106
1107 $gsdl_cgi->generate_ok_message($mess);
1108
1109}
1110
1111
1112sub remove_live_metadata
1113{
1114 my $self = shift @_;
1115
1116 my $username = $self->{'username'};
1117 my $collect = $self->{'collect'};
1118 my $gsdl_cgi = $self->{'gsdl_cgi'};
1119 my $gsdlhome = $self->{'gsdlhome'};
1120 my $infodbtype = $self->{'infodbtype'};
1121
1122 if ($baseaction::authentication_enabled) {
1123 # Ensure the user is allowed to edit this collection
1124 &authenticate_user($gsdl_cgi, $username, $collect);
1125 }
1126
1127 # Obtain the collect dir
1128 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1129 my $site = $self->{'site'};
1130 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1131
1132 # Make sure the collection isn't locked by someone else
1133 $self->lock_collection($username, $collect);
1134
1135 # look up additional args
1136 my $docid = $self->{'d'};
1137 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1138 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1139 }
1140
1141 # Generate the dbkey
1142 my $metaname = $self->{'metaname'};
1143 my $dbkey = "$docid.$metaname";
1144
1145 # To people who know $collect_tail please add some comments
1146 # Obtain the live gdbm_db path
1147 my $collect_tail = $collect;
1148 $collect_tail =~ s/^.*[\/\\]//;
1149 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1150 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1151
1152 # Remove the key
1153 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1154 my $status = system($cmd);
1155 if ($status != 0) {
1156 # Catch error if gdbmdel failed
1157 my $mess = "Failed to set metadata key: $dbkey\n";
1158
1159 $mess .= "PATH: $ENV{'PATH'}\n";
1160 $mess .= "cmd = $cmd\n";
1161 $mess .= "Exit status: $status\n";
1162 $mess .= "System Error Message: $!\n";
1163
1164 $gsdl_cgi->generate_error($mess);
1165 }
1166 else {
1167 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1168 }
1169
1170}
1171
1172
1173sub remove_metadata
1174{
1175 my $self = shift @_;
1176
1177 my $username = $self->{'username'};
1178 my $collect = $self->{'collect'};
1179 my $gsdl_cgi = $self->{'gsdl_cgi'};
1180 my $gsdlhome = $self->{'gsdlhome'};
1181 my $infodbtype = $self->{'infodbtype'};
1182
1183 if ($baseaction::authentication_enabled) {
1184 # Ensure the user is allowed to edit this collection
1185 &authenticate_user($gsdl_cgi, $username, $collect);
1186 }
1187
1188 # Obtain the collect dir
1189 my $site = $self->{'site'};
1190 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1191 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1192
1193 # Make sure the collection isn't locked by someone else
1194 $self->lock_collection($username, $collect);
1195
1196 # look up additional args
1197 my $docid = $self->{'d'};
1198 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1199 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1200 }
1201 my $metaname = $self->{'metaname'};
1202 my $metapos = $self->{'metapos'};
1203
1204 # To people who know $collect_tail please add some comments
1205 # Obtain the path to the database
1206 my $collect_tail = $collect;
1207 $collect_tail =~ s/^.*[\/\\]//;
1208 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1209 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1210
1211 # Read the docid entry
1212 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1213
1214 # Basically loop through and unescape_html the values
1215 foreach my $k (keys %$doc_rec) {
1216 my @escaped_v = ();
1217 foreach my $v (@{$doc_rec->{$k}}) {
1218 if ($k eq "contains") {
1219 # protect quotes in ".2;".3 etc
1220 $v =~ s/\"/\\\"/g;
1221 push(@escaped_v, $v);
1222 }
1223 else {
1224 my $ev = &ghtml::unescape_html($v);
1225 $ev =~ s/\"/\\\"/g;
1226 push(@escaped_v, $ev);
1227 }
1228 }
1229 $doc_rec->{$k} = \@escaped_v;
1230 }
1231
1232 # Check to make sure the key does exist
1233 if (!defined ($doc_rec->{$metaname})) {
1234 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1235 }
1236
1237 # Obtain the specified metadata pos
1238 $metapos = 0 if (!defined $metapos);
1239
1240 # consider check key is defined before deleting?
1241 # Loop through the metadata array and ignore the specified position
1242 my $filtered_metadata = [];
1243 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1244 for (my $i=0; $i<$num_metadata_vals; $i++) {
1245 my $metavalue = shift(@{$doc_rec->{$metaname}});
1246
1247 if ($i != $metapos) {
1248 push(@$filtered_metadata,$metavalue)
1249 }
1250 }
1251 $doc_rec->{$metaname} = $filtered_metadata;
1252
1253 # Turn the record back to string
1254 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1255
1256 # Store it back to the database
1257 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1258 my $status = system($cmd);
1259 if ($status != 0) {
1260 my $mess = "Failed to set metadata key: $docid\n";
1261
1262 $mess .= "PATH: $ENV{'PATH'}\n";
1263 $mess .= "cmd = $cmd\n";
1264 $mess .= "Exit status: $status\n";
1265 $mess .= "System Error Message: $!\n";
1266
1267 $gsdl_cgi->generate_error($mess);
1268 }
1269 else {
1270 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1271 $mess .= " $metaname";
1272 $mess .= "->[$metapos]" if (defined $metapos);
1273
1274 $gsdl_cgi->generate_ok_message($mess);
1275 }
1276}
1277
1278
1279# Was trying to reused the codes, but the functions need to be broken
1280# down more before they can be reused, otherwise there will be too
1281# much overhead and duplicate process...
1282sub insert_metadata
1283{
1284 my $self = shift @_;
1285
1286 my $username = $self->{'username'};
1287 my $collect = $self->{'collect'};
1288 my $gsdl_cgi = $self->{'gsdl_cgi'};
1289 my $gsdlhome = $self->{'gsdlhome'};
1290 my $infodbtype = $self->{'infodbtype'};
1291
1292 # If the import metadata and gdbm database have been updated, we
1293 # need to insert some notification to warn user that the the text
1294 # they see at the moment is not indexed and require a rebuild.
1295 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1296
1297 if ($baseaction::authentication_enabled) {
1298 # Ensure the user is allowed to edit this collection
1299 $self->authenticate_user($username, $collect);
1300 }
1301
1302 # Obtain the collect and archive dir
1303 my $site = $self->{'site'};
1304 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1305 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1306 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1307
1308 # Make sure the collection isn't locked by someone else
1309 $self->lock_collection($username, $collect);
1310
1311 # Check additional args
1312 my $docid = $self->{'d'};
1313 if (!defined($docid)) {
1314 $gsdl_cgi->generate_error("No document id is specified: d=...");
1315 }
1316 my $metaname = $self->{'metaname'};
1317 if (!defined($metaname)) {
1318 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1319 }
1320 my $metavalue = $self->{'metavalue'};
1321 if (!defined($metavalue) || $metavalue eq "") {
1322 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1323 }
1324 # make "accumulate" the default (less destructive, as won't actually
1325 # delete any existing values)
1326 my $metamode = "accumulate";
1327
1328 #=======================================================================#
1329 # set_import_metadata [START]
1330 #=======================================================================#
1331 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1332 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1333 my $metadata_xml_file;
1334 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1335 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1336
1337 # This now stores the full pathname
1338 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1339
1340 # figure out correct metadata.xml file [?]
1341 # Assuming the metadata.xml file is next to the source file
1342 # Note: This will not work if it is using the inherited metadata from the parent folder
1343 my ($import_tailname, $import_dirname)
1344 = File::Basename::fileparse($import_filename);
1345 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1346
1347 # Shane's escape characters
1348 $metavalue = pack "U0C*", unpack "C*", $metavalue;
1349 $metavalue =~ s/\,/&#44;/g;
1350 $metavalue =~ s/\:/&#58;/g;
1351 $metavalue =~ s/\|/&#124;/g;
1352 $metavalue =~ s/\(/&#40;/g;
1353 $metavalue =~ s/\)/&#41;/g;
1354 $metavalue =~ s/\[/&#91;/g;
1355 $metavalue =~ s/\\/&#92;/g;
1356 $metavalue =~ s/\]/&#93;/g;
1357 $metavalue =~ s/\{/&#123;/g;
1358 $metavalue =~ s/\}/&#125;/g;
1359 $metavalue =~ s/\"/&#34;/g;
1360 $metavalue =~ s/\`/&#96;/g;
1361 $metavalue =~ s/\n/_newline_/g;
1362
1363 # Edit the metadata.xml
1364 # Modified by Jeffrey from DL Consulting
1365 # Handle the case where there is one metadata.xml file for multiple FileSets
1366 # The XML filter needs to know whether it is in the right FileSet
1367 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1368 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1369 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1370 $metaname, $metavalue, $metamode, $import_tailname);
1371 #=======================================================================#
1372 # set_import_metadata [END]
1373 #=======================================================================#
1374
1375
1376 #=======================================================================#
1377 # set_metadata (accumulate version) [START]
1378 #=======================================================================#
1379 # To people who know $collect_tail please add some comments
1380 # Obtain path to the database
1381 my $collect_tail = $collect;
1382 $collect_tail =~ s/^.*[\/\\]//;
1383 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1384 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1385
1386 # Read the docid entry
1387 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1388
1389 foreach my $k (keys %$doc_rec) {
1390 my @escaped_v = ();
1391 foreach my $v (@{$doc_rec->{$k}}) {
1392 if ($k eq "contains") {
1393 # protect quotes in ".2;".3 etc
1394 $v =~ s/\"/\\\"/g;
1395 push(@escaped_v, $v);
1396 }
1397 else {
1398 my $ev = &ghtml::unescape_html($v);
1399 $ev =~ s/\"/\\\"/g;
1400 push(@escaped_v, $ev);
1401 }
1402 }
1403 $doc_rec->{$k} = \@escaped_v;
1404 }
1405
1406 # Protect the quotes
1407 $metavalue =~ s/\"/\\\"/g;
1408
1409 # Adds the pending macro
1410 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1411
1412 # If the metadata doesn't exist, create a new one
1413 if (!defined($doc_rec->{$metaname})){
1414 $doc_rec->{$metaname} = [ $macro_metavalue ];
1415 }
1416 # Else, let's acculumate the values
1417 else {
1418 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1419 }
1420
1421 # Generate the record string
1422 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1423
1424 # Store it into GDBM
1425 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1426 my $status = system($cmd);
1427 if ($status != 0) {
1428 # Catch error if gdbmget failed
1429 my $mess = "Failed to set metadata key: $docid\n";
1430
1431 $mess .= "PATH: $ENV{'PATH'}\n";
1432 $mess .= "cmd = $cmd\n";
1433 $mess .= "Exit status: $status\n";
1434 $mess .= "System Error Message: $!\n";
1435
1436 $gsdl_cgi->generate_error($mess);
1437 }
1438 else {
1439 my $mess = "insert-metadata successful: Key[$docid]\n";
1440 $mess .= " [In metadata.xml] $metaname";
1441 $mess .= " = $metavalue\n";
1442 $mess .= " [In database] $metaname";
1443 $mess .= " = $macro_metavalue\n";
1444 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1445 $gsdl_cgi->generate_ok_message($mess);
1446 }
1447 #=======================================================================#
1448 # set_metadata (accumulate version) [END]
1449 #=======================================================================#
1450
1451 # Release the lock once it is done
1452 $self->unlock_collection($username, $collect);
1453}
1454
14551;
Note: See TracBrowser for help on using the repository browser.