source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 24071

Last change on this file since 24071 was 24071, checked in by davidb, 13 years ago

Introduction of actions that take an array of items (e.g. an array of OIDs or filenames). In adding in this ability, we have started to make use of JSON.

Another action added in is the ability to control building using a manifest file (its fields passed in using JSON). Also the ability to delete files in the archives directory (i.e. when a collection is beeing used in an 'onlyadd' way). Still needs to the more general case to be implemented.

/DB/

File size: 46.7 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43
44@metadataaction::ISA = ('baseaction');
45
46
47my $action_table =
48{
49 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [] },
51
52 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
53 'optional-args' => [ "metapos" ] },
54
55 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
56 'optional-args' => [ ] },
57
58 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
59 'optional-args' => [ "metapos" ] },
60
61 "set-metadata-array" => { 'compulsory-args' => [ "json" ],
62 'optional-args' => [ ] },
63
64 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
65 'optional-args' => [ "metapos", "metamode" ]
66 # metamode can be "accumulate", "override",
67 },
68
69 "set-archives-metadata-array" => { 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "metamode" ]
71 },
72
73 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
74 'optional-args' => [ "d", "f", "metamode" ]
75 # metamode can be "accumulate", "override", or "unique-id"
76 },
77
78
79 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
80 'optional-args' => [ ] },
81
82 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
83 'optional-args' => [ "metapos" ] },
84
85 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
86 'optional-args' => [ ]
87 }
88};
89
90
91sub new
92{
93 my $class = shift (@_);
94 my ($gsdl_cgi,$iis6_mode) = @_;
95
96 # Treat metavalue specially. To transmit this through a GET request
97 # the Javascript side has url-encoded it, so here we need to decode
98 # it before proceeding
99
100 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
101 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
102
103 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
104
105 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
106
107 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
108
109 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
110
111 return bless $self, $class;
112}
113
114
115sub get_live_metadata
116{
117 my $self = shift @_;
118
119 my $username = $self->{'username'};
120 my $collect = $self->{'collect'};
121 my $gsdl_cgi = $self->{'gsdl_cgi'};
122 my $gsdlhome = $self->{'gsdlhome'};
123 my $infodbtype = $self->{'infodbtype'};
124
125 # live metadata gets/saves value scoped (prefixed) by the current usename
126 # so (for now) let's not bother to enforce authentication
127
128 # Obtain the collect dir
129 my $site = $self->{'site'};
130 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
131 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
132
133 # Make sure the collection isn't locked by someone else
134 $self->lock_collection($username, $collect);
135
136 # look up additional args
137 my $docid = $self->{'d'};
138 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
139 $gsdl_cgi->generate_error("No docid (d=...) specified.");
140 }
141
142 # Generate the dbkey
143 my $metaname = $self->{'metaname'};
144 my $dbkey = "$docid.$metaname";
145
146 # To people who know $collect_tail please add some comments
147 # Obtain path to the database
148 my $collect_tail = $collect;
149 $collect_tail =~ s/^.*[\/|\\]//;
150 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
151 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
152
153 # Obtain the content of the key
154 my $cmd = "gdbmget $infodb_file_path $dbkey";
155 if (open(GIN,"$cmd |") == 0) {
156 # Catch error if gdbmget failed
157 my $mess = "Failed to get metadata key: $metaname\n";
158 $mess .= "$!\n";
159
160 $gsdl_cgi->generate_error($mess);
161 }
162 else {
163 binmode(GIN,":utf8");
164 # Read everything in and concatenate them into $metavalue
165 my $metavalue = "";
166 my $line;
167 while (defined ($line=<GIN>)) {
168 $metavalue .= $line;
169 }
170 close(GIN);
171 chomp($metavalue); # Get rid off the tailing newlines
172 $gsdl_cgi->generate_ok_message("$metavalue");
173 }
174
175 # Release the lock once it is done
176 $self->unlock_collection($username, $collect);
177}
178
179
180sub get_metadata
181{
182 my $self = shift @_;
183
184 my $username = $self->{'username'};
185 my $collect = $self->{'collect'};
186 my $gsdl_cgi = $self->{'gsdl_cgi'};
187 my $gsdlhome = $self->{'gsdlhome'};
188
189 # Authenticate user if it is enabled
190 if ($baseaction::authentication_enabled) {
191 # Ensure the user is allowed to edit this collection
192 &authenticate_user($gsdl_cgi, $username, $collect);
193 }
194
195 # Obtain the collect dir
196 my $site = $self->{'site'};
197 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
198 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
199
200 # Make sure the collection isn't locked by someone else
201 $self->lock_collection($username, $collect);
202
203 # look up additional args
204 my $docid = $self->{'d'};
205 my $metaname = $self->{'metaname'};
206 my $metapos = $self->{'metapos'};
207 my $infodbtype = $self->{'infodbtype'};
208
209 # To people who know $collect_tail please add some comments
210 # Obtain path to the database
211 my $collect_tail = $collect;
212 $collect_tail =~ s/^.*[\/\\]//;
213 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
214 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
215
216 # Read the docid entry
217 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
218
219 # Basically loop through and unescape_html the values
220 foreach my $k (keys %$doc_rec) {
221 my @escaped_v = ();
222 foreach my $v (@{$doc_rec->{$k}}) {
223 my $ev = &ghtml::unescape_html($v);
224 push(@escaped_v, $ev);
225 }
226 $doc_rec->{$k} = \@escaped_v;
227 }
228
229 # Obtain the specified metadata value
230 $metapos = 0 if (!defined $metapos);
231 my $metavalue = $doc_rec->{$metaname}->[$metapos];
232 $gsdl_cgi->generate_ok_message("$metavalue");
233
234 # Release the lock once it is done
235 $self->unlock_collection($username, $collect);
236}
237
238
239sub set_live_metadata
240{
241 my $self = shift @_;
242
243 my $username = $self->{'username'};
244 my $collect = $self->{'collect'};
245 my $gsdl_cgi = $self->{'gsdl_cgi'};
246 my $gsdlhome = $self->{'gsdlhome'};
247 my $infodbtype = $self->{'infodbtype'};
248
249 if ($baseaction::authentication_enabled) {
250 # Ensure the user is allowed to edit this collection
251 &authenticate_user($gsdl_cgi, $username, $collect);
252 }
253
254 # Obtain the collect dir
255 my $site = $self->{'site'};
256 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
257 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
258
259 # Make sure the collection isn't locked by someone else
260 $self->lock_collection($username, $collect);
261
262 # look up additional args
263 my $docid = $self->{'d'};
264 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
265 $gsdl_cgi->generate_error("No docid (d=...) specified.");
266 }
267 my $metavalue = $self->{'metavalue'};
268
269
270 # Generate the dbkey
271 my $metaname = $self->{'metaname'};
272 my $dbkey = "$docid.$metaname";
273
274 # To people who know $collect_tail please add some comments
275 # Obtain path to the database
276 my $collect_tail = $collect;
277 $collect_tail =~ s/^.*[\/\\]//;
278 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
279 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
280
281 # Set the new value
282 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
283 my $status = system($cmd);
284 if ($status != 0) {
285 # Catch error if gdbmget failed
286 my $mess = "Failed to set metadata key: $dbkey\n";
287
288 $mess .= "PATH: $ENV{'PATH'}\n";
289 $mess .= "cmd = $cmd\n";
290 $mess .= "Exit status: $status\n";
291 $mess .= "System Error Message: $!\n";
292
293 $gsdl_cgi->generate_error($mess);
294 }
295 else {
296 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
297 }
298
299 # Release the lock once it is done
300 $self->unlock_collection($username, $collect);
301}
302
303sub set_metadata_entry
304{
305 my $self = shift @_;
306 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
307
308 # To people who know $collect_tail please add some comments
309 # Obtain path to the database
310 my $collect_tail = $collect;
311 $collect_tail =~ s/^.*[\/\\]//;
312 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
313 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
314
315# print STDERR "**** infodb file path = $infodb_file_path\n";
316# print STDERR "***** infodb type = $infodbtype\n";
317
318 # Read the docid entry
319 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
320
321 # Set the metadata value
322 if (defined $metapos) {
323 $doc_rec->{$metaname}->[$metapos] = $metavalue;
324 }
325 else {
326 $doc_rec->{$metaname} = [ $metavalue ];
327 }
328
329 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
330
331 return $status;
332
333}
334
335sub set_metadata
336{
337 my $self = shift @_;
338
339 my $username = $self->{'username'};
340 my $collect = $self->{'collect'};
341 my $gsdl_cgi = $self->{'gsdl_cgi'};
342 my $gsdlhome = $self->{'gsdlhome'};
343
344 if ($baseaction::authentication_enabled) {
345 # Ensure the user is allowed to edit this collection
346 &authenticate_user($gsdl_cgi, $username, $collect);
347 }
348
349 my $site = $self->{'site'};
350 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
351
352 $gsdl_cgi->checked_chdir($collect_dir);
353
354 # Obtain the collect dir
355 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
356
357 # Make sure the collection isn't locked by someone else
358 $self->lock_collection($username, $collect);
359
360 # look up additional args
361 my $docid = $self->{'d'};
362 my $metaname = $self->{'metaname'};
363 my $metapos = $self->{'metapos'};
364 my $metavalue = $self->{'metavalue'};
365 my $infodbtype = $self->{'infodbtype'};
366
367 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
368
369 if ($status != 0) {
370 # Catch error if set infodb entry failed
371 my $mess = "Failed to set metadata key: $docid\n";
372
373 $mess .= "PATH: $ENV{'PATH'}\n";
374 $mess .= "Exit status: $status\n";
375 $mess .= "System Error Message: $!\n";
376
377 $gsdl_cgi->generate_error($mess);
378 }
379 else {
380 my $mess = "set-metadata successful: Key[$docid]\n";
381 $mess .= " $metaname";
382 $mess .= "->[$metapos]" if (defined $metapos);
383 $mess .= " = $metavalue";
384
385 $gsdl_cgi->generate_ok_message($mess);
386 }
387
388 # Release the lock once it is done
389 $self->unlock_collection($username, $collect);
390}
391
392
393
394
395sub set_metadata_array
396{
397 my $self = shift @_;
398
399 my $username = $self->{'username'};
400 my $collect = $self->{'collect'};
401 my $gsdl_cgi = $self->{'gsdl_cgi'};
402 my $gsdlhome = $self->{'gsdlhome'};
403
404 if ($baseaction::authentication_enabled) {
405 # Ensure the user is allowed to edit this collection
406 &authenticate_user($gsdl_cgi, $username, $collect);
407 }
408
409 my $site = $self->{'site'};
410 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
411
412 $gsdl_cgi->checked_chdir($collect_dir);
413
414 # Obtain the collect dir
415 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
416
417 # Make sure the collection isn't locked by someone else
418 $self->lock_collection($username, $collect);
419
420 # look up additional args
421
422 my $infodbtype = $self->{'infodbtype'};
423
424 my $json_str = $self->{'json'};
425 my $doc_array = decode_json $json_str;
426
427
428 my $global_status = 0;
429 my $global_mess = "";
430
431 my @all_docids = ();
432
433 foreach my $doc_array_rec ( @$doc_array ) {
434
435 my $docid = $doc_array_rec->{'docid'};
436 my $metaname = $doc_array_rec->{'metaname'};
437 my $metapos = $doc_array_rec->{'metapos'};
438 my $metavalue = $doc_array_rec->{'metavalue'};
439
440 push(@all_docids,$docid);
441
442 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
443
444 if ($status != 0) {
445 # Catch error if set infodb entry failed
446 $global_status = $status;
447 $global_mess .= "Failed to set metadata key: $docid\n";
448 $global_mess .= "Exit status: $status\n";
449 $global_mess .= "System Error Message: $!\n";
450 $global_mess .= "-" x 20;
451 }
452 }
453
454 if ($global_status != 0) {
455 $global_mess .= "PATH: $ENV{'PATH'}\n";
456 $gsdl_cgi->generate_error($global_mess);
457 }
458 else {
459 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
460 $gsdl_cgi->generate_ok_message($mess);
461 }
462
463 # Release the lock once it is done
464 $self->unlock_collection($username, $collect);
465}
466
467
468
469sub dxml_metadata
470{
471 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
472 my $metaname = $parser->{'parameters'}->{'metaname'};
473 my $metamode = $parser->{'parameters'}->{'metamode'};
474
475 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
476
477 # Find the right metadata tag and checks if we are going to
478 # override it
479 #
480 # Note: This over writes the first metadata block it
481 # encountered. If there are multiple Sections in the doc.xml, it
482 # might not behave as you would expect
483
484 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
485## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
486## print STDERR "**** metamode = $metamode\n";
487
488 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
489 my $name_attr = $attrHash->{'name'};
490 if (($name_attr eq $metaname) && ($metamode eq "override")) {
491## print STDERR "**** got match!!\n";
492 # Get the value and override the current value
493 my $metavalue = $parser->{'parameters'}->{'metavalue'};
494 $attrHash->{'_content'} = $metavalue;
495
496 # Don't want it to wipe out any other pieces of metadata
497 $parser->{'parameters'}->{'metamode'} = "done";
498 }
499 }
500
501 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
502 return [$tagname => $attrHash];
503}
504
505
506sub dxml_description
507{
508 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
509 my $metamode = $parser->{'parameters'}->{'metamode'};
510
511 # Accumulate the metadata
512 # NOTE: This appends new metadata element to all description fields.
513 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
514 if (($metamode eq "accumulate") || ($metamode eq "override")) {
515 # If get to here and metamode is override, the this means there
516 # was no existing value to overide => treat as an append operation
517
518 # Tack a new metadata tag on to the end of the <Metadata>+ block
519 my $metaname = $parser->{'parameters'}->{'metaname'};
520 my $metavalue = $parser->{'parameters'}->{'metavalue'};
521
522 my $metadata_attr = { '_content' => $metavalue,
523 'name' => $metaname,
524 'mode' => "accumulate" };
525
526 my $append_metadata = [ "Metadata" => $metadata_attr ];
527 my $description_content = $attrHash->{'_content'};
528
529## print STDERR "**** appending to doc.xml\n";
530
531 push(@$description_content," ", $append_metadata ,"\n ");
532 $parser->{'parameters'}->{'metamode'} = "done";
533 }
534
535
536 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
537 return [$tagname => $attrHash];
538}
539
540
541
542sub dxml_start_section
543{
544 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
545
546 my $new_depth = scalar(@$contextArray);
547
548 if ($new_depth == 1) {
549 $parser->{'parameters'}->{'curr_section_depth'} = 1;
550 $parser->{'parameters'}->{'curr_section_num'} = "";
551 }
552
553 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
554 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
555
556 my $new_secnum;
557
558 if ($new_depth > $old_depth) {
559 # child subsection
560 $new_secnum = "$old_secnum.1";
561 }
562 elsif ($new_depth == $old_depth) {
563 # sibling section => increase it's value by 1
564 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
565 $tail_num++;
566 $new_secnum = $old_secnum;
567 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
568 }
569 else {
570 # back up to parent section => lopp off tail
571 $new_secnum = $old_secnum;
572 $new_secnum =~ s/\.\d+$//;
573 }
574
575 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
576 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
577
578 print STDERR "*** In Section: $new_secnum\n";
579}
580
581sub edit_xml_file
582{
583 my $self = shift @_;
584 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
585
586 # use XML::Rules to add it in (read in and out again)
587 my $parser = XML::Rules->new(start_rules => $start_rules,
588 rules => $rules,
589 style => 'filter',
590 output_encoding => 'utf8' );
591
592 my $xml_in = "";
593 if (!open(MIN,"<$filename")) {
594 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
595 }
596 else {
597 # Read all the text in
598 my $line;
599 while (defined ($line=<MIN>)) {
600 $xml_in .= $line;
601 }
602 close(MIN);
603
604 my $MOUT;
605 if (!open($MOUT,">$filename")) {
606 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
607 }
608 else {
609 # Matched lines will get handled by the call backs
610## my $xml_out = "";
611
612 binmode($MOUT,":utf8");
613 $parser->filter($xml_in,$MOUT, $options);
614
615# binmode(MOUT,":utf8");
616# print MOUT $xml_out;
617 close($MOUT);
618 }
619 }
620}
621
622
623sub edit_doc_xml
624{
625 my $self = shift @_;
626 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
627
628 # To monitor which section/subsection number we are in
629 my @start_rules =
630 ( 'Section' => \&dxml_start_section );
631
632 # use XML::Rules to add it in (read in and out again)
633 # Set the call back functions
634 my @rules =
635 ( _default => 'raw',
636 'Metadata' => \&dxml_metadata,
637 'Description' => \&dxml_description);
638
639 # Sets the parameters
640 my $options = { 'metaname' => $metaname,
641 'metapos' => $metapos,
642 'metavalue' => $metavalue,
643 'metamode' => $metamode };
644
645 if (defined $opt_secnum) {
646 $options->{'secnum'} = $opt_secnum;
647 }
648
649 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
650}
651
652sub set_archives_metadata_entry
653{
654 my $self = shift @_;
655 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
656
657 # Obtain the doc.xml path for the specified docID
658 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
659
660 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
661 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
662 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
663
664 # The $doc_xml_file is relative to the archives, and now let's get the full path
665 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
666 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
667
668 # Edit the doc.xml file with the specified metadata name, value and position.
669 # TODO: there is a potential problem here as this edit_doc_xml function
670 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
671 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
672
673 print STDERR "** away to call edit_doc_xml\n";
674
675 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
676 $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
677
678 print STDERR "*** finished edit_doc_xml\n";
679
680 return 0; # return 0 for now to indicate no error
681
682}
683
684
685sub set_archives_metadata
686{
687 my $self = shift @_;
688
689 my $username = $self->{'username'};
690 my $collect = $self->{'collect'};
691 my $gsdl_cgi = $self->{'gsdl_cgi'};
692 my $gsdlhome = $self->{'gsdlhome'};
693 my $infodbtype = $self->{'infodbtype'};
694
695
696
697 if ($baseaction::authentication_enabled) {
698 # Ensure the user is allowed to edit this collection
699 $self->authenticate_user($username, $collect);
700 }
701
702 my $site = $self->{'site'};
703
704 # Obtain the collect and archive dir
705 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
706
707 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
708
709 # Make sure the collection isn't locked by someone else
710 $self->lock_collection($username, $collect);
711
712 # look up additional args
713 my $docid = $self->{'d'};
714 my $metaname = $self->{'metaname'};
715 my $metavalue = $self->{'metavalue'};
716
717 my $metapos = $self->{'metapos'};
718 $metapos = 0 if (!defined $metapos);
719
720 my $metamode = $self->{'metamode'};
721 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
722 # make "accumulate" the default (less destructive, as won't actually
723 # delete any existing values)
724 $metamode = "accumulate";
725 }
726
727 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
728 $metaname,$metapos,$metavalue,$metamode);
729
730 # Release the lock once it is done
731 $self->unlock_collection($username, $collect);
732
733 if ($status == 0) {
734 my $mess = "set-archives-metadata successful: Key[$docid]\n";
735 $mess .= " $metaname";
736 $mess .= "->[$metapos]" if (defined $metapos);
737 $mess .= " = $metavalue";
738 $mess .= " ($metamode)\n";
739
740 $gsdl_cgi->generate_ok_message($mess);
741 }
742 else {
743 my $mess .= "Failed to set archives metadata key: $docid\n";
744 $mess .= "Exit status: $status\n";
745 $mess .= "System Error Message: $!\n";
746 $mess .= "-" x 20 . "\n";
747
748 $gsdl_cgi->generate_error($mess);
749 }
750}
751
752
753sub set_archives_metadata_array
754{
755 my $self = shift @_;
756
757 my $username = $self->{'username'};
758 my $collect = $self->{'collect'};
759 my $gsdl_cgi = $self->{'gsdl_cgi'};
760 my $gsdlhome = $self->{'gsdlhome'};
761
762 if ($baseaction::authentication_enabled) {
763 # Ensure the user is allowed to edit this collection
764 &authenticate_user($gsdl_cgi, $username, $collect);
765 }
766
767 my $site = $self->{'site'};
768 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
769
770 $gsdl_cgi->checked_chdir($collect_dir);
771
772 # Obtain the collect dir
773 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
774
775 # Make sure the collection isn't locked by someone else
776 $self->lock_collection($username, $collect);
777
778 # look up additional args
779
780 my $infodbtype = $self->{'infodbtype'};
781
782 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
783
784 my $json_str = $self->{'json'};
785 my $doc_array = decode_json $json_str;
786
787
788 my $global_status = 0;
789 my $global_mess = "";
790
791 my @all_docids = ();
792
793 foreach my $doc_array_rec ( @$doc_array ) {
794
795 my $docid = $doc_array_rec->{'docid'};
796 my $metaname = $doc_array_rec->{'metaname'};
797 my $metapos = $doc_array_rec->{'metapos'};
798 my $metamode = $self->{'metamode'};
799 my $metavalue = $doc_array_rec->{'metavalue'};
800
801 # Some sanity checks
802 $metapos = 0 if (!defined $metapos);
803
804 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
805 # make "accumulate" the default (less destructive, as won't actually
806 # delete any existing values)
807 $metamode = "accumulate";
808 }
809
810 push(@all_docids,$docid);
811
812 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
813 $metaname,$metapos,$metavalue,$metamode);
814
815 if ($status != 0) {
816 # Catch error if set infodb entry failed
817 $global_status = $status;
818 $global_mess .= "Failed to set metadata key: $docid\n";
819 $global_mess .= "Exit status: $status\n";
820 $global_mess .= "System Error Message: $!\n";
821 $global_mess .= "-" x 20 . "\n";
822 }
823 }
824
825 if ($global_status != 0) {
826 $global_mess .= "PATH: $ENV{'PATH'}\n";
827 $gsdl_cgi->generate_error($global_mess);
828 }
829 else {
830 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
831 $gsdl_cgi->generate_ok_message($mess);
832 }
833
834 # Release the lock once it is done
835 $self->unlock_collection($username, $collect);
836}
837
838
839sub mxml_metadata
840{
841 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
842 my $metaname = $parser->{'parameters'}->{'metaname'};
843 my $metamode = $parser->{'parameters'}->{'metamode'};
844
845 # Report error if we don't see FileName tag before this
846 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
847
848 # Don't do anything if we are not in the right FileSet
849 my $file_regexp = $parser->{'parameters'}->{'current_file'};
850 if ($file_regexp =~ /\.\*/) {
851 # Only interested in a file_regexp if it specifies precisely one
852 # file.
853 # So, skip anything with a .* in it as it is too general
854 return [$tagname => $attrHash];
855 }
856 my $src_file = $parser->{'parameters'}->{'src_file'};
857 if (!($src_file =~ /$file_regexp/)) {
858 return [$tagname => $attrHash];
859 }
860## print STDERR "*** mxl metamode = $metamode\n";
861
862 # Find the right metadata tag and checks if we are going to override it
863 my $name_attr = $attrHash->{'name'};
864 if (($name_attr eq $metaname) && ($metamode eq "override")) {
865 # Get the value and override the current value
866 my $metavalue = $parser->{'parameters'}->{'metavalue'};
867 $attrHash->{'_content'} = $metavalue;
868
869## print STDERR "**** overrideing metadata.xml\n";
870
871 # Don't want it to wipe out any other pieces of metadata
872 $parser->{'parameters'}->{'metamode'} = "done";
873 }
874
875 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
876 return [$tagname => $attrHash];
877}
878
879
880sub mxml_description
881{
882 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
883 my $metamode = $parser->{'parameters'}->{'metamode'};
884
885 # Failed... Report error if we don't see FileName tag before this
886 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
887
888 # Don't do anything if we are not in the right FileSet
889 my $file_regexp = $parser->{'parameters'}->{'current_file'};
890 if ($file_regexp =~ /\.\*/) {
891 # Only interested in a file_regexp if it specifies precisely one
892 # file.
893 # So, skip anything with a .* in it as it is too general
894 return [$tagname => $attrHash];
895 }
896 my $src_file = $parser->{'parameters'}->{'src_file'};
897 if (!($src_file =~ /$file_regexp/)) {
898 return [$tagname => $attrHash];
899 }
900
901 # Accumulate the metadata block to the end of the description block
902 # Note: This adds metadata block to all description blocks, so if there are
903 # multiple FileSets, it will add to all of them
904 if (($metamode eq "accumulate") || ($metamode eq "override")) {
905 # if metamode was "override" but get to here then it failed to
906 # find an item to override, in which case it should append its
907 # value to the end, just like the "accumulate" mode
908
909 # tack a new metadata tag on to the end of the <Metadata>+ block
910 my $metaname = $parser->{'parameters'}->{'metaname'};
911 my $metavalue = $parser->{'parameters'}->{'metavalue'};
912
913 my $metadata_attr = { '_content' => $metavalue,
914 'name' => $metaname,
915 'mode' => "accumulate" };
916
917 my $append_metadata = [ "Metadata" => $metadata_attr ];
918 my $description_content = $attrHash->{'_content'};
919
920## print STDERR "*** appending to metadata.xml\n";
921
922 # append the new metadata element to the end of the current
923 # content contained inside this tag
924 push(@$description_content," ", $append_metadata ,"\n ");
925
926 $parser->{'parameters'}->{'metamode'} = "done";
927 }
928
929 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
930 return [$tagname => $attrHash];
931}
932
933
934sub mxml_filename
935{
936 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
937
938 # Store the filename of the Current Fileset
939 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
940 # FileName tag must come before Description tag
941 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
942
943 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
944 return [$tagname => $attrHash];
945}
946
947
948sub mxml_fileset
949{
950 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
951
952 # Initilise the current_file
953 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
954 # FileName tag must come before Description tag
955 $parser->{'parameters'}->{'current_file'} = "";
956
957 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
958 return [$tagname => $attrHash];
959}
960
961
962sub edit_metadata_xml
963{
964 my $self = shift @_;
965 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
966
967 # Set the call-back functions for the metadata tags
968 my @rules =
969 ( _default => 'raw',
970 'FileName' => \&mxml_filename,
971 'Metadata' => \&mxml_metadata,
972 'Description' => \&mxml_description,
973 'FileSet' => \&mxml_fileset);
974
975 # use XML::Rules to add it in (read in and out again)
976 my $parser = XML::Rules->new(rules => \@rules,
977 style => 'filter',
978 output_encoding => 'utf8');
979
980 my $xml_in = "";
981 if (!open(MIN,"<$metadata_xml_filename")) {
982 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
983 }
984 else {
985 # Read them in
986 my $line;
987 while (defined ($line=<MIN>)) {
988 $xml_in .= $line;
989 }
990 close(MIN);
991
992 # Filter with the call-back functions
993 my $xml_out = "";
994
995 my $MOUT;
996 if (!open($MOUT,">$metadata_xml_filename")) {
997 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
998 }
999 else {
1000 binmode($MOUT,":utf8");
1001
1002 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1003 # At the moment, I will just hack it!
1004 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1005 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1006 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1007 #print MOUT $xml_out;
1008
1009 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1010 metavalue => $metavalue,
1011 metamode => $metamode,
1012 src_file => $src_file,
1013 current_file => undef} );
1014 close($MOUT);
1015 }
1016 }
1017}
1018
1019
1020sub set_import_metadata
1021{
1022 my $self = shift @_;
1023
1024 my $username = $self->{'username'};
1025 my $collect = $self->{'collect'};
1026 my $gsdl_cgi = $self->{'gsdl_cgi'};
1027 my $gsdlhome = $self->{'gsdlhome'};
1028 my $infodbtype = $self->{'infodbtype'};
1029
1030 if ($baseaction::authentication_enabled) {
1031 # Ensure the user is allowed to edit this collection
1032 $self->authenticate_user($username, $collect);
1033 }
1034
1035
1036 # Obtain the collect and archive dir
1037 my $site = $self->{'site'};
1038 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1039
1040 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1041 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1042
1043 # Make sure the collection isn't locked by someone else
1044 $self->lock_collection($username, $collect);
1045
1046 # look up additional args
1047 # want either d= or f=
1048 my $docid = $self->{'d'};
1049 my $import_file = $self->{'f'};
1050 if ((!defined $docid) && (!defined $import_file)) {
1051 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1052 }
1053
1054 # Get the parameters and set default mode to "accumulate"
1055 my $metaname = $self->{'metaname'};
1056 my $metavalue = $self->{'metavalue'};
1057## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1058 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1059 print STDERR "*** set import meta: val = $metavalue\n";
1060
1061 my $metamode = $self->{'metamode'};
1062 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1063 # make "accumulate" the default (less destructive, as won't actually
1064 # delete any existing values)
1065 $metamode = "accumulate";
1066 }
1067
1068 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1069 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1070 my $metadata_xml_file;
1071 my $import_filename = undef;
1072 if (defined $docid) {
1073 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1074 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1075
1076 # This now stores the full pathname
1077 $import_filename = $doc_rec->{'src-file'}->[0];
1078 }
1079 else {
1080 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1081 }
1082
1083 # figure out correct metadata.xml file [?]
1084 # Assuming the metadata.xml file is next to the source file
1085 # Note: This will not work if it is using the inherited metadata from the parent folder
1086 my ($import_tailname, $import_dirname)
1087 = File::Basename::fileparse($import_filename);
1088 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1089
1090 # Edit the metadata.xml
1091 # Modified by Jeffrey from DL Consulting
1092 # Handle the case where there is one metadata.xml file for multiple FileSets
1093 # The XML filter needs to know whether it is in the right FileSet
1094 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1095 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1096 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1097 $metaname, $metavalue, $metamode, $import_tailname);
1098
1099 # Release the lock once it is done
1100 $self->unlock_collection($username, $collect);
1101
1102 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1103 $mess .= " $metaname";
1104 $mess .= " = $metavalue";
1105 $mess .= " ($metamode)\n";
1106
1107 $gsdl_cgi->generate_ok_message($mess);
1108
1109}
1110
1111
1112sub remove_live_metadata
1113{
1114 my $self = shift @_;
1115
1116 my $username = $self->{'username'};
1117 my $collect = $self->{'collect'};
1118 my $gsdl_cgi = $self->{'gsdl_cgi'};
1119 my $gsdlhome = $self->{'gsdlhome'};
1120 my $infodbtype = $self->{'infodbtype'};
1121
1122 if ($baseaction::authentication_enabled) {
1123 # Ensure the user is allowed to edit this collection
1124 &authenticate_user($gsdl_cgi, $username, $collect);
1125 }
1126
1127 # Obtain the collect dir
1128 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1129 my $site = $self->{'site'};
1130 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1131
1132 # Make sure the collection isn't locked by someone else
1133 $self->lock_collection($username, $collect);
1134
1135 # look up additional args
1136 my $docid = $self->{'d'};
1137 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1138 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1139 }
1140
1141 # Generate the dbkey
1142 my $metaname = $self->{'metaname'};
1143 my $dbkey = "$docid.$metaname";
1144
1145 # To people who know $collect_tail please add some comments
1146 # Obtain the live gdbm_db path
1147 my $collect_tail = $collect;
1148 $collect_tail =~ s/^.*[\/\\]//;
1149 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1150 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1151
1152 # Remove the key
1153 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1154 my $status = system($cmd);
1155 if ($status != 0) {
1156 # Catch error if gdbmdel failed
1157 my $mess = "Failed to set metadata key: $dbkey\n";
1158
1159 $mess .= "PATH: $ENV{'PATH'}\n";
1160 $mess .= "cmd = $cmd\n";
1161 $mess .= "Exit status: $status\n";
1162 $mess .= "System Error Message: $!\n";
1163
1164 $gsdl_cgi->generate_error($mess);
1165 }
1166 else {
1167 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1168 }
1169
1170}
1171
1172
1173sub remove_metadata
1174{
1175 my $self = shift @_;
1176
1177 my $username = $self->{'username'};
1178 my $collect = $self->{'collect'};
1179 my $gsdl_cgi = $self->{'gsdl_cgi'};
1180 my $gsdlhome = $self->{'gsdlhome'};
1181 my $infodbtype = $self->{'infodbtype'};
1182
1183 if ($baseaction::authentication_enabled) {
1184 # Ensure the user is allowed to edit this collection
1185 &authenticate_user($gsdl_cgi, $username, $collect);
1186 }
1187
1188 # Obtain the collect dir
1189 my $site = $self->{'site'};
1190 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1191 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1192
1193 # Make sure the collection isn't locked by someone else
1194 $self->lock_collection($username, $collect);
1195
1196 # look up additional args
1197 my $docid = $self->{'d'};
1198 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1199 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1200 }
1201 my $metaname = $self->{'metaname'};
1202 my $metapos = $self->{'metapos'};
1203
1204 # To people who know $collect_tail please add some comments
1205 # Obtain the path to the database
1206 my $collect_tail = $collect;
1207 $collect_tail =~ s/^.*[\/\\]//;
1208 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1209 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1210
1211 # Read the docid entry
1212 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1213
1214 # Basically loop through and unescape_html the values
1215 foreach my $k (keys %$doc_rec) {
1216 my @escaped_v = ();
1217 foreach my $v (@{$doc_rec->{$k}}) {
1218 if ($k eq "contains") {
1219 # protect quotes in ".2;".3 etc
1220 $v =~ s/\"/\\\"/g;
1221 push(@escaped_v, $v);
1222 }
1223 else {
1224 my $ev = &ghtml::unescape_html($v);
1225 $ev =~ s/\"/\\\"/g;
1226 push(@escaped_v, $ev);
1227 }
1228 }
1229 $doc_rec->{$k} = \@escaped_v;
1230 }
1231
1232 # Check to make sure the key does exist
1233 if (!defined ($doc_rec->{$metaname})) {
1234 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1235 }
1236
1237 # Obtain the specified metadata pos
1238 $metapos = 0 if (!defined $metapos);
1239
1240 # consider check key is defined before deleting?
1241 # Loop through the metadata array and ignore the specified position
1242 my $filtered_metadata = [];
1243 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1244 for (my $i=0; $i<$num_metadata_vals; $i++) {
1245 my $metavalue = shift(@{$doc_rec->{$metaname}});
1246
1247 if ($i != $metapos) {
1248 push(@$filtered_metadata,$metavalue)
1249 }
1250 }
1251 $doc_rec->{$metaname} = $filtered_metadata;
1252
1253 # Turn the record back to string
1254 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1255
1256 # Store it back to the database
1257 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1258 my $status = system($cmd);
1259 if ($status != 0) {
1260 my $mess = "Failed to set metadata key: $docid\n";
1261
1262 $mess .= "PATH: $ENV{'PATH'}\n";
1263 $mess .= "cmd = $cmd\n";
1264 $mess .= "Exit status: $status\n";
1265 $mess .= "System Error Message: $!\n";
1266
1267 $gsdl_cgi->generate_error($mess);
1268 }
1269 else {
1270 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1271 $mess .= " $metaname";
1272 $mess .= "->[$metapos]" if (defined $metapos);
1273
1274 $gsdl_cgi->generate_ok_message($mess);
1275 }
1276}
1277
1278
1279# Was trying to reused the codes, but the functions need to be broken
1280# down more before they can be reused, otherwise there will be too
1281# much overhead and duplicate process...
1282sub insert_metadata
1283{
1284 my $self = shift @_;
1285
1286 my $username = $self->{'username'};
1287 my $collect = $self->{'collect'};
1288 my $gsdl_cgi = $self->{'gsdl_cgi'};
1289 my $gsdlhome = $self->{'gsdlhome'};
1290 my $infodbtype = $self->{'infodbtype'};
1291
1292 # If the import metadata and gdbm database have been updated, we
1293 # need to insert some notification to warn user that the the text
1294 # they see at the moment is not indexed and require a rebuild.
1295 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1296
1297 if ($baseaction::authentication_enabled) {
1298 # Ensure the user is allowed to edit this collection
1299 $self->authenticate_user($username, $collect);
1300 }
1301
1302 # Obtain the collect and archive dir
1303 my $site = $self->{'site'};
1304 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1305 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1306 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1307
1308 # Make sure the collection isn't locked by someone else
1309 $self->lock_collection($username, $collect);
1310
1311 # Check additional args
1312 my $docid = $self->{'d'};
1313 if (!defined($docid)) {
1314 $gsdl_cgi->generate_error("No document id is specified: d=...");
1315 }
1316 my $metaname = $self->{'metaname'};
1317 if (!defined($metaname)) {
1318 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1319 }
1320 my $metavalue = $self->{'metavalue'};
1321 if (!defined($metavalue) || $metavalue eq "") {
1322 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1323 }
1324 # make "accumulate" the default (less destructive, as won't actually
1325 # delete any existing values)
1326 my $metamode = "accumulate";
1327
1328 #=======================================================================#
1329 # set_import_metadata [START]
1330 #=======================================================================#
1331 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1332 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1333 my $metadata_xml_file;
1334 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1335 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1336
1337 # This now stores the full pathname
1338 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1339
1340 # figure out correct metadata.xml file [?]
1341 # Assuming the metadata.xml file is next to the source file
1342 # Note: This will not work if it is using the inherited metadata from the parent folder
1343 my ($import_tailname, $import_dirname)
1344 = File::Basename::fileparse($import_filename);
1345 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1346
1347 # Shane's escape characters
1348 $metavalue = pack "U0C*", unpack "C*", $metavalue;
1349 $metavalue =~ s/\,/&#44;/g;
1350 $metavalue =~ s/\:/&#58;/g;
1351 $metavalue =~ s/\|/&#124;/g;
1352 $metavalue =~ s/\(/&#40;/g;
1353 $metavalue =~ s/\)/&#41;/g;
1354 $metavalue =~ s/\[/&#91;/g;
1355 $metavalue =~ s/\\/&#92;/g;
1356 $metavalue =~ s/\]/&#93;/g;
1357 $metavalue =~ s/\{/&#123;/g;
1358 $metavalue =~ s/\}/&#125;/g;
1359 $metavalue =~ s/\"/&#34;/g;
1360 $metavalue =~ s/\`/&#96;/g;
1361 $metavalue =~ s/\n/_newline_/g;
1362
1363 # Edit the metadata.xml
1364 # Modified by Jeffrey from DL Consulting
1365 # Handle the case where there is one metadata.xml file for multiple FileSets
1366 # The XML filter needs to know whether it is in the right FileSet
1367 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1368 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1369 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1370 $metaname, $metavalue, $metamode, $import_tailname);
1371 #=======================================================================#
1372 # set_import_metadata [END]
1373 #=======================================================================#
1374
1375
1376 #=======================================================================#
1377 # set_metadata (accumulate version) [START]
1378 #=======================================================================#
1379 # To people who know $collect_tail please add some comments
1380 # Obtain path to the database
1381 my $collect_tail = $collect;
1382 $collect_tail =~ s/^.*[\/\\]//;
1383 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1384 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1385
1386 # Read the docid entry
1387 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1388
1389 foreach my $k (keys %$doc_rec) {
1390 my @escaped_v = ();
1391 foreach my $v (@{$doc_rec->{$k}}) {
1392 if ($k eq "contains") {
1393 # protect quotes in ".2;".3 etc
1394 $v =~ s/\"/\\\"/g;
1395 push(@escaped_v, $v);
1396 }
1397 else {
1398 my $ev = &ghtml::unescape_html($v);
1399 $ev =~ s/\"/\\\"/g;
1400 push(@escaped_v, $ev);
1401 }
1402 }
1403 $doc_rec->{$k} = \@escaped_v;
1404 }
1405
1406 # Protect the quotes
1407 $metavalue =~ s/\"/\\\"/g;
1408
1409 # Adds the pending macro
1410 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1411
1412 # If the metadata doesn't exist, create a new one
1413 if (!defined($doc_rec->{$metaname})){
1414 $doc_rec->{$metaname} = [ $macro_metavalue ];
1415 }
1416 # Else, let's acculumate the values
1417 else {
1418 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1419 }
1420
1421 # Generate the record string
1422 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1423
1424 # Store it into GDBM
1425 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1426 my $status = system($cmd);
1427 if ($status != 0) {
1428 # Catch error if gdbmget failed
1429 my $mess = "Failed to set metadata key: $docid\n";
1430
1431 $mess .= "PATH: $ENV{'PATH'}\n";
1432 $mess .= "cmd = $cmd\n";
1433 $mess .= "Exit status: $status\n";
1434 $mess .= "System Error Message: $!\n";
1435
1436 $gsdl_cgi->generate_error($mess);
1437 }
1438 else {
1439 my $mess = "insert-metadata successful: Key[$docid]\n";
1440 $mess .= " [In metadata.xml] $metaname";
1441 $mess .= " = $metavalue\n";
1442 $mess .= " [In database] $metaname";
1443 $mess .= " = $macro_metavalue\n";
1444 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1445 $gsdl_cgi->generate_ok_message($mess);
1446 }
1447 #=======================================================================#
1448 # set_metadata (accumulate version) [END]
1449 #=======================================================================#
1450
1451 # Release the lock once it is done
1452 $self->unlock_collection($username, $collect);
1453}
1454
14551;
Note: See TracBrowser for help on using the repository browser.