source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 24943

Last change on this file since 24943 was 24943, checked in by sjm84, 12 years ago

Added remove_import_metadata and remove_archive_metadata, more functionality should be added to both at some stage, also some other fixes

File size: 55.7 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43
44@metadataaction::ISA = ('baseaction');
45
46
47my $action_table =
48{
49 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [] },
51
52 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
53 'optional-args' => [ "metapos" ] },
54
55 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
56 'optional-args' => [ ] },
57
58 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
59 'optional-args' => [ "metapos" ] },
60
61 "set-metadata-array" => { 'compulsory-args' => [ "json" ],
62 'optional-args' => [ ] },
63
64 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
65 'optional-args' => [ "metapos", "metamode" ]
66 # metamode can be "accumulate", "override",
67 },
68
69 "set-archives-metadata-array" => { 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "metamode" ]
71 },
72
73 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
74 'optional-args' => [ "d", "f", "metamode" ] # Need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
75 # metamode can be "accumulate", "override", or "unique-id"
76 },
77
78 "remove-import-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
79 'optional-args' => [ ] },
80
81 "remove-archives-metadata" => { 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
82 'optional-args' => [ "metapos" ] },
83
84 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
85 'optional-args' => [ ] },
86
87 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
88 'optional-args' => [ "metapos" ] },
89
90 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
91 'optional-args' => [ ]
92 }
93};
94
95
96sub new
97{
98 my $class = shift (@_);
99 my ($gsdl_cgi,$iis6_mode) = @_;
100
101 # Treat metavalue specially. To transmit this through a GET request
102 # the Javascript side has url-encoded it, so here we need to decode
103 # it before proceeding
104
105 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
106 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
107
108 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
109
110 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
111
112 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
113
114 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
115
116 return bless $self, $class;
117}
118
119
120sub get_live_metadata
121{
122 my $self = shift @_;
123
124 my $username = $self->{'username'};
125 my $collect = $self->{'collect'};
126 my $gsdl_cgi = $self->{'gsdl_cgi'};
127 my $gsdlhome = $self->{'gsdlhome'};
128 my $infodbtype = $self->{'infodbtype'};
129
130 # live metadata gets/saves value scoped (prefixed) by the current usename
131 # so (for now) let's not bother to enforce authentication
132
133 # Obtain the collect dir
134 my $site = $self->{'site'};
135 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
136 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
137
138 # Make sure the collection isn't locked by someone else
139 $self->lock_collection($username, $collect);
140
141 # look up additional args
142 my $docid = $self->{'d'};
143 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
144 $gsdl_cgi->generate_error("No docid (d=...) specified.");
145 }
146
147 # Generate the dbkey
148 my $metaname = $self->{'metaname'};
149 my $dbkey = "$docid.$metaname";
150
151 # To people who know $collect_tail please add some comments
152 # Obtain path to the database
153 my $collect_tail = $collect;
154 $collect_tail =~ s/^.*[\/|\\]//;
155 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
156 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
157
158 # Obtain the content of the key
159 my $cmd = "gdbmget $infodb_file_path $dbkey";
160 if (open(GIN,"$cmd |") == 0) {
161 # Catch error if gdbmget failed
162 my $mess = "Failed to get metadata key: $metaname\n";
163 $mess .= "$!\n";
164
165 $gsdl_cgi->generate_error($mess);
166 }
167 else {
168 binmode(GIN,":utf8");
169 # Read everything in and concatenate them into $metavalue
170 my $metavalue = "";
171 my $line;
172 while (defined ($line=<GIN>)) {
173 $metavalue .= $line;
174 }
175 close(GIN);
176 chomp($metavalue); # Get rid off the tailing newlines
177 $gsdl_cgi->generate_ok_message("$metavalue");
178 }
179
180 # Release the lock once it is done
181 $self->unlock_collection($username, $collect);
182}
183
184
185sub get_metadata
186{
187 my $self = shift @_;
188
189 my $username = $self->{'username'};
190 my $collect = $self->{'collect'};
191 my $gsdl_cgi = $self->{'gsdl_cgi'};
192 my $gsdlhome = $self->{'gsdlhome'};
193
194 # Authenticate user if it is enabled
195 if ($baseaction::authentication_enabled) {
196 # Ensure the user is allowed to edit this collection
197 &authenticate_user($gsdl_cgi, $username, $collect);
198 }
199
200 # Obtain the collect dir
201 my $site = $self->{'site'};
202 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
203 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
204
205 # Make sure the collection isn't locked by someone else
206 $self->lock_collection($username, $collect);
207
208 # look up additional args
209 my $docid = $self->{'d'};
210 my $metaname = $self->{'metaname'};
211 my $metapos = $self->{'metapos'};
212 my $infodbtype = $self->{'infodbtype'};
213
214 # To people who know $collect_tail please add some comments
215 # Obtain path to the database
216 my $collect_tail = $collect;
217 $collect_tail =~ s/^.*[\/\\]//;
218 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
219 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
220
221 # Read the docid entry
222 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
223
224 # Basically loop through and unescape_html the values
225 foreach my $k (keys %$doc_rec) {
226 my @escaped_v = ();
227 foreach my $v (@{$doc_rec->{$k}}) {
228 my $ev = &ghtml::unescape_html($v);
229 push(@escaped_v, $ev);
230 }
231 $doc_rec->{$k} = \@escaped_v;
232 }
233
234 # Obtain the specified metadata value
235 $metapos = 0 if (!defined $metapos);
236 my $metavalue = $doc_rec->{$metaname}->[$metapos];
237 $gsdl_cgi->generate_ok_message("$metavalue");
238
239 # Release the lock once it is done
240 $self->unlock_collection($username, $collect);
241}
242
243
244sub set_live_metadata
245{
246 my $self = shift @_;
247
248 my $username = $self->{'username'};
249 my $collect = $self->{'collect'};
250 my $gsdl_cgi = $self->{'gsdl_cgi'};
251 my $gsdlhome = $self->{'gsdlhome'};
252 my $infodbtype = $self->{'infodbtype'};
253
254 if ($baseaction::authentication_enabled) {
255 # Ensure the user is allowed to edit this collection
256 &authenticate_user($gsdl_cgi, $username, $collect);
257 }
258
259 # Obtain the collect dir
260 my $site = $self->{'site'};
261 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
262 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
263
264 # Make sure the collection isn't locked by someone else
265 $self->lock_collection($username, $collect);
266
267 # look up additional args
268 my $docid = $self->{'d'};
269 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
270 $gsdl_cgi->generate_error("No docid (d=...) specified.");
271 }
272 my $metavalue = $self->{'metavalue'};
273
274
275 # Generate the dbkey
276 my $metaname = $self->{'metaname'};
277 my $dbkey = "$docid.$metaname";
278
279 # To people who know $collect_tail please add some comments
280 # Obtain path to the database
281 my $collect_tail = $collect;
282 $collect_tail =~ s/^.*[\/\\]//;
283 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
284 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
285
286 # Set the new value
287 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
288 my $status = system($cmd);
289 if ($status != 0) {
290 # Catch error if gdbmget failed
291 my $mess = "Failed to set metadata key: $dbkey\n";
292
293 $mess .= "PATH: $ENV{'PATH'}\n";
294 $mess .= "cmd = $cmd\n";
295 $mess .= "Exit status: $status\n";
296 $mess .= "System Error Message: $!\n";
297
298 $gsdl_cgi->generate_error($mess);
299 }
300 else {
301 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
302 }
303
304 # Release the lock once it is done
305 $self->unlock_collection($username, $collect);
306}
307
308sub set_metadata_entry
309{
310 my $self = shift @_;
311 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
312
313 # To people who know $collect_tail please add some comments
314 # Obtain path to the database
315 my $collect_tail = $collect;
316 $collect_tail =~ s/^.*[\/\\]//;
317 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
318 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
319
320# print STDERR "**** infodb file path = $infodb_file_path\n";
321# print STDERR "***** infodb type = $infodbtype\n";
322
323 # Read the docid entry
324 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
325
326 # Set the metadata value
327 if (defined $metapos) {
328 $doc_rec->{$metaname}->[$metapos] = $metavalue;
329 }
330 else {
331 $doc_rec->{$metaname} = [ $metavalue ];
332 }
333
334 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
335
336 return $status;
337
338}
339
340sub set_metadata
341{
342 my $self = shift @_;
343
344 my $username = $self->{'username'};
345 my $collect = $self->{'collect'};
346 my $gsdl_cgi = $self->{'gsdl_cgi'};
347 my $gsdlhome = $self->{'gsdlhome'};
348
349 if ($baseaction::authentication_enabled) {
350 # Ensure the user is allowed to edit this collection
351 &authenticate_user($gsdl_cgi, $username, $collect);
352 }
353
354 my $site = $self->{'site'};
355 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
356
357 $gsdl_cgi->checked_chdir($collect_dir);
358
359 # Obtain the collect dir
360 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
361
362 # Make sure the collection isn't locked by someone else
363 $self->lock_collection($username, $collect);
364
365 # look up additional args
366 my $docid = $self->{'d'};
367 my $metaname = $self->{'metaname'};
368 my $metapos = $self->{'metapos'};
369 my $metavalue = $self->{'metavalue'};
370 my $infodbtype = $self->{'infodbtype'};
371
372 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
373
374 if ($status != 0) {
375 # Catch error if set infodb entry failed
376 my $mess = "Failed to set metadata key: $docid\n";
377
378 $mess .= "PATH: $ENV{'PATH'}\n";
379 $mess .= "Exit status: $status\n";
380 $mess .= "System Error Message: $!\n";
381
382 $gsdl_cgi->generate_error($mess);
383 }
384 else {
385 my $mess = "set-metadata successful: Key[$docid]\n";
386 $mess .= " $metaname";
387 $mess .= "->[$metapos]" if (defined $metapos);
388 $mess .= " = $metavalue";
389
390 $gsdl_cgi->generate_ok_message($mess);
391 }
392
393 # Release the lock once it is done
394 $self->unlock_collection($username, $collect);
395}
396
397
398
399
400sub set_metadata_array
401{
402 my $self = shift @_;
403
404 my $username = $self->{'username'};
405 my $collect = $self->{'collect'};
406 my $gsdl_cgi = $self->{'gsdl_cgi'};
407 my $gsdlhome = $self->{'gsdlhome'};
408
409 if ($baseaction::authentication_enabled) {
410 # Ensure the user is allowed to edit this collection
411 &authenticate_user($gsdl_cgi, $username, $collect);
412 }
413
414 my $site = $self->{'site'};
415 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
416
417 $gsdl_cgi->checked_chdir($collect_dir);
418
419 # Obtain the collect dir
420 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
421
422 # Make sure the collection isn't locked by someone else
423 $self->lock_collection($username, $collect);
424
425 # look up additional args
426
427 my $infodbtype = $self->{'infodbtype'};
428
429 my $json_str = $self->{'json'};
430 my $doc_array = decode_json $json_str;
431
432
433 my $global_status = 0;
434 my $global_mess = "";
435
436 my @all_docids = ();
437
438 foreach my $doc_array_rec ( @$doc_array ) {
439
440 my $docid = $doc_array_rec->{'docid'};
441 my $metaname = $doc_array_rec->{'metaname'};
442 my $metapos = $doc_array_rec->{'metapos'};
443 my $metavalue = $doc_array_rec->{'metavalue'};
444
445 push(@all_docids,$docid);
446
447 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
448
449 if ($status != 0) {
450 # Catch error if set infodb entry failed
451 $global_status = $status;
452 $global_mess .= "Failed to set metadata key: $docid\n";
453 $global_mess .= "Exit status: $status\n";
454 $global_mess .= "System Error Message: $!\n";
455 $global_mess .= "-" x 20;
456 }
457 }
458
459 if ($global_status != 0) {
460 $global_mess .= "PATH: $ENV{'PATH'}\n";
461 $gsdl_cgi->generate_error($global_mess);
462 }
463 else {
464 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
465 $gsdl_cgi->generate_ok_message($mess);
466 }
467
468 # Release the lock once it is done
469 $self->unlock_collection($username, $collect);
470}
471
472
473
474sub dxml_metadata
475{
476 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
477 my $metaname = $parser->{'parameters'}->{'metaname'};
478 my $metamode = $parser->{'parameters'}->{'metamode'};
479
480 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
481
482 # Find the right metadata tag and checks if we are going to
483 # override it
484 #
485 # Note: This over writes the first metadata block it
486 # encountered. If there are multiple Sections in the doc.xml, it
487 # might not behave as you would expect
488
489 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
490## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
491## print STDERR "**** metamode = $metamode\n";
492
493 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
494 my $name_attr = $attrHash->{'name'};
495 if (($name_attr eq $metaname) && ($metamode eq "override")) {
496## print STDERR "**** got match!!\n";
497 # Get the value and override the current value
498 my $metavalue = $parser->{'parameters'}->{'metavalue'};
499 $attrHash->{'_content'} = $metavalue;
500
501 # Don't want it to wipe out any other pieces of metadata
502 $parser->{'parameters'}->{'metamode'} = "done";
503 }
504 }
505
506 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
507 return [$tagname => $attrHash];
508}
509
510
511sub dxml_description
512{
513 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
514 my $metamode = $parser->{'parameters'}->{'metamode'};
515
516 # Accumulate the metadata
517 # NOTE: This appends new metadata element to all description fields.
518 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
519 if (($metamode eq "accumulate") || ($metamode eq "override")) {
520 # If get to here and metamode is override, the this means there
521 # was no existing value to overide => treat as an append operation
522
523 # Tack a new metadata tag on to the end of the <Metadata>+ block
524 my $metaname = $parser->{'parameters'}->{'metaname'};
525 my $metavalue = $parser->{'parameters'}->{'metavalue'};
526
527 my $metadata_attr = { '_content' => $metavalue,
528 'name' => $metaname,
529 'mode' => "accumulate" };
530
531 my $append_metadata = [ "Metadata" => $metadata_attr ];
532 my $description_content = $attrHash->{'_content'};
533
534## print STDERR "**** appending to doc.xml\n";
535
536 push(@$description_content," ", $append_metadata ,"\n ");
537 $parser->{'parameters'}->{'metamode'} = "done";
538 }
539
540
541 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
542 return [$tagname => $attrHash];
543}
544
545
546
547sub dxml_start_section
548{
549 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
550
551 my $new_depth = scalar(@$contextArray);
552
553 if ($new_depth == 1) {
554 $parser->{'parameters'}->{'curr_section_depth'} = 1;
555 $parser->{'parameters'}->{'curr_section_num'} = "";
556 }
557
558 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
559 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
560
561 my $new_secnum;
562
563 if ($new_depth > $old_depth) {
564 # child subsection
565 $new_secnum = "$old_secnum.1";
566 }
567 elsif ($new_depth == $old_depth) {
568 # sibling section => increase it's value by 1
569 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
570 $tail_num++;
571 $new_secnum = $old_secnum;
572 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
573 }
574 else {
575 # back up to parent section => lopp off tail
576 $new_secnum = $old_secnum;
577 $new_secnum =~ s/\.\d+$//;
578 }
579
580 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
581 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
582
583 print STDERR "*** In Section: $new_secnum\n";
584}
585
586sub edit_xml_file
587{
588 my $self = shift @_;
589 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
590
591 # use XML::Rules to add it in (read in and out again)
592 my $parser = XML::Rules->new(start_rules => $start_rules,
593 rules => $rules,
594 style => 'filter',
595 output_encoding => 'utf8' );
596
597 my $xml_in = "";
598 if (!open(MIN,"<$filename")) {
599 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
600 }
601 else {
602 # Read all the text in
603 my $line;
604 while (defined ($line=<MIN>)) {
605 $xml_in .= $line;
606 }
607 close(MIN);
608
609 my $MOUT;
610 if (!open($MOUT,">$filename")) {
611 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
612 }
613 else {
614 # Matched lines will get handled by the call backs
615## my $xml_out = "";
616
617 binmode($MOUT,":utf8");
618 $parser->filter($xml_in,$MOUT, $options);
619
620# binmode(MOUT,":utf8");
621# print MOUT $xml_out;
622 close($MOUT);
623 }
624 }
625}
626
627
628sub edit_doc_xml
629{
630 my $self = shift @_;
631 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
632
633 # To monitor which section/subsection number we are in
634 my @start_rules =
635 ( 'Section' => \&dxml_start_section );
636
637 # use XML::Rules to add it in (read in and out again)
638 # Set the call back functions
639 my @rules =
640 ( _default => 'raw',
641 'Metadata' => \&dxml_metadata,
642 'Description' => \&dxml_description);
643
644 # Sets the parameters
645 my $options = { 'metaname' => $metaname,
646 'metapos' => $metapos,
647 'metavalue' => $metavalue,
648 'metamode' => $metamode };
649
650 if (defined $opt_secnum) {
651 $options->{'secnum'} = $opt_secnum;
652 }
653
654 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
655}
656
657sub set_archives_metadata_entry
658{
659 my $self = shift @_;
660 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
661
662 # Obtain the doc.xml path for the specified docID
663 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
664
665 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
666 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
667 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
668
669 # The $doc_xml_file is relative to the archives, and now let's get the full path
670 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
671 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
672
673 # Edit the doc.xml file with the specified metadata name, value and position.
674 # TODO: there is a potential problem here as this edit_doc_xml function
675 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
676 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
677
678 print STDERR "** away to call edit_doc_xml\n";
679
680 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
681 $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
682
683 print STDERR "*** finished edit_doc_xml\n";
684
685 return 0; # return 0 for now to indicate no error
686
687}
688
689
690sub set_archives_metadata
691{
692 my $self = shift @_;
693
694 my $username = $self->{'username'};
695 my $collect = $self->{'collect'};
696 my $gsdl_cgi = $self->{'gsdl_cgi'};
697 my $gsdlhome = $self->{'gsdlhome'};
698 my $infodbtype = $self->{'infodbtype'};
699
700 if ($baseaction::authentication_enabled) {
701 # Ensure the user is allowed to edit this collection
702 $self->authenticate_user($username, $collect);
703 }
704
705 my $site = $self->{'site'};
706
707 # Obtain the collect and archive dir
708 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
709
710 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
711
712 # Make sure the collection isn't locked by someone else
713 $self->lock_collection($username, $collect);
714
715 # look up additional args
716 my $docid = $self->{'d'};
717 my $metaname = $self->{'metaname'};
718 my $metavalue = $self->{'metavalue'};
719
720 my $metapos = $self->{'metapos'};
721 $metapos = 0 if (!defined $metapos);
722
723 my $metamode = $self->{'metamode'};
724 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
725 # make "accumulate" the default (less destructive, as won't actually
726 # delete any existing values)
727 $metamode = "accumulate";
728 }
729
730 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
731 $metaname,$metapos,$metavalue,$metamode);
732
733 # Release the lock once it is done
734 $self->unlock_collection($username, $collect);
735
736 if ($status == 0) {
737 my $mess = "set-archives-metadata successful: Key[$docid]\n";
738 $mess .= " $metaname";
739 $mess .= "->[$metapos]" if (defined $metapos);
740 $mess .= " = $metavalue";
741 $mess .= " ($metamode)\n";
742
743 $gsdl_cgi->generate_ok_message($mess);
744 }
745 else {
746 my $mess .= "Failed to set archives metadata key: $docid\n";
747 $mess .= "Exit status: $status\n";
748 $mess .= "System Error Message: $!\n";
749 $mess .= "-" x 20 . "\n";
750
751 $gsdl_cgi->generate_error($mess);
752 }
753}
754
755
756sub set_archives_metadata_array
757{
758 my $self = shift @_;
759
760 my $username = $self->{'username'};
761 my $collect = $self->{'collect'};
762 my $gsdl_cgi = $self->{'gsdl_cgi'};
763 my $gsdlhome = $self->{'gsdlhome'};
764
765 if ($baseaction::authentication_enabled) {
766 # Ensure the user is allowed to edit this collection
767 &authenticate_user($gsdl_cgi, $username, $collect);
768 }
769
770 my $site = $self->{'site'};
771 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
772
773 $gsdl_cgi->checked_chdir($collect_dir);
774
775 # Obtain the collect dir
776 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
777
778 # Make sure the collection isn't locked by someone else
779 $self->lock_collection($username, $collect);
780
781 # look up additional args
782
783 my $infodbtype = $self->{'infodbtype'};
784
785 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
786
787 my $json_str = $self->{'json'};
788 my $doc_array = decode_json $json_str;
789
790
791 my $global_status = 0;
792 my $global_mess = "";
793
794 my @all_docids = ();
795
796 foreach my $doc_array_rec ( @$doc_array ) {
797
798 my $docid = $doc_array_rec->{'docid'};
799 my $metaname = $doc_array_rec->{'metaname'};
800 my $metapos = $doc_array_rec->{'metapos'};
801 my $metamode = $self->{'metamode'};
802 my $metavalue = $doc_array_rec->{'metavalue'};
803
804 # Some sanity checks
805 $metapos = 0 if (!defined $metapos);
806
807 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
808 # make "accumulate" the default (less destructive, as won't actually
809 # delete any existing values)
810 $metamode = "accumulate";
811 }
812
813 push(@all_docids,$docid);
814
815 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
816 $metaname,$metapos,$metavalue,$metamode);
817
818 if ($status != 0) {
819 # Catch error if set infodb entry failed
820 $global_status = $status;
821 $global_mess .= "Failed to set metadata key: $docid\n";
822 $global_mess .= "Exit status: $status\n";
823 $global_mess .= "System Error Message: $!\n";
824 $global_mess .= "-" x 20 . "\n";
825 }
826 }
827
828 if ($global_status != 0) {
829 $global_mess .= "PATH: $ENV{'PATH'}\n";
830 $gsdl_cgi->generate_error($global_mess);
831 }
832 else {
833 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
834 $gsdl_cgi->generate_ok_message($mess);
835 }
836
837 # Release the lock once it is done
838 $self->unlock_collection($username, $collect);
839}
840
841sub remove_archives_metadata
842{
843 my $self = shift @_;
844
845 my $username = $self->{'username'};
846 my $collect = $self->{'collect'};
847 my $gsdl_cgi = $self->{'gsdl_cgi'};
848 my $gsdlhome = $self->{'gsdlhome'};
849 my $infodbtype = $self->{'infodbtype'};
850
851 if ($baseaction::authentication_enabled)
852 {
853 # Ensure the user is allowed to edit this collection
854 &authenticate_user($gsdl_cgi, $username, $collect);
855 }
856
857 my $site = $self->{'site'};
858
859 # Obtain the collect and archive dir
860 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
861
862 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
863
864 # Make sure the collection isn't locked by someone else
865 $self->lock_collection($username, $collect);
866
867 # look up additional args
868 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
869
870 my $metaname = $self->{'metaname'};
871 my $metapos = $self->{'metapos'};
872 $metapos = 0 if (!defined $metapos);
873
874 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
875 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
876
877 # This now stores the full pathname
878 my $import_filename = $doc_rec->{'doc-file'}->[0];
879
880 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $import_filename), $metaname, $metapos, $docid_secnum);
881
882 # Release the lock once it is done
883 $self->unlock_collection($username, $collect);
884
885 if ($status == 0)
886 {
887 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
888 $mess .= " $metaname";
889 $mess .= "->[$metapos]" if (defined $metapos);
890
891 $gsdl_cgi->generate_ok_message($mess);
892 }
893 else
894 {
895 my $mess .= "Failed to remove archives metadata key: $docid\n";
896 $mess .= "Exit status: $status\n";
897 $mess .= "System Error Message: $!\n";
898 $mess .= "-" x 20 . "\n";
899
900 $gsdl_cgi->generate_error($mess);
901 }
902}
903
904sub remove_from_doc_xml
905{
906 my $self = shift @_;
907 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
908
909 my @start_rules = ('Section' => \&dxml_start_section);
910
911 # Set the call-back functions for the metadata tags
912 my @rules =
913 (
914 _default => 'raw',
915 'Metadata' => \&rfdxml_metadata
916 );
917
918 my $parser = XML::Rules->new
919 (
920 start_rules => \@start_rules,
921 rules => \@rules,
922 style => 'filter',
923 output_encoding => 'utf8'
924 );
925
926 my $status = 0;
927 my $xml_in = "";
928 if (!open(MIN,"<$doc_xml_filename"))
929 {
930 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
931 $status = 1;
932 }
933 else
934 {
935 # Read them in
936 my $line;
937 while (defined ($line=<MIN>)) {
938 $xml_in .= $line;
939 }
940 close(MIN);
941
942 # Filter with the call-back functions
943 my $xml_out = "";
944
945 my $MOUT;
946 if (!open($MOUT,">$doc_xml_filename")) {
947 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
948 $status = 1;
949 }
950 else {
951 binmode($MOUT,":utf8");
952 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, secid => $secid});
953 close($MOUT);
954 }
955 }
956 return $status;
957}
958
959sub rfdxml_metadata
960{
961 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
962
963 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
964 {
965 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
966 return [$tagname => $attrHash];
967 }
968
969 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
970 {
971 if (!defined $parser->{'parameters'}->{'poscount'})
972 {
973 $parser->{'parameters'}->{'poscount'} = 0;
974 }
975 else
976 {
977 $parser->{'parameters'}->{'poscount'}++;
978 }
979 }
980
981 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
982 {
983 return [];
984 }
985
986 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
987 return [$tagname => $attrHash];
988}
989
990sub mxml_metadata
991{
992 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
993 my $metaname = $parser->{'parameters'}->{'metaname'};
994 my $metamode = $parser->{'parameters'}->{'metamode'};
995
996 # Report error if we don't see FileName tag before this
997 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
998
999 # Don't do anything if we are not in the right FileSet
1000 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1001 if ($file_regexp =~ /\.\*/) {
1002 # Only interested in a file_regexp if it specifies precisely one
1003 # file.
1004 # So, skip anything with a .* in it as it is too general
1005 return [$tagname => $attrHash];
1006 }
1007 my $src_file = $parser->{'parameters'}->{'src_file'};
1008 if (!($src_file =~ /$file_regexp/)) {
1009 return [$tagname => $attrHash];
1010 }
1011## print STDERR "*** mxl metamode = $metamode\n";
1012
1013 # Find the right metadata tag and checks if we are going to override it
1014 my $name_attr = $attrHash->{'name'};
1015 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1016 # Get the value and override the current value
1017 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1018 $attrHash->{'_content'} = $metavalue;
1019
1020## print STDERR "**** overrideing metadata.xml\n";
1021
1022 # Don't want it to wipe out any other pieces of metadata
1023 $parser->{'parameters'}->{'metamode'} = "done";
1024 }
1025
1026 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1027 return [$tagname => $attrHash];
1028}
1029
1030
1031sub mxml_description
1032{
1033 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1034 my $metamode = $parser->{'parameters'}->{'metamode'};
1035
1036 # Failed... Report error if we don't see FileName tag before this
1037 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1038
1039 # Don't do anything if we are not in the right FileSet
1040 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1041 if ($file_regexp =~ m/\.\*/) {
1042 # Only interested in a file_regexp if it specifies precisely one
1043 # file.
1044 # So, skip anything with a .* in it as it is too general
1045 return [$tagname => $attrHash];
1046 }
1047 my $src_file = $parser->{'parameters'}->{'src_file'};
1048
1049 if (!($src_file =~ m/$file_regexp/)) {
1050 return [$tagname => $attrHash];
1051 }
1052
1053 # Accumulate the metadata block to the end of the description block
1054 # Note: This adds metadata block to all description blocks, so if there are
1055 # multiple FileSets, it will add to all of them
1056 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1057 # if metamode was "override" but get to here then it failed to
1058 # find an item to override, in which case it should append its
1059 # value to the end, just like the "accumulate" mode
1060
1061 # tack a new metadata tag on to the end of the <Metadata>+ block
1062 my $metaname = $parser->{'parameters'}->{'metaname'};
1063 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1064
1065 my $metadata_attr = { '_content' => $metavalue,
1066 'name' => $metaname,
1067 'mode' => "accumulate" };
1068
1069 my $append_metadata = [ "Metadata" => $metadata_attr ];
1070 my $description_content = $attrHash->{'_content'};
1071
1072## print STDERR "*** appending to metadata.xml\n";
1073
1074 # append the new metadata element to the end of the current
1075 # content contained inside this tag
1076 if (ref($description_content) eq "") {
1077 # => string or numeric literal
1078 # this is caused by a <Description> block has no <Metadata> child elements
1079 # => set up an empty array in '_content'
1080 $attrHash->{'_content'} = [ "\n" ];
1081 $description_content = $attrHash->{'_content'};
1082 }
1083
1084 push(@$description_content," ", $append_metadata ,"\n ");
1085 $parser->{'parameters'}->{'metamode'} = "done";
1086 }
1087
1088 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1089 return [$tagname => $attrHash];
1090}
1091
1092
1093sub mxml_filename
1094{
1095 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1096
1097 # Store the filename of the Current Fileset
1098 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1099 # FileName tag must come before Description tag
1100 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1101
1102 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1103 return [$tagname => $attrHash];
1104}
1105
1106
1107sub mxml_fileset
1108{
1109 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1110
1111 # Initilise the current_file
1112 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1113 # FileName tag must come before Description tag
1114 $parser->{'parameters'}->{'current_file'} = "";
1115
1116 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1117 return [$tagname => $attrHash];
1118}
1119
1120
1121sub edit_metadata_xml
1122{
1123 my $self = shift @_;
1124 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1125
1126 # Set the call-back functions for the metadata tags
1127 my @rules =
1128 ( _default => 'raw',
1129 'FileName' => \&mxml_filename,
1130 'Metadata' => \&mxml_metadata,
1131 'Description' => \&mxml_description,
1132 'FileSet' => \&mxml_fileset);
1133
1134 # use XML::Rules to add it in (read in and out again)
1135 my $parser = XML::Rules->new(rules => \@rules,
1136 style => 'filter',
1137 output_encoding => 'utf8');
1138
1139 if (!-e $metadata_xml_filename) {
1140
1141 if (open(MOUT,">$metadata_xml_filename")) {
1142
1143 my $src_file_re = &util::filename_to_regex($src_file);
1144 # shouldn't the following also be in the above utility routine??
1145 # $src_file_re =~ s/\./\\./g;
1146
1147 print MOUT "<?xml version=\"1.0\"?>\n";
1148 print MOUT "<DirectoryMetadata>\n";
1149 print MOUT " <FileSet>\n";
1150 print MOUT " <FileName>$src_file_re</FileName>\n";
1151 print MOUT " <Description>\n";
1152 print MOUT " </Description>\n";
1153 print MOUT " </FileSet>\n";
1154 print MOUT "</DirectoryMetadata>\n";
1155
1156 close(MOUT);
1157 }
1158 else {
1159 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1160 }
1161 }
1162
1163
1164 my $xml_in = "";
1165 if (!open(MIN,"<$metadata_xml_filename")) {
1166 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1167 }
1168 else {
1169 # Read them in
1170 my $line;
1171 while (defined ($line=<MIN>)) {
1172 $xml_in .= $line;
1173 }
1174 close(MIN);
1175
1176 # Filter with the call-back functions
1177 my $xml_out = "";
1178
1179 my $MOUT;
1180 if (!open($MOUT,">$metadata_xml_filename")) {
1181 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1182 }
1183 else {
1184 binmode($MOUT,":utf8");
1185
1186 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1187 # At the moment, I will just hack it!
1188 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1189 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1190 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1191 #print MOUT $xml_out;
1192
1193 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1194 metavalue => $metavalue,
1195 metamode => $metamode,
1196 src_file => $src_file,
1197 current_file => undef} );
1198 close($MOUT);
1199 }
1200 }
1201}
1202
1203
1204sub set_import_metadata
1205{
1206 my $self = shift @_;
1207
1208 my $username = $self->{'username'};
1209 my $collect = $self->{'collect'};
1210 my $gsdl_cgi = $self->{'gsdl_cgi'};
1211 my $gsdlhome = $self->{'gsdlhome'};
1212 my $infodbtype = $self->{'infodbtype'};
1213
1214 if ($baseaction::authentication_enabled) {
1215 # Ensure the user is allowed to edit this collection
1216 $self->authenticate_user($username, $collect);
1217 }
1218
1219
1220 # Obtain the collect and archive dir
1221 my $site = $self->{'site'};
1222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1223
1224 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1225 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1226
1227 # Make sure the collection isn't locked by someone else
1228 $self->lock_collection($username, $collect);
1229
1230 # look up additional args
1231 # want either d= or f=
1232 my $docid = $self->{'d'};
1233 my $import_file = $self->{'f'};
1234 if ((!defined $docid) && (!defined $import_file)) {
1235 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1236 }
1237
1238 # Get the parameters and set default mode to "accumulate"
1239 my $metaname = $self->{'metaname'};
1240 my $metavalue = $self->{'metavalue'};
1241## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1242 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1243 print STDERR "*** set import meta: val = $metavalue\n";
1244
1245 my $metamode = $self->{'metamode'};
1246 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1247 # make "accumulate" the default (less destructive, as won't actually
1248 # delete any existing values)
1249 $metamode = "accumulate";
1250 }
1251
1252 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1253 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1254 my $metadata_xml_file;
1255 my $import_filename = undef;
1256 if (defined $docid) {
1257 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1258 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1259
1260 # This now stores the full pathname
1261 $import_filename = $doc_rec->{'src-file'}->[0];
1262 }
1263 else {
1264 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1265 }
1266
1267 # figure out correct metadata.xml file [?]
1268 # Assuming the metadata.xml file is next to the source file
1269 # Note: This will not work if it is using the inherited metadata from the parent folder
1270 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1271 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1272
1273 # Edit the metadata.xml
1274 # Modified by Jeffrey from DL Consulting
1275 # Handle the case where there is one metadata.xml file for multiple FileSets
1276 # The XML filter needs to know whether it is in the right FileSet
1277 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1278 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1279 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1280 $metaname, $metavalue, $metamode, $import_tailname);
1281
1282 # Release the lock once it is done
1283 $self->unlock_collection($username, $collect);
1284
1285 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1286 $mess .= " $metaname";
1287 $mess .= " = $metavalue";
1288 $mess .= " ($metamode)\n";
1289
1290 $gsdl_cgi->generate_ok_message($mess);
1291
1292}
1293
1294sub remove_import_metadata
1295{
1296 my $self = shift @_;
1297
1298 my $username = $self->{'username'};
1299 my $collect = $self->{'collect'};
1300 my $gsdl_cgi = $self->{'gsdl_cgi'};
1301
1302 if ($baseaction::authentication_enabled) {
1303 # Ensure the user is allowed to edit this collection
1304 &authenticate_user($gsdl_cgi, $username, $collect);
1305 }
1306
1307 my $gsdlhome = $self->{'gsdlhome'};
1308 my $infodbtype = $self->{'infodbtype'};
1309
1310 # Obtain the collect dir
1311 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1312 my $site = $self->{'site'};
1313 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1314
1315 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1316 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1317
1318 # Make sure the collection isn't locked by someone else
1319 $self->lock_collection($username, $collect);
1320
1321 # look up additional args
1322 my $docid = $self->{'d'};
1323 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1324 {
1325 die "No docid (d=...) specified.\n";
1326 }
1327
1328 my $metaname = $self->{'metaname'};
1329 my $metavalue = $self->{'metavalue'};
1330 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1331
1332 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1333 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1334 my $metadata_xml_file;
1335 my $import_filename = undef;
1336 if (defined $docid)
1337 {
1338 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1339 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1340
1341 # This now stores the full pathname
1342 $import_filename = $doc_rec->{'src-file'}->[0];
1343 }
1344
1345 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1346 {
1347 die "There is no metadata\n";
1348 }
1349
1350 # figure out correct metadata.xml file [?]
1351 # Assuming the metadata.xml file is next to the source file
1352 # Note: This will not work if it is using the inherited metadata from the parent folder
1353 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1354 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1355
1356 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1357
1358 # Release the lock once it is done
1359 $self->unlock_collection($username, $collect);
1360
1361 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1362 $mess .= " $metaname";
1363 $mess .= " = $metavalue\n";
1364
1365 $gsdl_cgi->generate_ok_message($mess);
1366}
1367
1368sub remove_from_metadata_xml
1369{
1370 my $self = shift @_;
1371 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1372
1373 # Set the call-back functions for the metadata tags
1374 my @rules =
1375 (
1376 _default => 'raw',
1377 'Metadata' => \&rfmxml_metadata,
1378 'FileName' => \&mxml_filename
1379 );
1380
1381 my $parser = XML::Rules->new
1382 (
1383 rules => \@rules,
1384 style => 'filter',
1385 output_encoding => 'utf8'
1386 );
1387
1388 my $xml_in = "";
1389 if (!open(MIN,"<$metadata_xml_filename"))
1390 {
1391 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1392 }
1393 else
1394 {
1395 # Read them in
1396 my $line;
1397 while (defined ($line=<MIN>)) {
1398 $xml_in .= $line;
1399 }
1400 close(MIN);
1401
1402 # Filter with the call-back functions
1403 my $xml_out = "";
1404
1405 my $MOUT;
1406 if (!open($MOUT,">$metadata_xml_filename")) {
1407 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1408 }
1409 else {
1410 binmode($MOUT,":utf8");
1411 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1412 close($MOUT);
1413 }
1414 }
1415}
1416
1417sub rfmxml_metadata
1418{
1419 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1420
1421 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1422 {
1423 return [];
1424 }
1425
1426 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1427 return [$tagname => $attrHash];
1428}
1429
1430sub remove_live_metadata
1431{
1432 my $self = shift @_;
1433
1434 my $username = $self->{'username'};
1435 my $collect = $self->{'collect'};
1436 my $gsdl_cgi = $self->{'gsdl_cgi'};
1437 my $gsdlhome = $self->{'gsdlhome'};
1438 my $infodbtype = $self->{'infodbtype'};
1439
1440 if ($baseaction::authentication_enabled) {
1441 # Ensure the user is allowed to edit this collection
1442 &authenticate_user($gsdl_cgi, $username, $collect);
1443 }
1444
1445 # Obtain the collect dir
1446 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1447 my $site = $self->{'site'};
1448 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1449
1450 # Make sure the collection isn't locked by someone else
1451 $self->lock_collection($username, $collect);
1452
1453 # look up additional args
1454 my $docid = $self->{'d'};
1455 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1456 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1457 }
1458
1459 # Generate the dbkey
1460 my $metaname = $self->{'metaname'};
1461 my $dbkey = "$docid.$metaname";
1462
1463 # To people who know $collect_tail please add some comments
1464 # Obtain the live gdbm_db path
1465 my $collect_tail = $collect;
1466 $collect_tail =~ s/^.*[\/\\]//;
1467 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1468 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1469
1470 # Remove the key
1471 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1472 my $status = system($cmd);
1473 if ($status != 0) {
1474 # Catch error if gdbmdel failed
1475 my $mess = "Failed to set metadata key: $dbkey\n";
1476
1477 $mess .= "PATH: $ENV{'PATH'}\n";
1478 $mess .= "cmd = $cmd\n";
1479 $mess .= "Exit status: $status\n";
1480 $mess .= "System Error Message: $!\n";
1481
1482 $gsdl_cgi->generate_error($mess);
1483 }
1484 else {
1485 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1486 }
1487
1488}
1489
1490
1491sub remove_metadata
1492{
1493 my $self = shift @_;
1494
1495 my $username = $self->{'username'};
1496 my $collect = $self->{'collect'};
1497 my $gsdl_cgi = $self->{'gsdl_cgi'};
1498 my $gsdlhome = $self->{'gsdlhome'};
1499 my $infodbtype = $self->{'infodbtype'};
1500
1501 if ($baseaction::authentication_enabled) {
1502 # Ensure the user is allowed to edit this collection
1503 &authenticate_user($gsdl_cgi, $username, $collect);
1504 }
1505
1506 # Obtain the collect dir
1507 my $site = $self->{'site'};
1508 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1509 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1510
1511 # Make sure the collection isn't locked by someone else
1512 $self->lock_collection($username, $collect);
1513
1514 # look up additional args
1515 my $docid = $self->{'d'};
1516 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1517 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1518 }
1519 my $metaname = $self->{'metaname'};
1520 my $metapos = $self->{'metapos'};
1521
1522 # To people who know $collect_tail please add some comments
1523 # Obtain the path to the database
1524 my $collect_tail = $collect;
1525 $collect_tail =~ s/^.*[\/\\]//;
1526 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1527 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1528
1529 # Read the docid entry
1530 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1531
1532 # Basically loop through and unescape_html the values
1533 foreach my $k (keys %$doc_rec) {
1534 my @escaped_v = ();
1535 foreach my $v (@{$doc_rec->{$k}}) {
1536 if ($k eq "contains") {
1537 # protect quotes in ".2;".3 etc
1538 $v =~ s/\"/\\\"/g;
1539 push(@escaped_v, $v);
1540 }
1541 else {
1542 my $ev = &ghtml::unescape_html($v);
1543 $ev =~ s/\"/\\\"/g;
1544 push(@escaped_v, $ev);
1545 }
1546 }
1547 $doc_rec->{$k} = \@escaped_v;
1548 }
1549
1550 # Check to make sure the key does exist
1551 if (!defined ($doc_rec->{$metaname})) {
1552 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1553 }
1554
1555 # Obtain the specified metadata pos
1556 $metapos = 0 if (!defined $metapos);
1557
1558 # consider check key is defined before deleting?
1559 # Loop through the metadata array and ignore the specified position
1560 my $filtered_metadata = [];
1561 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1562 for (my $i=0; $i<$num_metadata_vals; $i++) {
1563 my $metavalue = shift(@{$doc_rec->{$metaname}});
1564
1565 if ($i != $metapos) {
1566 push(@$filtered_metadata,$metavalue)
1567 }
1568 }
1569 $doc_rec->{$metaname} = $filtered_metadata;
1570
1571 # Turn the record back to string
1572 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1573
1574 # Store it back to the database
1575 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1576 my $status = system($cmd);
1577 if ($status != 0) {
1578 my $mess = "Failed to set metadata key: $docid\n";
1579
1580 $mess .= "PATH: $ENV{'PATH'}\n";
1581 $mess .= "cmd = $cmd\n";
1582 $mess .= "Exit status: $status\n";
1583 $mess .= "System Error Message: $!\n";
1584
1585 $gsdl_cgi->generate_error($mess);
1586 }
1587 else {
1588 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1589 $mess .= " $metaname";
1590 $mess .= "->[$metapos]" if (defined $metapos);
1591
1592 $gsdl_cgi->generate_ok_message($mess);
1593 }
1594}
1595
1596
1597# Was trying to reused the codes, but the functions need to be broken
1598# down more before they can be reused, otherwise there will be too
1599# much overhead and duplicate process...
1600sub insert_metadata
1601{
1602 my $self = shift @_;
1603
1604 my $username = $self->{'username'};
1605 my $collect = $self->{'collect'};
1606 my $gsdl_cgi = $self->{'gsdl_cgi'};
1607 my $gsdlhome = $self->{'gsdlhome'};
1608 my $infodbtype = $self->{'infodbtype'};
1609
1610 # If the import metadata and gdbm database have been updated, we
1611 # need to insert some notification to warn user that the the text
1612 # they see at the moment is not indexed and require a rebuild.
1613 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1614
1615 if ($baseaction::authentication_enabled) {
1616 # Ensure the user is allowed to edit this collection
1617 $self->authenticate_user($username, $collect);
1618 }
1619
1620 # Obtain the collect and archive dir
1621 my $site = $self->{'site'};
1622 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1623 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1624 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1625
1626 # Make sure the collection isn't locked by someone else
1627 $self->lock_collection($username, $collect);
1628
1629 # Check additional args
1630 my $docid = $self->{'d'};
1631 if (!defined($docid)) {
1632 $gsdl_cgi->generate_error("No document id is specified: d=...");
1633 }
1634 my $metaname = $self->{'metaname'};
1635 if (!defined($metaname)) {
1636 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1637 }
1638 my $metavalue = $self->{'metavalue'};
1639 if (!defined($metavalue) || $metavalue eq "") {
1640 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1641 }
1642 # make "accumulate" the default (less destructive, as won't actually
1643 # delete any existing values)
1644 my $metamode = "accumulate";
1645
1646 #=======================================================================#
1647 # set_import_metadata [START]
1648 #=======================================================================#
1649 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1650 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1651 my $metadata_xml_file;
1652 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1653 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1654
1655 # This now stores the full pathname
1656 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1657
1658 # figure out correct metadata.xml file [?]
1659 # Assuming the metadata.xml file is next to the source file
1660 # Note: This will not work if it is using the inherited metadata from the parent folder
1661 my ($import_tailname, $import_dirname)
1662 = File::Basename::fileparse($import_filename);
1663 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1664
1665 # Shane's escape characters
1666 $metavalue = pack "U0C*", unpack "C*", $metavalue;
1667 $metavalue =~ s/\,/&#44;/g;
1668 $metavalue =~ s/\:/&#58;/g;
1669 $metavalue =~ s/\|/&#124;/g;
1670 $metavalue =~ s/\(/&#40;/g;
1671 $metavalue =~ s/\)/&#41;/g;
1672 $metavalue =~ s/\[/&#91;/g;
1673 $metavalue =~ s/\\/&#92;/g;
1674 $metavalue =~ s/\]/&#93;/g;
1675 $metavalue =~ s/\{/&#123;/g;
1676 $metavalue =~ s/\}/&#125;/g;
1677 $metavalue =~ s/\"/&#34;/g;
1678 $metavalue =~ s/\`/&#96;/g;
1679 $metavalue =~ s/\n/_newline_/g;
1680
1681 # Edit the metadata.xml
1682 # Modified by Jeffrey from DL Consulting
1683 # Handle the case where there is one metadata.xml file for multiple FileSets
1684 # The XML filter needs to know whether it is in the right FileSet
1685 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1686 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1687 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1688 $metaname, $metavalue, $metamode, $import_tailname);
1689 #=======================================================================#
1690 # set_import_metadata [END]
1691 #=======================================================================#
1692
1693
1694 #=======================================================================#
1695 # set_metadata (accumulate version) [START]
1696 #=======================================================================#
1697 # To people who know $collect_tail please add some comments
1698 # Obtain path to the database
1699 my $collect_tail = $collect;
1700 $collect_tail =~ s/^.*[\/\\]//;
1701 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1702 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1703
1704 # Read the docid entry
1705 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1706
1707 foreach my $k (keys %$doc_rec) {
1708 my @escaped_v = ();
1709 foreach my $v (@{$doc_rec->{$k}}) {
1710 if ($k eq "contains") {
1711 # protect quotes in ".2;".3 etc
1712 $v =~ s/\"/\\\"/g;
1713 push(@escaped_v, $v);
1714 }
1715 else {
1716 my $ev = &ghtml::unescape_html($v);
1717 $ev =~ s/\"/\\\"/g;
1718 push(@escaped_v, $ev);
1719 }
1720 }
1721 $doc_rec->{$k} = \@escaped_v;
1722 }
1723
1724 # Protect the quotes
1725 $metavalue =~ s/\"/\\\"/g;
1726
1727 # Adds the pending macro
1728 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1729
1730 # If the metadata doesn't exist, create a new one
1731 if (!defined($doc_rec->{$metaname})){
1732 $doc_rec->{$metaname} = [ $macro_metavalue ];
1733 }
1734 # Else, let's acculumate the values
1735 else {
1736 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1737 }
1738
1739 # Generate the record string
1740 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1741
1742 # Store it into GDBM
1743 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1744 my $status = system($cmd);
1745 if ($status != 0) {
1746 # Catch error if gdbmget failed
1747 my $mess = "Failed to set metadata key: $docid\n";
1748
1749 $mess .= "PATH: $ENV{'PATH'}\n";
1750 $mess .= "cmd = $cmd\n";
1751 $mess .= "Exit status: $status\n";
1752 $mess .= "System Error Message: $!\n";
1753
1754 $gsdl_cgi->generate_error($mess);
1755 }
1756 else {
1757 my $mess = "insert-metadata successful: Key[$docid]\n";
1758 $mess .= " [In metadata.xml] $metaname";
1759 $mess .= " = $metavalue\n";
1760 $mess .= " [In database] $metaname";
1761 $mess .= " = $macro_metavalue\n";
1762 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1763 $gsdl_cgi->generate_ok_message($mess);
1764 }
1765 #=======================================================================#
1766 # set_metadata (accumulate version) [END]
1767 #=======================================================================#
1768
1769 # Release the lock once it is done
1770 $self->unlock_collection($username, $collect);
1771}
1772
17731;
Note: See TracBrowser for help on using the repository browser.