source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 23766

Last change on this file since 23766 was 23766, checked in by davidb, 13 years ago

Setting of the collect directory changed to be compliant with Greenstone 3 and its 'site' variable

File size: 41.2 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35
36BEGIN {
37# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
38 require XML::Rules;
39}
40
41
42@metadataaction::ISA = ('baseaction');
43
44
45my $action_table =
46{
47 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
48 'optional-args' => [] },
49
50 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
51 'optional-args' => [ "metapos" ] },
52
53 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
57 'optional-args' => [ "metapos" ] },
58
59 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60 'optional-args' => [ "metapos", "metamode" ]
61 # metamode can be "accumulate", "override",
62 },
63
64 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
65 'optional-args' => [ "d", "f", "metamode" ]
66 # metamode can be "accumulate", "override", or "unique-id"
67 },
68
69
70 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
71 'optional-args' => [ ] },
72
73 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
74 'optional-args' => [ "metapos" ] },
75
76 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
77 'optional-args' => [ ]
78 }
79};
80
81
82sub new
83{
84 my $class = shift (@_);
85 my ($gsdl_cgi,$iis6_mode) = @_;
86
87 # Treat metavalue specially. To transmit this through a GET request
88 # the Javascript side has url-encoded it, so here we need to decode
89 # it before proceeding
90
91 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
92 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
93
94 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
95
96 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
97
98 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
99
100 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
101
102 return bless $self, $class;
103}
104
105
106sub get_live_metadata
107{
108 my $self = shift @_;
109
110 my $username = $self->{'username'};
111 my $collect = $self->{'collect'};
112 my $gsdl_cgi = $self->{'gsdl_cgi'};
113 my $gsdlhome = $self->{'gsdlhome'};
114 my $infodbtype = $self->{'infodbtype'};
115
116 # live metadata gets/saves value scoped (prefixed) by the current usename
117 # so (for now) let's not bother to enforce authentication
118
119 # Obtain the collect dir
120 my $site = $self->{'site'};
121 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
122 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
123
124 # Make sure the collection isn't locked by someone else
125 $self->lock_collection($username, $collect);
126
127 # look up additional args
128 my $docid = $self->{'d'};
129 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
130 $gsdl_cgi->generate_error("No docid (d=...) specified.");
131 }
132
133 # Generate the dbkey
134 my $metaname = $self->{'metaname'};
135 my $dbkey = "$docid.$metaname";
136
137 # To people who know $collect_tail please add some comments
138 # Obtain path to the database
139 my $collect_tail = $collect;
140 $collect_tail =~ s/^.*[\/|\\]//;
141 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
142 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
143
144 # Obtain the content of the key
145 my $cmd = "gdbmget $infodb_file_path $dbkey";
146 if (open(GIN,"$cmd |") == 0) {
147 # Catch error if gdbmget failed
148 my $mess = "Failed to get metadata key: $metaname\n";
149 $mess .= "$!\n";
150
151 $gsdl_cgi->generate_error($mess);
152 }
153 else {
154 binmode(GIN,":utf8");
155 # Read everything in and concatenate them into $metavalue
156 my $metavalue = "";
157 my $line;
158 while (defined ($line=<GIN>)) {
159 $metavalue .= $line;
160 }
161 close(GIN);
162 chomp($metavalue); # Get rid off the tailing newlines
163 $gsdl_cgi->generate_ok_message("$metavalue");
164 }
165
166 # Release the lock once it is done
167 $self->unlock_collection($username, $collect);
168}
169
170
171sub get_metadata
172{
173 my $self = shift @_;
174
175 my $username = $self->{'username'};
176 my $collect = $self->{'collect'};
177 my $gsdl_cgi = $self->{'gsdl_cgi'};
178 my $gsdlhome = $self->{'gsdlhome'};
179
180 # Authenticate user if it is enabled
181 if ($baseaction::authentication_enabled) {
182 # Ensure the user is allowed to edit this collection
183 &authenticate_user($gsdl_cgi, $username, $collect);
184 }
185
186 # Obtain the collect dir
187 my $site = $self->{'site'};
188 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
189 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
190
191 # Make sure the collection isn't locked by someone else
192 $self->lock_collection($username, $collect);
193
194 # look up additional args
195 my $docid = $self->{'d'};
196 my $metaname = $self->{'metaname'};
197 my $metapos = $self->{'metapos'};
198 my $infodbtype = $self->{'infodbtype'};
199
200 # To people who know $collect_tail please add some comments
201 # Obtain path to the database
202 my $collect_tail = $collect;
203 $collect_tail =~ s/^.*[\/\\]//;
204 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
205 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
206
207 # Read the docid entry
208 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
209
210 # Basically loop through and unescape_html the values
211 foreach my $k (keys %$doc_rec) {
212 my @escaped_v = ();
213 foreach my $v (@{$doc_rec->{$k}}) {
214 my $ev = &ghtml::unescape_html($v);
215 push(@escaped_v, $ev);
216 }
217 $doc_rec->{$k} = \@escaped_v;
218 }
219
220 # Obtain the specified metadata value
221 $metapos = 0 if (!defined $metapos);
222 my $metavalue = $doc_rec->{$metaname}->[$metapos];
223 $gsdl_cgi->generate_ok_message("$metavalue");
224
225 # Release the lock once it is done
226 $self->unlock_collection($username, $collect);
227}
228
229
230sub set_live_metadata
231{
232 my $self = shift @_;
233
234 my $username = $self->{'username'};
235 my $collect = $self->{'collect'};
236 my $gsdl_cgi = $self->{'gsdl_cgi'};
237 my $gsdlhome = $self->{'gsdlhome'};
238 my $infodbtype = $self->{'infodbtype'};
239
240 if ($baseaction::authentication_enabled) {
241 # Ensure the user is allowed to edit this collection
242 &authenticate_user($gsdl_cgi, $username, $collect);
243 }
244
245 # Obtain the collect dir
246 my $site = $self->{'site'};
247 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
248 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
249
250 # Make sure the collection isn't locked by someone else
251 $self->lock_collection($username, $collect);
252
253 # look up additional args
254 my $docid = $self->{'d'};
255 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
256 $gsdl_cgi->generate_error("No docid (d=...) specified.");
257 }
258 my $metavalue = $self->{'metavalue'};
259
260
261 # Generate the dbkey
262 my $metaname = $self->{'metaname'};
263 my $dbkey = "$docid.$metaname";
264
265 # To people who know $collect_tail please add some comments
266 # Obtain path to the database
267 my $collect_tail = $collect;
268 $collect_tail =~ s/^.*[\/\\]//;
269 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
270 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
271
272 # Set the new value
273 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
274 my $status = system($cmd);
275 if ($status != 0) {
276 # Catch error if gdbmget failed
277 my $mess = "Failed to set metadata key: $dbkey\n";
278
279 $mess .= "PATH: $ENV{'PATH'}\n";
280 $mess .= "cmd = $cmd\n";
281 $mess .= "Exit status: $status\n";
282 $mess .= "System Error Message: $!\n";
283
284 $gsdl_cgi->generate_error($mess);
285 }
286 else {
287 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
288 }
289
290 # Release the lock once it is done
291 $self->unlock_collection($username, $collect);
292}
293
294
295sub set_metadata
296{
297 my $self = shift @_;
298
299 my $username = $self->{'username'};
300 my $collect = $self->{'collect'};
301 my $gsdl_cgi = $self->{'gsdl_cgi'};
302 my $gsdlhome = $self->{'gsdlhome'};
303
304 if ($baseaction::authentication_enabled) {
305 # Ensure the user is allowed to edit this collection
306 &authenticate_user($gsdl_cgi, $username, $collect);
307 }
308
309 my $site = $self->{'site'};
310 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
311
312 $gsdl_cgi->checked_chdir($collect_dir);
313
314 # Obtain the collect dir
315 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
316
317 # Make sure the collection isn't locked by someone else
318 $self->lock_collection($username, $collect);
319
320 # look up additional args
321 my $docid = $self->{'d'};
322 my $metaname = $self->{'metaname'};
323 my $metapos = $self->{'metapos'};
324 my $metavalue = $self->{'metavalue'};
325 my $infodbtype = $self->{'infodbtype'};
326
327 # To people who know $collect_tail please add some comments
328 # Obtain path to the database
329 my $collect_tail = $collect;
330 $collect_tail =~ s/^.*[\/\\]//;
331 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
332 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
333
334 print STDERR "**** infodb file path = $infodb_file_path\n";
335 print STDERR "***** infodb type = $infodbtype\n";
336
337 # Read the docid entry
338 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
339
340 # Set the metadata value
341 if (defined $metapos) {
342 $doc_rec->{$metaname}->[$metapos] = $metavalue;
343 }
344 else {
345 $doc_rec->{$metaname} = [ $metavalue ];
346 }
347
348 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
349 if ($status != 0) {
350 # Catch error if set infodb entry failed
351 my $mess = "Failed to set metadata key: $docid\n";
352
353 $mess .= "PATH: $ENV{'PATH'}\n";
354 $mess .= "Exit status: $status\n";
355 $mess .= "System Error Message: $!\n";
356
357 $gsdl_cgi->generate_error($mess);
358 }
359 else {
360 my $mess = "set-document-metadata successful: Key[$docid]\n";
361 $mess .= " $metaname";
362 $mess .= "->[$metapos]" if (defined $metapos);
363 $mess .= " = $metavalue";
364
365 $gsdl_cgi->generate_ok_message($mess);
366 }
367
368 # Release the lock once it is done
369 $self->unlock_collection($username, $collect);
370}
371
372
373sub dxml_metadata
374{
375 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
376 my $metaname = $parser->{'parameters'}->{'metaname'};
377 my $metamode = $parser->{'parameters'}->{'metamode'};
378
379 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
380
381 # Find the right metadata tag and checks if we are going to
382 # override it
383 #
384 # Note: This over writes the first metadata block it
385 # encountered. If there are multiple Sections in the doc.xml, it
386 # might not behave as you would expect
387
388 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
389## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
390## print STDERR "**** metamode = $metamode\n";
391
392 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
393 my $name_attr = $attrHash->{'name'};
394 if (($name_attr eq $metaname) && ($metamode eq "override")) {
395## print STDERR "**** got match!!\n";
396 # Get the value and override the current value
397 my $metavalue = $parser->{'parameters'}->{'metavalue'};
398 $attrHash->{'_content'} = $metavalue;
399
400 # Don't want it to wipe out any other pieces of metadata
401 $parser->{'parameters'}->{'metamode'} = "done";
402 }
403 }
404
405 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
406 return [$tagname => $attrHash];
407}
408
409
410sub dxml_description
411{
412 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
413 my $metamode = $parser->{'parameters'}->{'metamode'};
414
415 # Accumulate the metadata
416 # NOTE: This appends new metadata element to all description fields.
417 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
418 if (($metamode eq "accumulate") || ($metamode eq "override")) {
419 # If get to here and metamode is override, the this means there
420 # was no existing value to overide => treat as an append operation
421
422 # Tack a new metadata tag on to the end of the <Metadata>+ block
423 my $metaname = $parser->{'parameters'}->{'metaname'};
424 my $metavalue = $parser->{'parameters'}->{'metavalue'};
425
426 my $metadata_attr = { '_content' => $metavalue,
427 'name' => $metaname,
428 'mode' => "accumulate" };
429
430 my $append_metadata = [ "Metadata" => $metadata_attr ];
431 my $description_content = $attrHash->{'_content'};
432
433## print STDERR "**** appending to doc.xml\n";
434
435 push(@$description_content," ", $append_metadata ,"\n ");
436 $parser->{'parameters'}->{'metamode'} = "done";
437 }
438
439
440 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
441 return [$tagname => $attrHash];
442}
443
444
445
446sub dxml_start_section
447{
448 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
449
450 my $new_depth = scalar(@$contextArray);
451
452 if ($new_depth == 1) {
453 $parser->{'parameters'}->{'curr_section_depth'} = 1;
454 $parser->{'parameters'}->{'curr_section_num'} = "";
455 }
456
457 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
458 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
459
460 my $new_secnum;
461
462 if ($new_depth > $old_depth) {
463 # child subsection
464 $new_secnum = "$old_secnum.1";
465 }
466 elsif ($new_depth == $old_depth) {
467 # sibling section => increase it's value by 1
468 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
469 $tail_num++;
470 $new_secnum = $old_secnum;
471 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
472 }
473 else {
474 # back up to parent section => lopp off tail
475 $new_secnum = $old_secnum;
476 $new_secnum =~ s/\.\d+$//;
477 }
478
479 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
480 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
481
482 print STDERR "*** In Section: $new_secnum\n";
483}
484
485sub edit_xml_file
486{
487 my $self = shift @_;
488 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
489
490 # use XML::Rules to add it in (read in and out again)
491 my $parser = XML::Rules->new(start_rules => $start_rules,
492 rules => $rules,
493 style => 'filter',
494 output_encoding => 'utf8' );
495
496 my $xml_in = "";
497 if (!open(MIN,"<$filename")) {
498 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
499 }
500 else {
501 # Read all the text in
502 my $line;
503 while (defined ($line=<MIN>)) {
504 $xml_in .= $line;
505 }
506 close(MIN);
507
508 my $MOUT;
509 if (!open($MOUT,">$filename")) {
510 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
511 }
512 else {
513 # Matched lines will get handled by the call backs
514## my $xml_out = "";
515
516 binmode($MOUT,":utf8");
517 $parser->filter($xml_in,$MOUT, $options);
518
519# binmode(MOUT,":utf8");
520# print MOUT $xml_out;
521 close($MOUT);
522 }
523 }
524}
525
526
527sub edit_doc_xml
528{
529 my $self = shift @_;
530 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
531
532 # To monitor which section/subsection number we are in
533 my @start_rules =
534 ( 'Section' => \&dxml_start_section );
535
536 # use XML::Rules to add it in (read in and out again)
537 # Set the call back functions
538 my @rules =
539 ( _default => 'raw',
540 'Metadata' => \&dxml_metadata,
541 'Description' => \&dxml_description);
542
543 # Sets the parameters
544 my $options = { 'metaname' => $metaname,
545 'metapos' => $metapos,
546 'metavalue' => $metavalue,
547 'metamode' => $metamode };
548
549 if (defined $opt_secnum) {
550 $options->{'secnum'} = $opt_secnum;
551 }
552
553 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
554}
555
556
557sub set_archives_metadata
558{
559 my $self = shift @_;
560
561 my $username = $self->{'username'};
562 my $collect = $self->{'collect'};
563 my $gsdl_cgi = $self->{'gsdl_cgi'};
564 my $gsdlhome = $self->{'gsdlhome'};
565 my $infodbtype = $self->{'infodbtype'};
566
567 if ($baseaction::authentication_enabled) {
568 # Ensure the user is allowed to edit this collection
569 $self->authenticate_user($username, $collect);
570 }
571
572 # Obtain the collect and archive dir
573 my $site = $self->{'site'};
574 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
575 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
576
577 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
578
579 # Make sure the collection isn't locked by someone else
580 $self->lock_collection($username, $collect);
581
582 # look up additional args
583 my $docid = $self->{'d'};
584 my $metaname = $self->{'metaname'};
585 my $metavalue = $self->{'metavalue'};
586
587 my $metapos = $self->{'metapos'};
588 $metapos = 0 if (!defined $metapos);
589
590 my $metamode = $self->{'metamode'};
591 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
592 # make "accumulate" the default (less destructive, as won't actually
593 # delete any existing values)
594 $metamode = "accumulate";
595 }
596
597 # Obtain the doc.xml path for the specified docID
598 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
599
600 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
601 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
602 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
603
604 # The $doc_xml_file is relative to the archives, and now let's get the full path
605 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
606 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
607
608 # Edit the doc.xml file with the specified metadata name, value and position.
609 # TODO: there is a potential problem here as this edit_doc_xml function
610 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
611 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
612 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
613 $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
614
615 # Release the lock once it is done
616 $self->unlock_collection($username, $collect);
617
618 my $mess = "set-archives-metadata successful: Key[$docid]\n";
619 $mess .= " $metaname";
620 $mess .= "->[$metapos]" if (defined $metapos);
621 $mess .= " = $metavalue";
622 $mess .= " ($metamode)\n";
623
624 $gsdl_cgi->generate_ok_message($mess);
625}
626
627
628sub mxml_metadata
629{
630 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
631 my $metaname = $parser->{'parameters'}->{'metaname'};
632 my $metamode = $parser->{'parameters'}->{'metamode'};
633
634 # Report error if we don't see FileName tag before this
635 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
636
637 # Don't do anything if we are not in the right FileSet
638 my $file_regexp = $parser->{'parameters'}->{'current_file'};
639 if ($file_regexp =~ /\.\*/) {
640 # Only interested in a file_regexp if it specifies precisely one
641 # file.
642 # So, skip anything with a .* in it as it is too general
643 return [$tagname => $attrHash];
644 }
645 my $src_file = $parser->{'parameters'}->{'src_file'};
646 if (!($src_file =~ /$file_regexp/)) {
647 return [$tagname => $attrHash];
648 }
649## print STDERR "*** mxl metamode = $metamode\n";
650
651 # Find the right metadata tag and checks if we are going to override it
652 my $name_attr = $attrHash->{'name'};
653 if (($name_attr eq $metaname) && ($metamode eq "override")) {
654 # Get the value and override the current value
655 my $metavalue = $parser->{'parameters'}->{'metavalue'};
656 $attrHash->{'_content'} = $metavalue;
657
658## print STDERR "**** overrideing metadata.xml\n";
659
660 # Don't want it to wipe out any other pieces of metadata
661 $parser->{'parameters'}->{'metamode'} = "done";
662 }
663
664 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
665 return [$tagname => $attrHash];
666}
667
668
669sub mxml_description
670{
671 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
672 my $metamode = $parser->{'parameters'}->{'metamode'};
673
674 # Failed... Report error if we don't see FileName tag before this
675 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
676
677 # Don't do anything if we are not in the right FileSet
678 my $file_regexp = $parser->{'parameters'}->{'current_file'};
679 if ($file_regexp =~ /\.\*/) {
680 # Only interested in a file_regexp if it specifies precisely one
681 # file.
682 # So, skip anything with a .* in it as it is too general
683 return [$tagname => $attrHash];
684 }
685 my $src_file = $parser->{'parameters'}->{'src_file'};
686 if (!($src_file =~ /$file_regexp/)) {
687 return [$tagname => $attrHash];
688 }
689
690 # Accumulate the metadata block to the end of the description block
691 # Note: This adds metadata block to all description blocks, so if there are
692 # multiple FileSets, it will add to all of them
693 if (($metamode eq "accumulate") || ($metamode eq "override")) {
694 # if metamode was "override" but get to here then it failed to
695 # find an item to override, in which case it should append its
696 # value to the end, just like the "accumulate" mode
697
698 # tack a new metadata tag on to the end of the <Metadata>+ block
699 my $metaname = $parser->{'parameters'}->{'metaname'};
700 my $metavalue = $parser->{'parameters'}->{'metavalue'};
701
702 my $metadata_attr = { '_content' => $metavalue,
703 'name' => $metaname,
704 'mode' => "accumulate" };
705
706 my $append_metadata = [ "Metadata" => $metadata_attr ];
707 my $description_content = $attrHash->{'_content'};
708
709## print STDERR "*** appending to metadata.xml\n";
710
711 # append the new metadata element to the end of the current
712 # content contained inside this tag
713 push(@$description_content," ", $append_metadata ,"\n ");
714
715 $parser->{'parameters'}->{'metamode'} = "done";
716 }
717
718 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
719 return [$tagname => $attrHash];
720}
721
722
723sub mxml_filename
724{
725 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
726
727 # Store the filename of the Current Fileset
728 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
729 # FileName tag must come before Description tag
730 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
731
732 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
733 return [$tagname => $attrHash];
734}
735
736
737sub mxml_fileset
738{
739 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
740
741 # Initilise the current_file
742 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
743 # FileName tag must come before Description tag
744 $parser->{'parameters'}->{'current_file'} = "";
745
746 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
747 return [$tagname => $attrHash];
748}
749
750
751sub edit_metadata_xml
752{
753 my $self = shift @_;
754 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
755
756 # Set the call-back functions for the metadata tags
757 my @rules =
758 ( _default => 'raw',
759 'FileName' => \&mxml_filename,
760 'Metadata' => \&mxml_metadata,
761 'Description' => \&mxml_description,
762 'FileSet' => \&mxml_fileset);
763
764 # use XML::Rules to add it in (read in and out again)
765 my $parser = XML::Rules->new(rules => \@rules,
766 style => 'filter',
767 output_encoding => 'utf8');
768
769 my $xml_in = "";
770 if (!open(MIN,"<$metadata_xml_filename")) {
771 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
772 }
773 else {
774 # Read them in
775 my $line;
776 while (defined ($line=<MIN>)) {
777 $xml_in .= $line;
778 }
779 close(MIN);
780
781 # Filter with the call-back functions
782 my $xml_out = "";
783
784 my $MOUT;
785 if (!open($MOUT,">$metadata_xml_filename")) {
786 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
787 }
788 else {
789 binmode($MOUT,":utf8");
790
791 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
792 # At the moment, I will just hack it!
793 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
794 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
795 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
796 #print MOUT $xml_out;
797
798 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
799 metavalue => $metavalue,
800 metamode => $metamode,
801 src_file => $src_file,
802 current_file => undef} );
803 close($MOUT);
804 }
805 }
806}
807
808
809sub set_import_metadata
810{
811 my $self = shift @_;
812
813 my $username = $self->{'username'};
814 my $collect = $self->{'collect'};
815 my $gsdl_cgi = $self->{'gsdl_cgi'};
816 my $gsdlhome = $self->{'gsdlhome'};
817 my $infodbtype = $self->{'infodbtype'};
818
819 if ($baseaction::authentication_enabled) {
820 # Ensure the user is allowed to edit this collection
821 $self->authenticate_user($username, $collect);
822 }
823
824
825 # Obtain the collect and archive dir
826 my $site = $self->{'site'};
827 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
828
829 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
830 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
831
832 # Make sure the collection isn't locked by someone else
833 $self->lock_collection($username, $collect);
834
835 # look up additional args
836 # want either d= or f=
837 my $docid = $self->{'d'};
838 my $import_file = $self->{'f'};
839 if ((!defined $docid) && (!defined $import_file)) {
840 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
841 }
842
843 # Get the parameters and set default mode to "accumulate"
844 my $metaname = $self->{'metaname'};
845 my $metavalue = $self->{'metavalue'};
846## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
847 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
848 print STDERR "*** set import meta: val = $metavalue\n";
849
850 my $metamode = $self->{'metamode'};
851 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
852 # make "accumulate" the default (less destructive, as won't actually
853 # delete any existing values)
854 $metamode = "accumulate";
855 }
856
857 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
858 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
859 my $metadata_xml_file;
860 my $import_filename = undef;
861 if (defined $docid) {
862 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
863 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
864
865 # This now stores the full pathname
866 $import_filename = $doc_rec->{'src-file'}->[0];
867 }
868 else {
869 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
870 }
871
872 # figure out correct metadata.xml file [?]
873 # Assuming the metadata.xml file is next to the source file
874 # Note: This will not work if it is using the inherited metadata from the parent folder
875 my ($import_tailname, $import_dirname)
876 = File::Basename::fileparse($import_filename);
877 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
878
879 # Edit the metadata.xml
880 # Modified by Jeffrey from DL Consulting
881 # Handle the case where there is one metadata.xml file for multiple FileSets
882 # The XML filter needs to know whether it is in the right FileSet
883 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
884 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
885 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
886 $metaname, $metavalue, $metamode, $import_tailname);
887
888 # Release the lock once it is done
889 $self->unlock_collection($username, $collect);
890
891 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
892 $mess .= " $metaname";
893 $mess .= " = $metavalue";
894 $mess .= " ($metamode)\n";
895
896 $gsdl_cgi->generate_ok_message($mess);
897
898}
899
900
901sub remove_live_metadata
902{
903 my $self = shift @_;
904
905 my $username = $self->{'username'};
906 my $collect = $self->{'collect'};
907 my $gsdl_cgi = $self->{'gsdl_cgi'};
908 my $gsdlhome = $self->{'gsdlhome'};
909 my $infodbtype = $self->{'infodbtype'};
910
911 if ($baseaction::authentication_enabled) {
912 # Ensure the user is allowed to edit this collection
913 &authenticate_user($gsdl_cgi, $username, $collect);
914 }
915
916 # Obtain the collect dir
917 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
918 my $site = $self->{'site'};
919 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
920
921 # Make sure the collection isn't locked by someone else
922 $self->lock_collection($username, $collect);
923
924 # look up additional args
925 my $docid = $self->{'d'};
926 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
927 $gsdl_cgi->generate_error("No docid (d=...) specified.");
928 }
929
930 # Generate the dbkey
931 my $metaname = $self->{'metaname'};
932 my $dbkey = "$docid.$metaname";
933
934 # To people who know $collect_tail please add some comments
935 # Obtain the live gdbm_db path
936 my $collect_tail = $collect;
937 $collect_tail =~ s/^.*[\/\\]//;
938 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
939 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
940
941 # Remove the key
942 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
943 my $status = system($cmd);
944 if ($status != 0) {
945 # Catch error if gdbmdel failed
946 my $mess = "Failed to set metadata key: $dbkey\n";
947
948 $mess .= "PATH: $ENV{'PATH'}\n";
949 $mess .= "cmd = $cmd\n";
950 $mess .= "Exit status: $status\n";
951 $mess .= "System Error Message: $!\n";
952
953 $gsdl_cgi->generate_error($mess);
954 }
955 else {
956 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
957 }
958
959}
960
961
962sub remove_metadata
963{
964 my $self = shift @_;
965
966 my $username = $self->{'username'};
967 my $collect = $self->{'collect'};
968 my $gsdl_cgi = $self->{'gsdl_cgi'};
969 my $gsdlhome = $self->{'gsdlhome'};
970 my $infodbtype = $self->{'infodbtype'};
971
972 if ($baseaction::authentication_enabled) {
973 # Ensure the user is allowed to edit this collection
974 &authenticate_user($gsdl_cgi, $username, $collect);
975 }
976
977 # Obtain the collect dir
978 my $site = $self->{'site'};
979 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
980 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
981
982 # Make sure the collection isn't locked by someone else
983 $self->lock_collection($username, $collect);
984
985 # look up additional args
986 my $docid = $self->{'d'};
987 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
988 $gsdl_cgi->generate_error("No docid (d=...) specified.");
989 }
990 my $metaname = $self->{'metaname'};
991 my $metapos = $self->{'metapos'};
992
993 # To people who know $collect_tail please add some comments
994 # Obtain the path to the database
995 my $collect_tail = $collect;
996 $collect_tail =~ s/^.*[\/\\]//;
997 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
998 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
999
1000 # Read the docid entry
1001 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1002
1003 # Basically loop through and unescape_html the values
1004 foreach my $k (keys %$doc_rec) {
1005 my @escaped_v = ();
1006 foreach my $v (@{$doc_rec->{$k}}) {
1007 if ($k eq "contains") {
1008 # protect quotes in ".2;".3 etc
1009 $v =~ s/\"/\\\"/g;
1010 push(@escaped_v, $v);
1011 }
1012 else {
1013 my $ev = &ghtml::unescape_html($v);
1014 $ev =~ s/\"/\\\"/g;
1015 push(@escaped_v, $ev);
1016 }
1017 }
1018 $doc_rec->{$k} = \@escaped_v;
1019 }
1020
1021 # Check to make sure the key does exist
1022 if (!defined ($doc_rec->{$metaname})) {
1023 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1024 }
1025
1026 # Obtain the specified metadata pos
1027 $metapos = 0 if (!defined $metapos);
1028
1029 # consider check key is defined before deleting?
1030 # Loop through the metadata array and ignore the specified position
1031 my $filtered_metadata = [];
1032 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1033 for (my $i=0; $i<$num_metadata_vals; $i++) {
1034 my $metavalue = shift(@{$doc_rec->{$metaname}});
1035
1036 if ($i != $metapos) {
1037 push(@$filtered_metadata,$metavalue)
1038 }
1039 }
1040 $doc_rec->{$metaname} = $filtered_metadata;
1041
1042 # Turn the record back to string
1043 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1044
1045 # Store it back to the database
1046 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1047 my $status = system($cmd);
1048 if ($status != 0) {
1049 my $mess = "Failed to set metadata key: $docid\n";
1050
1051 $mess .= "PATH: $ENV{'PATH'}\n";
1052 $mess .= "cmd = $cmd\n";
1053 $mess .= "Exit status: $status\n";
1054 $mess .= "System Error Message: $!\n";
1055
1056 $gsdl_cgi->generate_error($mess);
1057 }
1058 else {
1059 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1060 $mess .= " $metaname";
1061 $mess .= "->[$metapos]" if (defined $metapos);
1062
1063 $gsdl_cgi->generate_ok_message($mess);
1064 }
1065}
1066
1067
1068# Was trying to reused the codes, but the functions need to be broken
1069# down more before they can be reused, otherwise there will be too
1070# much overhead and duplicate process...
1071sub insert_metadata
1072{
1073 my $self = shift @_;
1074
1075 my $username = $self->{'username'};
1076 my $collect = $self->{'collect'};
1077 my $gsdl_cgi = $self->{'gsdl_cgi'};
1078 my $gsdlhome = $self->{'gsdlhome'};
1079 my $infodbtype = $self->{'infodbtype'};
1080
1081 # If the import metadata and gdbm database have been updated, we
1082 # need to insert some notification to warn user that the the text
1083 # they see at the moment is not indexed and require a rebuild.
1084 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1085
1086 if ($baseaction::authentication_enabled) {
1087 # Ensure the user is allowed to edit this collection
1088 $self->authenticate_user($username, $collect);
1089 }
1090
1091 # Obtain the collect and archive dir
1092 my $site = $self->{'site'};
1093 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1094 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1095 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1096
1097 # Make sure the collection isn't locked by someone else
1098 $self->lock_collection($username, $collect);
1099
1100 # Check additional args
1101 my $docid = $self->{'d'};
1102 if (!defined($docid)) {
1103 $gsdl_cgi->generate_error("No document id is specified: d=...");
1104 }
1105 my $metaname = $self->{'metaname'};
1106 if (!defined($metaname)) {
1107 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1108 }
1109 my $metavalue = $self->{'metavalue'};
1110 if (!defined($metavalue) || $metavalue eq "") {
1111 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1112 }
1113 # make "accumulate" the default (less destructive, as won't actually
1114 # delete any existing values)
1115 my $metamode = "accumulate";
1116
1117 #=======================================================================#
1118 # set_import_metadata [START]
1119 #=======================================================================#
1120 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1121 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1122 my $metadata_xml_file;
1123 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1124 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1125
1126 # This now stores the full pathname
1127 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1128
1129 # figure out correct metadata.xml file [?]
1130 # Assuming the metadata.xml file is next to the source file
1131 # Note: This will not work if it is using the inherited metadata from the parent folder
1132 my ($import_tailname, $import_dirname)
1133 = File::Basename::fileparse($import_filename);
1134 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1135
1136 # Shane's escape characters
1137 $metavalue = pack "U0C*", unpack "C*", $metavalue;
1138 $metavalue =~ s/\,/&#44;/g;
1139 $metavalue =~ s/\:/&#58;/g;
1140 $metavalue =~ s/\|/&#124;/g;
1141 $metavalue =~ s/\(/&#40;/g;
1142 $metavalue =~ s/\)/&#41;/g;
1143 $metavalue =~ s/\[/&#91;/g;
1144 $metavalue =~ s/\\/&#92;/g;
1145 $metavalue =~ s/\]/&#93;/g;
1146 $metavalue =~ s/\{/&#123;/g;
1147 $metavalue =~ s/\}/&#125;/g;
1148 $metavalue =~ s/\"/&#34;/g;
1149 $metavalue =~ s/\`/&#96;/g;
1150 $metavalue =~ s/\n/_newline_/g;
1151
1152 # Edit the metadata.xml
1153 # Modified by Jeffrey from DL Consulting
1154 # Handle the case where there is one metadata.xml file for multiple FileSets
1155 # The XML filter needs to know whether it is in the right FileSet
1156 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1157 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1158 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1159 $metaname, $metavalue, $metamode, $import_tailname);
1160 #=======================================================================#
1161 # set_import_metadata [END]
1162 #=======================================================================#
1163
1164
1165 #=======================================================================#
1166 # set_metadata (accumulate version) [START]
1167 #=======================================================================#
1168 # To people who know $collect_tail please add some comments
1169 # Obtain path to the database
1170 my $collect_tail = $collect;
1171 $collect_tail =~ s/^.*[\/\\]//;
1172 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1173 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1174
1175 # Read the docid entry
1176 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1177
1178 foreach my $k (keys %$doc_rec) {
1179 my @escaped_v = ();
1180 foreach my $v (@{$doc_rec->{$k}}) {
1181 if ($k eq "contains") {
1182 # protect quotes in ".2;".3 etc
1183 $v =~ s/\"/\\\"/g;
1184 push(@escaped_v, $v);
1185 }
1186 else {
1187 my $ev = &ghtml::unescape_html($v);
1188 $ev =~ s/\"/\\\"/g;
1189 push(@escaped_v, $ev);
1190 }
1191 }
1192 $doc_rec->{$k} = \@escaped_v;
1193 }
1194
1195 # Protect the quotes
1196 $metavalue =~ s/\"/\\\"/g;
1197
1198 # Adds the pending macro
1199 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1200
1201 # If the metadata doesn't exist, create a new one
1202 if (!defined($doc_rec->{$metaname})){
1203 $doc_rec->{$metaname} = [ $macro_metavalue ];
1204 }
1205 # Else, let's acculumate the values
1206 else {
1207 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1208 }
1209
1210 # Generate the record string
1211 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1212
1213 # Store it into GDBM
1214 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1215 my $status = system($cmd);
1216 if ($status != 0) {
1217 # Catch error if gdbmget failed
1218 my $mess = "Failed to set metadata key: $docid\n";
1219
1220 $mess .= "PATH: $ENV{'PATH'}\n";
1221 $mess .= "cmd = $cmd\n";
1222 $mess .= "Exit status: $status\n";
1223 $mess .= "System Error Message: $!\n";
1224
1225 $gsdl_cgi->generate_error($mess);
1226 }
1227 else {
1228 my $mess = "insert-metadata successful: Key[$docid]\n";
1229 $mess .= " [In metadata.xml] $metaname";
1230 $mess .= " = $metavalue\n";
1231 $mess .= " [In database] $metaname";
1232 $mess .= " = $macro_metavalue\n";
1233 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1234 $gsdl_cgi->generate_ok_message($mess);
1235 }
1236 #=======================================================================#
1237 # set_metadata (accumulate version) [END]
1238 #=======================================================================#
1239
1240 # Release the lock once it is done
1241 $self->unlock_collection($username, $collect);
1242}
1243
12441;
Note: See TracBrowser for help on using the repository browser.