source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 23761

Last change on this file since 23761 was 23761, checked in by davidb, 13 years ago

General upgrading of the set metadata action to cover more cases (such as setting metadata values at the sub-section level). To ensure the output file correctly maintains it's 'UTF-8'-ness, I have had to change the code that explicity prints out the DOCTYPE tag -- the comment for this itself says this is a hack. Without the 'binmode(...)' then accented characters etc. will be incorrectly coded and the whole deck of cards comes crashing down. I noticed there is a new version of XML::Rule out, and so with luck this version has a better way to handle setting UTF-8 within its API, rather than resorting the the external 'binmode' now used. If so, then this would let us go back to printing out the DOCTYPE tag ... it might even be that this element can be more gracefully handled within the updated XML::Rule implementation.

File size: 40.3 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35
36BEGIN {
37# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
38 require XML::Rules;
39}
40
41
42@metadataaction::ISA = ('baseaction');
43
44
45my $action_table =
46{
47 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
48 'optional-args' => [] },
49
50 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
51 'optional-args' => [ "metapos" ] },
52
53 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
57 'optional-args' => [ "metapos" ] },
58
59 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60 'optional-args' => [ "metapos", "metamode" ]
61 # metamode can be "accumulate", "override",
62 },
63
64 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
65 'optional-args' => [ "d", "f", "metamode" ]
66 # metamode can be "accumulate", "override", or "unique-id"
67 },
68
69
70 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
71 'optional-args' => [ ] },
72
73 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
74 'optional-args' => [ "metapos" ] },
75
76 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
77 'optional-args' => [ ]
78 }
79};
80
81
82sub new
83{
84 my $class = shift (@_);
85 my ($gsdl_cgi,$iis6_mode) = @_;
86
87 # Treat metavalue specially. To transmit this through a GET request
88 # the Javascript side has url-encoded it, so here we need to decode
89 # it before proceeding
90
91 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
92 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
93
94 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
95
96 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
97
98 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
99
100 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
101
102 return bless $self, $class;
103}
104
105
106sub get_live_metadata
107{
108 my $self = shift @_;
109
110 my $username = $self->{'username'};
111 my $collect = $self->{'collect'};
112 my $gsdl_cgi = $self->{'gsdl_cgi'};
113 my $gsdlhome = $self->{'gsdlhome'};
114 my $infodbtype = $self->{'infodbtype'};
115
116 # live metadata gets/saves value scoped (prefixed) by the current usename
117 # so (for now) let's not bother to enforce authentication
118
119 # Obtain the collect dir
120 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
121
122 # Make sure the collection isn't locked by someone else
123 $self->lock_collection($username, $collect);
124
125 # look up additional args
126 my $docid = $self->{'d'};
127 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
128 $gsdl_cgi->generate_error("No docid (d=...) specified.");
129 }
130
131 # Generate the dbkey
132 my $metaname = $self->{'metaname'};
133 my $dbkey = "$docid.$metaname";
134
135 # To people who know $collect_tail please add some comments
136 # Obtain path to the database
137 my $collect_tail = $collect;
138 $collect_tail =~ s/^.*[\/|\\]//;
139 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
140 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
141
142 # Obtain the content of the key
143 my $cmd = "gdbmget $infodb_file_path $dbkey";
144 if (open(GIN,"$cmd |") == 0) {
145 # Catch error if gdbmget failed
146 my $mess = "Failed to get metadata key: $metaname\n";
147 $mess .= "$!\n";
148
149 $gsdl_cgi->generate_error($mess);
150 }
151 else {
152 binmode(GIN,":utf8");
153 # Read everything in and concatenate them into $metavalue
154 my $metavalue = "";
155 my $line;
156 while (defined ($line=<GIN>)) {
157 $metavalue .= $line;
158 }
159 close(GIN);
160 chomp($metavalue); # Get rid off the tailing newlines
161 $gsdl_cgi->generate_ok_message("$metavalue");
162 }
163
164 # Release the lock once it is done
165 $self->unlock_collection($username, $collect);
166}
167
168
169sub get_metadata
170{
171 my $self = shift @_;
172
173 my $username = $self->{'username'};
174 my $collect = $self->{'collect'};
175 my $gsdl_cgi = $self->{'gsdl_cgi'};
176 my $gsdlhome = $self->{'gsdlhome'};
177
178 # Authenticate user if it is enabled
179 if ($baseaction::authentication_enabled) {
180 # Ensure the user is allowed to edit this collection
181 &authenticate_user($gsdl_cgi, $username, $collect);
182 }
183
184 # Obtain the collect dir
185 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
186
187 # Make sure the collection isn't locked by someone else
188 $self->lock_collection($username, $collect);
189
190 # look up additional args
191 my $docid = $self->{'d'};
192 my $metaname = $self->{'metaname'};
193 my $metapos = $self->{'metapos'};
194 my $infodbtype = $self->{'infodbtype'};
195
196 # To people who know $collect_tail please add some comments
197 # Obtain path to the database
198 my $collect_tail = $collect;
199 $collect_tail =~ s/^.*[\/\\]//;
200 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
201 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
202
203 # Read the docid entry
204 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
205
206 # Basically loop through and unescape_html the values
207 foreach my $k (keys %$doc_rec) {
208 my @escaped_v = ();
209 foreach my $v (@{$doc_rec->{$k}}) {
210 my $ev = &ghtml::unescape_html($v);
211 push(@escaped_v, $ev);
212 }
213 $doc_rec->{$k} = \@escaped_v;
214 }
215
216 # Obtain the specified metadata value
217 $metapos = 0 if (!defined $metapos);
218 my $metavalue = $doc_rec->{$metaname}->[$metapos];
219 $gsdl_cgi->generate_ok_message("$metavalue");
220
221 # Release the lock once it is done
222 $self->unlock_collection($username, $collect);
223}
224
225
226sub set_live_metadata
227{
228 my $self = shift @_;
229
230 my $username = $self->{'username'};
231 my $collect = $self->{'collect'};
232 my $gsdl_cgi = $self->{'gsdl_cgi'};
233 my $gsdlhome = $self->{'gsdlhome'};
234 my $infodbtype = $self->{'infodbtype'};
235
236 if ($baseaction::authentication_enabled) {
237 # Ensure the user is allowed to edit this collection
238 &authenticate_user($gsdl_cgi, $username, $collect);
239 }
240
241 # Obtain the collect dir
242 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
243
244 # Make sure the collection isn't locked by someone else
245 $self->lock_collection($username, $collect);
246
247 # look up additional args
248 my $docid = $self->{'d'};
249 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
250 $gsdl_cgi->generate_error("No docid (d=...) specified.");
251 }
252 my $metavalue = $self->{'metavalue'};
253
254
255 # Generate the dbkey
256 my $metaname = $self->{'metaname'};
257 my $dbkey = "$docid.$metaname";
258
259 # To people who know $collect_tail please add some comments
260 # Obtain path to the database
261 my $collect_tail = $collect;
262 $collect_tail =~ s/^.*[\/\\]//;
263 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
264 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
265
266 # Set the new value
267 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
268 my $status = system($cmd);
269 if ($status != 0) {
270 # Catch error if gdbmget failed
271 my $mess = "Failed to set metadata key: $dbkey\n";
272
273 $mess .= "PATH: $ENV{'PATH'}\n";
274 $mess .= "cmd = $cmd\n";
275 $mess .= "Exit status: $status\n";
276 $mess .= "System Error Message: $!\n";
277
278 $gsdl_cgi->generate_error($mess);
279 }
280 else {
281 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
282 }
283
284 # Release the lock once it is done
285 $self->unlock_collection($username, $collect);
286}
287
288
289sub set_metadata
290{
291 my $self = shift @_;
292
293 my $username = $self->{'username'};
294 my $collect = $self->{'collect'};
295 my $gsdl_cgi = $self->{'gsdl_cgi'};
296 my $gsdlhome = $self->{'gsdlhome'};
297
298 if ($baseaction::authentication_enabled) {
299 # Ensure the user is allowed to edit this collection
300 &authenticate_user($gsdl_cgi, $username, $collect);
301 }
302
303 # Obtain the collect dir
304 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
305
306 # Make sure the collection isn't locked by someone else
307 $self->lock_collection($username, $collect);
308
309 # look up additional args
310 my $docid = $self->{'d'};
311 my $metaname = $self->{'metaname'};
312 my $metapos = $self->{'metapos'};
313 my $metavalue = $self->{'metavalue'};
314 my $infodbtype = $self->{'infodbtype'};
315
316 # To people who know $collect_tail please add some comments
317 # Obtain path to the database
318 my $collect_tail = $collect;
319 $collect_tail =~ s/^.*[\/\\]//;
320 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
321 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
322
323 # Read the docid entry
324 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
325
326 # Set the metadata value
327 if (defined $metapos) {
328 $doc_rec->{$metaname}->[$metapos] = $metavalue;
329 }
330 else {
331 $doc_rec->{$metaname} = [ $metavalue ];
332 }
333
334 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
335 if ($status != 0) {
336 # Catch error if set infodb entry failed
337 my $mess = "Failed to set metadata key: $docid\n";
338
339 $mess .= "PATH: $ENV{'PATH'}\n";
340 $mess .= "Exit status: $status\n";
341 $mess .= "System Error Message: $!\n";
342
343 $gsdl_cgi->generate_error($mess);
344 }
345 else {
346 my $mess = "set-document-metadata successful: Key[$docid]\n";
347 $mess .= " $metaname";
348 $mess .= "->[$metapos]" if (defined $metapos);
349 $mess .= " = $metavalue";
350
351 $gsdl_cgi->generate_ok_message($mess);
352 }
353
354 # Release the lock once it is done
355 $self->unlock_collection($username, $collect);
356}
357
358
359sub dxml_metadata
360{
361 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
362 my $metaname = $parser->{'parameters'}->{'metaname'};
363 my $metamode = $parser->{'parameters'}->{'metamode'};
364
365 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
366
367 # Find the right metadata tag and checks if we are going to
368 # override it
369 #
370 # Note: This over writes the first metadata block it
371 # encountered. If there are multiple Sections in the doc.xml, it
372 # might not behave as you would expect
373
374 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
375## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
376## print STDERR "**** metamode = $metamode\n";
377
378 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
379 my $name_attr = $attrHash->{'name'};
380 if (($name_attr eq $metaname) && ($metamode eq "override")) {
381## print STDERR "**** got match!!\n";
382 # Get the value and override the current value
383 my $metavalue = $parser->{'parameters'}->{'metavalue'};
384 $attrHash->{'_content'} = $metavalue;
385
386 # Don't want it to wipe out any other pieces of metadata
387 $parser->{'parameters'}->{'metamode'} = "done";
388 }
389 }
390
391 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
392 return [$tagname => $attrHash];
393}
394
395
396sub dxml_description
397{
398 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
399 my $metamode = $parser->{'parameters'}->{'metamode'};
400
401 # Accumulate the metadata
402 # NOTE: This appends new metadata element to all description fields.
403 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
404 if (($metamode eq "accumulate") || ($metamode eq "override")) {
405 # If get to here and metamode is override, the this means there
406 # was no existing value to overide => treat as an append operation
407
408 # Tack a new metadata tag on to the end of the <Metadata>+ block
409 my $metaname = $parser->{'parameters'}->{'metaname'};
410 my $metavalue = $parser->{'parameters'}->{'metavalue'};
411
412 my $metadata_attr = { '_content' => $metavalue,
413 'name' => $metaname,
414 'mode' => "accumulate" };
415
416 my $append_metadata = [ "Metadata" => $metadata_attr ];
417 my $description_content = $attrHash->{'_content'};
418
419## print STDERR "**** appending to doc.xml\n";
420
421 push(@$description_content," ", $append_metadata ,"\n ");
422 $parser->{'parameters'}->{'metamode'} = "done";
423 }
424
425
426 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
427 return [$tagname => $attrHash];
428}
429
430
431
432sub dxml_start_section
433{
434 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
435
436 my $new_depth = scalar(@$contextArray);
437
438 if ($new_depth == 1) {
439 $parser->{'parameters'}->{'curr_section_depth'} = 1;
440 $parser->{'parameters'}->{'curr_section_num'} = "";
441 }
442
443 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
444 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
445
446 my $new_secnum;
447
448 if ($new_depth > $old_depth) {
449 # child subsection
450 $new_secnum = "$old_secnum.1";
451 }
452 elsif ($new_depth == $old_depth) {
453 # sibling section => increase it's value by 1
454 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
455 $tail_num++;
456 $new_secnum = $old_secnum;
457 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
458 }
459 else {
460 # back up to parent section => lopp off tail
461 $new_secnum = $old_secnum;
462 $new_secnum =~ s/\.\d+$//;
463 }
464
465 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
466 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
467
468 print STDERR "*** In Section: $new_secnum\n";
469}
470
471sub edit_xml_file
472{
473 my $self = shift @_;
474 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
475
476 # use XML::Rules to add it in (read in and out again)
477 my $parser = XML::Rules->new(start_rules => $start_rules,
478 rules => $rules,
479 style => 'filter',
480 output_encoding => 'utf8' );
481
482 my $xml_in = "";
483 if (!open(MIN,"<$filename")) {
484 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
485 }
486 else {
487 # Read all the text in
488 my $line;
489 while (defined ($line=<MIN>)) {
490 $xml_in .= $line;
491 }
492 close(MIN);
493
494 my $MOUT;
495 if (!open($MOUT,">$filename")) {
496 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
497 }
498 else {
499 # Matched lines will get handled by the call backs
500## my $xml_out = "";
501
502 binmode($MOUT,":utf8");
503 $parser->filter($xml_in,$MOUT, $options);
504
505# binmode(MOUT,":utf8");
506# print MOUT $xml_out;
507 close($MOUT);
508 }
509 }
510}
511
512
513sub edit_doc_xml
514{
515 my $self = shift @_;
516 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
517
518 # To monitor which section/subsection number we are in
519 my @start_rules =
520 ( 'Section' => \&dxml_start_section );
521
522 # use XML::Rules to add it in (read in and out again)
523 # Set the call back functions
524 my @rules =
525 ( _default => 'raw',
526 'Metadata' => \&dxml_metadata,
527 'Description' => \&dxml_description);
528
529 # Sets the parameters
530 my $options = { 'metaname' => $metaname,
531 'metapos' => $metapos,
532 'metavalue' => $metavalue,
533 'metamode' => $metamode };
534
535 if (defined $opt_secnum) {
536 $options->{'secnum'} = $opt_secnum;
537 }
538
539 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
540}
541
542
543sub set_archives_metadata
544{
545 my $self = shift @_;
546
547 my $username = $self->{'username'};
548 my $collect = $self->{'collect'};
549 my $gsdl_cgi = $self->{'gsdl_cgi'};
550 my $gsdlhome = $self->{'gsdlhome'};
551 my $infodbtype = $self->{'infodbtype'};
552
553 if ($baseaction::authentication_enabled) {
554 # Ensure the user is allowed to edit this collection
555 $self->authenticate_user($username, $collect);
556 }
557
558 # Obtain the collect and archive dir
559 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
560 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
561
562 # Make sure the collection isn't locked by someone else
563 $self->lock_collection($username, $collect);
564
565 # look up additional args
566 my $docid = $self->{'d'};
567 my $metaname = $self->{'metaname'};
568 my $metavalue = $self->{'metavalue'};
569
570 my $metapos = $self->{'metapos'};
571 $metapos = 0 if (!defined $metapos);
572
573 my $metamode = $self->{'metamode'};
574 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
575 # make "accumulate" the default (less destructive, as won't actually
576 # delete any existing values)
577 $metamode = "accumulate";
578 }
579
580 # Obtain the doc.xml path for the specified docID
581 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
582
583 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
584 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
585 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
586
587 # The $doc_xml_file is relative to the archives, and now let's get the full path
588 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
589 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
590
591 # Edit the doc.xml file with the specified metadata name, value and position.
592 # TODO: there is a potential problem here as this edit_doc_xml function
593 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
594 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
595 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
596 $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
597
598 # Release the lock once it is done
599 $self->unlock_collection($username, $collect);
600
601 my $mess = "set-archives-metadata successful: Key[$docid]\n";
602 $mess .= " $metaname";
603 $mess .= "->[$metapos]" if (defined $metapos);
604 $mess .= " = $metavalue";
605 $mess .= " ($metamode)\n";
606
607 $gsdl_cgi->generate_ok_message($mess);
608}
609
610
611sub mxml_metadata
612{
613 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
614 my $metaname = $parser->{'parameters'}->{'metaname'};
615 my $metamode = $parser->{'parameters'}->{'metamode'};
616
617 # Report error if we don't see FileName tag before this
618 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
619
620 # Don't do anything if we are not in the right FileSet
621 my $file_regexp = $parser->{'parameters'}->{'current_file'};
622 if ($file_regexp =~ /\.\*/) {
623 # Only interested in a file_regexp if it specifies precisely one
624 # file.
625 # So, skip anything with a .* in it as it is too general
626 return [$tagname => $attrHash];
627 }
628 my $src_file = $parser->{'parameters'}->{'src_file'};
629 if (!($src_file =~ /$file_regexp/)) {
630 return [$tagname => $attrHash];
631 }
632## print STDERR "*** mxl metamode = $metamode\n";
633
634 # Find the right metadata tag and checks if we are going to override it
635 my $name_attr = $attrHash->{'name'};
636 if (($name_attr eq $metaname) && ($metamode eq "override")) {
637 # Get the value and override the current value
638 my $metavalue = $parser->{'parameters'}->{'metavalue'};
639 $attrHash->{'_content'} = $metavalue;
640
641## print STDERR "**** overrideing metadata.xml\n";
642
643 # Don't want it to wipe out any other pieces of metadata
644 $parser->{'parameters'}->{'metamode'} = "done";
645 }
646
647 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
648 return [$tagname => $attrHash];
649}
650
651
652sub mxml_description
653{
654 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
655 my $metamode = $parser->{'parameters'}->{'metamode'};
656
657 # Failed... Report error if we don't see FileName tag before this
658 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
659
660 # Don't do anything if we are not in the right FileSet
661 my $file_regexp = $parser->{'parameters'}->{'current_file'};
662 if ($file_regexp =~ /\.\*/) {
663 # Only interested in a file_regexp if it specifies precisely one
664 # file.
665 # So, skip anything with a .* in it as it is too general
666 return [$tagname => $attrHash];
667 }
668 my $src_file = $parser->{'parameters'}->{'src_file'};
669 if (!($src_file =~ /$file_regexp/)) {
670 return [$tagname => $attrHash];
671 }
672
673 # Accumulate the metadata block to the end of the description block
674 # Note: This adds metadata block to all description blocks, so if there are
675 # multiple FileSets, it will add to all of them
676 if (($metamode eq "accumulate") || ($metamode eq "override")) {
677 # if metamode was "override" but get to here then it failed to
678 # find an item to override, in which case it should append its
679 # value to the end, just like the "accumulate" mode
680
681 # tack a new metadata tag on to the end of the <Metadata>+ block
682 my $metaname = $parser->{'parameters'}->{'metaname'};
683 my $metavalue = $parser->{'parameters'}->{'metavalue'};
684
685 my $metadata_attr = { '_content' => $metavalue,
686 'name' => $metaname,
687 'mode' => "accumulate" };
688
689 my $append_metadata = [ "Metadata" => $metadata_attr ];
690 my $description_content = $attrHash->{'_content'};
691
692## print STDERR "*** appending to metadata.xml\n";
693
694 # append the new metadata element to the end of the current
695 # content contained inside this tag
696 push(@$description_content," ", $append_metadata ,"\n ");
697
698 $parser->{'parameters'}->{'metamode'} = "done";
699 }
700
701 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
702 return [$tagname => $attrHash];
703}
704
705
706sub mxml_filename
707{
708 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
709
710 # Store the filename of the Current Fileset
711 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
712 # FileName tag must come before Description tag
713 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
714
715 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
716 return [$tagname => $attrHash];
717}
718
719
720sub mxml_fileset
721{
722 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
723
724 # Initilise the current_file
725 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
726 # FileName tag must come before Description tag
727 $parser->{'parameters'}->{'current_file'} = "";
728
729 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
730 return [$tagname => $attrHash];
731}
732
733
734sub edit_metadata_xml
735{
736 my $self = shift @_;
737 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
738
739 # Set the call-back functions for the metadata tags
740 my @rules =
741 ( _default => 'raw',
742 'FileName' => \&mxml_filename,
743 'Metadata' => \&mxml_metadata,
744 'Description' => \&mxml_description,
745 'FileSet' => \&mxml_fileset);
746
747 # use XML::Rules to add it in (read in and out again)
748 my $parser = XML::Rules->new(rules => \@rules,
749 style => 'filter',
750 output_encoding => 'utf8');
751
752 my $xml_in = "";
753 if (!open(MIN,"<$metadata_xml_filename")) {
754 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
755 }
756 else {
757 # Read them in
758 my $line;
759 while (defined ($line=<MIN>)) {
760 $xml_in .= $line;
761 }
762 close(MIN);
763
764 # Filter with the call-back functions
765 my $xml_out = "";
766
767 my $MOUT;
768 if (!open($MOUT,">$metadata_xml_filename")) {
769 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
770 }
771 else {
772 binmode($MOUT,":utf8");
773
774 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
775 # At the moment, I will just hack it!
776 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
777 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
778 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
779 #print MOUT $xml_out;
780
781 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
782 metavalue => $metavalue,
783 metamode => $metamode,
784 src_file => $src_file,
785 current_file => undef} );
786 close($MOUT);
787 }
788 }
789}
790
791
792sub set_import_metadata
793{
794 my $self = shift @_;
795
796 my $username = $self->{'username'};
797 my $collect = $self->{'collect'};
798 my $gsdl_cgi = $self->{'gsdl_cgi'};
799 my $gsdlhome = $self->{'gsdlhome'};
800 my $infodbtype = $self->{'infodbtype'};
801
802 if ($baseaction::authentication_enabled) {
803 # Ensure the user is allowed to edit this collection
804 $self->authenticate_user($username, $collect);
805 }
806
807
808 # Obtain the collect and archive dir
809 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
810 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
811
812 # Make sure the collection isn't locked by someone else
813 $self->lock_collection($username, $collect);
814
815 # look up additional args
816 # want either d= or f=
817 my $docid = $self->{'d'};
818 my $import_file = $self->{'f'};
819 if ((!defined $docid) && (!defined $import_file)) {
820 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
821 }
822
823 # Get the parameters and set default mode to "accumulate"
824 my $metaname = $self->{'metaname'};
825 my $metavalue = $self->{'metavalue'};
826## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
827 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
828 print STDERR "*** set import meta: val = $metavalue\n";
829
830 my $metamode = $self->{'metamode'};
831 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
832 # make "accumulate" the default (less destructive, as won't actually
833 # delete any existing values)
834 $metamode = "accumulate";
835 }
836
837 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
838 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
839 my $metadata_xml_file;
840 my $import_filename = undef;
841 if (defined $docid) {
842 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
843 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
844
845 # This now stores the full pathname
846 $import_filename = $doc_rec->{'src-file'}->[0];
847 }
848 else {
849 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
850 }
851
852 # figure out correct metadata.xml file [?]
853 # Assuming the metadata.xml file is next to the source file
854 # Note: This will not work if it is using the inherited metadata from the parent folder
855 my ($import_tailname, $import_dirname)
856 = File::Basename::fileparse($import_filename);
857 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
858
859 # Edit the metadata.xml
860 # Modified by Jeffrey from DL Consulting
861 # Handle the case where there is one metadata.xml file for multiple FileSets
862 # The XML filter needs to know whether it is in the right FileSet
863 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
864 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
865 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
866 $metaname, $metavalue, $metamode, $import_tailname);
867
868 # Release the lock once it is done
869 $self->unlock_collection($username, $collect);
870
871 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
872 $mess .= " $metaname";
873 $mess .= " = $metavalue";
874 $mess .= " ($metamode)\n";
875
876 $gsdl_cgi->generate_ok_message($mess);
877
878}
879
880
881sub remove_live_metadata
882{
883 my $self = shift @_;
884
885 my $username = $self->{'username'};
886 my $collect = $self->{'collect'};
887 my $gsdl_cgi = $self->{'gsdl_cgi'};
888 my $gsdlhome = $self->{'gsdlhome'};
889 my $infodbtype = $self->{'infodbtype'};
890
891 if ($baseaction::authentication_enabled) {
892 # Ensure the user is allowed to edit this collection
893 &authenticate_user($gsdl_cgi, $username, $collect);
894 }
895
896 # Obtain the collect dir
897 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
898
899 # Make sure the collection isn't locked by someone else
900 $self->lock_collection($username, $collect);
901
902 # look up additional args
903 my $docid = $self->{'d'};
904 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
905 $gsdl_cgi->generate_error("No docid (d=...) specified.");
906 }
907
908 # Generate the dbkey
909 my $metaname = $self->{'metaname'};
910 my $dbkey = "$docid.$metaname";
911
912 # To people who know $collect_tail please add some comments
913 # Obtain the live gdbm_db path
914 my $collect_tail = $collect;
915 $collect_tail =~ s/^.*[\/\\]//;
916 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
917 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
918
919 # Remove the key
920 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
921 my $status = system($cmd);
922 if ($status != 0) {
923 # Catch error if gdbmdel failed
924 my $mess = "Failed to set metadata key: $dbkey\n";
925
926 $mess .= "PATH: $ENV{'PATH'}\n";
927 $mess .= "cmd = $cmd\n";
928 $mess .= "Exit status: $status\n";
929 $mess .= "System Error Message: $!\n";
930
931 $gsdl_cgi->generate_error($mess);
932 }
933 else {
934 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
935 }
936
937}
938
939
940sub remove_metadata
941{
942 my $self = shift @_;
943
944 my $username = $self->{'username'};
945 my $collect = $self->{'collect'};
946 my $gsdl_cgi = $self->{'gsdl_cgi'};
947 my $gsdlhome = $self->{'gsdlhome'};
948 my $infodbtype = $self->{'infodbtype'};
949
950 if ($baseaction::authentication_enabled) {
951 # Ensure the user is allowed to edit this collection
952 &authenticate_user($gsdl_cgi, $username, $collect);
953 }
954
955 # Obtain the collect dir
956 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
957
958 # Make sure the collection isn't locked by someone else
959 $self->lock_collection($username, $collect);
960
961 # look up additional args
962 my $docid = $self->{'d'};
963 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
964 $gsdl_cgi->generate_error("No docid (d=...) specified.");
965 }
966 my $metaname = $self->{'metaname'};
967 my $metapos = $self->{'metapos'};
968
969 # To people who know $collect_tail please add some comments
970 # Obtain the path to the database
971 my $collect_tail = $collect;
972 $collect_tail =~ s/^.*[\/\\]//;
973 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
974 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
975
976 # Read the docid entry
977 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
978
979 # Basically loop through and unescape_html the values
980 foreach my $k (keys %$doc_rec) {
981 my @escaped_v = ();
982 foreach my $v (@{$doc_rec->{$k}}) {
983 if ($k eq "contains") {
984 # protect quotes in ".2;".3 etc
985 $v =~ s/\"/\\\"/g;
986 push(@escaped_v, $v);
987 }
988 else {
989 my $ev = &ghtml::unescape_html($v);
990 $ev =~ s/\"/\\\"/g;
991 push(@escaped_v, $ev);
992 }
993 }
994 $doc_rec->{$k} = \@escaped_v;
995 }
996
997 # Check to make sure the key does exist
998 if (!defined ($doc_rec->{$metaname})) {
999 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1000 }
1001
1002 # Obtain the specified metadata pos
1003 $metapos = 0 if (!defined $metapos);
1004
1005 # consider check key is defined before deleting?
1006 # Loop through the metadata array and ignore the specified position
1007 my $filtered_metadata = [];
1008 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
1009 for (my $i=0; $i<$num_metadata_vals; $i++) {
1010 my $metavalue = shift(@{$doc_rec->{$metaname}});
1011
1012 if ($i != $metapos) {
1013 push(@$filtered_metadata,$metavalue)
1014 }
1015 }
1016 $doc_rec->{$metaname} = $filtered_metadata;
1017
1018 # Turn the record back to string
1019 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1020
1021 # Store it back to the database
1022 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1023 my $status = system($cmd);
1024 if ($status != 0) {
1025 my $mess = "Failed to set metadata key: $docid\n";
1026
1027 $mess .= "PATH: $ENV{'PATH'}\n";
1028 $mess .= "cmd = $cmd\n";
1029 $mess .= "Exit status: $status\n";
1030 $mess .= "System Error Message: $!\n";
1031
1032 $gsdl_cgi->generate_error($mess);
1033 }
1034 else {
1035 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1036 $mess .= " $metaname";
1037 $mess .= "->[$metapos]" if (defined $metapos);
1038
1039 $gsdl_cgi->generate_ok_message($mess);
1040 }
1041}
1042
1043
1044# Was trying to reused the codes, but the functions need to be broken
1045# down more before they can be reused, otherwise there will be too
1046# much overhead and duplicate process...
1047sub insert_metadata
1048{
1049 my $self = shift @_;
1050
1051 my $username = $self->{'username'};
1052 my $collect = $self->{'collect'};
1053 my $gsdl_cgi = $self->{'gsdl_cgi'};
1054 my $gsdlhome = $self->{'gsdlhome'};
1055 my $infodbtype = $self->{'infodbtype'};
1056
1057 # If the import metadata and gdbm database have been updated, we
1058 # need to insert some notification to warn user that the the text
1059 # they see at the moment is not indexed and require a rebuild.
1060 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1061
1062 if ($baseaction::authentication_enabled) {
1063 # Ensure the user is allowed to edit this collection
1064 $self->authenticate_user($username, $collect);
1065 }
1066
1067 # Obtain the collect and archive dir
1068 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1069 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1070
1071 # Make sure the collection isn't locked by someone else
1072 $self->lock_collection($username, $collect);
1073
1074 # Check additional args
1075 my $docid = $self->{'d'};
1076 if (!defined($docid)) {
1077 $gsdl_cgi->generate_error("No document id is specified: d=...");
1078 }
1079 my $metaname = $self->{'metaname'};
1080 if (!defined($metaname)) {
1081 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1082 }
1083 my $metavalue = $self->{'metavalue'};
1084 if (!defined($metavalue) || $metavalue eq "") {
1085 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1086 }
1087 # make "accumulate" the default (less destructive, as won't actually
1088 # delete any existing values)
1089 my $metamode = "accumulate";
1090
1091 #=======================================================================#
1092 # set_import_metadata [START]
1093 #=======================================================================#
1094 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1095 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1096 my $metadata_xml_file;
1097 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1098 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1099
1100 # This now stores the full pathname
1101 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1102
1103 # figure out correct metadata.xml file [?]
1104 # Assuming the metadata.xml file is next to the source file
1105 # Note: This will not work if it is using the inherited metadata from the parent folder
1106 my ($import_tailname, $import_dirname)
1107 = File::Basename::fileparse($import_filename);
1108 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1109
1110 # Shane's escape characters
1111 $metavalue = pack "U0C*", unpack "C*", $metavalue;
1112 $metavalue =~ s/\,/&#44;/g;
1113 $metavalue =~ s/\:/&#58;/g;
1114 $metavalue =~ s/\|/&#124;/g;
1115 $metavalue =~ s/\(/&#40;/g;
1116 $metavalue =~ s/\)/&#41;/g;
1117 $metavalue =~ s/\[/&#91;/g;
1118 $metavalue =~ s/\\/&#92;/g;
1119 $metavalue =~ s/\]/&#93;/g;
1120 $metavalue =~ s/\{/&#123;/g;
1121 $metavalue =~ s/\}/&#125;/g;
1122 $metavalue =~ s/\"/&#34;/g;
1123 $metavalue =~ s/\`/&#96;/g;
1124 $metavalue =~ s/\n/_newline_/g;
1125
1126 # Edit the metadata.xml
1127 # Modified by Jeffrey from DL Consulting
1128 # Handle the case where there is one metadata.xml file for multiple FileSets
1129 # The XML filter needs to know whether it is in the right FileSet
1130 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1131 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1132 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1133 $metaname, $metavalue, $metamode, $import_tailname);
1134 #=======================================================================#
1135 # set_import_metadata [END]
1136 #=======================================================================#
1137
1138
1139 #=======================================================================#
1140 # set_metadata (accumulate version) [START]
1141 #=======================================================================#
1142 # To people who know $collect_tail please add some comments
1143 # Obtain path to the database
1144 my $collect_tail = $collect;
1145 $collect_tail =~ s/^.*[\/\\]//;
1146 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1147 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1148
1149 # Read the docid entry
1150 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1151
1152 foreach my $k (keys %$doc_rec) {
1153 my @escaped_v = ();
1154 foreach my $v (@{$doc_rec->{$k}}) {
1155 if ($k eq "contains") {
1156 # protect quotes in ".2;".3 etc
1157 $v =~ s/\"/\\\"/g;
1158 push(@escaped_v, $v);
1159 }
1160 else {
1161 my $ev = &ghtml::unescape_html($v);
1162 $ev =~ s/\"/\\\"/g;
1163 push(@escaped_v, $ev);
1164 }
1165 }
1166 $doc_rec->{$k} = \@escaped_v;
1167 }
1168
1169 # Protect the quotes
1170 $metavalue =~ s/\"/\\\"/g;
1171
1172 # Adds the pending macro
1173 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1174
1175 # If the metadata doesn't exist, create a new one
1176 if (!defined($doc_rec->{$metaname})){
1177 $doc_rec->{$metaname} = [ $macro_metavalue ];
1178 }
1179 # Else, let's acculumate the values
1180 else {
1181 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1182 }
1183
1184 # Generate the record string
1185 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1186
1187 # Store it into GDBM
1188 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1189 my $status = system($cmd);
1190 if ($status != 0) {
1191 # Catch error if gdbmget failed
1192 my $mess = "Failed to set metadata key: $docid\n";
1193
1194 $mess .= "PATH: $ENV{'PATH'}\n";
1195 $mess .= "cmd = $cmd\n";
1196 $mess .= "Exit status: $status\n";
1197 $mess .= "System Error Message: $!\n";
1198
1199 $gsdl_cgi->generate_error($mess);
1200 }
1201 else {
1202 my $mess = "insert-metadata successful: Key[$docid]\n";
1203 $mess .= " [In metadata.xml] $metaname";
1204 $mess .= " = $metavalue\n";
1205 $mess .= " [In database] $metaname";
1206 $mess .= " = $macro_metavalue\n";
1207 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1208 $gsdl_cgi->generate_ok_message($mess);
1209 }
1210 #=======================================================================#
1211 # set_metadata (accumulate version) [END]
1212 #=======================================================================#
1213
1214 # Release the lock once it is done
1215 $self->unlock_collection($username, $collect);
1216}
1217
12181;
Note: See TracBrowser for help on using the repository browser.