source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 22331

Last change on this file since 22331 was 22331, checked in by ak19, 14 years ago

Goes with the changes made in revision 21822: fixed several additional perl files that depended on perl 5.8 to work and used to fail with Perl 5.10.

File size: 36.6 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35
36BEGIN {
37# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
38 require XML::Rules;
39}
40
41
42@metadataaction::ISA = ('baseaction');
43
44
45my $action_table =
46{
47 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
48 'optional-args' => [] },
49
50 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
51 'optional-args' => [ "metapos" ] },
52
53 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
57 'optional-args' => [ "metapos" ] },
58
59 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60 'optional-args' => [ "metapos" ]
61
62 },
63
64 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
65 'optional-args' => [ "d", "f", "metamode" ]
66 # metamode can be "accumulate", "override", or "unique-id"
67 },
68
69
70 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
71 'optional-args' => [ ] },
72
73 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
74 'optional-args' => [ "metapos" ] },
75
76 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
77 'optional-args' => [ ]
78 }
79};
80
81
82sub new
83{
84 my $class = shift (@_);
85 my ($gsdl_cgi,$iis6_mode) = @_;
86
87 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
88
89 return bless $self, $class;
90}
91
92
93sub get_live_metadata
94{
95 my $self = shift @_;
96
97 my $username = $self->{'username'};
98 my $collect = $self->{'collect'};
99 my $gsdl_cgi = $self->{'gsdl_cgi'};
100 my $gsdlhome = $self->{'gsdlhome'};
101
102 # Note: Not sure why get_live_metadata doesn't need the authentication check
103
104 # Obtain the collect dir
105 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
106
107 # Make sure the collection isn't locked by someone else
108 $self->lock_collection($username, $collect);
109
110 # look up additional args
111 my $docid = $self->{'d'};
112 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
113 $gsdl_cgi->generate_error("No docid (d=...) specified.");
114 }
115
116 # Generate the dbkey
117 my $metaname = $self->{'metaname'};
118 my $dbkey = "$docid.$metaname";
119
120 # To people who know $collect_tail please add some comments
121 # Obtain path to the database
122 my $collect_tail = $collect;
123 $collect_tail =~ s/^.*[\/|\\]//;
124 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
125 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory);
126
127 # Obtain the content of the key
128 my $cmd = "gdbmget $infodb_file_path $dbkey";
129 if (open(GIN,"$cmd |") == 0) {
130 # Catch error if gdbmget failed
131 my $mess = "Failed to get metadata key: $metaname\n";
132 $mess .= "$!\n";
133
134 $gsdl_cgi->generate_error($mess);
135 }
136 else {
137 # Read everything in and concatenate them into $metavalue
138 my $metavalue = "";
139 my $line;
140 while (defined ($line=<GIN>)) {
141 $metavalue .= $line;
142 }
143 close(GIN);
144 chomp($metavalue); # Get rid off the tailing newlines
145 $gsdl_cgi->generate_ok_message("$metavalue");
146 }
147
148 # Release the lock once it is done
149 $self->unlock_collection($username, $collect);
150}
151
152
153sub get_metadata
154{
155 my $self = shift @_;
156
157 my $username = $self->{'username'};
158 my $collect = $self->{'collect'};
159 my $gsdl_cgi = $self->{'gsdl_cgi'};
160 my $gsdlhome = $self->{'gsdlhome'};
161
162 # Authenticate user if it is enabled
163 if ($baseaction::authentication_enabled) {
164 # Ensure the user is allowed to edit this collection
165 &authenticate_user($gsdl_cgi, $username, $collect);
166 }
167
168 # Obtain the collect dir
169 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
170
171 # Make sure the collection isn't locked by someone else
172 $self->lock_collection($username, $collect);
173
174 # look up additional args
175 my $docid = $self->{'d'};
176 my $metaname = $self->{'metaname'};
177 my $metapos = $self->{'metapos'};
178
179 # To people who know $collect_tail please add some comments
180 # Obtain path to the database
181 my $collect_tail = $collect;
182 $collect_tail =~ s/^.*[\/\\]//;
183 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
184 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
185
186 # Read the docid entry
187 my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
188 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
189
190 # Basically loop through and unescape_html the values
191 foreach my $k (keys %$doc_rec) {
192 my @escaped_v = ();
193 foreach my $v (@{$doc_rec->{$k}}) {
194 my $ev = &ghtml::unescape_html($v);
195 push(@escaped_v, $ev);
196 }
197 $doc_rec->{$k} = \@escaped_v;
198 }
199
200 # Obtain the specified metadata value
201 $metapos = 0 if (!defined $metapos);
202 my $metavalue = $doc_rec->{$metaname}->[$metapos];
203 $gsdl_cgi->generate_ok_message("$metavalue");
204
205 # Release the lock once it is done
206 $self->unlock_collection($username, $collect);
207}
208
209
210sub set_live_metadata
211{
212 my $self = shift @_;
213
214 my $username = $self->{'username'};
215 my $collect = $self->{'collect'};
216 my $gsdl_cgi = $self->{'gsdl_cgi'};
217 my $gsdlhome = $self->{'gsdlhome'};
218
219 # don't user authenticate for now
220 if ($baseaction::authentication_enabled) {
221 # Ensure the user is allowed to edit this collection
222 &authenticate_user($gsdl_cgi, $username, $collect);
223 }
224
225 # Obtain the collect dir
226 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
227
228 # Make sure the collection isn't locked by someone else
229 $self->lock_collection($username, $collect);
230
231 # look up additional args
232 my $docid = $self->{'d'};
233 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
234 $gsdl_cgi->generate_error("No docid (d=...) specified.");
235 }
236 my $metavalue = $self->{'metavalue'};
237
238 # Generate the dbkey
239 my $metaname = $self->{'metaname'};
240 my $dbkey = "$docid.$metaname";
241
242 # To people who know $collect_tail please add some comments
243 # Obtain path to the database
244 my $collect_tail = $collect;
245 $collect_tail =~ s/^.*[\/\\]//;
246 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
247 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory);
248
249 # Set the new value
250 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
251 my $status = system($cmd);
252 if ($status != 0) {
253 # Catch error if gdbmget failed
254 my $mess = "Failed to set metadata key: $dbkey\n";
255
256 $mess .= "PATH: $ENV{'PATH'}\n";
257 $mess .= "cmd = $cmd\n";
258 $mess .= "Exit status: $status\n";
259 $mess .= "System Error Message: $!\n";
260
261 $gsdl_cgi->generate_error($mess);
262 }
263 else {
264 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
265 }
266
267 # Release the lock once it is done
268 $self->unlock_collection($username, $collect);
269}
270
271
272sub set_metadata
273{
274 my $self = shift @_;
275
276 my $username = $self->{'username'};
277 my $collect = $self->{'collect'};
278 my $gsdl_cgi = $self->{'gsdl_cgi'};
279 my $gsdlhome = $self->{'gsdlhome'};
280
281 # don't user authenticate for now
282 if ($baseaction::authentication_enabled) {
283 # Ensure the user is allowed to edit this collection
284 &authenticate_user($gsdl_cgi, $username, $collect);
285 }
286
287 # Obtain the collect dir
288 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
289
290 # Make sure the collection isn't locked by someone else
291 $self->lock_collection($username, $collect);
292
293 # look up additional args
294 my $docid = $self->{'d'};
295 my $metaname = $self->{'metaname'};
296 my $metapos = $self->{'metapos'};
297 my $metavalue = $self->{'metavalue'};
298
299 # To people who know $collect_tail please add some comments
300 # Obtain path to the database
301 my $collect_tail = $collect;
302 $collect_tail =~ s/^.*[\/\\]//;
303 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
304 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
305
306 # Read the docid entry
307 my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
308 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
309 foreach my $k (keys %$doc_rec) {
310 my @escaped_v = ();
311 foreach my $v (@{$doc_rec->{$k}}) {
312 if ($k eq "contains") {
313 # protect quotes in ".2;".3 etc
314 $v =~ s/\"/\\\"/g;
315 push(@escaped_v, $v);
316 }
317 else {
318 my $ev = &ghtml::unescape_html($v);
319 $ev =~ s/\"/\\\"/g;
320 push(@escaped_v, $ev);
321 }
322 }
323 $doc_rec->{$k} = \@escaped_v;
324 }
325 ## print STDERR "**** metavalue = $metavalue\n";
326
327 # Protect the quotes
328 $metavalue =~ s/\"/\\\"/g;
329
330 # Set the metadata value
331 if (defined $metapos) {
332 $doc_rec->{$metaname}->[$metapos] = $metavalue;
333 }
334 else {
335 $doc_rec->{$metaname} = [ $metavalue ];
336 }
337 ## print STDERR "**** metavalue = $metavalue\n";
338
339 # Generate the record string
340 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
341 ## print STDERR "**** ser dr\n$serialized_doc_rec\n\n\n";
342
343 # Store it into GDBM
344 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
345 my $status = system($cmd);
346 if ($status != 0) {
347 # Catch error if gdbmget failed
348 my $mess = "Failed to set metadata key: $docid\n";
349
350 $mess .= "PATH: $ENV{'PATH'}\n";
351 $mess .= "cmd = $cmd\n";
352 $mess .= "Exit status: $status\n";
353 $mess .= "System Error Message: $!\n";
354
355 $gsdl_cgi->generate_error($mess);
356 }
357 else {
358 my $mess = "set-document-metadata successful: Key[$docid]\n";
359 $mess .= " $metaname";
360 $mess .= "->[$metapos]" if (defined $metapos);
361 $mess .= " = $metavalue";
362
363 $gsdl_cgi->generate_ok_message($mess);
364 }
365
366 # Release the lock once it is done
367 $self->unlock_collection($username, $collect);
368}
369
370
371sub dxml_metadata
372{
373 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
374 my $metaname = $parser->{'parameters'}->{'metaname'};
375 my $metamode = $parser->{'parameters'}->{'metamode'};
376
377 # Find the right metadata tag and checks if we are going to override it
378 # Note: This over writes the first metadata block it encountered. If there are multiple Sections in the doc.xml, it might not behave as you would expect
379 my $name_attr = $attrHash->{'name'};
380 if (($name_attr eq $metaname) && ($metamode eq "override")) {
381 # Get the value and override the current value
382 my $metavalue = $parser->{'parameters'}->{'metavalue'};
383 $attrHash->{'_content'} = $metavalue;
384
385 # Don't want it to wipe out any other pieces of metadata
386 $parser->{'parameters'}->{'metamode'} = "done";
387 }
388
389 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
390 return [$tagname => $attrHash];
391}
392
393
394sub dxml_description
395{
396 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
397 my $metamode = $parser->{'parameters'}->{'metamode'};
398
399 # Accumulate the metadata
400 # NOTE: This appends new metadata element to all description fields.
401 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
402 if ($metamode eq "accumulate") {
403 # tack a new metadata tag on to the end of the <Metadata>+ block
404 my $metaname = $parser->{'parameters'}->{'metaname'};
405 my $metavalue = $parser->{'parameters'}->{'metavalue'};
406
407 my $metadata_attr = { '_content' => $metavalue,
408 'name' => $metaname,
409 'mode' => "accumulate" };
410
411 my $append_metadata = [ "Metadata" => $metadata_attr ];
412 my $description_content = $attrHash->{'_content'};
413
414 push(@$description_content," ", $append_metadata ,"\n ");
415 }
416
417
418 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
419 return [$tagname => $attrHash];
420}
421
422
423sub edit_xml_file
424{
425 my $self = shift @_;
426 my ($gsdl_cgi, $filename, $rules, $options) = @_;
427
428 # use XML::Rules to add it in (read in and out again)
429 my $parser = XML::Rules->new(rules => $rules,
430 style => 'filter' );
431
432 my $xml_in = "";
433 if (!open(MIN,"<$filename")) {
434 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
435 }
436 else {
437 # Read all the text in
438 my $line;
439 while (defined ($line=<MIN>)) {
440 $xml_in .= $line;
441 }
442 close(MIN);
443
444 # Matched lines will get handled by the call backs
445 my $xml_out = "";
446 $parser->filter($xml_in,\$xml_out, $options);
447
448 if (!open(MOUT,">$filename")) {
449 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
450 }
451 else {
452 print MOUT $xml_out;
453 close(MOUT);
454 }
455 }
456}
457
458
459sub edit_doc_xml
460{
461 my $self = shift @_;
462 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos) = @_;
463
464 # use XML::Rules to add it in (read in and out again)
465 # Set the call back functions
466 my @rules =
467 ( _default => 'raw',
468 'Metadata' => \&dxml_metadata,
469 'Description' => \&dxml_description );
470
471 # Sets the parameters
472 my $options = { 'metaname' => $metaname,
473 'metapos' => $metapos,
474 'metavalue' => $metavalue };
475 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@rules,$options);
476}
477
478
479sub set_archives_metadata
480{
481 my $self = shift @_;
482
483 my $username = $self->{'username'};
484 my $collect = $self->{'collect'};
485 my $gsdl_cgi = $self->{'gsdl_cgi'};
486 my $gsdlhome = $self->{'gsdlhome'};
487
488 # don't user authenticate for now
489 if ($baseaction::authentication_enabled) {
490 # Ensure the user is allowed to edit this collection
491 $self->authenticate_user($username, $collect);
492 }
493
494 # Obtain the collect and archive dir
495 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
496 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
497
498 # Make sure the collection isn't locked by someone else
499 $self->lock_collection($username, $collect);
500
501 # look up additional args
502 my $docid = $self->{'d'};
503 my $metaname = $self->{'metaname'};
504 my $metavalue = $self->{'metavalue'};
505 my $metapos = $self->{'metapos'};
506 $metapos = 0 if (!defined $metapos);
507
508 # Obtain the doc.xml path for the specified docID
509 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir);
510 my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid);
511 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
512 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
513
514 # The $doc_xml_file is relative to the archives, and now let's get the full path
515 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
516 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
517
518 # Edit the doc.xml file with the specified metadata name, value and position.
519 # TODO: there is a potential problem here as this edit_doc_xml function
520 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
521 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
522 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
523 $metaname,$metavalue,$metapos);
524
525 # Release the lock once it is done
526 $self->unlock_collection($username, $collect);
527}
528
529
530sub mxml_metadata
531{
532 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
533 my $metaname = $parser->{'parameters'}->{'metaname'};
534 my $metamode = $parser->{'parameters'}->{'metamode'};
535
536 # Report error if we don't see FileName tag before this
537 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
538
539 # Don't do anything if we are not in the right FileSet
540 my $file_regexp = $parser->{'parameters'}->{'current_file'};
541 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
542
543 # Find the right metadata tag and checks if we are going to override it
544 # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified
545 my $name_attr = $attrHash->{'name'};
546 if (($name_attr eq $metaname) && ($metamode eq "override")) {
547 # Get the value and override the current value
548 my $metavalue = $parser->{'parameters'}->{'metavalue'};
549 $attrHash->{'_content'} = $metavalue;
550
551 # Don't want it to wipe out any other pieces of metadata
552 $parser->{'parameters'}->{'metamode'} = "done";
553 }
554
555 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
556 return [$tagname => $attrHash];
557}
558
559
560sub mxml_description
561{
562 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
563 my $metamode = $parser->{'parameters'}->{'metamode'};
564
565 # Failed... Report error if we don't see FileName tag before this
566 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
567
568 # Don't do anything if we are not in the right FileSet
569 my $file_regexp = $parser->{'parameters'}->{'current_file'};
570 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
571
572 # Accumulate the metadata block to the end of the description block
573 # Note: This adds metadata block to all description blocks, so if there are
574 # multiple FileSets, it will add to all of them
575 if ($metamode eq "accumulate") {
576 # tack a new metadata tag on to the end of the <Metadata>+ block
577 my $metaname = $parser->{'parameters'}->{'metaname'};
578 my $metavalue = $parser->{'parameters'}->{'metavalue'};
579
580 my $metadata_attr = { '_content' => $metavalue,
581 'name' => $metaname,
582 'mode' => "accumulate" };
583
584 my $append_metadata = [ "Metadata" => $metadata_attr ];
585 my $description_content = $attrHash->{'_content'};
586
587 push(@$description_content," ", $append_metadata ,"\n ");
588 }
589
590 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
591 return [$tagname => $attrHash];
592}
593
594
595sub mxml_filename
596{
597 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
598
599 # Store the filename of the Current Fileset
600 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
601 # FileName tag must come before Description tag
602 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
603
604 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
605 return [$tagname => $attrHash];
606}
607
608
609sub mxml_fileset
610{
611 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
612
613 # Initilise the current_file
614 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
615 # FileName tag must come before Description tag
616 $parser->{'parameters'}->{'current_file'} = "";
617
618 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
619 return [$tagname => $attrHash];
620}
621
622
623sub edit_metadata_xml
624{
625 my $self = shift @_;
626 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
627
628 # Set the call-back functions for the metadata tags
629 my @rules =
630 ( _default => 'raw',
631 'FileName' => \&mxml_filename,
632 'Metadata' => \&mxml_metadata,
633 'Description' => \&mxml_description,
634 'FileSet' => \&mxml_fileset);
635
636 # use XML::Rules to add it in (read in and out again)
637 my $parser = XML::Rules->new(rules => \@rules,
638 style => 'filter',
639 output_encoding => 'utf8');
640
641 my $xml_in = "";
642 if (!open(MIN,"<$metadata_xml_filename")) {
643 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
644 }
645 else {
646 # Read them in
647 my $line;
648 while (defined ($line=<MIN>)) {
649 $xml_in .= $line;
650 }
651 close(MIN);
652
653 # Filter with the call-back functions
654 my $xml_out = "";
655 $parser->filter($xml_in,\$xml_out, { metaname => $metaname,
656 metavalue => $metavalue,
657 metamode => $metamode,
658 src_file => $src_file,
659 current_file => undef} );
660
661 if (!open(MOUT,">$metadata_xml_filename")) {
662 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
663 }
664 else {
665 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
666 # At the moment, I will just hack it!
667 my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
668 $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
669 print MOUT $xml_out;
670 close(MOUT);
671 }
672 }
673}
674
675
676sub set_import_metadata
677{
678 my $self = shift @_;
679
680 my $username = $self->{'username'};
681 my $collect = $self->{'collect'};
682 my $gsdl_cgi = $self->{'gsdl_cgi'};
683 my $gsdlhome = $self->{'gsdlhome'};
684
685 # don't user authenticate for now
686 if ($baseaction::authentication_enabled) {
687 # Ensure the user is allowed to edit this collection
688 $self->authenticate_user($username, $collect);
689 }
690
691 # Obtain the collect and archive dir
692 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
693 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
694
695 # Make sure the collection isn't locked by someone else
696 $self->lock_collection($username, $collect);
697
698 # look up additional args
699 # want either d= or f=
700 my $docid = $self->{'d'};
701 my $import_file = $self->{'f'};
702 if ((!defined $docid) && (!defined $import_file)) {
703 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
704 }
705
706 # Get the parameters and set default mode to "accumulate"
707 my $metaname = $self->{'metaname'};
708 my $metavalue = $self->{'metavalue'};
709 my $metamode = $self->{'metamode'};
710 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
711 # make "accumulate" the default (less destructive, as won't actually
712 # delete any existing values)
713 $metamode = "accumulate";
714 }
715
716 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
717 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
718 my $metadata_xml_file;
719 my $import_filename = undef;
720 if (defined $docid) {
721 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir);
722 my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid);
723 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
724
725 # This now stores the full pathname
726 $import_filename = $doc_rec->{'src-file'}->[0];
727 }
728 else {
729 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
730 }
731
732 # figure out correct metadata.xml file [?]
733 # Assuming the metadata.xml file is next to the source file
734 # Note: This will not work if it is using the inherited metadata from the parent folder
735 my ($import_tailname, $import_dirname)
736 = File::Basename::fileparse($import_filename);
737 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
738
739 # Edit the metadata.xml
740 # Modified by Jeffrey from DL Consulting
741 # Handle the case where there is one metadata.xml file for multiple FileSets
742 # The XML filter needs to know whether it is in the right FileSet
743 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
744 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
745 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
746 $metaname, $metavalue, $metamode, $import_tailname);
747
748 # Release the lock once it is done
749 $self->unlock_collection($username, $collect);
750}
751
752
753sub remove_live_metadata
754{
755 my $self = shift @_;
756
757 my $username = $self->{'username'};
758 my $collect = $self->{'collect'};
759 my $gsdl_cgi = $self->{'gsdl_cgi'};
760 my $gsdlhome = $self->{'gsdlhome'};
761
762 if ($baseaction::authentication_enabled) {
763 # Ensure the user is allowed to edit this collection
764 &authenticate_user($gsdl_cgi, $username, $collect);
765 }
766
767 # Obtain the collect dir
768 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
769
770 # Make sure the collection isn't locked by someone else
771 $self->lock_collection($username, $collect);
772
773 # look up additional args
774 my $docid = $self->{'d'};
775 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
776 $gsdl_cgi->generate_error("No docid (d=...) specified.");
777 }
778
779 # Generate the dbkey
780 my $metaname = $self->{'metaname'};
781 my $dbkey = "$docid.$metaname";
782
783 # To people who know $collect_tail please add some comments
784 # Obtain the live gdbm_db path
785 my $collect_tail = $collect;
786 $collect_tail =~ s/^.*[\/\\]//;
787 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
788 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory);
789
790 # Remove the key
791 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
792 my $status = system($cmd);
793 if ($status != 0) {
794 # Catch error if gdbmdel failed
795 my $mess = "Failed to set metadata key: $dbkey\n";
796
797 $mess .= "PATH: $ENV{'PATH'}\n";
798 $mess .= "cmd = $cmd\n";
799 $mess .= "Exit status: $status\n";
800 $mess .= "System Error Message: $!\n";
801
802 $gsdl_cgi->generate_error($mess);
803 }
804 else {
805 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
806 }
807
808}
809
810
811sub remove_metadata
812{
813 my $self = shift @_;
814
815 my $username = $self->{'username'};
816 my $collect = $self->{'collect'};
817 my $gsdl_cgi = $self->{'gsdl_cgi'};
818 my $gsdlhome = $self->{'gsdlhome'};
819
820 # don't user authenticate for now
821 if ($baseaction::authentication_enabled) {
822 # Ensure the user is allowed to edit this collection
823 &authenticate_user($gsdl_cgi, $username, $collect);
824 }
825
826 # Obtain the collect dir
827 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
828
829 # Make sure the collection isn't locked by someone else
830 $self->lock_collection($username, $collect);
831
832 # look up additional args
833 my $docid = $self->{'d'};
834 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
835 $gsdl_cgi->generate_error("No docid (d=...) specified.");
836 }
837 my $metaname = $self->{'metaname'};
838 my $metapos = $self->{'metapos'};
839
840 # To people who know $collect_tail please add some comments
841 # Obtain the path to the database
842 my $collect_tail = $collect;
843 $collect_tail =~ s/^.*[\/\\]//;
844 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
845 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
846
847 # Read the docid entry
848 my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
849 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
850
851 # Basically loop through and unescape_html the values
852 foreach my $k (keys %$doc_rec) {
853 my @escaped_v = ();
854 foreach my $v (@{$doc_rec->{$k}}) {
855 if ($k eq "contains") {
856 # protect quotes in ".2;".3 etc
857 $v =~ s/\"/\\\"/g;
858 push(@escaped_v, $v);
859 }
860 else {
861 my $ev = &ghtml::unescape_html($v);
862 $ev =~ s/\"/\\\"/g;
863 push(@escaped_v, $ev);
864 }
865 }
866 $doc_rec->{$k} = \@escaped_v;
867 }
868
869 # Check to make sure the key does exist
870 if (!defined ($doc_rec->{$metaname})) {
871 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
872 }
873
874 # Obtain the specified metadata pos
875 $metapos = 0 if (!defined $metapos);
876
877 # consider check key is defined before deleting?
878 # Loop through the metadata array and ignore the specified position
879 my $filtered_metadata = [];
880 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
881 for (my $i=0; $i<$num_metadata_vals; $i++) {
882 my $metavalue = shift(@{$doc_rec->{$metaname}});
883
884 if ($i != $metapos) {
885 push(@$filtered_metadata,$metavalue)
886 }
887 }
888 $doc_rec->{$metaname} = $filtered_metadata;
889
890 # Turn the record back to string
891 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
892
893 # Store it back to the database
894 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
895 my $status = system($cmd);
896 if ($status != 0) {
897 my $mess = "Failed to set metadata key: $docid\n";
898
899 $mess .= "PATH: $ENV{'PATH'}\n";
900 $mess .= "cmd = $cmd\n";
901 $mess .= "Exit status: $status\n";
902 $mess .= "System Error Message: $!\n";
903
904 $gsdl_cgi->generate_error($mess);
905 }
906 else {
907 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
908 $mess .= " $metaname";
909 $mess .= "->[$metapos]" if (defined $metapos);
910
911 $gsdl_cgi->generate_ok_message($mess);
912 }
913}
914
915
916# Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process...
917sub insert_metadata
918{
919 my $self = shift @_;
920
921 my $username = $self->{'username'};
922 my $collect = $self->{'collect'};
923 my $gsdl_cgi = $self->{'gsdl_cgi'};
924 my $gsdlhome = $self->{'gsdlhome'};
925
926 # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild.
927 my $rebuild_pending_macro = "_rebuildpendingmessage_";
928
929 # don't user authenticate for now
930 if ($baseaction::authentication_enabled) {
931 # Ensure the user is allowed to edit this collection
932 $self->authenticate_user($username, $collect);
933 }
934
935 # Obtain the collect and archive dir
936 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
937 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
938
939 # Make sure the collection isn't locked by someone else
940 $self->lock_collection($username, $collect);
941
942 # Check additional args
943 my $docid = $self->{'d'};
944 if (!defined($docid)) {
945 $gsdl_cgi->generate_error("No document id is specified: d=...");
946 }
947 my $metaname = $self->{'metaname'};
948 if (!defined($metaname)) {
949 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
950 }
951 my $metavalue = $self->{'metavalue'};
952 if (!defined($metavalue) || $metavalue eq "") {
953 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
954 }
955 # make "accumulate" the default (less destructive, as won't actually
956 # delete any existing values)
957 my $metamode = "accumulate";
958
959 #=======================================================================#
960 # set_import_metadata [START]
961 #=======================================================================#
962 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
963 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
964 my $metadata_xml_file;
965 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir);
966 my $archive_doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid);
967 my $archive_doc_rec = &dbutil::convert_infodb_string_to_hash($archive_doc_rec_string);
968
969 # This now stores the full pathname
970 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
971
972 # figure out correct metadata.xml file [?]
973 # Assuming the metadata.xml file is next to the source file
974 # Note: This will not work if it is using the inherited metadata from the parent folder
975 my ($import_tailname, $import_dirname)
976 = File::Basename::fileparse($import_filename);
977 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
978
979 # Shane's escape characters
980 $metavalue = pack "U0C*", unpack "C*", $metavalue;
981 $metavalue =~ s/\,/&#44;/g;
982 $metavalue =~ s/\:/&#58;/g;
983 $metavalue =~ s/\|/&#124;/g;
984 $metavalue =~ s/\(/&#40;/g;
985 $metavalue =~ s/\)/&#41;/g;
986 $metavalue =~ s/\[/&#91;/g;
987 $metavalue =~ s/\\/&#92;/g;
988 $metavalue =~ s/\]/&#93;/g;
989 $metavalue =~ s/\{/&#123;/g;
990 $metavalue =~ s/\}/&#125;/g;
991 $metavalue =~ s/\"/&#34;/g;
992 $metavalue =~ s/\`/&#96;/g;
993 $metavalue =~ s/\n/_newline_/g;
994
995 # Edit the metadata.xml
996 # Modified by Jeffrey from DL Consulting
997 # Handle the case where there is one metadata.xml file for multiple FileSets
998 # The XML filter needs to know whether it is in the right FileSet
999 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1000 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1001 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1002 $metaname, $metavalue, $metamode, $import_tailname);
1003 #=======================================================================#
1004 # set_import_metadata [END]
1005 #=======================================================================#
1006
1007
1008 #=======================================================================#
1009 # set_metadata (accumulate version) [START]
1010 #=======================================================================#
1011 # To people who know $collect_tail please add some comments
1012 # Obtain path to the database
1013 my $collect_tail = $collect;
1014 $collect_tail =~ s/^.*[\/\\]//;
1015 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1016 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
1017
1018 # Read the docid entry
1019 my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
1020 my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
1021 foreach my $k (keys %$doc_rec) {
1022 my @escaped_v = ();
1023 foreach my $v (@{$doc_rec->{$k}}) {
1024 if ($k eq "contains") {
1025 # protect quotes in ".2;".3 etc
1026 $v =~ s/\"/\\\"/g;
1027 push(@escaped_v, $v);
1028 }
1029 else {
1030 my $ev = &ghtml::unescape_html($v);
1031 $ev =~ s/\"/\\\"/g;
1032 push(@escaped_v, $ev);
1033 }
1034 }
1035 $doc_rec->{$k} = \@escaped_v;
1036 }
1037
1038 # Protect the quotes
1039 $metavalue =~ s/\"/\\\"/g;
1040
1041 # Adds the pending macro
1042 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1043
1044 # If the metadata doesn't exist, create a new one
1045 if (!defined($doc_rec->{$metaname})){
1046 $doc_rec->{$metaname} = [ $macro_metavalue ];
1047 }
1048 # Else, let's acculumate the values
1049 else {
1050 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1051 }
1052
1053 # Generate the record string
1054 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1055
1056 # Store it into GDBM
1057 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1058 my $status = system($cmd);
1059 if ($status != 0) {
1060 # Catch error if gdbmget failed
1061 my $mess = "Failed to set metadata key: $docid\n";
1062
1063 $mess .= "PATH: $ENV{'PATH'}\n";
1064 $mess .= "cmd = $cmd\n";
1065 $mess .= "Exit status: $status\n";
1066 $mess .= "System Error Message: $!\n";
1067
1068 $gsdl_cgi->generate_error($mess);
1069 }
1070 else {
1071 my $mess = "insert-metadata successful: Key[$docid]\n";
1072 $mess .= " [In metadata.xml] $metaname";
1073 $mess .= " = $metavalue\n";
1074 $mess .= " [In database] $metaname";
1075 $mess .= " = $macro_metavalue\n";
1076 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1077 $gsdl_cgi->generate_ok_message($mess);
1078 }
1079 #=======================================================================#
1080 # set_metadata (accumulate version) [END]
1081 #=======================================================================#
1082
1083 # Release the lock once it is done
1084 $self->unlock_collection($username, $collect);
1085}
1086
10871;
Note: See TracBrowser for help on using the repository browser.