source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 23447

Last change on this file since 23447 was 23447, checked in by davidb, 13 years ago

More clear comment written

File size: 36.4 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35
36BEGIN {
37# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
38 require XML::Rules;
39}
40
41
42@metadataaction::ISA = ('baseaction');
43
44
45my $action_table =
46{
47 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
48 'optional-args' => [] },
49
50 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
51 'optional-args' => [ "metapos" ] },
52
53 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
57 'optional-args' => [ "metapos" ] },
58
59 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60 'optional-args' => [ "metapos", "metamode" ]
61 # metamode can be "accumulate", "override",
62 },
63
64 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
65 'optional-args' => [ "d", "f", "metamode" ]
66 # metamode can be "accumulate", "override", or "unique-id"
67 },
68
69
70 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
71 'optional-args' => [ ] },
72
73 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
74 'optional-args' => [ "metapos" ] },
75
76 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
77 'optional-args' => [ ]
78 }
79};
80
81
82sub new
83{
84 my $class = shift (@_);
85 my ($gsdl_cgi,$iis6_mode) = @_;
86
87 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
88
89 return bless $self, $class;
90}
91
92
93sub get_live_metadata
94{
95 my $self = shift @_;
96
97 my $username = $self->{'username'};
98 my $collect = $self->{'collect'};
99 my $gsdl_cgi = $self->{'gsdl_cgi'};
100 my $gsdlhome = $self->{'gsdlhome'};
101 my $infodbtype = $self->{'infodbtype'};
102
103 # live metadata gets/saves value scoped (prefixed) by the current usename
104 # so (for now) let's not bother to enforce authentication
105
106 # Obtain the collect dir
107 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
108
109 # Make sure the collection isn't locked by someone else
110 $self->lock_collection($username, $collect);
111
112 # look up additional args
113 my $docid = $self->{'d'};
114 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
115 $gsdl_cgi->generate_error("No docid (d=...) specified.");
116 }
117
118 # Generate the dbkey
119 my $metaname = $self->{'metaname'};
120 my $dbkey = "$docid.$metaname";
121
122 # To people who know $collect_tail please add some comments
123 # Obtain path to the database
124 my $collect_tail = $collect;
125 $collect_tail =~ s/^.*[\/|\\]//;
126 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
127 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
128
129 # Obtain the content of the key
130 my $cmd = "gdbmget $infodb_file_path $dbkey";
131 if (open(GIN,"$cmd |") == 0) {
132 # Catch error if gdbmget failed
133 my $mess = "Failed to get metadata key: $metaname\n";
134 $mess .= "$!\n";
135
136 $gsdl_cgi->generate_error($mess);
137 }
138 else {
139 # Read everything in and concatenate them into $metavalue
140 my $metavalue = "";
141 my $line;
142 while (defined ($line=<GIN>)) {
143 $metavalue .= $line;
144 }
145 close(GIN);
146 chomp($metavalue); # Get rid off the tailing newlines
147 $gsdl_cgi->generate_ok_message("$metavalue");
148 }
149
150 # Release the lock once it is done
151 $self->unlock_collection($username, $collect);
152}
153
154
155sub get_metadata
156{
157 my $self = shift @_;
158
159 my $username = $self->{'username'};
160 my $collect = $self->{'collect'};
161 my $gsdl_cgi = $self->{'gsdl_cgi'};
162 my $gsdlhome = $self->{'gsdlhome'};
163
164 # Authenticate user if it is enabled
165 if ($baseaction::authentication_enabled) {
166 # Ensure the user is allowed to edit this collection
167 &authenticate_user($gsdl_cgi, $username, $collect);
168 }
169
170 # Obtain the collect dir
171 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
172
173 # Make sure the collection isn't locked by someone else
174 $self->lock_collection($username, $collect);
175
176 # look up additional args
177 my $docid = $self->{'d'};
178 my $metaname = $self->{'metaname'};
179 my $metapos = $self->{'metapos'};
180 my $infodbtype = $self->{'infodbtype'};
181
182 # To people who know $collect_tail please add some comments
183 # Obtain path to the database
184 my $collect_tail = $collect;
185 $collect_tail =~ s/^.*[\/\\]//;
186 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
187 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
188
189 # Read the docid entry
190 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
191
192 # Basically loop through and unescape_html the values
193 foreach my $k (keys %$doc_rec) {
194 my @escaped_v = ();
195 foreach my $v (@{$doc_rec->{$k}}) {
196 my $ev = &ghtml::unescape_html($v);
197 push(@escaped_v, $ev);
198 }
199 $doc_rec->{$k} = \@escaped_v;
200 }
201
202 # Obtain the specified metadata value
203 $metapos = 0 if (!defined $metapos);
204 my $metavalue = $doc_rec->{$metaname}->[$metapos];
205 $gsdl_cgi->generate_ok_message("$metavalue");
206
207 # Release the lock once it is done
208 $self->unlock_collection($username, $collect);
209}
210
211
212sub set_live_metadata
213{
214 my $self = shift @_;
215
216 my $username = $self->{'username'};
217 my $collect = $self->{'collect'};
218 my $gsdl_cgi = $self->{'gsdl_cgi'};
219 my $gsdlhome = $self->{'gsdlhome'};
220 my $infodbtype = $self->{'infodbtype'};
221
222 # don't user authenticate for now
223 if ($baseaction::authentication_enabled) {
224 # Ensure the user is allowed to edit this collection
225 &authenticate_user($gsdl_cgi, $username, $collect);
226 }
227
228 # Obtain the collect dir
229 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
230
231 # Make sure the collection isn't locked by someone else
232 $self->lock_collection($username, $collect);
233
234 # look up additional args
235 my $docid = $self->{'d'};
236 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
237 $gsdl_cgi->generate_error("No docid (d=...) specified.");
238 }
239 my $metavalue = $self->{'metavalue'};
240
241
242 # Generate the dbkey
243 my $metaname = $self->{'metaname'};
244 my $dbkey = "$docid.$metaname";
245
246 # To people who know $collect_tail please add some comments
247 # Obtain path to the database
248 my $collect_tail = $collect;
249 $collect_tail =~ s/^.*[\/\\]//;
250 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
251 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
252
253 # Set the new value
254 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
255 my $status = system($cmd);
256 if ($status != 0) {
257 # Catch error if gdbmget failed
258 my $mess = "Failed to set metadata key: $dbkey\n";
259
260 $mess .= "PATH: $ENV{'PATH'}\n";
261 $mess .= "cmd = $cmd\n";
262 $mess .= "Exit status: $status\n";
263 $mess .= "System Error Message: $!\n";
264
265 $gsdl_cgi->generate_error($mess);
266 }
267 else {
268 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
269 }
270
271 # Release the lock once it is done
272 $self->unlock_collection($username, $collect);
273}
274
275
276sub set_metadata
277{
278 my $self = shift @_;
279
280 my $username = $self->{'username'};
281 my $collect = $self->{'collect'};
282 my $gsdl_cgi = $self->{'gsdl_cgi'};
283 my $gsdlhome = $self->{'gsdlhome'};
284
285
286
287 # don't user authenticate for now
288 if ($baseaction::authentication_enabled) {
289 # Ensure the user is allowed to edit this collection
290 &authenticate_user($gsdl_cgi, $username, $collect);
291 }
292
293 # Obtain the collect dir
294 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
295
296 # Make sure the collection isn't locked by someone else
297 $self->lock_collection($username, $collect);
298
299 # look up additional args
300 my $docid = $self->{'d'};
301 my $metaname = $self->{'metaname'};
302 my $metapos = $self->{'metapos'};
303 my $metavalue = $self->{'metavalue'};
304 my $infodbtype = $self->{'infodbtype'};
305
306 # To people who know $collect_tail please add some comments
307 # Obtain path to the database
308 my $collect_tail = $collect;
309 $collect_tail =~ s/^.*[\/\\]//;
310 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
311 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
312
313 # Read the docid entry
314 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
315
316 # Set the metadata value
317 if (defined $metapos) {
318 $doc_rec->{$metaname}->[$metapos] = $metavalue;
319 }
320 else {
321 $doc_rec->{$metaname} = [ $metavalue ];
322 }
323
324 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
325 if ($status != 0) {
326 # Catch error if gdbmget failed
327 my $mess = "Failed to set metadata key: $docid\n";
328
329 $mess .= "PATH: $ENV{'PATH'}\n";
330 $mess .= "Exit status: $status\n";
331 $mess .= "System Error Message: $!\n";
332
333 $gsdl_cgi->generate_error($mess);
334 }
335 else {
336 my $mess = "set-document-metadata successful: Key[$docid]\n";
337 $mess .= " $metaname";
338 $mess .= "->[$metapos]" if (defined $metapos);
339 $mess .= " = $metavalue";
340
341 $gsdl_cgi->generate_ok_message($mess);
342 }
343
344 # Release the lock once it is done
345 $self->unlock_collection($username, $collect);
346}
347
348
349sub dxml_metadata
350{
351 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
352 my $metaname = $parser->{'parameters'}->{'metaname'};
353 my $metamode = $parser->{'parameters'}->{'metamode'};
354
355 # Find the right metadata tag and checks if we are going to override it
356 # Note: This over writes the first metadata block it encountered. If there are multiple Sections in the doc.xml, it might not behave as you would expect
357 my $name_attr = $attrHash->{'name'};
358 if (($name_attr eq $metaname) && ($metamode eq "override")) {
359 # Get the value and override the current value
360 my $metavalue = $parser->{'parameters'}->{'metavalue'};
361 $attrHash->{'_content'} = $metavalue;
362
363 # Don't want it to wipe out any other pieces of metadata
364 $parser->{'parameters'}->{'metamode'} = "done";
365 }
366
367 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
368 return [$tagname => $attrHash];
369}
370
371
372sub dxml_description
373{
374 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
375 my $metamode = $parser->{'parameters'}->{'metamode'};
376
377 # Accumulate the metadata
378 # NOTE: This appends new metadata element to all description fields.
379 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
380 if ($metamode eq "accumulate") {
381 # tack a new metadata tag on to the end of the <Metadata>+ block
382 my $metaname = $parser->{'parameters'}->{'metaname'};
383 my $metavalue = $parser->{'parameters'}->{'metavalue'};
384
385 my $metadata_attr = { '_content' => $metavalue,
386 'name' => $metaname,
387 'mode' => "accumulate" };
388
389 my $append_metadata = [ "Metadata" => $metadata_attr ];
390 my $description_content = $attrHash->{'_content'};
391
392 push(@$description_content," ", $append_metadata ,"\n ");
393 }
394
395
396 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
397 return [$tagname => $attrHash];
398}
399
400
401sub edit_xml_file
402{
403 my $self = shift @_;
404 my ($gsdl_cgi, $filename, $rules, $options) = @_;
405
406 # use XML::Rules to add it in (read in and out again)
407 my $parser = XML::Rules->new(rules => $rules,
408 style => 'filter' );
409
410 my $xml_in = "";
411 if (!open(MIN,"<$filename")) {
412 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
413 }
414 else {
415 # Read all the text in
416 my $line;
417 while (defined ($line=<MIN>)) {
418 $xml_in .= $line;
419 }
420 close(MIN);
421
422 # Matched lines will get handled by the call backs
423 my $xml_out = "";
424 $parser->filter($xml_in,\$xml_out, $options);
425
426 if (!open(MOUT,">$filename")) {
427 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
428 }
429 else {
430 print MOUT $xml_out;
431 close(MOUT);
432 }
433 }
434}
435
436
437sub edit_doc_xml
438{
439 my $self = shift @_;
440 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode) = @_;
441
442 # use XML::Rules to add it in (read in and out again)
443 # Set the call back functions
444 my @rules =
445 ( _default => 'raw',
446 'Metadata' => \&dxml_metadata,
447 'Description' => \&dxml_description );
448
449 # Sets the parameters
450 my $options = { 'metaname' => $metaname,
451 'metapos' => $metapos,
452 'metavalue' => $metavalue,
453 'metamode' => $metamode };
454
455 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@rules,$options);
456}
457
458
459sub set_archives_metadata
460{
461 my $self = shift @_;
462
463 my $username = $self->{'username'};
464 my $collect = $self->{'collect'};
465 my $gsdl_cgi = $self->{'gsdl_cgi'};
466 my $gsdlhome = $self->{'gsdlhome'};
467 my $infodbtype = $self->{'infodbtype'};
468
469 # don't user authenticate for now
470 if ($baseaction::authentication_enabled) {
471 # Ensure the user is allowed to edit this collection
472 $self->authenticate_user($username, $collect);
473 }
474
475 # Obtain the collect and archive dir
476 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
477 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
478
479 # Make sure the collection isn't locked by someone else
480 $self->lock_collection($username, $collect);
481
482 # look up additional args
483 my $docid = $self->{'d'};
484 my $metaname = $self->{'metaname'};
485 my $metavalue = $self->{'metavalue'};
486
487 my $metapos = $self->{'metapos'};
488 $metapos = 0 if (!defined $metapos);
489
490 my $metamode = $self->{'metamode'};
491 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
492 # make "accumulate" the default (less destructive, as won't actually
493 # delete any existing values)
494 $metamode = "accumulate";
495 }
496
497 # Obtain the doc.xml path for the specified docID
498 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
499 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
500 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
501
502 # The $doc_xml_file is relative to the archives, and now let's get the full path
503 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
504 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
505
506 # Edit the doc.xml file with the specified metadata name, value and position.
507 # TODO: there is a potential problem here as this edit_doc_xml function
508 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
509 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
510 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
511 $metaname,$metavalue,$metapos,$metamode);
512
513 my $mess = "set-archives-metadata successful: Key[$docid]\n";
514 $mess .= " $metaname";
515 $mess .= "->[$metapos]" if (defined $metapos);
516 $mess .= " = $metavalue";
517 $mess .= " ($metamode)\n";
518
519 $gsdl_cgi->generate_ok_message($mess);
520
521 # Release the lock once it is done
522 $self->unlock_collection($username, $collect);
523
524}
525
526
527sub mxml_metadata
528{
529 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
530 my $metaname = $parser->{'parameters'}->{'metaname'};
531 my $metamode = $parser->{'parameters'}->{'metamode'};
532
533 # Report error if we don't see FileName tag before this
534 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
535
536 # Don't do anything if we are not in the right FileSet
537 my $file_regexp = $parser->{'parameters'}->{'current_file'};
538 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
539
540 # Find the right metadata tag and checks if we are going to override it
541 # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified
542 my $name_attr = $attrHash->{'name'};
543 if (($name_attr eq $metaname) && ($metamode eq "override")) {
544 # Get the value and override the current value
545 my $metavalue = $parser->{'parameters'}->{'metavalue'};
546 $attrHash->{'_content'} = $metavalue;
547
548 # Don't want it to wipe out any other pieces of metadata
549 $parser->{'parameters'}->{'metamode'} = "done";
550 }
551
552 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
553 return [$tagname => $attrHash];
554}
555
556
557sub mxml_description
558{
559 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
560 my $metamode = $parser->{'parameters'}->{'metamode'};
561
562 # Failed... Report error if we don't see FileName tag before this
563 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
564
565 # Don't do anything if we are not in the right FileSet
566 my $file_regexp = $parser->{'parameters'}->{'current_file'};
567 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
568
569 # Accumulate the metadata block to the end of the description block
570 # Note: This adds metadata block to all description blocks, so if there are
571 # multiple FileSets, it will add to all of them
572 if ($metamode eq "accumulate") {
573 # tack a new metadata tag on to the end of the <Metadata>+ block
574 my $metaname = $parser->{'parameters'}->{'metaname'};
575 my $metavalue = $parser->{'parameters'}->{'metavalue'};
576
577 my $metadata_attr = { '_content' => $metavalue,
578 'name' => $metaname,
579 'mode' => "accumulate" };
580
581 my $append_metadata = [ "Metadata" => $metadata_attr ];
582 my $description_content = $attrHash->{'_content'};
583
584 push(@$description_content," ", $append_metadata ,"\n ");
585 }
586
587 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
588 return [$tagname => $attrHash];
589}
590
591
592sub mxml_filename
593{
594 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
595
596 # Store the filename of the Current Fileset
597 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
598 # FileName tag must come before Description tag
599 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
600
601 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
602 return [$tagname => $attrHash];
603}
604
605
606sub mxml_fileset
607{
608 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
609
610 # Initilise the current_file
611 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
612 # FileName tag must come before Description tag
613 $parser->{'parameters'}->{'current_file'} = "";
614
615 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
616 return [$tagname => $attrHash];
617}
618
619
620sub edit_metadata_xml
621{
622 my $self = shift @_;
623 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
624
625 # Set the call-back functions for the metadata tags
626 my @rules =
627 ( _default => 'raw',
628 'FileName' => \&mxml_filename,
629 'Metadata' => \&mxml_metadata,
630 'Description' => \&mxml_description,
631 'FileSet' => \&mxml_fileset);
632
633 # use XML::Rules to add it in (read in and out again)
634 my $parser = XML::Rules->new(rules => \@rules,
635 style => 'filter',
636 output_encoding => 'utf8');
637
638 my $xml_in = "";
639 if (!open(MIN,"<$metadata_xml_filename")) {
640 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
641 }
642 else {
643 # Read them in
644 my $line;
645 while (defined ($line=<MIN>)) {
646 $xml_in .= $line;
647 }
648 close(MIN);
649
650 # Filter with the call-back functions
651 my $xml_out = "";
652 $parser->filter($xml_in,\$xml_out, { metaname => $metaname,
653 metavalue => $metavalue,
654 metamode => $metamode,
655 src_file => $src_file,
656 current_file => undef} );
657
658 if (!open(MOUT,">$metadata_xml_filename")) {
659 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
660 }
661 else {
662 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
663 # At the moment, I will just hack it!
664 my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
665 $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
666 print MOUT $xml_out;
667 close(MOUT);
668 }
669 }
670}
671
672
673sub set_import_metadata
674{
675 my $self = shift @_;
676
677 my $username = $self->{'username'};
678 my $collect = $self->{'collect'};
679 my $gsdl_cgi = $self->{'gsdl_cgi'};
680 my $gsdlhome = $self->{'gsdlhome'};
681 my $infodbtype = $self->{'infodbtype'};
682
683 # don't user authenticate for now
684 if ($baseaction::authentication_enabled) {
685 # Ensure the user is allowed to edit this collection
686 $self->authenticate_user($username, $collect);
687 }
688
689 # Obtain the collect and archive dir
690 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
691 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
692
693 # Make sure the collection isn't locked by someone else
694 $self->lock_collection($username, $collect);
695
696 # look up additional args
697 # want either d= or f=
698 my $docid = $self->{'d'};
699 my $import_file = $self->{'f'};
700 if ((!defined $docid) && (!defined $import_file)) {
701 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
702 }
703
704 # Get the parameters and set default mode to "accumulate"
705 my $metaname = $self->{'metaname'};
706 my $metavalue = $self->{'metavalue'};
707 my $metamode = $self->{'metamode'};
708 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
709 # make "accumulate" the default (less destructive, as won't actually
710 # delete any existing values)
711 $metamode = "accumulate";
712 }
713
714 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
715 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
716 my $metadata_xml_file;
717 my $import_filename = undef;
718 if (defined $docid) {
719 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
720 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
721
722 # This now stores the full pathname
723 $import_filename = $doc_rec->{'src-file'}->[0];
724 }
725 else {
726 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
727 }
728
729 # figure out correct metadata.xml file [?]
730 # Assuming the metadata.xml file is next to the source file
731 # Note: This will not work if it is using the inherited metadata from the parent folder
732 my ($import_tailname, $import_dirname)
733 = File::Basename::fileparse($import_filename);
734 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
735
736 # Edit the metadata.xml
737 # Modified by Jeffrey from DL Consulting
738 # Handle the case where there is one metadata.xml file for multiple FileSets
739 # The XML filter needs to know whether it is in the right FileSet
740 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
741 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
742 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
743 $metaname, $metavalue, $metamode, $import_tailname);
744
745 # Release the lock once it is done
746 $self->unlock_collection($username, $collect);
747}
748
749
750sub remove_live_metadata
751{
752 my $self = shift @_;
753
754 my $username = $self->{'username'};
755 my $collect = $self->{'collect'};
756 my $gsdl_cgi = $self->{'gsdl_cgi'};
757 my $gsdlhome = $self->{'gsdlhome'};
758 my $infodbtype = $self->{'infodbtype'};
759
760 if ($baseaction::authentication_enabled) {
761 # Ensure the user is allowed to edit this collection
762 &authenticate_user($gsdl_cgi, $username, $collect);
763 }
764
765 # Obtain the collect dir
766 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
767
768 # Make sure the collection isn't locked by someone else
769 $self->lock_collection($username, $collect);
770
771 # look up additional args
772 my $docid = $self->{'d'};
773 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
774 $gsdl_cgi->generate_error("No docid (d=...) specified.");
775 }
776
777 # Generate the dbkey
778 my $metaname = $self->{'metaname'};
779 my $dbkey = "$docid.$metaname";
780
781 # To people who know $collect_tail please add some comments
782 # Obtain the live gdbm_db path
783 my $collect_tail = $collect;
784 $collect_tail =~ s/^.*[\/\\]//;
785 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
786 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
787
788 # Remove the key
789 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
790 my $status = system($cmd);
791 if ($status != 0) {
792 # Catch error if gdbmdel failed
793 my $mess = "Failed to set metadata key: $dbkey\n";
794
795 $mess .= "PATH: $ENV{'PATH'}\n";
796 $mess .= "cmd = $cmd\n";
797 $mess .= "Exit status: $status\n";
798 $mess .= "System Error Message: $!\n";
799
800 $gsdl_cgi->generate_error($mess);
801 }
802 else {
803 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
804 }
805
806}
807
808
809sub remove_metadata
810{
811 my $self = shift @_;
812
813 my $username = $self->{'username'};
814 my $collect = $self->{'collect'};
815 my $gsdl_cgi = $self->{'gsdl_cgi'};
816 my $gsdlhome = $self->{'gsdlhome'};
817 my $infodbtype = $self->{'infodbtype'};
818
819 # don't user authenticate for now
820 if ($baseaction::authentication_enabled) {
821 # Ensure the user is allowed to edit this collection
822 &authenticate_user($gsdl_cgi, $username, $collect);
823 }
824
825 # Obtain the collect dir
826 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
827
828 # Make sure the collection isn't locked by someone else
829 $self->lock_collection($username, $collect);
830
831 # look up additional args
832 my $docid = $self->{'d'};
833 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
834 $gsdl_cgi->generate_error("No docid (d=...) specified.");
835 }
836 my $metaname = $self->{'metaname'};
837 my $metapos = $self->{'metapos'};
838
839 # To people who know $collect_tail please add some comments
840 # Obtain the path to the database
841 my $collect_tail = $collect;
842 $collect_tail =~ s/^.*[\/\\]//;
843 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
844 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
845
846 # Read the docid entry
847 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
848
849 # Basically loop through and unescape_html the values
850 foreach my $k (keys %$doc_rec) {
851 my @escaped_v = ();
852 foreach my $v (@{$doc_rec->{$k}}) {
853 if ($k eq "contains") {
854 # protect quotes in ".2;".3 etc
855 $v =~ s/\"/\\\"/g;
856 push(@escaped_v, $v);
857 }
858 else {
859 my $ev = &ghtml::unescape_html($v);
860 $ev =~ s/\"/\\\"/g;
861 push(@escaped_v, $ev);
862 }
863 }
864 $doc_rec->{$k} = \@escaped_v;
865 }
866
867 # Check to make sure the key does exist
868 if (!defined ($doc_rec->{$metaname})) {
869 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
870 }
871
872 # Obtain the specified metadata pos
873 $metapos = 0 if (!defined $metapos);
874
875 # consider check key is defined before deleting?
876 # Loop through the metadata array and ignore the specified position
877 my $filtered_metadata = [];
878 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
879 for (my $i=0; $i<$num_metadata_vals; $i++) {
880 my $metavalue = shift(@{$doc_rec->{$metaname}});
881
882 if ($i != $metapos) {
883 push(@$filtered_metadata,$metavalue)
884 }
885 }
886 $doc_rec->{$metaname} = $filtered_metadata;
887
888 # Turn the record back to string
889 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
890
891 # Store it back to the database
892 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
893 my $status = system($cmd);
894 if ($status != 0) {
895 my $mess = "Failed to set metadata key: $docid\n";
896
897 $mess .= "PATH: $ENV{'PATH'}\n";
898 $mess .= "cmd = $cmd\n";
899 $mess .= "Exit status: $status\n";
900 $mess .= "System Error Message: $!\n";
901
902 $gsdl_cgi->generate_error($mess);
903 }
904 else {
905 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
906 $mess .= " $metaname";
907 $mess .= "->[$metapos]" if (defined $metapos);
908
909 $gsdl_cgi->generate_ok_message($mess);
910 }
911}
912
913
914# Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process...
915sub insert_metadata
916{
917 my $self = shift @_;
918
919 my $username = $self->{'username'};
920 my $collect = $self->{'collect'};
921 my $gsdl_cgi = $self->{'gsdl_cgi'};
922 my $gsdlhome = $self->{'gsdlhome'};
923 my $infodbtype = $self->{'infodbtype'};
924
925 # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild.
926 my $rebuild_pending_macro = "_rebuildpendingmessage_";
927
928 # don't user authenticate for now
929 if ($baseaction::authentication_enabled) {
930 # Ensure the user is allowed to edit this collection
931 $self->authenticate_user($username, $collect);
932 }
933
934 # Obtain the collect and archive dir
935 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
936 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
937
938 # Make sure the collection isn't locked by someone else
939 $self->lock_collection($username, $collect);
940
941 # Check additional args
942 my $docid = $self->{'d'};
943 if (!defined($docid)) {
944 $gsdl_cgi->generate_error("No document id is specified: d=...");
945 }
946 my $metaname = $self->{'metaname'};
947 if (!defined($metaname)) {
948 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
949 }
950 my $metavalue = $self->{'metavalue'};
951 if (!defined($metavalue) || $metavalue eq "") {
952 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
953 }
954 # make "accumulate" the default (less destructive, as won't actually
955 # delete any existing values)
956 my $metamode = "accumulate";
957
958 #=======================================================================#
959 # set_import_metadata [START]
960 #=======================================================================#
961 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
962 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
963 my $metadata_xml_file;
964 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
965 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
966
967 # This now stores the full pathname
968 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
969
970 # figure out correct metadata.xml file [?]
971 # Assuming the metadata.xml file is next to the source file
972 # Note: This will not work if it is using the inherited metadata from the parent folder
973 my ($import_tailname, $import_dirname)
974 = File::Basename::fileparse($import_filename);
975 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
976
977 # Shane's escape characters
978 $metavalue = pack "U0C*", unpack "C*", $metavalue;
979 $metavalue =~ s/\,/&#44;/g;
980 $metavalue =~ s/\:/&#58;/g;
981 $metavalue =~ s/\|/&#124;/g;
982 $metavalue =~ s/\(/&#40;/g;
983 $metavalue =~ s/\)/&#41;/g;
984 $metavalue =~ s/\[/&#91;/g;
985 $metavalue =~ s/\\/&#92;/g;
986 $metavalue =~ s/\]/&#93;/g;
987 $metavalue =~ s/\{/&#123;/g;
988 $metavalue =~ s/\}/&#125;/g;
989 $metavalue =~ s/\"/&#34;/g;
990 $metavalue =~ s/\`/&#96;/g;
991 $metavalue =~ s/\n/_newline_/g;
992
993 # Edit the metadata.xml
994 # Modified by Jeffrey from DL Consulting
995 # Handle the case where there is one metadata.xml file for multiple FileSets
996 # The XML filter needs to know whether it is in the right FileSet
997 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
998 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
999 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1000 $metaname, $metavalue, $metamode, $import_tailname);
1001 #=======================================================================#
1002 # set_import_metadata [END]
1003 #=======================================================================#
1004
1005
1006 #=======================================================================#
1007 # set_metadata (accumulate version) [START]
1008 #=======================================================================#
1009 # To people who know $collect_tail please add some comments
1010 # Obtain path to the database
1011 my $collect_tail = $collect;
1012 $collect_tail =~ s/^.*[\/\\]//;
1013 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1014 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1015
1016 # Read the docid entry
1017 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1018
1019 foreach my $k (keys %$doc_rec) {
1020 my @escaped_v = ();
1021 foreach my $v (@{$doc_rec->{$k}}) {
1022 if ($k eq "contains") {
1023 # protect quotes in ".2;".3 etc
1024 $v =~ s/\"/\\\"/g;
1025 push(@escaped_v, $v);
1026 }
1027 else {
1028 my $ev = &ghtml::unescape_html($v);
1029 $ev =~ s/\"/\\\"/g;
1030 push(@escaped_v, $ev);
1031 }
1032 }
1033 $doc_rec->{$k} = \@escaped_v;
1034 }
1035
1036 # Protect the quotes
1037 $metavalue =~ s/\"/\\\"/g;
1038
1039 # Adds the pending macro
1040 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1041
1042 # If the metadata doesn't exist, create a new one
1043 if (!defined($doc_rec->{$metaname})){
1044 $doc_rec->{$metaname} = [ $macro_metavalue ];
1045 }
1046 # Else, let's acculumate the values
1047 else {
1048 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1049 }
1050
1051 # Generate the record string
1052 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1053
1054 # Store it into GDBM
1055 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1056 my $status = system($cmd);
1057 if ($status != 0) {
1058 # Catch error if gdbmget failed
1059 my $mess = "Failed to set metadata key: $docid\n";
1060
1061 $mess .= "PATH: $ENV{'PATH'}\n";
1062 $mess .= "cmd = $cmd\n";
1063 $mess .= "Exit status: $status\n";
1064 $mess .= "System Error Message: $!\n";
1065
1066 $gsdl_cgi->generate_error($mess);
1067 }
1068 else {
1069 my $mess = "insert-metadata successful: Key[$docid]\n";
1070 $mess .= " [In metadata.xml] $metaname";
1071 $mess .= " = $metavalue\n";
1072 $mess .= " [In database] $metaname";
1073 $mess .= " = $macro_metavalue\n";
1074 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1075 $gsdl_cgi->generate_ok_message($mess);
1076 }
1077 #=======================================================================#
1078 # set_metadata (accumulate version) [END]
1079 #=======================================================================#
1080
1081 # Release the lock once it is done
1082 $self->unlock_collection($username, $collect);
1083}
1084
10851;
Note: See TracBrowser for help on using the repository browser.