source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 23400

Last change on this file since 23400 was 23400, checked in by max, 13 years ago

Tested using this action under windows using Sqlite as the database. Code needed a variety of updates to support this.

File size: 36.4 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35
36BEGIN {
37# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
38 require XML::Rules;
39}
40
41
42@metadataaction::ISA = ('baseaction');
43
44
45my $action_table =
46{
47 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
48 'optional-args' => [] },
49
50 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
51 'optional-args' => [ "metapos" ] },
52
53 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54 'optional-args' => [ ] },
55
56 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
57 'optional-args' => [ "metapos" ] },
58
59 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60 'optional-args' => [ "metapos", "metamode" ]
61 # metamode can be "accumulate", "override",
62 },
63
64 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
65 'optional-args' => [ "d", "f", "metamode" ]
66 # metamode can be "accumulate", "override", or "unique-id"
67 },
68
69
70 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
71 'optional-args' => [ ] },
72
73 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
74 'optional-args' => [ "metapos" ] },
75
76 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
77 'optional-args' => [ ]
78 }
79};
80
81
82sub new
83{
84 my $class = shift (@_);
85 my ($gsdl_cgi,$iis6_mode) = @_;
86
87 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
88
89 return bless $self, $class;
90}
91
92
93sub get_live_metadata
94{
95 my $self = shift @_;
96
97 my $username = $self->{'username'};
98 my $collect = $self->{'collect'};
99 my $gsdl_cgi = $self->{'gsdl_cgi'};
100 my $gsdlhome = $self->{'gsdlhome'};
101 my $infodbtype = $self->{'infodbtype'};
102
103 # Note: Not sure why get_live_metadata doesn't need the authentication check
104
105 # Obtain the collect dir
106 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
107
108 # Make sure the collection isn't locked by someone else
109 $self->lock_collection($username, $collect);
110
111 # look up additional args
112 my $docid = $self->{'d'};
113 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
114 $gsdl_cgi->generate_error("No docid (d=...) specified.");
115 }
116
117 # Generate the dbkey
118 my $metaname = $self->{'metaname'};
119 my $dbkey = "$docid.$metaname";
120
121 # To people who know $collect_tail please add some comments
122 # Obtain path to the database
123 my $collect_tail = $collect;
124 $collect_tail =~ s/^.*[\/|\\]//;
125 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
126 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
127
128 # Obtain the content of the key
129 my $cmd = "gdbmget $infodb_file_path $dbkey";
130 if (open(GIN,"$cmd |") == 0) {
131 # Catch error if gdbmget failed
132 my $mess = "Failed to get metadata key: $metaname\n";
133 $mess .= "$!\n";
134
135 $gsdl_cgi->generate_error($mess);
136 }
137 else {
138 # Read everything in and concatenate them into $metavalue
139 my $metavalue = "";
140 my $line;
141 while (defined ($line=<GIN>)) {
142 $metavalue .= $line;
143 }
144 close(GIN);
145 chomp($metavalue); # Get rid off the tailing newlines
146 $gsdl_cgi->generate_ok_message("$metavalue");
147 }
148
149 # Release the lock once it is done
150 $self->unlock_collection($username, $collect);
151}
152
153
154sub get_metadata
155{
156 my $self = shift @_;
157
158 my $username = $self->{'username'};
159 my $collect = $self->{'collect'};
160 my $gsdl_cgi = $self->{'gsdl_cgi'};
161 my $gsdlhome = $self->{'gsdlhome'};
162
163 # Authenticate user if it is enabled
164 if ($baseaction::authentication_enabled) {
165 # Ensure the user is allowed to edit this collection
166 &authenticate_user($gsdl_cgi, $username, $collect);
167 }
168
169 # Obtain the collect dir
170 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
171
172 # Make sure the collection isn't locked by someone else
173 $self->lock_collection($username, $collect);
174
175 # look up additional args
176 my $docid = $self->{'d'};
177 my $metaname = $self->{'metaname'};
178 my $metapos = $self->{'metapos'};
179 my $infodbtype = $self->{'infodbtype'};
180
181 # To people who know $collect_tail please add some comments
182 # Obtain path to the database
183 my $collect_tail = $collect;
184 $collect_tail =~ s/^.*[\/\\]//;
185 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
186 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
187
188 # Read the docid entry
189 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
190
191 # Basically loop through and unescape_html the values
192 foreach my $k (keys %$doc_rec) {
193 my @escaped_v = ();
194 foreach my $v (@{$doc_rec->{$k}}) {
195 my $ev = &ghtml::unescape_html($v);
196 push(@escaped_v, $ev);
197 }
198 $doc_rec->{$k} = \@escaped_v;
199 }
200
201 # Obtain the specified metadata value
202 $metapos = 0 if (!defined $metapos);
203 my $metavalue = $doc_rec->{$metaname}->[$metapos];
204 $gsdl_cgi->generate_ok_message("$metavalue");
205
206 # Release the lock once it is done
207 $self->unlock_collection($username, $collect);
208}
209
210
211sub set_live_metadata
212{
213 my $self = shift @_;
214
215 my $username = $self->{'username'};
216 my $collect = $self->{'collect'};
217 my $gsdl_cgi = $self->{'gsdl_cgi'};
218 my $gsdlhome = $self->{'gsdlhome'};
219 my $infodbtype = $self->{'infodbtype'};
220
221 # don't user authenticate for now
222 if ($baseaction::authentication_enabled) {
223 # Ensure the user is allowed to edit this collection
224 &authenticate_user($gsdl_cgi, $username, $collect);
225 }
226
227 # Obtain the collect dir
228 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
229
230 # Make sure the collection isn't locked by someone else
231 $self->lock_collection($username, $collect);
232
233 # look up additional args
234 my $docid = $self->{'d'};
235 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
236 $gsdl_cgi->generate_error("No docid (d=...) specified.");
237 }
238 my $metavalue = $self->{'metavalue'};
239
240
241 # Generate the dbkey
242 my $metaname = $self->{'metaname'};
243 my $dbkey = "$docid.$metaname";
244
245 # To people who know $collect_tail please add some comments
246 # Obtain path to the database
247 my $collect_tail = $collect;
248 $collect_tail =~ s/^.*[\/\\]//;
249 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
250 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
251
252 # Set the new value
253 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
254 my $status = system($cmd);
255 if ($status != 0) {
256 # Catch error if gdbmget failed
257 my $mess = "Failed to set metadata key: $dbkey\n";
258
259 $mess .= "PATH: $ENV{'PATH'}\n";
260 $mess .= "cmd = $cmd\n";
261 $mess .= "Exit status: $status\n";
262 $mess .= "System Error Message: $!\n";
263
264 $gsdl_cgi->generate_error($mess);
265 }
266 else {
267 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
268 }
269
270 # Release the lock once it is done
271 $self->unlock_collection($username, $collect);
272}
273
274
275sub set_metadata
276{
277 my $self = shift @_;
278
279 my $username = $self->{'username'};
280 my $collect = $self->{'collect'};
281 my $gsdl_cgi = $self->{'gsdl_cgi'};
282 my $gsdlhome = $self->{'gsdlhome'};
283
284
285
286 # don't user authenticate for now
287 if ($baseaction::authentication_enabled) {
288 # Ensure the user is allowed to edit this collection
289 &authenticate_user($gsdl_cgi, $username, $collect);
290 }
291
292 # Obtain the collect dir
293 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
294
295 # Make sure the collection isn't locked by someone else
296 $self->lock_collection($username, $collect);
297
298 # look up additional args
299 my $docid = $self->{'d'};
300 my $metaname = $self->{'metaname'};
301 my $metapos = $self->{'metapos'};
302 my $metavalue = $self->{'metavalue'};
303 my $infodbtype = $self->{'infodbtype'};
304
305 # To people who know $collect_tail please add some comments
306 # Obtain path to the database
307 my $collect_tail = $collect;
308 $collect_tail =~ s/^.*[\/\\]//;
309 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
310 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
311
312 # Read the docid entry
313 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
314
315 # Set the metadata value
316 if (defined $metapos) {
317 $doc_rec->{$metaname}->[$metapos] = $metavalue;
318 }
319 else {
320 $doc_rec->{$metaname} = [ $metavalue ];
321 }
322
323 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
324 if ($status != 0) {
325 # Catch error if gdbmget failed
326 my $mess = "Failed to set metadata key: $docid\n";
327
328 $mess .= "PATH: $ENV{'PATH'}\n";
329 $mess .= "Exit status: $status\n";
330 $mess .= "System Error Message: $!\n";
331
332 $gsdl_cgi->generate_error($mess);
333 }
334 else {
335 my $mess = "set-document-metadata successful: Key[$docid]\n";
336 $mess .= " $metaname";
337 $mess .= "->[$metapos]" if (defined $metapos);
338 $mess .= " = $metavalue";
339
340 $gsdl_cgi->generate_ok_message($mess);
341 }
342
343 # Release the lock once it is done
344 $self->unlock_collection($username, $collect);
345}
346
347
348sub dxml_metadata
349{
350 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
351 my $metaname = $parser->{'parameters'}->{'metaname'};
352 my $metamode = $parser->{'parameters'}->{'metamode'};
353
354 # Find the right metadata tag and checks if we are going to override it
355 # Note: This over writes the first metadata block it encountered. If there are multiple Sections in the doc.xml, it might not behave as you would expect
356 my $name_attr = $attrHash->{'name'};
357 if (($name_attr eq $metaname) && ($metamode eq "override")) {
358 # Get the value and override the current value
359 my $metavalue = $parser->{'parameters'}->{'metavalue'};
360 $attrHash->{'_content'} = $metavalue;
361
362 # Don't want it to wipe out any other pieces of metadata
363 $parser->{'parameters'}->{'metamode'} = "done";
364 }
365
366 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
367 return [$tagname => $attrHash];
368}
369
370
371sub dxml_description
372{
373 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
374 my $metamode = $parser->{'parameters'}->{'metamode'};
375
376 # Accumulate the metadata
377 # NOTE: This appends new metadata element to all description fields.
378 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
379 if ($metamode eq "accumulate") {
380 # tack a new metadata tag on to the end of the <Metadata>+ block
381 my $metaname = $parser->{'parameters'}->{'metaname'};
382 my $metavalue = $parser->{'parameters'}->{'metavalue'};
383
384 my $metadata_attr = { '_content' => $metavalue,
385 'name' => $metaname,
386 'mode' => "accumulate" };
387
388 my $append_metadata = [ "Metadata" => $metadata_attr ];
389 my $description_content = $attrHash->{'_content'};
390
391 push(@$description_content," ", $append_metadata ,"\n ");
392 }
393
394
395 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
396 return [$tagname => $attrHash];
397}
398
399
400sub edit_xml_file
401{
402 my $self = shift @_;
403 my ($gsdl_cgi, $filename, $rules, $options) = @_;
404
405 # use XML::Rules to add it in (read in and out again)
406 my $parser = XML::Rules->new(rules => $rules,
407 style => 'filter' );
408
409 my $xml_in = "";
410 if (!open(MIN,"<$filename")) {
411 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
412 }
413 else {
414 # Read all the text in
415 my $line;
416 while (defined ($line=<MIN>)) {
417 $xml_in .= $line;
418 }
419 close(MIN);
420
421 # Matched lines will get handled by the call backs
422 my $xml_out = "";
423 $parser->filter($xml_in,\$xml_out, $options);
424
425 if (!open(MOUT,">$filename")) {
426 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
427 }
428 else {
429 print MOUT $xml_out;
430 close(MOUT);
431 }
432 }
433}
434
435
436sub edit_doc_xml
437{
438 my $self = shift @_;
439 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode) = @_;
440
441 # use XML::Rules to add it in (read in and out again)
442 # Set the call back functions
443 my @rules =
444 ( _default => 'raw',
445 'Metadata' => \&dxml_metadata,
446 'Description' => \&dxml_description );
447
448 # Sets the parameters
449 my $options = { 'metaname' => $metaname,
450 'metapos' => $metapos,
451 'metavalue' => $metavalue,
452 'metamode' => $metamode };
453
454 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@rules,$options);
455}
456
457
458sub set_archives_metadata
459{
460 my $self = shift @_;
461
462 my $username = $self->{'username'};
463 my $collect = $self->{'collect'};
464 my $gsdl_cgi = $self->{'gsdl_cgi'};
465 my $gsdlhome = $self->{'gsdlhome'};
466 my $infodbtype = $self->{'infodbtype'};
467
468 # don't user authenticate for now
469 if ($baseaction::authentication_enabled) {
470 # Ensure the user is allowed to edit this collection
471 $self->authenticate_user($username, $collect);
472 }
473
474 # Obtain the collect and archive dir
475 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
476 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
477
478 # Make sure the collection isn't locked by someone else
479 $self->lock_collection($username, $collect);
480
481 # look up additional args
482 my $docid = $self->{'d'};
483 my $metaname = $self->{'metaname'};
484 my $metavalue = $self->{'metavalue'};
485
486 my $metapos = $self->{'metapos'};
487 $metapos = 0 if (!defined $metapos);
488
489 my $metamode = $self->{'metamode'};
490 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
491 # make "accumulate" the default (less destructive, as won't actually
492 # delete any existing values)
493 $metamode = "accumulate";
494 }
495
496 # Obtain the doc.xml path for the specified docID
497 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
498 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
499 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
500
501 # The $doc_xml_file is relative to the archives, and now let's get the full path
502 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
503 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
504
505 # Edit the doc.xml file with the specified metadata name, value and position.
506 # TODO: there is a potential problem here as this edit_doc_xml function
507 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
508 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
509 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
510 $metaname,$metavalue,$metapos,$metamode);
511
512 my $mess = "set-archives-metadata successful: Key[$docid]\n";
513 $mess .= " $metaname";
514 $mess .= "->[$metapos]" if (defined $metapos);
515 $mess .= " = $metavalue";
516 $mess .= " ($metamode)\n";
517
518 $gsdl_cgi->generate_ok_message($mess);
519
520 # Release the lock once it is done
521 $self->unlock_collection($username, $collect);
522
523}
524
525
526sub mxml_metadata
527{
528 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
529 my $metaname = $parser->{'parameters'}->{'metaname'};
530 my $metamode = $parser->{'parameters'}->{'metamode'};
531
532 # Report error if we don't see FileName tag before this
533 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
534
535 # Don't do anything if we are not in the right FileSet
536 my $file_regexp = $parser->{'parameters'}->{'current_file'};
537 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
538
539 # Find the right metadata tag and checks if we are going to override it
540 # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified
541 my $name_attr = $attrHash->{'name'};
542 if (($name_attr eq $metaname) && ($metamode eq "override")) {
543 # Get the value and override the current value
544 my $metavalue = $parser->{'parameters'}->{'metavalue'};
545 $attrHash->{'_content'} = $metavalue;
546
547 # Don't want it to wipe out any other pieces of metadata
548 $parser->{'parameters'}->{'metamode'} = "done";
549 }
550
551 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
552 return [$tagname => $attrHash];
553}
554
555
556sub mxml_description
557{
558 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
559 my $metamode = $parser->{'parameters'}->{'metamode'};
560
561 # Failed... Report error if we don't see FileName tag before this
562 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
563
564 # Don't do anything if we are not in the right FileSet
565 my $file_regexp = $parser->{'parameters'}->{'current_file'};
566 return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
567
568 # Accumulate the metadata block to the end of the description block
569 # Note: This adds metadata block to all description blocks, so if there are
570 # multiple FileSets, it will add to all of them
571 if ($metamode eq "accumulate") {
572 # tack a new metadata tag on to the end of the <Metadata>+ block
573 my $metaname = $parser->{'parameters'}->{'metaname'};
574 my $metavalue = $parser->{'parameters'}->{'metavalue'};
575
576 my $metadata_attr = { '_content' => $metavalue,
577 'name' => $metaname,
578 'mode' => "accumulate" };
579
580 my $append_metadata = [ "Metadata" => $metadata_attr ];
581 my $description_content = $attrHash->{'_content'};
582
583 push(@$description_content," ", $append_metadata ,"\n ");
584 }
585
586 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
587 return [$tagname => $attrHash];
588}
589
590
591sub mxml_filename
592{
593 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
594
595 # Store the filename of the Current Fileset
596 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
597 # FileName tag must come before Description tag
598 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
599
600 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
601 return [$tagname => $attrHash];
602}
603
604
605sub mxml_fileset
606{
607 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
608
609 # Initilise the current_file
610 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
611 # FileName tag must come before Description tag
612 $parser->{'parameters'}->{'current_file'} = "";
613
614 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
615 return [$tagname => $attrHash];
616}
617
618
619sub edit_metadata_xml
620{
621 my $self = shift @_;
622 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
623
624 # Set the call-back functions for the metadata tags
625 my @rules =
626 ( _default => 'raw',
627 'FileName' => \&mxml_filename,
628 'Metadata' => \&mxml_metadata,
629 'Description' => \&mxml_description,
630 'FileSet' => \&mxml_fileset);
631
632 # use XML::Rules to add it in (read in and out again)
633 my $parser = XML::Rules->new(rules => \@rules,
634 style => 'filter',
635 output_encoding => 'utf8');
636
637 my $xml_in = "";
638 if (!open(MIN,"<$metadata_xml_filename")) {
639 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
640 }
641 else {
642 # Read them in
643 my $line;
644 while (defined ($line=<MIN>)) {
645 $xml_in .= $line;
646 }
647 close(MIN);
648
649 # Filter with the call-back functions
650 my $xml_out = "";
651 $parser->filter($xml_in,\$xml_out, { metaname => $metaname,
652 metavalue => $metavalue,
653 metamode => $metamode,
654 src_file => $src_file,
655 current_file => undef} );
656
657 if (!open(MOUT,">$metadata_xml_filename")) {
658 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
659 }
660 else {
661 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
662 # At the moment, I will just hack it!
663 my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
664 $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
665 print MOUT $xml_out;
666 close(MOUT);
667 }
668 }
669}
670
671
672sub set_import_metadata
673{
674 my $self = shift @_;
675
676 my $username = $self->{'username'};
677 my $collect = $self->{'collect'};
678 my $gsdl_cgi = $self->{'gsdl_cgi'};
679 my $gsdlhome = $self->{'gsdlhome'};
680 my $infodbtype = $self->{'infodbtype'};
681
682 # don't user authenticate for now
683 if ($baseaction::authentication_enabled) {
684 # Ensure the user is allowed to edit this collection
685 $self->authenticate_user($username, $collect);
686 }
687
688 # Obtain the collect and archive dir
689 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
690 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
691
692 # Make sure the collection isn't locked by someone else
693 $self->lock_collection($username, $collect);
694
695 # look up additional args
696 # want either d= or f=
697 my $docid = $self->{'d'};
698 my $import_file = $self->{'f'};
699 if ((!defined $docid) && (!defined $import_file)) {
700 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
701 }
702
703 # Get the parameters and set default mode to "accumulate"
704 my $metaname = $self->{'metaname'};
705 my $metavalue = $self->{'metavalue'};
706 my $metamode = $self->{'metamode'};
707 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
708 # make "accumulate" the default (less destructive, as won't actually
709 # delete any existing values)
710 $metamode = "accumulate";
711 }
712
713 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
714 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
715 my $metadata_xml_file;
716 my $import_filename = undef;
717 if (defined $docid) {
718 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
719 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
720
721 # This now stores the full pathname
722 $import_filename = $doc_rec->{'src-file'}->[0];
723 }
724 else {
725 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
726 }
727
728 # figure out correct metadata.xml file [?]
729 # Assuming the metadata.xml file is next to the source file
730 # Note: This will not work if it is using the inherited metadata from the parent folder
731 my ($import_tailname, $import_dirname)
732 = File::Basename::fileparse($import_filename);
733 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
734
735 # Edit the metadata.xml
736 # Modified by Jeffrey from DL Consulting
737 # Handle the case where there is one metadata.xml file for multiple FileSets
738 # The XML filter needs to know whether it is in the right FileSet
739 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
740 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
741 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
742 $metaname, $metavalue, $metamode, $import_tailname);
743
744 # Release the lock once it is done
745 $self->unlock_collection($username, $collect);
746}
747
748
749sub remove_live_metadata
750{
751 my $self = shift @_;
752
753 my $username = $self->{'username'};
754 my $collect = $self->{'collect'};
755 my $gsdl_cgi = $self->{'gsdl_cgi'};
756 my $gsdlhome = $self->{'gsdlhome'};
757 my $infodbtype = $self->{'infodbtype'};
758
759 if ($baseaction::authentication_enabled) {
760 # Ensure the user is allowed to edit this collection
761 &authenticate_user($gsdl_cgi, $username, $collect);
762 }
763
764 # Obtain the collect dir
765 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
766
767 # Make sure the collection isn't locked by someone else
768 $self->lock_collection($username, $collect);
769
770 # look up additional args
771 my $docid = $self->{'d'};
772 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
773 $gsdl_cgi->generate_error("No docid (d=...) specified.");
774 }
775
776 # Generate the dbkey
777 my $metaname = $self->{'metaname'};
778 my $dbkey = "$docid.$metaname";
779
780 # To people who know $collect_tail please add some comments
781 # Obtain the live gdbm_db path
782 my $collect_tail = $collect;
783 $collect_tail =~ s/^.*[\/\\]//;
784 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
785 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
786
787 # Remove the key
788 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
789 my $status = system($cmd);
790 if ($status != 0) {
791 # Catch error if gdbmdel failed
792 my $mess = "Failed to set metadata key: $dbkey\n";
793
794 $mess .= "PATH: $ENV{'PATH'}\n";
795 $mess .= "cmd = $cmd\n";
796 $mess .= "Exit status: $status\n";
797 $mess .= "System Error Message: $!\n";
798
799 $gsdl_cgi->generate_error($mess);
800 }
801 else {
802 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
803 }
804
805}
806
807
808sub remove_metadata
809{
810 my $self = shift @_;
811
812 my $username = $self->{'username'};
813 my $collect = $self->{'collect'};
814 my $gsdl_cgi = $self->{'gsdl_cgi'};
815 my $gsdlhome = $self->{'gsdlhome'};
816 my $infodbtype = $self->{'infodbtype'};
817
818 # don't user authenticate for now
819 if ($baseaction::authentication_enabled) {
820 # Ensure the user is allowed to edit this collection
821 &authenticate_user($gsdl_cgi, $username, $collect);
822 }
823
824 # Obtain the collect dir
825 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
826
827 # Make sure the collection isn't locked by someone else
828 $self->lock_collection($username, $collect);
829
830 # look up additional args
831 my $docid = $self->{'d'};
832 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
833 $gsdl_cgi->generate_error("No docid (d=...) specified.");
834 }
835 my $metaname = $self->{'metaname'};
836 my $metapos = $self->{'metapos'};
837
838 # To people who know $collect_tail please add some comments
839 # Obtain the path to the database
840 my $collect_tail = $collect;
841 $collect_tail =~ s/^.*[\/\\]//;
842 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
843 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
844
845 # Read the docid entry
846 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
847
848 # Basically loop through and unescape_html the values
849 foreach my $k (keys %$doc_rec) {
850 my @escaped_v = ();
851 foreach my $v (@{$doc_rec->{$k}}) {
852 if ($k eq "contains") {
853 # protect quotes in ".2;".3 etc
854 $v =~ s/\"/\\\"/g;
855 push(@escaped_v, $v);
856 }
857 else {
858 my $ev = &ghtml::unescape_html($v);
859 $ev =~ s/\"/\\\"/g;
860 push(@escaped_v, $ev);
861 }
862 }
863 $doc_rec->{$k} = \@escaped_v;
864 }
865
866 # Check to make sure the key does exist
867 if (!defined ($doc_rec->{$metaname})) {
868 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
869 }
870
871 # Obtain the specified metadata pos
872 $metapos = 0 if (!defined $metapos);
873
874 # consider check key is defined before deleting?
875 # Loop through the metadata array and ignore the specified position
876 my $filtered_metadata = [];
877 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
878 for (my $i=0; $i<$num_metadata_vals; $i++) {
879 my $metavalue = shift(@{$doc_rec->{$metaname}});
880
881 if ($i != $metapos) {
882 push(@$filtered_metadata,$metavalue)
883 }
884 }
885 $doc_rec->{$metaname} = $filtered_metadata;
886
887 # Turn the record back to string
888 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
889
890 # Store it back to the database
891 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
892 my $status = system($cmd);
893 if ($status != 0) {
894 my $mess = "Failed to set metadata key: $docid\n";
895
896 $mess .= "PATH: $ENV{'PATH'}\n";
897 $mess .= "cmd = $cmd\n";
898 $mess .= "Exit status: $status\n";
899 $mess .= "System Error Message: $!\n";
900
901 $gsdl_cgi->generate_error($mess);
902 }
903 else {
904 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
905 $mess .= " $metaname";
906 $mess .= "->[$metapos]" if (defined $metapos);
907
908 $gsdl_cgi->generate_ok_message($mess);
909 }
910}
911
912
913# Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process...
914sub insert_metadata
915{
916 my $self = shift @_;
917
918 my $username = $self->{'username'};
919 my $collect = $self->{'collect'};
920 my $gsdl_cgi = $self->{'gsdl_cgi'};
921 my $gsdlhome = $self->{'gsdlhome'};
922 my $infodbtype = $self->{'infodbtype'};
923
924 # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild.
925 my $rebuild_pending_macro = "_rebuildpendingmessage_";
926
927 # don't user authenticate for now
928 if ($baseaction::authentication_enabled) {
929 # Ensure the user is allowed to edit this collection
930 $self->authenticate_user($username, $collect);
931 }
932
933 # Obtain the collect and archive dir
934 my $collect_dir = &util::filename_cat($gsdlhome, "collect");
935 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
936
937 # Make sure the collection isn't locked by someone else
938 $self->lock_collection($username, $collect);
939
940 # Check additional args
941 my $docid = $self->{'d'};
942 if (!defined($docid)) {
943 $gsdl_cgi->generate_error("No document id is specified: d=...");
944 }
945 my $metaname = $self->{'metaname'};
946 if (!defined($metaname)) {
947 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
948 }
949 my $metavalue = $self->{'metavalue'};
950 if (!defined($metavalue) || $metavalue eq "") {
951 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
952 }
953 # make "accumulate" the default (less destructive, as won't actually
954 # delete any existing values)
955 my $metamode = "accumulate";
956
957 #=======================================================================#
958 # set_import_metadata [START]
959 #=======================================================================#
960 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
961 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
962 my $metadata_xml_file;
963 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
964 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
965
966 # This now stores the full pathname
967 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
968
969 # figure out correct metadata.xml file [?]
970 # Assuming the metadata.xml file is next to the source file
971 # Note: This will not work if it is using the inherited metadata from the parent folder
972 my ($import_tailname, $import_dirname)
973 = File::Basename::fileparse($import_filename);
974 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
975
976 # Shane's escape characters
977 $metavalue = pack "U0C*", unpack "C*", $metavalue;
978 $metavalue =~ s/\,/&#44;/g;
979 $metavalue =~ s/\:/&#58;/g;
980 $metavalue =~ s/\|/&#124;/g;
981 $metavalue =~ s/\(/&#40;/g;
982 $metavalue =~ s/\)/&#41;/g;
983 $metavalue =~ s/\[/&#91;/g;
984 $metavalue =~ s/\\/&#92;/g;
985 $metavalue =~ s/\]/&#93;/g;
986 $metavalue =~ s/\{/&#123;/g;
987 $metavalue =~ s/\}/&#125;/g;
988 $metavalue =~ s/\"/&#34;/g;
989 $metavalue =~ s/\`/&#96;/g;
990 $metavalue =~ s/\n/_newline_/g;
991
992 # Edit the metadata.xml
993 # Modified by Jeffrey from DL Consulting
994 # Handle the case where there is one metadata.xml file for multiple FileSets
995 # The XML filter needs to know whether it is in the right FileSet
996 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
997 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
998 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
999 $metaname, $metavalue, $metamode, $import_tailname);
1000 #=======================================================================#
1001 # set_import_metadata [END]
1002 #=======================================================================#
1003
1004
1005 #=======================================================================#
1006 # set_metadata (accumulate version) [START]
1007 #=======================================================================#
1008 # To people who know $collect_tail please add some comments
1009 # Obtain path to the database
1010 my $collect_tail = $collect;
1011 $collect_tail =~ s/^.*[\/\\]//;
1012 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1013 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1014
1015 # Read the docid entry
1016 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1017
1018 foreach my $k (keys %$doc_rec) {
1019 my @escaped_v = ();
1020 foreach my $v (@{$doc_rec->{$k}}) {
1021 if ($k eq "contains") {
1022 # protect quotes in ".2;".3 etc
1023 $v =~ s/\"/\\\"/g;
1024 push(@escaped_v, $v);
1025 }
1026 else {
1027 my $ev = &ghtml::unescape_html($v);
1028 $ev =~ s/\"/\\\"/g;
1029 push(@escaped_v, $ev);
1030 }
1031 }
1032 $doc_rec->{$k} = \@escaped_v;
1033 }
1034
1035 # Protect the quotes
1036 $metavalue =~ s/\"/\\\"/g;
1037
1038 # Adds the pending macro
1039 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1040
1041 # If the metadata doesn't exist, create a new one
1042 if (!defined($doc_rec->{$metaname})){
1043 $doc_rec->{$metaname} = [ $macro_metavalue ];
1044 }
1045 # Else, let's acculumate the values
1046 else {
1047 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1048 }
1049
1050 # Generate the record string
1051 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1052
1053 # Store it into GDBM
1054 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1055 my $status = system($cmd);
1056 if ($status != 0) {
1057 # Catch error if gdbmget failed
1058 my $mess = "Failed to set metadata key: $docid\n";
1059
1060 $mess .= "PATH: $ENV{'PATH'}\n";
1061 $mess .= "cmd = $cmd\n";
1062 $mess .= "Exit status: $status\n";
1063 $mess .= "System Error Message: $!\n";
1064
1065 $gsdl_cgi->generate_error($mess);
1066 }
1067 else {
1068 my $mess = "insert-metadata successful: Key[$docid]\n";
1069 $mess .= " [In metadata.xml] $metaname";
1070 $mess .= " = $metavalue\n";
1071 $mess .= " [In database] $metaname";
1072 $mess .= " = $macro_metavalue\n";
1073 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1074 $gsdl_cgi->generate_ok_message($mess);
1075 }
1076 #=======================================================================#
1077 # set_metadata (accumulate version) [END]
1078 #=======================================================================#
1079
1080 # Release the lock once it is done
1081 $self->unlock_collection($username, $collect);
1082}
1083
10841;
Note: See TracBrowser for help on using the repository browser.