root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 23766

Revision 23766, 41.2 KB (checked in by davidb, 8 years ago)

Setting of the collect directory changed to be compliant with Greenstone 3 and its 'site' variable

Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35
36BEGIN {
37#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
38    require XML::Rules;
39}
40
41
42@metadataaction::ISA = ('baseaction');
43
44
45my $action_table =
46{
47    "get-live-metadata"     => { 'compulsory-args' => [ "d", "metaname" ],
48                     'optional-args'   => [] },
49
50    "get-metadata"          => { 'compulsory-args' => [ "d", "metaname" ],
51                     'optional-args'   => [ "metapos" ] },
52
53    "set-live-metadata"     => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54                     'optional-args'   => [ ] },
55
56    "set-metadata"          => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
57                     'optional-args'   => [ "metapos" ] },
58
59    "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60                     'optional-args'   => [ "metapos", "metamode" ]
61                      # metamode can be "accumulate", "override",
62                },
63
64    "set-import-metadata"   => { 'compulsory-args' => [ "metaname", "metavalue" ],
65                     'optional-args'   => [ "d", "f", "metamode" ]
66                    # metamode can be "accumulate", "override", or "unique-id"
67                 },
68
69
70    "remove-live-metadata"  => { 'compulsory-args' => [ "d", "metaname" ],
71                     'optional-args'   => [ ] },
72
73    "remove-metadata"       => { 'compulsory-args' => [ "d", "metaname" ],
74                     'optional-args'   => [ "metapos" ] },
75
76    "insert-metadata"       => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
77                     'optional-args'   => [ ]
78                   }
79};
80
81
82sub new
83{
84    my $class = shift (@_);
85    my ($gsdl_cgi,$iis6_mode) = @_;
86
87    # Treat metavalue specially.  To transmit this through a GET request
88    # the Javascript side has url-encoded it, so here we need to decode
89    # it before proceeding
90
91    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
92    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
93
94    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
95
96    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
97
98    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
99
100    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
101
102    return bless $self, $class;
103}
104
105
106sub get_live_metadata
107{
108    my $self = shift @_;
109
110    my $username  = $self->{'username'};
111    my $collect   = $self->{'collect'};
112    my $gsdl_cgi  = $self->{'gsdl_cgi'};
113    my $gsdlhome  = $self->{'gsdlhome'};
114    my $infodbtype = $self->{'infodbtype'};
115   
116    # live metadata gets/saves value scoped (prefixed) by the current usename
117    # so (for now) let's not bother to enforce authentication
118
119    # Obtain the collect dir
120    my $site = $self->{'site'};
121    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
122    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
123
124    # Make sure the collection isn't locked by someone else
125    $self->lock_collection($username, $collect);
126
127    # look up additional args
128    my $docid  = $self->{'d'};
129    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
130       $gsdl_cgi->generate_error("No docid (d=...) specified.");
131    }
132
133    # Generate the dbkey
134    my $metaname  = $self->{'metaname'};
135    my $dbkey = "$docid.$metaname";
136
137    # To people who know $collect_tail please add some comments
138    # Obtain path to the database
139    my $collect_tail = $collect;
140    $collect_tail =~ s/^.*[\/|\\]//;
141    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
142    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
143   
144    # Obtain the content of the key
145    my $cmd = "gdbmget $infodb_file_path $dbkey";
146    if (open(GIN,"$cmd |") == 0) {
147        # Catch error if gdbmget failed
148    my $mess = "Failed to get metadata key: $metaname\n";
149    $mess .= "$!\n";
150
151    $gsdl_cgi->generate_error($mess);
152    }
153    else {
154    binmode(GIN,":utf8");
155        # Read everything in and concatenate them into $metavalue
156    my $metavalue = "";
157    my $line;
158    while (defined ($line=<GIN>)) {
159        $metavalue .= $line;
160    }
161    close(GIN);
162    chomp($metavalue); # Get rid off the tailing newlines
163    $gsdl_cgi->generate_ok_message("$metavalue");
164    }
165
166    # Release the lock once it is done
167    $self->unlock_collection($username, $collect);
168}
169
170
171sub get_metadata
172{
173    my $self = shift @_;
174
175    my $username  = $self->{'username'};
176    my $collect   = $self->{'collect'};
177    my $gsdl_cgi  = $self->{'gsdl_cgi'};
178    my $gsdlhome  = $self->{'gsdlhome'};
179
180    # Authenticate user if it is enabled
181    if ($baseaction::authentication_enabled) {
182    # Ensure the user is allowed to edit this collection
183    &authenticate_user($gsdl_cgi, $username, $collect);
184    }
185
186    # Obtain the collect dir
187    my $site = $self->{'site'};
188    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
189    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
190
191    # Make sure the collection isn't locked by someone else
192    $self->lock_collection($username, $collect);
193
194    # look up additional args
195    my $docid     = $self->{'d'};
196    my $metaname  = $self->{'metaname'};
197    my $metapos   = $self->{'metapos'};
198    my $infodbtype = $self->{'infodbtype'};
199
200    # To people who know $collect_tail please add some comments
201    # Obtain path to the database
202    my $collect_tail = $collect;
203    $collect_tail =~ s/^.*[\/\\]//;
204    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
205    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
206
207    # Read the docid entry
208    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
209 
210    # Basically loop through and unescape_html the values
211    foreach my $k (keys %$doc_rec) {
212    my @escaped_v = ();
213    foreach my $v (@{$doc_rec->{$k}}) {
214        my $ev = &ghtml::unescape_html($v);
215        push(@escaped_v, $ev);
216    }
217    $doc_rec->{$k} = \@escaped_v;
218    }
219
220    # Obtain the specified metadata value
221    $metapos = 0 if (!defined $metapos);
222    my $metavalue = $doc_rec->{$metaname}->[$metapos];
223    $gsdl_cgi->generate_ok_message("$metavalue");
224   
225    # Release the lock once it is done
226    $self->unlock_collection($username, $collect);
227}
228
229
230sub set_live_metadata
231{
232    my $self = shift @_;
233
234    my $username  = $self->{'username'};
235    my $collect   = $self->{'collect'};
236    my $gsdl_cgi  = $self->{'gsdl_cgi'};
237    my $gsdlhome  = $self->{'gsdlhome'};
238    my $infodbtype = $self->{'infodbtype'};
239 
240    if ($baseaction::authentication_enabled) {
241    # Ensure the user is allowed to edit this collection
242    &authenticate_user($gsdl_cgi, $username, $collect);
243    }
244
245    # Obtain the collect dir
246    my $site = $self->{'site'};
247    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
248    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
249
250    # Make sure the collection isn't locked by someone else
251    $self->lock_collection($username, $collect);
252
253    # look up additional args
254    my $docid     = $self->{'d'};
255    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
256      $gsdl_cgi->generate_error("No docid (d=...) specified.");
257    }
258    my $metavalue = $self->{'metavalue'};
259 
260
261    # Generate the dbkey   
262    my $metaname  = $self->{'metaname'};
263    my $dbkey = "$docid.$metaname";
264
265    # To people who know $collect_tail please add some comments
266    # Obtain path to the database
267    my $collect_tail = $collect;
268    $collect_tail =~ s/^.*[\/\\]//;
269    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
270    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
271
272    # Set the new value
273    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
274    my $status = system($cmd);
275    if ($status != 0) {
276        # Catch error if gdbmget failed
277    my $mess = "Failed to set metadata key: $dbkey\n";
278
279    $mess .= "PATH: $ENV{'PATH'}\n";
280    $mess .= "cmd = $cmd\n";
281    $mess .= "Exit status: $status\n";
282    $mess .= "System Error Message: $!\n";
283
284    $gsdl_cgi->generate_error($mess);
285    }
286    else {
287    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
288    }
289   
290    # Release the lock once it is done
291    $self->unlock_collection($username, $collect);
292}
293
294
295sub set_metadata
296{
297    my $self = shift @_;
298
299    my $username  = $self->{'username'};
300    my $collect   = $self->{'collect'};
301    my $gsdl_cgi  = $self->{'gsdl_cgi'};
302    my $gsdlhome  = $self->{'gsdlhome'};
303
304    if ($baseaction::authentication_enabled) {
305    # Ensure the user is allowed to edit this collection
306    &authenticate_user($gsdl_cgi, $username, $collect);
307    }
308
309    my $site = $self->{'site'};
310    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
311   
312    $gsdl_cgi->checked_chdir($collect_dir);
313
314    # Obtain the collect dir
315    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
316
317    # Make sure the collection isn't locked by someone else
318    $self->lock_collection($username, $collect);
319
320    # look up additional args
321    my $docid     = $self->{'d'};
322    my $metaname  = $self->{'metaname'};
323    my $metapos   = $self->{'metapos'};
324    my $metavalue = $self->{'metavalue'};
325    my $infodbtype = $self->{'infodbtype'};
326   
327    # To people who know $collect_tail please add some comments
328    # Obtain path to the database
329    my $collect_tail = $collect;
330    $collect_tail =~ s/^.*[\/\\]//;
331    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
332    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
333   
334    print STDERR "**** infodb file path = $infodb_file_path\n";
335    print STDERR "***** infodb type = $infodbtype\n";
336   
337    # Read the docid entry
338    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
339   
340    # Set the metadata value
341    if (defined $metapos) {
342    $doc_rec->{$metaname}->[$metapos] = $metavalue;
343    }
344    else {
345    $doc_rec->{$metaname} = [ $metavalue ];
346    }
347 
348    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
349    if ($status != 0) {
350        # Catch error if set infodb entry failed
351    my $mess = "Failed to set metadata key: $docid\n";
352   
353    $mess .= "PATH: $ENV{'PATH'}\n";
354    $mess .= "Exit status: $status\n";
355    $mess .= "System Error Message: $!\n";
356   
357    $gsdl_cgi->generate_error($mess);
358    }
359    else {
360    my $mess = "set-document-metadata successful: Key[$docid]\n";
361    $mess .= "  $metaname";
362    $mess .= "->[$metapos]" if (defined $metapos);
363    $mess .= " = $metavalue";
364   
365    $gsdl_cgi->generate_ok_message($mess);
366    }
367   
368    # Release the lock once it is done
369    $self->unlock_collection($username, $collect);
370}
371
372
373sub dxml_metadata
374{
375    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
376    my $metaname = $parser->{'parameters'}->{'metaname'};
377    my $metamode = $parser->{'parameters'}->{'metamode'};
378   
379    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
380   
381    # Find the right metadata tag and checks if we are going to
382    # override it
383    #
384    # Note: This over writes the first metadata block it
385    # encountered. If there are multiple Sections in the doc.xml, it
386    # might not behave as you would expect
387
388    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
389##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
390##    print STDERR "**** metamode = $metamode\n";
391   
392    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
393    my $name_attr = $attrHash->{'name'};
394    if (($name_attr eq $metaname) && ($metamode eq "override")) {
395##      print STDERR "**** got match!!\n";
396        # Get the value and override the current value
397        my $metavalue = $parser->{'parameters'}->{'metavalue'};
398        $attrHash->{'_content'} = $metavalue;
399       
400        # Don't want it to wipe out any other pieces of metadata
401        $parser->{'parameters'}->{'metamode'} = "done";
402    }
403    }
404
405    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
406    return [$tagname => $attrHash];
407}
408
409
410sub dxml_description
411{
412    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
413    my $metamode = $parser->{'parameters'}->{'metamode'};
414
415    # Accumulate the metadata
416    # NOTE: This appends new metadata element to all description fields.
417    # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
418    if (($metamode eq "accumulate") || ($metamode eq "override")) {
419    # If get to here and metamode is override, the this means there
420    # was no existing value to overide => treat as an append operation
421
422    # Tack a new metadata tag on to the end of the <Metadata>+ block
423    my $metaname = $parser->{'parameters'}->{'metaname'};
424    my $metavalue = $parser->{'parameters'}->{'metavalue'};
425   
426    my $metadata_attr = { '_content' => $metavalue,
427                  'name'     => $metaname,
428                  'mode'     => "accumulate" };
429
430    my $append_metadata = [ "Metadata" => $metadata_attr ];
431    my $description_content = $attrHash->{'_content'};
432
433##  print STDERR "**** appending to doc.xml\n";
434
435    push(@$description_content,"    ", $append_metadata ,"\n        ");
436    $parser->{'parameters'}->{'metamode'} = "done";
437    }
438
439
440    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
441    return [$tagname => $attrHash];
442}
443
444
445
446sub dxml_start_section
447{
448    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
449
450    my $new_depth = scalar(@$contextArray);
451
452    if ($new_depth == 1) {
453    $parser->{'parameters'}->{'curr_section_depth'} = 1;
454    $parser->{'parameters'}->{'curr_section_num'}   = "";
455    }
456
457    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
458    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
459
460    my $new_secnum;
461
462    if ($new_depth > $old_depth) {
463    # child subsection
464    $new_secnum = "$old_secnum.1";
465    }
466    elsif ($new_depth == $old_depth) {
467    # sibling section => increase it's value by 1
468    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
469    $tail_num++;
470    $new_secnum = $old_secnum;
471    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
472    }
473    else {
474    # back up to parent section => lopp off tail
475    $new_secnum = $old_secnum;
476    $new_secnum =~ s/\.\d+$//;
477    }
478
479    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
480    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
481
482    print STDERR "*** In Section: $new_secnum\n";
483}
484
485sub edit_xml_file
486{
487    my $self = shift @_;
488    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
489
490    # use XML::Rules to add it in (read in and out again)
491    my $parser = XML::Rules->new(start_rules     => $start_rules,
492                 rules           => $rules,
493                 style           => 'filter',
494                                 output_encoding => 'utf8' );
495
496    my $xml_in = "";
497    if (!open(MIN,"<$filename")) {
498    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
499    }
500    else {
501        # Read all the text in
502    my $line;
503    while (defined ($line=<MIN>)) {
504        $xml_in .= $line;
505    }
506    close(MIN);
507   
508    my $MOUT;   
509    if (!open($MOUT,">$filename")) {
510        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
511    }
512    else {
513        # Matched lines will get handled by the call backs
514##      my $xml_out = "";
515
516        binmode($MOUT,":utf8");
517        $parser->filter($xml_in,$MOUT, $options);
518
519#       binmode(MOUT,":utf8");
520#       print MOUT $xml_out;
521        close($MOUT);       
522    }
523    }
524}
525
526
527sub edit_doc_xml
528{
529    my $self = shift @_;
530    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
531
532    # To monitor which section/subsection number we are in
533    my @start_rules =
534    ( 'Section'    => \&dxml_start_section );
535
536    # use XML::Rules to add it in (read in and out again)
537    # Set the call back functions
538    my @rules =
539    ( _default => 'raw',
540      'Metadata'    => \&dxml_metadata,
541      'Description' => \&dxml_description);
542     
543    # Sets the parameters
544    my $options = { 'metaname'  => $metaname,
545            'metapos'   => $metapos,
546            'metavalue' => $metavalue,
547            'metamode'  => $metamode };
548           
549    if (defined $opt_secnum) {
550    $options->{'secnum'} = $opt_secnum;
551    }
552
553    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
554}
555
556
557sub set_archives_metadata
558{
559    my $self = shift @_;
560
561    my $username  = $self->{'username'};
562    my $collect   = $self->{'collect'};
563    my $gsdl_cgi  = $self->{'gsdl_cgi'};
564    my $gsdlhome  = $self->{'gsdlhome'};
565    my $infodbtype = $self->{'infodbtype'};
566   
567    if ($baseaction::authentication_enabled) {
568    # Ensure the user is allowed to edit this collection
569    $self->authenticate_user($username, $collect);
570    }
571
572    # Obtain the collect and archive dir   
573    my $site = $self->{'site'};
574    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
575    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
576   
577    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
578
579    # Make sure the collection isn't locked by someone else
580    $self->lock_collection($username, $collect);
581
582    # look up additional args
583    my $docid  = $self->{'d'};
584    my $metaname   = $self->{'metaname'};
585    my $metavalue  = $self->{'metavalue'};
586   
587    my $metapos    = $self->{'metapos'};
588    $metapos = 0 if (!defined $metapos);
589
590    my $metamode   = $self->{'metamode'};
591    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
592    # make "accumulate" the default (less destructive, as won't actually
593    # delete any existing values)
594    $metamode = "accumulate";
595    }
596   
597    # Obtain the doc.xml path for the specified docID
598    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
599
600    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
601    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
602    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
603   
604    # The $doc_xml_file is relative to the archives, and now let's get the full path
605    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
606    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
607   
608    # Edit the doc.xml file with the specified metadata name, value and position.
609    # TODO: there is a potential problem here as this edit_doc_xml function
610    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
611    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
612    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
613            $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
614   
615    # Release the lock once it is done
616    $self->unlock_collection($username, $collect);
617
618    my $mess = "set-archives-metadata successful: Key[$docid]\n";
619    $mess .= "  $metaname";
620    $mess .= "->[$metapos]" if (defined $metapos);
621    $mess .= " = $metavalue";
622    $mess .= " ($metamode)\n";
623   
624    $gsdl_cgi->generate_ok_message($mess); 
625}
626
627
628sub mxml_metadata
629{
630    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
631    my $metaname = $parser->{'parameters'}->{'metaname'};
632    my $metamode = $parser->{'parameters'}->{'metamode'};
633
634    # Report error if we don't see FileName tag before this
635    die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
636   
637    # Don't do anything if we are not in the right FileSet
638    my $file_regexp = $parser->{'parameters'}->{'current_file'};
639    if ($file_regexp =~ /\.\*/) {
640    # Only interested in a file_regexp if it specifies precisely one
641    # file. 
642    # So, skip anything with a .* in it as it is too general
643    return [$tagname => $attrHash];
644    }
645    my $src_file = $parser->{'parameters'}->{'src_file'};
646    if (!($src_file =~ /$file_regexp/)) {
647    return [$tagname => $attrHash];
648    }
649##    print STDERR "*** mxl metamode = $metamode\n";
650
651    # Find the right metadata tag and checks if we are going to override it
652    my $name_attr = $attrHash->{'name'};
653    if (($name_attr eq $metaname) && ($metamode eq "override")) {
654        # Get the value and override the current value
655    my $metavalue = $parser->{'parameters'}->{'metavalue'};
656    $attrHash->{'_content'} = $metavalue;
657
658##  print STDERR "**** overrideing metadata.xml\n";
659
660    # Don't want it to wipe out any other pieces of metadata
661    $parser->{'parameters'}->{'metamode'} = "done";
662    }
663
664    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
665    return [$tagname => $attrHash];
666}
667
668
669sub mxml_description
670{
671    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
672    my $metamode = $parser->{'parameters'}->{'metamode'};   
673
674    # Failed... Report error if we don't see FileName tag before this
675    die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
676
677    # Don't do anything if we are not in the right FileSet
678    my $file_regexp = $parser->{'parameters'}->{'current_file'};
679    if ($file_regexp =~ /\.\*/) {
680    # Only interested in a file_regexp if it specifies precisely one
681    # file. 
682    # So, skip anything with a .* in it as it is too general
683    return [$tagname => $attrHash];
684    }
685    my $src_file = $parser->{'parameters'}->{'src_file'};
686    if (!($src_file =~ /$file_regexp/)) {
687    return [$tagname => $attrHash];
688    }
689
690    # Accumulate the metadata block to the end of the description block
691    # Note: This adds metadata block to all description blocks, so if there are
692    # multiple FileSets, it will add to all of them
693    if (($metamode eq "accumulate") || ($metamode eq "override")) {
694    # if metamode was "override" but get to here then it failed to
695    # find an item to override, in which case it should append its
696    # value to the end, just like the "accumulate" mode
697
698    # tack a new metadata tag on to the end of the <Metadata>+ block
699    my $metaname = $parser->{'parameters'}->{'metaname'};
700    my $metavalue = $parser->{'parameters'}->{'metavalue'};
701   
702    my $metadata_attr = { '_content' => $metavalue,
703                  'name'     => $metaname,
704                  'mode'     => "accumulate" };
705
706    my $append_metadata = [ "Metadata" => $metadata_attr ];
707    my $description_content = $attrHash->{'_content'};
708
709##  print STDERR "*** appending to metadata.xml\n";
710
711    # append the new metadata element to the end of the current
712    # content contained inside this tag
713    push(@$description_content,"    ", $append_metadata ,"\n        ");
714
715    $parser->{'parameters'}->{'metamode'} = "done";
716    }
717
718    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
719    return [$tagname => $attrHash];
720}
721
722
723sub mxml_filename
724{
725    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
726
727    # Store the filename of the Current Fileset
728    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
729    # FileName tag must come before Description tag
730    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
731
732    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
733    return [$tagname => $attrHash];
734}
735
736
737sub mxml_fileset
738{
739    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
740
741    # Initilise the current_file
742    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
743    # FileName tag must come before Description tag
744    $parser->{'parameters'}->{'current_file'} = "";
745
746    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
747    return [$tagname => $attrHash];
748}
749
750
751sub edit_metadata_xml
752{
753    my $self = shift @_;
754    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
755
756    # Set the call-back functions for the metadata tags
757    my @rules =
758    ( _default => 'raw',
759          'FileName' => \&mxml_filename,
760      'Metadata' => \&mxml_metadata,
761      'Description' => \&mxml_description,
762          'FileSet' => \&mxml_fileset);
763
764    # use XML::Rules to add it in (read in and out again)
765    my $parser = XML::Rules->new(rules => \@rules,
766                 style => 'filter',
767                                 output_encoding => 'utf8');
768
769    my $xml_in = "";
770    if (!open(MIN,"<$metadata_xml_filename")) {
771    $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
772    }
773    else {
774        # Read them in
775    my $line;
776    while (defined ($line=<MIN>)) {
777        $xml_in .= $line;
778    }
779    close(MIN);
780
781        # Filter with the call-back functions
782    my $xml_out = "";
783
784    my $MOUT;
785    if (!open($MOUT,">$metadata_xml_filename")) {
786        $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
787    }
788    else {
789        binmode($MOUT,":utf8");
790
791            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
792            # At the moment, I will just hack it!
793            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
794        #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
795            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
796        #print MOUT $xml_out;
797
798        $parser->filter($xml_in, $MOUT, { metaname => $metaname,
799                          metavalue => $metavalue,
800                          metamode => $metamode,
801                          src_file => $src_file,
802                          current_file => undef} );
803        close($MOUT);       
804    }
805    }
806}
807
808
809sub set_import_metadata
810{
811    my $self = shift @_;
812   
813    my $username  = $self->{'username'};
814    my $collect   = $self->{'collect'};
815    my $gsdl_cgi  = $self->{'gsdl_cgi'};
816    my $gsdlhome  = $self->{'gsdlhome'};
817    my $infodbtype = $self->{'infodbtype'};
818   
819    if ($baseaction::authentication_enabled) {
820    # Ensure the user is allowed to edit this collection
821    $self->authenticate_user($username, $collect);
822    }
823
824
825    # Obtain the collect and archive dir   
826    my $site = $self->{'site'};
827    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
828
829    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
830    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
831
832    # Make sure the collection isn't locked by someone else
833    $self->lock_collection($username, $collect);
834   
835    # look up additional args
836    # want either d= or f=
837    my $docid  = $self->{'d'};
838    my $import_file  = $self->{'f'};
839    if ((!defined $docid) && (!defined $import_file)) {
840    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
841    }
842
843    # Get the parameters and set default mode to "accumulate"
844    my $metaname   = $self->{'metaname'};
845    my $metavalue  = $self->{'metavalue'};
846##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
847    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
848    print STDERR "*** set import meta: val = $metavalue\n";
849   
850    my $metamode   = $self->{'metamode'};
851    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
852    # make "accumulate" the default (less destructive, as won't actually
853    # delete any existing values)
854    $metamode = "accumulate";
855    }
856
857    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
858    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
859    my $metadata_xml_file;
860    my $import_filename = undef;
861    if (defined $docid) {
862    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
863    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
864
865    # This now stores the full pathname
866    $import_filename = $doc_rec->{'src-file'}->[0];
867    }
868    else {
869        $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
870    }
871
872    # figure out correct metadata.xml file [?]
873    # Assuming the metadata.xml file is next to the source file
874    # Note: This will not work if it is using the inherited metadata from the parent folder
875    my ($import_tailname, $import_dirname)
876    = File::Basename::fileparse($import_filename);
877    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
878
879    # Edit the metadata.xml
880    # Modified by Jeffrey from DL Consulting
881    # Handle the case where there is one metadata.xml file for multiple FileSets
882    # The XML filter needs to know whether it is in the right FileSet
883    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
884    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
885    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
886                             $metaname, $metavalue, $metamode, $import_tailname);
887
888    # Release the lock once it is done
889    $self->unlock_collection($username, $collect);
890
891    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
892    $mess .= "  $metaname";
893    $mess .= " = $metavalue";
894    $mess .= " ($metamode)\n";
895   
896    $gsdl_cgi->generate_ok_message($mess);
897   
898}
899
900
901sub remove_live_metadata
902{
903    my $self = shift @_;
904
905    my $username  = $self->{'username'};
906    my $collect   = $self->{'collect'};
907    my $gsdl_cgi  = $self->{'gsdl_cgi'};
908    my $gsdlhome  = $self->{'gsdlhome'};
909    my $infodbtype = $self->{'infodbtype'};
910   
911    if ($baseaction::authentication_enabled) {
912    # Ensure the user is allowed to edit this collection
913    &authenticate_user($gsdl_cgi, $username, $collect);
914    }
915
916    # Obtain the collect dir
917    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
918    my $site = $self->{'site'};
919    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
920
921    # Make sure the collection isn't locked by someone else
922    $self->lock_collection($username, $collect);
923
924    # look up additional args
925    my $docid     = $self->{'d'};
926    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
927      $gsdl_cgi->generate_error("No docid (d=...) specified.");
928    }
929   
930    # Generate the dbkey
931    my $metaname  = $self->{'metaname'};
932    my $dbkey = "$docid.$metaname";
933
934    # To people who know $collect_tail please add some comments
935    # Obtain the live gdbm_db path
936    my $collect_tail = $collect;
937    $collect_tail =~ s/^.*[\/\\]//;
938    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
939    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
940
941    # Remove the key
942    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
943    my $status = system($cmd);
944    if ($status != 0) {
945        # Catch error if gdbmdel failed
946    my $mess = "Failed to set metadata key: $dbkey\n";
947   
948    $mess .= "PATH: $ENV{'PATH'}\n";
949    $mess .= "cmd = $cmd\n";
950    $mess .= "Exit status: $status\n";
951    $mess .= "System Error Message: $!\n";
952
953    $gsdl_cgi->generate_error($mess);
954    }
955    else {
956    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
957    }
958
959}
960
961
962sub remove_metadata
963{
964    my $self = shift @_;
965
966    my $username  = $self->{'username'};
967    my $collect   = $self->{'collect'};
968    my $gsdl_cgi  = $self->{'gsdl_cgi'};
969    my $gsdlhome  = $self->{'gsdlhome'};
970    my $infodbtype = $self->{'infodbtype'};
971   
972    if ($baseaction::authentication_enabled) {
973    # Ensure the user is allowed to edit this collection
974    &authenticate_user($gsdl_cgi, $username, $collect);
975    }
976
977    # Obtain the collect dir
978    my $site = $self->{'site'};
979    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
980    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
981
982    # Make sure the collection isn't locked by someone else
983    $self->lock_collection($username, $collect);
984
985    # look up additional args
986    my $docid     = $self->{'d'};
987    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
988      $gsdl_cgi->generate_error("No docid (d=...) specified.");
989    }
990    my $metaname  = $self->{'metaname'};
991    my $metapos   = $self->{'metapos'};
992
993    # To people who know $collect_tail please add some comments
994    # Obtain the path to the database
995    my $collect_tail = $collect;
996    $collect_tail =~ s/^.*[\/\\]//;
997    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
998    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
999
1000    # Read the docid entry
1001    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1002
1003    # Basically loop through and unescape_html the values
1004    foreach my $k (keys %$doc_rec) {
1005    my @escaped_v = ();
1006    foreach my $v (@{$doc_rec->{$k}}) {
1007        if ($k eq "contains") {
1008        # protect quotes in ".2;".3 etc
1009        $v =~ s/\"/\\\"/g;
1010        push(@escaped_v, $v);
1011        }
1012        else {
1013        my $ev = &ghtml::unescape_html($v);
1014        $ev =~ s/\"/\\\"/g;
1015        push(@escaped_v, $ev);
1016        }
1017    }
1018    $doc_rec->{$k} = \@escaped_v;
1019    }
1020
1021    # Check to make sure the key does exist
1022    if (!defined ($doc_rec->{$metaname})) {
1023        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1024    }
1025
1026    # Obtain the specified metadata pos
1027    $metapos = 0 if (!defined $metapos);
1028
1029    # consider check key is defined before deleting?
1030    # Loop through the metadata array and ignore the specified position
1031    my $filtered_metadata = [];
1032    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
1033    for (my $i=0; $i<$num_metadata_vals; $i++) {
1034    my $metavalue = shift(@{$doc_rec->{$metaname}});
1035
1036    if ($i != $metapos) {
1037        push(@$filtered_metadata,$metavalue)
1038    }
1039    }
1040    $doc_rec->{$metaname} = $filtered_metadata;
1041
1042    # Turn the record back to string
1043    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1044
1045    # Store it back to the database
1046    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1047    my $status = system($cmd);
1048    if ($status != 0) {
1049    my $mess = "Failed to set metadata key: $docid\n";
1050   
1051    $mess .= "PATH: $ENV{'PATH'}\n";
1052    $mess .= "cmd = $cmd\n";
1053    $mess .= "Exit status: $status\n";
1054    $mess .= "System Error Message: $!\n";
1055
1056    $gsdl_cgi->generate_error($mess);
1057    }
1058    else {
1059    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1060    $mess .= "  $metaname";
1061    $mess .= "->[$metapos]" if (defined $metapos);
1062
1063    $gsdl_cgi->generate_ok_message($mess);
1064    }
1065}
1066
1067
1068# Was trying to reused the codes, but the functions need to be broken
1069# down more before they can be reused, otherwise there will be too
1070# much overhead and duplicate process...
1071sub insert_metadata
1072{
1073    my $self = shift @_;
1074   
1075    my $username  = $self->{'username'};
1076    my $collect   = $self->{'collect'};
1077    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1078    my $gsdlhome  = $self->{'gsdlhome'};
1079    my $infodbtype = $self->{'infodbtype'};
1080   
1081    # If the import metadata and gdbm database have been updated, we
1082    # need to insert some notification to warn user that the the text
1083    # they see at the moment is not indexed and require a rebuild.
1084    my $rebuild_pending_macro = "_rebuildpendingmessage_";
1085
1086    if ($baseaction::authentication_enabled) {
1087    # Ensure the user is allowed to edit this collection
1088    $self->authenticate_user($username, $collect);
1089    }
1090
1091    # Obtain the collect and archive dir   
1092    my $site = $self->{'site'};
1093    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1094    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1095    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1096
1097    # Make sure the collection isn't locked by someone else
1098    $self->lock_collection($username, $collect);
1099   
1100    # Check additional args
1101    my $docid = $self->{'d'};
1102    if (!defined($docid)) {
1103    $gsdl_cgi->generate_error("No document id is specified: d=...");
1104    }
1105    my $metaname = $self->{'metaname'};
1106    if (!defined($metaname)) {
1107    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1108    }
1109    my $metavalue = $self->{'metavalue'};
1110    if (!defined($metavalue) || $metavalue eq "") {
1111    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1112    }
1113    # make "accumulate" the default (less destructive, as won't actually
1114    # delete any existing values)
1115    my $metamode = "accumulate";
1116
1117    #=======================================================================#
1118    # set_import_metadata [START]
1119    #=======================================================================#
1120    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1121    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1122    my $metadata_xml_file;
1123    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1124    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1125   
1126    # This now stores the full pathname
1127    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1128   
1129    # figure out correct metadata.xml file [?]
1130    # Assuming the metadata.xml file is next to the source file
1131    # Note: This will not work if it is using the inherited metadata from the parent folder
1132    my ($import_tailname, $import_dirname)
1133    = File::Basename::fileparse($import_filename);
1134    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1135
1136    # Shane's escape characters
1137    $metavalue = pack "U0C*", unpack "C*", $metavalue;
1138    $metavalue =~ s/\,/&#44;/g;
1139    $metavalue =~ s/\:/&#58;/g;
1140    $metavalue =~ s/\|/&#124;/g;
1141    $metavalue =~ s/\(/&#40;/g;
1142    $metavalue =~ s/\)/&#41;/g;
1143    $metavalue =~ s/\[/&#91;/g;
1144    $metavalue =~ s/\\/&#92;/g;
1145    $metavalue =~ s/\]/&#93;/g;
1146    $metavalue =~ s/\{/&#123;/g;
1147    $metavalue =~ s/\}/&#125;/g;
1148    $metavalue =~ s/\"/&#34;/g;
1149    $metavalue =~ s/\`/&#96;/g;
1150    $metavalue =~ s/\n/_newline_/g;
1151
1152    # Edit the metadata.xml
1153    # Modified by Jeffrey from DL Consulting
1154    # Handle the case where there is one metadata.xml file for multiple FileSets
1155    # The XML filter needs to know whether it is in the right FileSet
1156    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1157    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1158    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1159                             $metaname, $metavalue, $metamode, $import_tailname);
1160    #=======================================================================#
1161    # set_import_metadata [END]
1162    #=======================================================================#
1163
1164
1165    #=======================================================================#
1166    # set_metadata (accumulate version) [START]
1167    #=======================================================================#
1168    # To people who know $collect_tail please add some comments
1169    # Obtain path to the database
1170    my $collect_tail = $collect;
1171    $collect_tail =~ s/^.*[\/\\]//;
1172    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1173    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1174
1175    # Read the docid entry
1176    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1177   
1178    foreach my $k (keys %$doc_rec) {
1179    my @escaped_v = ();
1180    foreach my $v (@{$doc_rec->{$k}}) {
1181        if ($k eq "contains") {
1182        # protect quotes in ".2;".3 etc
1183        $v =~ s/\"/\\\"/g;
1184        push(@escaped_v, $v);
1185        }
1186        else {
1187        my $ev = &ghtml::unescape_html($v);
1188        $ev =~ s/\"/\\\"/g;
1189        push(@escaped_v, $ev);
1190        }
1191    }
1192    $doc_rec->{$k} = \@escaped_v;
1193    }
1194
1195    # Protect the quotes
1196    $metavalue =~ s/\"/\\\"/g;
1197
1198    # Adds the pending macro
1199    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1200
1201    # If the metadata doesn't exist, create a new one
1202    if (!defined($doc_rec->{$metaname})){   
1203    $doc_rec->{$metaname} = [ $macro_metavalue ];
1204    }
1205    # Else, let's acculumate the values
1206    else {
1207        push(@{$doc_rec->{$metaname}},$macro_metavalue);
1208    }
1209
1210    # Generate the record string
1211    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1212
1213    # Store it into GDBM
1214    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1215    my $status = system($cmd);
1216    if ($status != 0) {
1217        # Catch error if gdbmget failed
1218    my $mess = "Failed to set metadata key: $docid\n";
1219   
1220    $mess .= "PATH: $ENV{'PATH'}\n";
1221    $mess .= "cmd = $cmd\n";
1222    $mess .= "Exit status: $status\n";
1223    $mess .= "System Error Message: $!\n";
1224
1225    $gsdl_cgi->generate_error($mess);
1226    }
1227    else {
1228    my $mess = "insert-metadata successful: Key[$docid]\n";
1229    $mess .= "  [In metadata.xml] $metaname";
1230    $mess .= " = $metavalue\n";
1231    $mess .= "  [In database] $metaname";
1232    $mess .= " = $macro_metavalue\n";
1233    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
1234        $gsdl_cgi->generate_ok_message($mess);
1235    }   
1236    #=======================================================================#
1237    # set_metadata (accumulate version) [END]
1238    #=======================================================================#
1239
1240    # Release the lock once it is done
1241    $self->unlock_collection($username, $collect);
1242}
1243
12441;
Note: See TracBrowser for help on using the browser.