root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 23761

Revision 23761, 40.3 KB (checked in by davidb, 8 years ago)

General upgrading of the set metadata action to cover more cases (such as setting metadata values at the sub-section level). To ensure the output file correctly maintains it's 'UTF-8'-ness, I have had to change the code that explicity prints out the DOCTYPE tag -- the comment for this itself says this is a hack. Without the 'binmode(...)' then accented characters etc. will be incorrectly coded and the whole deck of cards comes crashing down. I noticed there is a new version of XML::Rule out, and so with luck this version has a better way to handle setting UTF-8 within its API, rather than resorting the the external 'binmode' now used. If so, then this would let us go back to printing out the DOCTYPE tag ... it might even be that this element can be more gracefully handled within the updated XML::Rule implementation.

Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35
36BEGIN {
37#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
38    require XML::Rules;
39}
40
41
42@metadataaction::ISA = ('baseaction');
43
44
45my $action_table =
46{
47    "get-live-metadata"     => { 'compulsory-args' => [ "d", "metaname" ],
48                     'optional-args'   => [] },
49
50    "get-metadata"          => { 'compulsory-args' => [ "d", "metaname" ],
51                     'optional-args'   => [ "metapos" ] },
52
53    "set-live-metadata"     => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
54                     'optional-args'   => [ ] },
55
56    "set-metadata"          => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
57                     'optional-args'   => [ "metapos" ] },
58
59    "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
60                     'optional-args'   => [ "metapos", "metamode" ]
61                      # metamode can be "accumulate", "override",
62                },
63
64    "set-import-metadata"   => { 'compulsory-args' => [ "metaname", "metavalue" ],
65                     'optional-args'   => [ "d", "f", "metamode" ]
66                    # metamode can be "accumulate", "override", or "unique-id"
67                 },
68
69
70    "remove-live-metadata"  => { 'compulsory-args' => [ "d", "metaname" ],
71                     'optional-args'   => [ ] },
72
73    "remove-metadata"       => { 'compulsory-args' => [ "d", "metaname" ],
74                     'optional-args'   => [ "metapos" ] },
75
76    "insert-metadata"       => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
77                     'optional-args'   => [ ]
78                   }
79};
80
81
82sub new
83{
84    my $class = shift (@_);
85    my ($gsdl_cgi,$iis6_mode) = @_;
86
87    # Treat metavalue specially.  To transmit this through a GET request
88    # the Javascript side has url-encoded it, so here we need to decode
89    # it before proceeding
90
91    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
92    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
93
94    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
95
96    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
97
98    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
99
100    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
101
102    return bless $self, $class;
103}
104
105
106sub get_live_metadata
107{
108    my $self = shift @_;
109
110    my $username  = $self->{'username'};
111    my $collect   = $self->{'collect'};
112    my $gsdl_cgi  = $self->{'gsdl_cgi'};
113    my $gsdlhome  = $self->{'gsdlhome'};
114    my $infodbtype = $self->{'infodbtype'};
115   
116    # live metadata gets/saves value scoped (prefixed) by the current usename
117    # so (for now) let's not bother to enforce authentication
118
119    # Obtain the collect dir
120    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
121
122    # Make sure the collection isn't locked by someone else
123    $self->lock_collection($username, $collect);
124
125    # look up additional args
126    my $docid  = $self->{'d'};
127    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
128       $gsdl_cgi->generate_error("No docid (d=...) specified.");
129    }
130
131    # Generate the dbkey
132    my $metaname  = $self->{'metaname'};
133    my $dbkey = "$docid.$metaname";
134
135    # To people who know $collect_tail please add some comments
136    # Obtain path to the database
137    my $collect_tail = $collect;
138    $collect_tail =~ s/^.*[\/|\\]//;
139    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
140    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
141   
142    # Obtain the content of the key
143    my $cmd = "gdbmget $infodb_file_path $dbkey";
144    if (open(GIN,"$cmd |") == 0) {
145        # Catch error if gdbmget failed
146    my $mess = "Failed to get metadata key: $metaname\n";
147    $mess .= "$!\n";
148
149    $gsdl_cgi->generate_error($mess);
150    }
151    else {
152    binmode(GIN,":utf8");
153        # Read everything in and concatenate them into $metavalue
154    my $metavalue = "";
155    my $line;
156    while (defined ($line=<GIN>)) {
157        $metavalue .= $line;
158    }
159    close(GIN);
160    chomp($metavalue); # Get rid off the tailing newlines
161    $gsdl_cgi->generate_ok_message("$metavalue");
162    }
163
164    # Release the lock once it is done
165    $self->unlock_collection($username, $collect);
166}
167
168
169sub get_metadata
170{
171    my $self = shift @_;
172
173    my $username  = $self->{'username'};
174    my $collect   = $self->{'collect'};
175    my $gsdl_cgi  = $self->{'gsdl_cgi'};
176    my $gsdlhome  = $self->{'gsdlhome'};
177
178    # Authenticate user if it is enabled
179    if ($baseaction::authentication_enabled) {
180    # Ensure the user is allowed to edit this collection
181    &authenticate_user($gsdl_cgi, $username, $collect);
182    }
183
184    # Obtain the collect dir
185    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
186
187    # Make sure the collection isn't locked by someone else
188    $self->lock_collection($username, $collect);
189
190    # look up additional args
191    my $docid     = $self->{'d'};
192    my $metaname  = $self->{'metaname'};
193    my $metapos   = $self->{'metapos'};
194    my $infodbtype = $self->{'infodbtype'};
195
196    # To people who know $collect_tail please add some comments
197    # Obtain path to the database
198    my $collect_tail = $collect;
199    $collect_tail =~ s/^.*[\/\\]//;
200    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
201    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
202
203    # Read the docid entry
204    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
205 
206    # Basically loop through and unescape_html the values
207    foreach my $k (keys %$doc_rec) {
208    my @escaped_v = ();
209    foreach my $v (@{$doc_rec->{$k}}) {
210        my $ev = &ghtml::unescape_html($v);
211        push(@escaped_v, $ev);
212    }
213    $doc_rec->{$k} = \@escaped_v;
214    }
215
216    # Obtain the specified metadata value
217    $metapos = 0 if (!defined $metapos);
218    my $metavalue = $doc_rec->{$metaname}->[$metapos];
219    $gsdl_cgi->generate_ok_message("$metavalue");
220   
221    # Release the lock once it is done
222    $self->unlock_collection($username, $collect);
223}
224
225
226sub set_live_metadata
227{
228    my $self = shift @_;
229
230    my $username  = $self->{'username'};
231    my $collect   = $self->{'collect'};
232    my $gsdl_cgi  = $self->{'gsdl_cgi'};
233    my $gsdlhome  = $self->{'gsdlhome'};
234    my $infodbtype = $self->{'infodbtype'};
235 
236    if ($baseaction::authentication_enabled) {
237    # Ensure the user is allowed to edit this collection
238    &authenticate_user($gsdl_cgi, $username, $collect);
239    }
240
241    # Obtain the collect dir
242    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
243
244    # Make sure the collection isn't locked by someone else
245    $self->lock_collection($username, $collect);
246
247    # look up additional args
248    my $docid     = $self->{'d'};
249    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
250      $gsdl_cgi->generate_error("No docid (d=...) specified.");
251    }
252    my $metavalue = $self->{'metavalue'};
253 
254
255    # Generate the dbkey   
256    my $metaname  = $self->{'metaname'};
257    my $dbkey = "$docid.$metaname";
258
259    # To people who know $collect_tail please add some comments
260    # Obtain path to the database
261    my $collect_tail = $collect;
262    $collect_tail =~ s/^.*[\/\\]//;
263    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
264    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
265
266    # Set the new value
267    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
268    my $status = system($cmd);
269    if ($status != 0) {
270        # Catch error if gdbmget failed
271    my $mess = "Failed to set metadata key: $dbkey\n";
272
273    $mess .= "PATH: $ENV{'PATH'}\n";
274    $mess .= "cmd = $cmd\n";
275    $mess .= "Exit status: $status\n";
276    $mess .= "System Error Message: $!\n";
277
278    $gsdl_cgi->generate_error($mess);
279    }
280    else {
281    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
282    }
283   
284    # Release the lock once it is done
285    $self->unlock_collection($username, $collect);
286}
287
288
289sub set_metadata
290{
291    my $self = shift @_;
292
293    my $username  = $self->{'username'};
294    my $collect   = $self->{'collect'};
295    my $gsdl_cgi  = $self->{'gsdl_cgi'};
296    my $gsdlhome  = $self->{'gsdlhome'};
297
298    if ($baseaction::authentication_enabled) {
299    # Ensure the user is allowed to edit this collection
300    &authenticate_user($gsdl_cgi, $username, $collect);
301    }
302
303    # Obtain the collect dir
304    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
305
306    # Make sure the collection isn't locked by someone else
307    $self->lock_collection($username, $collect);
308
309    # look up additional args
310    my $docid     = $self->{'d'};
311    my $metaname  = $self->{'metaname'};
312    my $metapos   = $self->{'metapos'};
313    my $metavalue = $self->{'metavalue'};
314    my $infodbtype = $self->{'infodbtype'};
315   
316    # To people who know $collect_tail please add some comments
317    # Obtain path to the database
318    my $collect_tail = $collect;
319    $collect_tail =~ s/^.*[\/\\]//;
320    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
321    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
322   
323    # Read the docid entry
324    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
325   
326    # Set the metadata value
327    if (defined $metapos) {
328    $doc_rec->{$metaname}->[$metapos] = $metavalue;
329    }
330    else {
331    $doc_rec->{$metaname} = [ $metavalue ];
332    }
333 
334    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
335    if ($status != 0) {
336        # Catch error if set infodb entry failed
337    my $mess = "Failed to set metadata key: $docid\n";
338   
339    $mess .= "PATH: $ENV{'PATH'}\n";
340    $mess .= "Exit status: $status\n";
341    $mess .= "System Error Message: $!\n";
342   
343    $gsdl_cgi->generate_error($mess);
344    }
345    else {
346    my $mess = "set-document-metadata successful: Key[$docid]\n";
347    $mess .= "  $metaname";
348    $mess .= "->[$metapos]" if (defined $metapos);
349    $mess .= " = $metavalue";
350   
351    $gsdl_cgi->generate_ok_message($mess);
352    }
353   
354    # Release the lock once it is done
355    $self->unlock_collection($username, $collect);
356}
357
358
359sub dxml_metadata
360{
361    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
362    my $metaname = $parser->{'parameters'}->{'metaname'};
363    my $metamode = $parser->{'parameters'}->{'metamode'};
364   
365    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
366   
367    # Find the right metadata tag and checks if we are going to
368    # override it
369    #
370    # Note: This over writes the first metadata block it
371    # encountered. If there are multiple Sections in the doc.xml, it
372    # might not behave as you would expect
373
374    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
375##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
376##    print STDERR "**** metamode = $metamode\n";
377   
378    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
379    my $name_attr = $attrHash->{'name'};
380    if (($name_attr eq $metaname) && ($metamode eq "override")) {
381##      print STDERR "**** got match!!\n";
382        # Get the value and override the current value
383        my $metavalue = $parser->{'parameters'}->{'metavalue'};
384        $attrHash->{'_content'} = $metavalue;
385       
386        # Don't want it to wipe out any other pieces of metadata
387        $parser->{'parameters'}->{'metamode'} = "done";
388    }
389    }
390
391    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
392    return [$tagname => $attrHash];
393}
394
395
396sub dxml_description
397{
398    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
399    my $metamode = $parser->{'parameters'}->{'metamode'};
400
401    # Accumulate the metadata
402    # NOTE: This appends new metadata element to all description fields.
403    # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
404    if (($metamode eq "accumulate") || ($metamode eq "override")) {
405    # If get to here and metamode is override, the this means there
406    # was no existing value to overide => treat as an append operation
407
408    # Tack a new metadata tag on to the end of the <Metadata>+ block
409    my $metaname = $parser->{'parameters'}->{'metaname'};
410    my $metavalue = $parser->{'parameters'}->{'metavalue'};
411   
412    my $metadata_attr = { '_content' => $metavalue,
413                  'name'     => $metaname,
414                  'mode'     => "accumulate" };
415
416    my $append_metadata = [ "Metadata" => $metadata_attr ];
417    my $description_content = $attrHash->{'_content'};
418
419##  print STDERR "**** appending to doc.xml\n";
420
421    push(@$description_content,"    ", $append_metadata ,"\n        ");
422    $parser->{'parameters'}->{'metamode'} = "done";
423    }
424
425
426    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
427    return [$tagname => $attrHash];
428}
429
430
431
432sub dxml_start_section
433{
434    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
435
436    my $new_depth = scalar(@$contextArray);
437
438    if ($new_depth == 1) {
439    $parser->{'parameters'}->{'curr_section_depth'} = 1;
440    $parser->{'parameters'}->{'curr_section_num'}   = "";
441    }
442
443    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
444    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
445
446    my $new_secnum;
447
448    if ($new_depth > $old_depth) {
449    # child subsection
450    $new_secnum = "$old_secnum.1";
451    }
452    elsif ($new_depth == $old_depth) {
453    # sibling section => increase it's value by 1
454    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
455    $tail_num++;
456    $new_secnum = $old_secnum;
457    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
458    }
459    else {
460    # back up to parent section => lopp off tail
461    $new_secnum = $old_secnum;
462    $new_secnum =~ s/\.\d+$//;
463    }
464
465    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
466    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
467
468    print STDERR "*** In Section: $new_secnum\n";
469}
470
471sub edit_xml_file
472{
473    my $self = shift @_;
474    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
475
476    # use XML::Rules to add it in (read in and out again)
477    my $parser = XML::Rules->new(start_rules     => $start_rules,
478                 rules           => $rules,
479                 style           => 'filter',
480                                 output_encoding => 'utf8' );
481
482    my $xml_in = "";
483    if (!open(MIN,"<$filename")) {
484    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
485    }
486    else {
487        # Read all the text in
488    my $line;
489    while (defined ($line=<MIN>)) {
490        $xml_in .= $line;
491    }
492    close(MIN);
493   
494    my $MOUT;   
495    if (!open($MOUT,">$filename")) {
496        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
497    }
498    else {
499        # Matched lines will get handled by the call backs
500##      my $xml_out = "";
501
502        binmode($MOUT,":utf8");
503        $parser->filter($xml_in,$MOUT, $options);
504
505#       binmode(MOUT,":utf8");
506#       print MOUT $xml_out;
507        close($MOUT);       
508    }
509    }
510}
511
512
513sub edit_doc_xml
514{
515    my $self = shift @_;
516    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
517
518    # To monitor which section/subsection number we are in
519    my @start_rules =
520    ( 'Section'    => \&dxml_start_section );
521
522    # use XML::Rules to add it in (read in and out again)
523    # Set the call back functions
524    my @rules =
525    ( _default => 'raw',
526      'Metadata'    => \&dxml_metadata,
527      'Description' => \&dxml_description);
528     
529    # Sets the parameters
530    my $options = { 'metaname'  => $metaname,
531            'metapos'   => $metapos,
532            'metavalue' => $metavalue,
533            'metamode'  => $metamode };
534           
535    if (defined $opt_secnum) {
536    $options->{'secnum'} = $opt_secnum;
537    }
538
539    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
540}
541
542
543sub set_archives_metadata
544{
545    my $self = shift @_;
546
547    my $username  = $self->{'username'};
548    my $collect   = $self->{'collect'};
549    my $gsdl_cgi  = $self->{'gsdl_cgi'};
550    my $gsdlhome  = $self->{'gsdlhome'};
551    my $infodbtype = $self->{'infodbtype'};
552   
553    if ($baseaction::authentication_enabled) {
554    # Ensure the user is allowed to edit this collection
555    $self->authenticate_user($username, $collect);
556    }
557
558    # Obtain the collect and archive dir   
559    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
560    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
561
562    # Make sure the collection isn't locked by someone else
563    $self->lock_collection($username, $collect);
564
565    # look up additional args
566    my $docid  = $self->{'d'};
567    my $metaname   = $self->{'metaname'};
568    my $metavalue  = $self->{'metavalue'};
569   
570    my $metapos    = $self->{'metapos'};
571    $metapos = 0 if (!defined $metapos);
572
573    my $metamode   = $self->{'metamode'};
574    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
575    # make "accumulate" the default (less destructive, as won't actually
576    # delete any existing values)
577    $metamode = "accumulate";
578    }
579   
580    # Obtain the doc.xml path for the specified docID
581    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
582
583    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
584    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
585    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
586   
587    # The $doc_xml_file is relative to the archives, and now let's get the full path
588    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
589    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
590   
591    # Edit the doc.xml file with the specified metadata name, value and position.
592    # TODO: there is a potential problem here as this edit_doc_xml function
593    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
594    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
595    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
596            $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
597   
598    # Release the lock once it is done
599    $self->unlock_collection($username, $collect);
600
601    my $mess = "set-archives-metadata successful: Key[$docid]\n";
602    $mess .= "  $metaname";
603    $mess .= "->[$metapos]" if (defined $metapos);
604    $mess .= " = $metavalue";
605    $mess .= " ($metamode)\n";
606   
607    $gsdl_cgi->generate_ok_message($mess); 
608}
609
610
611sub mxml_metadata
612{
613    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
614    my $metaname = $parser->{'parameters'}->{'metaname'};
615    my $metamode = $parser->{'parameters'}->{'metamode'};
616
617    # Report error if we don't see FileName tag before this
618    die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
619   
620    # Don't do anything if we are not in the right FileSet
621    my $file_regexp = $parser->{'parameters'}->{'current_file'};
622    if ($file_regexp =~ /\.\*/) {
623    # Only interested in a file_regexp if it specifies precisely one
624    # file. 
625    # So, skip anything with a .* in it as it is too general
626    return [$tagname => $attrHash];
627    }
628    my $src_file = $parser->{'parameters'}->{'src_file'};
629    if (!($src_file =~ /$file_regexp/)) {
630    return [$tagname => $attrHash];
631    }
632##    print STDERR "*** mxl metamode = $metamode\n";
633
634    # Find the right metadata tag and checks if we are going to override it
635    my $name_attr = $attrHash->{'name'};
636    if (($name_attr eq $metaname) && ($metamode eq "override")) {
637        # Get the value and override the current value
638    my $metavalue = $parser->{'parameters'}->{'metavalue'};
639    $attrHash->{'_content'} = $metavalue;
640
641##  print STDERR "**** overrideing metadata.xml\n";
642
643    # Don't want it to wipe out any other pieces of metadata
644    $parser->{'parameters'}->{'metamode'} = "done";
645    }
646
647    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
648    return [$tagname => $attrHash];
649}
650
651
652sub mxml_description
653{
654    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
655    my $metamode = $parser->{'parameters'}->{'metamode'};   
656
657    # Failed... Report error if we don't see FileName tag before this
658    die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
659
660    # Don't do anything if we are not in the right FileSet
661    my $file_regexp = $parser->{'parameters'}->{'current_file'};
662    if ($file_regexp =~ /\.\*/) {
663    # Only interested in a file_regexp if it specifies precisely one
664    # file. 
665    # So, skip anything with a .* in it as it is too general
666    return [$tagname => $attrHash];
667    }
668    my $src_file = $parser->{'parameters'}->{'src_file'};
669    if (!($src_file =~ /$file_regexp/)) {
670    return [$tagname => $attrHash];
671    }
672
673    # Accumulate the metadata block to the end of the description block
674    # Note: This adds metadata block to all description blocks, so if there are
675    # multiple FileSets, it will add to all of them
676    if (($metamode eq "accumulate") || ($metamode eq "override")) {
677    # if metamode was "override" but get to here then it failed to
678    # find an item to override, in which case it should append its
679    # value to the end, just like the "accumulate" mode
680
681    # tack a new metadata tag on to the end of the <Metadata>+ block
682    my $metaname = $parser->{'parameters'}->{'metaname'};
683    my $metavalue = $parser->{'parameters'}->{'metavalue'};
684   
685    my $metadata_attr = { '_content' => $metavalue,
686                  'name'     => $metaname,
687                  'mode'     => "accumulate" };
688
689    my $append_metadata = [ "Metadata" => $metadata_attr ];
690    my $description_content = $attrHash->{'_content'};
691
692##  print STDERR "*** appending to metadata.xml\n";
693
694    # append the new metadata element to the end of the current
695    # content contained inside this tag
696    push(@$description_content,"    ", $append_metadata ,"\n        ");
697
698    $parser->{'parameters'}->{'metamode'} = "done";
699    }
700
701    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
702    return [$tagname => $attrHash];
703}
704
705
706sub mxml_filename
707{
708    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
709
710    # Store the filename of the Current Fileset
711    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
712    # FileName tag must come before Description tag
713    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
714
715    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
716    return [$tagname => $attrHash];
717}
718
719
720sub mxml_fileset
721{
722    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
723
724    # Initilise the current_file
725    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
726    # FileName tag must come before Description tag
727    $parser->{'parameters'}->{'current_file'} = "";
728
729    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
730    return [$tagname => $attrHash];
731}
732
733
734sub edit_metadata_xml
735{
736    my $self = shift @_;
737    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
738
739    # Set the call-back functions for the metadata tags
740    my @rules =
741    ( _default => 'raw',
742          'FileName' => \&mxml_filename,
743      'Metadata' => \&mxml_metadata,
744      'Description' => \&mxml_description,
745          'FileSet' => \&mxml_fileset);
746
747    # use XML::Rules to add it in (read in and out again)
748    my $parser = XML::Rules->new(rules => \@rules,
749                 style => 'filter',
750                                 output_encoding => 'utf8');
751
752    my $xml_in = "";
753    if (!open(MIN,"<$metadata_xml_filename")) {
754    $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
755    }
756    else {
757        # Read them in
758    my $line;
759    while (defined ($line=<MIN>)) {
760        $xml_in .= $line;
761    }
762    close(MIN);
763
764        # Filter with the call-back functions
765    my $xml_out = "";
766
767    my $MOUT;
768    if (!open($MOUT,">$metadata_xml_filename")) {
769        $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
770    }
771    else {
772        binmode($MOUT,":utf8");
773
774            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
775            # At the moment, I will just hack it!
776            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
777        #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
778            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
779        #print MOUT $xml_out;
780
781        $parser->filter($xml_in, $MOUT, { metaname => $metaname,
782                          metavalue => $metavalue,
783                          metamode => $metamode,
784                          src_file => $src_file,
785                          current_file => undef} );
786        close($MOUT);       
787    }
788    }
789}
790
791
792sub set_import_metadata
793{
794    my $self = shift @_;
795   
796    my $username  = $self->{'username'};
797    my $collect   = $self->{'collect'};
798    my $gsdl_cgi  = $self->{'gsdl_cgi'};
799    my $gsdlhome  = $self->{'gsdlhome'};
800    my $infodbtype = $self->{'infodbtype'};
801   
802    if ($baseaction::authentication_enabled) {
803    # Ensure the user is allowed to edit this collection
804    $self->authenticate_user($username, $collect);
805    }
806
807
808    # Obtain the collect and archive dir   
809    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
810    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
811
812    # Make sure the collection isn't locked by someone else
813    $self->lock_collection($username, $collect);
814   
815    # look up additional args
816    # want either d= or f=
817    my $docid  = $self->{'d'};
818    my $import_file  = $self->{'f'};
819    if ((!defined $docid) && (!defined $import_file)) {
820    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
821    }
822
823    # Get the parameters and set default mode to "accumulate"
824    my $metaname   = $self->{'metaname'};
825    my $metavalue  = $self->{'metavalue'};
826##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
827    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
828    print STDERR "*** set import meta: val = $metavalue\n";
829   
830    my $metamode   = $self->{'metamode'};
831    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
832    # make "accumulate" the default (less destructive, as won't actually
833    # delete any existing values)
834    $metamode = "accumulate";
835    }
836
837    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
838    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
839    my $metadata_xml_file;
840    my $import_filename = undef;
841    if (defined $docid) {
842    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
843    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
844
845    # This now stores the full pathname
846    $import_filename = $doc_rec->{'src-file'}->[0];
847    }
848    else {
849        $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
850    }
851
852    # figure out correct metadata.xml file [?]
853    # Assuming the metadata.xml file is next to the source file
854    # Note: This will not work if it is using the inherited metadata from the parent folder
855    my ($import_tailname, $import_dirname)
856    = File::Basename::fileparse($import_filename);
857    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
858
859    # Edit the metadata.xml
860    # Modified by Jeffrey from DL Consulting
861    # Handle the case where there is one metadata.xml file for multiple FileSets
862    # The XML filter needs to know whether it is in the right FileSet
863    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
864    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
865    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
866                             $metaname, $metavalue, $metamode, $import_tailname);
867
868    # Release the lock once it is done
869    $self->unlock_collection($username, $collect);
870
871    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
872    $mess .= "  $metaname";
873    $mess .= " = $metavalue";
874    $mess .= " ($metamode)\n";
875   
876    $gsdl_cgi->generate_ok_message($mess);
877   
878}
879
880
881sub remove_live_metadata
882{
883    my $self = shift @_;
884
885    my $username  = $self->{'username'};
886    my $collect   = $self->{'collect'};
887    my $gsdl_cgi  = $self->{'gsdl_cgi'};
888    my $gsdlhome  = $self->{'gsdlhome'};
889    my $infodbtype = $self->{'infodbtype'};
890   
891    if ($baseaction::authentication_enabled) {
892    # Ensure the user is allowed to edit this collection
893    &authenticate_user($gsdl_cgi, $username, $collect);
894    }
895
896    # Obtain the collect dir
897    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
898
899    # Make sure the collection isn't locked by someone else
900    $self->lock_collection($username, $collect);
901
902    # look up additional args
903    my $docid     = $self->{'d'};
904    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
905      $gsdl_cgi->generate_error("No docid (d=...) specified.");
906    }
907   
908    # Generate the dbkey
909    my $metaname  = $self->{'metaname'};
910    my $dbkey = "$docid.$metaname";
911
912    # To people who know $collect_tail please add some comments
913    # Obtain the live gdbm_db path
914    my $collect_tail = $collect;
915    $collect_tail =~ s/^.*[\/\\]//;
916    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
917    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
918
919    # Remove the key
920    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
921    my $status = system($cmd);
922    if ($status != 0) {
923        # Catch error if gdbmdel failed
924    my $mess = "Failed to set metadata key: $dbkey\n";
925   
926    $mess .= "PATH: $ENV{'PATH'}\n";
927    $mess .= "cmd = $cmd\n";
928    $mess .= "Exit status: $status\n";
929    $mess .= "System Error Message: $!\n";
930
931    $gsdl_cgi->generate_error($mess);
932    }
933    else {
934    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
935    }
936
937}
938
939
940sub remove_metadata
941{
942    my $self = shift @_;
943
944    my $username  = $self->{'username'};
945    my $collect   = $self->{'collect'};
946    my $gsdl_cgi  = $self->{'gsdl_cgi'};
947    my $gsdlhome  = $self->{'gsdlhome'};
948    my $infodbtype = $self->{'infodbtype'};
949   
950    if ($baseaction::authentication_enabled) {
951    # Ensure the user is allowed to edit this collection
952    &authenticate_user($gsdl_cgi, $username, $collect);
953    }
954
955    # Obtain the collect dir
956    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
957
958    # Make sure the collection isn't locked by someone else
959    $self->lock_collection($username, $collect);
960
961    # look up additional args
962    my $docid     = $self->{'d'};
963    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
964      $gsdl_cgi->generate_error("No docid (d=...) specified.");
965    }
966    my $metaname  = $self->{'metaname'};
967    my $metapos   = $self->{'metapos'};
968
969    # To people who know $collect_tail please add some comments
970    # Obtain the path to the database
971    my $collect_tail = $collect;
972    $collect_tail =~ s/^.*[\/\\]//;
973    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
974    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
975
976    # Read the docid entry
977    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
978
979    # Basically loop through and unescape_html the values
980    foreach my $k (keys %$doc_rec) {
981    my @escaped_v = ();
982    foreach my $v (@{$doc_rec->{$k}}) {
983        if ($k eq "contains") {
984        # protect quotes in ".2;".3 etc
985        $v =~ s/\"/\\\"/g;
986        push(@escaped_v, $v);
987        }
988        else {
989        my $ev = &ghtml::unescape_html($v);
990        $ev =~ s/\"/\\\"/g;
991        push(@escaped_v, $ev);
992        }
993    }
994    $doc_rec->{$k} = \@escaped_v;
995    }
996
997    # Check to make sure the key does exist
998    if (!defined ($doc_rec->{$metaname})) {
999        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1000    }
1001
1002    # Obtain the specified metadata pos
1003    $metapos = 0 if (!defined $metapos);
1004
1005    # consider check key is defined before deleting?
1006    # Loop through the metadata array and ignore the specified position
1007    my $filtered_metadata = [];
1008    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
1009    for (my $i=0; $i<$num_metadata_vals; $i++) {
1010    my $metavalue = shift(@{$doc_rec->{$metaname}});
1011
1012    if ($i != $metapos) {
1013        push(@$filtered_metadata,$metavalue)
1014    }
1015    }
1016    $doc_rec->{$metaname} = $filtered_metadata;
1017
1018    # Turn the record back to string
1019    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1020
1021    # Store it back to the database
1022    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1023    my $status = system($cmd);
1024    if ($status != 0) {
1025    my $mess = "Failed to set metadata key: $docid\n";
1026   
1027    $mess .= "PATH: $ENV{'PATH'}\n";
1028    $mess .= "cmd = $cmd\n";
1029    $mess .= "Exit status: $status\n";
1030    $mess .= "System Error Message: $!\n";
1031
1032    $gsdl_cgi->generate_error($mess);
1033    }
1034    else {
1035    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1036    $mess .= "  $metaname";
1037    $mess .= "->[$metapos]" if (defined $metapos);
1038
1039    $gsdl_cgi->generate_ok_message($mess);
1040    }
1041}
1042
1043
1044# Was trying to reused the codes, but the functions need to be broken
1045# down more before they can be reused, otherwise there will be too
1046# much overhead and duplicate process...
1047sub insert_metadata
1048{
1049    my $self = shift @_;
1050   
1051    my $username  = $self->{'username'};
1052    my $collect   = $self->{'collect'};
1053    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1054    my $gsdlhome  = $self->{'gsdlhome'};
1055    my $infodbtype = $self->{'infodbtype'};
1056   
1057    # If the import metadata and gdbm database have been updated, we
1058    # need to insert some notification to warn user that the the text
1059    # they see at the moment is not indexed and require a rebuild.
1060    my $rebuild_pending_macro = "_rebuildpendingmessage_";
1061
1062    if ($baseaction::authentication_enabled) {
1063    # Ensure the user is allowed to edit this collection
1064    $self->authenticate_user($username, $collect);
1065    }
1066
1067    # Obtain the collect and archive dir   
1068    my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1069    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1070
1071    # Make sure the collection isn't locked by someone else
1072    $self->lock_collection($username, $collect);
1073   
1074    # Check additional args
1075    my $docid = $self->{'d'};
1076    if (!defined($docid)) {
1077    $gsdl_cgi->generate_error("No document id is specified: d=...");
1078    }
1079    my $metaname = $self->{'metaname'};
1080    if (!defined($metaname)) {
1081    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1082    }
1083    my $metavalue = $self->{'metavalue'};
1084    if (!defined($metavalue) || $metavalue eq "") {
1085    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1086    }
1087    # make "accumulate" the default (less destructive, as won't actually
1088    # delete any existing values)
1089    my $metamode = "accumulate";
1090
1091    #=======================================================================#
1092    # set_import_metadata [START]
1093    #=======================================================================#
1094    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1095    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1096    my $metadata_xml_file;
1097    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1098    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1099   
1100    # This now stores the full pathname
1101    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1102   
1103    # figure out correct metadata.xml file [?]
1104    # Assuming the metadata.xml file is next to the source file
1105    # Note: This will not work if it is using the inherited metadata from the parent folder
1106    my ($import_tailname, $import_dirname)
1107    = File::Basename::fileparse($import_filename);
1108    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1109
1110    # Shane's escape characters
1111    $metavalue = pack "U0C*", unpack "C*", $metavalue;
1112    $metavalue =~ s/\,/&#44;/g;
1113    $metavalue =~ s/\:/&#58;/g;
1114    $metavalue =~ s/\|/&#124;/g;
1115    $metavalue =~ s/\(/&#40;/g;
1116    $metavalue =~ s/\)/&#41;/g;
1117    $metavalue =~ s/\[/&#91;/g;
1118    $metavalue =~ s/\\/&#92;/g;
1119    $metavalue =~ s/\]/&#93;/g;
1120    $metavalue =~ s/\{/&#123;/g;
1121    $metavalue =~ s/\}/&#125;/g;
1122    $metavalue =~ s/\"/&#34;/g;
1123    $metavalue =~ s/\`/&#96;/g;
1124    $metavalue =~ s/\n/_newline_/g;
1125
1126    # Edit the metadata.xml
1127    # Modified by Jeffrey from DL Consulting
1128    # Handle the case where there is one metadata.xml file for multiple FileSets
1129    # The XML filter needs to know whether it is in the right FileSet
1130    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1131    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1132    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1133                             $metaname, $metavalue, $metamode, $import_tailname);
1134    #=======================================================================#
1135    # set_import_metadata [END]
1136    #=======================================================================#
1137
1138
1139    #=======================================================================#
1140    # set_metadata (accumulate version) [START]
1141    #=======================================================================#
1142    # To people who know $collect_tail please add some comments
1143    # Obtain path to the database
1144    my $collect_tail = $collect;
1145    $collect_tail =~ s/^.*[\/\\]//;
1146    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1147    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1148
1149    # Read the docid entry
1150    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1151   
1152    foreach my $k (keys %$doc_rec) {
1153    my @escaped_v = ();
1154    foreach my $v (@{$doc_rec->{$k}}) {
1155        if ($k eq "contains") {
1156        # protect quotes in ".2;".3 etc
1157        $v =~ s/\"/\\\"/g;
1158        push(@escaped_v, $v);
1159        }
1160        else {
1161        my $ev = &ghtml::unescape_html($v);
1162        $ev =~ s/\"/\\\"/g;
1163        push(@escaped_v, $ev);
1164        }
1165    }
1166    $doc_rec->{$k} = \@escaped_v;
1167    }
1168
1169    # Protect the quotes
1170    $metavalue =~ s/\"/\\\"/g;
1171
1172    # Adds the pending macro
1173    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1174
1175    # If the metadata doesn't exist, create a new one
1176    if (!defined($doc_rec->{$metaname})){   
1177    $doc_rec->{$metaname} = [ $macro_metavalue ];
1178    }
1179    # Else, let's acculumate the values
1180    else {
1181        push(@{$doc_rec->{$metaname}},$macro_metavalue);
1182    }
1183
1184    # Generate the record string
1185    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1186
1187    # Store it into GDBM
1188    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1189    my $status = system($cmd);
1190    if ($status != 0) {
1191        # Catch error if gdbmget failed
1192    my $mess = "Failed to set metadata key: $docid\n";
1193   
1194    $mess .= "PATH: $ENV{'PATH'}\n";
1195    $mess .= "cmd = $cmd\n";
1196    $mess .= "Exit status: $status\n";
1197    $mess .= "System Error Message: $!\n";
1198
1199    $gsdl_cgi->generate_error($mess);
1200    }
1201    else {
1202    my $mess = "insert-metadata successful: Key[$docid]\n";
1203    $mess .= "  [In metadata.xml] $metaname";
1204    $mess .= " = $metavalue\n";
1205    $mess .= "  [In database] $metaname";
1206    $mess .= " = $macro_metavalue\n";
1207    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
1208        $gsdl_cgi->generate_ok_message($mess);
1209    }   
1210    #=======================================================================#
1211    # set_metadata (accumulate version) [END]
1212    #=======================================================================#
1213
1214    # Release the lock once it is done
1215    $self->unlock_collection($username, $collect);
1216}
1217
12181;
Note: See TracBrowser for help on using the browser.