root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 24950

Revision 24950, 56.2 KB (checked in by sjm84, 8 years ago)

Fixed use of null instead of undef

Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43
44@metadataaction::ISA = ('baseaction');
45
46
47my $action_table =
48{
49    "get-live-metadata"     => { 'compulsory-args' => [ "d", "metaname" ],
50                     'optional-args'   => [] },
51
52    "get-metadata"          => { 'compulsory-args' => [ "d", "metaname" ],
53                     'optional-args'   => [ "metapos" ] },
54
55    "set-live-metadata"     => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
56                     'optional-args'   => [ ] },
57
58    "set-metadata"          => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
59                     'optional-args'   => [ "metapos" ] },
60
61    "set-metadata-array"     => { 'compulsory-args' => [ "json" ],
62                     'optional-args'   => [ ] },
63                     
64    "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
65                     'optional-args'   => [ "metapos", "metamode" ]
66                      # metamode can be "accumulate", "override",
67                },
68
69    "set-archives-metadata-array" => { 'compulsory-args' => [ "json" ],
70                     'optional-args'   => [ "metamode" ]
71                },
72               
73    "set-import-metadata"   => { 'compulsory-args' => [ "metaname", "metavalue" ],
74                     'optional-args'   => [ "d", "f", "metamode" ] # Need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
75                    # metamode can be "accumulate", "override", or "unique-id"
76                 },
77
78    "remove-import-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
79                     'optional-args'   => [ ] },
80                     
81    "remove-archives-metadata" => { 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
82                     'optional-args'   => [ "metapos", "metavalue" ] },
83
84    "remove-live-metadata"  => { 'compulsory-args' => [ "d", "metaname" ],
85                     'optional-args'   => [ ] },
86
87    "remove-metadata"       => { 'compulsory-args' => [ "d", "metaname" ],
88                     'optional-args'   => [ "metapos", "metavalue" ] },
89
90    "insert-metadata"       => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
91                     'optional-args'   => [ ]
92                   }
93};
94
95
96sub new
97{
98    my $class = shift (@_);
99    my ($gsdl_cgi,$iis6_mode) = @_;
100
101    # Treat metavalue specially.  To transmit this through a GET request
102    # the Javascript side has url-encoded it, so here we need to decode
103    # it before proceeding
104
105    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
106    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
107
108    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
109
110    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
111
112    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
113
114    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
115
116    return bless $self, $class;
117}
118
119
120sub get_live_metadata
121{
122    my $self = shift @_;
123
124    my $username  = $self->{'username'};
125    my $collect   = $self->{'collect'};
126    my $gsdl_cgi  = $self->{'gsdl_cgi'};
127    my $gsdlhome  = $self->{'gsdlhome'};
128    my $infodbtype = $self->{'infodbtype'};
129   
130    # live metadata gets/saves value scoped (prefixed) by the current usename
131    # so (for now) let's not bother to enforce authentication
132
133    # Obtain the collect dir
134    my $site = $self->{'site'};
135    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
136    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
137
138    # Make sure the collection isn't locked by someone else
139    $self->lock_collection($username, $collect);
140
141    # look up additional args
142    my $docid  = $self->{'d'};
143    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
144       $gsdl_cgi->generate_error("No docid (d=...) specified.");
145    }
146
147    # Generate the dbkey
148    my $metaname  = $self->{'metaname'};
149    my $dbkey = "$docid.$metaname";
150
151    # To people who know $collect_tail please add some comments
152    # Obtain path to the database
153    my $collect_tail = $collect;
154    $collect_tail =~ s/^.*[\/|\\]//;
155    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
156    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
157   
158    # Obtain the content of the key
159    my $cmd = "gdbmget $infodb_file_path $dbkey";
160    if (open(GIN,"$cmd |") == 0) {
161        # Catch error if gdbmget failed
162    my $mess = "Failed to get metadata key: $metaname\n";
163    $mess .= "$!\n";
164
165    $gsdl_cgi->generate_error($mess);
166    }
167    else {
168    binmode(GIN,":utf8");
169        # Read everything in and concatenate them into $metavalue
170    my $metavalue = "";
171    my $line;
172    while (defined ($line=<GIN>)) {
173        $metavalue .= $line;
174    }
175    close(GIN);
176    chomp($metavalue); # Get rid off the tailing newlines
177    $gsdl_cgi->generate_ok_message("$metavalue");
178    }
179
180    # Release the lock once it is done
181    $self->unlock_collection($username, $collect);
182}
183
184
185sub get_metadata
186{
187    my $self = shift @_;
188
189    my $username  = $self->{'username'};
190    my $collect   = $self->{'collect'};
191    my $gsdl_cgi  = $self->{'gsdl_cgi'};
192    my $gsdlhome  = $self->{'gsdlhome'};
193
194    # Authenticate user if it is enabled
195    if ($baseaction::authentication_enabled) {
196    # Ensure the user is allowed to edit this collection
197    &authenticate_user($gsdl_cgi, $username, $collect);
198    }
199
200    # Obtain the collect dir
201    my $site = $self->{'site'};
202    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
203    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
204
205    # Make sure the collection isn't locked by someone else
206    $self->lock_collection($username, $collect);
207
208    # look up additional args
209    my $docid     = $self->{'d'};
210    my $metaname  = $self->{'metaname'};
211    my $metapos   = $self->{'metapos'};
212    my $infodbtype = $self->{'infodbtype'};
213
214    # To people who know $collect_tail please add some comments
215    # Obtain path to the database
216    my $collect_tail = $collect;
217    $collect_tail =~ s/^.*[\/\\]//;
218    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
219    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
220
221    # Read the docid entry
222    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
223 
224    # Basically loop through and unescape_html the values
225    foreach my $k (keys %$doc_rec) {
226    my @escaped_v = ();
227    foreach my $v (@{$doc_rec->{$k}}) {
228        my $ev = &ghtml::unescape_html($v);
229        push(@escaped_v, $ev);
230    }
231    $doc_rec->{$k} = \@escaped_v;
232    }
233
234    # Obtain the specified metadata value
235    $metapos = 0 if (!defined $metapos);
236    my $metavalue = $doc_rec->{$metaname}->[$metapos];
237    $gsdl_cgi->generate_ok_message("$metavalue");
238   
239    # Release the lock once it is done
240    $self->unlock_collection($username, $collect);
241}
242
243
244sub set_live_metadata
245{
246    my $self = shift @_;
247
248    my $username  = $self->{'username'};
249    my $collect   = $self->{'collect'};
250    my $gsdl_cgi  = $self->{'gsdl_cgi'};
251    my $gsdlhome  = $self->{'gsdlhome'};
252    my $infodbtype = $self->{'infodbtype'};
253 
254    if ($baseaction::authentication_enabled) {
255    # Ensure the user is allowed to edit this collection
256    &authenticate_user($gsdl_cgi, $username, $collect);
257    }
258
259    # Obtain the collect dir
260    my $site = $self->{'site'};
261    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
262    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
263
264    # Make sure the collection isn't locked by someone else
265    $self->lock_collection($username, $collect);
266
267    # look up additional args
268    my $docid     = $self->{'d'};
269    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
270      $gsdl_cgi->generate_error("No docid (d=...) specified.");
271    }
272    my $metavalue = $self->{'metavalue'};
273 
274
275    # Generate the dbkey   
276    my $metaname  = $self->{'metaname'};
277    my $dbkey = "$docid.$metaname";
278
279    # To people who know $collect_tail please add some comments
280    # Obtain path to the database
281    my $collect_tail = $collect;
282    $collect_tail =~ s/^.*[\/\\]//;
283    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
284    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
285
286    # Set the new value
287    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
288    my $status = system($cmd);
289    if ($status != 0) {
290        # Catch error if gdbmget failed
291    my $mess = "Failed to set metadata key: $dbkey\n";
292
293    $mess .= "PATH: $ENV{'PATH'}\n";
294    $mess .= "cmd = $cmd\n";
295    $mess .= "Exit status: $status\n";
296    $mess .= "System Error Message: $!\n";
297
298    $gsdl_cgi->generate_error($mess);
299    }
300    else {
301    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
302    }
303   
304    # Release the lock once it is done
305    $self->unlock_collection($username, $collect);
306}
307
308sub set_metadata_entry
309{
310    my $self = shift @_;
311    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
312   
313    # To people who know $collect_tail please add some comments
314    # Obtain path to the database
315    my $collect_tail = $collect;
316    $collect_tail =~ s/^.*[\/\\]//;
317    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
318    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
319   
320#   print STDERR "**** infodb file path = $infodb_file_path\n";
321#   print STDERR "***** infodb type = $infodbtype\n";
322   
323    # Read the docid entry
324    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
325   
326    # Set the metadata value
327    if (defined $metapos) {
328        $doc_rec->{$metaname}->[$metapos] = $metavalue;
329    }
330    else {
331        $doc_rec->{$metaname} = [ $metavalue ];
332    }
333 
334    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
335   
336    return $status;
337   
338}
339
340sub set_metadata
341{
342    my $self = shift @_;
343
344    my $username  = $self->{'username'};
345    my $collect   = $self->{'collect'};
346    my $gsdl_cgi  = $self->{'gsdl_cgi'};
347    my $gsdlhome  = $self->{'gsdlhome'};
348
349    if ($baseaction::authentication_enabled) {
350    # Ensure the user is allowed to edit this collection
351    &authenticate_user($gsdl_cgi, $username, $collect);
352    }
353
354    my $site = $self->{'site'};
355    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
356   
357    $gsdl_cgi->checked_chdir($collect_dir);
358
359    # Obtain the collect dir
360    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
361
362    # Make sure the collection isn't locked by someone else
363    $self->lock_collection($username, $collect);
364
365    # look up additional args
366    my $docid     = $self->{'d'};
367    my $metaname  = $self->{'metaname'};
368    my $metapos   = $self->{'metapos'};
369    my $metavalue = $self->{'metavalue'};
370    my $infodbtype = $self->{'infodbtype'};
371   
372    my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
373   
374    if ($status != 0) {
375        # Catch error if set infodb entry failed
376    my $mess = "Failed to set metadata key: $docid\n";
377   
378    $mess .= "PATH: $ENV{'PATH'}\n";
379    $mess .= "Exit status: $status\n";
380    $mess .= "System Error Message: $!\n";
381   
382    $gsdl_cgi->generate_error($mess);
383    }
384    else {
385    my $mess = "set-metadata successful: Key[$docid]\n";
386    $mess .= "  $metaname";
387    $mess .= "->[$metapos]" if (defined $metapos);
388    $mess .= " = $metavalue";
389   
390    $gsdl_cgi->generate_ok_message($mess);
391    }
392   
393    # Release the lock once it is done
394    $self->unlock_collection($username, $collect);
395}
396
397
398
399
400sub set_metadata_array
401{
402    my $self = shift @_;
403
404    my $username  = $self->{'username'};
405    my $collect   = $self->{'collect'};
406    my $gsdl_cgi  = $self->{'gsdl_cgi'};
407    my $gsdlhome  = $self->{'gsdlhome'};
408
409    if ($baseaction::authentication_enabled) {
410    # Ensure the user is allowed to edit this collection
411    &authenticate_user($gsdl_cgi, $username, $collect);
412    }
413
414    my $site = $self->{'site'};
415    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
416   
417    $gsdl_cgi->checked_chdir($collect_dir);
418
419    # Obtain the collect dir
420    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
421
422    # Make sure the collection isn't locked by someone else
423    $self->lock_collection($username, $collect);
424
425    # look up additional args
426   
427    my $infodbtype = $self->{'infodbtype'};
428   
429    my $json_str      = $self->{'json'};
430    my $doc_array = decode_json $json_str;
431   
432   
433    my $global_status = 0;
434    my $global_mess = "";
435   
436    my @all_docids = ();
437   
438    foreach my $doc_array_rec ( @$doc_array ) {
439       
440        my $docid     = $doc_array_rec->{'docid'};
441        my $metaname  = $doc_array_rec->{'metaname'};
442        my $metapos   = $doc_array_rec->{'metapos'};
443        my $metavalue = $doc_array_rec->{'metavalue'};
444       
445        push(@all_docids,$docid);
446       
447        my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
448       
449        if ($status != 0) {
450            # Catch error if set infodb entry failed
451            $global_status = $status;
452            $global_mess .= "Failed to set metadata key: $docid\n";
453            $global_mess .= "Exit status: $status\n";
454            $global_mess .= "System Error Message: $!\n";
455            $global_mess .= "-" x 20;
456        }
457    }
458
459    if ($global_status != 0) {
460        $global_mess .= "PATH: $ENV{'PATH'}\n";
461        $gsdl_cgi->generate_error($global_mess);
462    }
463    else {
464        my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
465        $gsdl_cgi->generate_ok_message($mess);
466    }
467   
468    # Release the lock once it is done
469    $self->unlock_collection($username, $collect);
470}
471
472
473
474sub dxml_metadata
475{
476    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
477    my $metaname = $parser->{'parameters'}->{'metaname'};
478    my $metamode = $parser->{'parameters'}->{'metamode'};
479   
480    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
481   
482    # Find the right metadata tag and checks if we are going to
483    # override it
484    #
485    # Note: This over writes the first metadata block it
486    # encountered. If there are multiple Sections in the doc.xml, it
487    # might not behave as you would expect
488
489    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
490##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
491##    print STDERR "**** metamode = $metamode\n";
492   
493    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
494    my $name_attr = $attrHash->{'name'};
495    if (($name_attr eq $metaname) && ($metamode eq "override")) {
496##      print STDERR "**** got match!!\n";
497        # Get the value and override the current value
498        my $metavalue = $parser->{'parameters'}->{'metavalue'};
499        $attrHash->{'_content'} = $metavalue;
500       
501        # Don't want it to wipe out any other pieces of metadata
502        $parser->{'parameters'}->{'metamode'} = "done";
503    }
504    }
505
506    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
507    return [$tagname => $attrHash];
508}
509
510
511sub dxml_description
512{
513    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
514    my $metamode = $parser->{'parameters'}->{'metamode'};
515
516    # Accumulate the metadata
517    # NOTE: This appends new metadata element to all description fields.
518    # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
519    if (($metamode eq "accumulate") || ($metamode eq "override")) {
520    # If get to here and metamode is override, the this means there
521    # was no existing value to overide => treat as an append operation
522
523    # Tack a new metadata tag on to the end of the <Metadata>+ block
524    my $metaname = $parser->{'parameters'}->{'metaname'};
525    my $metavalue = $parser->{'parameters'}->{'metavalue'};
526   
527    my $metadata_attr = { '_content' => $metavalue,
528                  'name'     => $metaname,
529                  'mode'     => "accumulate" };
530
531    my $append_metadata = [ "Metadata" => $metadata_attr ];
532    my $description_content = $attrHash->{'_content'};
533
534##  print STDERR "**** appending to doc.xml\n";
535
536    push(@$description_content,"    ", $append_metadata ,"\n        ");
537    $parser->{'parameters'}->{'metamode'} = "done";
538    }
539
540
541    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
542    return [$tagname => $attrHash];
543}
544
545
546
547sub dxml_start_section
548{
549    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
550
551    my $new_depth = scalar(@$contextArray);
552
553    if ($new_depth == 1) {
554    $parser->{'parameters'}->{'curr_section_depth'} = 1;
555    $parser->{'parameters'}->{'curr_section_num'}   = "";
556    }
557
558    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
559    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
560
561    my $new_secnum;
562
563    if ($new_depth > $old_depth) {
564    # child subsection
565    $new_secnum = "$old_secnum.1";
566    }
567    elsif ($new_depth == $old_depth) {
568    # sibling section => increase it's value by 1
569    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
570    $tail_num++;
571    $new_secnum = $old_secnum;
572    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
573    }
574    else {
575    # back up to parent section => lopp off tail
576    $new_secnum = $old_secnum;
577    $new_secnum =~ s/\.\d+$//;
578    }
579
580    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
581    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
582
583    print STDERR "*** In Section: $new_secnum\n";
584}
585
586sub edit_xml_file
587{
588    my $self = shift @_;
589    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
590
591    # use XML::Rules to add it in (read in and out again)
592    my $parser = XML::Rules->new(start_rules     => $start_rules,
593                 rules           => $rules,
594                 style           => 'filter',
595                                 output_encoding => 'utf8' );
596
597    my $xml_in = "";
598    if (!open(MIN,"<$filename")) {
599    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
600    }
601    else {
602        # Read all the text in
603    my $line;
604    while (defined ($line=<MIN>)) {
605        $xml_in .= $line;
606    }
607    close(MIN);
608   
609    my $MOUT;   
610    if (!open($MOUT,">$filename")) {
611        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
612    }
613    else {
614        # Matched lines will get handled by the call backs
615##      my $xml_out = "";
616
617        binmode($MOUT,":utf8");
618        $parser->filter($xml_in,$MOUT, $options);
619
620#       binmode(MOUT,":utf8");
621#       print MOUT $xml_out;
622        close($MOUT);       
623    }
624    }
625}
626
627
628sub edit_doc_xml
629{
630    my $self = shift @_;
631    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
632
633    # To monitor which section/subsection number we are in
634    my @start_rules =
635    ( 'Section'    => \&dxml_start_section );
636
637    # use XML::Rules to add it in (read in and out again)
638    # Set the call back functions
639    my @rules =
640    ( _default => 'raw',
641      'Metadata'    => \&dxml_metadata,
642      'Description' => \&dxml_description);
643     
644    # Sets the parameters
645    my $options = { 'metaname'  => $metaname,
646            'metapos'   => $metapos,
647            'metavalue' => $metavalue,
648            'metamode'  => $metamode };
649           
650    if (defined $opt_secnum) {
651    $options->{'secnum'} = $opt_secnum;
652    }
653
654    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
655}
656
657sub set_archives_metadata_entry
658{
659    my $self = shift @_;
660    my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
661   
662    # Obtain the doc.xml path for the specified docID
663    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
664
665    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
666    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
667    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
668   
669    # The $doc_xml_file is relative to the archives, and now let's get the full path
670    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
671    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
672   
673    # Edit the doc.xml file with the specified metadata name, value and position.
674    # TODO: there is a potential problem here as this edit_doc_xml function
675    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
676    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
677   
678    print STDERR "** away to call edit_doc_xml\n";
679   
680    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
681            $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
682           
683    print STDERR "*** finished edit_doc_xml\n";
684   
685    return 0; # return 0 for now to indicate no error
686           
687}
688
689
690sub set_archives_metadata
691{
692    my $self = shift @_;
693
694    my $username  = $self->{'username'};
695    my $collect   = $self->{'collect'};
696    my $gsdl_cgi  = $self->{'gsdl_cgi'};
697    my $gsdlhome  = $self->{'gsdlhome'};
698    my $infodbtype = $self->{'infodbtype'};
699   
700    if ($baseaction::authentication_enabled) {
701        # Ensure the user is allowed to edit this collection
702        $self->authenticate_user($username, $collect);
703    }
704
705    my $site = $self->{'site'};
706       
707    # Obtain the collect and archive dir   
708    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
709   
710    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
711
712    # Make sure the collection isn't locked by someone else
713    $self->lock_collection($username, $collect);
714
715    # look up additional args
716    my $docid  = $self->{'d'};
717    my $metaname   = $self->{'metaname'};
718    my $metavalue  = $self->{'metavalue'};
719   
720    my $metapos    = $self->{'metapos'};
721    $metapos = 0 if (!defined $metapos);
722
723    my $metamode   = $self->{'metamode'};
724    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
725    # make "accumulate" the default (less destructive, as won't actually
726    # delete any existing values)
727    $metamode = "accumulate";
728    }
729   
730    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
731                $metaname,$metapos,$metavalue,$metamode);
732   
733    # Release the lock once it is done
734    $self->unlock_collection($username, $collect);
735
736    if ($status == 0) {
737        my $mess = "set-archives-metadata successful: Key[$docid]\n";
738        $mess .= "  $metaname";
739        $mess .= "->[$metapos]" if (defined $metapos);
740        $mess .= " = $metavalue";
741        $mess .= " ($metamode)\n";
742   
743        $gsdl_cgi->generate_ok_message($mess); 
744    }
745    else {
746        my $mess .= "Failed to set archives metadata key: $docid\n";
747        $mess .= "Exit status: $status\n";
748        $mess .= "System Error Message: $!\n";
749        $mess .= "-" x 20 . "\n";
750       
751        $gsdl_cgi->generate_error($mess);
752    }
753}
754
755
756sub set_archives_metadata_array
757{
758    my $self = shift @_;
759
760    my $username  = $self->{'username'};
761    my $collect   = $self->{'collect'};
762    my $gsdl_cgi  = $self->{'gsdl_cgi'};
763    my $gsdlhome  = $self->{'gsdlhome'};
764
765    if ($baseaction::authentication_enabled) {
766    # Ensure the user is allowed to edit this collection
767    &authenticate_user($gsdl_cgi, $username, $collect);
768    }
769
770    my $site = $self->{'site'};
771    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
772   
773    $gsdl_cgi->checked_chdir($collect_dir);
774
775    # Obtain the collect dir
776    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
777
778    # Make sure the collection isn't locked by someone else
779    $self->lock_collection($username, $collect);
780
781    # look up additional args
782   
783    my $infodbtype = $self->{'infodbtype'};
784
785   my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
786   
787    my $json_str      = $self->{'json'};
788    my $doc_array = decode_json $json_str;
789   
790   
791    my $global_status = 0;
792    my $global_mess = "";
793   
794    my @all_docids = ();
795   
796    foreach my $doc_array_rec ( @$doc_array ) {
797       
798        my $docid     = $doc_array_rec->{'docid'};
799        my $metaname  = $doc_array_rec->{'metaname'};
800        my $metapos   = $doc_array_rec->{'metapos'};
801        my $metamode   = $self->{'metamode'};
802        my $metavalue = $doc_array_rec->{'metavalue'};
803       
804        # Some sanity checks
805        $metapos = 0 if (!defined $metapos);
806           
807        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
808            # make "accumulate" the default (less destructive, as won't actually
809            # delete any existing values)
810            $metamode = "accumulate";
811        }
812   
813        push(@all_docids,$docid);
814       
815        my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
816                $metaname,$metapos,$metavalue,$metamode);
817       
818        if ($status != 0) {
819            # Catch error if set infodb entry failed
820            $global_status = $status;
821            $global_mess .= "Failed to set metadata key: $docid\n";
822            $global_mess .= "Exit status: $status\n";
823            $global_mess .= "System Error Message: $!\n";
824            $global_mess .= "-" x 20 . "\n";
825        }
826    }
827
828    if ($global_status != 0) {
829        $global_mess .= "PATH: $ENV{'PATH'}\n";
830        $gsdl_cgi->generate_error($global_mess);
831    }
832    else {
833        my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
834        $gsdl_cgi->generate_ok_message($mess);
835    }
836   
837    # Release the lock once it is done
838    $self->unlock_collection($username, $collect);
839}
840
841sub remove_archives_metadata
842{
843    my $self = shift @_;
844
845    my $username  = $self->{'username'};
846    my $collect   = $self->{'collect'};
847    my $gsdl_cgi  = $self->{'gsdl_cgi'};
848    my $gsdlhome  = $self->{'gsdlhome'};
849    my $infodbtype = $self->{'infodbtype'};
850   
851    if ($baseaction::authentication_enabled)
852    {
853        # Ensure the user is allowed to edit this collection
854        &authenticate_user($gsdl_cgi, $username, $collect);
855    }
856   
857    my $site = $self->{'site'};
858       
859    # Obtain the collect and archive dir   
860    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
861   
862    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
863
864    # Make sure the collection isn't locked by someone else
865    $self->lock_collection($username, $collect);
866   
867    # look up additional args
868    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
869   
870    my $metaname = $self->{'metaname'};
871    my $metapos = $self->{'metapos'};
872    $metapos = 0 if (!defined $metapos);
873   
874    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
875    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
876
877    # This now stores the full pathname
878    my $import_filename = $doc_rec->{'doc-file'}->[0]; 
879
880    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $import_filename), $metaname, $metapos, undef, $docid_secnum);
881   
882    # Release the lock once it is done
883    $self->unlock_collection($username, $collect);
884
885    if ($status == 0)
886    {
887        my $mess = "remove-archives-metadata successful: Key[$docid]\n";
888        $mess .= "  $metaname";
889        $mess .= "->[$metapos]" if (defined $metapos);
890
891        $gsdl_cgi->generate_ok_message($mess); 
892    }
893    else
894    {
895        my $mess .= "Failed to remove archives metadata key: $docid\n";
896        $mess .= "Exit status: $status\n";
897        $mess .= "System Error Message: $!\n";
898        $mess .= "-" x 20 . "\n";
899       
900        $gsdl_cgi->generate_error($mess);
901    }
902}
903
904sub remove_from_doc_xml
905{
906    my $self = shift @_;
907    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
908   
909    my @start_rules = ('Section' => \&dxml_start_section);
910   
911    # Set the call-back functions for the metadata tags
912    my @rules =
913    (
914        _default => 'raw',
915        'Metadata' => \&rfdxml_metadata
916    );
917       
918    my $parser = XML::Rules->new
919    (
920        start_rules => \@start_rules,
921        rules => \@rules,
922        style => 'filter',
923        output_encoding => 'utf8'
924    );
925   
926    my $status = 0;
927    my $xml_in = "";
928    if (!open(MIN,"<$doc_xml_filename"))
929    {
930        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
931        $status = 1;
932    }
933    else
934    {
935        # Read them in
936        my $line;
937        while (defined ($line=<MIN>)) {
938            $xml_in .= $line;
939        }
940        close(MIN);
941
942        # Filter with the call-back functions
943        my $xml_out = "";
944
945        my $MOUT;
946        if (!open($MOUT,">$doc_xml_filename")) {
947            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
948            $status = 1;
949        }
950        else {
951            binmode($MOUT,":utf8");
952            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
953            close($MOUT);       
954        }
955    }
956    return $status;
957}
958
959sub rfdxml_metadata
960{
961    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
962
963    if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
964    {
965        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
966        return [$tagname => $attrHash];
967    }
968
969    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
970    {
971        if (!defined $parser->{'parameters'}->{'poscount'})
972        {
973            $parser->{'parameters'}->{'poscount'} = 0;
974        }
975        else
976        {
977            $parser->{'parameters'}->{'poscount'}++;
978        }
979    }
980   
981    if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
982    {   
983        return [];
984    }
985   
986    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
987    {   
988        return [];
989    }
990   
991    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
992    return [$tagname => $attrHash];
993}
994
995sub mxml_metadata
996{
997    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
998    my $metaname = $parser->{'parameters'}->{'metaname'};
999    my $metamode = $parser->{'parameters'}->{'metamode'};
1000
1001    # Report error if we don't see FileName tag before this
1002    die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1003   
1004    # Don't do anything if we are not in the right FileSet
1005    my $file_regexp = $parser->{'parameters'}->{'current_file'};
1006    if ($file_regexp =~ /\.\*/) {
1007    # Only interested in a file_regexp if it specifies precisely one
1008    # file. 
1009    # So, skip anything with a .* in it as it is too general
1010    return [$tagname => $attrHash];
1011    }
1012    my $src_file = $parser->{'parameters'}->{'src_file'};
1013    if (!($src_file =~ /$file_regexp/)) {
1014    return [$tagname => $attrHash];
1015    }
1016##    print STDERR "*** mxl metamode = $metamode\n";
1017
1018    # Find the right metadata tag and checks if we are going to override it
1019    my $name_attr = $attrHash->{'name'};
1020    if (($name_attr eq $metaname) && ($metamode eq "override")) {
1021        # Get the value and override the current value
1022    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1023    $attrHash->{'_content'} = $metavalue;
1024
1025##  print STDERR "**** overrideing metadata.xml\n";
1026
1027    # Don't want it to wipe out any other pieces of metadata
1028    $parser->{'parameters'}->{'metamode'} = "done";
1029    }
1030
1031    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1032    return [$tagname => $attrHash];
1033}
1034
1035
1036sub mxml_description
1037{
1038    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1039    my $metamode = $parser->{'parameters'}->{'metamode'};   
1040
1041    # Failed... Report error if we don't see FileName tag before this
1042    die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1043
1044    # Don't do anything if we are not in the right FileSet
1045    my $file_regexp = $parser->{'parameters'}->{'current_file'};
1046    if ($file_regexp =~ m/\.\*/) {
1047    # Only interested in a file_regexp if it specifies precisely one
1048    # file. 
1049    # So, skip anything with a .* in it as it is too general
1050    return [$tagname => $attrHash];
1051    }
1052    my $src_file = $parser->{'parameters'}->{'src_file'};
1053   
1054    if (!($src_file =~ m/$file_regexp/)) {
1055    return [$tagname => $attrHash];
1056    }
1057
1058    # Accumulate the metadata block to the end of the description block
1059    # Note: This adds metadata block to all description blocks, so if there are
1060    # multiple FileSets, it will add to all of them
1061    if (($metamode eq "accumulate") || ($metamode eq "override")) {
1062    # if metamode was "override" but get to here then it failed to
1063    # find an item to override, in which case it should append its
1064    # value to the end, just like the "accumulate" mode
1065
1066    # tack a new metadata tag on to the end of the <Metadata>+ block
1067    my $metaname = $parser->{'parameters'}->{'metaname'};
1068    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1069   
1070    my $metadata_attr = { '_content' => $metavalue,
1071                  'name'     => $metaname,
1072                  'mode'     => "accumulate" };
1073
1074    my $append_metadata = [ "Metadata" => $metadata_attr ];
1075    my $description_content = $attrHash->{'_content'};
1076   
1077##  print STDERR "*** appending to metadata.xml\n";
1078
1079    # append the new metadata element to the end of the current
1080    # content contained inside this tag
1081    if (ref($description_content) eq "") {
1082        # => string or numeric literal
1083        # this is caused by a <Description> block has no <Metadata> child elements
1084        # => set up an empty array in '_content'
1085        $attrHash->{'_content'} = [ "\n" ];
1086        $description_content = $attrHash->{'_content'};
1087    }
1088
1089    push(@$description_content,"    ", $append_metadata ,"\n        ");
1090    $parser->{'parameters'}->{'metamode'} = "done";
1091    }
1092
1093    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1094    return [$tagname => $attrHash];
1095}
1096
1097
1098sub mxml_filename
1099{
1100    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1101
1102    # Store the filename of the Current Fileset
1103    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1104    # FileName tag must come before Description tag
1105    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1106
1107    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1108    return [$tagname => $attrHash];
1109}
1110
1111
1112sub mxml_fileset
1113{
1114    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1115
1116    # Initilise the current_file
1117    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1118    # FileName tag must come before Description tag
1119    $parser->{'parameters'}->{'current_file'} = "";
1120
1121    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1122    return [$tagname => $attrHash];
1123}
1124
1125
1126sub edit_metadata_xml
1127{
1128    my $self = shift @_;
1129    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1130
1131    # Set the call-back functions for the metadata tags
1132    my @rules =
1133    ( _default => 'raw',
1134          'FileName' => \&mxml_filename,
1135      'Metadata' => \&mxml_metadata,
1136      'Description' => \&mxml_description,
1137          'FileSet' => \&mxml_fileset);
1138
1139    # use XML::Rules to add it in (read in and out again)
1140    my $parser = XML::Rules->new(rules => \@rules,
1141                 style => 'filter',
1142                                 output_encoding => 'utf8');
1143
1144    if (!-e $metadata_xml_filename) {
1145   
1146        if (open(MOUT,">$metadata_xml_filename")) {
1147           
1148            my $src_file_re = &util::filename_to_regex($src_file);
1149            # shouldn't the following also be in the above utility routine??
1150            # $src_file_re =~ s/\./\\./g;
1151       
1152            print MOUT "<?xml version=\"1.0\"?>\n";
1153            print MOUT "<DirectoryMetadata>\n";
1154            print MOUT " <FileSet>\n";
1155            print MOUT "  <FileName>$src_file_re</FileName>\n";
1156            print MOUT "  <Description>\n";
1157            print MOUT "  </Description>\n";
1158            print MOUT " </FileSet>\n";
1159            print MOUT "</DirectoryMetadata>\n";
1160
1161            close(MOUT);
1162        }
1163        else {
1164            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1165        }
1166    }
1167   
1168   
1169    my $xml_in = "";
1170    if (!open(MIN,"<$metadata_xml_filename")) {
1171        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1172    }
1173    else {
1174        # Read them in
1175        my $line;
1176        while (defined ($line=<MIN>)) {
1177            $xml_in .= $line;
1178        }
1179        close(MIN);
1180
1181        # Filter with the call-back functions
1182        my $xml_out = "";
1183
1184        my $MOUT;
1185        if (!open($MOUT,">$metadata_xml_filename")) {
1186            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1187        }
1188        else {
1189            binmode($MOUT,":utf8");
1190
1191            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1192            # At the moment, I will just hack it!
1193            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1194            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1195            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1196            #print MOUT $xml_out;
1197
1198            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1199                      metavalue => $metavalue,
1200                      metamode => $metamode,
1201                      src_file => $src_file,
1202                      current_file => undef} );
1203            close($MOUT);       
1204        }
1205   }
1206}
1207
1208
1209sub set_import_metadata
1210{
1211    my $self = shift @_;
1212   
1213    my $username  = $self->{'username'};
1214    my $collect   = $self->{'collect'};
1215    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1216    my $gsdlhome  = $self->{'gsdlhome'};
1217    my $infodbtype = $self->{'infodbtype'};
1218   
1219    if ($baseaction::authentication_enabled) {
1220    # Ensure the user is allowed to edit this collection
1221    $self->authenticate_user($username, $collect);
1222    }
1223
1224
1225    # Obtain the collect and archive dir   
1226    my $site = $self->{'site'};
1227    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1228
1229    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1230    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1231
1232    # Make sure the collection isn't locked by someone else
1233    $self->lock_collection($username, $collect);
1234   
1235    # look up additional args
1236    # want either d= or f=
1237    my $docid  = $self->{'d'};
1238    my $import_file  = $self->{'f'};
1239    if ((!defined $docid) && (!defined $import_file)) {
1240    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1241    }
1242
1243    # Get the parameters and set default mode to "accumulate"
1244    my $metaname   = $self->{'metaname'};
1245    my $metavalue  = $self->{'metavalue'};
1246##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1247    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1248    print STDERR "*** set import meta: val = $metavalue\n";
1249   
1250    my $metamode   = $self->{'metamode'};
1251    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1252    # make "accumulate" the default (less destructive, as won't actually
1253    # delete any existing values)
1254    $metamode = "accumulate";
1255    }
1256
1257    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1258    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1259    my $metadata_xml_file;
1260    my $import_filename = undef;
1261    if (defined $docid) {
1262    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1263    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1264
1265    # This now stores the full pathname
1266    $import_filename = $doc_rec->{'src-file'}->[0];
1267    }
1268    else {
1269        $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1270    }
1271
1272    # figure out correct metadata.xml file [?]
1273    # Assuming the metadata.xml file is next to the source file
1274    # Note: This will not work if it is using the inherited metadata from the parent folder
1275    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1276    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1277
1278    # Edit the metadata.xml
1279    # Modified by Jeffrey from DL Consulting
1280    # Handle the case where there is one metadata.xml file for multiple FileSets
1281    # The XML filter needs to know whether it is in the right FileSet
1282    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1283    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1284    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1285                             $metaname, $metavalue, $metamode, $import_tailname);
1286
1287    # Release the lock once it is done
1288    $self->unlock_collection($username, $collect);
1289
1290    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1291    $mess .= "  $metaname";
1292    $mess .= " = $metavalue";
1293    $mess .= " ($metamode)\n";
1294   
1295    $gsdl_cgi->generate_ok_message($mess);
1296   
1297}
1298
1299sub remove_import_metadata
1300{
1301    my $self = shift @_;
1302   
1303    my $username = $self->{'username'};
1304    my $collect   = $self->{'collect'};
1305    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1306   
1307    if ($baseaction::authentication_enabled) {
1308        # Ensure the user is allowed to edit this collection
1309        &authenticate_user($gsdl_cgi, $username, $collect);
1310    }
1311
1312    my $gsdlhome  = $self->{'gsdlhome'};
1313    my $infodbtype = $self->{'infodbtype'};
1314   
1315    # Obtain the collect dir
1316    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1317    my $site = $self->{'site'};
1318    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1319   
1320    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1321    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1322   
1323    # Make sure the collection isn't locked by someone else
1324    $self->lock_collection($username, $collect);
1325   
1326    # look up additional args
1327    my $docid = $self->{'d'};
1328    if ((!defined $docid) || ($docid =~ m/^\s*$/))
1329    {
1330        die "No docid (d=...) specified.\n";
1331    }
1332   
1333    my $metaname = $self->{'metaname'};
1334    my $metavalue = $self->{'metavalue'};
1335    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1336   
1337    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1338    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1339    my $metadata_xml_file;
1340    my $import_filename = undef;
1341    if (defined $docid)
1342    {
1343        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1344        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1345
1346        # This now stores the full pathname
1347        $import_filename = $doc_rec->{'src-file'}->[0];
1348    }
1349
1350    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1351    {
1352        die "There is no metadata\n";
1353    }
1354   
1355    # figure out correct metadata.xml file [?]
1356    # Assuming the metadata.xml file is next to the source file
1357    # Note: This will not work if it is using the inherited metadata from the parent folder
1358    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1359    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1360   
1361    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1362   
1363    # Release the lock once it is done
1364    $self->unlock_collection($username, $collect);
1365
1366    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1367    $mess .= "  $metaname";
1368    $mess .= " = $metavalue\n";
1369   
1370    $gsdl_cgi->generate_ok_message($mess);
1371}
1372
1373sub remove_from_metadata_xml
1374{
1375    my $self = shift @_;
1376    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1377   
1378    # Set the call-back functions for the metadata tags
1379    my @rules =
1380    (
1381        _default => 'raw',
1382        'Metadata' => \&rfmxml_metadata,
1383        'FileName' => \&mxml_filename
1384    );
1385       
1386    my $parser = XML::Rules->new
1387    (
1388        rules => \@rules,
1389        style => 'filter',
1390        output_encoding => 'utf8'
1391    );
1392   
1393    my $xml_in = "";
1394    if (!open(MIN,"<$metadata_xml_filename"))
1395    {
1396        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1397    }
1398    else
1399    {
1400        # Read them in
1401        my $line;
1402        while (defined ($line=<MIN>)) {
1403            $xml_in .= $line;
1404        }
1405        close(MIN);
1406
1407        # Filter with the call-back functions
1408        my $xml_out = "";
1409
1410        my $MOUT;
1411        if (!open($MOUT,">$metadata_xml_filename")) {
1412            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1413        }
1414        else {
1415            binmode($MOUT,":utf8");
1416            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1417            close($MOUT);       
1418        }
1419    }
1420}
1421
1422sub rfmxml_metadata
1423{
1424    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1425
1426    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1427    {
1428        return [];
1429    }
1430
1431    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1432    return [$tagname => $attrHash];
1433}
1434
1435sub remove_live_metadata
1436{
1437    my $self = shift @_;
1438
1439    my $username  = $self->{'username'};
1440    my $collect   = $self->{'collect'};
1441    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1442    my $gsdlhome  = $self->{'gsdlhome'};
1443    my $infodbtype = $self->{'infodbtype'};
1444   
1445    if ($baseaction::authentication_enabled) {
1446    # Ensure the user is allowed to edit this collection
1447    &authenticate_user($gsdl_cgi, $username, $collect);
1448    }
1449
1450    # Obtain the collect dir
1451    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1452    my $site = $self->{'site'};
1453    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1454
1455    # Make sure the collection isn't locked by someone else
1456    $self->lock_collection($username, $collect);
1457
1458    # look up additional args
1459    my $docid     = $self->{'d'};
1460    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1461      $gsdl_cgi->generate_error("No docid (d=...) specified.");
1462    }
1463   
1464    # Generate the dbkey
1465    my $metaname  = $self->{'metaname'};
1466    my $dbkey = "$docid.$metaname";
1467
1468    # To people who know $collect_tail please add some comments
1469    # Obtain the live gdbm_db path
1470    my $collect_tail = $collect;
1471    $collect_tail =~ s/^.*[\/\\]//;
1472    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1473    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1474
1475    # Remove the key
1476    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
1477    my $status = system($cmd);
1478    if ($status != 0) {
1479        # Catch error if gdbmdel failed
1480    my $mess = "Failed to set metadata key: $dbkey\n";
1481   
1482    $mess .= "PATH: $ENV{'PATH'}\n";
1483    $mess .= "cmd = $cmd\n";
1484    $mess .= "Exit status: $status\n";
1485    $mess .= "System Error Message: $!\n";
1486
1487    $gsdl_cgi->generate_error($mess);
1488    }
1489    else {
1490    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1491    }
1492
1493}
1494
1495
1496sub remove_metadata
1497{
1498    my $self = shift @_;
1499
1500    my $username  = $self->{'username'};
1501    my $collect   = $self->{'collect'};
1502    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1503    my $gsdlhome  = $self->{'gsdlhome'};
1504    my $infodbtype = $self->{'infodbtype'};
1505   
1506    if ($baseaction::authentication_enabled) {
1507    # Ensure the user is allowed to edit this collection
1508    &authenticate_user($gsdl_cgi, $username, $collect);
1509    }
1510
1511    # Obtain the collect dir
1512    my $site = $self->{'site'};
1513    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1514    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1515
1516    # Make sure the collection isn't locked by someone else
1517    $self->lock_collection($username, $collect);
1518
1519    # look up additional args
1520    my $docid     = $self->{'d'};
1521    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1522      $gsdl_cgi->generate_error("No docid (d=...) specified.");
1523    }
1524    my $metaname  = $self->{'metaname'};
1525    my $metapos   = $self->{'metapos'};
1526    my $metavalue = $self->{'metavalue'};
1527
1528    # To people who know $collect_tail please add some comments
1529    # Obtain the path to the database
1530    my $collect_tail = $collect;
1531    $collect_tail =~ s/^.*[\/\\]//;
1532    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1533    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1534
1535    # Read the docid entry
1536    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1537
1538    # Basically loop through and unescape_html the values
1539    foreach my $k (keys %$doc_rec) {
1540    my @escaped_v = ();
1541    foreach my $v (@{$doc_rec->{$k}}) {
1542        if ($k eq "contains") {
1543        # protect quotes in ".2;".3 etc
1544        $v =~ s/\"/\\\"/g;
1545        push(@escaped_v, $v);
1546        }
1547        else {
1548        my $ev = &ghtml::unescape_html($v);
1549        $ev =~ s/\"/\\\"/g;
1550        push(@escaped_v, $ev);
1551        }
1552    }
1553    $doc_rec->{$k} = \@escaped_v;
1554    }
1555
1556    # Check to make sure the key does exist
1557    if (!defined ($doc_rec->{$metaname})) {
1558        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1559    }
1560
1561    # Obtain the specified metadata pos
1562    $metapos = 0 if (!defined $metapos);
1563
1564    # consider check key is defined before deleting?
1565    # Loop through the metadata array and ignore the specified position
1566    my $filtered_metadata = [];
1567    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
1568    for (my $i=0; $i<$num_metadata_vals; $i++) {
1569    my $metaval = shift(@{$doc_rec->{$metaname}});
1570
1571    if (!defined $metavalue && $i != $metapos) {
1572        push(@$filtered_metadata,$metaval)
1573    }
1574   
1575    if(defined $metavalue && !($metavalue eq $metaval))
1576    {
1577        push(@$filtered_metadata,$metavalue)
1578    }
1579    }
1580    $doc_rec->{$metaname} = $filtered_metadata;
1581
1582    # Turn the record back to string
1583    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1584
1585    # Store it back to the database
1586    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1587    my $status = system($cmd);
1588    if ($status != 0) {
1589    my $mess = "Failed to set metadata key: $docid\n";
1590   
1591    $mess .= "PATH: $ENV{'PATH'}\n";
1592    $mess .= "cmd = $cmd\n";
1593    $mess .= "Exit status: $status\n";
1594    $mess .= "System Error Message: $!\n";
1595
1596    $gsdl_cgi->generate_error($mess);
1597    }
1598    else {
1599    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1600    $mess .= "  $metaname";
1601    $mess .= "->[$metapos]" if (defined $metapos);
1602
1603    $gsdl_cgi->generate_ok_message($mess);
1604    }
1605}
1606
1607
1608# Was trying to reused the codes, but the functions need to be broken
1609# down more before they can be reused, otherwise there will be too
1610# much overhead and duplicate process...
1611sub insert_metadata
1612{
1613    my $self = shift @_;
1614   
1615    my $username  = $self->{'username'};
1616    my $collect   = $self->{'collect'};
1617    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1618    my $gsdlhome  = $self->{'gsdlhome'};
1619    my $infodbtype = $self->{'infodbtype'};
1620   
1621    # If the import metadata and gdbm database have been updated, we
1622    # need to insert some notification to warn user that the the text
1623    # they see at the moment is not indexed and require a rebuild.
1624    my $rebuild_pending_macro = "_rebuildpendingmessage_";
1625
1626    if ($baseaction::authentication_enabled) {
1627    # Ensure the user is allowed to edit this collection
1628    $self->authenticate_user($username, $collect);
1629    }
1630
1631    # Obtain the collect and archive dir   
1632    my $site = $self->{'site'};
1633    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1634    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1635    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1636
1637    # Make sure the collection isn't locked by someone else
1638    $self->lock_collection($username, $collect);
1639   
1640    # Check additional args
1641    my $docid = $self->{'d'};
1642    if (!defined($docid)) {
1643    $gsdl_cgi->generate_error("No document id is specified: d=...");
1644    }
1645    my $metaname = $self->{'metaname'};
1646    if (!defined($metaname)) {
1647    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1648    }
1649    my $metavalue = $self->{'metavalue'};
1650    if (!defined($metavalue) || $metavalue eq "") {
1651    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1652    }
1653    # make "accumulate" the default (less destructive, as won't actually
1654    # delete any existing values)
1655    my $metamode = "accumulate";
1656
1657    #=======================================================================#
1658    # set_import_metadata [START]
1659    #=======================================================================#
1660    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1661    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1662    my $metadata_xml_file;
1663    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1664    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1665   
1666    # This now stores the full pathname
1667    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1668   
1669    # figure out correct metadata.xml file [?]
1670    # Assuming the metadata.xml file is next to the source file
1671    # Note: This will not work if it is using the inherited metadata from the parent folder
1672    my ($import_tailname, $import_dirname)
1673    = File::Basename::fileparse($import_filename);
1674    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1675
1676    # Shane's escape characters
1677    $metavalue = pack "U0C*", unpack "C*", $metavalue;
1678    $metavalue =~ s/\,/&#44;/g;
1679    $metavalue =~ s/\:/&#58;/g;
1680    $metavalue =~ s/\|/&#124;/g;
1681    $metavalue =~ s/\(/&#40;/g;
1682    $metavalue =~ s/\)/&#41;/g;
1683    $metavalue =~ s/\[/&#91;/g;
1684    $metavalue =~ s/\\/&#92;/g;
1685    $metavalue =~ s/\]/&#93;/g;
1686    $metavalue =~ s/\{/&#123;/g;
1687    $metavalue =~ s/\}/&#125;/g;
1688    $metavalue =~ s/\"/&#34;/g;
1689    $metavalue =~ s/\`/&#96;/g;
1690    $metavalue =~ s/\n/_newline_/g;
1691
1692    # Edit the metadata.xml
1693    # Modified by Jeffrey from DL Consulting
1694    # Handle the case where there is one metadata.xml file for multiple FileSets
1695    # The XML filter needs to know whether it is in the right FileSet
1696    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1697    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1698    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1699                             $metaname, $metavalue, $metamode, $import_tailname);
1700    #=======================================================================#
1701    # set_import_metadata [END]
1702    #=======================================================================#
1703
1704
1705    #=======================================================================#
1706    # set_metadata (accumulate version) [START]
1707    #=======================================================================#
1708    # To people who know $collect_tail please add some comments
1709    # Obtain path to the database
1710    my $collect_tail = $collect;
1711    $collect_tail =~ s/^.*[\/\\]//;
1712    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1713    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1714
1715    # Read the docid entry
1716    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1717   
1718    foreach my $k (keys %$doc_rec) {
1719    my @escaped_v = ();
1720    foreach my $v (@{$doc_rec->{$k}}) {
1721        if ($k eq "contains") {
1722        # protect quotes in ".2;".3 etc
1723        $v =~ s/\"/\\\"/g;
1724        push(@escaped_v, $v);
1725        }
1726        else {
1727        my $ev = &ghtml::unescape_html($v);
1728        $ev =~ s/\"/\\\"/g;
1729        push(@escaped_v, $ev);
1730        }
1731    }
1732    $doc_rec->{$k} = \@escaped_v;
1733    }
1734
1735    # Protect the quotes
1736    $metavalue =~ s/\"/\\\"/g;
1737
1738    # Adds the pending macro
1739    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1740
1741    # If the metadata doesn't exist, create a new one
1742    if (!defined($doc_rec->{$metaname})){   
1743    $doc_rec->{$metaname} = [ $macro_metavalue ];
1744    }
1745    # Else, let's acculumate the values
1746    else {
1747        push(@{$doc_rec->{$metaname}},$macro_metavalue);
1748    }
1749
1750    # Generate the record string
1751    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1752
1753    # Store it into GDBM
1754    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1755    my $status = system($cmd);
1756    if ($status != 0) {
1757        # Catch error if gdbmget failed
1758    my $mess = "Failed to set metadata key: $docid\n";
1759   
1760    $mess .= "PATH: $ENV{'PATH'}\n";
1761    $mess .= "cmd = $cmd\n";
1762    $mess .= "Exit status: $status\n";
1763    $mess .= "System Error Message: $!\n";
1764
1765    $gsdl_cgi->generate_error($mess);
1766    }
1767    else {
1768    my $mess = "insert-metadata successful: Key[$docid]\n";
1769    $mess .= "  [In metadata.xml] $metaname";
1770    $mess .= " = $metavalue\n";
1771    $mess .= "  [In database] $metaname";
1772    $mess .= " = $macro_metavalue\n";
1773    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
1774        $gsdl_cgi->generate_ok_message($mess);
1775    }   
1776    #=======================================================================#
1777    # set_metadata (accumulate version) [END]
1778    #=======================================================================#
1779
1780    # Release the lock once it is done
1781    $self->unlock_collection($username, $collect);
1782}
1783
17841;
Note: See TracBrowser for help on using the browser.