root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 25097

Revision 25097, 65.7 KB (checked in by sjm84, 8 years ago)

More changes to metadataaction as well as sorting the keys when printing

RevLine 
[19293]1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[24071]37
[19293]38BEGIN {
[22331]39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
[25097]46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ ] },
[19499]51
[25097]52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
[19499]59
[25097]60    "get-live-metadata" => {
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ ] },
[19499]63
[25097]64    #SET METHODS
65    "set-live-metadata" => {
66        'compulsory-args' => [ "d", "metaname", "metavalue" ],
67        'optional-args'   => [ ] },
[19499]68
[25097]69    "set-metadata" => {
70        'compulsory-args' => [ "d", "metaname", "metavalue" ],
71        'optional-args'   => [ "metapos" ] },
[19499]72
[25097]73    "set-archives-metadata" => {
74        'compulsory-args' => [ "d", "metaname", "metavalue" ],
75        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76   
77    "set-import-metadata" => {
78        'compulsory-args' => [ "metaname", "metavalue" ],
79        'optional-args'   => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80                 
81    #SET METHODS (ARRAY)
82    "set-metadata-array" => {
83        'compulsory-args' => [ "json" ],
84        'optional-args'   => [ ] },
[24943]85                     
[25097]86    "set-archives-metadata-array" => {
87        'compulsory-args' => [ "json" ],
88        'optional-args'   => [ ] },
89       
90    "set-import-metadata-array" => {
91        'compulsory-args' => [ "json" ],
92        'optional-args'   => [ ] },
93       
94    #REMOVE METHODS
95    "remove-import-metadata" => {
96        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97        'optional-args'   => [ ] },
98                     
99    "remove-archives-metadata" => {
100        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101        'optional-args'   => [ "metapos", "metavalue" ] },
[20538]102
[25097]103    "remove-live-metadata" => {
104        'compulsory-args' => [ "d", "metaname" ],
105        'optional-args'   => [ ] },
[20538]106
[25097]107    "remove-metadata" => {
108        'compulsory-args' => [ "d", "metaname" ],
109        'optional-args'   => [ "metapos", "metavalue" ] },
[21716]110
[25097]111    #INSERT METHODS
112    "insert-metadata" => {
113        'compulsory-args' => [ "d", "metaname", "metavalue" ],
114        'optional-args'   => [ ] }
[19293]115};
116
117
118sub new
119{
120    my $class = shift (@_);
121    my ($gsdl_cgi,$iis6_mode) = @_;
122
[23761]123    # Treat metavalue specially.  To transmit this through a GET request
124    # the Javascript side has url-encoded it, so here we need to decode
125    # it before proceeding
126
127    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129
130    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
131
132    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
133
134    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
135
[19293]136    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
137
138    return bless $self, $class;
139}
140
141
142sub get_live_metadata
143{
144    my $self = shift @_;
145
146    my $username  = $self->{'username'};
147    my $collect   = $self->{'collect'};
148    my $gsdl_cgi  = $self->{'gsdl_cgi'};
149    my $gsdlhome  = $self->{'gsdlhome'};
[23478]150    my $infodbtype = $self->{'infodbtype'};
[23400]151   
[23447]152    # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]153    # so (for now) let's not bother to enforce authentication
[21715]154
155    # Obtain the collect dir
[23766]156    my $site = $self->{'site'};
157    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
158    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]159
160    # Make sure the collection isn't locked by someone else
161    $self->lock_collection($username, $collect);
162
163    # look up additional args
164    my $docid  = $self->{'d'};
165    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]166       $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]167    }
168
[21715]169    # Generate the dbkey
[19293]170    my $metaname  = $self->{'metaname'};
171    my $dbkey = "$docid.$metaname";
172
[21715]173    # To people who know $collect_tail please add some comments
174    # Obtain path to the database
[19293]175    my $collect_tail = $collect;
176    $collect_tail =~ s/^.*[\/|\\]//;
[21564]177    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]178    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]179   
180    # Obtain the content of the key
[21569]181    my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]182    if (open(GIN,"$cmd |") == 0) {
[21715]183        # Catch error if gdbmget failed
[19293]184    my $mess = "Failed to get metadata key: $metaname\n";
185    $mess .= "$!\n";
186
187    $gsdl_cgi->generate_error($mess);
188    }
189    else {
[23761]190    binmode(GIN,":utf8");
[21715]191        # Read everything in and concatenate them into $metavalue
[19293]192    my $metavalue = "";
193    my $line;
194    while (defined ($line=<GIN>)) {
195        $metavalue .= $line;
196    }
197    close(GIN);
[21715]198    chomp($metavalue); # Get rid off the tailing newlines
[19293]199    $gsdl_cgi->generate_ok_message("$metavalue");
200    }
[21715]201
202    # Release the lock once it is done
203    $self->unlock_collection($username, $collect);
[19499]204}
[19293]205
206
[19499]207sub get_metadata
208{
209    my $self = shift @_;
210
211    my $username  = $self->{'username'};
212    my $collect   = $self->{'collect'};
213    my $gsdl_cgi  = $self->{'gsdl_cgi'};
214    my $gsdlhome  = $self->{'gsdlhome'};
215
[21715]216    # Authenticate user if it is enabled
[19499]217    if ($baseaction::authentication_enabled) {
218    # Ensure the user is allowed to edit this collection
219    &authenticate_user($gsdl_cgi, $username, $collect);
220    }
221
[21715]222    # Obtain the collect dir
[23766]223    my $site = $self->{'site'};
224    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
225    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]226
227    # Make sure the collection isn't locked by someone else
228    $self->lock_collection($username, $collect);
229
230    # look up additional args
231    my $docid     = $self->{'d'};
232    my $metaname  = $self->{'metaname'};
233    my $metapos   = $self->{'metapos'};
[23400]234    my $infodbtype = $self->{'infodbtype'};
[19499]235
[21715]236    # To people who know $collect_tail please add some comments
237    # Obtain path to the database
[19499]238    my $collect_tail = $collect;
239    $collect_tail =~ s/^.*[\/\\]//;
[21564]240    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]241    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]242
243    # Read the docid entry
[23400]244    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
245 
[21715]246    # Basically loop through and unescape_html the values
[19499]247    foreach my $k (keys %$doc_rec) {
248    my @escaped_v = ();
249    foreach my $v (@{$doc_rec->{$k}}) {
250        my $ev = &ghtml::unescape_html($v);
251        push(@escaped_v, $ev);
252    }
253    $doc_rec->{$k} = \@escaped_v;
254    }
255
[21715]256    # Obtain the specified metadata value
[19499]257    $metapos = 0 if (!defined $metapos);
258    my $metavalue = $doc_rec->{$metaname}->[$metapos];
259    $gsdl_cgi->generate_ok_message("$metavalue");
[21715]260   
261    # Release the lock once it is done
262    $self->unlock_collection($username, $collect);
[19293]263}
264
265
[25097]266sub get_import_metadata
267{
268    my $self = shift @_;
269
270    my $username  = $self->{'username'};
271    my $collect   = $self->{'collect'};
272    my $gsdl_cgi  = $self->{'gsdl_cgi'};
273    my $gsdlhome  = $self->{'gsdlhome'};
274
275    # Authenticate user if it is enabled
276    if ($baseaction::authentication_enabled) {
277        # Ensure the user is allowed to edit this collection
278        &authenticate_user($gsdl_cgi, $username, $collect);
279    }
280
281    # Obtain the collect dir
282    my $site = $self->{'site'};
283    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
284    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
285
286    # Make sure the collection isn't locked by someone else
287    $self->lock_collection($username, $collect);
288
289    # look up additional args
290    my $docid     = $self->{'d'};
291    my $metaname  = $self->{'metaname'};
292    my $infodbtype = $self->{'infodbtype'};
293    if (!defined $docid)
294    {
295        $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
296    }
297
298    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
299    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
300    my $metadata_xml_file;
301    my $import_filename = undef;
302   
303
304    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
305    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
306    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
307
308    # This now stores the full pathname
309    $import_filename = $doc_rec->{'src-file'}->[0];
310
311    # figure out correct metadata.xml file [?]
312    # Assuming the metadata.xml file is next to the source file
313    # Note: This will not work if it is using the inherited metadata from the parent folder
314    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
315    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
316
317    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
318
319    # Release the lock once it is done
320    $self->unlock_collection($username, $collect);
321}
322
323sub get_metadata_from_metadata_xml
324{
325    my $self = shift @_;
326    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
327   
328    my @rules =
329    (
330        _default => 'raw',
331        'Metadata' => \&gfmxml_metadata,
332        'FileName' => \&mxml_filename
333    );
334       
335    my $parser = XML::Rules->new
336    (
337        rules => \@rules,
338        output_encoding => 'utf8'
339    );
340   
341    my $xml_in = "";
342    if (!open(MIN,"<$metadata_xml_filename"))
343    {
344        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
345    }
346    else
347    {
348        # Read them in
349        my $line;
350        while (defined ($line=<MIN>)) {
351            $xml_in .= $line;
352        }
353        close(MIN);
354
355        $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
356       
357        if(defined $parser->{'pad'}->{'metavalue'})
358        {
359            return $parser->{'pad'}->{'metavalue'};
360        }
361        else
362        {
363            return "";
364        }
365    }
366}
367
368sub gfmxml_metadata
369{
370    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
371
372    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
373    {
374        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
375    }
376}
377
378sub get_archives_metadata
379{
380    my $self = shift @_;
381
382    my $username  = $self->{'username'};
383    my $collect   = $self->{'collect'};
384    my $gsdl_cgi  = $self->{'gsdl_cgi'};
385    my $gsdlhome  = $self->{'gsdlhome'};
386    my $infodbtype = $self->{'infodbtype'};
387
388    # Authenticate user if it is enabled
389    if ($baseaction::authentication_enabled) {
390        # Ensure the user is allowed to edit this collection
391        &authenticate_user($gsdl_cgi, $username, $collect);
392    }
393
394    # Obtain the collect dir
395    my $site = $self->{'site'};
396    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
397   
398    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
399
400    # Make sure the collection isn't locked by someone else
401    $self->lock_collection($username, $collect);
402   
403    # look up additional args
404    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
405    $docid_secnum = "" if (!defined $docid_secnum);
406   
407    my $metaname = $self->{'metaname'};
408    my $metapos = $self->{'metapos'};
409    $metapos = 0 if (!defined $metapos);
410   
411    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
412    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
413
414    # This now stores the full pathname
415    my $doc_filename = $doc_rec->{'doc-file'}->[0];
416
417    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
418
419    # Release the lock once it is done
420    $self->unlock_collection($username, $collect);
421}
422
423sub get_metadata_from_archive_xml
424{
425    my $self = shift @_;
426    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
427   
428    my @start_rules = ('Section' => \&dxml_start_section);
429   
430    my @rules =
431    (
432        _default => 'raw',
433        'Metadata' => \&gfdxml_metadata
434    );
435       
436    my $parser = XML::Rules->new
437    (
438        start_rules => \@start_rules,
439        rules => \@rules,
440        output_encoding => 'utf8'
441    );
442   
443    my $xml_in = "";
444    if (!open(MIN,"<$doc_xml_filename"))
445    {
446        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
447    }
448    else
449    {
450        # Read them in
451        my $line;
452        while (defined ($line=<MIN>)) {
453            $xml_in .= $line;
454        }
455        close(MIN);
456
457        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
458       
459        if(defined $parser->{'pad'}->{'metavalue'})
460        {
461            return $parser->{'pad'}->{'metavalue'};
462        }
463        else
464        {
465            return "";
466        }
467    }
468}
469
470sub gfdxml_metadata
471{
472    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
473   
474    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
475    {
476        return;
477    }
478
479    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
480    {
481        if (!defined $parser->{'parameters'}->{'poscount'})
482        {
483            $parser->{'parameters'}->{'poscount'} = 0;
484        }
485        else
486        {
487            $parser->{'parameters'}->{'poscount'}++;
488        }
489    }
490
491    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
492    {   
493        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
494    }
495}
496
[19293]497sub set_live_metadata
498{
499    my $self = shift @_;
500
501    my $username  = $self->{'username'};
502    my $collect   = $self->{'collect'};
503    my $gsdl_cgi  = $self->{'gsdl_cgi'};
504    my $gsdlhome  = $self->{'gsdlhome'};
[23400]505    my $infodbtype = $self->{'infodbtype'};
506 
[19293]507    if ($baseaction::authentication_enabled) {
508    # Ensure the user is allowed to edit this collection
509    &authenticate_user($gsdl_cgi, $username, $collect);
510    }
511
[21715]512    # Obtain the collect dir
[23766]513    my $site = $self->{'site'};
514    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
515    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]516
517    # Make sure the collection isn't locked by someone else
518    $self->lock_collection($username, $collect);
519
520    # look up additional args
521    my $docid     = $self->{'d'};
[21715]522    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
523      $gsdl_cgi->generate_error("No docid (d=...) specified.");
524    }
[19293]525    my $metavalue = $self->{'metavalue'};
[23400]526 
[19293]527
[21715]528    # Generate the dbkey   
529    my $metaname  = $self->{'metaname'};
[19293]530    my $dbkey = "$docid.$metaname";
531
[21715]532    # To people who know $collect_tail please add some comments
533    # Obtain path to the database
[19293]534    my $collect_tail = $collect;
535    $collect_tail =~ s/^.*[\/\\]//;
[21564]536    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]537    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19293]538
[21715]539    # Set the new value
[21569]540    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[19293]541    my $status = system($cmd);
542    if ($status != 0) {
[21715]543        # Catch error if gdbmget failed
[19293]544    my $mess = "Failed to set metadata key: $dbkey\n";
[21715]545
[19293]546    $mess .= "PATH: $ENV{'PATH'}\n";
547    $mess .= "cmd = $cmd\n";
548    $mess .= "Exit status: $status\n";
549    $mess .= "System Error Message: $!\n";
550
[19499]551    $gsdl_cgi->generate_error($mess);
[19293]552    }
553    else {
[19499]554    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
[19293]555    }
[21715]556   
557    # Release the lock once it is done
558    $self->unlock_collection($username, $collect);
[19293]559}
560
[24071]561sub set_metadata_entry
562{
563    my $self = shift @_;
564    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
565   
566    # To people who know $collect_tail please add some comments
567    # Obtain path to the database
568    my $collect_tail = $collect;
569    $collect_tail =~ s/^.*[\/\\]//;
570    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
571    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
572   
573#   print STDERR "**** infodb file path = $infodb_file_path\n";
574#   print STDERR "***** infodb type = $infodbtype\n";
575   
576    # Read the docid entry
577    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
578   
579    # Set the metadata value
580    if (defined $metapos) {
581        $doc_rec->{$metaname}->[$metapos] = $metavalue;
582    }
583    else {
584        $doc_rec->{$metaname} = [ $metavalue ];
585    }
586 
587    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
588   
589    return $status;
590   
591}
[19293]592
[19499]593sub set_metadata
594{
595    my $self = shift @_;
[19293]596
[19499]597    my $username  = $self->{'username'};
598    my $collect   = $self->{'collect'};
599    my $gsdl_cgi  = $self->{'gsdl_cgi'};
600    my $gsdlhome  = $self->{'gsdlhome'};
[19293]601
[19499]602    if ($baseaction::authentication_enabled) {
603    # Ensure the user is allowed to edit this collection
604    &authenticate_user($gsdl_cgi, $username, $collect);
605    }
606
[23766]607    my $site = $self->{'site'};
608    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609   
610    $gsdl_cgi->checked_chdir($collect_dir);
611
[21715]612    # Obtain the collect dir
[23766]613    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]614
615    # Make sure the collection isn't locked by someone else
616    $self->lock_collection($username, $collect);
617
618    # look up additional args
619    my $docid     = $self->{'d'};
620    my $metaname  = $self->{'metaname'};
621    my $metapos   = $self->{'metapos'};
622    my $metavalue = $self->{'metavalue'};
[23761]623    my $infodbtype = $self->{'infodbtype'};
[23400]624   
[24071]625    my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
[23400]626   
[19499]627    if ($status != 0) {
[23761]628        # Catch error if set infodb entry failed
629    my $mess = "Failed to set metadata key: $docid\n";
[19499]630   
[23761]631    $mess .= "PATH: $ENV{'PATH'}\n";
632    $mess .= "Exit status: $status\n";
633    $mess .= "System Error Message: $!\n";
634   
635    $gsdl_cgi->generate_error($mess);
[19499]636    }
637    else {
[24071]638    my $mess = "set-metadata successful: Key[$docid]\n";
[23761]639    $mess .= "  $metaname";
640    $mess .= "->[$metapos]" if (defined $metapos);
641    $mess .= " = $metavalue";
642   
643    $gsdl_cgi->generate_ok_message($mess);
[19499]644    }
[21715]645   
646    # Release the lock once it is done
647    $self->unlock_collection($username, $collect);
[19499]648}
649
650
[24071]651sub set_metadata_array
652{
653    my $self = shift @_;
654
655    my $username  = $self->{'username'};
656    my $collect   = $self->{'collect'};
657    my $gsdl_cgi  = $self->{'gsdl_cgi'};
658    my $gsdlhome  = $self->{'gsdlhome'};
659
660    if ($baseaction::authentication_enabled) {
661    # Ensure the user is allowed to edit this collection
662    &authenticate_user($gsdl_cgi, $username, $collect);
663    }
664
665    my $site = $self->{'site'};
666    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
667   
668    $gsdl_cgi->checked_chdir($collect_dir);
669
670    # Obtain the collect dir
671    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
672
673    # Make sure the collection isn't locked by someone else
674    $self->lock_collection($username, $collect);
675
676    # look up additional args
677   
678    my $infodbtype = $self->{'infodbtype'};
679   
680    my $json_str      = $self->{'json'};
681    my $doc_array = decode_json $json_str;
682   
683   
684    my $global_status = 0;
685    my $global_mess = "";
686   
687    my @all_docids = ();
688   
689    foreach my $doc_array_rec ( @$doc_array ) {
690       
691        my $docid     = $doc_array_rec->{'docid'};
692        my $metaname  = $doc_array_rec->{'metaname'};
693        my $metapos   = $doc_array_rec->{'metapos'};
694        my $metavalue = $doc_array_rec->{'metavalue'};
695       
696        push(@all_docids,$docid);
697       
698        my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
699       
700        if ($status != 0) {
701            # Catch error if set infodb entry failed
702            $global_status = $status;
703            $global_mess .= "Failed to set metadata key: $docid\n";
704            $global_mess .= "Exit status: $status\n";
705            $global_mess .= "System Error Message: $!\n";
706            $global_mess .= "-" x 20;
707        }
708    }
709
710    if ($global_status != 0) {
711        $global_mess .= "PATH: $ENV{'PATH'}\n";
712        $gsdl_cgi->generate_error($global_mess);
713    }
714    else {
715        my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
716        $gsdl_cgi->generate_ok_message($mess);
717    }
718   
719    # Release the lock once it is done
720    $self->unlock_collection($username, $collect);
721}
722
723
[20538]724sub dxml_metadata
725{
726    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
727    my $metaname = $parser->{'parameters'}->{'metaname'};
728    my $metamode = $parser->{'parameters'}->{'metamode'};
[23761]729   
730    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731   
732    # Find the right metadata tag and checks if we are going to
733    # override it
734    #
735    # Note: This over writes the first metadata block it
736    # encountered. If there are multiple Sections in the doc.xml, it
737    # might not behave as you would expect
[20538]738
[23761]739    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741##    print STDERR "**** metamode = $metamode\n";
742   
[25097]743    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744    {
745        my $name_attr = $attrHash->{'name'};
746        if (($name_attr eq $metaname) && ($metamode eq "override"))
747        {
748            if (!defined $parser->{'parameters'}->{'poscount'})
749            {
750                $parser->{'parameters'}->{'poscount'} = 0;
751            }
752            else
753            {
754                $parser->{'parameters'}->{'poscount'}++;
755            }
756           
757            if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758            {
759                ##print STDERR "**** got match!!\n";
760                # Get the value and override the current value
761                my $metavalue = $parser->{'parameters'}->{'metavalue'};
762                $attrHash->{'_content'} = $metavalue;
763               
764                # Don't want it to wipe out any other pieces of metadata
765                $parser->{'parameters'}->{'metamode'} = "done";
766            }
767            else if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} == $attrHash->{'_content'})
768            {
769                $attrHash->{'_content'} = $metavalue;
770                $parser->{'parameters'}->{'metamode'} = "done";
771            }
772        }
[20538]773    }
774
[21716]775    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
776    return [$tagname => $attrHash];
[20538]777}
778
779
780sub dxml_description
781{
782    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
783    my $metamode = $parser->{'parameters'}->{'metamode'};
784
[21715]785    # Accumulate the metadata
786    # NOTE: This appends new metadata element to all description fields.
787    # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
[23761]788    if (($metamode eq "accumulate") || ($metamode eq "override")) {
789    # If get to here and metamode is override, the this means there
790    # was no existing value to overide => treat as an append operation
791
792    # Tack a new metadata tag on to the end of the <Metadata>+ block
[20538]793    my $metaname = $parser->{'parameters'}->{'metaname'};
794    my $metavalue = $parser->{'parameters'}->{'metavalue'};
795   
796    my $metadata_attr = { '_content' => $metavalue,
797                  'name'     => $metaname,
798                  'mode'     => "accumulate" };
799
800    my $append_metadata = [ "Metadata" => $metadata_attr ];
801    my $description_content = $attrHash->{'_content'};
802
[23761]803##  print STDERR "**** appending to doc.xml\n";
804
[20538]805    push(@$description_content,"    ", $append_metadata ,"\n        ");
[23761]806    $parser->{'parameters'}->{'metamode'} = "done";
[20538]807    }
808
[21716]809
810    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
811    return [$tagname => $attrHash];
[20538]812}
813
[21715]814
[23761]815sub dxml_start_section
816{
817    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
818
819    my $new_depth = scalar(@$contextArray);
820
821    if ($new_depth == 1) {
822    $parser->{'parameters'}->{'curr_section_depth'} = 1;
823    $parser->{'parameters'}->{'curr_section_num'}   = "";
824    }
825
826    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
827    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
828
829    my $new_secnum;
830
831    if ($new_depth > $old_depth) {
832    # child subsection
833    $new_secnum = "$old_secnum.1";
834    }
835    elsif ($new_depth == $old_depth) {
836    # sibling section => increase it's value by 1
837    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
838    $tail_num++;
839    $new_secnum = $old_secnum;
840    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
841    }
842    else {
843    # back up to parent section => lopp off tail
844    $new_secnum = $old_secnum;
845    $new_secnum =~ s/\.\d+$//;
846    }
847
848    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
849    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
850
[25097]851    ##print STDERR "*** In Section: $new_secnum\n";
[23761]852}
853
[20538]854sub edit_xml_file
855{
856    my $self = shift @_;
[23761]857    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
[20538]858
859    # use XML::Rules to add it in (read in and out again)
[23761]860    my $parser = XML::Rules->new(start_rules     => $start_rules,
861                 rules           => $rules,
862                 style           => 'filter',
863                                 output_encoding => 'utf8' );
[20538]864
865    my $xml_in = "";
866    if (!open(MIN,"<$filename")) {
867    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
868    }
869    else {
[21715]870        # Read all the text in
[20538]871    my $line;
872    while (defined ($line=<MIN>)) {
873        $xml_in .= $line;
874    }
875    close(MIN);
876   
[23761]877    my $MOUT;   
878    if (!open($MOUT,">$filename")) {
[20538]879        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
880    }
881    else {
[23761]882        # Matched lines will get handled by the call backs
883##      my $xml_out = "";
884
885        binmode($MOUT,":utf8");
886        $parser->filter($xml_in,$MOUT, $options);
887
888#       binmode(MOUT,":utf8");
889#       print MOUT $xml_out;
890        close($MOUT);       
[20538]891    }
892    }
893}
894
895sub edit_doc_xml
896{
897    my $self = shift @_;
[25097]898    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
[20538]899
[23761]900    # To monitor which section/subsection number we are in
901    my @start_rules =
902    ( 'Section'    => \&dxml_start_section );
903
[20538]904    # use XML::Rules to add it in (read in and out again)
[21715]905    # Set the call back functions
[20538]906    my @rules =
[21716]907    ( _default => 'raw',
[23761]908      'Metadata'    => \&dxml_metadata,
909      'Description' => \&dxml_description);
[20538]910     
[21715]911    # Sets the parameters
[20538]912    my $options = { 'metaname'  => $metaname,
913            'metapos'   => $metapos,
[23400]914            'metavalue' => $metavalue,
[25097]915            'metamode'  => $metamode,
916            'prevmetavalue' => $prevmetavalue };
[23400]917           
[23761]918    if (defined $opt_secnum) {
919    $options->{'secnum'} = $opt_secnum;
920    }
921
922    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
[20538]923}
924
[24071]925sub set_archives_metadata_entry
926{
927    my $self = shift @_;
[25097]928    my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
[24071]929   
930    # Obtain the doc.xml path for the specified docID
931    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
[20538]932
[24071]933    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
934    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
935    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
936   
937    # The $doc_xml_file is relative to the archives, and now let's get the full path
938    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
939    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
940   
941    # Edit the doc.xml file with the specified metadata name, value and position.
942    # TODO: there is a potential problem here as this edit_doc_xml function
943    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
944    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
945   
946    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
[25097]947            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
[24071]948   
949    return 0; # return 0 for now to indicate no error
950           
951}
952
953
[20538]954sub set_archives_metadata
955{
956    my $self = shift @_;
957
958    my $username  = $self->{'username'};
959    my $collect   = $self->{'collect'};
960    my $gsdl_cgi  = $self->{'gsdl_cgi'};
961    my $gsdlhome  = $self->{'gsdlhome'};
[23400]962    my $infodbtype = $self->{'infodbtype'};
963   
[20538]964    if ($baseaction::authentication_enabled) {
[24071]965        # Ensure the user is allowed to edit this collection
966        $self->authenticate_user($username, $collect);
[20538]967    }
968
[24071]969    my $site = $self->{'site'};
970       
[23766]971    # Obtain the collect and archive dir   
972    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
973   
[20538]974    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
975
976    # Make sure the collection isn't locked by someone else
977    $self->lock_collection($username, $collect);
978
979    # look up additional args
980    my $docid  = $self->{'d'};
981    my $metaname   = $self->{'metaname'};
982    my $metavalue  = $self->{'metavalue'};
[25097]983    my $prevmetavalue = $self->{'prevmetavalue'}
[23400]984   
[20538]985    my $metapos    = $self->{'metapos'};
986    $metapos = 0 if (!defined $metapos);
[23400]987
[23761]988    my $metamode   = $self->{'metamode'};
[23400]989    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
990    # make "accumulate" the default (less destructive, as won't actually
991    # delete any existing values)
992    $metamode = "accumulate";
[24071]993    }
994   
995    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
[25097]996                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
[24071]997   
998    # Release the lock once it is done
999    $self->unlock_collection($username, $collect);
1000
1001    if ($status == 0) {
1002        my $mess = "set-archives-metadata successful: Key[$docid]\n";
1003        $mess .= "  $metaname";
1004        $mess .= "->[$metapos]" if (defined $metapos);
1005        $mess .= " = $metavalue";
1006        $mess .= " ($metamode)\n";
1007   
1008        $gsdl_cgi->generate_ok_message($mess); 
1009    }
1010    else {
1011        my $mess .= "Failed to set archives metadata key: $docid\n";
1012        $mess .= "Exit status: $status\n";
1013        $mess .= "System Error Message: $!\n";
1014        $mess .= "-" x 20 . "\n";
1015       
1016        $gsdl_cgi->generate_error($mess);
1017    }
1018}
1019
1020
1021sub set_archives_metadata_array
1022{
1023    my $self = shift @_;
1024
1025    my $username  = $self->{'username'};
1026    my $collect   = $self->{'collect'};
1027    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1028    my $gsdlhome  = $self->{'gsdlhome'};
1029
1030    if ($baseaction::authentication_enabled) {
1031    # Ensure the user is allowed to edit this collection
1032    &authenticate_user($gsdl_cgi, $username, $collect);
[23400]1033    }
[23761]1034
[24071]1035    my $site = $self->{'site'};
1036    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1037   
1038    $gsdl_cgi->checked_chdir($collect_dir);
1039
1040    # Obtain the collect dir
1041    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1042
1043    # Make sure the collection isn't locked by someone else
1044    $self->lock_collection($username, $collect);
1045
1046    # look up additional args
1047   
1048    my $infodbtype = $self->{'infodbtype'};
1049
1050   my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1051   
1052    my $json_str      = $self->{'json'};
1053    my $doc_array = decode_json $json_str;
1054   
1055   
1056    my $global_status = 0;
1057    my $global_mess = "";
1058   
1059    my @all_docids = ();
1060   
1061    foreach my $doc_array_rec ( @$doc_array ) {
1062       
1063        my $docid     = $doc_array_rec->{'docid'};
1064        my $metaname  = $doc_array_rec->{'metaname'};
1065        my $metapos   = $doc_array_rec->{'metapos'};
1066        my $metamode   = $self->{'metamode'};
1067        my $metavalue = $doc_array_rec->{'metavalue'};
1068       
1069        # Some sanity checks
1070        $metapos = 0 if (!defined $metapos);
1071           
1072        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1073            # make "accumulate" the default (less destructive, as won't actually
1074            # delete any existing values)
1075            $metamode = "accumulate";
1076        }
1077   
1078        push(@all_docids,$docid);
1079       
1080        my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1081                $metaname,$metapos,$metavalue,$metamode);
1082       
1083        if ($status != 0) {
1084            # Catch error if set infodb entry failed
1085            $global_status = $status;
1086            $global_mess .= "Failed to set metadata key: $docid\n";
1087            $global_mess .= "Exit status: $status\n";
1088            $global_mess .= "System Error Message: $!\n";
1089            $global_mess .= "-" x 20 . "\n";
1090        }
1091    }
1092
1093    if ($global_status != 0) {
1094        $global_mess .= "PATH: $ENV{'PATH'}\n";
1095        $gsdl_cgi->generate_error($global_mess);
1096    }
1097    else {
1098        my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1099        $gsdl_cgi->generate_ok_message($mess);
1100    }
[20538]1101   
[23761]1102    # Release the lock once it is done
1103    $self->unlock_collection($username, $collect);
[20538]1104}
1105
[24943]1106sub remove_archives_metadata
1107{
1108    my $self = shift @_;
[20538]1109
[24943]1110    my $username  = $self->{'username'};
1111    my $collect   = $self->{'collect'};
1112    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1113    my $gsdlhome  = $self->{'gsdlhome'};
1114    my $infodbtype = $self->{'infodbtype'};
1115   
1116    if ($baseaction::authentication_enabled)
1117    {
1118        # Ensure the user is allowed to edit this collection
1119        &authenticate_user($gsdl_cgi, $username, $collect);
1120    }
1121   
1122    my $site = $self->{'site'};
1123       
1124    # Obtain the collect and archive dir   
1125    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1126   
1127    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1128
1129    # Make sure the collection isn't locked by someone else
1130    $self->lock_collection($username, $collect);
1131   
1132    # look up additional args
1133    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1134   
1135    my $metaname = $self->{'metaname'};
1136    my $metapos = $self->{'metapos'};
1137    $metapos = 0 if (!defined $metapos);
1138   
1139    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1140    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1141
1142    # This now stores the full pathname
[25097]1143    my $doc_filename = $doc_rec->{'doc-file'}->[0];
[24943]1144
[25097]1145    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
[24943]1146   
1147    # Release the lock once it is done
1148    $self->unlock_collection($username, $collect);
1149
1150    if ($status == 0)
1151    {
1152        my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1153        $mess .= "  $metaname";
1154        $mess .= "->[$metapos]" if (defined $metapos);
1155
1156        $gsdl_cgi->generate_ok_message($mess); 
1157    }
1158    else
1159    {
1160        my $mess .= "Failed to remove archives metadata key: $docid\n";
1161        $mess .= "Exit status: $status\n";
1162        $mess .= "System Error Message: $!\n";
1163        $mess .= "-" x 20 . "\n";
1164       
1165        $gsdl_cgi->generate_error($mess);
1166    }
1167}
1168
1169sub remove_from_doc_xml
1170{
1171    my $self = shift @_;
[24949]1172    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
[24943]1173   
1174    my @start_rules = ('Section' => \&dxml_start_section);
1175   
1176    # Set the call-back functions for the metadata tags
1177    my @rules =
1178    (
1179        _default => 'raw',
1180        'Metadata' => \&rfdxml_metadata
1181    );
1182       
1183    my $parser = XML::Rules->new
1184    (
1185        start_rules => \@start_rules,
1186        rules => \@rules,
1187        style => 'filter',
1188        output_encoding => 'utf8'
1189    );
1190   
1191    my $status = 0;
1192    my $xml_in = "";
1193    if (!open(MIN,"<$doc_xml_filename"))
1194    {
1195        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1196        $status = 1;
1197    }
1198    else
1199    {
1200        # Read them in
1201        my $line;
1202        while (defined ($line=<MIN>)) {
1203            $xml_in .= $line;
1204        }
1205        close(MIN);
1206
1207        # Filter with the call-back functions
1208        my $xml_out = "";
1209
1210        my $MOUT;
1211        if (!open($MOUT,">$doc_xml_filename")) {
1212            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1213            $status = 1;
1214        }
1215        else {
1216            binmode($MOUT,":utf8");
[24949]1217            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
[24943]1218            close($MOUT);       
1219        }
1220    }
1221    return $status;
1222}
1223
1224sub rfdxml_metadata
1225{
1226    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1227
1228    if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1229    {
1230        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1231        return [$tagname => $attrHash];
1232    }
1233
1234    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1235    {
1236        if (!defined $parser->{'parameters'}->{'poscount'})
1237        {
1238            $parser->{'parameters'}->{'poscount'} = 0;
1239        }
1240        else
1241        {
1242            $parser->{'parameters'}->{'poscount'}++;
1243        }
1244    }
1245   
1246    if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1247    {   
1248        return [];
1249    }
1250   
[24949]1251    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1252    {   
1253        return [];
1254    }
1255   
[24943]1256    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1257    return [$tagname => $attrHash];
1258}
1259
[19293]1260sub mxml_metadata
1261{
1262    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1263    my $metaname = $parser->{'parameters'}->{'metaname'};
1264    my $metamode = $parser->{'parameters'}->{'metamode'};
1265
[21716]1266    # Report error if we don't see FileName tag before this
[25097]1267    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1268   
1269    # Don't do anything if we are not in the right FileSet
1270    my $file_regexp = $parser->{'parameters'}->{'current_file'};
[23761]1271    if ($file_regexp =~ /\.\*/) {
1272    # Only interested in a file_regexp if it specifies precisely one
1273    # file. 
1274    # So, skip anything with a .* in it as it is too general
1275    return [$tagname => $attrHash];
1276    }
1277    my $src_file = $parser->{'parameters'}->{'src_file'};
1278    if (!($src_file =~ /$file_regexp/)) {
1279    return [$tagname => $attrHash];
1280    }
1281##    print STDERR "*** mxl metamode = $metamode\n";
1282
[21715]1283    # Find the right metadata tag and checks if we are going to override it
[19293]1284    my $name_attr = $attrHash->{'name'};
1285    if (($name_attr eq $metaname) && ($metamode eq "override")) {
[21715]1286        # Get the value and override the current value
[19293]1287    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1288    $attrHash->{'_content'} = $metavalue;
1289
[23761]1290##  print STDERR "**** overrideing metadata.xml\n";
1291
[19293]1292    # Don't want it to wipe out any other pieces of metadata
1293    $parser->{'parameters'}->{'metamode'} = "done";
1294    }
1295
[21716]1296    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1297    return [$tagname => $attrHash];
[19293]1298}
1299
1300
1301sub mxml_description
1302{
1303    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]1304    my $metamode = $parser->{'parameters'}->{'metamode'};   
[19293]1305
[21716]1306    # Failed... Report error if we don't see FileName tag before this
[25097]1307    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1308
1309    # Don't do anything if we are not in the right FileSet
1310    my $file_regexp = $parser->{'parameters'}->{'current_file'};
[24943]1311    if ($file_regexp =~ m/\.\*/) {
[23761]1312    # Only interested in a file_regexp if it specifies precisely one
1313    # file. 
1314    # So, skip anything with a .* in it as it is too general
1315    return [$tagname => $attrHash];
1316    }
1317    my $src_file = $parser->{'parameters'}->{'src_file'};
[24943]1318   
1319    if (!($src_file =~ m/$file_regexp/)) {
[23761]1320    return [$tagname => $attrHash];
1321    }
[21716]1322
[21715]1323    # Accumulate the metadata block to the end of the description block
1324    # Note: This adds metadata block to all description blocks, so if there are
1325    # multiple FileSets, it will add to all of them
[23761]1326    if (($metamode eq "accumulate") || ($metamode eq "override")) {
1327    # if metamode was "override" but get to here then it failed to
1328    # find an item to override, in which case it should append its
1329    # value to the end, just like the "accumulate" mode
1330
[19293]1331    # tack a new metadata tag on to the end of the <Metadata>+ block
1332    my $metaname = $parser->{'parameters'}->{'metaname'};
1333    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1334   
1335    my $metadata_attr = { '_content' => $metavalue,
1336                  'name'     => $metaname,
1337                  'mode'     => "accumulate" };
1338
1339    my $append_metadata = [ "Metadata" => $metadata_attr ];
1340    my $description_content = $attrHash->{'_content'};
[24943]1341   
[23761]1342##  print STDERR "*** appending to metadata.xml\n";
1343
1344    # append the new metadata element to the end of the current
1345    # content contained inside this tag
[24943]1346    if (ref($description_content) eq "") {
1347        # => string or numeric literal
1348        # this is caused by a <Description> block has no <Metadata> child elements
1349        # => set up an empty array in '_content'
1350        $attrHash->{'_content'} = [ "\n" ];
1351        $description_content = $attrHash->{'_content'};
1352    }
1353
[19293]1354    push(@$description_content,"    ", $append_metadata ,"\n        ");
[23761]1355    $parser->{'parameters'}->{'metamode'} = "done";
[19293]1356    }
1357
[21716]1358    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1359    return [$tagname => $attrHash];
[19293]1360}
1361
[21715]1362
[21716]1363sub mxml_filename
1364{
1365    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1366
1367    # Store the filename of the Current Fileset
1368    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1369    # FileName tag must come before Description tag
1370    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1371
1372    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1373    return [$tagname => $attrHash];
1374}
1375
1376
1377sub mxml_fileset
1378{
1379    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1380
1381    # Initilise the current_file
1382    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1383    # FileName tag must come before Description tag
1384    $parser->{'parameters'}->{'current_file'} = "";
1385
1386    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1387    return [$tagname => $attrHash];
1388}
1389
1390
[19293]1391sub edit_metadata_xml
1392{
1393    my $self = shift @_;
[21716]1394    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
[19293]1395
[21715]1396    # Set the call-back functions for the metadata tags
[19293]1397    my @rules =
[21716]1398    ( _default => 'raw',
1399          'FileName' => \&mxml_filename,
[19293]1400      'Metadata' => \&mxml_metadata,
[21716]1401      'Description' => \&mxml_description,
1402          'FileSet' => \&mxml_fileset);
[19293]1403
[21715]1404    # use XML::Rules to add it in (read in and out again)
[19293]1405    my $parser = XML::Rules->new(rules => \@rules,
[21716]1406                 style => 'filter',
1407                                 output_encoding => 'utf8');
[19293]1408
[24943]1409    if (!-e $metadata_xml_filename) {
1410   
1411        if (open(MOUT,">$metadata_xml_filename")) {
1412           
1413            my $src_file_re = &util::filename_to_regex($src_file);
1414            # shouldn't the following also be in the above utility routine??
1415            # $src_file_re =~ s/\./\\./g;
1416       
1417            print MOUT "<?xml version=\"1.0\"?>\n";
1418            print MOUT "<DirectoryMetadata>\n";
1419            print MOUT " <FileSet>\n";
1420            print MOUT "  <FileName>$src_file_re</FileName>\n";
1421            print MOUT "  <Description>\n";
1422            print MOUT "  </Description>\n";
1423            print MOUT " </FileSet>\n";
1424            print MOUT "</DirectoryMetadata>\n";
[23761]1425
[24943]1426            close(MOUT);
1427        }
1428        else {
1429            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1430        }
[19293]1431    }
[24943]1432   
1433   
1434    my $xml_in = "";
1435    if (!open(MIN,"<$metadata_xml_filename")) {
1436        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1437    }
[19293]1438    else {
[24943]1439        # Read them in
1440        my $line;
1441        while (defined ($line=<MIN>)) {
1442            $xml_in .= $line;
1443        }
1444        close(MIN);
[23761]1445
[24943]1446        # Filter with the call-back functions
1447        my $xml_out = "";
[23761]1448
[24943]1449        my $MOUT;
1450        if (!open($MOUT,">$metadata_xml_filename")) {
1451            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1452        }
1453        else {
1454            binmode($MOUT,":utf8");
1455
1456            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1457            # At the moment, I will just hack it!
1458            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1459            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1460            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1461            #print MOUT $xml_out;
1462
1463            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1464                      metavalue => $metavalue,
1465                      metamode => $metamode,
1466                      src_file => $src_file,
1467                      current_file => undef} );
1468            close($MOUT);       
1469        }
1470   }
[20538]1471}
[19293]1472
1473
1474sub set_import_metadata
1475{
1476    my $self = shift @_;
[21715]1477   
[19293]1478    my $username  = $self->{'username'};
1479    my $collect   = $self->{'collect'};
1480    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1481    my $gsdlhome  = $self->{'gsdlhome'};
[23400]1482    my $infodbtype = $self->{'infodbtype'};
1483   
[19293]1484    if ($baseaction::authentication_enabled) {
[25097]1485        # Ensure the user is allowed to edit this collection
1486        $self->authenticate_user($username, $collect);
[19293]1487    }
1488
[23761]1489
[21715]1490    # Obtain the collect and archive dir   
[23766]1491    my $site = $self->{'site'};
1492    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1493
1494    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]1495    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1496
1497    # Make sure the collection isn't locked by someone else
1498    $self->lock_collection($username, $collect);
[21716]1499   
[19293]1500    # look up additional args
1501    # want either d= or f=
1502    my $docid  = $self->{'d'};
1503    my $import_file  = $self->{'f'};
1504    if ((!defined $docid) && (!defined $import_file)) {
1505    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1506    }
1507
[21715]1508    # Get the parameters and set default mode to "accumulate"
[19293]1509    my $metaname   = $self->{'metaname'};
1510    my $metavalue  = $self->{'metavalue'};
[23761]1511##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1512    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1513    print STDERR "*** set import meta: val = $metavalue\n";
1514   
[19293]1515    my $metamode   = $self->{'metamode'};
1516    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1517    # make "accumulate" the default (less destructive, as won't actually
1518    # delete any existing values)
1519    $metamode = "accumulate";
1520    }
1521
[21715]1522    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1523    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
[19293]1524    my $metadata_xml_file;
[20935]1525    my $import_filename = undef;
[19293]1526    if (defined $docid) {
[23400]1527    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1528    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[19293]1529
[20935]1530    # This now stores the full pathname
[23761]1531    $import_filename = $doc_rec->{'src-file'}->[0];
[19293]1532    }
[20935]1533    else {
[21715]1534        $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
[20935]1535    }
[23761]1536
[21715]1537    # figure out correct metadata.xml file [?]
1538    # Assuming the metadata.xml file is next to the source file
1539    # Note: This will not work if it is using the inherited metadata from the parent folder
[24943]1540    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
[19293]1541    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1542
[21715]1543    # Edit the metadata.xml
[21716]1544    # Modified by Jeffrey from DL Consulting
1545    # Handle the case where there is one metadata.xml file for multiple FileSets
1546    # The XML filter needs to know whether it is in the right FileSet
1547    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1548    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1549    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1550                             $metaname, $metavalue, $metamode, $import_tailname);
[19293]1551
[21715]1552    # Release the lock once it is done
1553    $self->unlock_collection($username, $collect);
[23761]1554
1555    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1556    $mess .= "  $metaname";
1557    $mess .= " = $metavalue";
1558    $mess .= " ($metamode)\n";
1559   
1560    $gsdl_cgi->generate_ok_message($mess);
1561   
[19293]1562}
1563
[25097]1564sub set_import_metadata_array
1565{
1566    my $self = shift @_;
1567
1568    my $username  = $self->{'username'};
1569    my $collect   = $self->{'collect'};
1570    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1571    my $gsdlhome  = $self->{'gsdlhome'};
1572
1573    if ($baseaction::authentication_enabled) {
1574        # Ensure the user is allowed to edit this collection
1575        &authenticate_user($gsdl_cgi, $username, $collect);
1576    }
1577
1578    my $site = $self->{'site'};
1579    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1580   
1581    $gsdl_cgi->checked_chdir($collect_dir);
1582
1583    # Make sure the collection isn't locked by someone else
1584    $self->lock_collection($username, $collect);
1585
1586    # look up additional args
1587   
1588    my $infodbtype = $self->{'infodbtype'};
1589
1590    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1591   
1592    my $json_str = $self->{'json'};
1593    my $doc_array = decode_json $json_str;
1594   
1595    my $global_status = 0;
1596    my $global_mess = "";
1597   
1598    my @all_docids = ();
1599   
1600    foreach my $doc_array_rec ( @$doc_array )
1601    {   
1602        my $docid = $doc_array_rec->{'docid'};
1603        my $metaname = $doc_array_rec->{'metaname'};
1604        my $metamode = $self->{'metamode'};
1605        my $metavalue = $doc_array_rec->{'metavalue'};
1606
1607        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1608            # make "accumulate" the default (less destructive, as won't actually
1609            # delete any existing values)
1610            $metamode = "accumulate";
1611        }
1612
1613        push(@all_docids,$docid);
1614
1615        # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1616        # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1617        my $metadata_xml_file;
1618        my $import_filename = undef;
1619
1620        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1621        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1622
1623        # This now stores the full pathname
1624        $import_filename = $doc_rec->{'src-file'}->[0];
1625
1626        # figure out correct metadata.xml file [?]
1627        # Assuming the metadata.xml file is next to the source file
1628        # Note: This will not work if it is using the inherited metadata from the parent folder
1629        my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1630        my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1631
1632        $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1633    }
1634
1635    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1636    $gsdl_cgi->generate_ok_message($mess);
1637   
1638    # Release the lock once it is done
1639    $self->unlock_collection($username, $collect);
1640}
1641
[24943]1642sub remove_import_metadata
1643{
1644    my $self = shift @_;
1645   
1646    my $username = $self->{'username'};
1647    my $collect   = $self->{'collect'};
1648    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1649   
1650    if ($baseaction::authentication_enabled) {
1651        # Ensure the user is allowed to edit this collection
1652        &authenticate_user($gsdl_cgi, $username, $collect);
1653    }
[19293]1654
[24943]1655    my $gsdlhome  = $self->{'gsdlhome'};
1656    my $infodbtype = $self->{'infodbtype'};
1657   
1658    # Obtain the collect dir
1659    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1660    my $site = $self->{'site'};
1661    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1662   
1663    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1664    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1665   
1666    # Make sure the collection isn't locked by someone else
1667    $self->lock_collection($username, $collect);
1668   
1669    # look up additional args
1670    my $docid = $self->{'d'};
1671    if ((!defined $docid) || ($docid =~ m/^\s*$/))
1672    {
[25097]1673        $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
[24943]1674    }
1675   
1676    my $metaname = $self->{'metaname'};
1677    my $metavalue = $self->{'metavalue'};
1678    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1679   
1680    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1681    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1682    my $metadata_xml_file;
1683    my $import_filename = undef;
1684    if (defined $docid)
1685    {
1686        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1687        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1688
1689        # This now stores the full pathname
1690        $import_filename = $doc_rec->{'src-file'}->[0];
1691    }
1692
1693    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1694    {
[25097]1695        $gsdl_cgi->generate_error_message("There is no metadata\n");
[24943]1696    }
1697   
1698    # figure out correct metadata.xml file [?]
1699    # Assuming the metadata.xml file is next to the source file
1700    # Note: This will not work if it is using the inherited metadata from the parent folder
1701    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1702    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1703   
1704    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1705   
1706    # Release the lock once it is done
1707    $self->unlock_collection($username, $collect);
1708
1709    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1710    $mess .= "  $metaname";
1711    $mess .= " = $metavalue\n";
1712   
1713    $gsdl_cgi->generate_ok_message($mess);
1714}
1715
1716sub remove_from_metadata_xml
1717{
1718    my $self = shift @_;
1719    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1720   
1721    # Set the call-back functions for the metadata tags
1722    my @rules =
1723    (
1724        _default => 'raw',
1725        'Metadata' => \&rfmxml_metadata,
1726        'FileName' => \&mxml_filename
1727    );
1728       
1729    my $parser = XML::Rules->new
1730    (
1731        rules => \@rules,
1732        style => 'filter',
1733        output_encoding => 'utf8'
1734    );
1735   
1736    my $xml_in = "";
1737    if (!open(MIN,"<$metadata_xml_filename"))
1738    {
1739        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1740    }
1741    else
1742    {
1743        # Read them in
1744        my $line;
1745        while (defined ($line=<MIN>)) {
1746            $xml_in .= $line;
1747        }
1748        close(MIN);
1749
1750        # Filter with the call-back functions
1751        my $xml_out = "";
1752
1753        my $MOUT;
1754        if (!open($MOUT,">$metadata_xml_filename")) {
1755            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1756        }
1757        else {
1758            binmode($MOUT,":utf8");
1759            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1760            close($MOUT);       
1761        }
1762    }
1763}
1764
1765sub rfmxml_metadata
1766{
1767    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1768
1769    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1770    {
1771        return [];
1772    }
1773
1774    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1775    return [$tagname => $attrHash];
1776}
1777
[19499]1778sub remove_live_metadata
1779{
1780    my $self = shift @_;
1781
1782    my $username  = $self->{'username'};
1783    my $collect   = $self->{'collect'};
1784    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1785    my $gsdlhome  = $self->{'gsdlhome'};
[23400]1786    my $infodbtype = $self->{'infodbtype'};
1787   
[19499]1788    if ($baseaction::authentication_enabled) {
1789    # Ensure the user is allowed to edit this collection
1790    &authenticate_user($gsdl_cgi, $username, $collect);
1791    }
1792
[21715]1793    # Obtain the collect dir
[23766]1794    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1795    my $site = $self->{'site'};
1796    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[19499]1797
1798    # Make sure the collection isn't locked by someone else
1799    $self->lock_collection($username, $collect);
1800
1801    # look up additional args
1802    my $docid     = $self->{'d'};
[21715]1803    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1804      $gsdl_cgi->generate_error("No docid (d=...) specified.");
1805    }
1806   
1807    # Generate the dbkey
[19499]1808    my $metaname  = $self->{'metaname'};
1809    my $dbkey = "$docid.$metaname";
1810
[21715]1811    # To people who know $collect_tail please add some comments
1812    # Obtain the live gdbm_db path
[19499]1813    my $collect_tail = $collect;
1814    $collect_tail =~ s/^.*[\/\\]//;
[21564]1815    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1816    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19499]1817
[21715]1818    # Remove the key
[21569]1819    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
[19499]1820    my $status = system($cmd);
1821    if ($status != 0) {
[21715]1822        # Catch error if gdbmdel failed
[19499]1823    my $mess = "Failed to set metadata key: $dbkey\n";
1824   
1825    $mess .= "PATH: $ENV{'PATH'}\n";
1826    $mess .= "cmd = $cmd\n";
1827    $mess .= "Exit status: $status\n";
1828    $mess .= "System Error Message: $!\n";
1829
1830    $gsdl_cgi->generate_error($mess);
1831    }
1832    else {
1833    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1834    }
1835
1836}
1837
1838
1839sub remove_metadata
1840{
1841    my $self = shift @_;
1842
1843    my $username  = $self->{'username'};
1844    my $collect   = $self->{'collect'};
1845    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1846    my $gsdlhome  = $self->{'gsdlhome'};
[23400]1847    my $infodbtype = $self->{'infodbtype'};
1848   
[19499]1849    if ($baseaction::authentication_enabled) {
1850    # Ensure the user is allowed to edit this collection
1851    &authenticate_user($gsdl_cgi, $username, $collect);
1852    }
1853
[21715]1854    # Obtain the collect dir
[23766]1855    my $site = $self->{'site'};
1856    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1857    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]1858
1859    # Make sure the collection isn't locked by someone else
1860    $self->lock_collection($username, $collect);
1861
1862    # look up additional args
1863    my $docid     = $self->{'d'};
[21715]1864    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1865      $gsdl_cgi->generate_error("No docid (d=...) specified.");
1866    }
[19499]1867    my $metaname  = $self->{'metaname'};
1868    my $metapos   = $self->{'metapos'};
[24949]1869    my $metavalue = $self->{'metavalue'};
[19499]1870
[21715]1871    # To people who know $collect_tail please add some comments
1872    # Obtain the path to the database
[19499]1873    my $collect_tail = $collect;
1874    $collect_tail =~ s/^.*[\/\\]//;
[21564]1875    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1876    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]1877
1878    # Read the docid entry
[23400]1879    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21715]1880
1881    # Basically loop through and unescape_html the values
[19499]1882    foreach my $k (keys %$doc_rec) {
1883    my @escaped_v = ();
1884    foreach my $v (@{$doc_rec->{$k}}) {
1885        if ($k eq "contains") {
1886        # protect quotes in ".2;".3 etc
1887        $v =~ s/\"/\\\"/g;
1888        push(@escaped_v, $v);
1889        }
1890        else {
1891        my $ev = &ghtml::unescape_html($v);
1892        $ev =~ s/\"/\\\"/g;
1893        push(@escaped_v, $ev);
1894        }
1895    }
1896    $doc_rec->{$k} = \@escaped_v;
1897    }
1898
[21715]1899    # Check to make sure the key does exist
1900    if (!defined ($doc_rec->{$metaname})) {
1901        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1902    }
1903
1904    # Obtain the specified metadata pos
[19499]1905    $metapos = 0 if (!defined $metapos);
1906
1907    # consider check key is defined before deleting?
[21715]1908    # Loop through the metadata array and ignore the specified position
[19499]1909    my $filtered_metadata = [];
[21715]1910    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
[19499]1911    for (my $i=0; $i<$num_metadata_vals; $i++) {
[24949]1912    my $metaval = shift(@{$doc_rec->{$metaname}});
[19499]1913
[24949]1914    if (!defined $metavalue && $i != $metapos) {
1915        push(@$filtered_metadata,$metaval)
[19499]1916    }
[24949]1917   
1918    if(defined $metavalue && !($metavalue eq $metaval))
1919    {
1920        push(@$filtered_metadata,$metavalue)
1921    }
[19499]1922    }
1923    $doc_rec->{$metaname} = $filtered_metadata;
1924
[21715]1925    # Turn the record back to string
[21551]1926    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
[19499]1927
[21715]1928    # Store it back to the database
[21569]1929    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
[19499]1930    my $status = system($cmd);
1931    if ($status != 0) {
1932    my $mess = "Failed to set metadata key: $docid\n";
1933   
1934    $mess .= "PATH: $ENV{'PATH'}\n";
1935    $mess .= "cmd = $cmd\n";
1936    $mess .= "Exit status: $status\n";
1937    $mess .= "System Error Message: $!\n";
1938
1939    $gsdl_cgi->generate_error($mess);
1940    }
1941    else {
1942    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1943    $mess .= "  $metaname";
1944    $mess .= "->[$metapos]" if (defined $metapos);
1945
1946    $gsdl_cgi->generate_ok_message($mess);
1947    }
1948}
1949
1950
[23761]1951# Was trying to reused the codes, but the functions need to be broken
1952# down more before they can be reused, otherwise there will be too
1953# much overhead and duplicate process...
[21716]1954sub insert_metadata
1955{
1956    my $self = shift @_;
1957   
1958    my $username  = $self->{'username'};
1959    my $collect   = $self->{'collect'};
1960    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1961    my $gsdlhome  = $self->{'gsdlhome'};
[23400]1962    my $infodbtype = $self->{'infodbtype'};
1963   
[23761]1964    # If the import metadata and gdbm database have been updated, we
1965    # need to insert some notification to warn user that the the text
1966    # they see at the moment is not indexed and require a rebuild.
[21716]1967    my $rebuild_pending_macro = "_rebuildpendingmessage_";
1968
1969    if ($baseaction::authentication_enabled) {
1970    # Ensure the user is allowed to edit this collection
1971    $self->authenticate_user($username, $collect);
1972    }
1973
[23766]1974    # Obtain the collect and archive dir   
1975    my $site = $self->{'site'};
1976    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1977    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[21716]1978    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1979
1980    # Make sure the collection isn't locked by someone else
1981    $self->lock_collection($username, $collect);
1982   
1983    # Check additional args
1984    my $docid = $self->{'d'};
1985    if (!defined($docid)) {
1986    $gsdl_cgi->generate_error("No document id is specified: d=...");
1987    }
1988    my $metaname = $self->{'metaname'};
1989    if (!defined($metaname)) {
1990    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1991    }
1992    my $metavalue = $self->{'metavalue'};
1993    if (!defined($metavalue) || $metavalue eq "") {
1994    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1995    }
1996    # make "accumulate" the default (less destructive, as won't actually
1997    # delete any existing values)
1998    my $metamode = "accumulate";
1999
2000    #=======================================================================#
2001    # set_import_metadata [START]
2002    #=======================================================================#
2003    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2004    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2005    my $metadata_xml_file;
[23400]2006    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2007    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[21716]2008   
2009    # This now stores the full pathname
2010    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2011   
2012    # figure out correct metadata.xml file [?]
2013    # Assuming the metadata.xml file is next to the source file
2014    # Note: This will not work if it is using the inherited metadata from the parent folder
2015    my ($import_tailname, $import_dirname)
2016    = File::Basename::fileparse($import_filename);
2017    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2018
2019    # Shane's escape characters
2020    $metavalue = pack "U0C*", unpack "C*", $metavalue;
2021    $metavalue =~ s/\,/&#44;/g;
2022    $metavalue =~ s/\:/&#58;/g;
2023    $metavalue =~ s/\|/&#124;/g;
2024    $metavalue =~ s/\(/&#40;/g;
2025    $metavalue =~ s/\)/&#41;/g;
2026    $metavalue =~ s/\[/&#91;/g;
2027    $metavalue =~ s/\\/&#92;/g;
2028    $metavalue =~ s/\]/&#93;/g;
2029    $metavalue =~ s/\{/&#123;/g;
2030    $metavalue =~ s/\}/&#125;/g;
2031    $metavalue =~ s/\"/&#34;/g;
2032    $metavalue =~ s/\`/&#96;/g;
2033    $metavalue =~ s/\n/_newline_/g;
2034
2035    # Edit the metadata.xml
2036    # Modified by Jeffrey from DL Consulting
2037    # Handle the case where there is one metadata.xml file for multiple FileSets
2038    # The XML filter needs to know whether it is in the right FileSet
2039    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2040    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2041    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2042                             $metaname, $metavalue, $metamode, $import_tailname);
2043    #=======================================================================#
2044    # set_import_metadata [END]
2045    #=======================================================================#
2046
2047
2048    #=======================================================================#
2049    # set_metadata (accumulate version) [START]
2050    #=======================================================================#
2051    # To people who know $collect_tail please add some comments
2052    # Obtain path to the database
2053    my $collect_tail = $collect;
2054    $collect_tail =~ s/^.*[\/\\]//;
2055    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2056    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21716]2057
2058    # Read the docid entry
[23400]2059    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2060   
[21716]2061    foreach my $k (keys %$doc_rec) {
2062    my @escaped_v = ();
2063    foreach my $v (@{$doc_rec->{$k}}) {
2064        if ($k eq "contains") {
2065        # protect quotes in ".2;".3 etc
2066        $v =~ s/\"/\\\"/g;
2067        push(@escaped_v, $v);
2068        }
2069        else {
2070        my $ev = &ghtml::unescape_html($v);
2071        $ev =~ s/\"/\\\"/g;
2072        push(@escaped_v, $ev);
2073        }
2074    }
2075    $doc_rec->{$k} = \@escaped_v;
2076    }
2077
2078    # Protect the quotes
2079    $metavalue =~ s/\"/\\\"/g;
2080
2081    # Adds the pending macro
2082    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2083
2084    # If the metadata doesn't exist, create a new one
2085    if (!defined($doc_rec->{$metaname})){   
2086    $doc_rec->{$metaname} = [ $macro_metavalue ];
2087    }
2088    # Else, let's acculumate the values
2089    else {
2090        push(@{$doc_rec->{$metaname}},$macro_metavalue);
2091    }
2092
2093    # Generate the record string
2094    my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2095
2096    # Store it into GDBM
2097    my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2098    my $status = system($cmd);
2099    if ($status != 0) {
2100        # Catch error if gdbmget failed
2101    my $mess = "Failed to set metadata key: $docid\n";
2102   
2103    $mess .= "PATH: $ENV{'PATH'}\n";
2104    $mess .= "cmd = $cmd\n";
2105    $mess .= "Exit status: $status\n";
2106    $mess .= "System Error Message: $!\n";
2107
2108    $gsdl_cgi->generate_error($mess);
2109    }
2110    else {
2111    my $mess = "insert-metadata successful: Key[$docid]\n";
2112    $mess .= "  [In metadata.xml] $metaname";
2113    $mess .= " = $metavalue\n";
2114    $mess .= "  [In database] $metaname";
2115    $mess .= " = $macro_metavalue\n";
2116    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
2117        $gsdl_cgi->generate_ok_message($mess);
2118    }   
2119    #=======================================================================#
2120    # set_metadata (accumulate version) [END]
2121    #=======================================================================#
2122
2123    # Release the lock once it is done
2124    $self->unlock_collection($username, $collect);
2125}
2126
[19293]21271;
Note: See TracBrowser for help on using the browser.