root/main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm @ 31602

Revision 31602, 94.5 KB (checked in by ak19, 2 years ago)

Follows Dr Bainbridge's suggestion to prevent URL based calls to set-metadata and remove-meta metadataserver.pl operations. Split metadataaction.pm into modmetadataaction.pm and metadataaction.pm, shifting the methods that modify metadata (set and remove subroutines) into the first. Now GS3 sets an env var that will control whether the meta-modifying subroutines will be available when called. If the env var is set, then metadataaction.pm will include the modmetadataaction.pm file in the begin block. For GS2, it works as before, always including the meta modifying subroutines. Tested on Linux with the GS3 web doc editor vs calling metadataserver.pl to set metadata directly from a URL.

Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
35# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files
36# http://www.perlmonks.org/?node_id=524456 - merging hashes
37
38package metadataaction;
39
40use strict;
41
42use cgiactions::baseaction;
43
44use dbutil;
45use ghtml;
46
47use JSON;
48
49
50BEGIN {
51    print STDERR "In ModMeta\n";
52}
53
54
55$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
56{
57    #SET METHODS
58    "set-live-metadata" => {
59        'compulsory-args' => [ "d", "metaname", "metavalue" ],
60        'optional-args'   => [ ] },
61
62    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
63        'compulsory-args' => [ "metaname", "metavalue" ],
64        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
65
66    "set-index-metadata" => {
67        'compulsory-args' => [ "d", "metaname", "metavalue" ],
68        'optional-args'   => [ "metapos", "metamode" ] },
69
70    "set-archives-metadata" => {
71        'compulsory-args' => [ "d", "metaname", "metavalue" ],
72        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
73   
74    "set-import-metadata" => {
75        'compulsory-args' => [ "metaname", "metavalue" ],
76        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
77                 
78    #SET METHODS (ARRAY)
79    "set-metadata-array" => {
80        'compulsory-args' => [ "where", "json" ],
81        'optional-args'   => [ ],
82        'help-string' => [
83        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
84       
85        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
86
87# The same examples rewritten for when running the metadata-server.pl script from the commandline:
88
89# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
90       
91# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
92                     
93    "set-archives-metadata-array" => {
94        'compulsory-args' => [ "json" ],
95        'optional-args'   => [ ] },
96       
97    "set-import-metadata-array" => {
98        'compulsory-args' => [ "json" ],
99        'optional-args'   => [ ] },
100
101    "set-index-metadata-array" => {
102        'compulsory-args' => [ "json" ],
103        'optional-args'   => [ ] },
104   
105    "set-live-metadata-array" => {
106        'compulsory-args' => [ "json" ],
107        'optional-args'   => [ ] },
108       
109    #REMOVE METHODS
110    "remove-import-metadata" => {
111        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
112        'optional-args'   => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
113# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
114                     
115    "remove-archives-metadata" => {
116        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
117        'optional-args'   => [ "metapos", "metavalue", "metamode" ] },
118
119    "remove-live-metadata" => {
120        'compulsory-args' => [ "d", "metaname" ],
121        'optional-args'   => [ ] },
122
123    "remove-index-metadata" => {
124        'compulsory-args' => [ "d", "metaname" ],
125        'optional-args'   => [ "metapos", "metavalue" ] },
126
127    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
128        'compulsory-args' => [ "d", "metaname" ],
129        'optional-args'   => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
130
131    #INSERT METHODS
132    "insert-metadata" => {
133        'compulsory-args' => [ "d", "metaname", "metavalue" ],
134        'optional-args'   => [ ] }
135};
136
137print STDERR "\nMODMETA_TABLE: " .join(",", keys $metadataaction::modmeta_action_table) ."\n";
138
139
140sub _set_live_metadata
141{
142    my $self = shift @_;
143
144    my $collect   = $self->{'collect'};
145    my $gsdl_cgi  = $self->{'gsdl_cgi'};
146    #my $gsdlhome  = $self->{'gsdlhome'};
147    my $infodbtype = $self->{'infodbtype'};
148
149    # Obtain the collect dir
150    my $site = $self->{'site'};
151    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
152    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
153
154
155    # look up additional args
156    my $docid     = $self->{'d'};
157    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
158      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
159    }
160    my $metavalue = $self->{'metavalue'};
161
162    # Generate the dbkey   
163    my $metaname  = $self->{'metaname'};
164    my $dbkey = "$docid.$metaname";
165
166    # To people who know $collect_tail please add some comments
167    # Obtain path to the database
168    my $collect_tail = $collect;
169    $collect_tail =~ s/^.*[\/|\\]//;
170    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
171    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
172
173    # Set the new value
174    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
175    my $status = system($cmd);
176    if ($status != 0) {
177        # Catch error if gdbmget failed
178    my $mess = "Failed to set metadata key: $dbkey\n";
179
180    $mess .= "PATH: $ENV{'PATH'}\n";
181    $mess .= "cmd = $cmd\n";
182    $mess .= "Exit status: $status\n";
183    $mess .= "System Error Message: $!\n";
184
185    $gsdl_cgi->generate_error($mess);
186    }
187    else {
188    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
189    }
190
191    #return $status; # in case calling functions have any further use for this
192}
193
194sub set_live_metadata
195{
196    my $self = shift @_;
197
198    my $username  = $self->{'username'};
199    my $collect   = $self->{'collect'};
200    my $gsdl_cgi  = $self->{'gsdl_cgi'};
201 
202    if ($baseaction::authentication_enabled) {
203    # Ensure the user is allowed to edit this collection   
204    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
205    }
206
207    # Make sure the collection isn't locked by someone else
208    $self->lock_collection($username, $collect);
209
210    $self->_set_live_metadata(@_);
211
212    # Release the lock once it is done
213    $self->unlock_collection($username, $collect);
214}
215
216sub set_index_metadata_entry
217{
218    print STDERR "1\n";
219    my $self = shift @_;
220    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
221   
222    # To people who know $collect_tail please add some comments
223    # Obtain path to the database
224    my $collect_tail = $collect;
225    $collect_tail =~ s/^.*[\/|\\]//;
226    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
227    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
228   
229    print STDERR "2\n";
230#   print STDERR "**** infodb file path = $infodb_file_path\n";
231#   print STDERR "***** infodb type = $infodbtype\n";
232   
233    # Read the docid entry
234    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
235   
236    # Set the metadata value
237    if (defined $metapos) {
238    print STDERR "3\n";
239    # if metamode=accumulate AND metapos, warn user and then use metapos
240    if (defined $metamode && $metamode eq "accumulate") {
241        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
242        print STDERR "**** Proceeding by using metapos\n";
243    }
244    $doc_rec->{$metaname}->[$metapos] = $metavalue;
245    }
246    elsif (defined $prevmetavalue) {
247    print STDERR "4\n";
248        my $array = $doc_rec->{$metaname};
249        my $length = @$array;
250
251        my $found = 0;
252        for (my $i = 0; $i < $length; $i++){
253            if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
254                $doc_rec->{$metaname}->[$i] = $metavalue;
255                $found = 1;
256                last;               
257            }
258        }
259
260        if($found == 0){
261            $doc_rec->{$metaname} = [ $metavalue ];
262        }
263    }
264    elsif (defined $metamode && $metamode eq "override") {
265    print STDERR "5\n";
266    $doc_rec->{$metaname} = [ $metavalue ];
267    }
268    else { # default for index was to override, but because accumulate is less destructive,
269    # and because accumulate is the default for archives and import, that's the new default for index too
270    print STDERR "6\n";
271    if(defined $doc_rec->{$metaname}) {
272        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
273    } else {
274        $doc_rec->{$metaname} = [ $metavalue ];
275    }
276    }
277    print STDERR "6\n";
278 
279    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
280   
281    return $status;
282   
283}
284
285sub _set_import_metadata
286{
287    my $self = shift @_;
288
289    my $collect   = $self->{'collect'};
290    my $gsdl_cgi  = $self->{'gsdl_cgi'};
291    my $infodbtype = $self->{'infodbtype'};
292#    my $gsdlhome  = $self->{'gsdlhome'};
293     
294    # Obtain the collect and archive dir   
295    my $site = $self->{'site'};
296    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
297    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
298    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
299   
300    # look up additional args
301    # want either d= or f=
302    my $docid  = $self->{'d'};
303    my ($docid_root,$docid_secnum);
304    if(defined $docid) {   
305    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
306    # as yet no support for setting subsection metadata in metadata.xml
307    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
308        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
309        return;
310    }
311    }
312
313    my $import_file  = $self->{'f'};
314    if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
315    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
316    }
317
318    # Get the parameters and set default mode to "accumulate"
319    my $metaname   = $self->{'metaname'};
320    my $metavalue  = $self->{'metavalue'};
321##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
322    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
323   
324    my $metamode   = $self->{'metamode'};
325    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
326    # make "accumulate" the default (less destructive, as it won't actually
327    # delete any existing values)
328    $metamode = "accumulate";
329    }
330
331    # adding metapos and prevmetavalue support to import_metadata subroutines
332    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
333    my $prevmetavalue = $self->{'prevmetavalue'};
334    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
335    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
336
337    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
338    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
339
340    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
341    $mess .= "  $metaname";
342    $mess .= " = $metavalue";
343    $mess .= " ($metamode)\n";
344
345    $gsdl_cgi->generate_ok_message($mess);
346
347    #return $status; # in case calling functions have any further use for this
348}
349
350# the version of set_index_meta that doesn't do authentication
351sub _set_archives_metadata
352{
353    my $self = shift @_;
354
355    my $collect   = $self->{'collect'};
356    my $gsdl_cgi  = $self->{'gsdl_cgi'};
357    my $infodbtype = $self->{'infodbtype'};
358   
359    # Obtain the collect and archive dir   
360    my $site = $self->{'site'};
361    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
362    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
363
364    # look up additional args
365    my $docid  = $self->{'d'};
366    my $metaname   = $self->{'metaname'};
367    my $metavalue  = $self->{'metavalue'};
368    my $prevmetavalue = $self->{'prevmetavalue'};
369   
370    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
371                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
372
373    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
374    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
375
376    my $metamode   = $self->{'metamode'};
377    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
378    # make "accumulate" the default (less destructive, as it won't actually
379    # delete any existing values)
380    $metamode = "accumulate";
381    }
382
383    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
384                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
385   
386    if ($status == 0) {
387    my $mess = "set-archives-metadata successful: Key[$docid]\n";
388    $mess .= "  $metaname";
389    $mess .= "->[$metapos]" if (defined $metapos);
390    $mess .= " = $metavalue";
391    $mess .= " ($metamode)\n";
392   
393    $gsdl_cgi->generate_ok_message($mess); 
394    }
395    else {
396    my $mess .= "Failed to set archives metadata key: $docid\n";
397    $mess .= "Exit status: $status\n";
398    if(defined $self->{'error_msg'}) {
399        $mess .= "Error Message: $self->{'error_msg'}\n";
400    } else {
401        $mess .= "System Error Message: $!\n";
402    }
403    $mess .= "-" x 20 . "\n";
404   
405    $gsdl_cgi->generate_error($mess);
406    }
407
408    #return $status; # in case calling functions have any further use for this
409}
410
411
412# the version of set_index_meta that doesn't do authentication
413sub _set_index_metadata
414{
415    print STDERR "START SET INDEX METADATA\n";
416    my $self = shift @_;
417
418    my $collect   = $self->{'collect'};
419    my $gsdl_cgi  = $self->{'gsdl_cgi'};
420
421    my $site = $self->{'site'};
422    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
423
424    # look up additional args
425    my $docid     = $self->{'d'};
426    my $metaname  = $self->{'metaname'};
427    my $metapos   = $self->{'metapos'}; # undef has meaning
428    my $metavalue = $self->{'metavalue'};
429    my $infodbtype = $self->{'infodbtype'};
430    my $metamode  = $self->{'metamode'};
431    my $prevmetavalue = $self->{'prevmetavalue'};
432
433    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
434    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
435
436    print STDERR "SETTING INDEX METADATA ENTRY\n";
437    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
438    print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
439    if ($status != 0) {
440        # Catch error if set infodb entry failed
441    my $mess = "Failed to set metadata key: $docid\n";
442   
443    $mess .= "PATH: $ENV{'PATH'}\n";
444    $mess .= "Exit status: $status\n";
445    $mess .= "System Error Message: $!\n";
446   
447    $gsdl_cgi->generate_error($mess);
448    }
449    else {
450    my $mess = "set-index-metadata successful: Key[$docid]\n";
451    $mess .= "  $metaname";
452    $mess .= "->[$metapos]" if (defined $metapos);
453    $mess .= " = $metavalue\n";
454   
455    $gsdl_cgi->generate_ok_message($mess);
456    }
457
458    print STDERR "END SET INDEX METADATA\n";
459    #return $status; # in case calling functions have any further use for this
460}
461
462sub set_index_metadata
463{
464    my $self = shift @_;
465
466    my $username  = $self->{'username'};
467    my $collect   = $self->{'collect'};
468    my $gsdl_cgi  = $self->{'gsdl_cgi'};
469    #my $gsdlhome  = $self->{'gsdlhome'};
470
471    if ($baseaction::authentication_enabled) {
472    # Ensure the user is allowed to edit this collection   
473    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
474    }
475
476    my $site = $self->{'site'};
477    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
478   
479    $gsdl_cgi->checked_chdir($collect_dir);
480
481    # Obtain the collect dir
482    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
483
484    # Make sure the collection isn't locked by someone else
485    $self->lock_collection($username, $collect);
486
487    $self->_set_index_metadata(@_);
488   
489    # Release the lock once it is done
490    $self->unlock_collection($username, $collect);
491}
492
493# call this to set the metadata for a combination of dirs archives, import or index, or live
494# if none specified, defaults to index which was the original behaviour of set_metadata.
495sub set_metadata
496{
497    my $self = shift @_;
498
499    # Testing that not defining a variable, setting it to "" or to "  " all return false
500    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
501
502    my $where = $self->{'where'};
503    if(!$where || ($where =~ m/^\s*$/)) {   
504    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
505    return;
506    }
507
508    # authenticate and lock collection once, even if processing multiple dirs
509    my $username  = $self->{'username'};
510    my $collect   = $self->{'collect'};
511    my $gsdl_cgi  = $self->{'gsdl_cgi'};
512   
513    if ($baseaction::authentication_enabled) {
514    # Ensure the user is allowed to edit this collection   
515    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
516    }
517   
518    if($where =~ m/index/) {
519    my $site = $self->{'site'};
520    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
521    $gsdl_cgi->checked_chdir($collect_dir);
522    }
523
524    # Make sure the collection isn't locked by someone else
525    $self->lock_collection($username, $collect);
526
527
528    # now at last can set the metadata. $where can specify multiple
529    # $where is of the form: import|archives|index, or a subset thereof
530
531    #my @whichdirs = split('\|', $where);
532
533    # just check whether $where contains import/archives/index/live in turn, and
534    # for each case, process it accordingly
535    if($where =~ m/import/) {
536    $self->_set_import_metadata(@_);       
537    }
538
539    if($where =~ m/archives/) {
540
541    # look up docID arg which is optional to set_metadata because it's optional
542    # to set_import, but which is compulsory to set_archives_metadata
543    my $docid     = $self->{'d'};
544    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
545        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
546    }
547    # we have a docid, so can set archives meta
548    $self->_set_archives_metadata(@_); 
549    }
550
551    if($where =~ m/index/) {
552   
553    # look up docID arg which is optional to set_metadata because it's optional
554    # to set_import, but which is compulsory to set_archives_metadata
555    my $docid     = $self->{'d'};
556    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
557        $gsdl_cgi->generate_error("No docid (d=...) specified.");
558    }
559    # we have a docid, so can set index meta
560    $self->_set_index_metadata(@_);
561    }   
562
563    if($where =~ m/live/) {
564    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
565    }
566
567    # Release the lock once it is done
568    $self->unlock_collection($username, $collect);
569}
570
571sub set_metadata_array
572{
573    my $self = shift @_;
574
575    my $where = $self->{'where'};
576    if(!$where || ($where =~ m/^\s*$/)) {   
577    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
578    return;
579    }
580
581    my $username  = $self->{'username'};
582    my $collect   = $self->{'collect'};
583    my $gsdl_cgi  = $self->{'gsdl_cgi'};
584
585    if ($baseaction::authentication_enabled) {
586    # Ensure the user is allowed to edit this collection
587    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
588    }
589
590    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
591    # But including the stmt during this code reorganisation to preserve as-is what used to happen
592    my $site = $self->{'site'};
593    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
594    $gsdl_cgi->checked_chdir($collect_dir);
595
596    # Make sure the collection isn't locked by someone else
597    $self->lock_collection($username, $collect);
598
599    if($where =~ m/import/) {
600    $self->_set_import_metadata_array(@_);
601    }
602    if($where =~ m/archives/) {
603    $self->_set_archives_metadata_array(@_);
604    }
605    if($where =~ m/index/) {
606    $self->_set_index_metadata_array(@_);
607    }
608    if($where =~ m/live/) {
609        $self->_set_live_metadata_array(@_);
610    }
611
612    # Release the lock once it is done
613    $self->unlock_collection($username, $collect);
614}
615
616sub _set_index_metadata_array
617{
618    my $self = shift @_;
619
620    my $collect   = $self->{'collect'};
621    my $gsdl_cgi  = $self->{'gsdl_cgi'};
622    my $site = $self->{'site'};
623    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
624
625   
626    # look up additional args
627   
628    my $infodbtype = $self->{'infodbtype'};
629   
630    my $json_str      = $self->{'json'};
631    my $doc_array = decode_json $json_str;
632   
633   
634    my $global_status = 0;
635    my $global_mess = "";
636   
637    my @all_docids = ();
638   
639    foreach my $doc_array_rec ( @$doc_array ) {
640   
641    my $status = -1;
642    my $docid     = $doc_array_rec->{'docid'};
643   
644    push(@all_docids,$docid);
645
646    my $metaname  = $doc_array_rec->{'metaname'};
647    if(defined $metaname) {
648        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
649        my $metavalue = $doc_array_rec->{'metavalue'};
650        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
651
652        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
653    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
654        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
655       
656        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
657        $metaname  = $metatable_rec->{'metaname'};
658        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
659        my $metapos = undef;
660        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
661
662        foreach my $metavalue ( @$metavals ) { # metavals is an array
663            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
664            if($metamode eq "override") { # now, having overridden the metavalue for the first,
665            # need to accumulate subsequent metavals for this metaname, else the just-assigned
666            # metavalue for this metaname will be lost
667            $metamode = "accumulate";
668            }
669        }           
670        }
671    }
672   
673    if ($status != 0) {
674        # Catch error if set infodb entry failed
675        $global_status = $status;
676        $global_mess .= "Failed to set metadata key: $docid\n";
677        $global_mess .= "Exit status: $status\n";
678        $global_mess .= "System Error Message: $!\n";
679        $global_mess .= "-" x 20;
680    }
681    }
682   
683    if ($global_status != 0) {
684    $global_mess .= "PATH: $ENV{'PATH'}\n";
685    $gsdl_cgi->generate_error($global_mess);
686    }
687    else {
688    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
689    $gsdl_cgi->generate_ok_message($mess);
690    }
691}
692
693sub set_index_metadata_array
694{
695    my $self = shift @_;
696
697    my $username  = $self->{'username'};
698    my $collect   = $self->{'collect'};
699    my $gsdl_cgi  = $self->{'gsdl_cgi'};
700#    my $gsdlhome  = $self->{'gsdlhome'};
701
702    if ($baseaction::authentication_enabled) {
703    # Ensure the user is allowed to edit this collection   
704    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
705    }
706
707    my $site = $self->{'site'};
708    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
709   
710    $gsdl_cgi->checked_chdir($collect_dir);
711
712    # Obtain the collect dir
713    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
714
715    # Make sure the collection isn't locked by someone else
716    $self->lock_collection($username, $collect);
717
718    $self->_set_index_metadata_array(@_);
719
720    # Release the lock once it is done
721    $self->unlock_collection($username, $collect);
722}
723
724# experimental, newly added in and untested
725sub _set_live_metadata_array
726{
727    my $self = shift @_;
728
729    my $collect   = $self->{'collect'};
730    my $gsdl_cgi  = $self->{'gsdl_cgi'};
731
732    my $site = $self->{'site'};
733    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
734
735   
736    # look up additional args
737    my $infodbtype = $self->{'infodbtype'};
738    # To people who know $collect_tail please add some comments
739    # Obtain path to the database
740    my $collect_tail = $collect;
741    $collect_tail =~ s/^.*[\/|\\]//;
742    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
743    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
744
745   
746    my $json_str      = $self->{'json'};
747    my $doc_array = decode_json $json_str;
748   
749   
750    my $global_status = 0;
751    my $global_mess = "";
752   
753    my @all_docids = ();
754
755
756    foreach my $doc_array_rec ( @$doc_array ) {
757   
758    my $status = -1;
759    my $docid     = $doc_array_rec->{'docid'};
760
761    push(@all_docids,$docid);
762
763    my $metaname  = $doc_array_rec->{'metaname'};
764    if(defined $metaname) {
765        my $dbkey = "$docid.$metaname";
766        my $metavalue = $doc_array_rec->{'metavalue'};
767
768        # Set the new value
769        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
770        $status = system($cmd);
771
772    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
773        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
774        foreach my $metatable_rec ( @$metatable ) {
775        $metaname  = $metatable_rec->{'metaname'};
776        my $dbkey = "$docid.$metaname";
777
778        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
779        foreach my $metavalue ( @$metavals ) {
780             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
781             $status = system($cmd);
782        }
783        }
784       
785    }
786
787    if ($status != 0) {
788        # Catch error if gdbmget failed
789        $global_status = $status;
790        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
791        $global_mess .= "Exit status: $status\n";
792        $global_mess .= "System Error Message: $!\n";
793        $global_mess .= "-" x 20;
794    }
795    }
796   
797    if ($global_status != 0) {
798    $global_mess .= "PATH: $ENV{'PATH'}\n";
799    $gsdl_cgi->generate_error($global_mess);
800    }
801    else {
802    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
803    $gsdl_cgi->generate_ok_message($mess);
804    }
805}
806
807sub set_live_metadata_array
808{
809    my $self = shift @_;
810
811    my $username  = $self->{'username'};
812    my $collect   = $self->{'collect'};
813    my $gsdl_cgi  = $self->{'gsdl_cgi'};
814
815    if ($baseaction::authentication_enabled) {
816    # Ensure the user is allowed to edit this collection   
817    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
818    }
819
820    my $site = $self->{'site'};
821    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
822   
823    $gsdl_cgi->checked_chdir($collect_dir);
824
825    # Make sure the collection isn't locked by someone else
826    $self->lock_collection($username, $collect);
827
828    $self->_set_live_metadata_array(@_);
829
830    # Release the lock once it is done
831    $self->unlock_collection($username, $collect);
832}
833
834
835sub dxml_metadata
836{
837    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
838    my $metaname = $parser->{'parameters'}->{'metaname'};
839    my $metamode = $parser->{'parameters'}->{'metamode'};
840   
841###!!!    print STDERR "**** Processing closing </Metadata> tag\n";
842   
843    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
844   
845    # Find the right metadata tag and checks if we are going to
846    # override it
847    #
848    # Note: This over writes the first metadata block it
849    # encountered. If there are multiple Sections in the doc.xml, it
850    # might not behave as you would expect
851
852    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
853##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
854##    print STDERR "**** metamode = $metamode\n";
855   
856    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
857    {
858        my $name_attr = $attrHash->{'name'};
859        # print STDOUT "*** testing: $name_attr eq $metaname ?   and  $metamode eq override ?\n";
860        if (($name_attr eq $metaname) && ($metamode eq "override"))
861        {
862            if (!defined $parser->{'parameters'}->{'poscount'})
863            {
864                $parser->{'parameters'}->{'poscount'} = 0;
865            }
866            else
867            {
868                $parser->{'parameters'}->{'poscount'}++;
869            }
870           
871            if ((defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
872                || (!defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == 0))
873               
874            {
875                ##print STDERR "#### got match!!\n";
876                # Get the value and override the current value
877                my $metavalue = $parser->{'parameters'}->{'metavalue'};
878                $attrHash->{'_content'} = $metavalue;
879               
880                # Don't want it to wipe out any other pieces of metadata
881                $parser->{'parameters'}->{'metamode'} = "done";
882            }
883            elsif (defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
884            {
885                my $metavalue = $parser->{'parameters'}->{'metavalue'};
886                $attrHash->{'_content'} = $metavalue;
887                $parser->{'parameters'}->{'metamode'} = "done";
888            }
889        }
890    }
891
892    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
893    return [$tagname => $attrHash];
894}
895
896# This method exists purely for catching invalid section numbers that the client
897# requested to edit. Once the parser has reached the end (the final </Archive> tag),
898# we've seen all the Sections in the doc.xml, and none of their section nums matched
899# if the metamode has not been set to 'done' by then.
900sub dxml_archive
901{
902    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
903    my $metamode = $parser->{'parameters'}->{'metamode'};
904   
905    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
906    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
907   
908#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
909   
910    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
911    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
912
913    my $metaname = $parser->{'parameters'}->{'metaname'};
914    my $metavalue = $parser->{'parameters'}->{'metavalue'};
915   
916    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
917    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
918    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
919    print STDERR "\tin section $opt_doc_secnum.\n";
920    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
921    }
922   
923    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
924    return [$tagname => $attrHash];
925}
926
927sub dxml_description
928{
929    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
930    my $metamode = $parser->{'parameters'}->{'metamode'};
931
932    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
933    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
934   
935###!!!  print STDERR "**** Processing a closing </Description> tag \n";
936#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
937   
938    # Accumulate the metadata
939
940    # We'll be accumulating metadata at this point if we haven't found and therefore
941    # haven't processed the metadata yet.
942    # For subsections, this means that if we're at a matching subsection, but haven't
943    # found the correct metaname to override in that subsection, we accumulate it as new
944    # meta in the subsection by adding it to the current description.
945    # If there's no subsection info for the metadata, it will accumulate at the top level
946    # section description if we hadn't found a matching metaname to override at this point.
947
948    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
949    # at the toplevel section. In the latter case, it means we want to process meta in the
950    # toplevel section. So the eq check between the values below will work in all cases.
951   
952    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
953    # this document has been provided. In that case, the metadata for that opt_doc_secnum
954    # won't get attached/accumulated to any part of the doc, not even its top-level section.
955
956    if ($curr_secnum eq $opt_doc_secnum
957        && ($metamode eq "accumulate" || $metamode eq "override")) {
958        if ($metamode eq "override") {
959        print "Got to end of <Description> block. No metadata value to override.  Switching 'metamode' to accumulate\n";
960        }
961
962        # If we get to here and metamode is override, this means there
963        # was no existing value to overide => treat as an append operation
964       
965        # Tack a new metadata tag on to the end of the <Metadata>+ block
966        my $metaname = $parser->{'parameters'}->{'metaname'};
967        my $metavalue = $parser->{'parameters'}->{'metavalue'};
968       
969        my $metadata_attr = {
970        '_content' => $metavalue,
971        'name' => $metaname,
972        'mode' => "accumulate"
973        };
974       
975        my $append_metadata = [ "Metadata" => $metadata_attr ];
976        my $description_content = $attrHash->{'_content'};
977       
978        print "Appending metadata to doc.xml\n";
979       
980        if (ref($description_content)) {
981        # got some existing interesting nested content
982        push(@$description_content, "    ", $append_metadata ,"\n        ");
983        }
984        else {
985        #description_content is most likely a string such as "\n"
986        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
987        }
988       
989        $parser->{'parameters'}->{'metamode'} = "done";
990    }       
991    else {
992        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
993##      print STDERR "**** NOT ACCUMULATE?!? \n";
994    }
995
996    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
997    return [$tagname => $attrHash];
998}
999
1000
1001sub dxml_start_section
1002{
1003    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1004
1005    my $new_depth = scalar(@$contextArray);
1006
1007#   print STDERR "**** START SECTION \n";
1008   
1009    if ($new_depth == 1) {
1010    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1011    $parser->{'parameters'}->{'curr_section_num'}   = "";
1012    }
1013
1014    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1015    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1016
1017    my $new_secnum;
1018
1019    if ($new_depth > $old_depth) {
1020    # child subsection
1021    $new_secnum = "$old_secnum.1";
1022    }
1023    elsif ($new_depth == $old_depth) {
1024    # sibling section => increase it's value by 1
1025    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1026    $tail_num++;
1027    $new_secnum = $old_secnum;
1028    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1029    }
1030    else {
1031    # back up to parent section => lopp off tail
1032    $new_secnum = $old_secnum;
1033    $new_secnum =~ s/\.\d+$//;
1034    }
1035
1036    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1037    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1038   
1039    1;
1040}
1041
1042sub edit_xml_file
1043{
1044    my $self = shift @_;
1045    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1046
1047    # use XML::Rules to add it in (read in and out again)
1048    my $parser = XML::Rules->new(start_rules     => $start_rules,
1049                 rules           => $rules,
1050                 style           => 'filter',
1051                                 output_encoding => 'utf8' );
1052
1053    my $xml_in = "";
1054    if (!open(MIN,"<$filename")) {
1055    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1056    }
1057    else {
1058        # Read all the text in
1059    my $line;
1060    while (defined ($line=<MIN>)) {
1061        $xml_in .= $line;
1062    }
1063    close(MIN);
1064   
1065    my $MOUT;   
1066    if (!open($MOUT,">$filename")) {
1067        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1068    }
1069    else {
1070        # Matched lines will get handled by the call backs
1071##      my $xml_out = "";
1072
1073        binmode($MOUT,":utf8");
1074        $parser->filter($xml_in,$MOUT, $options);
1075
1076#       binmode(MOUT,":utf8");
1077#       print MOUT $xml_out;
1078        close($MOUT);       
1079    }
1080    }
1081
1082    # copy across any custom error information that was stored during parsing
1083    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1084}
1085
1086sub edit_doc_xml
1087{
1088    my $self = shift @_;
1089    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1090
1091    my $info_mess = <<RAWEND;
1092****************************
1093  edit_doc_xml()
1094****************************
1095RAWEND
1096
1097    $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1098    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1099    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1100    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1101    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1102    $info_mess .= " opt_secnum  = $opt_secnum\n"  if defined($opt_secnum);
1103    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1104     
1105    $info_mess .= "****************************\n";
1106
1107    $gsdl_cgi->generate_message($info_mess);
1108   
1109    # To monitor which section/subsection number we are in
1110    my @start_rules =
1111    ( 'Section'    => \&dxml_start_section );
1112
1113    # use XML::Rules to add it in (read in and out again)
1114    # Set the call back functions
1115    my @rules =
1116    ( _default => 'raw',
1117      'Metadata'    => \&dxml_metadata,
1118      'Description' => \&dxml_description,
1119      'Archive'     => \&dxml_archive); # just for catching errors at end
1120     
1121    # Sets the parameters
1122    my $options = { 'metaname'  => $metaname,
1123            'metapos'   => $metapos,
1124            'metavalue' => $metavalue,
1125            'metamode'  => $metamode,
1126            'prevmetavalue' => $prevmetavalue };
1127           
1128    if (defined $opt_secnum) {
1129    $options->{'secnum'} = $opt_secnum;
1130    }
1131
1132    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1133}
1134
1135sub set_archives_metadata_entry
1136{
1137    my $self = shift @_;
1138    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1139
1140    my $info_mess = <<RAWEND;
1141****************************
1142  set_archives_metadata_entry()
1143****************************
1144RAWEND
1145
1146    $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1147    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1148    $info_mess .= " collect     = $collect\n"     if defined($collect);
1149    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
1150    $info_mess .= " docid       = $docid\n"       if defined($docid);
1151    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1152    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1153    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1154    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1155    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1156     
1157    $info_mess .= "****************************\n";
1158
1159    $gsdl_cgi->generate_message($info_mess);
1160   
1161    # Obtain the doc.xml path for the specified docID
1162    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1163
1164    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1165    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1166    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1167   
1168    # The $doc_xml_file is relative to the archives, and now let's get the full path
1169    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1170    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1171
1172    # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1173    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1174    # Thereafter, we will finally be setting the overriding metavalue for this metaname
1175    if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1176    # remove all values of $metaname metadata
1177    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1178    }
1179    # Edit the doc.xml file with the specified metadata name, value and position.
1180    # TODO: there is a potential problem here as this edit_doc_xml function
1181    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1182    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1183   
1184    # dxml_metadata method ignores metapos if metamode anything other than override
1185    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1186            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1187
1188    # return 0; # return 0 for now to indicate no error
1189    return (defined $self->{'error_msg'}) ? 1 : 0;
1190}
1191
1192
1193sub set_archives_metadata
1194{
1195    my $self = shift @_;
1196
1197    my $username  = $self->{'username'};
1198    my $collect   = $self->{'collect'};
1199    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1200   
1201    if ($baseaction::authentication_enabled) {
1202    # Ensure the user is allowed to edit this collection
1203    $self->authenticate_user($username, $collect);
1204    }
1205
1206    # Make sure the collection isn't locked by someone else
1207    $self->lock_collection($username, $collect);
1208
1209    $self->_set_archives_metadata(@_);
1210
1211    # Release the lock once it is done
1212    $self->unlock_collection($username, $collect);
1213}
1214
1215sub _set_archives_metadata_array
1216{
1217    my $self = shift @_;
1218   
1219    my $collect   = $self->{'collect'};
1220    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1221    my $site = $self->{'site'};
1222    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1223
1224    # look up additional args
1225   
1226    my $infodbtype = $self->{'infodbtype'};
1227   
1228    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1229   
1230    my $json_str      = $self->{'json'};
1231    my $doc_array = decode_json $json_str;
1232   
1233   
1234    my $global_status = 0;
1235    my $global_mess = "";
1236   
1237    my @all_docids = ();
1238   
1239    foreach my $doc_array_rec ( @$doc_array ) {
1240    my $status    = -1;
1241    my $docid     = $doc_array_rec->{'docid'};
1242
1243    push(@all_docids,$docid);
1244   
1245    my $metaname  = $doc_array_rec->{'metaname'};
1246    if(defined $metaname) {
1247       
1248        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1249
1250        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1251        my $metavalue = $doc_array_rec->{'metavalue'};
1252        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1253       
1254       
1255        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1256        # make "accumulate" the default (less destructive, as it won't actually
1257        # delete any existing values)
1258        $metamode = "accumulate";
1259        }       
1260       
1261        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1262                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1263    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1264        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1265       
1266        foreach my $metatable_rec ( @$metatable ) {
1267        $metaname  = $metatable_rec->{'metaname'};
1268        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1269        my $metapos = undef;
1270        my $prevmetavalue = undef;
1271        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1272       
1273        foreach my $metavalue ( @$metavals ) {
1274            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1275                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1276           
1277            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1278            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1279            # metavalue for this metaname will be lost
1280            $metamode = "accumulate";
1281            }
1282        }           
1283        }       
1284    }
1285       
1286    if ($status != 0) {
1287        # Catch error if set infodb entry failed
1288        $global_status = $status;
1289        $global_mess .= "Failed to set metadata key: $docid\n";
1290        $global_mess .= "Exit status: $status\n";
1291        $global_mess .= "System Error Message: $!\n";
1292        $global_mess .= "-" x 20 . "\n";
1293    }
1294    }
1295   
1296    if ($global_status != 0) {
1297    $global_mess .= "PATH: $ENV{'PATH'}\n";
1298    $gsdl_cgi->generate_error($global_mess);
1299    }
1300    else {
1301    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1302    $gsdl_cgi->generate_ok_message($mess);
1303    }
1304}
1305
1306sub set_archives_metadata_array
1307{
1308    my $self = shift @_;
1309
1310    my $username  = $self->{'username'};
1311    my $collect   = $self->{'collect'};
1312    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1313#    my $gsdlhome  = $self->{'gsdlhome'};
1314
1315    if ($baseaction::authentication_enabled) {
1316    # Ensure the user is allowed to edit this collection   
1317    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1318    }
1319
1320    my $site = $self->{'site'};
1321    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1322   
1323    $gsdl_cgi->checked_chdir($collect_dir);
1324
1325    # Obtain the collect dir
1326    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1327
1328    # Make sure the collection isn't locked by someone else
1329    $self->lock_collection($username, $collect);
1330
1331    $self->_set_archives_metadata_array(@_);
1332   
1333    # Release the lock once it is done
1334    $self->unlock_collection($username, $collect);
1335}
1336
1337sub _remove_archives_metadata
1338{
1339    my $self = shift @_;
1340
1341    my $collect   = $self->{'collect'};
1342    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1343#   my $gsdlhome  = $self->{'gsdlhome'};
1344    my $infodbtype = $self->{'infodbtype'};
1345   
1346    my $site = $self->{'site'};
1347       
1348    # Obtain the collect and archive dir   
1349    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1350   
1351    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1352
1353    # look up additional args
1354    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1355   
1356    my $metaname = $self->{'metaname'};
1357    my $metapos = $self->{'metapos'};
1358    my $metavalue = $self->{'metavalue'};
1359
1360    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1361    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1362
1363    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1364    # default to deleting the first metavalue for the given metaname
1365    # Beware that if both metapos AND metavalue are defined, both matches (if any)
1366    # seem to get deleted in one single remove_archives_meta action invocation.
1367    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1368    # deleted, both get deleted.
1369    if(!defined $metapos && !defined $metavalue) {
1370        $metapos = 0;
1371    }
1372
1373    my $metamode = $self->{'metamode'};
1374    $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1375
1376    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1377    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1378
1379    # This now stores the full pathname
1380    my $doc_filename = $doc_rec->{'doc-file'}->[0];
1381
1382    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1383#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1384   
1385    if ($status == 0)
1386    {
1387        my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1388        $mess .= "  $metaname";
1389        $mess .= "->[$metapos]" if (defined $metapos);
1390        $mess .= " ($metavalue)" if (defined $metavalue);
1391        $gsdl_cgi->generate_ok_message($mess); 
1392    }
1393    else
1394    {
1395        my $mess .= "Failed to remove archives metadata key: $docid\n";
1396        $mess .= "Exit status: $status\n";
1397        $mess .= "System Error Message: $!\n";
1398        $mess .= "-" x 20 . "\n";
1399       
1400        $gsdl_cgi->generate_error($mess);
1401    }
1402   
1403    #return $status; # in case calling functions have a use for this
1404}
1405
1406sub remove_archives_metadata
1407{
1408    my $self = shift @_;
1409
1410    my $username  = $self->{'username'};
1411    my $collect   = $self->{'collect'};
1412    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1413   
1414    if ($baseaction::authentication_enabled)
1415    {
1416        # Ensure the user is allowed to edit this collection       
1417        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1418    }
1419
1420    # Make sure the collection isn't locked by someone else
1421    $self->lock_collection($username, $collect);
1422
1423    $self->_remove_archives_metadata(@_);
1424
1425    # Release the lock once it is done
1426    $self->unlock_collection($username, $collect);
1427}
1428
1429sub remove_from_doc_xml
1430{
1431    my $self = shift @_;
1432    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1433   
1434    my @start_rules = ('Section' => \&dxml_start_section);
1435   
1436    # Set the call-back functions for the metadata tags
1437    my @rules =
1438    (
1439        _default => 'raw',
1440        'Metadata' => \&rfdxml_metadata
1441    );
1442       
1443    my $parser = XML::Rules->new
1444    (
1445        start_rules => \@start_rules,
1446        rules => \@rules,
1447        style => 'filter',
1448        output_encoding => 'utf8',
1449#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1450        stripspaces => 2|0|0 # ineffectual
1451    );
1452   
1453    my $status = 0;
1454    my $xml_in = "";
1455    if (!open(MIN,"<$doc_xml_filename"))
1456    {
1457        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1458        $status = 1;
1459    }
1460    else
1461    {
1462        # Read them in
1463        my $line;
1464        while (defined ($line=<MIN>)) {
1465            $xml_in .= $line;
1466        }
1467        close(MIN);
1468
1469        # Filter with the call-back functions
1470        my $xml_out = "";
1471
1472        my $MOUT;
1473        if (!open($MOUT,">$doc_xml_filename")) {
1474            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1475            $status = 1;
1476        }
1477        else {
1478            binmode($MOUT,":utf8");
1479            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1480            close($MOUT);       
1481        }
1482    }
1483    return $status;
1484}
1485
1486sub rfdxml_metadata
1487{
1488    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1489
1490    # For comparisons, toplevel section is indicated by ""
1491    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1492    my $secid = $parser->{'parameters'}->{'secid'} || "";
1493
1494    if (!($secid eq $curr_sec_num))
1495    {
1496        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1497        return [$tagname => $attrHash];
1498    }
1499
1500    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1501    {
1502        if (!defined $parser->{'parameters'}->{'poscount'})
1503        {
1504            $parser->{'parameters'}->{'poscount'} = 0;
1505        }
1506        else
1507        {
1508            $parser->{'parameters'}->{'poscount'}++;
1509        }
1510       
1511        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1512        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {         
1513            return [];
1514        }
1515
1516        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1517        {   
1518            return [];
1519        }
1520       
1521        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1522        {   
1523            return [];
1524        }
1525    }
1526   
1527    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1528    return [$tagname => $attrHash];
1529}
1530
1531sub mxml_metadata
1532{
1533    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1534    my $metaname = $parser->{'parameters'}->{'metaname'};
1535    my $metamode = $parser->{'parameters'}->{'metamode'};
1536
1537    # Report error if we don't see FileName tag before this
1538    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1539   
1540    # Don't do anything if we are not in the right FileSet
1541    my $file_regexp = $parser->{'parameters'}->{'current_file'};
1542    if ($file_regexp =~ /\.\*/) {
1543    # Only interested in a file_regexp if it specifies precisely one
1544    # file. 
1545    # So, skip anything with a .* in it as it is too general
1546##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1547    return [$tagname => $attrHash];
1548    }
1549    my $src_file = $parser->{'parameters'}->{'src_file'};
1550    if (!($src_file =~ /$file_regexp/)) {
1551    return [$tagname => $attrHash];
1552    }
1553##    print STDERR "*** mxl metamode = $metamode\n";
1554
1555    # Find the right metadata tag and checks if we are going to override it
1556    my $name_attr = $attrHash->{'name'};
1557    if (($name_attr eq $metaname) && ($metamode eq "override")) {
1558
1559    # now metadata.xml functions need to keep track of metapos
1560    if (!defined $parser->{'parameters'}->{'poscount'})
1561    {
1562        $parser->{'parameters'}->{'poscount'} = 0;
1563    }
1564    else
1565    {
1566        $parser->{'parameters'}->{'poscount'}++;
1567    }
1568
1569    # If either the metapos or prevmetavalue is set,
1570        # get the value and override the current value
1571    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1572
1573    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1574    {
1575        $attrHash->{'_content'} = $metavalue;
1576
1577        ##  print STDERR "**** overriding metadata.xml\n";
1578       
1579        # Don't want it to wipe out any other pieces of metadata
1580        $parser->{'parameters'}->{'metamode'} = "done";
1581    }
1582    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1583    {
1584        $attrHash->{'_content'} = $metavalue;
1585        $parser->{'parameters'}->{'metamode'} = "done";
1586    }
1587    }
1588
1589    # mxml_description will process the metadata if metadata is accumulate,
1590    # or if we haven't found the metadata to override
1591
1592    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1593    return [$tagname => $attrHash];
1594}
1595
1596
1597sub mxml_description
1598{
1599    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1600    my $metamode = $parser->{'parameters'}->{'metamode'};   
1601
1602    # Failed... Report error if we don't see FileName tag before this
1603    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1604
1605    # Don't do anything if we are not in the right FileSet
1606    my $file_regexp = $parser->{'parameters'}->{'current_file'};
1607    if ($file_regexp =~ m/\.\*/) {
1608    # Only interested in a file_regexp if it specifies precisely one
1609    # file. 
1610    # So, skip anything with a .* in it as it is too general
1611    return [$tagname => $attrHash];
1612    }
1613    my $src_file = $parser->{'parameters'}->{'src_file'};
1614   
1615    if (!($src_file =~ m/$file_regexp/)) {
1616    return [$tagname => $attrHash];
1617    }
1618
1619    # Accumulate the metadata block to the end of the description block
1620    # Note: This adds metadata block to all description blocks, so if there are
1621    # multiple FileSets, it will add to all of them
1622    if (($metamode eq "accumulate") || ($metamode eq "override")) {
1623
1624    # if metamode was "override" but get to here then it failed to
1625    # find an item to override, in which case it should append its
1626    # value to the end, just like the "accumulate" mode
1627
1628    if ($metamode eq "override") {
1629        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1630    }
1631
1632    # tack a new metadata tag on to the end of the <Metadata>+ block
1633    my $metaname = $parser->{'parameters'}->{'metaname'};
1634    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1635   
1636    my $metadata_attr = { '_content' => $metavalue,
1637                  'name'     => $metaname,
1638                  'mode'     => "accumulate" };
1639
1640    my $append_metadata = [ "Metadata" => $metadata_attr ];
1641    my $description_content = $attrHash->{'_content'};
1642   
1643##  print STDERR "*** appending to metadata.xml\n";
1644
1645    # append the new metadata element to the end of the current
1646    # content contained inside this tag
1647    if (ref($description_content) eq "") {
1648        # => string or numeric literal
1649        # this is caused by a <Description> block has no <Metadata> child elements
1650        # => set up an empty array in '_content'
1651        $attrHash->{'_content'} = [ "\n" ];
1652        $description_content = $attrHash->{'_content'};
1653    }
1654
1655    push(@$description_content,"    ", $append_metadata ,"\n        ");
1656    $parser->{'parameters'}->{'metamode'} = "done";
1657    }
1658
1659    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1660    return [$tagname => $attrHash];
1661}
1662
1663
1664sub mxml_filename
1665{
1666    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1667
1668    # Store the filename of the Current Fileset
1669    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1670    # FileName tag must come before Description tag
1671    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1672
1673    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1674    return [$tagname => $attrHash];
1675}
1676
1677
1678sub mxml_fileset
1679{
1680    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1681
1682    # Initilise the current_file
1683    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1684    # FileName tag must come before Description tag
1685    $parser->{'parameters'}->{'current_file'} = "";
1686
1687    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1688    return [$tagname => $attrHash];
1689}
1690
1691sub mxml_directorymetadata
1692{
1693    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1694
1695    # if we haven't processed the metadata when we reach the end of metadata.xml
1696    # it's because there's no particular FileSet element whose FileName matched
1697    # In which case, add a new FileSet for this FileName
1698    my $metamode = $parser->{'parameters'}->{'metamode'};
1699    if($metamode ne "done") {
1700   
1701    if ($metamode eq "override") {
1702        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1703    }
1704
1705    # If we get to here and metamode is override, this means there
1706    # was no existing value to overide => treat as an append operation
1707   
1708    # Create a new FileSet element and append to DirectoryMetadata
1709    # <FileSet>
1710    # <FileName>src_file</FileName>
1711    # <Description>
1712    # <Metadata mode="" name="">metavalue</Metadata>
1713    # </Description>
1714    # </FileSet>
1715    my $src_file = $parser->{'parameters'}->{'src_file'};
1716    my $metaname = $parser->{'parameters'}->{'metaname'};
1717    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1718    my $metadata_attr = {
1719        '_content' => $metavalue,
1720        'name' => $metaname,
1721        'mode' => "accumulate"
1722    };
1723    my $append_metadata = [ "Metadata" => $metadata_attr ];
1724    my $description_attr = { '_content' => [ "\n\t\t   ", $append_metadata, "\n\t\t"] };
1725    my $description_element = [ "Description" => $description_attr ];
1726
1727    #_content is not an attribute, it's special and holds the children of this element
1728    # including the textnode value embedded in this element if any.
1729    my $filename_attr = {'_content' => $src_file};
1730    my $filename_element = [ "FileName" => $filename_attr ];
1731
1732    my $fileset_attr = {};
1733    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1734    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1735   
1736   
1737    # get children of dirmeta, and push the new FileSet element onto it
1738    print "Appending metadata to metadata.xml\n";
1739    my $dirmeta_content = $attrHash->{'_content'};
1740    if (ref($dirmeta_content)) {
1741        # got some existing interesting nested content
1742        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
1743        push(@$dirmeta_content, "\t", $fileset ,"\n");
1744    }
1745    else {
1746        #description_content is most likely a string such as "\n"
1747        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
1748        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1749    }   
1750
1751    $parser->{'parameters'}->{'metamode'} = "done";
1752    }
1753    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1754    return [$tagname => $attrHash];
1755}
1756
1757
1758sub edit_metadata_xml
1759{
1760    my $self = shift @_;
1761    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1762
1763    # Set the call-back functions for the metadata tags
1764    my @rules =
1765    ( _default => 'raw',
1766          'FileName' => \&mxml_filename,
1767      'Metadata' => \&mxml_metadata,
1768      'Description' => \&mxml_description,
1769          'FileSet' => \&mxml_fileset,
1770      'DirectoryMetadata' => \&mxml_directorymetadata);
1771
1772    # use XML::Rules to add it in (read in and out again)
1773    my $parser = XML::Rules->new(rules => \@rules,
1774                 style => 'filter',
1775                                 output_encoding => 'utf8',
1776                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1777
1778    if (!-e $metadata_xml_filename) {
1779   
1780        if (open(MOUT,">$metadata_xml_filename")) {
1781           
1782            my $src_file_re = &util::filename_to_regex($src_file);
1783            # shouldn't the following also be in the above utility routine??
1784            # $src_file_re =~ s/\./\\./g;
1785       
1786            print MOUT "<?xml version=\"1.0\"?>\n";
1787            print MOUT "<DirectoryMetadata>\n";
1788            print MOUT " <FileSet>\n";
1789            print MOUT "  <FileName>$src_file_re</FileName>\n";
1790            print MOUT "  <Description>\n";
1791            print MOUT "  </Description>\n";
1792            print MOUT " </FileSet>\n";
1793            print MOUT "</DirectoryMetadata>\n";
1794
1795            close(MOUT);
1796        }
1797        else {
1798            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1799        }
1800    }
1801   
1802   
1803    my $xml_in = "";
1804    if (!open(MIN,"<$metadata_xml_filename")) {
1805        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1806    }
1807    else {
1808        # Read them in
1809        my $line;
1810        while (defined ($line=<MIN>)) {
1811            $xml_in .= $line;
1812        }
1813        close(MIN);
1814
1815        # Filter with the call-back functions
1816        my $xml_out = "";
1817
1818        my $MOUT;
1819        if (!open($MOUT,">$metadata_xml_filename")) {
1820            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1821        }
1822        else {
1823            binmode($MOUT,":utf8");
1824
1825            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1826            # At the moment, I will just hack it!
1827            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1828            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1829            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1830            #print MOUT $xml_out;
1831
1832            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1833                              metapos => $metapos,
1834                      metavalue => $metavalue,
1835                      metamode => $metamode,
1836                      src_file => $src_file,
1837                      prevmetavalue => $prevmetavalue,
1838                      current_file => undef} );
1839            close($MOUT);       
1840        }
1841   }
1842}
1843
1844
1845sub set_import_metadata
1846{
1847    my $self = shift @_;
1848   
1849    my $username  = $self->{'username'};
1850    my $collect   = $self->{'collect'};
1851    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1852   
1853    if ($baseaction::authentication_enabled) {
1854    # Ensure the user is allowed to edit this collection
1855    $self->authenticate_user($username, $collect);
1856    }
1857
1858    # Make sure the collection isn't locked by someone else
1859    $self->lock_collection($username, $collect);
1860 
1861    $self->_set_import_metadata(@_);
1862
1863    # Release the lock once it is done
1864    $self->unlock_collection($username, $collect);
1865   
1866}
1867
1868sub set_import_metadata_array
1869{
1870    my $self = shift @_;
1871
1872    my $username  = $self->{'username'};
1873    my $collect   = $self->{'collect'};
1874    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1875#    my $gsdlhome  = $self->{'gsdlhome'};
1876
1877    if ($baseaction::authentication_enabled) {
1878    # Ensure the user is allowed to edit this collection   
1879    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1880    }
1881
1882    my $site = $self->{'site'};
1883    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1884   
1885    $gsdl_cgi->checked_chdir($collect_dir);
1886
1887    # Make sure the collection isn't locked by someone else
1888    $self->lock_collection($username, $collect);
1889
1890    $self->_set_import_metadata_array(@_);
1891
1892    # Release the lock once it is done
1893    $self->unlock_collection($username, $collect);
1894
1895}
1896
1897
1898sub _set_import_metadata_array
1899{
1900    my $self = shift @_;
1901
1902    my $collect   = $self->{'collect'};
1903    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1904
1905    my $site = $self->{'site'};
1906    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1907   
1908    # look up additional args
1909   
1910    my $infodbtype = $self->{'infodbtype'};
1911   
1912    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1913    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1914   
1915    my $json_str = $self->{'json'};
1916    my $doc_array = decode_json $json_str;
1917   
1918    my $global_status = 0;
1919    my $global_mess = "";
1920   
1921    my @all_docids = ();
1922   
1923    foreach my $doc_array_rec ( @$doc_array )
1924    {
1925    my $status = -1;
1926    my $docid = $doc_array_rec->{'docid'};
1927   
1928    my ($docid_root,$docid_secnum);
1929    if(defined $docid) {   
1930        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
1931        # as yet no support for setting subsection metadata in metadata.xml
1932        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1933        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
1934        next; # skip this docid in for loop
1935        }
1936    }
1937
1938    push(@all_docids,$docid); # docid_root rather
1939   
1940    my $metaname = $doc_array_rec->{'metaname'};
1941    if (defined $metaname) {
1942        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1943        my $metavalue = $doc_array_rec->{'metavalue'};
1944        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1945
1946        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1947        # make "accumulate" the default (less destructive, as it won't actually
1948        # delete any existing values)
1949        $metamode = "accumulate";
1950        }
1951
1952        # adding metapos and prevmetavalue support to import_metadata subroutines
1953        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1954        my $prevmetavalue = $self->{'prevmetavalue'};
1955
1956        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1957       
1958    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1959        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1960       
1961        foreach my $metatable_rec ( @$metatable ) {
1962        $metaname  = $metatable_rec->{'metaname'};
1963        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1964        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1965            # make "accumulate" the default (less destructive, as it won't actually
1966            # delete any existing values)
1967            $metamode = "accumulate";
1968        }
1969
1970        # No support for metapos and prevmetavalue in the JSON metatable substructure
1971        my $metapos = undef;
1972        my $prevmetavalue = undef;
1973        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1974       
1975        foreach my $metavalue ( @$metavals ) {
1976            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1977
1978            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1979            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
1980            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1981            # metavalue for this metaname will be lost
1982            $metamode = "accumulate";
1983            }
1984        }
1985        }
1986    }       
1987    }
1988
1989    # always a success message
1990    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1991    $gsdl_cgi->generate_ok_message($mess);
1992}
1993
1994# always returns true (1)
1995sub set_import_metadata_entry
1996{
1997    my $self = shift @_;
1998    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
1999
2000    my $info_mess = <<RAWEND;
2001****************************
2002  set_import_metadata_entry()
2003****************************
2004RAWEND
2005
2006    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2007    $info_mess .= " collect     = $collect\n"     if defined($collect);
2008    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
2009    $info_mess .= " arcinfo_doc_filename  = $arcinfo_doc_filename\n"  if defined($arcinfo_doc_filename);
2010    $info_mess .= " docid       = $docid\n"       if defined($docid);
2011    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
2012    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2013    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2014    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2015    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2016     
2017    $info_mess .= "****************************\n";
2018
2019    $gsdl_cgi->generate_message($info_mess);
2020
2021    # import works with metadata.xml which can have inherited metadata
2022    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2023    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2024    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2025    # what they're doing if they provide a metapos.
2026    if(defined $metapos) {
2027    print STDERR "@@@@ WARNING: metapos defined.\n";
2028    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2029    }
2030
2031    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2032    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2033    my $metadata_xml_file;
2034    my $import_filename = undef;
2035   
2036    if (defined $docid) {
2037    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2038    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2039
2040    # This now stores the full pathname
2041    $import_filename = $doc_rec->{'src-file'}->[0];
2042    $import_filename = &util::placeholders_to_abspath($import_filename);
2043
2044    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2045         # as the array version of the method doesn't support the -f parameter yet
2046    my $import_file  = $self->{'f'};
2047    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2048    }
2049   
2050    # figure out correct metadata.xml file [?]
2051    # Assuming the metadata.xml file is next to the source file
2052    # Note: This will not work if it is using the inherited metadata from the parent folder
2053    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2054    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2055   
2056    # If we're overriding everything, then $prevmetavalue=undefined and
2057    # $metamode=override combined with $metapos=undefined
2058    # in which case we need to remove all metavalues for the metaname at the given (sub)section
2059    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2060    if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2061##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2062    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2063
2064    }
2065
2066    # Edit the metadata.xml
2067    # Modified by Jeffrey from DL Consulting
2068    # Handle the case where there is one metadata.xml file for multiple FileSets
2069    # The XML filter needs to know whether it is in the right FileSet
2070    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2071    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2072    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2073                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2074    #return 0;
2075    return $metadata_xml_filename;
2076}
2077
2078sub _remove_import_metadata
2079{
2080    my $self = shift @_;
2081
2082    my $collect   = $self->{'collect'};
2083    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2084#   my $gsdlhome  = $self->{'gsdlhome'};
2085    my $infodbtype = $self->{'infodbtype'};
2086   
2087    # Obtain the collect dir
2088    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2089    my $site = $self->{'site'};
2090    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2091   
2092    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2093    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2094
2095    # look up additional args
2096    my $docid = $self->{'d'};
2097    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2098    {
2099        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2100    }
2101   
2102    my $metaname = $self->{'metaname'};
2103    my $metapos = $self->{'metapos'};
2104    my $metavalue = $self->{'metavalue'};
2105
2106    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2107    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2108
2109    if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2110        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2111    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2112        $metapos = 0;
2113    }
2114    my $metamode = $self->{'metamode'};
2115    $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2116
2117    # import works with metadata.xml which can have inherited metadata
2118    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2119    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2120    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2121    # what they're doing if they provide a metapos.
2122    if(defined $metapos) {
2123        print STDERR "@@@@ WARNING: metapos defined.\n";
2124        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2125    }
2126   
2127    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2128    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2129    my $metadata_xml_file;
2130    my $import_filename = undef;
2131    if (defined $docid)
2132    {
2133        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2134        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2135
2136        # This now stores the full pathname
2137        $import_filename = $doc_rec->{'src-file'}->[0];
2138        $import_filename = &util::placeholders_to_abspath($import_filename);
2139    }
2140
2141    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2142    {
2143        $gsdl_cgi->generate_error("There is no metadata\n");
2144    }
2145   
2146    # figure out correct metadata.xml file [?]
2147    # Assuming the metadata.xml file is next to the source file
2148    # Note: This will not work if it is using the inherited metadata from the parent folder
2149    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2150    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2151   
2152    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2153   
2154    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2155    $mess .= "  $metaname";
2156    $mess .= " = $metavalue\n";
2157   
2158    $gsdl_cgi->generate_ok_message($mess);
2159
2160    #return $status; # in case calling functions have a use for this
2161}
2162
2163sub remove_import_metadata
2164{
2165    my $self = shift @_;
2166   
2167    my $username = $self->{'username'};
2168    my $collect   = $self->{'collect'};
2169    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2170   
2171    if ($baseaction::authentication_enabled) {
2172        # Ensure the user is allowed to edit this collection       
2173        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2174    }
2175
2176    # Make sure the collection isn't locked by someone else
2177    $self->lock_collection($username, $collect);
2178   
2179    $self->_remove_import_metadata(@_);
2180
2181    # Release the lock once it is done
2182    $self->unlock_collection($username, $collect);
2183
2184}
2185
2186sub remove_from_metadata_xml
2187{
2188    my $self = shift @_;
2189    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2190    # metamode generally has no meaning for removing meta, but is used by set_meta
2191    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2192
2193    # Set the call-back functions for the metadata tags
2194    my @rules =
2195    (
2196        _default => 'raw',
2197        'Metadata' => \&rfmxml_metadata,
2198        'FileName' => \&mxml_filename
2199    );
2200       
2201    my $parser = XML::Rules->new
2202    (
2203        rules => \@rules,
2204        style => 'filter',
2205        output_encoding => 'utf8',
2206     #normalisespaces => 1,
2207            stripspaces => 2|0|0 # ineffectual
2208    );
2209   
2210    my $xml_in = "";
2211    if (!open(MIN,"<$metadata_xml_filename"))
2212    {
2213        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2214    }
2215    else
2216    {
2217        # Read them in
2218        my $line;
2219        while (defined ($line=<MIN>)) {
2220            $xml_in .= $line;
2221        }
2222        close(MIN);
2223
2224        # Filter with the call-back functions
2225        my $xml_out = "";
2226
2227        my $MOUT;
2228        if (!open($MOUT,">$metadata_xml_filename")) {
2229            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2230        }
2231        else {
2232            binmode($MOUT,":utf8");
2233            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2234            close($MOUT);       
2235        }
2236    }
2237}
2238
2239sub rfmxml_metadata
2240{
2241    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2242
2243    # metadata.xml does not handle subsections
2244
2245    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2246    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2247        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2248    {
2249        if (!defined $parser->{'parameters'}->{'poscount'})
2250        {
2251            $parser->{'parameters'}->{'poscount'} = 0;
2252        }
2253        else
2254        {
2255            $parser->{'parameters'}->{'poscount'}++;
2256        }
2257
2258        # if overriding but no metapos, then clear all the meta for this metaname
2259        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2260            return [];
2261        }
2262   
2263        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2264        {
2265            return [];
2266        }
2267        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2268        {
2269            return [];
2270        }       
2271    }
2272
2273    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2274    return [$tagname => $attrHash];
2275}
2276
2277sub _remove_live_metadata
2278{
2279    my $self = shift @_;
2280
2281    my $collect   = $self->{'collect'};
2282    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2283#    my $gsdlhome  = $self->{'gsdlhome'};
2284    my $infodbtype = $self->{'infodbtype'};
2285
2286    # Obtain the collect dir
2287    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2288    my $site = $self->{'site'};
2289    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2290
2291   
2292    # look up additional args
2293    my $docid     = $self->{'d'};
2294    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2295      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2296    }
2297   
2298    # Generate the dbkey
2299    my $metaname  = $self->{'metaname'};
2300    my $dbkey = "$docid.$metaname";
2301
2302    # To people who know $collect_tail please add some comments
2303    # Obtain the live gdbm_db path
2304    my $collect_tail = $collect;
2305    $collect_tail =~ s/^.*[\/|\\]//;
2306    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2307    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2308
2309    # Remove the key
2310    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2311    my $status = system($cmd);
2312    if ($status != 0) {
2313        # Catch error if gdbmdel failed
2314    my $mess = "Failed to set metadata key: $dbkey\n";
2315   
2316    $mess .= "PATH: $ENV{'PATH'}\n";
2317    $mess .= "cmd = $cmd\n";
2318    $mess .= "Exit status: $status\n";
2319    $mess .= "System Error Message: $!\n";
2320
2321    $gsdl_cgi->generate_error($mess);
2322    }
2323    else {
2324    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2325    }
2326
2327}
2328
2329sub remove_live_metadata
2330{
2331    my $self = shift @_;
2332
2333    my $username  = $self->{'username'};
2334    my $collect   = $self->{'collect'};
2335    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2336    my $gsdlhome  = $self->{'gsdlhome'};
2337   
2338    if ($baseaction::authentication_enabled) {
2339    # Ensure the user is allowed to edit this collection   
2340    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2341    }
2342
2343    # Make sure the collection isn't locked by someone else
2344    $self->lock_collection($username, $collect);
2345
2346    $self->_remove_live_metadata(@_);
2347
2348    $self->unlock_collection($username, $collect);
2349}
2350
2351sub remove_metadata
2352{
2353    my $self = shift @_;
2354
2355    my $where = $self->{'where'};
2356    if(!$where || ($where =~ m/^\s*$/)) {
2357    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2358    return;
2359    }
2360
2361    my $username  = $self->{'username'};
2362    my $collect   = $self->{'collect'};
2363    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2364
2365    if ($baseaction::authentication_enabled) {
2366    # Ensure the user is allowed to edit this collection   
2367    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2368    }
2369
2370    # Make sure the collection isn't locked by someone else
2371    $self->lock_collection($username, $collect);
2372
2373    # check which directories need to be processed, specified in $where as
2374    # any combination of import|archives|index|live
2375    if($where =~ m/import/) {
2376    $self->_remove_import_metadata(@_);     
2377    }
2378    if($where =~ m/archives/) {
2379    $self->_remove_archives_metadata(@_);       
2380   }
2381    if($where =~ m/index/) {
2382    $self->_remove_index_metadata(@_);     
2383    }
2384
2385    # Release the lock once it is done
2386    $self->unlock_collection($username, $collect);
2387}
2388
2389# the internal version, without authentication
2390sub _remove_index_metadata
2391{   
2392    my $self = shift @_;
2393
2394    my $collect   = $self->{'collect'};
2395    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2396#    my $gsdlhome  = $self->{'gsdlhome'};
2397    my $infodbtype = $self->{'infodbtype'};
2398   
2399    # Obtain the collect dir
2400    my $site = $self->{'site'};
2401    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2402    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2403
2404        # look up additional args
2405    my $docid     = $self->{'d'};
2406    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2407      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2408    }
2409    my $metaname  = $self->{'metaname'};
2410    my $metapos   = $self->{'metapos'};
2411    my $metavalue = $self->{'metavalue'};
2412
2413    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2414    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2415
2416    # To people who know $collect_tail please add some comments
2417    # -> In collection groups, I think collect_tailname is the subcollection name,
2418    # e.g. colgroup-name/col-tail-name
2419    # Obtain the path to the database
2420    my $collect_tail = $collect;
2421    $collect_tail =~ s/^.*[\/|\\]//;
2422    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2423    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2424
2425    # Read the docid entry
2426    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2427
2428    # Check to make sure the key does exist
2429    if (!defined ($doc_rec->{$metaname})) {
2430        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2431    }
2432
2433    # Obtain the specified metadata pos
2434    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2435    if(!defined $metapos && !defined $metavalue) {
2436        $metapos = 0;
2437    }
2438   
2439
2440    # consider check key is defined before deleting?
2441    # Loop through the metadata array and ignore the specified position
2442    my $filtered_metadata = [];
2443    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
2444    for (my $i=0; $i<$num_metadata_vals; $i++) {
2445    my $metaval = shift(@{$doc_rec->{$metaname}});
2446
2447    if (!defined $metavalue && $i != $metapos) {
2448        push(@$filtered_metadata,$metaval);
2449    }
2450   
2451    if(defined $metavalue && !($metavalue eq $metaval))
2452    {
2453        push(@$filtered_metadata,$metaval);
2454    }
2455    }
2456    $doc_rec->{$metaname} = $filtered_metadata;
2457
2458    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2459    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2460
2461    if ($status != 0) {
2462    my $mess = "Failed to set metadata key: $docid\n";
2463   
2464    $mess .= "PATH: $ENV{'PATH'}\n";
2465    $mess .= "Exit status: $status\n";
2466    $mess .= "System Error Message: $!\n";
2467
2468    $gsdl_cgi->generate_error($mess);
2469    }
2470    else {
2471    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2472    $mess .= "  $metaname";
2473    $mess .= "->[$metapos]" if (defined $metapos);
2474    $mess .= " ($metavalue)" if (defined $metavalue);
2475
2476    $gsdl_cgi->generate_ok_message($mess);
2477    }
2478
2479    #return $status; # in case calling functions have a use for this
2480}
2481
2482sub remove_index_metadata
2483{
2484    my $self = shift @_;
2485
2486    my $username  = $self->{'username'};
2487    my $collect   = $self->{'collect'};
2488    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2489#    my $gsdlhome  = $self->{'gsdlhome'};
2490   
2491    if ($baseaction::authentication_enabled) {
2492    # Ensure the user is allowed to edit this collection   
2493    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2494    }
2495
2496    # Obtain the collect dir
2497    my $site = $self->{'site'};
2498    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2499    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2500
2501    # Make sure the collection isn't locked by someone else
2502    $self->lock_collection($username, $collect);
2503
2504    $self->_remove_index_metadata(@_);
2505
2506    # Release the lock once it is done
2507    $self->unlock_collection($username, $collect);
2508}
2509
2510
2511# Was trying to reused the codes, but the functions need to be broken
2512# down more before they can be reused, otherwise there will be too
2513# much overhead and duplicate process...
2514sub insert_metadata
2515{
2516    my $self = shift @_;
2517   
2518    my $username  = $self->{'username'};
2519    my $collect   = $self->{'collect'};
2520    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2521    my $gsdlhome  = $self->{'gsdlhome'};
2522    my $infodbtype = $self->{'infodbtype'};
2523   
2524    # If the import metadata and gdbm database have been updated, we
2525    # need to insert some notification to warn user that the the text
2526    # they see at the moment is not indexed and require a rebuild.
2527    my $rebuild_pending_macro = "_rebuildpendingmessage_";
2528
2529    if ($baseaction::authentication_enabled) {
2530    # Ensure the user is allowed to edit this collection
2531    $self->authenticate_user($username, $collect);
2532    }
2533
2534    # Obtain the collect and archive dir   
2535    my $site = $self->{'site'};
2536    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2537    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2538    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2539
2540    # Make sure the collection isn't locked by someone else
2541    $self->lock_collection($username, $collect);
2542   
2543    # Check additional args
2544    my $docid = $self->{'d'};
2545    if (!defined($docid) || ($docid =~ m/^\s*$/)) {
2546    $gsdl_cgi->generate_error("No document id is specified: d=...");
2547    }
2548    my $metaname = $self->{'metaname'};
2549    if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
2550    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2551    }
2552    my $metavalue = $self->{'metavalue'};
2553    if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
2554    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2555    }
2556    # make "accumulate" the default (less destructive, as it won't actually
2557    # delete any existing values)
2558    my $metamode = "accumulate";
2559
2560    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2561    my $metapos   = undef;
2562    my $prevmetavalue = undef;
2563
2564    #=======================================================================#
2565    # set_import_metadata [START]
2566    #=======================================================================#
2567    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2568    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2569    my $metadata_xml_file;
2570    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2571    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2572   
2573    # This now stores the full pathname
2574    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2575    $import_filename = &util::placeholders_to_abspath($import_filename);
2576   
2577    # figure out correct metadata.xml file [?]
2578    # Assuming the metadata.xml file is next to the source file
2579    # Note: This will not work if it is using the inherited metadata from the parent folder
2580    my ($import_tailname, $import_dirname)
2581    = File::Basename::fileparse($import_filename);
2582    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2583
2584    # Shane's escape characters
2585    $metavalue = pack "U0C*", unpack "C*", $metavalue;
2586    $metavalue =~ s/\,/&#44;/g;
2587    $metavalue =~ s/\:/&#58;/g;
2588    $metavalue =~ s/\|/&#124;/g;
2589    $metavalue =~ s/\(/&#40;/g;
2590    $metavalue =~ s/\)/&#41;/g;
2591    $metavalue =~ s/\[/&#91;/g;
2592    $metavalue =~ s/\\/&#92;/g;
2593    $metavalue =~ s/\]/&#93;/g;
2594    $metavalue =~ s/\{/&#123;/g;
2595    $metavalue =~ s/\}/&#125;/g;
2596    $metavalue =~ s/\"/&#34;/g;
2597    $metavalue =~ s/\`/&#96;/g;
2598    $metavalue =~ s/\n/_newline_/g;
2599
2600    # Edit the metadata.xml
2601    # Modified by Jeffrey from DL Consulting
2602    # Handle the case where there is one metadata.xml file for multiple FileSets
2603    # The XML filter needs to know whether it is in the right FileSet
2604    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2605    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2606    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2607                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2608    #=======================================================================#
2609    # set_import_metadata [END]
2610    #=======================================================================#
2611
2612
2613    #=======================================================================#
2614    # set_metadata (accumulate version) [START]
2615    #=======================================================================#
2616    # To people who know $collect_tail please add some comments
2617    # Obtain path to the database
2618    my $collect_tail = $collect;
2619    $collect_tail =~ s/^.*[\/|\\]//;
2620    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2621    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2622
2623    # Read the docid entry
2624    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2625
2626    # Protect the quotes
2627    $metavalue =~ s/\"/\\\"/g;
2628
2629    # Adds the pending macro
2630    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2631
2632    # If the metadata doesn't exist, create a new one
2633    if (!defined($doc_rec->{$metaname})){   
2634    $doc_rec->{$metaname} = [ $macro_metavalue ];
2635    }
2636    # Else, let's acculumate the values
2637    else {
2638        push(@{$doc_rec->{$metaname}},$macro_metavalue);
2639    }
2640
2641    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2642    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2643
2644    if ($status != 0) {
2645        # Catch error if gdbmget failed
2646    my $mess = "Failed to set metadata key: $docid\n";
2647   
2648    $mess .= "PATH: $ENV{'PATH'}\n";
2649    $mess .= "Exit status: $status\n";
2650    $mess .= "System Error Message: $!\n";
2651
2652    $gsdl_cgi->generate_error($mess);
2653    }
2654    else {
2655    my $mess = "insert-metadata successful: Key[$docid]\n";
2656    $mess .= "  [In metadata.xml] $metaname";
2657    $mess .= " = $metavalue\n";
2658    $mess .= "  [In database] $metaname";
2659    $mess .= " = $macro_metavalue\n";
2660    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
2661        $gsdl_cgi->generate_ok_message($mess);
2662    }   
2663    #=======================================================================#
2664    # set_metadata (accumulate version) [END]
2665    #=======================================================================#
2666
2667    # Release the lock once it is done
2668    $self->unlock_collection($username, $collect);
2669}
2670
2671# not returning 1; here since this file is conditionally included by metadataction.pm
Note: See TracBrowser for help on using the browser.