root/main/trunk/greenstone2/perllib/cgiactions/modmetadataaction.pm @ 32076

Revision 32076, 93.1 KB (checked in by kjdon, 2 years ago)

Bugfix (with debugging left in). The set-archive-metadata would overwrite the very first metadata value for the specified tag if metapos not provided even if prevmetavalue was provided. Changed the logic for handling metapos, prevmetavalue when setting archive metadata to fix this.

Line 
1##########################################################################
2#
3# modmetadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This is a submodule of metadataaction.pm. It is loaded conditionally
27# upon having the permissions to modify (not just get) metadata.
28# With sufficient permissions, these additional subroutines should be made
29# available to metadataaction.pm
30
31# See http://www.perlmonks.org/?node_id=881761 for splitting module into multiple files
32# and how variables declared with 'our' are used there.
33
34package metadataaction;
35
36use strict;
37
38use cgiactions::baseaction;
39
40use dbutil;
41use ghtml;
42
43use JSON;
44
45
46$metadataaction::modmeta_action_table = #OR: our $modmeta_action_table =
47{
48    #SET METHODS
49    "set-live-metadata" => {
50        'compulsory-args' => [ "d", "metaname", "metavalue" ],
51        'optional-args'   => [ ] },
52
53    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
54        'compulsory-args' => [ "metaname", "metavalue" ],
55        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
56
57    "set-index-metadata" => {
58        'compulsory-args' => [ "d", "metaname", "metavalue" ],
59        'optional-args'   => [ "metapos", "metamode" ] },
60
61    "set-archives-metadata" => {
62        'compulsory-args' => [ "d", "metaname", "metavalue" ],
63        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
64   
65    "set-import-metadata" => {
66        'compulsory-args' => [ "metaname", "metavalue" ],
67        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
68                 
69    #SET METHODS (ARRAY)
70    "set-metadata-array" => {
71        'compulsory-args' => [ "where", "json" ],
72        'optional-args'   => [ ],
73        'help-string' => [
74        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
75       
76        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
77
78# The same examples rewritten for when running the metadata-server.pl script from the commandline:
79
80# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
81       
82# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
83                     
84    "set-archives-metadata-array" => {
85        'compulsory-args' => [ "json" ],
86        'optional-args'   => [ ] },
87       
88    "set-import-metadata-array" => {
89        'compulsory-args' => [ "json" ],
90        'optional-args'   => [ ] },
91
92    "set-index-metadata-array" => {
93        'compulsory-args' => [ "json" ],
94        'optional-args'   => [ ] },
95   
96    "set-live-metadata-array" => {
97        'compulsory-args' => [ "json" ],
98        'optional-args'   => [ ] },
99       
100    #REMOVE METHODS
101    "remove-import-metadata" => {
102        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
103        'optional-args'   => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
104# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
105                     
106    "remove-archives-metadata" => {
107        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
108        'optional-args'   => [ "metapos", "metavalue", "metamode" ] },
109
110    "remove-live-metadata" => {
111        'compulsory-args' => [ "d", "metaname" ],
112        'optional-args'   => [ ] },
113
114    "remove-index-metadata" => {
115        'compulsory-args' => [ "d", "metaname" ],
116        'optional-args'   => [ "metapos", "metavalue" ] },
117
118    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
119        'compulsory-args' => [ "d", "metaname" ],
120        'optional-args'   => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
121
122    #INSERT METHODS
123    "insert-metadata" => {
124        'compulsory-args' => [ "d", "metaname", "metavalue" ],
125        'optional-args'   => [ ] }
126};
127
128
129sub _set_live_metadata
130{
131    my $self = shift @_;
132
133    my $collect   = $self->{'collect'};
134    my $gsdl_cgi  = $self->{'gsdl_cgi'};
135    #my $gsdlhome  = $self->{'gsdlhome'};
136    my $infodbtype = $self->{'infodbtype'};
137
138    # Obtain the collect dir
139    my $site = $self->{'site'};
140    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
141    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
142
143
144    # look up additional args
145    my $docid     = $self->{'d'};
146    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
147      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
148    }
149    my $metavalue = $self->{'metavalue'};
150
151    # Generate the dbkey   
152    my $metaname  = $self->{'metaname'};
153    my $dbkey = "$docid.$metaname";
154
155    # To people who know $collect_tail please add some comments
156    # Obtain path to the database
157    my $collect_tail = $collect;
158    $collect_tail =~ s/^.*[\/|\\]//;
159    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
160    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
161
162    # Set the new value
163    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
164    my $status = system($cmd);
165    if ($status != 0) {
166        # Catch error if gdbmget failed
167    my $mess = "Failed to set metadata key: $dbkey\n";
168
169    $mess .= "PATH: $ENV{'PATH'}\n";
170    $mess .= "cmd = $cmd\n";
171    $mess .= "Exit status: $status\n";
172    $mess .= "System Error Message: $!\n";
173
174    $gsdl_cgi->generate_error($mess);
175    }
176    else {
177    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
178    }
179
180    #return $status; # in case calling functions have any further use for this
181}
182
183sub set_live_metadata
184{
185    my $self = shift @_;
186
187    my $username  = $self->{'username'};
188    my $collect   = $self->{'collect'};
189    my $gsdl_cgi  = $self->{'gsdl_cgi'};
190 
191    if ($baseaction::authentication_enabled) {
192    # Ensure the user is allowed to edit this collection   
193    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
194    }
195
196    # Make sure the collection isn't locked by someone else
197    $self->lock_collection($username, $collect);
198
199    $self->_set_live_metadata(@_);
200
201    # Release the lock once it is done
202    $self->unlock_collection($username, $collect);
203}
204
205sub set_index_metadata_entry
206{
207    print STDERR "1\n";
208    my $self = shift @_;
209    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
210   
211    # To people who know $collect_tail please add some comments
212    # Obtain path to the database
213    my $collect_tail = $collect;
214    $collect_tail =~ s/^.*[\/|\\]//;
215    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
216    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
217   
218    print STDERR "2\n";
219#   print STDERR "**** infodb file path = $infodb_file_path\n";
220#   print STDERR "***** infodb type = $infodbtype\n";
221   
222    # Read the docid entry
223    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
224   
225    # Set the metadata value
226    if (defined $metapos) {
227    print STDERR "3\n";
228    # if metamode=accumulate AND metapos, warn user and then use metapos
229    if (defined $metamode && $metamode eq "accumulate") {
230        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
231        print STDERR "**** Proceeding by using metapos\n";
232    }
233    $doc_rec->{$metaname}->[$metapos] = $metavalue;
234    }
235    elsif (defined $prevmetavalue) {
236    print STDERR "4\n";
237        my $array = $doc_rec->{$metaname};
238        my $length = @$array;
239
240        my $found = 0;
241        for (my $i = 0; $i < $length; $i++){
242            if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
243                $doc_rec->{$metaname}->[$i] = $metavalue;
244                $found = 1;
245                last;               
246            }
247        }
248
249        if($found == 0){
250            $doc_rec->{$metaname} = [ $metavalue ];
251        }
252    }
253    elsif (defined $metamode && $metamode eq "override") {
254    print STDERR "5\n";
255    $doc_rec->{$metaname} = [ $metavalue ];
256    }
257    else { # default for index was to override, but because accumulate is less destructive,
258    # and because accumulate is the default for archives and import, that's the new default for index too
259    print STDERR "6\n";
260    if(defined $doc_rec->{$metaname}) {
261        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
262    } else {
263        $doc_rec->{$metaname} = [ $metavalue ];
264    }
265    }
266    print STDERR "6\n";
267 
268    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
269   
270    return $status;
271   
272}
273
274sub _set_import_metadata
275{
276    my $self = shift @_;
277
278    my $collect   = $self->{'collect'};
279    my $gsdl_cgi  = $self->{'gsdl_cgi'};
280    my $infodbtype = $self->{'infodbtype'};
281#    my $gsdlhome  = $self->{'gsdlhome'};
282     
283    # Obtain the collect and archive dir   
284    my $site = $self->{'site'};
285    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
286    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
287    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
288   
289    # look up additional args
290    # want either d= or f=
291    my $docid  = $self->{'d'};
292    my ($docid_root,$docid_secnum);
293    if(defined $docid) {   
294    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
295    # as yet no support for setting subsection metadata in metadata.xml
296    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
297        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
298        return;
299    }
300    }
301
302    my $import_file  = $self->{'f'};
303    if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
304    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
305    }
306
307    # Get the parameters and set default mode to "accumulate"
308    my $metaname   = $self->{'metaname'};
309    my $metavalue  = $self->{'metavalue'};
310##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
311    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
312   
313    my $metamode   = $self->{'metamode'};
314    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
315    # make "accumulate" the default (less destructive, as it won't actually
316    # delete any existing values)
317    $metamode = "accumulate";
318    }
319
320    # adding metapos and prevmetavalue support to import_metadata subroutines
321    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
322    my $prevmetavalue = $self->{'prevmetavalue'};
323    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
324    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
325
326    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
327    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
328
329    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
330    $mess .= "  $metaname";
331    $mess .= " = $metavalue";
332    $mess .= " ($metamode)\n";
333
334    $gsdl_cgi->generate_ok_message($mess);
335
336    #return $status; # in case calling functions have any further use for this
337}
338
339# the version of set_index_meta that doesn't do authentication
340sub _set_archives_metadata
341{
342    my $self = shift @_;
343
344    my $collect   = $self->{'collect'};
345    my $gsdl_cgi  = $self->{'gsdl_cgi'};
346    my $infodbtype = $self->{'infodbtype'};
347   
348    # Obtain the collect and archive dir   
349    my $site = $self->{'site'};
350    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
351    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
352
353    # look up additional args
354    my $docid  = $self->{'d'};
355    my $metaname   = $self->{'metaname'};
356    my $metavalue  = $self->{'metavalue'};
357    my $prevmetavalue = $self->{'prevmetavalue'};
358   
359    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
360                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
361
362    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
363    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
364
365    my $metamode   = $self->{'metamode'};
366    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
367    # make "accumulate" the default (less destructive, as it won't actually
368    # delete any existing values)
369    $metamode = "accumulate";
370    }
371
372    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
373                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
374   
375    if ($status == 0) {
376    my $mess = "set-archives-metadata successful: Key[$docid]\n";
377    $mess .= "  $metaname";
378    $mess .= "->[$metapos]" if (defined $metapos);
379    $mess .= " = $metavalue";
380    $mess .= " ($metamode)\n";
381   
382    $gsdl_cgi->generate_ok_message($mess); 
383    }
384    else {
385    my $mess .= "Failed to set archives metadata key: $docid\n";
386    $mess .= "Exit status: $status\n";
387    if(defined $self->{'error_msg'}) {
388        $mess .= "Error Message: $self->{'error_msg'}\n";
389    } else {
390        $mess .= "System Error Message: $!\n";
391    }
392    $mess .= "-" x 20 . "\n";
393   
394    $gsdl_cgi->generate_error($mess);
395    }
396
397    #return $status; # in case calling functions have any further use for this
398}
399
400
401# the version of set_index_meta that doesn't do authentication
402sub _set_index_metadata
403{
404    print STDERR "START SET INDEX METADATA\n";
405    my $self = shift @_;
406
407    my $collect   = $self->{'collect'};
408    my $gsdl_cgi  = $self->{'gsdl_cgi'};
409
410    my $site = $self->{'site'};
411    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
412
413    # look up additional args
414    my $docid     = $self->{'d'};
415    my $metaname  = $self->{'metaname'};
416    my $metapos   = $self->{'metapos'}; # undef has meaning
417    my $metavalue = $self->{'metavalue'};
418    my $infodbtype = $self->{'infodbtype'};
419    my $metamode  = $self->{'metamode'};
420    my $prevmetavalue = $self->{'prevmetavalue'};
421
422    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
423    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
424
425    print STDERR "SETTING INDEX METADATA ENTRY\n";
426    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
427    print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
428    if ($status != 0) {
429        # Catch error if set infodb entry failed
430    my $mess = "Failed to set metadata key: $docid\n";
431   
432    $mess .= "PATH: $ENV{'PATH'}\n";
433    $mess .= "Exit status: $status\n";
434    $mess .= "System Error Message: $!\n";
435   
436    $gsdl_cgi->generate_error($mess);
437    }
438    else {
439    my $mess = "set-index-metadata successful: Key[$docid]\n";
440    $mess .= "  $metaname";
441    $mess .= "->[$metapos]" if (defined $metapos);
442    $mess .= " = $metavalue\n";
443   
444    $gsdl_cgi->generate_ok_message($mess);
445    }
446
447    print STDERR "END SET INDEX METADATA\n";
448    #return $status; # in case calling functions have any further use for this
449}
450
451sub set_index_metadata
452{
453    my $self = shift @_;
454
455    my $username  = $self->{'username'};
456    my $collect   = $self->{'collect'};
457    my $gsdl_cgi  = $self->{'gsdl_cgi'};
458    #my $gsdlhome  = $self->{'gsdlhome'};
459
460    if ($baseaction::authentication_enabled) {
461    # Ensure the user is allowed to edit this collection   
462    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
463    }
464
465    my $site = $self->{'site'};
466    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
467   
468    $gsdl_cgi->checked_chdir($collect_dir);
469
470    # Obtain the collect dir
471    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
472
473    # Make sure the collection isn't locked by someone else
474    $self->lock_collection($username, $collect);
475
476    $self->_set_index_metadata(@_);
477   
478    # Release the lock once it is done
479    $self->unlock_collection($username, $collect);
480}
481
482# call this to set the metadata for a combination of dirs archives, import or index, or live
483# if none specified, defaults to index which was the original behaviour of set_metadata.
484sub set_metadata
485{
486    my $self = shift @_;
487
488    # Testing that not defining a variable, setting it to "" or to "  " all return false
489    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
490
491    my $where = $self->{'where'};
492    if(!$where || ($where =~ m/^\s*$/)) {   
493    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
494    return;
495    }
496
497    # authenticate and lock collection once, even if processing multiple dirs
498    my $username  = $self->{'username'};
499    my $collect   = $self->{'collect'};
500    my $gsdl_cgi  = $self->{'gsdl_cgi'};
501   
502    if ($baseaction::authentication_enabled) {
503    # Ensure the user is allowed to edit this collection   
504    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
505    }
506   
507    if($where =~ m/index/) {
508    my $site = $self->{'site'};
509    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
510    $gsdl_cgi->checked_chdir($collect_dir);
511    }
512
513    # Make sure the collection isn't locked by someone else
514    $self->lock_collection($username, $collect);
515
516
517    # now at last can set the metadata. $where can specify multiple
518    # $where is of the form: import|archives|index, or a subset thereof
519
520    #my @whichdirs = split('\|', $where);
521
522    # just check whether $where contains import/archives/index/live in turn, and
523    # for each case, process it accordingly
524    if($where =~ m/import/) {
525    $self->_set_import_metadata(@_);       
526    }
527
528    if($where =~ m/archives/) {
529
530    # look up docID arg which is optional to set_metadata because it's optional
531    # to set_import, but which is compulsory to set_archives_metadata
532    my $docid     = $self->{'d'};
533    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
534        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
535    }
536    # we have a docid, so can set archives meta
537    $self->_set_archives_metadata(@_); 
538    }
539
540    if($where =~ m/index/) {
541   
542    # look up docID arg which is optional to set_metadata because it's optional
543    # to set_import, but which is compulsory to set_archives_metadata
544    my $docid     = $self->{'d'};
545    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
546        $gsdl_cgi->generate_error("No docid (d=...) specified.");
547    }
548    # we have a docid, so can set index meta
549    $self->_set_index_metadata(@_);
550    }   
551
552    if($where =~ m/live/) {
553    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
554    }
555
556    # Release the lock once it is done
557    $self->unlock_collection($username, $collect);
558}
559
560sub set_metadata_array
561{
562    my $self = shift @_;
563
564    my $where = $self->{'where'};
565    if(!$where || ($where =~ m/^\s*$/)) {   
566    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
567    return;
568    }
569
570    my $username  = $self->{'username'};
571    my $collect   = $self->{'collect'};
572    my $gsdl_cgi  = $self->{'gsdl_cgi'};
573
574    if ($baseaction::authentication_enabled) {
575    # Ensure the user is allowed to edit this collection
576    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
577    }
578
579    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
580    # But including the stmt during this code reorganisation to preserve as-is what used to happen
581    my $site = $self->{'site'};
582    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
583    $gsdl_cgi->checked_chdir($collect_dir);
584
585    # Make sure the collection isn't locked by someone else
586    $self->lock_collection($username, $collect);
587
588    if($where =~ m/import/) {
589    $self->_set_import_metadata_array(@_);
590    }
591    if($where =~ m/archives/) {
592    $self->_set_archives_metadata_array(@_);
593    }
594    if($where =~ m/index/) {
595    $self->_set_index_metadata_array(@_);
596    }
597    if($where =~ m/live/) {
598        $self->_set_live_metadata_array(@_);
599    }
600
601    # Release the lock once it is done
602    $self->unlock_collection($username, $collect);
603}
604
605sub _set_index_metadata_array
606{
607    my $self = shift @_;
608
609    my $collect   = $self->{'collect'};
610    my $gsdl_cgi  = $self->{'gsdl_cgi'};
611    my $site = $self->{'site'};
612    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
613
614   
615    # look up additional args
616   
617    my $infodbtype = $self->{'infodbtype'};
618   
619    my $json_str      = $self->{'json'};
620    my $doc_array = decode_json $json_str;
621   
622   
623    my $global_status = 0;
624    my $global_mess = "";
625   
626    my @all_docids = ();
627   
628    foreach my $doc_array_rec ( @$doc_array ) {
629   
630    my $status = -1;
631    my $docid     = $doc_array_rec->{'docid'};
632   
633    push(@all_docids,$docid);
634
635    my $metaname  = $doc_array_rec->{'metaname'};
636    if(defined $metaname) {
637        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
638        my $metavalue = $doc_array_rec->{'metavalue'};
639        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
640
641        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
642    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
643        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
644       
645        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
646        $metaname  = $metatable_rec->{'metaname'};
647        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
648        my $metapos = undef;
649        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
650
651        foreach my $metavalue ( @$metavals ) { # metavals is an array
652            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
653            if($metamode eq "override") { # now, having overridden the metavalue for the first,
654            # need to accumulate subsequent metavals for this metaname, else the just-assigned
655            # metavalue for this metaname will be lost
656            $metamode = "accumulate";
657            }
658        }           
659        }
660    }
661   
662    if ($status != 0) {
663        # Catch error if set infodb entry failed
664        $global_status = $status;
665        $global_mess .= "Failed to set metadata key: $docid\n";
666        $global_mess .= "Exit status: $status\n";
667        $global_mess .= "System Error Message: $!\n";
668        $global_mess .= "-" x 20;
669    }
670    }
671   
672    if ($global_status != 0) {
673    $global_mess .= "PATH: $ENV{'PATH'}\n";
674    $gsdl_cgi->generate_error($global_mess);
675    }
676    else {
677    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
678    $gsdl_cgi->generate_ok_message($mess);
679    }
680}
681
682sub set_index_metadata_array
683{
684    my $self = shift @_;
685
686    my $username  = $self->{'username'};
687    my $collect   = $self->{'collect'};
688    my $gsdl_cgi  = $self->{'gsdl_cgi'};
689#    my $gsdlhome  = $self->{'gsdlhome'};
690
691    if ($baseaction::authentication_enabled) {
692    # Ensure the user is allowed to edit this collection   
693    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
694    }
695
696    my $site = $self->{'site'};
697    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
698   
699    $gsdl_cgi->checked_chdir($collect_dir);
700
701    # Obtain the collect dir
702    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
703
704    # Make sure the collection isn't locked by someone else
705    $self->lock_collection($username, $collect);
706
707    $self->_set_index_metadata_array(@_);
708
709    # Release the lock once it is done
710    $self->unlock_collection($username, $collect);
711}
712
713# experimental, newly added in and untested
714sub _set_live_metadata_array
715{
716    my $self = shift @_;
717
718    my $collect   = $self->{'collect'};
719    my $gsdl_cgi  = $self->{'gsdl_cgi'};
720
721    my $site = $self->{'site'};
722    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
723
724   
725    # look up additional args
726    my $infodbtype = $self->{'infodbtype'};
727    # To people who know $collect_tail please add some comments
728    # Obtain path to the database
729    my $collect_tail = $collect;
730    $collect_tail =~ s/^.*[\/|\\]//;
731    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
732    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
733
734   
735    my $json_str      = $self->{'json'};
736    my $doc_array = decode_json $json_str;
737   
738   
739    my $global_status = 0;
740    my $global_mess = "";
741   
742    my @all_docids = ();
743
744
745    foreach my $doc_array_rec ( @$doc_array ) {
746   
747    my $status = -1;
748    my $docid     = $doc_array_rec->{'docid'};
749
750    push(@all_docids,$docid);
751
752    my $metaname  = $doc_array_rec->{'metaname'};
753    if(defined $metaname) {
754        my $dbkey = "$docid.$metaname";
755        my $metavalue = $doc_array_rec->{'metavalue'};
756
757        # Set the new value
758        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
759        $status = system($cmd);
760
761    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
762        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
763        foreach my $metatable_rec ( @$metatable ) {
764        $metaname  = $metatable_rec->{'metaname'};
765        my $dbkey = "$docid.$metaname";
766
767        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
768        foreach my $metavalue ( @$metavals ) {
769             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
770             $status = system($cmd);
771        }
772        }
773       
774    }
775
776    if ($status != 0) {
777        # Catch error if gdbmget failed
778        $global_status = $status;
779        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
780        $global_mess .= "Exit status: $status\n";
781        $global_mess .= "System Error Message: $!\n";
782        $global_mess .= "-" x 20;
783    }
784    }
785   
786    if ($global_status != 0) {
787    $global_mess .= "PATH: $ENV{'PATH'}\n";
788    $gsdl_cgi->generate_error($global_mess);
789    }
790    else {
791    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
792    $gsdl_cgi->generate_ok_message($mess);
793    }
794}
795
796sub set_live_metadata_array
797{
798    my $self = shift @_;
799
800    my $username  = $self->{'username'};
801    my $collect   = $self->{'collect'};
802    my $gsdl_cgi  = $self->{'gsdl_cgi'};
803
804    if ($baseaction::authentication_enabled) {
805    # Ensure the user is allowed to edit this collection   
806    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
807    }
808
809    my $site = $self->{'site'};
810    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
811   
812    $gsdl_cgi->checked_chdir($collect_dir);
813
814    # Make sure the collection isn't locked by someone else
815    $self->lock_collection($username, $collect);
816
817    $self->_set_live_metadata_array(@_);
818
819    # Release the lock once it is done
820    $self->unlock_collection($username, $collect);
821}
822
823
824sub dxml_metadata
825{
826    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
827    my $metaname = $parser->{'parameters'}->{'metaname'};
828    my $metamode = $parser->{'parameters'}->{'metamode'};
829   
830###!!!    print STDERR "**** Processing closing </Metadata> tag\n";
831   
832    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
833   
834    # Find the right metadata tag and checks if we are going to
835    # override it
836    #
837    # Note: This over writes the first metadata block it
838    # encountered. If there are multiple Sections in the doc.xml, it
839    # might not behave as you would expect
840
841    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
842    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n" if (defined $opt_doc_secnum);
843   print STDERR "**** metamode = $metamode\n";
844   
845    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
846    {
847        my $name_attr = $attrHash->{'name'};
848         print STDOUT "*** testing: $name_attr eq $metaname ?   and  $metamode eq override ?\n";
849        if (($name_attr eq $metaname) && ($metamode eq "override"))
850        {
851            if (!defined $parser->{'parameters'}->{'poscount'})
852            {
853                $parser->{'parameters'}->{'poscount'} = 0;
854            }
855            else
856            {
857                $parser->{'parameters'}->{'poscount'}++;
858            }
859           
860            if (defined $parser->{'parameters'}->{'metapos'}) {
861
862                if ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}) {
863                print STDERR "#### got pos match!!\n";
864                # Get the value and override the current value
865                my $metavalue = $parser->{'parameters'}->{'metavalue'};
866                $attrHash->{'_content'} = $metavalue;
867               
868                # Don't want it to wipe out any other pieces of metadata
869                $parser->{'parameters'}->{'metamode'} = "done";
870                }
871            }
872            elsif (defined $parser->{'parameters'}->{'prevmetavalue'}) {
873                if ($parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'}) {
874                print STDERR "### prev meta value matches\n";
875                my $metavalue = $parser->{'parameters'}->{'metavalue'};
876                $attrHash->{'_content'} = $metavalue;
877                $parser->{'parameters'}->{'metamode'} = "done";
878
879                }
880            }
881            elsif ($parser->{'parameters'}->{'poscount'} == 0) { # explicit catch all
882                print STDERR "#### got pos match!!\n";
883                # Get the value and override the current value
884                my $metavalue = $parser->{'parameters'}->{'metavalue'};
885                $attrHash->{'_content'} = $metavalue;
886               
887                # Don't want it to wipe out any other pieces of metadata
888                $parser->{'parameters'}->{'metamode'} = "done";
889
890               
891            }
892
893           
894           
895        }
896    }
897
898    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
899    return [$tagname => $attrHash];
900}
901
902# This method exists purely for catching invalid section numbers that the client
903# requested to edit. Once the parser has reached the end (the final </Archive> tag),
904# we've seen all the Sections in the doc.xml, and none of their section nums matched
905# if the metamode has not been set to 'done' by then.
906sub dxml_archive
907{
908    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
909    my $metamode = $parser->{'parameters'}->{'metamode'};
910   
911    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
912    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
913   
914#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
915   
916    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
917    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
918
919    my $metaname = $parser->{'parameters'}->{'metaname'};
920    my $metavalue = $parser->{'parameters'}->{'metavalue'};
921   
922    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
923    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
924    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
925    print STDERR "\tin section $opt_doc_secnum.\n";
926    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
927    }
928   
929    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
930    return [$tagname => $attrHash];
931}
932
933sub dxml_description
934{
935    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
936    my $metamode = $parser->{'parameters'}->{'metamode'};
937
938    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
939    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
940   
941###!!!  print STDERR "**** Processing a closing </Description> tag \n";
942#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
943   
944    # Accumulate the metadata
945
946    # We'll be accumulating metadata at this point if we haven't found and therefore
947    # haven't processed the metadata yet.
948    # For subsections, this means that if we're at a matching subsection, but haven't
949    # found the correct metaname to override in that subsection, we accumulate it as new
950    # meta in the subsection by adding it to the current description.
951    # If there's no subsection info for the metadata, it will accumulate at the top level
952    # section description if we hadn't found a matching metaname to override at this point.
953
954    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
955    # at the toplevel section. In the latter case, it means we want to process meta in the
956    # toplevel section. So the eq check between the values below will work in all cases.
957   
958    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
959    # this document has been provided. In that case, the metadata for that opt_doc_secnum
960    # won't get attached/accumulated to any part of the doc, not even its top-level section.
961
962    if ($curr_secnum eq $opt_doc_secnum
963        && ($metamode eq "accumulate" || $metamode eq "override")) {
964        if ($metamode eq "override") {
965        print "Got to end of <Description> block. No metadata value to override.  Switching 'metamode' to accumulate\n";
966        }
967
968        # If we get to here and metamode is override, this means there
969        # was no existing value to overide => treat as an append operation
970       
971        # Tack a new metadata tag on to the end of the <Metadata>+ block
972        my $metaname = $parser->{'parameters'}->{'metaname'};
973        my $metavalue = $parser->{'parameters'}->{'metavalue'};
974       
975        my $metadata_attr = {
976        '_content' => $metavalue,
977        'name' => $metaname,
978        'mode' => "accumulate"
979        };
980       
981        my $append_metadata = [ "Metadata" => $metadata_attr ];
982        my $description_content = $attrHash->{'_content'};
983       
984        print "Appending metadata to doc.xml\n";
985       
986        if (ref($description_content)) {
987        # got some existing interesting nested content
988        push(@$description_content, "    ", $append_metadata ,"\n        ");
989        }
990        else {
991        #description_content is most likely a string such as "\n"
992        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
993        }
994       
995        $parser->{'parameters'}->{'metamode'} = "done";
996    }       
997    else {
998        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
999##      print STDERR "**** NOT ACCUMULATE?!? \n";
1000    }
1001
1002    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1003    return [$tagname => $attrHash];
1004}
1005
1006sub edit_xml_file
1007{
1008    my $self = shift @_;
1009    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1010
1011    # use XML::Rules to add it in (read in and out again)
1012    my $parser = XML::Rules->new(start_rules     => $start_rules,
1013                 rules           => $rules,
1014                 style           => 'filter',
1015                                 output_encoding => 'utf8' );
1016
1017    my $xml_in = "";
1018    if (!open(MIN,"<$filename")) {
1019    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1020    }
1021    else {
1022        # Read all the text in
1023    my $line;
1024    while (defined ($line=<MIN>)) {
1025        $xml_in .= $line;
1026    }
1027    close(MIN);
1028   
1029    my $MOUT;   
1030    if (!open($MOUT,">$filename")) {
1031        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1032    }
1033    else {
1034        # Matched lines will get handled by the call backs
1035##      my $xml_out = "";
1036
1037        binmode($MOUT,":utf8");
1038        $parser->filter($xml_in,$MOUT, $options);
1039
1040#       binmode(MOUT,":utf8");
1041#       print MOUT $xml_out;
1042        close($MOUT);       
1043    }
1044    }
1045
1046    # copy across any custom error information that was stored during parsing
1047    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1048}
1049
1050sub edit_doc_xml
1051{
1052    my $self = shift @_;
1053    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1054
1055    my $info_mess = <<RAWEND;
1056****************************
1057  edit_doc_xml()
1058****************************
1059RAWEND
1060
1061    $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1062    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1063    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1064    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1065    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1066    $info_mess .= " opt_secnum  = $opt_secnum\n"  if defined($opt_secnum);
1067    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1068     
1069    $info_mess .= "****************************\n";
1070
1071    $gsdl_cgi->generate_message($info_mess);
1072   
1073    # To monitor which section/subsection number we are in
1074    my @start_rules =
1075    ( 'Section'    => \&dxml_start_section );
1076
1077    # use XML::Rules to add it in (read in and out again)
1078    # Set the call back functions
1079    my @rules =
1080    ( _default => 'raw',
1081      'Metadata'    => \&dxml_metadata,
1082      'Description' => \&dxml_description,
1083      'Archive'     => \&dxml_archive); # just for catching errors at end
1084     
1085    # Sets the parameters
1086    my $options = { 'metaname'  => $metaname,
1087            'metapos'   => $metapos,
1088            'metavalue' => $metavalue,
1089            'metamode'  => $metamode,
1090            'prevmetavalue' => $prevmetavalue };
1091           
1092    if (defined $opt_secnum) {
1093    $options->{'secnum'} = $opt_secnum;
1094    }
1095
1096    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1097}
1098
1099sub set_archives_metadata_entry
1100{
1101    my $self = shift @_;
1102    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1103
1104    my $info_mess = <<RAWEND;
1105****************************
1106  set_archives_metadata_entry()
1107****************************
1108RAWEND
1109
1110    $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1111    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1112    $info_mess .= " collect     = $collect\n"     if defined($collect);
1113    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
1114    $info_mess .= " docid       = $docid\n"       if defined($docid);
1115    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1116    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1117    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1118    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1119    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1120     
1121    $info_mess .= "****************************\n";
1122
1123    $gsdl_cgi->generate_message($info_mess);
1124   
1125    # Obtain the doc.xml path for the specified docID
1126    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1127
1128    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1129    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1130    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1131   
1132    # The $doc_xml_file is relative to the archives, and now let's get the full path
1133    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1134    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1135
1136    # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
1137    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1138    # Thereafter, we will finally be setting the overriding metavalue for this metaname
1139    if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
1140    # remove all values of $metaname metadata
1141    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
1142    }
1143    # Edit the doc.xml file with the specified metadata name, value and position.
1144    # TODO: there is a potential problem here as this edit_doc_xml function
1145    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1146    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1147   
1148    # dxml_metadata method ignores metapos if metamode anything other than override
1149    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1150            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1151
1152    # return 0; # return 0 for now to indicate no error
1153    return (defined $self->{'error_msg'}) ? 1 : 0;
1154}
1155
1156
1157sub set_archives_metadata
1158{
1159    my $self = shift @_;
1160
1161    my $username  = $self->{'username'};
1162    my $collect   = $self->{'collect'};
1163    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1164   
1165    if ($baseaction::authentication_enabled) {
1166    # Ensure the user is allowed to edit this collection
1167    $self->authenticate_user($username, $collect);
1168    }
1169
1170    # Make sure the collection isn't locked by someone else
1171    $self->lock_collection($username, $collect);
1172
1173    $self->_set_archives_metadata(@_);
1174
1175    # Release the lock once it is done
1176    $self->unlock_collection($username, $collect);
1177}
1178
1179sub _set_archives_metadata_array
1180{
1181    my $self = shift @_;
1182   
1183    my $collect   = $self->{'collect'};
1184    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1185    my $site = $self->{'site'};
1186    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1187
1188    # look up additional args
1189   
1190    my $infodbtype = $self->{'infodbtype'};
1191   
1192    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1193   
1194    my $json_str      = $self->{'json'};
1195    my $doc_array = decode_json $json_str;
1196   
1197   
1198    my $global_status = 0;
1199    my $global_mess = "";
1200   
1201    my @all_docids = ();
1202   
1203    foreach my $doc_array_rec ( @$doc_array ) {
1204    my $status    = -1;
1205    my $docid     = $doc_array_rec->{'docid'};
1206
1207    push(@all_docids,$docid);
1208   
1209    my $metaname  = $doc_array_rec->{'metaname'};
1210    if(defined $metaname) {
1211       
1212        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1213
1214        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1215        my $metavalue = $doc_array_rec->{'metavalue'};
1216        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1217       
1218       
1219        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1220        # make "accumulate" the default (less destructive, as it won't actually
1221        # delete any existing values)
1222        $metamode = "accumulate";
1223        }       
1224       
1225        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1226                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1227    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1228        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1229       
1230        foreach my $metatable_rec ( @$metatable ) {
1231        $metaname  = $metatable_rec->{'metaname'};
1232        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1233        my $metapos = undef;
1234        my $prevmetavalue = undef;
1235        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1236       
1237        foreach my $metavalue ( @$metavals ) {
1238            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1239                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1240           
1241            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1242            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1243            # metavalue for this metaname will be lost
1244            $metamode = "accumulate";
1245            }
1246        }           
1247        }       
1248    }
1249       
1250    if ($status != 0) {
1251        # Catch error if set infodb entry failed
1252        $global_status = $status;
1253        $global_mess .= "Failed to set metadata key: $docid\n";
1254        $global_mess .= "Exit status: $status\n";
1255        $global_mess .= "System Error Message: $!\n";
1256        $global_mess .= "-" x 20 . "\n";
1257    }
1258    }
1259   
1260    if ($global_status != 0) {
1261    $global_mess .= "PATH: $ENV{'PATH'}\n";
1262    $gsdl_cgi->generate_error($global_mess);
1263    }
1264    else {
1265    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1266    $gsdl_cgi->generate_ok_message($mess);
1267    }
1268}
1269
1270sub set_archives_metadata_array
1271{
1272    my $self = shift @_;
1273
1274    my $username  = $self->{'username'};
1275    my $collect   = $self->{'collect'};
1276    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1277#    my $gsdlhome  = $self->{'gsdlhome'};
1278
1279    if ($baseaction::authentication_enabled) {
1280    # Ensure the user is allowed to edit this collection   
1281    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1282    }
1283
1284    my $site = $self->{'site'};
1285    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1286   
1287    $gsdl_cgi->checked_chdir($collect_dir);
1288
1289    # Obtain the collect dir
1290    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1291
1292    # Make sure the collection isn't locked by someone else
1293    $self->lock_collection($username, $collect);
1294
1295    $self->_set_archives_metadata_array(@_);
1296   
1297    # Release the lock once it is done
1298    $self->unlock_collection($username, $collect);
1299}
1300
1301sub _remove_archives_metadata
1302{
1303    my $self = shift @_;
1304
1305    my $collect   = $self->{'collect'};
1306    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1307#   my $gsdlhome  = $self->{'gsdlhome'};
1308    my $infodbtype = $self->{'infodbtype'};
1309   
1310    my $site = $self->{'site'};
1311       
1312    # Obtain the collect and archive dir   
1313    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1314   
1315    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1316
1317    # look up additional args
1318    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1319   
1320    my $metaname = $self->{'metaname'};
1321    my $metapos = $self->{'metapos'};
1322    my $metavalue = $self->{'metavalue'};
1323
1324    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1325    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
1326
1327    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1328    # default to deleting the first metavalue for the given metaname
1329    # Beware that if both metapos AND metavalue are defined, both matches (if any)
1330    # seem to get deleted in one single remove_archives_meta action invocation.
1331    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1332    # deleted, both get deleted.
1333    if(!defined $metapos && !defined $metavalue) {
1334        $metapos = 0;
1335    }
1336
1337    my $metamode = $self->{'metamode'};
1338    $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
1339
1340    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1341    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1342
1343    # This now stores the full pathname
1344    my $doc_filename = $doc_rec->{'doc-file'}->[0];
1345
1346    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1347#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1348   
1349    if ($status == 0)
1350    {
1351        my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
1352        $mess .= "  $metaname";
1353        $mess .= "->[$metapos]" if (defined $metapos);
1354        $mess .= " ($metavalue)" if (defined $metavalue);
1355        $gsdl_cgi->generate_ok_message($mess); 
1356    }
1357    else
1358    {
1359        my $mess .= "Failed to remove archives metadata key: $docid\n";
1360        $mess .= "Exit status: $status\n";
1361        $mess .= "System Error Message: $!\n";
1362        $mess .= "-" x 20 . "\n";
1363       
1364        $gsdl_cgi->generate_error($mess);
1365    }
1366   
1367    #return $status; # in case calling functions have a use for this
1368}
1369
1370sub remove_archives_metadata
1371{
1372    my $self = shift @_;
1373
1374    my $username  = $self->{'username'};
1375    my $collect   = $self->{'collect'};
1376    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1377   
1378    if ($baseaction::authentication_enabled)
1379    {
1380        # Ensure the user is allowed to edit this collection       
1381        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1382    }
1383
1384    # Make sure the collection isn't locked by someone else
1385    $self->lock_collection($username, $collect);
1386
1387    $self->_remove_archives_metadata(@_);
1388
1389    # Release the lock once it is done
1390    $self->unlock_collection($username, $collect);
1391}
1392
1393sub remove_from_doc_xml
1394{
1395    my $self = shift @_;
1396    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1397   
1398    my @start_rules = ('Section' => \&dxml_start_section);
1399   
1400    # Set the call-back functions for the metadata tags
1401    my @rules =
1402    (
1403        _default => 'raw',
1404        'Metadata' => \&rfdxml_metadata
1405    );
1406       
1407    my $parser = XML::Rules->new
1408    (
1409        start_rules => \@start_rules,
1410        rules => \@rules,
1411        style => 'filter',
1412        output_encoding => 'utf8',
1413#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1414        stripspaces => 2|0|0 # ineffectual
1415    );
1416   
1417    my $status = 0;
1418    my $xml_in = "";
1419    if (!open(MIN,"<$doc_xml_filename"))
1420    {
1421        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1422        $status = 1;
1423    }
1424    else
1425    {
1426        # Read them in
1427        my $line;
1428        while (defined ($line=<MIN>)) {
1429            $xml_in .= $line;
1430        }
1431        close(MIN);
1432
1433        # Filter with the call-back functions
1434        my $xml_out = "";
1435
1436        my $MOUT;
1437        if (!open($MOUT,">$doc_xml_filename")) {
1438            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1439            $status = 1;
1440        }
1441        else {
1442            binmode($MOUT,":utf8");
1443            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1444            close($MOUT);       
1445        }
1446    }
1447    return $status;
1448}
1449
1450sub rfdxml_metadata
1451{
1452    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1453
1454    # For comparisons, toplevel section is indicated by ""
1455    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1456    my $secid = $parser->{'parameters'}->{'secid'} || "";
1457
1458    if (!($secid eq $curr_sec_num))
1459    {
1460        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1461        return [$tagname => $attrHash];
1462    }
1463
1464    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1465    {
1466        if (!defined $parser->{'parameters'}->{'poscount'})
1467        {
1468            $parser->{'parameters'}->{'poscount'} = 0;
1469        }
1470        else
1471        {
1472            $parser->{'parameters'}->{'poscount'}++;
1473        }
1474       
1475        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1476        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {         
1477            return [];
1478        }
1479
1480        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1481        {   
1482            return [];
1483        }
1484       
1485        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1486        {   
1487            return [];
1488        }
1489    }
1490   
1491    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1492    return [$tagname => $attrHash];
1493}
1494
1495sub mxml_metadata
1496{
1497    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1498    my $metaname = $parser->{'parameters'}->{'metaname'};
1499    my $metamode = $parser->{'parameters'}->{'metamode'};
1500
1501    # Report error if we don't see FileName tag before this
1502    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1503   
1504    # Don't do anything if we are not in the right FileSet
1505    my $file_regexp = $parser->{'parameters'}->{'current_file'};
1506    if ($file_regexp =~ /\.\*/) {
1507    # Only interested in a file_regexp if it specifies precisely one
1508    # file. 
1509    # So, skip anything with a .* in it as it is too general
1510##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1511    return [$tagname => $attrHash];
1512    }
1513    my $src_file = $parser->{'parameters'}->{'src_file'};
1514    if (!($src_file =~ /$file_regexp/)) {
1515    return [$tagname => $attrHash];
1516    }
1517##    print STDERR "*** mxl metamode = $metamode\n";
1518
1519    # Find the right metadata tag and checks if we are going to override it
1520    my $name_attr = $attrHash->{'name'};
1521    if (($name_attr eq $metaname) && ($metamode eq "override")) {
1522
1523    # now metadata.xml functions need to keep track of metapos
1524    if (!defined $parser->{'parameters'}->{'poscount'})
1525    {
1526        $parser->{'parameters'}->{'poscount'} = 0;
1527    }
1528    else
1529    {
1530        $parser->{'parameters'}->{'poscount'}++;
1531    }
1532
1533    # If either the metapos or prevmetavalue is set,
1534        # get the value and override the current value
1535    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1536
1537    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1538    {
1539        $attrHash->{'_content'} = $metavalue;
1540
1541        ##  print STDERR "**** overriding metadata.xml\n";
1542       
1543        # Don't want it to wipe out any other pieces of metadata
1544        $parser->{'parameters'}->{'metamode'} = "done";
1545    }
1546    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1547    {
1548        $attrHash->{'_content'} = $metavalue;
1549        $parser->{'parameters'}->{'metamode'} = "done";
1550    }
1551    }
1552
1553    # mxml_description will process the metadata if metadata is accumulate,
1554    # or if we haven't found the metadata to override
1555
1556    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1557    return [$tagname => $attrHash];
1558}
1559
1560
1561sub mxml_description
1562{
1563    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1564    my $metamode = $parser->{'parameters'}->{'metamode'};   
1565
1566    # Failed... Report error if we don't see FileName tag before this
1567    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1568
1569    # Don't do anything if we are not in the right FileSet
1570    my $file_regexp = $parser->{'parameters'}->{'current_file'};
1571    if ($file_regexp =~ m/\.\*/) {
1572    # Only interested in a file_regexp if it specifies precisely one
1573    # file. 
1574    # So, skip anything with a .* in it as it is too general
1575    return [$tagname => $attrHash];
1576    }
1577    my $src_file = $parser->{'parameters'}->{'src_file'};
1578   
1579    if (!($src_file =~ m/$file_regexp/)) {
1580    return [$tagname => $attrHash];
1581    }
1582
1583    # Accumulate the metadata block to the end of the description block
1584    # Note: This adds metadata block to all description blocks, so if there are
1585    # multiple FileSets, it will add to all of them
1586    if (($metamode eq "accumulate") || ($metamode eq "override")) {
1587
1588    # if metamode was "override" but get to here then it failed to
1589    # find an item to override, in which case it should append its
1590    # value to the end, just like the "accumulate" mode
1591
1592    if ($metamode eq "override") {
1593        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1594    }
1595
1596    # tack a new metadata tag on to the end of the <Metadata>+ block
1597    my $metaname = $parser->{'parameters'}->{'metaname'};
1598    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1599   
1600    my $metadata_attr = { '_content' => $metavalue,
1601                  'name'     => $metaname,
1602                  'mode'     => "accumulate" };
1603
1604    my $append_metadata = [ "Metadata" => $metadata_attr ];
1605    my $description_content = $attrHash->{'_content'};
1606   
1607##  print STDERR "*** appending to metadata.xml\n";
1608
1609    # append the new metadata element to the end of the current
1610    # content contained inside this tag
1611    if (ref($description_content) eq "") {
1612        # => string or numeric literal
1613        # this is caused by a <Description> block has no <Metadata> child elements
1614        # => set up an empty array in '_content'
1615        $attrHash->{'_content'} = [ "\n" ];
1616        $description_content = $attrHash->{'_content'};
1617    }
1618
1619    push(@$description_content,"    ", $append_metadata ,"\n        ");
1620    $parser->{'parameters'}->{'metamode'} = "done";
1621    }
1622
1623    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1624    return [$tagname => $attrHash];
1625}
1626
1627
1628sub mxml_fileset
1629{
1630    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1631
1632    # Initilise the current_file
1633    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1634    # FileName tag must come before Description tag
1635    $parser->{'parameters'}->{'current_file'} = "";
1636
1637    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1638    return [$tagname => $attrHash];
1639}
1640
1641sub mxml_directorymetadata
1642{
1643    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1644
1645    # if we haven't processed the metadata when we reach the end of metadata.xml
1646    # it's because there's no particular FileSet element whose FileName matched
1647    # In which case, add a new FileSet for this FileName
1648    my $metamode = $parser->{'parameters'}->{'metamode'};
1649    if($metamode ne "done") {
1650   
1651    if ($metamode eq "override") {
1652        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1653    }
1654
1655    # If we get to here and metamode is override, this means there
1656    # was no existing value to overide => treat as an append operation
1657   
1658    # Create a new FileSet element and append to DirectoryMetadata
1659    # <FileSet>
1660    # <FileName>src_file</FileName>
1661    # <Description>
1662    # <Metadata mode="" name="">metavalue</Metadata>
1663    # </Description>
1664    # </FileSet>
1665    my $src_file = $parser->{'parameters'}->{'src_file'};
1666    my $metaname = $parser->{'parameters'}->{'metaname'};
1667    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1668    my $metadata_attr = {
1669        '_content' => $metavalue,
1670        'name' => $metaname,
1671        'mode' => "accumulate"
1672    };
1673    my $append_metadata = [ "Metadata" => $metadata_attr ];
1674    my $description_attr = { '_content' => [ "\n\t\t   ", $append_metadata, "\n\t\t"] };
1675    my $description_element = [ "Description" => $description_attr ];
1676
1677    #_content is not an attribute, it's special and holds the children of this element
1678    # including the textnode value embedded in this element if any.
1679    my $filename_attr = {'_content' => $src_file};
1680    my $filename_element = [ "FileName" => $filename_attr ];
1681
1682    my $fileset_attr = {};
1683    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
1684    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
1685   
1686   
1687    # get children of dirmeta, and push the new FileSet element onto it
1688    print "Appending metadata to metadata.xml\n";
1689    my $dirmeta_content = $attrHash->{'_content'};
1690    if (ref($dirmeta_content)) {
1691        # got some existing interesting nested content
1692        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
1693        push(@$dirmeta_content, "\t", $fileset ,"\n");
1694    }
1695    else {
1696        #description_content is most likely a string such as "\n"
1697        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
1698        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
1699    }   
1700
1701    $parser->{'parameters'}->{'metamode'} = "done";
1702    }
1703    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1704    return [$tagname => $attrHash];
1705}
1706
1707
1708sub edit_metadata_xml
1709{
1710    my $self = shift @_;
1711    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
1712
1713    # Set the call-back functions for the metadata tags
1714    my @rules =
1715    ( _default => 'raw',
1716          'FileName' => \&mxml_filename,
1717      'Metadata' => \&mxml_metadata,
1718      'Description' => \&mxml_description,
1719          'FileSet' => \&mxml_fileset,
1720      'DirectoryMetadata' => \&mxml_directorymetadata);
1721
1722    # use XML::Rules to add it in (read in and out again)
1723    my $parser = XML::Rules->new(rules => \@rules,
1724                 style => 'filter',
1725                                 output_encoding => 'utf8',
1726                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1727
1728    if (!-e $metadata_xml_filename) {
1729   
1730        if (open(MOUT,">$metadata_xml_filename")) {
1731           
1732            my $src_file_re = &util::filename_to_regex($src_file);
1733            # shouldn't the following also be in the above utility routine??
1734            # $src_file_re =~ s/\./\\./g;
1735       
1736            print MOUT "<?xml version=\"1.0\"?>\n";
1737            print MOUT "<DirectoryMetadata>\n";
1738            print MOUT " <FileSet>\n";
1739            print MOUT "  <FileName>$src_file_re</FileName>\n";
1740            print MOUT "  <Description>\n";
1741            print MOUT "  </Description>\n";
1742            print MOUT " </FileSet>\n";
1743            print MOUT "</DirectoryMetadata>\n";
1744
1745            close(MOUT);
1746        }
1747        else {
1748            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1749        }
1750    }
1751   
1752   
1753    my $xml_in = "";
1754    if (!open(MIN,"<$metadata_xml_filename")) {
1755        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1756    }
1757    else {
1758        # Read them in
1759        my $line;
1760        while (defined ($line=<MIN>)) {
1761            $xml_in .= $line;
1762        }
1763        close(MIN);
1764
1765        # Filter with the call-back functions
1766        my $xml_out = "";
1767
1768        my $MOUT;
1769        if (!open($MOUT,">$metadata_xml_filename")) {
1770            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1771        }
1772        else {
1773            binmode($MOUT,":utf8");
1774
1775            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1776            # At the moment, I will just hack it!
1777            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1778            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1779            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1780            #print MOUT $xml_out;
1781
1782            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1783                              metapos => $metapos,
1784                      metavalue => $metavalue,
1785                      metamode => $metamode,
1786                      src_file => $src_file,
1787                      prevmetavalue => $prevmetavalue,
1788                      current_file => undef} );
1789            close($MOUT);       
1790        }
1791   }
1792}
1793
1794
1795sub set_import_metadata
1796{
1797    my $self = shift @_;
1798   
1799    my $username  = $self->{'username'};
1800    my $collect   = $self->{'collect'};
1801    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1802   
1803    if ($baseaction::authentication_enabled) {
1804    # Ensure the user is allowed to edit this collection
1805    $self->authenticate_user($username, $collect);
1806    }
1807
1808    # Make sure the collection isn't locked by someone else
1809    $self->lock_collection($username, $collect);
1810 
1811    $self->_set_import_metadata(@_);
1812
1813    # Release the lock once it is done
1814    $self->unlock_collection($username, $collect);
1815   
1816}
1817
1818sub set_import_metadata_array
1819{
1820    my $self = shift @_;
1821
1822    my $username  = $self->{'username'};
1823    my $collect   = $self->{'collect'};
1824    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1825#    my $gsdlhome  = $self->{'gsdlhome'};
1826
1827    if ($baseaction::authentication_enabled) {
1828    # Ensure the user is allowed to edit this collection   
1829    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1830    }
1831
1832    my $site = $self->{'site'};
1833    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1834   
1835    $gsdl_cgi->checked_chdir($collect_dir);
1836
1837    # Make sure the collection isn't locked by someone else
1838    $self->lock_collection($username, $collect);
1839
1840    $self->_set_import_metadata_array(@_);
1841
1842    # Release the lock once it is done
1843    $self->unlock_collection($username, $collect);
1844
1845}
1846
1847
1848sub _set_import_metadata_array
1849{
1850    my $self = shift @_;
1851
1852    my $collect   = $self->{'collect'};
1853    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1854
1855    my $site = $self->{'site'};
1856    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1857   
1858    # look up additional args
1859   
1860    my $infodbtype = $self->{'infodbtype'};
1861   
1862    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1863    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1864   
1865    my $json_str = $self->{'json'};
1866    my $doc_array = decode_json $json_str;
1867   
1868    my $global_status = 0;
1869    my $global_mess = "";
1870   
1871    my @all_docids = ();
1872   
1873    foreach my $doc_array_rec ( @$doc_array )
1874    {
1875    my $status = -1;
1876    my $docid = $doc_array_rec->{'docid'};
1877   
1878    my ($docid_root,$docid_secnum);
1879    if(defined $docid) {   
1880        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
1881        # as yet no support for setting subsection metadata in metadata.xml
1882        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1883        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
1884        next; # skip this docid in for loop
1885        }
1886    }
1887
1888    push(@all_docids,$docid); # docid_root rather
1889   
1890    my $metaname = $doc_array_rec->{'metaname'};
1891    if (defined $metaname) {
1892        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1893        my $metavalue = $doc_array_rec->{'metavalue'};
1894        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1895
1896        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1897        # make "accumulate" the default (less destructive, as it won't actually
1898        # delete any existing values)
1899        $metamode = "accumulate";
1900        }
1901
1902        # adding metapos and prevmetavalue support to import_metadata subroutines
1903        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1904        my $prevmetavalue = $self->{'prevmetavalue'};
1905
1906        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1907       
1908    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1909        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1910       
1911        foreach my $metatable_rec ( @$metatable ) {
1912        $metaname  = $metatable_rec->{'metaname'};
1913        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1914        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1915            # make "accumulate" the default (less destructive, as it won't actually
1916            # delete any existing values)
1917            $metamode = "accumulate";
1918        }
1919
1920        # No support for metapos and prevmetavalue in the JSON metatable substructure
1921        my $metapos = undef;
1922        my $prevmetavalue = undef;
1923        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1924       
1925        foreach my $metavalue ( @$metavals ) {
1926            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1927
1928            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1929            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
1930            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1931            # metavalue for this metaname will be lost
1932            $metamode = "accumulate";
1933            }
1934        }
1935        }
1936    }       
1937    }
1938
1939    # always a success message
1940    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1941    $gsdl_cgi->generate_ok_message($mess);
1942}
1943
1944# always returns true (1)
1945sub set_import_metadata_entry
1946{
1947    my $self = shift @_;
1948    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
1949
1950    my $info_mess = <<RAWEND;
1951****************************
1952  set_import_metadata_entry()
1953****************************
1954RAWEND
1955
1956    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1957    $info_mess .= " collect     = $collect\n"     if defined($collect);
1958    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
1959    $info_mess .= " arcinfo_doc_filename  = $arcinfo_doc_filename\n"  if defined($arcinfo_doc_filename);
1960    $info_mess .= " docid       = $docid\n"       if defined($docid);
1961    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1962    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1963    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1964    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1965    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1966     
1967    $info_mess .= "****************************\n";
1968
1969    $gsdl_cgi->generate_message($info_mess);
1970
1971    # import works with metadata.xml which can have inherited metadata
1972    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
1973    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
1974    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
1975    # what they're doing if they provide a metapos.
1976    if(defined $metapos) {
1977    print STDERR "@@@@ WARNING: metapos defined.\n";
1978    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
1979    }
1980
1981    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1982    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1983    my $metadata_xml_file;
1984    my $import_filename = undef;
1985   
1986    if (defined $docid) {
1987    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1988    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1989
1990    # This now stores the full pathname
1991    $import_filename = $doc_rec->{'src-file'}->[0];
1992    $import_filename = &util::placeholders_to_abspath($import_filename);
1993
1994    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
1995         # as the array version of the method doesn't support the -f parameter yet
1996    my $import_file  = $self->{'f'};
1997    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
1998    }
1999   
2000    # figure out correct metadata.xml file [?]
2001    # Assuming the metadata.xml file is next to the source file
2002    # Note: This will not work if it is using the inherited metadata from the parent folder
2003    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2004    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2005   
2006    # If we're overriding everything, then $prevmetavalue=undefined and
2007    # $metamode=override combined with $metapos=undefined
2008    # in which case we need to remove all metavalues for the metaname at the given (sub)section
2009    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2010    if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2011##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2012    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2013
2014    }
2015
2016    # Edit the metadata.xml
2017    # Modified by Jeffrey from DL Consulting
2018    # Handle the case where there is one metadata.xml file for multiple FileSets
2019    # The XML filter needs to know whether it is in the right FileSet
2020    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2021    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2022    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2023                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2024    #return 0;
2025    return $metadata_xml_filename;
2026}
2027
2028sub _remove_import_metadata
2029{
2030    my $self = shift @_;
2031
2032    my $collect   = $self->{'collect'};
2033    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2034#   my $gsdlhome  = $self->{'gsdlhome'};
2035    my $infodbtype = $self->{'infodbtype'};
2036   
2037    # Obtain the collect dir
2038    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2039    my $site = $self->{'site'};
2040    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2041   
2042    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2043    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2044
2045    # look up additional args
2046    my $docid = $self->{'d'};
2047    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2048    {
2049        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2050    }
2051   
2052    my $metaname = $self->{'metaname'};
2053    my $metapos = $self->{'metapos'};
2054    my $metavalue = $self->{'metavalue'};
2055
2056    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2057    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2058
2059    if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2060        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2061    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2062        $metapos = 0;
2063    }
2064    my $metamode = $self->{'metamode'};
2065    $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2066
2067    # import works with metadata.xml which can have inherited metadata
2068    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2069    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2070    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2071    # what they're doing if they provide a metapos.
2072    if(defined $metapos) {
2073        print STDERR "@@@@ WARNING: metapos defined.\n";
2074        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2075    }
2076   
2077    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2078    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2079    my $metadata_xml_file;
2080    my $import_filename = undef;
2081    if (defined $docid)
2082    {
2083        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2084        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2085
2086        # This now stores the full pathname
2087        $import_filename = $doc_rec->{'src-file'}->[0];
2088        $import_filename = &util::placeholders_to_abspath($import_filename);
2089    }
2090
2091    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2092    {
2093        $gsdl_cgi->generate_error("There is no metadata\n");
2094    }
2095   
2096    # figure out correct metadata.xml file [?]
2097    # Assuming the metadata.xml file is next to the source file
2098    # Note: This will not work if it is using the inherited metadata from the parent folder
2099    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2100    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2101   
2102    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2103   
2104    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2105    $mess .= "  $metaname";
2106    $mess .= " = $metavalue\n";
2107   
2108    $gsdl_cgi->generate_ok_message($mess);
2109
2110    #return $status; # in case calling functions have a use for this
2111}
2112
2113sub remove_import_metadata
2114{
2115    my $self = shift @_;
2116   
2117    my $username = $self->{'username'};
2118    my $collect   = $self->{'collect'};
2119    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2120   
2121    if ($baseaction::authentication_enabled) {
2122        # Ensure the user is allowed to edit this collection       
2123        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2124    }
2125
2126    # Make sure the collection isn't locked by someone else
2127    $self->lock_collection($username, $collect);
2128   
2129    $self->_remove_import_metadata(@_);
2130
2131    # Release the lock once it is done
2132    $self->unlock_collection($username, $collect);
2133
2134}
2135
2136sub remove_from_metadata_xml
2137{
2138    my $self = shift @_;
2139    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2140    # metamode generally has no meaning for removing meta, but is used by set_meta
2141    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2142
2143    # Set the call-back functions for the metadata tags
2144    my @rules =
2145    (
2146        _default => 'raw',
2147        'Metadata' => \&rfmxml_metadata,
2148        'FileName' => \&mxml_filename
2149    );
2150       
2151    my $parser = XML::Rules->new
2152    (
2153        rules => \@rules,
2154        style => 'filter',
2155        output_encoding => 'utf8',
2156     #normalisespaces => 1,
2157            stripspaces => 2|0|0 # ineffectual
2158    );
2159   
2160    my $xml_in = "";
2161    if (!open(MIN,"<$metadata_xml_filename"))
2162    {
2163        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2164    }
2165    else
2166    {
2167        # Read them in
2168        my $line;
2169        while (defined ($line=<MIN>)) {
2170            $xml_in .= $line;
2171        }
2172        close(MIN);
2173
2174        # Filter with the call-back functions
2175        my $xml_out = "";
2176
2177        my $MOUT;
2178        if (!open($MOUT,">$metadata_xml_filename")) {
2179            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2180        }
2181        else {
2182            binmode($MOUT,":utf8");
2183            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2184            close($MOUT);       
2185        }
2186    }
2187}
2188
2189sub rfmxml_metadata
2190{
2191    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2192
2193    # metadata.xml does not handle subsections
2194
2195    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2196    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2197        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2198    {
2199        if (!defined $parser->{'parameters'}->{'poscount'})
2200        {
2201            $parser->{'parameters'}->{'poscount'} = 0;
2202        }
2203        else
2204        {
2205            $parser->{'parameters'}->{'poscount'}++;
2206        }
2207
2208        # if overriding but no metapos, then clear all the meta for this metaname
2209        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
2210            return [];
2211        }
2212   
2213        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2214        {
2215            return [];
2216        }
2217        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2218        {
2219            return [];
2220        }       
2221    }
2222
2223    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2224    return [$tagname => $attrHash];
2225}
2226
2227sub _remove_live_metadata
2228{
2229    my $self = shift @_;
2230
2231    my $collect   = $self->{'collect'};
2232    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2233#    my $gsdlhome  = $self->{'gsdlhome'};
2234    my $infodbtype = $self->{'infodbtype'};
2235
2236    # Obtain the collect dir
2237    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2238    my $site = $self->{'site'};
2239    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2240
2241   
2242    # look up additional args
2243    my $docid     = $self->{'d'};
2244    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2245      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2246    }
2247   
2248    # Generate the dbkey
2249    my $metaname  = $self->{'metaname'};
2250    my $dbkey = "$docid.$metaname";
2251
2252    # To people who know $collect_tail please add some comments
2253    # Obtain the live gdbm_db path
2254    my $collect_tail = $collect;
2255    $collect_tail =~ s/^.*[\/|\\]//;
2256    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2257    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2258
2259    # Remove the key
2260    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2261    my $status = system($cmd);
2262    if ($status != 0) {
2263        # Catch error if gdbmdel failed
2264    my $mess = "Failed to set metadata key: $dbkey\n";
2265   
2266    $mess .= "PATH: $ENV{'PATH'}\n";
2267    $mess .= "cmd = $cmd\n";
2268    $mess .= "Exit status: $status\n";
2269    $mess .= "System Error Message: $!\n";
2270
2271    $gsdl_cgi->generate_error($mess);
2272    }
2273    else {
2274    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2275    }
2276
2277}
2278
2279sub remove_live_metadata
2280{
2281    my $self = shift @_;
2282
2283    my $username  = $self->{'username'};
2284    my $collect   = $self->{'collect'};
2285    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2286    my $gsdlhome  = $self->{'gsdlhome'};
2287   
2288    if ($baseaction::authentication_enabled) {
2289    # Ensure the user is allowed to edit this collection   
2290    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2291    }
2292
2293    # Make sure the collection isn't locked by someone else
2294    $self->lock_collection($username, $collect);
2295
2296    $self->_remove_live_metadata(@_);
2297
2298    $self->unlock_collection($username, $collect);
2299}
2300
2301sub remove_metadata
2302{
2303    my $self = shift @_;
2304
2305    my $where = $self->{'where'};
2306    if(!$where || ($where =~ m/^\s*$/)) {
2307    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2308    return;
2309    }
2310
2311    my $username  = $self->{'username'};
2312    my $collect   = $self->{'collect'};
2313    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2314
2315    if ($baseaction::authentication_enabled) {
2316    # Ensure the user is allowed to edit this collection   
2317    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2318    }
2319
2320    # Make sure the collection isn't locked by someone else
2321    $self->lock_collection($username, $collect);
2322
2323    # check which directories need to be processed, specified in $where as
2324    # any combination of import|archives|index|live
2325    if($where =~ m/import/) {
2326    $self->_remove_import_metadata(@_);     
2327    }
2328    if($where =~ m/archives/) {
2329    $self->_remove_archives_metadata(@_);       
2330   }
2331    if($where =~ m/index/) {
2332    $self->_remove_index_metadata(@_);     
2333    }
2334
2335    # Release the lock once it is done
2336    $self->unlock_collection($username, $collect);
2337}
2338
2339# the internal version, without authentication
2340sub _remove_index_metadata
2341{   
2342    my $self = shift @_;
2343
2344    my $collect   = $self->{'collect'};
2345    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2346#    my $gsdlhome  = $self->{'gsdlhome'};
2347    my $infodbtype = $self->{'infodbtype'};
2348   
2349    # Obtain the collect dir
2350    my $site = $self->{'site'};
2351    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2352    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2353
2354        # look up additional args
2355    my $docid     = $self->{'d'};
2356    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2357      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2358    }
2359    my $metaname  = $self->{'metaname'};
2360    my $metapos   = $self->{'metapos'};
2361    my $metavalue = $self->{'metavalue'};
2362
2363    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2364    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2365
2366    # To people who know $collect_tail please add some comments
2367    # -> In collection groups, I think collect_tailname is the subcollection name,
2368    # e.g. colgroup-name/col-tail-name
2369    # Obtain the path to the database
2370    my $collect_tail = $collect;
2371    $collect_tail =~ s/^.*[\/|\\]//;
2372    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2373    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2374
2375    # Read the docid entry
2376    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2377
2378    # Check to make sure the key does exist
2379    if (!defined ($doc_rec->{$metaname})) {
2380        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2381    }
2382
2383    # Obtain the specified metadata pos
2384    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2385    if(!defined $metapos && !defined $metavalue) {
2386        $metapos = 0;
2387    }
2388   
2389
2390    # consider check key is defined before deleting?
2391    # Loop through the metadata array and ignore the specified position
2392    my $filtered_metadata = [];
2393    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
2394    for (my $i=0; $i<$num_metadata_vals; $i++) {
2395    my $metaval = shift(@{$doc_rec->{$metaname}});
2396
2397    if (!defined $metavalue && $i != $metapos) {
2398        push(@$filtered_metadata,$metaval);
2399    }
2400   
2401    if(defined $metavalue && !($metavalue eq $metaval))
2402    {
2403        push(@$filtered_metadata,$metaval);
2404    }
2405    }
2406    $doc_rec->{$metaname} = $filtered_metadata;
2407
2408    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2409    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2410
2411    if ($status != 0) {
2412    my $mess = "Failed to set metadata key: $docid\n";
2413   
2414    $mess .= "PATH: $ENV{'PATH'}\n";
2415    $mess .= "Exit status: $status\n";
2416    $mess .= "System Error Message: $!\n";
2417
2418    $gsdl_cgi->generate_error($mess);
2419    }
2420    else {
2421    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2422    $mess .= "  $metaname";
2423    $mess .= "->[$metapos]" if (defined $metapos);
2424    $mess .= " ($metavalue)" if (defined $metavalue);
2425
2426    $gsdl_cgi->generate_ok_message($mess);
2427    }
2428
2429    #return $status; # in case calling functions have a use for this
2430}
2431
2432sub remove_index_metadata
2433{
2434    my $self = shift @_;
2435
2436    my $username  = $self->{'username'};
2437    my $collect   = $self->{'collect'};
2438    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2439#    my $gsdlhome  = $self->{'gsdlhome'};
2440   
2441    if ($baseaction::authentication_enabled) {
2442    # Ensure the user is allowed to edit this collection   
2443    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2444    }
2445
2446    # Obtain the collect dir
2447    my $site = $self->{'site'};
2448    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2449    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2450
2451    # Make sure the collection isn't locked by someone else
2452    $self->lock_collection($username, $collect);
2453
2454    $self->_remove_index_metadata(@_);
2455
2456    # Release the lock once it is done
2457    $self->unlock_collection($username, $collect);
2458}
2459
2460
2461# Was trying to reused the codes, but the functions need to be broken
2462# down more before they can be reused, otherwise there will be too
2463# much overhead and duplicate process...
2464sub insert_metadata
2465{
2466    my $self = shift @_;
2467   
2468    my $username  = $self->{'username'};
2469    my $collect   = $self->{'collect'};
2470    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2471    my $gsdlhome  = $self->{'gsdlhome'};
2472    my $infodbtype = $self->{'infodbtype'};
2473   
2474    # If the import metadata and gdbm database have been updated, we
2475    # need to insert some notification to warn user that the the text
2476    # they see at the moment is not indexed and require a rebuild.
2477    my $rebuild_pending_macro = "_rebuildpendingmessage_";
2478
2479    if ($baseaction::authentication_enabled) {
2480    # Ensure the user is allowed to edit this collection
2481    $self->authenticate_user($username, $collect);
2482    }
2483
2484    # Obtain the collect and archive dir   
2485    my $site = $self->{'site'};
2486    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2487    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2488    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2489
2490    # Make sure the collection isn't locked by someone else
2491    $self->lock_collection($username, $collect);
2492   
2493    # Check additional args
2494    my $docid = $self->{'d'};
2495    if (!defined($docid) || ($docid =~ m/^\s*$/)) {
2496    $gsdl_cgi->generate_error("No document id is specified: d=...");
2497    }
2498    my $metaname = $self->{'metaname'};
2499    if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
2500    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2501    }
2502    my $metavalue = $self->{'metavalue'};
2503    if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
2504    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2505    }
2506    # make "accumulate" the default (less destructive, as it won't actually
2507    # delete any existing values)
2508    my $metamode = "accumulate";
2509
2510    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2511    my $metapos   = undef;
2512    my $prevmetavalue = undef;
2513
2514    #=======================================================================#
2515    # set_import_metadata [START]
2516    #=======================================================================#
2517    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2518    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2519    my $metadata_xml_file;
2520    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2521    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2522   
2523    # This now stores the full pathname
2524    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2525    $import_filename = &util::placeholders_to_abspath($import_filename);
2526   
2527    # figure out correct metadata.xml file [?]
2528    # Assuming the metadata.xml file is next to the source file
2529    # Note: This will not work if it is using the inherited metadata from the parent folder
2530    my ($import_tailname, $import_dirname)
2531    = File::Basename::fileparse($import_filename);
2532    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2533
2534    # Shane's escape characters
2535    $metavalue = pack "U0C*", unpack "C*", $metavalue;
2536    $metavalue =~ s/\,/&#44;/g;
2537    $metavalue =~ s/\:/&#58;/g;
2538    $metavalue =~ s/\|/&#124;/g;
2539    $metavalue =~ s/\(/&#40;/g;
2540    $metavalue =~ s/\)/&#41;/g;
2541    $metavalue =~ s/\[/&#91;/g;
2542    $metavalue =~ s/\\/&#92;/g;
2543    $metavalue =~ s/\]/&#93;/g;
2544    $metavalue =~ s/\{/&#123;/g;
2545    $metavalue =~ s/\}/&#125;/g;
2546    $metavalue =~ s/\"/&#34;/g;
2547    $metavalue =~ s/\`/&#96;/g;
2548    $metavalue =~ s/\n/_newline_/g;
2549
2550    # Edit the metadata.xml
2551    # Modified by Jeffrey from DL Consulting
2552    # Handle the case where there is one metadata.xml file for multiple FileSets
2553    # The XML filter needs to know whether it is in the right FileSet
2554    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2555    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2556    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2557                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2558    #=======================================================================#
2559    # set_import_metadata [END]
2560    #=======================================================================#
2561
2562
2563    #=======================================================================#
2564    # set_metadata (accumulate version) [START]
2565    #=======================================================================#
2566    # To people who know $collect_tail please add some comments
2567    # Obtain path to the database
2568    my $collect_tail = $collect;
2569    $collect_tail =~ s/^.*[\/|\\]//;
2570    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2571    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2572
2573    # Read the docid entry
2574    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2575
2576    # Protect the quotes
2577    $metavalue =~ s/\"/\\\"/g;
2578
2579    # Adds the pending macro
2580    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2581
2582    # If the metadata doesn't exist, create a new one
2583    if (!defined($doc_rec->{$metaname})){   
2584    $doc_rec->{$metaname} = [ $macro_metavalue ];
2585    }
2586    # Else, let's acculumate the values
2587    else {
2588        push(@{$doc_rec->{$metaname}},$macro_metavalue);
2589    }
2590
2591    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2592    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2593
2594    if ($status != 0) {
2595        # Catch error if gdbmget failed
2596    my $mess = "Failed to set metadata key: $docid\n";
2597   
2598    $mess .= "PATH: $ENV{'PATH'}\n";
2599    $mess .= "Exit status: $status\n";
2600    $mess .= "System Error Message: $!\n";
2601
2602    $gsdl_cgi->generate_error($mess);
2603    }
2604    else {
2605    my $mess = "insert-metadata successful: Key[$docid]\n";
2606    $mess .= "  [In metadata.xml] $metaname";
2607    $mess .= " = $metavalue\n";
2608    $mess .= "  [In database] $metaname";
2609    $mess .= " = $macro_metavalue\n";
2610    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
2611        $gsdl_cgi->generate_ok_message($mess);
2612    }   
2613    #=======================================================================#
2614    # set_metadata (accumulate version) [END]
2615    #=======================================================================#
2616
2617    # Release the lock once it is done
2618    $self->unlock_collection($username, $collect);
2619}
2620
2621# not returning 1; here since this file is conditionally included by metadataction.pm
2622# and not otherwise meant to be used on its own
2623
Note: See TracBrowser for help on using the browser.