root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 31589

Revision 31589, 121.6 KB (checked in by ak19, 2 years ago)

Bugfixes I think. Noticed that remove-archives-meta was removing the existing two metavalues for a metaname instead of a specific one despite being given a specific metavalue to remove. This seems to have happened because metapos was not undefined yet did not have a proper value (being the empty string or a string of empty chars), and so metapos was being set to 0. In that case the metavalue at metapos 0 AND the metavalue specified were both being removed. I then noticed the same issue going on with remove-index-meta too, and then noticed that set-index-meta wasn't working the way I expected it to either. All of these had to do with incoming args like metapos and metavalue only being tested for undefinedness rather than being tested for whether they were defined yet consisted of a string of one or more empty spaces.

Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
130        'optional-args'   => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
131# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
132                     
133    "remove-archives-metadata" => {
134        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
135        'optional-args'   => [ "metapos", "metavalue", "metamode" ] },
136
137    "remove-live-metadata" => {
138        'compulsory-args' => [ "d", "metaname" ],
139        'optional-args'   => [ ] },
140
141    "remove-index-metadata" => {
142        'compulsory-args' => [ "d", "metaname" ],
143        'optional-args'   => [ "metapos", "metavalue" ] },
144
145    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
146        'compulsory-args' => [ "d", "metaname" ],
147        'optional-args'   => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
148
149    #INSERT METHODS
150    "insert-metadata" => {
151        'compulsory-args' => [ "d", "metaname", "metavalue" ],
152        'optional-args'   => [ ] }
153};
154
155
156sub new
157{
158    my $class = shift (@_);
159    my ($gsdl_cgi,$iis6_mode) = @_;
160
161    # Treat metavalue specially.  To transmit this through a GET request
162    # the Javascript side has url-encoded it, so here we need to decode
163    # it before proceeding
164
165    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
166    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
167    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
168
169    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
170    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
171
172    # need to do the same with prevmetavalue
173    my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
174    my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
175    my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
176
177    $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
178    $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
179
180    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
181
182    return bless $self, $class;
183}
184
185
186sub get_live_metadata
187{
188    my $self = shift @_;
189
190    my $username  = $self->{'username'};
191    my $collect   = $self->{'collect'};
192    my $gsdl_cgi  = $self->{'gsdl_cgi'};
193    my $gsdlhome  = $self->{'gsdlhome'};
194    my $infodbtype = $self->{'infodbtype'};
195   
196    # live metadata gets/saves value scoped (prefixed) by the current usename
197    # so (for now) let's not bother to enforce authentication
198
199    # Obtain the collect dir
200    my $site = $self->{'site'};
201    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
202    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
203
204    # No locking collection when getting metadata, only when setting it
205#    $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
206
207    # look up additional args
208    my $docid  = $self->{'d'};
209    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
210       $gsdl_cgi->generate_error("No docid (d=...) specified.");
211    }
212
213    # Generate the dbkey
214    my $metaname  = $self->{'metaname'};
215    my $dbkey = "$docid.$metaname";
216
217    # To people who know $collect_tail please add some comments
218    # Obtain path to the database
219    my $collect_tail = $collect;
220    $collect_tail =~ s/^.*[\/|\\]//;
221    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
222    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
223   
224    # Obtain the content of the key
225    my $cmd = "gdbmget $infodb_file_path $dbkey";
226    if (open(GIN,"$cmd |") == 0) {
227        # Catch error if gdbmget failed
228    my $mess = "Failed to get metadata key: $metaname\n";
229    $mess .= "$!\n";
230
231    $gsdl_cgi->generate_error($mess);
232    }
233    else {
234    binmode(GIN,":utf8");
235        # Read everything in and concatenate them into $metavalue
236    my $metavalue = "";
237    my $line;
238    while (defined ($line=<GIN>)) {
239        $metavalue .= $line;
240    }
241    close(GIN);
242    chomp($metavalue); # Get rid off the tailing newlines
243    $gsdl_cgi->generate_ok_message("$metavalue");
244    }
245
246    # Release the lock once it is done
247#    $self->unlock_collection($username, $collect);
248}
249
250# just calls the index version
251sub get_metadata
252{
253    my $self = shift @_;
254    $self->get_index_metadata(@_);
255}
256
257# JSON version that will get the requested metadata values
258# from the requested source (index, import, archives or live)
259# One of the params is a JSON string and the return value is JSON too
260# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
261sub get_metadata_array
262{
263    my $self = shift @_;
264
265    my $where = $self->{'where'};
266    if (!$where || ($where =~ m/^\s*$/)) { # 0, "0", "" and undef are all false. All else is true.
267    # What is truth in perl: http://www.berkeleyinternet.com/perl/node11.html
268    # and http://www.perlmonks.org/?node_id=33638
269
270    $where = "index"; # default behaviour is to get the values from index
271    }
272
273    # Only when setting metadata do we perform authentication and do we lock the collection,
274    # not when getting metadata
275
276    # for get_meta_array, the where param can only be ONE of import, archives, index, live
277    if($where =~ m/index/) {
278    $self->_get_index_metadata_array(@_);
279    }
280    elsif($where =~ m/archives/) {
281    $self->_get_archives_metadata_array(@_);
282    }
283    elsif($where =~ m/import/) {
284    $self->_get_import_metadata_array(@_);
285    }
286    elsif($where =~ m/live/) {
287        $self->_get_live_metadata_array(@_);
288    }
289}
290
291# Unused at present. Added for completion. Tested.
292sub _get_import_metadata_array {
293   
294    my $self = shift @_;
295
296    my $collect   = $self->{'collect'};
297    my $gsdl_cgi  = $self->{'gsdl_cgi'};
298    my $site = $self->{'site'};
299    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
300   
301    # look up additional args
302    my $infodbtype = $self->{'infodbtype'};
303   
304    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
305    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
306    my $json_str      = $self->{'json'};
307    my $doc_array = decode_json $json_str;
308
309    my $json_result_str = "[";
310    my $first_doc_rec = 1;
311    foreach my $doc_array_rec ( @$doc_array ) {
312   
313    my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
314   
315    if($first_doc_rec) {
316        $first_doc_rec = 0;
317    } else {
318        $json_result_str .= ",";
319    }
320    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
321
322    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
323    $json_result_str = $json_result_str . ",\"metatable\":[";
324
325    my $first_rec = 1;
326    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
327
328        # Read the docid entry     
329        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
330        # This now stores the full pathname
331        my $import_filename = $doc_rec->{'src-file'}->[0];
332        $import_filename = &util::placeholders_to_abspath($import_filename);
333
334        # figure out correct metadata.xml file [?]
335        # Assuming the metadata.xml file is next to the source file
336        # Note: This will not work if it is using the inherited metadata from the parent folder
337        my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
338        my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
339
340
341        if($first_rec) {
342        $first_rec = 0;
343        } else {
344        $json_result_str .= ",";       
345        }
346       
347        my $metaname  = $metatable_rec->{'metaname'};
348        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
349
350        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
351        if(!defined $metapos) {
352        $metapos = 0;
353        }
354
355        # Obtain the specified metadata value(s)
356        my $metavalue;
357
358        if($metapos ne "all") { # get the value at a single metapos
359        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
360
361        #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
362       
363        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
364
365        } else {
366        my $first_metaval = 1;
367        $metapos = 0;
368        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
369
370        while (defined $metavalue && $metavalue ne "") {
371            if($first_metaval) {       
372            $first_metaval = 0;
373            } else {
374            $json_result_str .= ",";
375            }
376       
377            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
378
379            $metapos++;
380            $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
381        }
382        }
383
384        $json_result_str .= "]}"; # close metavals array and metatable record
385    }
386   
387    $json_result_str .= "]}"; # close metatable array and docid record
388    }
389
390    $json_result_str .= "]"; # close array of docids
391    $gsdl_cgi->generate_ok_message($json_result_str."\n");
392}
393
394# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
395# For more information on the format of the output, see _get_index_metadata_array, which is in use.
396sub _get_archives_metadata_array {
397
398    my $self = shift @_;
399
400    my $collect   = $self->{'collect'};
401    my $gsdl_cgi  = $self->{'gsdl_cgi'};
402    my $site = $self->{'site'};
403    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
404
405    # look up additional args   
406    my $infodbtype = $self->{'infodbtype'};
407
408    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
409    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410
411    my $json_str      = $self->{'json'};
412    my $doc_array = decode_json $json_str;
413
414    my $json_result_str = "[";
415    my $first_doc_rec = 1;
416    foreach my $doc_array_rec ( @$doc_array ) {
417   
418    my $docid     = $doc_array_rec->{'docid'};
419   
420    if($first_doc_rec) {
421        $first_doc_rec = 0;
422    } else {
423        $json_result_str .= ",";
424    }
425    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
426
427    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
428    $json_result_str = $json_result_str . ",\"metatable\":[";
429
430    my $first_rec = 1;
431    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
432
433        my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
434        $docid_secnum = "" if (!defined $docid_secnum);
435
436        # Read the docid entry     
437        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
438        # This now stores the full pathname
439        my $doc_filename = $doc_rec->{'doc-file'}->[0];
440        $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
441
442        if($first_rec) {
443        $first_rec = 0;
444        } else {
445        $json_result_str .= ",";       
446        }
447       
448        my $metaname  = $metatable_rec->{'metaname'};
449        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
450
451        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
452        if(!defined $metapos) {
453        $metapos = 0;
454        }
455
456
457        # Obtain the specified metadata value(s)
458        my $metavalue;
459
460        if($metapos ne "all") { # get the value at a single metapos
461
462        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
463        #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
464       
465        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
466
467        } else {
468        my $first_metaval = 1;
469        $metapos = 0;
470        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
471
472        while (defined $metavalue && $metavalue ne "") {
473            if($first_metaval) {       
474            $first_metaval = 0;
475            } else {
476            $json_result_str .= ",";
477            }
478       
479            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
480
481            $metapos++;
482            $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
483        }
484        }
485
486        $json_result_str .= "]}"; # close metavals array and metatable record
487    }
488   
489    $json_result_str .= "]}"; # close metatable array and docid record
490    }
491
492    $json_result_str .= "]"; # close array of docids
493    $gsdl_cgi->generate_ok_message($json_result_str."\n");
494}
495
496
497# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
498sub _get_live_metadata_array
499{
500    my $self = shift @_;
501
502    my $collect   = $self->{'collect'};
503    my $gsdl_cgi  = $self->{'gsdl_cgi'};
504    my $site = $self->{'site'};
505    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
506
507    # look up additional args   
508    my $infodbtype = $self->{'infodbtype'};
509   
510    # To people who know $collect_tail please add some comments
511    # Obtain the path to the database
512    my $collect_tail = $collect;
513    $collect_tail =~ s/^.*[\/|\\]//;
514    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
515    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
516
517    my $json_str      = $self->{'json'};
518    my $doc_array = decode_json $json_str;
519
520    my $json_result_str = "[";
521    my $first_doc_rec = 1;
522
523    foreach my $doc_array_rec ( @$doc_array ) {
524   
525    my $docid     = $doc_array_rec->{'docid'};
526   
527    if($first_doc_rec) {
528        $first_doc_rec = 0;
529    } else {
530        $json_result_str .= ",";
531    }
532    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
533   
534    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
535    $json_result_str = $json_result_str . ",\"metatable\":[";
536   
537    my $first_rec = 1;
538    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
539        if($first_rec) {
540        $first_rec = 0;
541        } else {
542        $json_result_str .= ",";       
543        }
544       
545        my $metaname  = $metatable_rec->{'metaname'};
546        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
547       
548        # Generate the dbkey
549        my $dbkey = "$docid.$metaname";
550       
551        # metapos for get_live_metadata is always assumed to be "all".
552        # It's always going to get all the lines of metavalues associated with a metaname
553        # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
554        #my $metapos = "all";
555        my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
556        my $metavalue = "";
557       
558        # Obtain the content of the key
559        my $cmd = "gdbmget $infodb_file_path $dbkey";
560        if (open(GIN,"$cmd |") != 0) { # Success.
561       
562        binmode(GIN,":utf8");
563        # Read everything in and concatenate them into $metavalue       
564        my $line;
565        my $first_metaval = 1;
566        my $pos = 0;
567        while (defined ($line=<GIN>)) {
568            chomp($line); # Get rid off the tailing newlines
569           
570            if($metapos eq "all") {
571            if($first_metaval) {       
572                $first_metaval = 0;
573            } else {
574                $json_result_str .= ",";
575            }           
576            $metavalue = $line;
577            $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
578            } elsif($metapos == $pos) {
579            $metavalue = $line;
580            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
581            last;
582            } # else, the current $pos is not the required $metapos
583            $pos += 1;
584        }
585        close(GIN);
586        } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
587       
588        $json_result_str .= "]}"; # close metavals array and metatable record
589    }
590   
591    $json_result_str .= "]}"; # close metatable array and docid record
592    }
593
594    $json_result_str .= "]"; # close array of docids
595   
596    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
597}
598
599
600# Takes a JSON string and returns a JSON string
601# Request string is of the form:
602# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
603# Resulting string is of the form:
604# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
605sub _get_index_metadata_array
606{
607    my $self = shift @_;
608
609    my $collect   = $self->{'collect'};
610    my $gsdl_cgi  = $self->{'gsdl_cgi'};
611    my $site = $self->{'site'};
612    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
613
614    # look up additional args   
615    my $infodbtype = $self->{'infodbtype'};
616   
617    # To people who know $collect_tail please add some comments
618    # Obtain the path to the database
619    my $collect_tail = $collect;
620    $collect_tail =~ s/^.*[\/|\\]//;
621    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
622    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
623
624    my $json_str      = $self->{'json'};
625    my $doc_array = decode_json $json_str;
626
627    my $json_result_str = "[";
628    my $first_doc_rec = 1;
629
630    foreach my $doc_array_rec ( @$doc_array ) {
631   
632    my $docid     = $doc_array_rec->{'docid'};
633   
634    if($first_doc_rec) {
635        $first_doc_rec = 0;
636    } else {
637        $json_result_str .= ",";
638    }
639    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
640
641    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
642    $json_result_str = $json_result_str . ",\"metatable\":[";
643
644    my $first_rec = 1;
645    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
646        if($first_rec) {
647        $first_rec = 0;
648        } else {
649        $json_result_str .= ",";       
650        }
651       
652        my $metaname  = $metatable_rec->{'metaname'};
653        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
654
655        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
656        if(!defined $metapos) {
657        $metapos = 0;
658        }
659
660         # Read the docid entry
661        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
662 
663        # Basically loop through and unescape_html the values
664        foreach my $k (keys %$doc_rec) {
665        my @escaped_v = ();
666        foreach my $v (@{$doc_rec->{$k}}) {
667            my $ev = &ghtml::unescape_html($v);
668            push(@escaped_v, $ev);
669        }
670        $doc_rec->{$k} = \@escaped_v;
671        }
672
673        # Obtain the specified metadata value(s)
674        my $metavalue;
675
676        if($metapos ne "all") { # get the value at a single metapos
677
678        $metavalue = $doc_rec->{$metaname}->[$metapos];
679
680        # protect any double quotes and colons in the metavalue before putting it into JSON
681        $metavalue =~ s/\"/&quot;/g if defined $metavalue;
682        $metavalue =~ s/\:/&58;/g if defined $metavalue;
683
684        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
685
686        } else {
687        my $first_metaval = 1;
688        $metapos = 0;
689        $metavalue = $doc_rec->{$metaname}->[$metapos];
690
691        while (defined $metavalue) {
692
693            # protect any double quotes and colons in the metavalue before putting it into JSON
694            $metavalue =~ s/\"/&quot;/g;
695            $metavalue =~ s/\:/&58;/g;
696
697            if($first_metaval) {       
698            $first_metaval = 0;
699            } else {
700            $json_result_str .= ",";
701            }
702       
703            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
704
705            $metapos++;
706            $metavalue = $doc_rec->{$metaname}->[$metapos];
707        }
708        }
709
710        $json_result_str .= "]}"; # close metavals array and metatable record
711    }
712   
713    $json_result_str .= "]}"; # close metatable array and docid record
714    }
715
716    $json_result_str .= "]"; # close array of docids
717
718    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
719}
720
721
722sub get_index_metadata
723{
724    my $self = shift @_;
725
726    my $username  = $self->{'username'};
727    my $collect   = $self->{'collect'};
728    my $gsdl_cgi  = $self->{'gsdl_cgi'};
729    my $gsdlhome  = $self->{'gsdlhome'};
730
731    # Obtain the collect dir
732    my $site = $self->{'site'};
733    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
734    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
735
736    # look up additional args
737    my $docid     = $self->{'d'};
738    my $metaname  = $self->{'metaname'};
739    my $metapos   = $self->{'metapos'};
740    my $infodbtype = $self->{'infodbtype'};
741
742    # To people who know $collect_tail please add some comments
743    # Obtain path to the database
744    my $collect_tail = $collect;
745    $collect_tail =~ s/^.*[\/|\\]//;
746    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
747    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
748
749    # Read the docid entry
750    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
751 
752    # Basically loop through and unescape_html the values
753    foreach my $k (keys %$doc_rec) {
754    my @escaped_v = ();
755    foreach my $v (@{$doc_rec->{$k}}) {
756        my $ev = &ghtml::unescape_html($v);
757        push(@escaped_v, $ev);
758    }
759    $doc_rec->{$k} = \@escaped_v;
760    }
761
762    # Obtain the specified metadata value
763    $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
764    my $metavalue = $doc_rec->{$metaname}->[$metapos];
765    $gsdl_cgi->generate_ok_message("$metavalue");
766   
767}
768
769
770sub get_import_metadata
771{
772    my $self = shift @_;
773
774    my $username  = $self->{'username'};
775    my $collect   = $self->{'collect'};
776    my $gsdl_cgi  = $self->{'gsdl_cgi'};
777    my $gsdlhome  = $self->{'gsdlhome'};
778
779    # Obtain the collect dir
780    my $site = $self->{'site'};
781    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
782    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
783
784    # look up additional args
785    my $docid     = $self->{'d'};
786    my $metaname  = $self->{'metaname'};
787    my $metapos = $self->{'metapos'};
788    $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/)); # gets the first value by default since metapos defaults to 0
789
790    my $infodbtype = $self->{'infodbtype'};
791    if (!defined $docid)
792    {
793        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
794    }
795
796    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
797    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
798    my $metadata_xml_file;
799    my $import_filename = undef;
800   
801
802    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
803    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
804    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
805
806    # This now stores the full pathname
807    $import_filename = $doc_rec->{'src-file'}->[0];
808    $import_filename = &util::placeholders_to_abspath($import_filename);
809
810    # figure out correct metadata.xml file [?]
811    # Assuming the metadata.xml file is next to the source file
812    # Note: This will not work if it is using the inherited metadata from the parent folder
813    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
814    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
815
816    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
817
818}
819
820sub get_metadata_from_metadata_xml
821{
822    my $self = shift @_;
823    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
824   
825    my @rules =
826    (
827        _default => 'raw',
828        'Metadata' => \&gfmxml_metadata,
829        'FileName' => \&mxml_filename
830    );
831       
832    my $parser = XML::Rules->new
833    (
834        rules => \@rules,
835        output_encoding => 'utf8'
836    );
837   
838    my $xml_in = "";
839    if (!open(MIN,"<$metadata_xml_filename"))
840    {
841        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
842    }
843    else
844    {
845        # Read them in
846        my $line;
847        while (defined ($line=<MIN>)) {
848            $xml_in .= $line;
849        }
850        close(MIN);
851
852        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
853       
854        if(defined $parser->{'pad'}->{'metavalue'})
855        {
856            return $parser->{'pad'}->{'metavalue'};
857        }
858        else
859        {
860            return "";
861        }
862    }
863}
864
865sub gfmxml_metadata
866{
867    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
868
869    # no subsection support yet in metadata.xml
870
871    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
872    {
873        if (!defined $parser->{'parameters'}->{'poscount'})
874        {
875            $parser->{'parameters'}->{'poscount'} = 0;
876        }
877        else
878        {
879            $parser->{'parameters'}->{'poscount'}++;
880        }
881   
882        # gets the first value by default, since metapos defaults to 0
883        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
884        {
885            if($parser->{'parameters'}->{'metapos'} > 0) {
886            print STDERR "@@@@ WARNING: non-zero metapos.\n";
887            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
888            }
889            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
890        }
891    }
892}
893
894sub get_archives_metadata
895{
896    my $self = shift @_;
897
898    my $username  = $self->{'username'};
899    my $collect   = $self->{'collect'};
900    my $gsdl_cgi  = $self->{'gsdl_cgi'};
901#   my $gsdlhome  = $self->{'gsdlhome'};
902    my $infodbtype = $self->{'infodbtype'};
903
904    # Obtain the collect dir
905    my $site = $self->{'site'};
906    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
907   
908    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
909
910    # look up additional args
911    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
912    $docid_secnum = "" if (!defined $docid_secnum);
913   
914    my $metaname = $self->{'metaname'};
915    my $metapos = $self->{'metapos'};
916    $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
917   
918    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
919    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
920
921    # This now stores the full pathname
922    my $doc_filename = $doc_rec->{'doc-file'}->[0];
923
924    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
925
926}
927
928sub get_metadata_from_archive_xml
929{
930    my $self = shift @_;
931    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
932   
933    my @start_rules = ('Section' => \&dxml_start_section);
934   
935    my @rules =
936    (
937        _default => 'raw',
938        'Metadata' => \&gfdxml_metadata
939    );
940       
941    my $parser = XML::Rules->new
942    (
943        start_rules => \@start_rules,
944        rules => \@rules,
945        output_encoding => 'utf8'
946    );
947   
948    my $xml_in = "";
949    if (!open(MIN,"<$doc_xml_filename"))
950    {
951        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
952    }
953    else
954    {
955        # Read them in
956        my $line;
957        while (defined ($line=<MIN>)) {
958            $xml_in .= $line;
959        }
960        close(MIN);
961
962        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
963       
964        if(defined $parser->{'pad'}->{'metavalue'})
965        {
966            return $parser->{'pad'}->{'metavalue'};
967        }
968        else
969        {
970            return "";
971        }
972    }
973}
974
975sub gfdxml_metadata
976{
977    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
978   
979    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
980    {
981        return;
982    }
983
984    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
985    {
986        if (!defined $parser->{'parameters'}->{'poscount'})
987        {
988            $parser->{'parameters'}->{'poscount'} = 0;
989        }
990        else
991        {
992            $parser->{'parameters'}->{'poscount'}++;
993        }
994    }
995
996    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
997    {   
998        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
999    }
1000}
1001
1002sub _set_live_metadata
1003{
1004    my $self = shift @_;
1005
1006    my $collect   = $self->{'collect'};
1007    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1008    #my $gsdlhome  = $self->{'gsdlhome'};
1009    my $infodbtype = $self->{'infodbtype'};
1010
1011    # Obtain the collect dir
1012    my $site = $self->{'site'};
1013    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1014    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1015
1016
1017    # look up additional args
1018    my $docid     = $self->{'d'};
1019    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1020      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1021    }
1022    my $metavalue = $self->{'metavalue'};
1023
1024    # Generate the dbkey   
1025    my $metaname  = $self->{'metaname'};
1026    my $dbkey = "$docid.$metaname";
1027
1028    # To people who know $collect_tail please add some comments
1029    # Obtain path to the database
1030    my $collect_tail = $collect;
1031    $collect_tail =~ s/^.*[\/|\\]//;
1032    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1033    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1034
1035    # Set the new value
1036    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1037    my $status = system($cmd);
1038    if ($status != 0) {
1039        # Catch error if gdbmget failed
1040    my $mess = "Failed to set metadata key: $dbkey\n";
1041
1042    $mess .= "PATH: $ENV{'PATH'}\n";
1043    $mess .= "cmd = $cmd\n";
1044    $mess .= "Exit status: $status\n";
1045    $mess .= "System Error Message: $!\n";
1046
1047    $gsdl_cgi->generate_error($mess);
1048    }
1049    else {
1050    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
1051    }
1052
1053    #return $status; # in case calling functions have any further use for this
1054}
1055
1056sub set_live_metadata
1057{
1058    my $self = shift @_;
1059
1060    my $username  = $self->{'username'};
1061    my $collect   = $self->{'collect'};
1062    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1063 
1064    if ($baseaction::authentication_enabled) {
1065    # Ensure the user is allowed to edit this collection   
1066    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1067    }
1068
1069    # Make sure the collection isn't locked by someone else
1070    $self->lock_collection($username, $collect);
1071
1072    $self->_set_live_metadata(@_);
1073
1074    # Release the lock once it is done
1075    $self->unlock_collection($username, $collect);
1076}
1077
1078sub set_index_metadata_entry
1079{
1080    print STDERR "1\n";
1081    my $self = shift @_;
1082    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
1083   
1084    # To people who know $collect_tail please add some comments
1085    # Obtain path to the database
1086    my $collect_tail = $collect;
1087    $collect_tail =~ s/^.*[\/|\\]//;
1088    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1089    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1090   
1091    print STDERR "2\n";
1092#   print STDERR "**** infodb file path = $infodb_file_path\n";
1093#   print STDERR "***** infodb type = $infodbtype\n";
1094   
1095    # Read the docid entry
1096    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1097   
1098    # Set the metadata value
1099    if (defined $metapos) {
1100    print STDERR "3\n";
1101    # if metamode=accumulate AND metapos, warn user and then use metapos
1102    if (defined $metamode && $metamode eq "accumulate") {
1103        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1104        print STDERR "**** Proceeding by using metapos\n";
1105    }
1106    $doc_rec->{$metaname}->[$metapos] = $metavalue;
1107    }
1108    elsif (defined $prevmetavalue) {
1109    print STDERR "4\n";
1110        my $array = $doc_rec->{$metaname};
1111        my $length = @$array;
1112
1113        my $found = 0;
1114        for (my $i = 0; $i < $length; $i++){
1115            if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1116                $doc_rec->{$metaname}->[$i] = $metavalue;
1117                $found = 1;
1118                last;               
1119            }
1120        }
1121
1122        if($found == 0){
1123            $doc_rec->{$metaname} = [ $metavalue ];
1124        }
1125    }
1126    elsif (defined $metamode && $metamode eq "override") {
1127    print STDERR "5\n";
1128    $doc_rec->{$metaname} = [ $metavalue ];
1129    }
1130    else { # default for index was to override, but because accumulate is less destructive,
1131    # and because accumulate is the default for archives and import, that's the new default for index too
1132    print STDERR "6\n";
1133    if(defined $doc_rec->{$metaname}) {
1134        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1135    } else {
1136        $doc_rec->{$metaname} = [ $metavalue ];
1137    }
1138    }
1139    print STDERR "6\n";
1140 
1141    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1142   
1143    return $status;
1144   
1145}
1146
1147sub _set_import_metadata
1148{
1149    my $self = shift @_;
1150
1151    my $collect   = $self->{'collect'};
1152    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1153    my $infodbtype = $self->{'infodbtype'};
1154#    my $gsdlhome  = $self->{'gsdlhome'};
1155     
1156    # Obtain the collect and archive dir   
1157    my $site = $self->{'site'};
1158    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1159    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1160    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1161   
1162    # look up additional args
1163    # want either d= or f=
1164    my $docid  = $self->{'d'};
1165    my ($docid_root,$docid_secnum);
1166    if(defined $docid) {   
1167    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
1168    # as yet no support for setting subsection metadata in metadata.xml
1169    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1170        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
1171        return;
1172    }
1173    }
1174
1175    my $import_file  = $self->{'f'};
1176    if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
1177    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
1178    }
1179
1180    # Get the parameters and set default mode to "accumulate"
1181    my $metaname   = $self->{'metaname'};
1182    my $metavalue  = $self->{'metavalue'};
1183##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1184    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1185   
1186    my $metamode   = $self->{'metamode'};
1187    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1188    # make "accumulate" the default (less destructive, as it won't actually
1189    # delete any existing values)
1190    $metamode = "accumulate";
1191    }
1192
1193    # adding metapos and prevmetavalue support to import_metadata subroutines
1194    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1195    my $prevmetavalue = $self->{'prevmetavalue'};
1196    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1197    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
1198
1199    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1200    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1201
1202    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1203    $mess .= "  $metaname";
1204    $mess .= " = $metavalue";
1205    $mess .= " ($metamode)\n";
1206
1207    $gsdl_cgi->generate_ok_message($mess);
1208
1209    #return $status; # in case calling functions have any further use for this
1210}
1211
1212# the version of set_index_meta that doesn't do authentication
1213sub _set_archives_metadata
1214{
1215    my $self = shift @_;
1216
1217    my $collect   = $self->{'collect'};
1218    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1219    my $infodbtype = $self->{'infodbtype'};
1220   
1221    # Obtain the collect and archive dir   
1222    my $site = $self->{'site'};
1223    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1224    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1225
1226    # look up additional args
1227    my $docid  = $self->{'d'};
1228    my $metaname   = $self->{'metaname'};
1229    my $metavalue  = $self->{'metavalue'};
1230    my $prevmetavalue = $self->{'prevmetavalue'};
1231   
1232    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1233                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
1234
1235    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1236    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
1237
1238    my $metamode   = $self->{'metamode'};
1239    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1240    # make "accumulate" the default (less destructive, as it won't actually
1241    # delete any existing values)
1242    $metamode = "accumulate";
1243    }
1244
1245    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1246                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1247   
1248    if ($status == 0) {
1249    my $mess = "set-archives-metadata successful: Key[$docid]\n";
1250    $mess .= "  $metaname";
1251    $mess .= "->[$metapos]" if (defined $metapos);
1252    $mess .= " = $metavalue";
1253    $mess .= " ($metamode)\n";
1254   
1255    $gsdl_cgi->generate_ok_message($mess); 
1256    }
1257    else {
1258    my $mess .= "Failed to set archives metadata key: $docid\n";
1259    $mess .= "Exit status: $status\n";
1260    if(defined $self->{'error_msg'}) {
1261        $mess .= "Error Message: $self->{'error_msg'}\n";
1262    } else {
1263        $mess .= "System Error Message: $!\n";
1264    }
1265    $mess .= "-" x 20 . "\n";
1266   
1267    $gsdl_cgi->generate_error($mess);
1268    }
1269
1270    #return $status; # in case calling functions have any further use for this
1271}
1272
1273
1274# the version of set_index_meta that doesn't do authentication
1275sub _set_index_metadata
1276{
1277    print STDERR "START SET INDEX METADATA\n";
1278    my $self = shift @_;
1279
1280    my $collect   = $self->{'collect'};
1281    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1282
1283    my $site = $self->{'site'};
1284    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1285
1286    # look up additional args
1287    my $docid     = $self->{'d'};
1288    my $metaname  = $self->{'metaname'};
1289    my $metapos   = $self->{'metapos'}; # undef has meaning
1290    my $metavalue = $self->{'metavalue'};
1291    my $infodbtype = $self->{'infodbtype'};
1292    my $metamode  = $self->{'metamode'};
1293    my $prevmetavalue = $self->{'prevmetavalue'};
1294
1295    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1296    $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
1297
1298    print STDERR "SETTING INDEX METADATA ENTRY\n";
1299    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1300    print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
1301    if ($status != 0) {
1302        # Catch error if set infodb entry failed
1303    my $mess = "Failed to set metadata key: $docid\n";
1304   
1305    $mess .= "PATH: $ENV{'PATH'}\n";
1306    $mess .= "Exit status: $status\n";
1307    $mess .= "System Error Message: $!\n";
1308   
1309    $gsdl_cgi->generate_error($mess);
1310    }
1311    else {
1312    my $mess = "set-index-metadata successful: Key[$docid]\n";
1313    $mess .= "  $metaname";
1314    $mess .= "->[$metapos]" if (defined $metapos);
1315    $mess .= " = $metavalue\n";
1316   
1317    $gsdl_cgi->generate_ok_message($mess);
1318    }
1319
1320    print STDERR "END SET INDEX METADATA\n";
1321    #return $status; # in case calling functions have any further use for this
1322}
1323
1324sub set_index_metadata
1325{
1326    my $self = shift @_;
1327
1328    my $username  = $self->{'username'};
1329    my $collect   = $self->{'collect'};
1330    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1331    #my $gsdlhome  = $self->{'gsdlhome'};
1332
1333    if ($baseaction::authentication_enabled) {
1334    # Ensure the user is allowed to edit this collection   
1335    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1336    }
1337
1338    my $site = $self->{'site'};
1339    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1340   
1341    $gsdl_cgi->checked_chdir($collect_dir);
1342
1343    # Obtain the collect dir
1344    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1345
1346    # Make sure the collection isn't locked by someone else
1347    $self->lock_collection($username, $collect);
1348
1349    $self->_set_index_metadata(@_);
1350   
1351    # Release the lock once it is done
1352    $self->unlock_collection($username, $collect);
1353}
1354
1355# call this to set the metadata for a combination of dirs archives, import or index, or live
1356# if none specified, defaults to index which was the original behaviour of set_metadata.
1357sub set_metadata
1358{
1359    my $self = shift @_;
1360
1361    # Testing that not defining a variable, setting it to "" or to "  " all return false
1362    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1363
1364    my $where = $self->{'where'};
1365    if(!$where || ($where =~ m/^\s*$/)) {   
1366    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1367    return;
1368    }
1369
1370    # authenticate and lock collection once, even if processing multiple dirs
1371    my $username  = $self->{'username'};
1372    my $collect   = $self->{'collect'};
1373    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1374   
1375    if ($baseaction::authentication_enabled) {
1376    # Ensure the user is allowed to edit this collection   
1377    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1378    }
1379   
1380    if($where =~ m/index/) {
1381    my $site = $self->{'site'};
1382    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1383    $gsdl_cgi->checked_chdir($collect_dir);
1384    }
1385
1386    # Make sure the collection isn't locked by someone else
1387    $self->lock_collection($username, $collect);
1388
1389
1390    # now at last can set the metadata. $where can specify multiple
1391    # $where is of the form: import|archives|index, or a subset thereof
1392
1393    #my @whichdirs = split('\|', $where);
1394
1395    # just check whether $where contains import/archives/index/live in turn, and
1396    # for each case, process it accordingly
1397    if($where =~ m/import/) {
1398    $self->_set_import_metadata(@_);       
1399    }
1400
1401    if($where =~ m/archives/) {
1402
1403    # look up docID arg which is optional to set_metadata because it's optional
1404    # to set_import, but which is compulsory to set_archives_metadata
1405    my $docid     = $self->{'d'};
1406    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1407        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1408    }
1409    # we have a docid, so can set archives meta
1410    $self->_set_archives_metadata(@_); 
1411    }
1412
1413    if($where =~ m/index/) {
1414   
1415    # look up docID arg which is optional to set_metadata because it's optional
1416    # to set_import, but which is compulsory to set_archives_metadata
1417    my $docid     = $self->{'d'};
1418    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1419        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1420    }
1421    # we have a docid, so can set index meta
1422    $self->_set_index_metadata(@_);
1423    }   
1424
1425    if($where =~ m/live/) {
1426    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1427    }
1428
1429    # Release the lock once it is done
1430    $self->unlock_collection($username, $collect);
1431}
1432
1433sub set_metadata_array
1434{
1435    my $self = shift @_;
1436
1437    my $where = $self->{'where'};
1438    if(!$where || ($where =~ m/^\s*$/)) {   
1439    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1440    return;
1441    }
1442
1443    my $username  = $self->{'username'};
1444    my $collect   = $self->{'collect'};
1445    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1446
1447    if ($baseaction::authentication_enabled) {
1448    # Ensure the user is allowed to edit this collection
1449    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1450    }
1451
1452    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1453    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1454    my $site = $self->{'site'};
1455    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1456    $gsdl_cgi->checked_chdir($collect_dir);
1457
1458    # Make sure the collection isn't locked by someone else
1459    $self->lock_collection($username, $collect);
1460
1461    if($where =~ m/import/) {
1462    $self->_set_import_metadata_array(@_);
1463    }
1464    if($where =~ m/archives/) {
1465    $self->_set_archives_metadata_array(@_);
1466    }
1467    if($where =~ m/index/) {
1468    $self->_set_index_metadata_array(@_);
1469    }
1470    if($where =~ m/live/) {
1471        $self->_set_live_metadata_array(@_);
1472    }
1473
1474    # Release the lock once it is done
1475    $self->unlock_collection($username, $collect);
1476}
1477
1478sub _set_index_metadata_array
1479{
1480    my $self = shift @_;
1481
1482    my $collect   = $self->{'collect'};
1483    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1484    my $site = $self->{'site'};
1485    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1486
1487   
1488    # look up additional args
1489   
1490    my $infodbtype = $self->{'infodbtype'};
1491   
1492    my $json_str      = $self->{'json'};
1493    my $doc_array = decode_json $json_str;
1494   
1495   
1496    my $global_status = 0;
1497    my $global_mess = "";
1498   
1499    my @all_docids = ();
1500   
1501    foreach my $doc_array_rec ( @$doc_array ) {
1502   
1503    my $status = -1;
1504    my $docid     = $doc_array_rec->{'docid'};
1505   
1506    push(@all_docids,$docid);
1507
1508    my $metaname  = $doc_array_rec->{'metaname'};
1509    if(defined $metaname) {
1510        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1511        my $metavalue = $doc_array_rec->{'metavalue'};
1512        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1513
1514        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1515    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1516        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1517       
1518        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1519        $metaname  = $metatable_rec->{'metaname'};
1520        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1521        my $metapos = undef;
1522        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1523
1524        foreach my $metavalue ( @$metavals ) { # metavals is an array
1525            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1526            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1527            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1528            # metavalue for this metaname will be lost
1529            $metamode = "accumulate";
1530            }
1531        }           
1532        }
1533    }
1534   
1535    if ($status != 0) {
1536        # Catch error if set infodb entry failed
1537        $global_status = $status;
1538        $global_mess .= "Failed to set metadata key: $docid\n";
1539        $global_mess .= "Exit status: $status\n";
1540        $global_mess .= "System Error Message: $!\n";
1541        $global_mess .= "-" x 20;
1542    }
1543    }
1544   
1545    if ($global_status != 0) {
1546    $global_mess .= "PATH: $ENV{'PATH'}\n";
1547    $gsdl_cgi->generate_error($global_mess);
1548    }
1549    else {
1550    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1551    $gsdl_cgi->generate_ok_message($mess);
1552    }
1553}
1554
1555sub set_index_metadata_array
1556{
1557    my $self = shift @_;
1558
1559    my $username  = $self->{'username'};
1560    my $collect   = $self->{'collect'};
1561    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1562#    my $gsdlhome  = $self->{'gsdlhome'};
1563
1564    if ($baseaction::authentication_enabled) {
1565    # Ensure the user is allowed to edit this collection   
1566    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1567    }
1568
1569    my $site = $self->{'site'};
1570    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1571   
1572    $gsdl_cgi->checked_chdir($collect_dir);
1573
1574    # Obtain the collect dir
1575    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1576
1577    # Make sure the collection isn't locked by someone else
1578    $self->lock_collection($username, $collect);
1579
1580    $self->_set_index_metadata_array(@_);
1581
1582    # Release the lock once it is done
1583    $self->unlock_collection($username, $collect);
1584}
1585
1586# experimental, newly added in and untested
1587sub _set_live_metadata_array
1588{
1589    my $self = shift @_;
1590
1591    my $collect   = $self->{'collect'};
1592    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1593
1594    my $site = $self->{'site'};
1595    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1596
1597   
1598    # look up additional args
1599    my $infodbtype = $self->{'infodbtype'};
1600    # To people who know $collect_tail please add some comments
1601    # Obtain path to the database
1602    my $collect_tail = $collect;
1603    $collect_tail =~ s/^.*[\/|\\]//;
1604    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1605    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1606
1607   
1608    my $json_str      = $self->{'json'};
1609    my $doc_array = decode_json $json_str;
1610   
1611   
1612    my $global_status = 0;
1613    my $global_mess = "";
1614   
1615    my @all_docids = ();
1616
1617
1618    foreach my $doc_array_rec ( @$doc_array ) {
1619   
1620    my $status = -1;
1621    my $docid     = $doc_array_rec->{'docid'};
1622
1623    push(@all_docids,$docid);
1624
1625    my $metaname  = $doc_array_rec->{'metaname'};
1626    if(defined $metaname) {
1627        my $dbkey = "$docid.$metaname";
1628        my $metavalue = $doc_array_rec->{'metavalue'};
1629
1630        # Set the new value
1631        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1632        $status = system($cmd);
1633
1634    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1635        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1636        foreach my $metatable_rec ( @$metatable ) {
1637        $metaname  = $metatable_rec->{'metaname'};
1638        my $dbkey = "$docid.$metaname";
1639
1640        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1641        foreach my $metavalue ( @$metavals ) {
1642             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1643             $status = system($cmd);
1644        }
1645        }
1646       
1647    }
1648
1649    if ($status != 0) {
1650        # Catch error if gdbmget failed
1651        $global_status = $status;
1652        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1653        $global_mess .= "Exit status: $status\n";
1654        $global_mess .= "System Error Message: $!\n";
1655        $global_mess .= "-" x 20;
1656    }
1657    }
1658   
1659    if ($global_status != 0) {
1660    $global_mess .= "PATH: $ENV{'PATH'}\n";
1661    $gsdl_cgi->generate_error($global_mess);
1662    }
1663    else {
1664    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1665    $gsdl_cgi->generate_ok_message($mess);
1666    }
1667}
1668
1669sub set_live_metadata_array
1670{
1671    my $self = shift @_;
1672
1673    my $username  = $self->{'username'};
1674    my $collect   = $self->{'collect'};
1675    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1676
1677    if ($baseaction::authentication_enabled) {
1678    # Ensure the user is allowed to edit this collection   
1679    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1680    }
1681
1682    my $site = $self->{'site'};
1683    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1684   
1685    $gsdl_cgi->checked_chdir($collect_dir);
1686
1687    # Make sure the collection isn't locked by someone else
1688    $self->lock_collection($username, $collect);
1689
1690    $self->_set_live_metadata_array(@_);
1691
1692    # Release the lock once it is done
1693    $self->unlock_collection($username, $collect);
1694}
1695
1696
1697sub dxml_metadata
1698{
1699    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1700    my $metaname = $parser->{'parameters'}->{'metaname'};
1701    my $metamode = $parser->{'parameters'}->{'metamode'};
1702   
1703###!!!    print STDERR "**** Processing closing </Metadata> tag\n";
1704   
1705    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1706   
1707    # Find the right metadata tag and checks if we are going to
1708    # override it
1709    #
1710    # Note: This over writes the first metadata block it
1711    # encountered. If there are multiple Sections in the doc.xml, it
1712    # might not behave as you would expect
1713
1714    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1715##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1716##    print STDERR "**** metamode = $metamode\n";
1717   
1718    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1719    {
1720        my $name_attr = $attrHash->{'name'};
1721        # print STDOUT "*** testing: $name_attr eq $metaname ?   and  $metamode eq override ?\n";
1722        if (($name_attr eq $metaname) && ($metamode eq "override"))
1723        {
1724            if (!defined $parser->{'parameters'}->{'poscount'})
1725            {
1726                $parser->{'parameters'}->{'poscount'} = 0;
1727            }
1728            else
1729            {
1730                $parser->{'parameters'}->{'poscount'}++;
1731            }
1732           
1733            if ((defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1734                || (!defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == 0))
1735               
1736            {
1737                ##print STDERR "#### got match!!\n";
1738                # Get the value and override the current value
1739                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1740                $attrHash->{'_content'} = $metavalue;
1741               
1742                # Don't want it to wipe out any other pieces of metadata
1743                $parser->{'parameters'}->{'metamode'} = "done";
1744            }
1745            elsif (defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1746            {
1747                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1748                $attrHash->{'_content'} = $metavalue;
1749                $parser->{'parameters'}->{'metamode'} = "done";
1750            }
1751        }
1752    }
1753
1754    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1755    return [$tagname => $attrHash];
1756}
1757
1758# This method exists purely for catching invalid section numbers that the client
1759# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1760# we've seen all the Sections in the doc.xml, and none of their section nums matched
1761# if the metamode has not been set to 'done' by then.
1762sub dxml_archive
1763{
1764    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1765    my $metamode = $parser->{'parameters'}->{'metamode'};
1766   
1767    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1768    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1769   
1770#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1771   
1772    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1773    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1774
1775    my $metaname = $parser->{'parameters'}->{'metaname'};
1776    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1777   
1778    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1779    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1780    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1781    print STDERR "\tin section $opt_doc_secnum.\n";
1782    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1783    }
1784   
1785    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1786    return [$tagname => $attrHash];
1787}
1788
1789sub dxml_description
1790{
1791    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1792    my $metamode = $parser->{'parameters'}->{'metamode'};
1793
1794    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1795    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1796   
1797###!!!  print STDERR "**** Processing a closing </Description> tag \n";
1798#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1799   
1800    # Accumulate the metadata
1801
1802    # We'll be accumulating metadata at this point if we haven't found and therefore
1803    # haven't processed the metadata yet.
1804    # For subsections, this means that if we're at a matching subsection, but haven't
1805    # found the correct metaname to override in that subsection, we accumulate it as new
1806    # meta in the subsection by adding it to the current description.
1807    # If there's no subsection info for the metadata, it will accumulate at the top level
1808    # section description if we hadn't found a matching metaname to override at this point.
1809
1810    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1811    # at the toplevel section. In the latter case, it means we want to process meta in the
1812    # toplevel section. So the eq check between the values below will work in all cases.
1813   
1814    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1815    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1816    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1817
1818    if ($curr_secnum eq $opt_doc_secnum
1819        && ($metamode eq "accumulate" || $metamode eq "override")) {
1820        if ($metamode eq "override") {
1821        print "Got to end of <Description> block. No metadata value to override.  Switching 'metamode' to accumulate\n";
1822        }
1823
1824        # If we get to here and metamode is override, this means there
1825        # was no existing value to overide => treat as an append operation
1826       
1827        # Tack a new metadata tag on to the end of the <Metadata>+ block
1828        my $metaname = $parser->{'parameters'}->{'metaname'};
1829        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1830       
1831        my $metadata_attr = {
1832        '_content' => $metavalue,
1833        'name' => $metaname,
1834        'mode' => "accumulate"
1835        };
1836       
1837        my $append_metadata = [ "Metadata" => $metadata_attr ];
1838        my $description_content = $attrHash->{'_content'};
1839       
1840        print "Appending metadata to doc.xml\n";
1841       
1842        if (ref($description_content)) {
1843        # got some existing interesting nested content
1844        push(@$description_content, "    ", $append_metadata ,"\n        ");
1845        }
1846        else {
1847        #description_content is most likely a string such as "\n"
1848        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1849        }
1850       
1851        $parser->{'parameters'}->{'metamode'} = "done";
1852    }       
1853    else {
1854        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1855##      print STDERR "**** NOT ACCUMULATE?!? \n";
1856    }
1857
1858    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1859    return [$tagname => $attrHash];
1860}
1861
1862
1863sub dxml_start_section
1864{
1865    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1866
1867    my $new_depth = scalar(@$contextArray);
1868
1869#   print STDERR "**** START SECTION \n";
1870   
1871    if ($new_depth == 1) {
1872    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1873    $parser->{'parameters'}->{'curr_section_num'}   = "";
1874    }
1875
1876    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1877    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1878
1879    my $new_secnum;
1880
1881    if ($new_depth > $old_depth) {
1882    # child subsection
1883    $new_secnum = "$old_secnum.1";
1884    }
1885    elsif ($new_depth == $old_depth) {
1886    # sibling section => increase it's value by 1
1887    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1888    $tail_num++;
1889    $new_secnum = $old_secnum;
1890    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1891    }
1892    else {
1893    # back up to parent section => lopp off tail
1894    $new_secnum = $old_secnum;
1895    $new_secnum =~ s/\.\d+$//;
1896    }
1897
1898    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1899    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1900   
1901    1;
1902}
1903
1904sub edit_xml_file
1905{
1906    my $self = shift @_;
1907    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1908
1909    # use XML::Rules to add it in (read in and out again)
1910    my $parser = XML::Rules->new(start_rules     => $start_rules,
1911                 rules           => $rules,
1912                 style           => 'filter',
1913                                 output_encoding => 'utf8' );
1914
1915    my $xml_in = "";
1916    if (!open(MIN,"<$filename")) {
1917    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1918    }
1919    else {
1920        # Read all the text in
1921    my $line;
1922    while (defined ($line=<MIN>)) {
1923        $xml_in .= $line;
1924    }
1925    close(MIN);
1926   
1927    my $MOUT;   
1928    if (!open($MOUT,">$filename")) {
1929        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1930    }
1931    else {
1932        # Matched lines will get handled by the call backs
1933##      my $xml_out = "";
1934
1935        binmode($MOUT,":utf8");
1936        $parser->filter($xml_in,$MOUT, $options);
1937
1938#       binmode(MOUT,":utf8");
1939#       print MOUT $xml_out;
1940        close($MOUT);       
1941    }
1942    }
1943
1944    # copy across any custom error information that was stored during parsing
1945    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1946}
1947
1948sub edit_doc_xml
1949{
1950    my $self = shift @_;
1951    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1952
1953    my $info_mess = <<RAWEND;
1954****************************
1955  edit_doc_xml()
1956****************************
1957RAWEND
1958
1959    $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1960    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1961    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1962    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1963    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1964    $info_mess .= " opt_secnum  = $opt_secnum\n"  if defined($opt_secnum);
1965    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1966     
1967    $info_mess .= "****************************\n";
1968
1969    $gsdl_cgi->generate_message($info_mess);
1970   
1971    # To monitor which section/subsection number we are in
1972    my @start_rules =
1973    ( 'Section'    => \&dxml_start_section );
1974
1975    # use XML::Rules to add it in (read in and out again)
1976    # Set the call back functions
1977    my @rules =
1978    ( _default => 'raw',
1979      'Metadata'    => \&dxml_metadata,
1980      'Description' => \&dxml_description,
1981      'Archive'     => \&dxml_archive); # just for catching errors at end
1982     
1983    # Sets the parameters
1984    my $options = { 'metaname'  => $metaname,
1985            'metapos'   => $metapos,
1986            'metavalue' => $metavalue,
1987            'metamode'  => $metamode,
1988            'prevmetavalue' => $prevmetavalue };
1989           
1990    if (defined $opt_secnum) {
1991    $options->{'secnum'} = $opt_secnum;
1992    }
1993
1994    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1995}
1996
1997sub set_archives_metadata_entry
1998{
1999    my $self = shift @_;
2000    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
2001
2002    my $info_mess = <<RAWEND;
2003****************************
2004  set_archives_metadata_entry()
2005****************************
2006RAWEND
2007
2008    $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
2009    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2010    $info_mess .= " collect     = $collect\n"     if defined($collect);
2011    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
2012    $info_mess .= " docid       = $docid\n"       if defined($docid);
2013    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
2014    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2015    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2016    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2017    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2018     
2019    $info_mess .= "****************************\n";
2020
2021    $gsdl_cgi->generate_message($info_mess);
2022   
2023    # Obtain the doc.xml path for the specified docID
2024    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2025
2026    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2027    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2028    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2029   
2030    # The $doc_xml_file is relative to the archives, and now let's get the full path
2031    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2032    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
2033
2034    # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
2035    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2036    # Thereafter, we will finally be setting the overriding metavalue for this metaname
2037    if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2038    # remove all values of $metaname metadata
2039    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
2040    }
2041    # Edit the doc.xml file with the specified metadata name, value and position.
2042    # TODO: there is a potential problem here as this edit_doc_xml function
2043    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2044    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2045   
2046    # dxml_metadata method ignores metapos if metamode anything other than override
2047    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
2048            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
2049
2050    # return 0; # return 0 for now to indicate no error
2051    return (defined $self->{'error_msg'}) ? 1 : 0;
2052}
2053
2054
2055sub set_archives_metadata
2056{
2057    my $self = shift @_;
2058
2059    my $username  = $self->{'username'};
2060    my $collect   = $self->{'collect'};
2061    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2062   
2063    if ($baseaction::authentication_enabled) {
2064    # Ensure the user is allowed to edit this collection
2065    $self->authenticate_user($username, $collect);
2066    }
2067
2068    # Make sure the collection isn't locked by someone else
2069    $self->lock_collection($username, $collect);
2070
2071    $self->_set_archives_metadata(@_);
2072
2073    # Release the lock once it is done
2074    $self->unlock_collection($username, $collect);
2075}
2076
2077sub _set_archives_metadata_array
2078{
2079    my $self = shift @_;
2080   
2081    my $collect   = $self->{'collect'};
2082    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2083    my $site = $self->{'site'};
2084    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2085
2086    # look up additional args
2087   
2088    my $infodbtype = $self->{'infodbtype'};
2089   
2090    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2091   
2092    my $json_str      = $self->{'json'};
2093    my $doc_array = decode_json $json_str;
2094   
2095   
2096    my $global_status = 0;
2097    my $global_mess = "";
2098   
2099    my @all_docids = ();
2100   
2101    foreach my $doc_array_rec ( @$doc_array ) {
2102    my $status    = -1;
2103    my $docid     = $doc_array_rec->{'docid'};
2104
2105    push(@all_docids,$docid);
2106   
2107    my $metaname  = $doc_array_rec->{'metaname'};
2108    if(defined $metaname) {
2109       
2110        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2111
2112        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2113        my $metavalue = $doc_array_rec->{'metavalue'};
2114        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2115       
2116       
2117        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2118        # make "accumulate" the default (less destructive, as it won't actually
2119        # delete any existing values)
2120        $metamode = "accumulate";
2121        }       
2122       
2123        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2124                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2125    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2126        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2127       
2128        foreach my $metatable_rec ( @$metatable ) {
2129        $metaname  = $metatable_rec->{'metaname'};
2130        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2131        my $metapos = undef;
2132        my $prevmetavalue = undef;
2133        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2134       
2135        foreach my $metavalue ( @$metavals ) {
2136            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2137                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2138           
2139            if($metamode eq "override") { # now, having overridden the metavalue for the first,
2140            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2141            # metavalue for this metaname will be lost
2142            $metamode = "accumulate";
2143            }
2144        }           
2145        }       
2146    }
2147       
2148    if ($status != 0) {
2149        # Catch error if set infodb entry failed
2150        $global_status = $status;
2151        $global_mess .= "Failed to set metadata key: $docid\n";
2152        $global_mess .= "Exit status: $status\n";
2153        $global_mess .= "System Error Message: $!\n";
2154        $global_mess .= "-" x 20 . "\n";
2155    }
2156    }
2157   
2158    if ($global_status != 0) {
2159    $global_mess .= "PATH: $ENV{'PATH'}\n";
2160    $gsdl_cgi->generate_error($global_mess);
2161    }
2162    else {
2163    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2164    $gsdl_cgi->generate_ok_message($mess);
2165    }
2166}
2167
2168sub set_archives_metadata_array
2169{
2170    my $self = shift @_;
2171
2172    my $username  = $self->{'username'};
2173    my $collect   = $self->{'collect'};
2174    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2175#    my $gsdlhome  = $self->{'gsdlhome'};
2176
2177    if ($baseaction::authentication_enabled) {
2178    # Ensure the user is allowed to edit this collection   
2179    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2180    }
2181
2182    my $site = $self->{'site'};
2183    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2184   
2185    $gsdl_cgi->checked_chdir($collect_dir);
2186
2187    # Obtain the collect dir
2188    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2189
2190    # Make sure the collection isn't locked by someone else
2191    $self->lock_collection($username, $collect);
2192
2193    $self->_set_archives_metadata_array(@_);
2194   
2195    # Release the lock once it is done
2196    $self->unlock_collection($username, $collect);
2197}
2198
2199sub _remove_archives_metadata
2200{
2201    my $self = shift @_;
2202
2203    my $collect   = $self->{'collect'};
2204    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2205#   my $gsdlhome  = $self->{'gsdlhome'};
2206    my $infodbtype = $self->{'infodbtype'};
2207   
2208    my $site = $self->{'site'};
2209       
2210    # Obtain the collect and archive dir   
2211    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2212   
2213    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2214
2215    # look up additional args
2216    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2217   
2218    my $metaname = $self->{'metaname'};
2219    my $metapos = $self->{'metapos'};
2220    my $metavalue = $self->{'metavalue'};
2221
2222    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2223    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2224
2225    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2226    # default to deleting the first metavalue for the given metaname
2227    # Beware that if both metapos AND metavalue are defined, both matches (if any)
2228    # seem to get deleted in one single remove_archives_meta action invocation.
2229    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2230    # deleted, both get deleted.
2231    if(!defined $metapos && !defined $metavalue) {
2232        $metapos = 0;
2233    }
2234
2235    my $metamode = $self->{'metamode'};
2236    $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2237
2238    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2239    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2240
2241    # This now stores the full pathname
2242    my $doc_filename = $doc_rec->{'doc-file'}->[0];
2243
2244    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2245#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
2246   
2247    if ($status == 0)
2248    {
2249        my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
2250        $mess .= "  $metaname";
2251        $mess .= "->[$metapos]" if (defined $metapos);
2252        $mess .= " ($metavalue)" if (defined $metavalue);
2253        $gsdl_cgi->generate_ok_message($mess); 
2254    }
2255    else
2256    {
2257        my $mess .= "Failed to remove archives metadata key: $docid\n";
2258        $mess .= "Exit status: $status\n";
2259        $mess .= "System Error Message: $!\n";
2260        $mess .= "-" x 20 . "\n";
2261       
2262        $gsdl_cgi->generate_error($mess);
2263    }
2264   
2265    #return $status; # in case calling functions have a use for this
2266}
2267
2268sub remove_archives_metadata
2269{
2270    my $self = shift @_;
2271
2272    my $username  = $self->{'username'};
2273    my $collect   = $self->{'collect'};
2274    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2275   
2276    if ($baseaction::authentication_enabled)
2277    {
2278        # Ensure the user is allowed to edit this collection       
2279        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2280    }
2281
2282    # Make sure the collection isn't locked by someone else
2283    $self->lock_collection($username, $collect);
2284
2285    $self->_remove_archives_metadata(@_);
2286
2287    # Release the lock once it is done
2288    $self->unlock_collection($username, $collect);
2289}
2290
2291sub remove_from_doc_xml
2292{
2293    my $self = shift @_;
2294    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
2295   
2296    my @start_rules = ('Section' => \&dxml_start_section);
2297   
2298    # Set the call-back functions for the metadata tags
2299    my @rules =
2300    (
2301        _default => 'raw',
2302        'Metadata' => \&rfdxml_metadata
2303    );
2304       
2305    my $parser = XML::Rules->new
2306    (
2307        start_rules => \@start_rules,
2308        rules => \@rules,
2309        style => 'filter',
2310        output_encoding => 'utf8',
2311#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2312        stripspaces => 2|0|0 # ineffectual
2313    );
2314   
2315    my $status = 0;
2316    my $xml_in = "";
2317    if (!open(MIN,"<$doc_xml_filename"))
2318    {
2319        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2320        $status = 1;
2321    }
2322    else
2323    {
2324        # Read them in
2325        my $line;
2326        while (defined ($line=<MIN>)) {
2327            $xml_in .= $line;
2328        }
2329        close(MIN);
2330
2331        # Filter with the call-back functions
2332        my $xml_out = "";
2333
2334        my $MOUT;
2335        if (!open($MOUT,">$doc_xml_filename")) {
2336            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2337            $status = 1;
2338        }
2339        else {
2340            binmode($MOUT,":utf8");
2341            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
2342            close($MOUT);       
2343        }
2344    }
2345    return $status;
2346}
2347
2348sub rfdxml_metadata
2349{
2350    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2351
2352    # For comparisons, toplevel section is indicated by ""
2353    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2354    my $secid = $parser->{'parameters'}->{'secid'} || "";
2355
2356    if (!($secid eq $curr_sec_num))
2357    {
2358        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2359        return [$tagname => $attrHash];
2360    }
2361
2362    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2363    {
2364        if (!defined $parser->{'parameters'}->{'poscount'})
2365        {
2366            $parser->{'parameters'}->{'poscount'} = 0;
2367        }
2368        else
2369        {
2370            $parser->{'parameters'}->{'poscount'}++;
2371        }
2372       
2373        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2374        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {         
2375            return [];
2376        }
2377
2378        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2379        {   
2380            return [];
2381        }
2382       
2383        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2384        {   
2385            return [];
2386        }
2387    }
2388   
2389    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2390    return [$tagname => $attrHash];
2391}
2392
2393sub mxml_metadata
2394{
2395    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2396    my $metaname = $parser->{'parameters'}->{'metaname'};
2397    my $metamode = $parser->{'parameters'}->{'metamode'};
2398
2399    # Report error if we don't see FileName tag before this
2400    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2401   
2402    # Don't do anything if we are not in the right FileSet
2403    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2404    if ($file_regexp =~ /\.\*/) {
2405    # Only interested in a file_regexp if it specifies precisely one
2406    # file. 
2407    # So, skip anything with a .* in it as it is too general
2408##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2409    return [$tagname => $attrHash];
2410    }
2411    my $src_file = $parser->{'parameters'}->{'src_file'};
2412    if (!($src_file =~ /$file_regexp/)) {
2413    return [$tagname => $attrHash];
2414    }
2415##    print STDERR "*** mxl metamode = $metamode\n";
2416
2417    # Find the right metadata tag and checks if we are going to override it
2418    my $name_attr = $attrHash->{'name'};
2419    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2420
2421    # now metadata.xml functions need to keep track of metapos
2422    if (!defined $parser->{'parameters'}->{'poscount'})
2423    {
2424        $parser->{'parameters'}->{'poscount'} = 0;
2425    }
2426    else
2427    {
2428        $parser->{'parameters'}->{'poscount'}++;
2429    }
2430
2431    # If either the metapos or prevmetavalue is set,
2432        # get the value and override the current value
2433    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2434
2435    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2436    {
2437        $attrHash->{'_content'} = $metavalue;
2438
2439        ##  print STDERR "**** overriding metadata.xml\n";
2440       
2441        # Don't want it to wipe out any other pieces of metadata
2442        $parser->{'parameters'}->{'metamode'} = "done";
2443    }
2444    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2445    {
2446        $attrHash->{'_content'} = $metavalue;
2447        $parser->{'parameters'}->{'metamode'} = "done";
2448    }
2449    }
2450
2451    # mxml_description will process the metadata if metadata is accumulate,
2452    # or if we haven't found the metadata to override
2453
2454    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2455    return [$tagname => $attrHash];
2456}
2457
2458
2459sub mxml_description
2460{
2461    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2462    my $metamode = $parser->{'parameters'}->{'metamode'};   
2463
2464    # Failed... Report error if we don't see FileName tag before this
2465    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2466
2467    # Don't do anything if we are not in the right FileSet
2468    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2469    if ($file_regexp =~ m/\.\*/) {
2470    # Only interested in a file_regexp if it specifies precisely one
2471    # file. 
2472    # So, skip anything with a .* in it as it is too general
2473    return [$tagname => $attrHash];
2474    }
2475    my $src_file = $parser->{'parameters'}->{'src_file'};
2476   
2477    if (!($src_file =~ m/$file_regexp/)) {
2478    return [$tagname => $attrHash];
2479    }
2480
2481    # Accumulate the metadata block to the end of the description block
2482    # Note: This adds metadata block to all description blocks, so if there are
2483    # multiple FileSets, it will add to all of them
2484    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2485
2486    # if metamode was "override" but get to here then it failed to
2487    # find an item to override, in which case it should append its
2488    # value to the end, just like the "accumulate" mode
2489
2490    if ($metamode eq "override") {
2491        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2492    }
2493
2494    # tack a new metadata tag on to the end of the <Metadata>+ block
2495    my $metaname = $parser->{'parameters'}->{'metaname'};
2496    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2497   
2498    my $metadata_attr = { '_content' => $metavalue,
2499                  'name'     => $metaname,
2500                  'mode'     => "accumulate" };
2501
2502    my $append_metadata = [ "Metadata" => $metadata_attr ];
2503    my $description_content = $attrHash->{'_content'};
2504   
2505##  print STDERR "*** appending to metadata.xml\n";
2506
2507    # append the new metadata element to the end of the current
2508    # content contained inside this tag
2509    if (ref($description_content) eq "") {
2510        # => string or numeric literal
2511        # this is caused by a <Description> block has no <Metadata> child elements
2512        # => set up an empty array in '_content'
2513        $attrHash->{'_content'} = [ "\n" ];
2514        $description_content = $attrHash->{'_content'};
2515    }
2516
2517    push(@$description_content,"    ", $append_metadata ,"\n        ");
2518    $parser->{'parameters'}->{'metamode'} = "done";
2519    }
2520
2521    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2522    return [$tagname => $attrHash];
2523}
2524
2525
2526sub mxml_filename
2527{
2528    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2529
2530    # Store the filename of the Current Fileset
2531    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2532    # FileName tag must come before Description tag
2533    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2534
2535    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2536    return [$tagname => $attrHash];
2537}
2538
2539
2540sub mxml_fileset
2541{
2542    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2543
2544    # Initilise the current_file
2545    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2546    # FileName tag must come before Description tag
2547    $parser->{'parameters'}->{'current_file'} = "";
2548
2549    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2550    return [$tagname => $attrHash];
2551}
2552
2553sub mxml_directorymetadata
2554{
2555    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2556
2557    # if we haven't processed the metadata when we reach the end of metadata.xml
2558    # it's because there's no particular FileSet element whose FileName matched
2559    # In which case, add a new FileSet for this FileName
2560    my $metamode = $parser->{'parameters'}->{'metamode'};
2561    if($metamode ne "done") {
2562   
2563    if ($metamode eq "override") {
2564        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2565    }
2566
2567    # If we get to here and metamode is override, this means there
2568    # was no existing value to overide => treat as an append operation
2569   
2570    # Create a new FileSet element and append to DirectoryMetadata
2571    # <FileSet>
2572    # <FileName>src_file</FileName>
2573    # <Description>
2574    # <Metadata mode="" name="">metavalue</Metadata>
2575    # </Description>
2576    # </FileSet>
2577    my $src_file = $parser->{'parameters'}->{'src_file'};
2578    my $metaname = $parser->{'parameters'}->{'metaname'};
2579    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2580    my $metadata_attr = {
2581        '_content' => $metavalue,
2582        'name' => $metaname,
2583        'mode' => "accumulate"
2584    };
2585    my $append_metadata = [ "Metadata" => $metadata_attr ];
2586    my $description_attr = { '_content' => [ "\n\t\t   ", $append_metadata, "\n\t\t"] };
2587    my $description_element = [ "Description" => $description_attr ];
2588
2589    #_content is not an attribute, it's special and holds the children of this element
2590    # including the textnode value embedded in this element if any.
2591    my $filename_attr = {'_content' => $src_file};
2592    my $filename_element = [ "FileName" => $filename_attr ];
2593
2594    my $fileset_attr = {};
2595    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2596    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2597   
2598   
2599    # get children of dirmeta, and push the new FileSet element onto it
2600    print "Appending metadata to metadata.xml\n";
2601    my $dirmeta_content = $attrHash->{'_content'};
2602    if (ref($dirmeta_content)) {
2603        # got some existing interesting nested content
2604        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2605        push(@$dirmeta_content, "\t", $fileset ,"\n");
2606    }
2607    else {
2608        #description_content is most likely a string such as "\n"
2609        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2610        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2611    }   
2612
2613    $parser->{'parameters'}->{'metamode'} = "done";
2614    }
2615    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2616    return [$tagname => $attrHash];
2617}
2618
2619
2620sub edit_metadata_xml
2621{
2622    my $self = shift @_;
2623    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2624
2625    # Set the call-back functions for the metadata tags
2626    my @rules =
2627    ( _default => 'raw',
2628          'FileName' => \&mxml_filename,
2629      'Metadata' => \&mxml_metadata,
2630      'Description' => \&mxml_description,
2631          'FileSet' => \&mxml_fileset,
2632      'DirectoryMetadata' => \&mxml_directorymetadata);
2633
2634    # use XML::Rules to add it in (read in and out again)
2635    my $parser = XML::Rules->new(rules => \@rules,
2636                 style => 'filter',
2637                                 output_encoding => 'utf8',
2638                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2639
2640    if (!-e $metadata_xml_filename) {
2641   
2642        if (open(MOUT,">$metadata_xml_filename")) {
2643           
2644            my $src_file_re = &util::filename_to_regex($src_file);
2645            # shouldn't the following also be in the above utility routine??
2646            # $src_file_re =~ s/\./\\./g;
2647       
2648            print MOUT "<?xml version=\"1.0\"?>\n";
2649            print MOUT "<DirectoryMetadata>\n";
2650            print MOUT " <FileSet>\n";
2651            print MOUT "  <FileName>$src_file_re</FileName>\n";
2652            print MOUT "  <Description>\n";
2653            print MOUT "  </Description>\n";
2654            print MOUT " </FileSet>\n";
2655            print MOUT "</DirectoryMetadata>\n";
2656
2657            close(MOUT);
2658        }
2659        else {
2660            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2661        }
2662    }
2663   
2664   
2665    my $xml_in = "";
2666    if (!open(MIN,"<$metadata_xml_filename")) {
2667        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2668    }
2669    else {
2670        # Read them in
2671        my $line;
2672        while (defined ($line=<MIN>)) {
2673            $xml_in .= $line;
2674        }
2675        close(MIN);
2676
2677        # Filter with the call-back functions
2678        my $xml_out = "";
2679
2680        my $MOUT;
2681        if (!open($MOUT,">$metadata_xml_filename")) {
2682            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2683        }
2684        else {
2685            binmode($MOUT,":utf8");
2686
2687            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2688            # At the moment, I will just hack it!
2689            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2690            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2691            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2692            #print MOUT $xml_out;
2693
2694            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2695                              metapos => $metapos,
2696                      metavalue => $metavalue,
2697                      metamode => $metamode,
2698                      src_file => $src_file,
2699                      prevmetavalue => $prevmetavalue,
2700                      current_file => undef} );
2701            close($MOUT);       
2702        }
2703   }
2704}
2705
2706
2707sub set_import_metadata
2708{
2709    my $self = shift @_;
2710   
2711    my $username  = $self->{'username'};
2712    my $collect   = $self->{'collect'};
2713    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2714   
2715    if ($baseaction::authentication_enabled) {
2716    # Ensure the user is allowed to edit this collection
2717    $self->authenticate_user($username, $collect);
2718    }
2719
2720    # Make sure the collection isn't locked by someone else
2721    $self->lock_collection($username, $collect);
2722 
2723    $self->_set_import_metadata(@_);
2724
2725    # Release the lock once it is done
2726    $self->unlock_collection($username, $collect);
2727   
2728}
2729
2730sub set_import_metadata_array
2731{
2732    my $self = shift @_;
2733
2734    my $username  = $self->{'username'};
2735    my $collect   = $self->{'collect'};
2736    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2737#    my $gsdlhome  = $self->{'gsdlhome'};
2738
2739    if ($baseaction::authentication_enabled) {
2740    # Ensure the user is allowed to edit this collection   
2741    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2742    }
2743
2744    my $site = $self->{'site'};
2745    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2746   
2747    $gsdl_cgi->checked_chdir($collect_dir);
2748
2749    # Make sure the collection isn't locked by someone else
2750    $self->lock_collection($username, $collect);
2751
2752    $self->_set_import_metadata_array(@_);
2753
2754    # Release the lock once it is done
2755    $self->unlock_collection($username, $collect);
2756
2757}
2758
2759
2760sub _set_import_metadata_array
2761{
2762    my $self = shift @_;
2763
2764    my $collect   = $self->{'collect'};
2765    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2766
2767    my $site = $self->{'site'};
2768    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2769   
2770    # look up additional args
2771   
2772    my $infodbtype = $self->{'infodbtype'};
2773   
2774    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2775    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2776   
2777    my $json_str = $self->{'json'};
2778    my $doc_array = decode_json $json_str;
2779   
2780    my $global_status = 0;
2781    my $global_mess = "";
2782   
2783    my @all_docids = ();
2784   
2785    foreach my $doc_array_rec ( @$doc_array )
2786    {
2787    my $status = -1;
2788    my $docid = $doc_array_rec->{'docid'};
2789   
2790    my ($docid_root,$docid_secnum);
2791    if(defined $docid) {   
2792        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2793        # as yet no support for setting subsection metadata in metadata.xml
2794        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2795        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2796        next; # skip this docid in for loop
2797        }
2798    }
2799
2800    push(@all_docids,$docid); # docid_root rather
2801   
2802    my $metaname = $doc_array_rec->{'metaname'};
2803    if (defined $metaname) {
2804        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2805        my $metavalue = $doc_array_rec->{'metavalue'};
2806        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2807
2808        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2809        # make "accumulate" the default (less destructive, as it won't actually
2810        # delete any existing values)
2811        $metamode = "accumulate";
2812        }
2813
2814        # adding metapos and prevmetavalue support to import_metadata subroutines
2815        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2816        my $prevmetavalue = $self->{'prevmetavalue'};
2817
2818        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2819       
2820    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2821        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2822       
2823        foreach my $metatable_rec ( @$metatable ) {
2824        $metaname  = $metatable_rec->{'metaname'};
2825        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2826        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2827            # make "accumulate" the default (less destructive, as it won't actually
2828            # delete any existing values)
2829            $metamode = "accumulate";
2830        }
2831
2832        # No support for metapos and prevmetavalue in the JSON metatable substructure
2833        my $metapos = undef;
2834        my $prevmetavalue = undef;
2835        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2836       
2837        foreach my $metavalue ( @$metavals ) {
2838            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2839
2840            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2841            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2842            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2843            # metavalue for this metaname will be lost
2844            $metamode = "accumulate";
2845            }
2846        }
2847        }
2848    }       
2849    }
2850
2851    # always a success message
2852    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2853    $gsdl_cgi->generate_ok_message($mess);
2854}
2855
2856# always returns true (1)
2857sub set_import_metadata_entry
2858{
2859    my $self = shift @_;
2860    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2861
2862    my $info_mess = <<RAWEND;
2863****************************
2864  set_import_metadata_entry()
2865****************************
2866RAWEND
2867
2868    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2869    $info_mess .= " collect     = $collect\n"     if defined($collect);
2870    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
2871    $info_mess .= " arcinfo_doc_filename  = $arcinfo_doc_filename\n"  if defined($arcinfo_doc_filename);
2872    $info_mess .= " docid       = $docid\n"       if defined($docid);
2873    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
2874    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2875    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2876    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2877    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2878     
2879    $info_mess .= "****************************\n";
2880
2881    $gsdl_cgi->generate_message($info_mess);
2882
2883    # import works with metadata.xml which can have inherited metadata
2884    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2885    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2886    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2887    # what they're doing if they provide a metapos.
2888    if(defined $metapos) {
2889    print STDERR "@@@@ WARNING: metapos defined.\n";
2890    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2891    }
2892
2893    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2894    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2895    my $metadata_xml_file;
2896    my $import_filename = undef;
2897   
2898    if (defined $docid) {
2899    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2900    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2901
2902    # This now stores the full pathname
2903    $import_filename = $doc_rec->{'src-file'}->[0];
2904    $import_filename = &util::placeholders_to_abspath($import_filename);
2905
2906    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2907         # as the array version of the method doesn't support the -f parameter yet
2908    my $import_file  = $self->{'f'};
2909    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2910    }
2911   
2912    # figure out correct metadata.xml file [?]
2913    # Assuming the metadata.xml file is next to the source file
2914    # Note: This will not work if it is using the inherited metadata from the parent folder
2915    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2916    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2917   
2918    # If we're overriding everything, then $prevmetavalue=undefined and
2919    # $metamode=override combined with $metapos=undefined
2920    # in which case we need to remove all metavalues for the metaname at the given (sub)section
2921    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2922    if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2923##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2924    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2925
2926    }
2927
2928    # Edit the metadata.xml
2929    # Modified by Jeffrey from DL Consulting
2930    # Handle the case where there is one metadata.xml file for multiple FileSets
2931    # The XML filter needs to know whether it is in the right FileSet
2932    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2933    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2934    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2935                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2936    #return 0;
2937    return $metadata_xml_filename;
2938}
2939
2940sub _remove_import_metadata
2941{
2942    my $self = shift @_;
2943
2944    my $collect   = $self->{'collect'};
2945    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2946#   my $gsdlhome  = $self->{'gsdlhome'};
2947    my $infodbtype = $self->{'infodbtype'};
2948   
2949    # Obtain the collect dir
2950    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2951    my $site = $self->{'site'};
2952    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2953   
2954    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2955    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2956
2957    # look up additional args
2958    my $docid = $self->{'d'};
2959    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2960    {
2961        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2962    }
2963   
2964    my $metaname = $self->{'metaname'};
2965    my $metapos = $self->{'metapos'};
2966    my $metavalue = $self->{'metavalue'};
2967
2968    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2969    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2970
2971    if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2972        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2973    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2974        $metapos = 0;
2975    }
2976    my $metamode = $self->{'metamode'};
2977    $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2978
2979    # import works with metadata.xml which can have inherited metadata
2980    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2981    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2982    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2983    # what they're doing if they provide a metapos.
2984    if(defined $metapos) {
2985        print STDERR "@@@@ WARNING: metapos defined.\n";
2986        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2987    }
2988   
2989    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2990    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2991    my $metadata_xml_file;
2992    my $import_filename = undef;
2993    if (defined $docid)
2994    {
2995        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2996        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2997
2998        # This now stores the full pathname
2999        $import_filename = $doc_rec->{'src-file'}->[0];
3000        $import_filename = &util::placeholders_to_abspath($import_filename);
3001    }
3002
3003    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
3004    {
3005        $gsdl_cgi->generate_error("There is no metadata\n");
3006    }
3007   
3008    # figure out correct metadata.xml file [?]
3009    # Assuming the metadata.xml file is next to the source file
3010    # Note: This will not work if it is using the inherited metadata from the parent folder
3011    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
3012    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3013   
3014    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
3015   
3016    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
3017    $mess .= "  $metaname";
3018    $mess .= " = $metavalue\n";
3019   
3020    $gsdl_cgi->generate_ok_message($mess);
3021
3022    #return $status; # in case calling functions have a use for this
3023}
3024
3025sub remove_import_metadata
3026{
3027    my $self = shift @_;
3028   
3029    my $username = $self->{'username'};
3030    my $collect   = $self->{'collect'};
3031    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3032   
3033    if ($baseaction::authentication_enabled) {
3034        # Ensure the user is allowed to edit this collection       
3035        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3036    }
3037
3038    # Make sure the collection isn't locked by someone else
3039    $self->lock_collection($username, $collect);
3040   
3041    $self->_remove_import_metadata(@_);
3042
3043    # Release the lock once it is done
3044    $self->unlock_collection($username, $collect);
3045
3046}
3047
3048sub remove_from_metadata_xml
3049{
3050    my $self = shift @_;
3051    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3052    # metamode generally has no meaning for removing meta, but is used by set_meta
3053    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3054
3055    # Set the call-back functions for the metadata tags
3056    my @rules =
3057    (
3058        _default => 'raw',
3059        'Metadata' => \&rfmxml_metadata,
3060        'FileName' => \&mxml_filename
3061    );
3062       
3063    my $parser = XML::Rules->new
3064    (
3065        rules => \@rules,
3066        style => 'filter',
3067        output_encoding => 'utf8',
3068     #normalisespaces => 1,
3069            stripspaces => 2|0|0 # ineffectual
3070    );
3071   
3072    my $xml_in = "";
3073    if (!open(MIN,"<$metadata_xml_filename"))
3074    {
3075        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3076    }
3077    else
3078    {
3079        # Read them in
3080        my $line;
3081        while (defined ($line=<MIN>)) {
3082            $xml_in .= $line;
3083        }
3084        close(MIN);
3085
3086        # Filter with the call-back functions
3087        my $xml_out = "";
3088
3089        my $MOUT;
3090        if (!open($MOUT,">$metadata_xml_filename")) {
3091            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3092        }
3093        else {
3094            binmode($MOUT,":utf8");
3095            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
3096            close($MOUT);       
3097        }
3098    }
3099}
3100
3101sub rfmxml_metadata
3102{
3103    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3104
3105    # metadata.xml does not handle subsections
3106
3107    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3108    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3109        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
3110    {
3111        if (!defined $parser->{'parameters'}->{'poscount'})
3112        {
3113            $parser->{'parameters'}->{'poscount'} = 0;
3114        }
3115        else
3116        {
3117            $parser->{'parameters'}->{'poscount'}++;
3118        }
3119
3120        # if overriding but no metapos, then clear all the meta for this metaname
3121        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
3122            return [];
3123        }
3124   
3125        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3126        {
3127            return [];
3128        }
3129        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3130        {
3131            return [];
3132        }       
3133    }
3134
3135    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3136    return [$tagname => $attrHash];
3137}
3138
3139sub _remove_live_metadata
3140{
3141    my $self = shift @_;
3142
3143    my $collect   = $self->{'collect'};
3144    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3145#    my $gsdlhome  = $self->{'gsdlhome'};
3146    my $infodbtype = $self->{'infodbtype'};
3147
3148    # Obtain the collect dir
3149    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3150    my $site = $self->{'site'};
3151    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3152
3153   
3154    # look up additional args
3155    my $docid     = $self->{'d'};
3156    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3157      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3158    }
3159   
3160    # Generate the dbkey
3161    my $metaname  = $self->{'metaname'};
3162    my $dbkey = "$docid.$metaname";
3163
3164    # To people who know $collect_tail please add some comments
3165    # Obtain the live gdbm_db path
3166    my $collect_tail = $collect;
3167    $collect_tail =~ s/^.*[\/|\\]//;
3168    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3169    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3170
3171    # Remove the key
3172    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3173    my $status = system($cmd);
3174    if ($status != 0) {
3175        # Catch error if gdbmdel failed
3176    my $mess = "Failed to set metadata key: $dbkey\n";
3177   
3178    $mess .= "PATH: $ENV{'PATH'}\n";
3179    $mess .= "cmd = $cmd\n";
3180    $mess .= "Exit status: $status\n";
3181    $mess .= "System Error Message: $!\n";
3182
3183    $gsdl_cgi->generate_error($mess);
3184    }
3185    else {
3186    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3187    }
3188
3189}
3190
3191sub remove_live_metadata
3192{
3193    my $self = shift @_;
3194
3195    my $username  = $self->{'username'};
3196    my $collect   = $self->{'collect'};
3197    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3198    my $gsdlhome  = $self->{'gsdlhome'};
3199   
3200    if ($baseaction::authentication_enabled) {
3201    # Ensure the user is allowed to edit this collection   
3202    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3203    }
3204
3205    # Make sure the collection isn't locked by someone else
3206    $self->lock_collection($username, $collect);
3207
3208    $self->_remove_live_metadata(@_);
3209
3210    $self->unlock_collection($username, $collect);
3211}
3212
3213sub remove_metadata
3214{
3215    my $self = shift @_;
3216
3217    my $where = $self->{'where'};
3218    if(!$where || ($where =~ m/^\s*$/)) {
3219    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3220    return;
3221    }
3222
3223    my $username  = $self->{'username'};
3224    my $collect   = $self->{'collect'};
3225    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3226
3227    if ($baseaction::authentication_enabled) {
3228    # Ensure the user is allowed to edit this collection   
3229    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3230    }
3231
3232    # Make sure the collection isn't locked by someone else
3233    $self->lock_collection($username, $collect);
3234
3235    # check which directories need to be processed, specified in $where as
3236    # any combination of import|archives|index|live
3237    if($where =~ m/import/) {
3238    $self->_remove_import_metadata(@_);     
3239    }
3240    if($where =~ m/archives/) {
3241    $self->_remove_archives_metadata(@_);       
3242   }
3243    if($where =~ m/index/) {
3244    $self->_remove_index_metadata(@_);     
3245    }
3246
3247    # Release the lock once it is done
3248    $self->unlock_collection($username, $collect);
3249}
3250
3251# the internal version, without authentication
3252sub _remove_index_metadata
3253{   
3254    my $self = shift @_;
3255
3256    my $collect   = $self->{'collect'};
3257    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3258#    my $gsdlhome  = $self->{'gsdlhome'};
3259    my $infodbtype = $self->{'infodbtype'};
3260   
3261    # Obtain the collect dir
3262    my $site = $self->{'site'};
3263    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3264    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3265
3266        # look up additional args
3267    my $docid     = $self->{'d'};
3268    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3269      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3270    }
3271    my $metaname  = $self->{'metaname'};
3272    my $metapos   = $self->{'metapos'};
3273    my $metavalue = $self->{'metavalue'};
3274
3275    $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
3276    $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
3277
3278    # To people who know $collect_tail please add some comments
3279    # -> In collection groups, I think collect_tailname is the subcollection name,
3280    # e.g. colgroup-name/col-tail-name
3281    # Obtain the path to the database
3282    my $collect_tail = $collect;
3283    $collect_tail =~ s/^.*[\/|\\]//;
3284    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3285    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3286
3287    # Read the docid entry
3288    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3289
3290    # Check to make sure the key does exist
3291    if (!defined ($doc_rec->{$metaname})) {
3292        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3293    }
3294
3295    # Obtain the specified metadata pos
3296    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3297    if(!defined $metapos && !defined $metavalue) {
3298        $metapos = 0;
3299    }
3300   
3301
3302    # consider check key is defined before deleting?
3303    # Loop through the metadata array and ignore the specified position
3304    my $filtered_metadata = [];
3305    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
3306    for (my $i=0; $i<$num_metadata_vals; $i++) {
3307    my $metaval = shift(@{$doc_rec->{$metaname}});
3308
3309    if (!defined $metavalue && $i != $metapos) {
3310        push(@$filtered_metadata,$metaval);
3311    }
3312   
3313    if(defined $metavalue && !($metavalue eq $metaval))
3314    {
3315        push(@$filtered_metadata,$metaval);
3316    }
3317    }
3318    $doc_rec->{$metaname} = $filtered_metadata;
3319
3320    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3321    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3322
3323    if ($status != 0) {
3324    my $mess = "Failed to set metadata key: $docid\n";
3325   
3326    $mess .= "PATH: $ENV{'PATH'}\n";
3327    $mess .= "Exit status: $status\n";
3328    $mess .= "System Error Message: $!\n";
3329
3330    $gsdl_cgi->generate_error($mess);
3331    }
3332    else {
3333    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3334    $mess .= "  $metaname";
3335    $mess .= "->[$metapos]" if (defined $metapos);
3336    $mess .= " ($metavalue)" if (defined $metavalue);
3337
3338    $gsdl_cgi->generate_ok_message($mess);
3339    }
3340
3341    #return $status; # in case calling functions have a use for this
3342}
3343
3344sub remove_index_metadata
3345{
3346    my $self = shift @_;
3347
3348    my $username  = $self->{'username'};
3349    my $collect   = $self->{'collect'};
3350    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3351#    my $gsdlhome  = $self->{'gsdlhome'};
3352   
3353    if ($baseaction::authentication_enabled) {
3354    # Ensure the user is allowed to edit this collection   
3355    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3356    }
3357
3358    # Obtain the collect dir
3359    my $site = $self->{'site'};
3360    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3361    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3362
3363    # Make sure the collection isn't locked by someone else
3364    $self->lock_collection($username, $collect);
3365
3366    $self->_remove_index_metadata(@_);
3367
3368    # Release the lock once it is done
3369    $self->unlock_collection($username, $collect);
3370}
3371
3372
3373# Was trying to reused the codes, but the functions need to be broken
3374# down more before they can be reused, otherwise there will be too
3375# much overhead and duplicate process...
3376sub insert_metadata
3377{
3378    my $self = shift @_;
3379   
3380    my $username  = $self->{'username'};
3381    my $collect   = $self->{'collect'};
3382    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3383    my $gsdlhome  = $self->{'gsdlhome'};
3384    my $infodbtype = $self->{'infodbtype'};
3385   
3386    # If the import metadata and gdbm database have been updated, we
3387    # need to insert some notification to warn user that the the text
3388    # they see at the moment is not indexed and require a rebuild.
3389    my $rebuild_pending_macro = "_rebuildpendingmessage_";
3390
3391    if ($baseaction::authentication_enabled) {
3392    # Ensure the user is allowed to edit this collection
3393    $self->authenticate_user($username, $collect);
3394    }
3395
3396    # Obtain the collect and archive dir   
3397    my $site = $self->{'site'};
3398    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3399    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3400    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3401
3402    # Make sure the collection isn't locked by someone else
3403    $self->lock_collection($username, $collect);
3404   
3405    # Check additional args
3406    my $docid = $self->{'d'};
3407    if (!defined($docid) || ($docid =~ m/^\s*$/)) {
3408    $gsdl_cgi->generate_error("No document id is specified: d=...");
3409    }
3410    my $metaname = $self->{'metaname'};
3411    if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
3412    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3413    }
3414    my $metavalue = $self->{'metavalue'};
3415    if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
3416    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3417    }
3418    # make "accumulate" the default (less destructive, as it won't actually
3419    # delete any existing values)
3420    my $metamode = "accumulate";
3421
3422    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3423    my $metapos   = undef;
3424    my $prevmetavalue = undef;
3425
3426    #=======================================================================#
3427    # set_import_metadata [START]
3428    #=======================================================================#
3429    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3430    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3431    my $metadata_xml_file;
3432    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3433    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3434   
3435    # This now stores the full pathname
3436    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3437    $import_filename = &util::placeholders_to_abspath($import_filename);
3438   
3439    # figure out correct metadata.xml file [?]
3440    # Assuming the metadata.xml file is next to the source file
3441    # Note: This will not work if it is using the inherited metadata from the parent folder
3442    my ($import_tailname, $import_dirname)
3443    = File::Basename::fileparse($import_filename);
3444    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3445
3446    # Shane's escape characters
3447    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3448    $metavalue =~ s/\,/&#44;/g;
3449    $metavalue =~ s/\:/&#58;/g;
3450    $metavalue =~ s/\|/&#124;/g;
3451    $metavalue =~ s/\(/&#40;/g;
3452    $metavalue =~ s/\)/&#41;/g;
3453    $metavalue =~ s/\[/&#91;/g;
3454    $metavalue =~ s/\\/&#92;/g;
3455    $metavalue =~ s/\]/&#93;/g;
3456    $metavalue =~ s/\{/&#123;/g;
3457    $metavalue =~ s/\}/&#125;/g;
3458    $metavalue =~ s/\"/&#34;/g;
3459    $metavalue =~ s/\`/&#96;/g;
3460    $metavalue =~ s/\n/_newline_/g;
3461
3462    # Edit the metadata.xml
3463    # Modified by Jeffrey from DL Consulting
3464    # Handle the case where there is one metadata.xml file for multiple FileSets
3465    # The XML filter needs to know whether it is in the right FileSet
3466    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3467    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3468    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3469                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3470    #=======================================================================#
3471    # set_import_metadata [END]
3472    #=======================================================================#
3473
3474
3475    #=======================================================================#
3476    # set_metadata (accumulate version) [START]
3477    #=======================================================================#
3478    # To people who know $collect_tail please add some comments
3479    # Obtain path to the database
3480    my $collect_tail = $collect;
3481    $collect_tail =~ s/^.*[\/|\\]//;
3482    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3483    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3484
3485    # Read the docid entry
3486    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3487
3488    # Protect the quotes
3489    $metavalue =~ s/\"/\\\"/g;
3490
3491    # Adds the pending macro
3492    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3493
3494    # If the metadata doesn't exist, create a new one
3495    if (!defined($doc_rec->{$metaname})){   
3496    $doc_rec->{$metaname} = [ $macro_metavalue ];
3497    }
3498    # Else, let's acculumate the values
3499    else {
3500        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3501    }
3502
3503    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3504    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3505
3506    if ($status != 0) {
3507        # Catch error if gdbmget failed
3508    my $mess = "Failed to set metadata key: $docid\n";
3509   
3510    $mess .= "PATH: $ENV{'PATH'}\n";
3511    $mess .= "Exit status: $status\n";
3512    $mess .= "System Error Message: $!\n";
3513
3514    $gsdl_cgi->generate_error($mess);
3515    }
3516    else {
3517    my $mess = "insert-metadata successful: Key[$docid]\n";
3518    $mess .= "  [In metadata.xml] $metaname";
3519    $mess .= " = $metavalue\n";
3520    $mess .= "  [In database] $metaname";
3521    $mess .= " = $macro_metavalue\n";
3522    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3523        $gsdl_cgi->generate_ok_message($mess);
3524    }   
3525    #=======================================================================#
3526    # set_metadata (accumulate version) [END]
3527    #=======================================================================#
3528
3529    # Release the lock once it is done
3530    $self->unlock_collection($username, $collect);
3531}
3532
35331;
Note: See TracBrowser for help on using the browser.