root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 29098

Revision 29098, 119.3 KB (checked in by ak19, 5 years ago)

Removing import metadata needs to take into account prevmetavalue and and not delete all metavalues in override mode if metapos is undefined. In the case of prevmetavalue being defined, it should only handle prevmetavalue. Kathy had already fixed the archives_metadata case, and the index_metadata case does not deal with xml files but the database.

Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130        'optional-args'   => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131                     
132    "remove-archives-metadata" => {
133        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134        'optional-args'   => [ "metapos", "metavalue" ] },
135
136    "remove-live-metadata" => {
137        'compulsory-args' => [ "d", "metaname" ],
138        'optional-args'   => [ ] },
139
140    "remove-index-metadata" => {
141        'compulsory-args' => [ "d", "metaname" ],
142        'optional-args'   => [ "metapos", "metavalue" ] },
143
144    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145        'compulsory-args' => [ "d", "metaname" ],
146        'optional-args'   => [ "where", "metapos", "metavalue" ] },
147
148    #INSERT METHODS
149    "insert-metadata" => {
150        'compulsory-args' => [ "d", "metaname", "metavalue" ],
151        'optional-args'   => [ ] }
152};
153
154
155sub new
156{
157    my $class = shift (@_);
158    my ($gsdl_cgi,$iis6_mode) = @_;
159
160    # Treat metavalue specially.  To transmit this through a GET request
161    # the Javascript side has url-encoded it, so here we need to decode
162    # it before proceeding
163
164    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171    # need to do the same with prevmetavalue
172    my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
173    my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
174    my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
175
176    $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
177    $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
178
179    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
180
181    return bless $self, $class;
182}
183
184
185sub get_live_metadata
186{
187    my $self = shift @_;
188
189    my $username  = $self->{'username'};
190    my $collect   = $self->{'collect'};
191    my $gsdl_cgi  = $self->{'gsdl_cgi'};
192    my $gsdlhome  = $self->{'gsdlhome'};
193    my $infodbtype = $self->{'infodbtype'};
194   
195    # live metadata gets/saves value scoped (prefixed) by the current usename
196    # so (for now) let's not bother to enforce authentication
197
198    # Obtain the collect dir
199    my $site = $self->{'site'};
200    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
201    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
202
203    # No locking collection when getting metadata, only when setting it
204#    $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
205
206    # look up additional args
207    my $docid  = $self->{'d'};
208    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
209       $gsdl_cgi->generate_error("No docid (d=...) specified.");
210    }
211
212    # Generate the dbkey
213    my $metaname  = $self->{'metaname'};
214    my $dbkey = "$docid.$metaname";
215
216    # To people who know $collect_tail please add some comments
217    # Obtain path to the database
218    my $collect_tail = $collect;
219    $collect_tail =~ s/^.*[\/|\\]//;
220    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
221    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
222   
223    # Obtain the content of the key
224    my $cmd = "gdbmget $infodb_file_path $dbkey";
225    if (open(GIN,"$cmd |") == 0) {
226        # Catch error if gdbmget failed
227    my $mess = "Failed to get metadata key: $metaname\n";
228    $mess .= "$!\n";
229
230    $gsdl_cgi->generate_error($mess);
231    }
232    else {
233    binmode(GIN,":utf8");
234        # Read everything in and concatenate them into $metavalue
235    my $metavalue = "";
236    my $line;
237    while (defined ($line=<GIN>)) {
238        $metavalue .= $line;
239    }
240    close(GIN);
241    chomp($metavalue); # Get rid off the tailing newlines
242    $gsdl_cgi->generate_ok_message("$metavalue");
243    }
244
245    # Release the lock once it is done
246#    $self->unlock_collection($username, $collect);
247}
248
249# just calls the index version
250sub get_metadata
251{
252    my $self = shift @_;
253    $self->get_index_metadata(@_);
254}
255
256# JSON version that will get the requested metadata values
257# from the requested source (index, import, archives or live)
258# One of the params is a JSON string and the return value is JSON too
259# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
260sub get_metadata_array
261{
262    my $self = shift @_;
263
264    my $where = $self->{'where'};
265    if (!$where) { 
266    $where = "index"; # default behaviour is to get the values from index
267    }
268
269    # Only when setting metadata do we perform authentication and do we lock the collection,
270    # not when getting metadata
271
272    # for get_meta_array, the where param can only be ONE of import, archives, index, live
273    if($where =~ m/index/) {
274    $self->_get_index_metadata_array(@_);
275    }
276    elsif($where =~ m/archives/) {
277    $self->_get_archives_metadata_array(@_);
278    }
279    elsif($where =~ m/import/) {
280    $self->_get_import_metadata_array(@_);
281    }
282    elsif($where =~ m/live/) {
283        $self->_get_live_metadata_array(@_);
284    }
285}
286
287# Unused at present. Added for completion. Tested.
288sub _get_import_metadata_array {
289   
290    my $self = shift @_;
291
292    my $collect   = $self->{'collect'};
293    my $gsdl_cgi  = $self->{'gsdl_cgi'};
294    my $site = $self->{'site'};
295    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
296   
297    # look up additional args
298    my $infodbtype = $self->{'infodbtype'};
299   
300    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
301    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
302    my $json_str      = $self->{'json'};
303    my $doc_array = decode_json $json_str;
304
305    my $json_result_str = "[";
306    my $first_doc_rec = 1;
307    foreach my $doc_array_rec ( @$doc_array ) {
308   
309    my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
310   
311    if($first_doc_rec) {
312        $first_doc_rec = 0;
313    } else {
314        $json_result_str .= ",";
315    }
316    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
317
318    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
319    $json_result_str = $json_result_str . ",\"metatable\":[";
320
321    my $first_rec = 1;
322    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
323
324        # Read the docid entry     
325        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
326        # This now stores the full pathname
327        my $import_filename = $doc_rec->{'src-file'}->[0];
328        $import_filename = &util::placeholders_to_abspath($import_filename);
329
330        # figure out correct metadata.xml file [?]
331        # Assuming the metadata.xml file is next to the source file
332        # Note: This will not work if it is using the inherited metadata from the parent folder
333        my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
334        my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
335
336
337        if($first_rec) {
338        $first_rec = 0;
339        } else {
340        $json_result_str .= ",";       
341        }
342       
343        my $metaname  = $metatable_rec->{'metaname'};
344        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
345
346        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
347        if(!defined $metapos) {
348        $metapos = 0;
349        }
350
351        # Obtain the specified metadata value(s)
352        my $metavalue;
353
354        if($metapos ne "all") { # get the value at a single metapos
355        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
356
357        #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
358       
359        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
360
361        } else {
362        my $first_metaval = 1;
363        $metapos = 0;
364        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
365
366        while (defined $metavalue && $metavalue ne "") {
367            if($first_metaval) {       
368            $first_metaval = 0;
369            } else {
370            $json_result_str .= ",";
371            }
372       
373            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
374
375            $metapos++;
376            $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
377        }
378        }
379
380        $json_result_str .= "]}"; # close metavals array and metatable record
381    }
382   
383    $json_result_str .= "]}"; # close metatable array and docid record
384    }
385
386    $json_result_str .= "]"; # close array of docids
387    $gsdl_cgi->generate_ok_message($json_result_str."\n");
388}
389
390# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
391# For more information on the format of the output, see _get_index_metadata_array, which is in use.
392sub _get_archives_metadata_array {
393
394    my $self = shift @_;
395
396    my $collect   = $self->{'collect'};
397    my $gsdl_cgi  = $self->{'gsdl_cgi'};
398    my $site = $self->{'site'};
399    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
400
401    # look up additional args   
402    my $infodbtype = $self->{'infodbtype'};
403
404    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
405    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
406
407    my $json_str      = $self->{'json'};
408    my $doc_array = decode_json $json_str;
409
410    my $json_result_str = "[";
411    my $first_doc_rec = 1;
412    foreach my $doc_array_rec ( @$doc_array ) {
413   
414    my $docid     = $doc_array_rec->{'docid'};
415   
416    if($first_doc_rec) {
417        $first_doc_rec = 0;
418    } else {
419        $json_result_str .= ",";
420    }
421    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
422
423    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
424    $json_result_str = $json_result_str . ",\"metatable\":[";
425
426    my $first_rec = 1;
427    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
428
429        my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
430        $docid_secnum = "" if (!defined $docid_secnum);
431
432        # Read the docid entry     
433        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
434        # This now stores the full pathname
435        my $doc_filename = $doc_rec->{'doc-file'}->[0];
436        $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
437
438        if($first_rec) {
439        $first_rec = 0;
440        } else {
441        $json_result_str .= ",";       
442        }
443       
444        my $metaname  = $metatable_rec->{'metaname'};
445        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
446
447        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
448        if(!defined $metapos) {
449        $metapos = 0;
450        }
451
452
453        # Obtain the specified metadata value(s)
454        my $metavalue;
455
456        if($metapos ne "all") { # get the value at a single metapos
457
458        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
459        #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
460       
461        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
462
463        } else {
464        my $first_metaval = 1;
465        $metapos = 0;
466        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
467
468        while (defined $metavalue && $metavalue ne "") {
469            if($first_metaval) {       
470            $first_metaval = 0;
471            } else {
472            $json_result_str .= ",";
473            }
474       
475            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
476
477            $metapos++;
478            $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
479        }
480        }
481
482        $json_result_str .= "]}"; # close metavals array and metatable record
483    }
484   
485    $json_result_str .= "]}"; # close metatable array and docid record
486    }
487
488    $json_result_str .= "]"; # close array of docids
489    $gsdl_cgi->generate_ok_message($json_result_str."\n");
490}
491
492
493# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
494sub _get_live_metadata_array
495{
496    my $self = shift @_;
497
498    my $collect   = $self->{'collect'};
499    my $gsdl_cgi  = $self->{'gsdl_cgi'};
500    my $site = $self->{'site'};
501    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
502
503    # look up additional args   
504    my $infodbtype = $self->{'infodbtype'};
505   
506    # To people who know $collect_tail please add some comments
507    # Obtain the path to the database
508    my $collect_tail = $collect;
509    $collect_tail =~ s/^.*[\/|\\]//;
510    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
511    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
512
513    my $json_str      = $self->{'json'};
514    my $doc_array = decode_json $json_str;
515
516    my $json_result_str = "[";
517    my $first_doc_rec = 1;
518
519    foreach my $doc_array_rec ( @$doc_array ) {
520   
521    my $docid     = $doc_array_rec->{'docid'};
522   
523    if($first_doc_rec) {
524        $first_doc_rec = 0;
525    } else {
526        $json_result_str .= ",";
527    }
528    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
529   
530    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
531    $json_result_str = $json_result_str . ",\"metatable\":[";
532   
533    my $first_rec = 1;
534    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
535        if($first_rec) {
536        $first_rec = 0;
537        } else {
538        $json_result_str .= ",";       
539        }
540       
541        my $metaname  = $metatable_rec->{'metaname'};
542        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
543       
544        # Generate the dbkey
545        my $dbkey = "$docid.$metaname";
546       
547        # metapos for get_live_metadata is always assumed to be "all".
548        # It's always going to get all the lines of metavalues associated with a metaname
549        # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
550        #my $metapos = "all";
551        my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
552        my $metavalue = "";
553       
554        # Obtain the content of the key
555        my $cmd = "gdbmget $infodb_file_path $dbkey";
556        if (open(GIN,"$cmd |") != 0) { # Success.
557       
558        binmode(GIN,":utf8");
559        # Read everything in and concatenate them into $metavalue       
560        my $line;
561        my $first_metaval = 1;
562        my $pos = 0;
563        while (defined ($line=<GIN>)) {
564            chomp($line); # Get rid off the tailing newlines
565           
566            if($metapos eq "all") {
567            if($first_metaval) {       
568                $first_metaval = 0;
569            } else {
570                $json_result_str .= ",";
571            }           
572            $metavalue = $line;
573            $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
574            } elsif($metapos == $pos) {
575            $metavalue = $line;
576            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
577            last;
578            } # else, the current $pos is not the required $metapos
579            $pos += 1;
580        }
581        close(GIN);
582        } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
583       
584        $json_result_str .= "]}"; # close metavals array and metatable record
585    }
586   
587    $json_result_str .= "]}"; # close metatable array and docid record
588    }
589
590    $json_result_str .= "]"; # close array of docids
591   
592    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
593}
594
595
596# Takes a JSON string and returns a JSON string
597# Request string is of the form:
598# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
599# Resulting string is of the form:
600# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
601sub _get_index_metadata_array
602{
603    my $self = shift @_;
604
605    my $collect   = $self->{'collect'};
606    my $gsdl_cgi  = $self->{'gsdl_cgi'};
607    my $site = $self->{'site'};
608    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610    # look up additional args   
611    my $infodbtype = $self->{'infodbtype'};
612   
613    # To people who know $collect_tail please add some comments
614    # Obtain the path to the database
615    my $collect_tail = $collect;
616    $collect_tail =~ s/^.*[\/|\\]//;
617    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
618    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
619
620    my $json_str      = $self->{'json'};
621    my $doc_array = decode_json $json_str;
622
623    my $json_result_str = "[";
624    my $first_doc_rec = 1;
625
626    foreach my $doc_array_rec ( @$doc_array ) {
627   
628    my $docid     = $doc_array_rec->{'docid'};
629   
630    if($first_doc_rec) {
631        $first_doc_rec = 0;
632    } else {
633        $json_result_str .= ",";
634    }
635    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
636
637    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
638    $json_result_str = $json_result_str . ",\"metatable\":[";
639
640    my $first_rec = 1;
641    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
642        if($first_rec) {
643        $first_rec = 0;
644        } else {
645        $json_result_str .= ",";       
646        }
647       
648        my $metaname  = $metatable_rec->{'metaname'};
649        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
650
651        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
652        if(!defined $metapos) {
653        $metapos = 0;
654        }
655
656         # Read the docid entry
657        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
658 
659        # Basically loop through and unescape_html the values
660        foreach my $k (keys %$doc_rec) {
661        my @escaped_v = ();
662        foreach my $v (@{$doc_rec->{$k}}) {
663            my $ev = &ghtml::unescape_html($v);
664            push(@escaped_v, $ev);
665        }
666        $doc_rec->{$k} = \@escaped_v;
667        }
668
669        # Obtain the specified metadata value(s)
670        my $metavalue;
671
672        if($metapos ne "all") { # get the value at a single metapos
673
674        $metavalue = $doc_rec->{$metaname}->[$metapos];
675
676        # protect any double quotes and colons in the metavalue before putting it into JSON
677        $metavalue =~ s/\"/&quot;/g if defined $metavalue;
678        $metavalue =~ s/\:/&58;/g if defined $metavalue;
679
680        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
681
682        } else {
683        my $first_metaval = 1;
684        $metapos = 0;
685        $metavalue = $doc_rec->{$metaname}->[$metapos];
686
687        while (defined $metavalue) {
688
689            # protect any double quotes and colons in the metavalue before putting it into JSON
690            $metavalue =~ s/\"/&quot;/g;
691            $metavalue =~ s/\:/&58;/g;
692
693            if($first_metaval) {       
694            $first_metaval = 0;
695            } else {
696            $json_result_str .= ",";
697            }
698       
699            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
700
701            $metapos++;
702            $metavalue = $doc_rec->{$metaname}->[$metapos];
703        }
704        }
705
706        $json_result_str .= "]}"; # close metavals array and metatable record
707    }
708   
709    $json_result_str .= "]}"; # close metatable array and docid record
710    }
711
712    $json_result_str .= "]"; # close array of docids
713
714    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
715}
716
717
718sub get_index_metadata
719{
720    my $self = shift @_;
721
722    my $username  = $self->{'username'};
723    my $collect   = $self->{'collect'};
724    my $gsdl_cgi  = $self->{'gsdl_cgi'};
725    my $gsdlhome  = $self->{'gsdlhome'};
726
727    # Obtain the collect dir
728    my $site = $self->{'site'};
729    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
730    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
731
732    # look up additional args
733    my $docid     = $self->{'d'};
734    my $metaname  = $self->{'metaname'};
735    my $metapos   = $self->{'metapos'};
736    my $infodbtype = $self->{'infodbtype'};
737
738    # To people who know $collect_tail please add some comments
739    # Obtain path to the database
740    my $collect_tail = $collect;
741    $collect_tail =~ s/^.*[\/|\\]//;
742    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
743    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
744
745    # Read the docid entry
746    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
747 
748    # Basically loop through and unescape_html the values
749    foreach my $k (keys %$doc_rec) {
750    my @escaped_v = ();
751    foreach my $v (@{$doc_rec->{$k}}) {
752        my $ev = &ghtml::unescape_html($v);
753        push(@escaped_v, $ev);
754    }
755    $doc_rec->{$k} = \@escaped_v;
756    }
757
758    # Obtain the specified metadata value
759    $metapos = 0 if (!defined $metapos);
760    my $metavalue = $doc_rec->{$metaname}->[$metapos];
761    $gsdl_cgi->generate_ok_message("$metavalue");
762   
763}
764
765
766sub get_import_metadata
767{
768    my $self = shift @_;
769
770    my $username  = $self->{'username'};
771    my $collect   = $self->{'collect'};
772    my $gsdl_cgi  = $self->{'gsdl_cgi'};
773    my $gsdlhome  = $self->{'gsdlhome'};
774
775    # Obtain the collect dir
776    my $site = $self->{'site'};
777    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
778    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
779
780    # look up additional args
781    my $docid     = $self->{'d'};
782    my $metaname  = $self->{'metaname'};
783    my $metapos = $self->{'metapos'};
784    $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
785
786    my $infodbtype = $self->{'infodbtype'};
787    if (!defined $docid)
788    {
789        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
790    }
791
792    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
793    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
794    my $metadata_xml_file;
795    my $import_filename = undef;
796   
797
798    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
799    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
800    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
801
802    # This now stores the full pathname
803    $import_filename = $doc_rec->{'src-file'}->[0];
804    $import_filename = &util::placeholders_to_abspath($import_filename);
805
806    # figure out correct metadata.xml file [?]
807    # Assuming the metadata.xml file is next to the source file
808    # Note: This will not work if it is using the inherited metadata from the parent folder
809    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
810    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
811
812    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
813
814}
815
816sub get_metadata_from_metadata_xml
817{
818    my $self = shift @_;
819    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
820   
821    my @rules =
822    (
823        _default => 'raw',
824        'Metadata' => \&gfmxml_metadata,
825        'FileName' => \&mxml_filename
826    );
827       
828    my $parser = XML::Rules->new
829    (
830        rules => \@rules,
831        output_encoding => 'utf8'
832    );
833   
834    my $xml_in = "";
835    if (!open(MIN,"<$metadata_xml_filename"))
836    {
837        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
838    }
839    else
840    {
841        # Read them in
842        my $line;
843        while (defined ($line=<MIN>)) {
844            $xml_in .= $line;
845        }
846        close(MIN);
847
848        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
849       
850        if(defined $parser->{'pad'}->{'metavalue'})
851        {
852            return $parser->{'pad'}->{'metavalue'};
853        }
854        else
855        {
856            return "";
857        }
858    }
859}
860
861sub gfmxml_metadata
862{
863    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
864
865    # no subsection support yet in metadata.xml
866
867    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
868    {
869        if (!defined $parser->{'parameters'}->{'poscount'})
870        {
871            $parser->{'parameters'}->{'poscount'} = 0;
872        }
873        else
874        {
875            $parser->{'parameters'}->{'poscount'}++;
876        }
877   
878        # gets the first value by default, since metapos defaults to 0
879        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
880        {
881            if($parser->{'parameters'}->{'metapos'} > 0) {
882            print STDERR "@@@@ WARNING: non-zero metapos.\n";
883            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
884            }
885            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
886        }
887    }
888}
889
890sub get_archives_metadata
891{
892    my $self = shift @_;
893
894    my $username  = $self->{'username'};
895    my $collect   = $self->{'collect'};
896    my $gsdl_cgi  = $self->{'gsdl_cgi'};
897#   my $gsdlhome  = $self->{'gsdlhome'};
898    my $infodbtype = $self->{'infodbtype'};
899
900    # Obtain the collect dir
901    my $site = $self->{'site'};
902    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
903   
904    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
905
906    # look up additional args
907    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
908    $docid_secnum = "" if (!defined $docid_secnum);
909   
910    my $metaname = $self->{'metaname'};
911    my $metapos = $self->{'metapos'};
912    $metapos = 0 if (!defined $metapos);
913   
914    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
915    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
916
917    # This now stores the full pathname
918    my $doc_filename = $doc_rec->{'doc-file'}->[0];
919
920    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
921
922}
923
924sub get_metadata_from_archive_xml
925{
926    my $self = shift @_;
927    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
928   
929    my @start_rules = ('Section' => \&dxml_start_section);
930   
931    my @rules =
932    (
933        _default => 'raw',
934        'Metadata' => \&gfdxml_metadata
935    );
936       
937    my $parser = XML::Rules->new
938    (
939        start_rules => \@start_rules,
940        rules => \@rules,
941        output_encoding => 'utf8'
942    );
943   
944    my $xml_in = "";
945    if (!open(MIN,"<$doc_xml_filename"))
946    {
947        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
948    }
949    else
950    {
951        # Read them in
952        my $line;
953        while (defined ($line=<MIN>)) {
954            $xml_in .= $line;
955        }
956        close(MIN);
957
958        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
959       
960        if(defined $parser->{'pad'}->{'metavalue'})
961        {
962            return $parser->{'pad'}->{'metavalue'};
963        }
964        else
965        {
966            return "";
967        }
968    }
969}
970
971sub gfdxml_metadata
972{
973    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
974   
975    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
976    {
977        return;
978    }
979
980    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
981    {
982        if (!defined $parser->{'parameters'}->{'poscount'})
983        {
984            $parser->{'parameters'}->{'poscount'} = 0;
985        }
986        else
987        {
988            $parser->{'parameters'}->{'poscount'}++;
989        }
990    }
991
992    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
993    {   
994        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
995    }
996}
997
998sub _set_live_metadata
999{
1000    my $self = shift @_;
1001
1002    my $collect   = $self->{'collect'};
1003    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1004    #my $gsdlhome  = $self->{'gsdlhome'};
1005    my $infodbtype = $self->{'infodbtype'};
1006
1007    # Obtain the collect dir
1008    my $site = $self->{'site'};
1009    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1010    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1011
1012
1013    # look up additional args
1014    my $docid     = $self->{'d'};
1015    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1016      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1017    }
1018    my $metavalue = $self->{'metavalue'};
1019
1020    # Generate the dbkey   
1021    my $metaname  = $self->{'metaname'};
1022    my $dbkey = "$docid.$metaname";
1023
1024    # To people who know $collect_tail please add some comments
1025    # Obtain path to the database
1026    my $collect_tail = $collect;
1027    $collect_tail =~ s/^.*[\/|\\]//;
1028    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1029    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1030
1031    # Set the new value
1032    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1033    my $status = system($cmd);
1034    if ($status != 0) {
1035        # Catch error if gdbmget failed
1036    my $mess = "Failed to set metadata key: $dbkey\n";
1037
1038    $mess .= "PATH: $ENV{'PATH'}\n";
1039    $mess .= "cmd = $cmd\n";
1040    $mess .= "Exit status: $status\n";
1041    $mess .= "System Error Message: $!\n";
1042
1043    $gsdl_cgi->generate_error($mess);
1044    }
1045    else {
1046    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
1047    }
1048
1049    #return $status; # in case calling functions have any further use for this
1050}
1051
1052sub set_live_metadata
1053{
1054    my $self = shift @_;
1055
1056    my $username  = $self->{'username'};
1057    my $collect   = $self->{'collect'};
1058    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1059 
1060    if ($baseaction::authentication_enabled) {
1061    # Ensure the user is allowed to edit this collection   
1062    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1063    }
1064
1065    # Make sure the collection isn't locked by someone else
1066    $self->lock_collection($username, $collect);
1067
1068    $self->_set_live_metadata(@_);
1069
1070    # Release the lock once it is done
1071    $self->unlock_collection($username, $collect);
1072}
1073
1074sub set_index_metadata_entry
1075{
1076    print STDERR "1\n";
1077    my $self = shift @_;
1078    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
1079   
1080    # To people who know $collect_tail please add some comments
1081    # Obtain path to the database
1082    my $collect_tail = $collect;
1083    $collect_tail =~ s/^.*[\/|\\]//;
1084    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1085    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1086   
1087    print STDERR "2\n";
1088#   print STDERR "**** infodb file path = $infodb_file_path\n";
1089#   print STDERR "***** infodb type = $infodbtype\n";
1090   
1091    # Read the docid entry
1092    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1093   
1094    # Set the metadata value
1095    if (defined $metapos) {
1096    print STDERR "3\n";
1097    # if metamode=accumulate AND metapos, warn user and then use metapos
1098    if (defined $metamode && $metamode eq "accumulate") {
1099        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1100        print STDERR "**** Proceeding by using metapos\n";
1101    }
1102    $doc_rec->{$metaname}->[$metapos] = $metavalue;
1103    }
1104    elsif (defined $prevmetavalue) {
1105    print STDERR "4\n";
1106        my $array = $doc_rec->{$metaname};
1107        my $length = @$array;
1108
1109        my $found = 0;
1110        for (my $i = 0; $i < $length; $i++){
1111            if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1112                $doc_rec->{$metaname}->[$i] = $metavalue;
1113                $found = 1;
1114                last;               
1115            }
1116        }
1117
1118        if($found == 0){
1119            $doc_rec->{$metaname} = [ $metavalue ];
1120        }
1121    }
1122    elsif (defined $metamode && $metamode eq "override") {
1123    print STDERR "5\n";
1124    $doc_rec->{$metaname} = [ $metavalue ];
1125    }
1126    else { # default for index was to override, but because accumulate is less destructive,
1127    # and because accumulate is the default for archives and import, that's the new default for index too
1128    print STDERR "6\n";
1129    if(defined $doc_rec->{$metaname}) {
1130        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1131    } else {
1132        $doc_rec->{$metaname} = [ $metavalue ];
1133    }
1134    }
1135    print STDERR "6\n";
1136 
1137    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1138   
1139    return $status;
1140   
1141}
1142
1143sub _set_import_metadata
1144{
1145    my $self = shift @_;
1146
1147    my $collect   = $self->{'collect'};
1148    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1149    my $infodbtype = $self->{'infodbtype'};
1150#    my $gsdlhome  = $self->{'gsdlhome'};
1151     
1152    # Obtain the collect and archive dir   
1153    my $site = $self->{'site'};
1154    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1155    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1156    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1157   
1158    # look up additional args
1159    # want either d= or f=
1160    my $docid  = $self->{'d'};
1161    my ($docid_root,$docid_secnum);
1162    if(defined $docid) {   
1163    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
1164    # as yet no support for setting subsection metadata in metadata.xml
1165    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1166        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
1167        return;
1168    }
1169    }
1170
1171    my $import_file  = $self->{'f'};
1172    if ((!defined $docid) && (!defined $import_file)) {
1173    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1174    }
1175
1176    # Get the parameters and set default mode to "accumulate"
1177    my $metaname   = $self->{'metaname'};
1178    my $metavalue  = $self->{'metavalue'};
1179##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1180    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1181   
1182    my $metamode   = $self->{'metamode'};
1183    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1184    # make "accumulate" the default (less destructive, as won't actually
1185    # delete any existing values)
1186    $metamode = "accumulate";
1187    }
1188
1189    # adding metapos and prevmetavalue support to import_metadata subroutines
1190    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1191    my $prevmetavalue = $self->{'prevmetavalue'};
1192
1193    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1194    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1195
1196    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1197    $mess .= "  $metaname";
1198    $mess .= " = $metavalue";
1199    $mess .= " ($metamode)\n";
1200
1201    $gsdl_cgi->generate_ok_message($mess);
1202
1203    #return $status; # in case calling functions have any further use for this
1204}
1205
1206# the version of set_index_meta that doesn't do authentication
1207sub _set_archives_metadata
1208{
1209    my $self = shift @_;
1210
1211    my $collect   = $self->{'collect'};
1212    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1213    my $infodbtype = $self->{'infodbtype'};
1214   
1215    # Obtain the collect and archive dir   
1216    my $site = $self->{'site'};
1217    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1218    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1219
1220    # look up additional args
1221    my $docid  = $self->{'d'};
1222    my $metaname   = $self->{'metaname'};
1223    my $metavalue  = $self->{'metavalue'};
1224    my $prevmetavalue = $self->{'prevmetavalue'};
1225   
1226    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1227                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
1228
1229    my $metamode   = $self->{'metamode'};
1230    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1231    # make "accumulate" the default (less destructive, as won't actually
1232    # delete any existing values)
1233    $metamode = "accumulate";
1234    }
1235
1236    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1237                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1238   
1239    if ($status == 0) {
1240    my $mess = "set-archives-metadata successful: Key[$docid]\n";
1241    $mess .= "  $metaname";
1242    $mess .= "->[$metapos]" if (defined $metapos);
1243    $mess .= " = $metavalue";
1244    $mess .= " ($metamode)\n";
1245   
1246    $gsdl_cgi->generate_ok_message($mess); 
1247    }
1248    else {
1249    my $mess .= "Failed to set archives metadata key: $docid\n";
1250    $mess .= "Exit status: $status\n";
1251    if(defined $self->{'error_msg'}) {
1252        $mess .= "Error Message: $self->{'error_msg'}\n";
1253    } else {
1254        $mess .= "System Error Message: $!\n";
1255    }
1256    $mess .= "-" x 20 . "\n";
1257   
1258    $gsdl_cgi->generate_error($mess);
1259    }
1260
1261    #return $status; # in case calling functions have any further use for this
1262}
1263
1264
1265# the version of set_index_meta that doesn't do authentication
1266sub _set_index_metadata
1267{
1268    print STDERR "START SET INDEX METADATA\n";
1269    my $self = shift @_;
1270
1271    my $collect   = $self->{'collect'};
1272    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1273
1274    my $site = $self->{'site'};
1275    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1276
1277    # look up additional args
1278    my $docid     = $self->{'d'};
1279    my $metaname  = $self->{'metaname'};
1280    my $metapos   = $self->{'metapos'}; # undef has meaning
1281    my $metavalue = $self->{'metavalue'};
1282    my $infodbtype = $self->{'infodbtype'};
1283    my $metamode  = $self->{'metamode'};
1284    my $prevmetavalue = $self->{'prevmetavalue'};
1285
1286    print STDERR "SETTING INDEX METADATA ENTRY\n";
1287    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1288    print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
1289    if ($status != 0) {
1290        # Catch error if set infodb entry failed
1291    my $mess = "Failed to set metadata key: $docid\n";
1292   
1293    $mess .= "PATH: $ENV{'PATH'}\n";
1294    $mess .= "Exit status: $status\n";
1295    $mess .= "System Error Message: $!\n";
1296   
1297    $gsdl_cgi->generate_error($mess);
1298    }
1299    else {
1300    my $mess = "set-index-metadata successful: Key[$docid]\n";
1301    $mess .= "  $metaname";
1302    $mess .= "->[$metapos]" if (defined $metapos);
1303    $mess .= " = $metavalue\n";
1304   
1305    $gsdl_cgi->generate_ok_message($mess);
1306    }
1307
1308    print STDERR "END SET INDEX METADATA\n";
1309    #return $status; # in case calling functions have any further use for this
1310}
1311
1312sub set_index_metadata
1313{
1314    my $self = shift @_;
1315
1316    my $username  = $self->{'username'};
1317    my $collect   = $self->{'collect'};
1318    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1319    #my $gsdlhome  = $self->{'gsdlhome'};
1320
1321    if ($baseaction::authentication_enabled) {
1322    # Ensure the user is allowed to edit this collection   
1323    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1324    }
1325
1326    my $site = $self->{'site'};
1327    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1328   
1329    $gsdl_cgi->checked_chdir($collect_dir);
1330
1331    # Obtain the collect dir
1332    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1333
1334    # Make sure the collection isn't locked by someone else
1335    $self->lock_collection($username, $collect);
1336
1337    $self->_set_index_metadata(@_);
1338   
1339    # Release the lock once it is done
1340    $self->unlock_collection($username, $collect);
1341}
1342
1343# call this to set the metadata for a combination of dirs archives, import or index, or live
1344# if none specified, defaults to index which was the original behaviour of set_metadata.
1345sub set_metadata
1346{
1347    my $self = shift @_;
1348
1349    # Testing that not defining a variable, setting it to "" or to "  " all return false
1350    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1351
1352    my $where = $self->{'where'};
1353    if(!$where) {   
1354    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1355    return;
1356    }
1357
1358    # authenticate and lock collection once, even if processing multiple dirs
1359    my $username  = $self->{'username'};
1360    my $collect   = $self->{'collect'};
1361    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1362   
1363    if ($baseaction::authentication_enabled) {
1364    # Ensure the user is allowed to edit this collection   
1365    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1366    }
1367   
1368    if($where =~ m/index/) {
1369    my $site = $self->{'site'};
1370    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1371    $gsdl_cgi->checked_chdir($collect_dir);
1372    }
1373
1374    # Make sure the collection isn't locked by someone else
1375    $self->lock_collection($username, $collect);
1376
1377
1378    # now at last can set the metadata. $where can specify multiple
1379    # $where is of the form: import|archives|index, or a subset thereof
1380
1381    #my @whichdirs = split('\|', $where);
1382
1383    # just check whether $where contains import/archives/index/live in turn, and
1384    # for each case, process it accordingly
1385    if($where =~ m/import/) {
1386    $self->_set_import_metadata(@_);       
1387    }
1388
1389    if($where =~ m/archives/) {
1390
1391    # look up docID arg which is optional to set_metadata because it's optional
1392    # to set_import, but which is compulsory to set_archives_metadata
1393    my $docid     = $self->{'d'};
1394    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1395        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1396    }
1397    # we have a docid, so can set archives meta
1398    $self->_set_archives_metadata(@_); 
1399    }
1400
1401    if($where =~ m/index/) {
1402   
1403    # look up docID arg which is optional to set_metadata because it's optional
1404    # to set_import, but which is compulsory to set_archives_metadata
1405    my $docid     = $self->{'d'};
1406    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1407        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1408    }
1409    # we have a docid, so can set index meta
1410    $self->_set_index_metadata(@_);
1411    }   
1412
1413    if($where =~ m/live/) {
1414    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1415    }
1416
1417    # Release the lock once it is done
1418    $self->unlock_collection($username, $collect);
1419}
1420
1421sub set_metadata_array
1422{
1423    my $self = shift @_;
1424
1425    my $where = $self->{'where'};
1426    if(!$where) {   
1427    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1428    return;
1429    }
1430
1431    my $username  = $self->{'username'};
1432    my $collect   = $self->{'collect'};
1433    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1434
1435    if ($baseaction::authentication_enabled) {
1436    # Ensure the user is allowed to edit this collection
1437    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1438    }
1439
1440    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1441    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1442    my $site = $self->{'site'};
1443    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1444    $gsdl_cgi->checked_chdir($collect_dir);
1445
1446    # Make sure the collection isn't locked by someone else
1447    $self->lock_collection($username, $collect);
1448
1449    if($where =~ m/import/) {
1450    $self->_set_import_metadata_array(@_);
1451    }
1452    if($where =~ m/archives/) {
1453    $self->_set_archives_metadata_array(@_);
1454    }
1455    if($where =~ m/index/) {
1456    $self->_set_index_metadata_array(@_);
1457    }
1458    if($where =~ m/live/) {
1459        $self->_set_live_metadata_array(@_);
1460    }
1461
1462    # Release the lock once it is done
1463    $self->unlock_collection($username, $collect);
1464}
1465
1466sub _set_index_metadata_array
1467{
1468    my $self = shift @_;
1469
1470    my $collect   = $self->{'collect'};
1471    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1472    my $site = $self->{'site'};
1473    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1474
1475   
1476    # look up additional args
1477   
1478    my $infodbtype = $self->{'infodbtype'};
1479   
1480    my $json_str      = $self->{'json'};
1481    my $doc_array = decode_json $json_str;
1482   
1483   
1484    my $global_status = 0;
1485    my $global_mess = "";
1486   
1487    my @all_docids = ();
1488   
1489    foreach my $doc_array_rec ( @$doc_array ) {
1490   
1491    my $status = -1;
1492    my $docid     = $doc_array_rec->{'docid'};
1493   
1494    push(@all_docids,$docid);
1495
1496    my $metaname  = $doc_array_rec->{'metaname'};
1497    if(defined $metaname) {
1498        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1499        my $metavalue = $doc_array_rec->{'metavalue'};
1500        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1501
1502        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1503    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1504        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1505       
1506        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1507        $metaname  = $metatable_rec->{'metaname'};
1508        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1509        my $metapos = undef;
1510        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1511
1512        foreach my $metavalue ( @$metavals ) { # metavals is an array
1513            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1514            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1515            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1516            # metavalue for this metaname will be lost
1517            $metamode = "accumulate";
1518            }
1519        }           
1520        }
1521    }
1522   
1523    if ($status != 0) {
1524        # Catch error if set infodb entry failed
1525        $global_status = $status;
1526        $global_mess .= "Failed to set metadata key: $docid\n";
1527        $global_mess .= "Exit status: $status\n";
1528        $global_mess .= "System Error Message: $!\n";
1529        $global_mess .= "-" x 20;
1530    }
1531    }
1532   
1533    if ($global_status != 0) {
1534    $global_mess .= "PATH: $ENV{'PATH'}\n";
1535    $gsdl_cgi->generate_error($global_mess);
1536    }
1537    else {
1538    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1539    $gsdl_cgi->generate_ok_message($mess);
1540    }
1541}
1542
1543sub set_index_metadata_array
1544{
1545    my $self = shift @_;
1546
1547    my $username  = $self->{'username'};
1548    my $collect   = $self->{'collect'};
1549    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1550#    my $gsdlhome  = $self->{'gsdlhome'};
1551
1552    if ($baseaction::authentication_enabled) {
1553    # Ensure the user is allowed to edit this collection   
1554    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1555    }
1556
1557    my $site = $self->{'site'};
1558    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1559   
1560    $gsdl_cgi->checked_chdir($collect_dir);
1561
1562    # Obtain the collect dir
1563    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1564
1565    # Make sure the collection isn't locked by someone else
1566    $self->lock_collection($username, $collect);
1567
1568    $self->_set_index_metadata_array(@_);
1569
1570    # Release the lock once it is done
1571    $self->unlock_collection($username, $collect);
1572}
1573
1574# experimental, newly added in and untested
1575sub _set_live_metadata_array
1576{
1577    my $self = shift @_;
1578
1579    my $collect   = $self->{'collect'};
1580    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1581
1582    my $site = $self->{'site'};
1583    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1584
1585   
1586    # look up additional args
1587    my $infodbtype = $self->{'infodbtype'};
1588    # To people who know $collect_tail please add some comments
1589    # Obtain path to the database
1590    my $collect_tail = $collect;
1591    $collect_tail =~ s/^.*[\/|\\]//;
1592    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1593    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1594
1595   
1596    my $json_str      = $self->{'json'};
1597    my $doc_array = decode_json $json_str;
1598   
1599   
1600    my $global_status = 0;
1601    my $global_mess = "";
1602   
1603    my @all_docids = ();
1604
1605
1606    foreach my $doc_array_rec ( @$doc_array ) {
1607   
1608    my $status = -1;
1609    my $docid     = $doc_array_rec->{'docid'};
1610
1611    push(@all_docids,$docid);
1612
1613    my $metaname  = $doc_array_rec->{'metaname'};
1614    if(defined $metaname) {
1615        my $dbkey = "$docid.$metaname";
1616        my $metavalue = $doc_array_rec->{'metavalue'};
1617
1618        # Set the new value
1619        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1620        $status = system($cmd);
1621
1622    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1623        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1624        foreach my $metatable_rec ( @$metatable ) {
1625        $metaname  = $metatable_rec->{'metaname'};
1626        my $dbkey = "$docid.$metaname";
1627
1628        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1629        foreach my $metavalue ( @$metavals ) {
1630             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1631             $status = system($cmd);
1632        }
1633        }
1634       
1635    }
1636
1637    if ($status != 0) {
1638        # Catch error if gdbmget failed
1639        $global_status = $status;
1640        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1641        $global_mess .= "Exit status: $status\n";
1642        $global_mess .= "System Error Message: $!\n";
1643        $global_mess .= "-" x 20;
1644    }
1645    }
1646   
1647    if ($global_status != 0) {
1648    $global_mess .= "PATH: $ENV{'PATH'}\n";
1649    $gsdl_cgi->generate_error($global_mess);
1650    }
1651    else {
1652    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1653    $gsdl_cgi->generate_ok_message($mess);
1654    }
1655}
1656
1657sub set_live_metadata_array
1658{
1659    my $self = shift @_;
1660
1661    my $username  = $self->{'username'};
1662    my $collect   = $self->{'collect'};
1663    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1664
1665    if ($baseaction::authentication_enabled) {
1666    # Ensure the user is allowed to edit this collection   
1667    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1668    }
1669
1670    my $site = $self->{'site'};
1671    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1672   
1673    $gsdl_cgi->checked_chdir($collect_dir);
1674
1675    # Make sure the collection isn't locked by someone else
1676    $self->lock_collection($username, $collect);
1677
1678    $self->_set_live_metadata_array(@_);
1679
1680    # Release the lock once it is done
1681    $self->unlock_collection($username, $collect);
1682}
1683
1684
1685sub dxml_metadata
1686{
1687    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1688    my $metaname = $parser->{'parameters'}->{'metaname'};
1689    my $metamode = $parser->{'parameters'}->{'metamode'};
1690   
1691###!!!    print STDERR "**** Processing closing </Metadata> tag\n";
1692   
1693    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1694   
1695    # Find the right metadata tag and checks if we are going to
1696    # override it
1697    #
1698    # Note: This over writes the first metadata block it
1699    # encountered. If there are multiple Sections in the doc.xml, it
1700    # might not behave as you would expect
1701
1702    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1703##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1704##    print STDERR "**** metamode = $metamode\n";
1705   
1706    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1707    {
1708        my $name_attr = $attrHash->{'name'};
1709        if (($name_attr eq $metaname) && ($metamode eq "override"))
1710        {
1711            if (!defined $parser->{'parameters'}->{'poscount'})
1712            {
1713                $parser->{'parameters'}->{'poscount'} = 0;
1714            }
1715            else
1716            {
1717                $parser->{'parameters'}->{'poscount'}++;
1718            }
1719           
1720            if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1721            {
1722                ##print STDERR "#### got match!!\n";
1723                # Get the value and override the current value
1724                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1725                $attrHash->{'_content'} = $metavalue;
1726               
1727                # Don't want it to wipe out any other pieces of metadata
1728                $parser->{'parameters'}->{'metamode'} = "done";
1729            }
1730            elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1731            {
1732                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1733                $attrHash->{'_content'} = $metavalue;
1734                $parser->{'parameters'}->{'metamode'} = "done";
1735            }
1736        }
1737    }
1738
1739    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1740    return [$tagname => $attrHash];
1741}
1742
1743# This method exists purely for catching invalid section numbers that the client
1744# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1745# we've seen all the Sections in the doc.xml, and none of their section nums matched
1746# if the metamode has not been set to 'done' by then.
1747sub dxml_archive
1748{
1749    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1750    my $metamode = $parser->{'parameters'}->{'metamode'};
1751   
1752    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1753    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1754   
1755#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1756   
1757    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1758    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1759
1760    my $metaname = $parser->{'parameters'}->{'metaname'};
1761    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1762   
1763    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1764    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1765    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1766    print STDERR "\tin section $opt_doc_secnum.\n";
1767    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1768    }
1769   
1770    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1771    return [$tagname => $attrHash];
1772}
1773
1774sub dxml_description
1775{
1776    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1777    my $metamode = $parser->{'parameters'}->{'metamode'};
1778
1779    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1780    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1781   
1782###!!!  print STDERR "**** Processing a closing </Description> tag \n";
1783#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1784   
1785    # Accumulate the metadata
1786
1787    # We'll be accumulating metadata at this point if we haven't found and therefore
1788    # haven't processed the metadata yet.
1789    # For subsections, this means that if we're at a matching subsection, but haven't
1790    # found the correct metaname to override in that subsection, we accumulate it as new
1791    # meta in the subsection by adding it to the current description.
1792    # If there's no subsection info for the metadata, it will accumulate at the top level
1793    # section description if we hadn't found a matching metaname to override at this point.
1794
1795    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1796    # at the toplevel section. In the latter case, it means we want to process meta in the
1797    # toplevel section. So the eq check between the values below will work in all cases.
1798   
1799    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1800    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1801    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1802
1803    if ($curr_secnum eq $opt_doc_secnum
1804        && ($metamode eq "accumulate" || $metamode eq "override")) {
1805        if ($metamode eq "override") {
1806        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1807        }
1808
1809        # If we get to here and metamode is override, this means there
1810        # was no existing value to overide => treat as an append operation
1811       
1812        # Tack a new metadata tag on to the end of the <Metadata>+ block
1813        my $metaname = $parser->{'parameters'}->{'metaname'};
1814        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1815       
1816        my $metadata_attr = {
1817        '_content' => $metavalue,
1818        'name' => $metaname,
1819        'mode' => "accumulate"
1820        };
1821       
1822        my $append_metadata = [ "Metadata" => $metadata_attr ];
1823        my $description_content = $attrHash->{'_content'};
1824       
1825        print "Appending metadata to doc.xml\n";
1826       
1827        if (ref($description_content)) {
1828        # got some existing interesting nested content
1829        push(@$description_content, "    ", $append_metadata ,"\n        ");
1830        }
1831        else {
1832        #description_content is most likely a string such as "\n"
1833        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1834        }
1835       
1836        $parser->{'parameters'}->{'metamode'} = "done";
1837    }       
1838    else {
1839        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1840##      print STDERR "**** NOT ACCUMULATE?!? \n";
1841    }
1842
1843    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1844    return [$tagname => $attrHash];
1845}
1846
1847
1848sub dxml_start_section
1849{
1850    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1851
1852    my $new_depth = scalar(@$contextArray);
1853
1854#   print STDERR "**** START SECTION \n";
1855   
1856    if ($new_depth == 1) {
1857    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1858    $parser->{'parameters'}->{'curr_section_num'}   = "";
1859    }
1860
1861    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1862    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1863
1864    my $new_secnum;
1865
1866    if ($new_depth > $old_depth) {
1867    # child subsection
1868    $new_secnum = "$old_secnum.1";
1869    }
1870    elsif ($new_depth == $old_depth) {
1871    # sibling section => increase it's value by 1
1872    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1873    $tail_num++;
1874    $new_secnum = $old_secnum;
1875    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1876    }
1877    else {
1878    # back up to parent section => lopp off tail
1879    $new_secnum = $old_secnum;
1880    $new_secnum =~ s/\.\d+$//;
1881    }
1882
1883    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1884    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1885   
1886    1;
1887}
1888
1889sub edit_xml_file
1890{
1891    my $self = shift @_;
1892    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1893
1894    # use XML::Rules to add it in (read in and out again)
1895    my $parser = XML::Rules->new(start_rules     => $start_rules,
1896                 rules           => $rules,
1897                 style           => 'filter',
1898                                 output_encoding => 'utf8' );
1899
1900    my $xml_in = "";
1901    if (!open(MIN,"<$filename")) {
1902    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1903    }
1904    else {
1905        # Read all the text in
1906    my $line;
1907    while (defined ($line=<MIN>)) {
1908        $xml_in .= $line;
1909    }
1910    close(MIN);
1911   
1912    my $MOUT;   
1913    if (!open($MOUT,">$filename")) {
1914        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1915    }
1916    else {
1917        # Matched lines will get handled by the call backs
1918##      my $xml_out = "";
1919
1920        binmode($MOUT,":utf8");
1921        $parser->filter($xml_in,$MOUT, $options);
1922
1923#       binmode(MOUT,":utf8");
1924#       print MOUT $xml_out;
1925        close($MOUT);       
1926    }
1927    }
1928
1929    # copy across any custom error information that was stored during parsing
1930    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1931}
1932
1933sub edit_doc_xml
1934{
1935    my $self = shift @_;
1936    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1937
1938    my $info_mess = <<RAWEND;
1939****************************
1940  edit_doc_xml()
1941****************************
1942RAWEND
1943
1944    $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1945    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1946    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1947    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1948    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1949    $info_mess .= " opt_secnum  = $opt_secnum\n"  if defined($opt_secnum);
1950    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1951     
1952    $info_mess .= "****************************\n";
1953
1954    $gsdl_cgi->generate_message($info_mess);
1955   
1956    # To monitor which section/subsection number we are in
1957    my @start_rules =
1958    ( 'Section'    => \&dxml_start_section );
1959
1960    # use XML::Rules to add it in (read in and out again)
1961    # Set the call back functions
1962    my @rules =
1963    ( _default => 'raw',
1964      'Metadata'    => \&dxml_metadata,
1965      'Description' => \&dxml_description,
1966      'Archive'     => \&dxml_archive); # just for catching errors at end
1967     
1968    # Sets the parameters
1969    my $options = { 'metaname'  => $metaname,
1970            'metapos'   => $metapos,
1971            'metavalue' => $metavalue,
1972            'metamode'  => $metamode,
1973            'prevmetavalue' => $prevmetavalue };
1974           
1975    if (defined $opt_secnum) {
1976    $options->{'secnum'} = $opt_secnum;
1977    }
1978
1979    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1980}
1981
1982sub set_archives_metadata_entry
1983{
1984    my $self = shift @_;
1985    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1986
1987    my $info_mess = <<RAWEND;
1988****************************
1989  set_archives_metadata_entry()
1990****************************
1991RAWEND
1992
1993    $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1994    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1995    $info_mess .= " collect     = $collect\n"     if defined($collect);
1996    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
1997    $info_mess .= " docid       = $docid\n"       if defined($docid);
1998    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1999    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2000    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2001    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2002    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2003     
2004    $info_mess .= "****************************\n";
2005
2006    $gsdl_cgi->generate_message($info_mess);
2007   
2008    # Obtain the doc.xml path for the specified docID
2009    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2010
2011    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2012    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2013    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2014   
2015    # The $doc_xml_file is relative to the archives, and now let's get the full path
2016    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2017    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
2018
2019    # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
2020    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2021    # Thereafter, we will finally be setting the overriding metavalue for this metaname
2022    if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2023    # remove all values of $metaname metadata
2024    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
2025    }
2026    # Edit the doc.xml file with the specified metadata name, value and position.
2027    # TODO: there is a potential problem here as this edit_doc_xml function
2028    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2029    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2030   
2031    # dxml_metadata method ignores metapos if metamode anything other than override
2032    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
2033            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
2034
2035    # return 0; # return 0 for now to indicate no error
2036    return (defined $self->{'error_msg'}) ? 1 : 0;
2037}
2038
2039
2040sub set_archives_metadata
2041{
2042    my $self = shift @_;
2043
2044    my $username  = $self->{'username'};
2045    my $collect   = $self->{'collect'};
2046    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2047   
2048    if ($baseaction::authentication_enabled) {
2049    # Ensure the user is allowed to edit this collection
2050    $self->authenticate_user($username, $collect);
2051    }
2052
2053    # Make sure the collection isn't locked by someone else
2054    $self->lock_collection($username, $collect);
2055
2056    $self->_set_archives_metadata(@_);
2057
2058    # Release the lock once it is done
2059    $self->unlock_collection($username, $collect);
2060}
2061
2062sub _set_archives_metadata_array
2063{
2064    my $self = shift @_;
2065   
2066    my $collect   = $self->{'collect'};
2067    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2068    my $site = $self->{'site'};
2069    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2070
2071    # look up additional args
2072   
2073    my $infodbtype = $self->{'infodbtype'};
2074   
2075    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2076   
2077    my $json_str      = $self->{'json'};
2078    my $doc_array = decode_json $json_str;
2079   
2080   
2081    my $global_status = 0;
2082    my $global_mess = "";
2083   
2084    my @all_docids = ();
2085   
2086    foreach my $doc_array_rec ( @$doc_array ) {
2087    my $status    = -1;
2088    my $docid     = $doc_array_rec->{'docid'};
2089
2090    push(@all_docids,$docid);
2091   
2092    my $metaname  = $doc_array_rec->{'metaname'};
2093    if(defined $metaname) {
2094       
2095        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2096
2097        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2098        my $metavalue = $doc_array_rec->{'metavalue'};
2099        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2100       
2101       
2102        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2103        # make "accumulate" the default (less destructive, as it won't actually
2104        # delete any existing values)
2105        $metamode = "accumulate";
2106        }       
2107       
2108        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2109                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2110    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2111        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2112       
2113        foreach my $metatable_rec ( @$metatable ) {
2114        $metaname  = $metatable_rec->{'metaname'};
2115        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2116        my $metapos = undef;
2117        my $prevmetavalue = undef;
2118        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2119       
2120        foreach my $metavalue ( @$metavals ) {
2121            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2122                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2123           
2124            if($metamode eq "override") { # now, having overridden the metavalue for the first,
2125            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2126            # metavalue for this metaname will be lost
2127            $metamode = "accumulate";
2128            }
2129        }           
2130        }       
2131    }
2132       
2133    if ($status != 0) {
2134        # Catch error if set infodb entry failed
2135        $global_status = $status;
2136        $global_mess .= "Failed to set metadata key: $docid\n";
2137        $global_mess .= "Exit status: $status\n";
2138        $global_mess .= "System Error Message: $!\n";
2139        $global_mess .= "-" x 20 . "\n";
2140    }
2141    }
2142   
2143    if ($global_status != 0) {
2144    $global_mess .= "PATH: $ENV{'PATH'}\n";
2145    $gsdl_cgi->generate_error($global_mess);
2146    }
2147    else {
2148    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2149    $gsdl_cgi->generate_ok_message($mess);
2150    }
2151}
2152
2153sub set_archives_metadata_array
2154{
2155    my $self = shift @_;
2156
2157    my $username  = $self->{'username'};
2158    my $collect   = $self->{'collect'};
2159    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2160#    my $gsdlhome  = $self->{'gsdlhome'};
2161
2162    if ($baseaction::authentication_enabled) {
2163    # Ensure the user is allowed to edit this collection   
2164    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2165    }
2166
2167    my $site = $self->{'site'};
2168    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2169   
2170    $gsdl_cgi->checked_chdir($collect_dir);
2171
2172    # Obtain the collect dir
2173    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2174
2175    # Make sure the collection isn't locked by someone else
2176    $self->lock_collection($username, $collect);
2177
2178    $self->_set_archives_metadata_array(@_);
2179   
2180    # Release the lock once it is done
2181    $self->unlock_collection($username, $collect);
2182}
2183
2184sub _remove_archives_metadata
2185{
2186    my $self = shift @_;
2187
2188    my $collect   = $self->{'collect'};
2189    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2190#   my $gsdlhome  = $self->{'gsdlhome'};
2191    my $infodbtype = $self->{'infodbtype'};
2192   
2193    my $site = $self->{'site'};
2194       
2195    # Obtain the collect and archive dir   
2196    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2197   
2198    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2199
2200    # look up additional args
2201    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2202   
2203    my $metaname = $self->{'metaname'};
2204    my $metapos = $self->{'metapos'};
2205
2206    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2207
2208    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2209    # default to deleting the first metavalue for the given metaname
2210    # Beware that if both metapos AND metavalue are defined, both matches (if any)
2211    # seem to get deleted in one single remove_archives_meta action invocation.
2212    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2213    # deleted, both get deleted.
2214    if(!defined $metapos && !defined $metavalue) {
2215        $metapos = 0;
2216    }
2217
2218    my $metamode = $self->{'metamode'} || undef;
2219
2220    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2221    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2222
2223    # This now stores the full pathname
2224    my $doc_filename = $doc_rec->{'doc-file'}->[0];
2225
2226    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2227#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
2228   
2229    if ($status == 0)
2230    {
2231        my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
2232        $mess .= "  $metaname";
2233        $mess .= "->[$metapos]" if (defined $metapos);
2234        $mess .= " ($metavalue)" if (defined $metavalue);
2235        $gsdl_cgi->generate_ok_message($mess); 
2236    }
2237    else
2238    {
2239        my $mess .= "Failed to remove archives metadata key: $docid\n";
2240        $mess .= "Exit status: $status\n";
2241        $mess .= "System Error Message: $!\n";
2242        $mess .= "-" x 20 . "\n";
2243       
2244        $gsdl_cgi->generate_error($mess);
2245    }
2246   
2247    #return $status; # in case calling functions have a use for this
2248}
2249
2250sub remove_archives_metadata
2251{
2252    my $self = shift @_;
2253
2254    my $username  = $self->{'username'};
2255    my $collect   = $self->{'collect'};
2256    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2257   
2258    if ($baseaction::authentication_enabled)
2259    {
2260        # Ensure the user is allowed to edit this collection       
2261        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2262    }
2263
2264    # Make sure the collection isn't locked by someone else
2265    $self->lock_collection($username, $collect);
2266
2267    $self->_remove_archives_metadata(@_);
2268
2269    # Release the lock once it is done
2270    $self->unlock_collection($username, $collect);
2271}
2272
2273sub remove_from_doc_xml
2274{
2275    my $self = shift @_;
2276    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
2277   
2278    my @start_rules = ('Section' => \&dxml_start_section);
2279   
2280    # Set the call-back functions for the metadata tags
2281    my @rules =
2282    (
2283        _default => 'raw',
2284        'Metadata' => \&rfdxml_metadata
2285    );
2286       
2287    my $parser = XML::Rules->new
2288    (
2289        start_rules => \@start_rules,
2290        rules => \@rules,
2291        style => 'filter',
2292        output_encoding => 'utf8',
2293#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2294        stripspaces => 2|0|0 # ineffectual
2295    );
2296   
2297    my $status = 0;
2298    my $xml_in = "";
2299    if (!open(MIN,"<$doc_xml_filename"))
2300    {
2301        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2302        $status = 1;
2303    }
2304    else
2305    {
2306        # Read them in
2307        my $line;
2308        while (defined ($line=<MIN>)) {
2309            $xml_in .= $line;
2310        }
2311        close(MIN);
2312
2313        # Filter with the call-back functions
2314        my $xml_out = "";
2315
2316        my $MOUT;
2317        if (!open($MOUT,">$doc_xml_filename")) {
2318            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2319            $status = 1;
2320        }
2321        else {
2322            binmode($MOUT,":utf8");
2323            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
2324            close($MOUT);       
2325        }
2326    }
2327    return $status;
2328}
2329
2330sub rfdxml_metadata
2331{
2332    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2333
2334    # For comparisons, toplevel section is indicated by ""
2335    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2336    my $secid = $parser->{'parameters'}->{'secid'} || "";
2337
2338    if (!($secid eq $curr_sec_num))
2339    {
2340        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2341        return [$tagname => $attrHash];
2342    }
2343
2344    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2345    {
2346        if (!defined $parser->{'parameters'}->{'poscount'})
2347        {
2348            $parser->{'parameters'}->{'poscount'} = 0;
2349        }
2350        else
2351        {
2352            $parser->{'parameters'}->{'poscount'}++;
2353        }
2354       
2355        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2356        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {         
2357            return [];
2358        }
2359
2360        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2361        {   
2362            return [];
2363        }
2364       
2365        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2366        {   
2367            return [];
2368        }
2369    }
2370   
2371    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2372    return [$tagname => $attrHash];
2373}
2374
2375sub mxml_metadata
2376{
2377    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2378    my $metaname = $parser->{'parameters'}->{'metaname'};
2379    my $metamode = $parser->{'parameters'}->{'metamode'};
2380
2381    # Report error if we don't see FileName tag before this
2382    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2383   
2384    # Don't do anything if we are not in the right FileSet
2385    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2386    if ($file_regexp =~ /\.\*/) {
2387    # Only interested in a file_regexp if it specifies precisely one
2388    # file. 
2389    # So, skip anything with a .* in it as it is too general
2390##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2391    return [$tagname => $attrHash];
2392    }
2393    my $src_file = $parser->{'parameters'}->{'src_file'};
2394    if (!($src_file =~ /$file_regexp/)) {
2395    return [$tagname => $attrHash];
2396    }
2397##    print STDERR "*** mxl metamode = $metamode\n";
2398
2399    # Find the right metadata tag and checks if we are going to override it
2400    my $name_attr = $attrHash->{'name'};
2401    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2402
2403    # now metadata.xml functions need to keep track of metapos
2404    if (!defined $parser->{'parameters'}->{'poscount'})
2405    {
2406        $parser->{'parameters'}->{'poscount'} = 0;
2407    }
2408    else
2409    {
2410        $parser->{'parameters'}->{'poscount'}++;
2411    }
2412
2413    # If either the metapos or prevmetavalue is set,
2414        # get the value and override the current value
2415    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2416
2417    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2418    {
2419        $attrHash->{'_content'} = $metavalue;
2420
2421        ##  print STDERR "**** overriding metadata.xml\n";
2422       
2423        # Don't want it to wipe out any other pieces of metadata
2424        $parser->{'parameters'}->{'metamode'} = "done";
2425    }
2426    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2427    {
2428        $attrHash->{'_content'} = $metavalue;
2429        $parser->{'parameters'}->{'metamode'} = "done";
2430    }
2431    }
2432
2433    # mxml_description will process the metadata if metadata is accumulate,
2434    # or if we haven't found the metadata to override
2435
2436    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2437    return [$tagname => $attrHash];
2438}
2439
2440
2441sub mxml_description
2442{
2443    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2444    my $metamode = $parser->{'parameters'}->{'metamode'};   
2445
2446    # Failed... Report error if we don't see FileName tag before this
2447    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2448
2449    # Don't do anything if we are not in the right FileSet
2450    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2451    if ($file_regexp =~ m/\.\*/) {
2452    # Only interested in a file_regexp if it specifies precisely one
2453    # file. 
2454    # So, skip anything with a .* in it as it is too general
2455    return [$tagname => $attrHash];
2456    }
2457    my $src_file = $parser->{'parameters'}->{'src_file'};
2458   
2459    if (!($src_file =~ m/$file_regexp/)) {
2460    return [$tagname => $attrHash];
2461    }
2462
2463    # Accumulate the metadata block to the end of the description block
2464    # Note: This adds metadata block to all description blocks, so if there are
2465    # multiple FileSets, it will add to all of them
2466    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2467
2468    # if metamode was "override" but get to here then it failed to
2469    # find an item to override, in which case it should append its
2470    # value to the end, just like the "accumulate" mode
2471
2472    if ($metamode eq "override") {
2473        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2474    }
2475
2476    # tack a new metadata tag on to the end of the <Metadata>+ block
2477    my $metaname = $parser->{'parameters'}->{'metaname'};
2478    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2479   
2480    my $metadata_attr = { '_content' => $metavalue,
2481                  'name'     => $metaname,
2482                  'mode'     => "accumulate" };
2483
2484    my $append_metadata = [ "Metadata" => $metadata_attr ];
2485    my $description_content = $attrHash->{'_content'};
2486   
2487##  print STDERR "*** appending to metadata.xml\n";
2488
2489    # append the new metadata element to the end of the current
2490    # content contained inside this tag
2491    if (ref($description_content) eq "") {
2492        # => string or numeric literal
2493        # this is caused by a <Description> block has no <Metadata> child elements
2494        # => set up an empty array in '_content'
2495        $attrHash->{'_content'} = [ "\n" ];
2496        $description_content = $attrHash->{'_content'};
2497    }
2498
2499    push(@$description_content,"    ", $append_metadata ,"\n        ");
2500    $parser->{'parameters'}->{'metamode'} = "done";
2501    }
2502
2503    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2504    return [$tagname => $attrHash];
2505}
2506
2507
2508sub mxml_filename
2509{
2510    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2511
2512    # Store the filename of the Current Fileset
2513    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2514    # FileName tag must come before Description tag
2515    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2516
2517    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2518    return [$tagname => $attrHash];
2519}
2520
2521
2522sub mxml_fileset
2523{
2524    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2525
2526    # Initilise the current_file
2527    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2528    # FileName tag must come before Description tag
2529    $parser->{'parameters'}->{'current_file'} = "";
2530
2531    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2532    return [$tagname => $attrHash];
2533}
2534
2535sub mxml_directorymetadata
2536{
2537    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2538
2539    # if we haven't processed the metadata when we reach the end of metadata.xml
2540    # it's because there's no particular FileSet element whose FileName matched
2541    # In which case, add a new FileSet for this FileName
2542    my $metamode = $parser->{'parameters'}->{'metamode'};
2543    if($metamode ne "done") {
2544   
2545    if ($metamode eq "override") {
2546        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2547    }
2548
2549    # If we get to here and metamode is override, this means there
2550    # was no existing value to overide => treat as an append operation
2551   
2552    # Create a new FileSet element and append to DirectoryMetadata
2553    # <FileSet>
2554    # <FileName>src_file</FileName>
2555    # <Description>
2556    # <Metadata mode="" name="">metavalue</Metadata>
2557    # </Description>
2558    # </FileSet>
2559    my $src_file = $parser->{'parameters'}->{'src_file'};
2560    my $metaname = $parser->{'parameters'}->{'metaname'};
2561    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2562    my $metadata_attr = {
2563        '_content' => $metavalue,
2564        'name' => $metaname,
2565        'mode' => "accumulate"
2566    };
2567    my $append_metadata = [ "Metadata" => $metadata_attr ];
2568    my $description_attr = { '_content' => [ "\n\t\t   ", $append_metadata, "\n\t\t"] };
2569    my $description_element = [ "Description" => $description_attr ];
2570
2571    #_content is not an attribute, it's special and holds the children of this element
2572    # including the textnode value embedded in this element if any.
2573    my $filename_attr = {'_content' => $src_file};
2574    my $filename_element = [ "FileName" => $filename_attr ];
2575
2576    my $fileset_attr = {};
2577    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2578    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2579   
2580   
2581    # get children of dirmeta, and push the new FileSet element onto it
2582    print "Appending metadata to metadata.xml\n";
2583    my $dirmeta_content = $attrHash->{'_content'};
2584    if (ref($dirmeta_content)) {
2585        # got some existing interesting nested content
2586        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2587        push(@$dirmeta_content, "\t", $fileset ,"\n");
2588    }
2589    else {
2590        #description_content is most likely a string such as "\n"
2591        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2592        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2593    }   
2594
2595    $parser->{'parameters'}->{'metamode'} = "done";
2596    }
2597    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2598    return [$tagname => $attrHash];
2599}
2600
2601
2602sub edit_metadata_xml
2603{
2604    my $self = shift @_;
2605    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2606
2607    # Set the call-back functions for the metadata tags
2608    my @rules =
2609    ( _default => 'raw',
2610          'FileName' => \&mxml_filename,
2611      'Metadata' => \&mxml_metadata,
2612      'Description' => \&mxml_description,
2613          'FileSet' => \&mxml_fileset,
2614      'DirectoryMetadata' => \&mxml_directorymetadata);
2615
2616    # use XML::Rules to add it in (read in and out again)
2617    my $parser = XML::Rules->new(rules => \@rules,
2618                 style => 'filter',
2619                                 output_encoding => 'utf8',
2620                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2621
2622    if (!-e $metadata_xml_filename) {
2623   
2624        if (open(MOUT,">$metadata_xml_filename")) {
2625           
2626            my $src_file_re = &util::filename_to_regex($src_file);
2627            # shouldn't the following also be in the above utility routine??
2628            # $src_file_re =~ s/\./\\./g;
2629       
2630            print MOUT "<?xml version=\"1.0\"?>\n";
2631            print MOUT "<DirectoryMetadata>\n";
2632            print MOUT " <FileSet>\n";
2633            print MOUT "  <FileName>$src_file_re</FileName>\n";
2634            print MOUT "  <Description>\n";
2635            print MOUT "  </Description>\n";
2636            print MOUT " </FileSet>\n";
2637            print MOUT "</DirectoryMetadata>\n";
2638
2639            close(MOUT);
2640        }
2641        else {
2642            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2643        }
2644    }
2645   
2646   
2647    my $xml_in = "";
2648    if (!open(MIN,"<$metadata_xml_filename")) {
2649        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2650    }
2651    else {
2652        # Read them in
2653        my $line;
2654        while (defined ($line=<MIN>)) {
2655            $xml_in .= $line;
2656        }
2657        close(MIN);
2658
2659        # Filter with the call-back functions
2660        my $xml_out = "";
2661
2662        my $MOUT;
2663        if (!open($MOUT,">$metadata_xml_filename")) {
2664            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2665        }
2666        else {
2667            binmode($MOUT,":utf8");
2668
2669            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2670            # At the moment, I will just hack it!
2671            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2672            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2673            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2674            #print MOUT $xml_out;
2675
2676            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2677                              metapos => $metapos,
2678                      metavalue => $metavalue,
2679                      metamode => $metamode,
2680                      src_file => $src_file,
2681                      prevmetavalue => $prevmetavalue,
2682                      current_file => undef} );
2683            close($MOUT);       
2684        }
2685   }
2686}
2687
2688
2689sub set_import_metadata
2690{
2691    my $self = shift @_;
2692   
2693    my $username  = $self->{'username'};
2694    my $collect   = $self->{'collect'};
2695    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2696   
2697    if ($baseaction::authentication_enabled) {
2698    # Ensure the user is allowed to edit this collection
2699    $self->authenticate_user($username, $collect);
2700    }
2701
2702    # Make sure the collection isn't locked by someone else
2703    $self->lock_collection($username, $collect);
2704 
2705    $self->_set_import_metadata(@_);
2706
2707    # Release the lock once it is done
2708    $self->unlock_collection($username, $collect);
2709   
2710}
2711
2712sub set_import_metadata_array
2713{
2714    my $self = shift @_;
2715
2716    my $username  = $self->{'username'};
2717    my $collect   = $self->{'collect'};
2718    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2719#    my $gsdlhome  = $self->{'gsdlhome'};
2720
2721    if ($baseaction::authentication_enabled) {
2722    # Ensure the user is allowed to edit this collection   
2723    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2724    }
2725
2726    my $site = $self->{'site'};
2727    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2728   
2729    $gsdl_cgi->checked_chdir($collect_dir);
2730
2731    # Make sure the collection isn't locked by someone else
2732    $self->lock_collection($username, $collect);
2733
2734    $self->_set_import_metadata_array(@_);
2735
2736    # Release the lock once it is done
2737    $self->unlock_collection($username, $collect);
2738
2739}
2740
2741
2742sub _set_import_metadata_array
2743{
2744    my $self = shift @_;
2745
2746    my $collect   = $self->{'collect'};
2747    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2748
2749    my $site = $self->{'site'};
2750    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2751   
2752    # look up additional args
2753   
2754    my $infodbtype = $self->{'infodbtype'};
2755   
2756    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2757    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2758   
2759    my $json_str = $self->{'json'};
2760    my $doc_array = decode_json $json_str;
2761   
2762    my $global_status = 0;
2763    my $global_mess = "";
2764   
2765    my @all_docids = ();
2766   
2767    foreach my $doc_array_rec ( @$doc_array )
2768    {
2769    my $status = -1;
2770    my $docid = $doc_array_rec->{'docid'};
2771   
2772    my ($docid_root,$docid_secnum);
2773    if(defined $docid) {   
2774        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2775        # as yet no support for setting subsection metadata in metadata.xml
2776        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2777        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2778        next; # skip this docid in for loop
2779        }
2780    }
2781
2782    push(@all_docids,$docid); # docid_root rather
2783   
2784    my $metaname = $doc_array_rec->{'metaname'};
2785    if (defined $metaname) {
2786        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2787        my $metavalue = $doc_array_rec->{'metavalue'};
2788        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2789
2790        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2791        # make "accumulate" the default (less destructive, as won't actually
2792        # delete any existing values)
2793        $metamode = "accumulate";
2794        }
2795
2796        # adding metapos and prevmetavalue support to import_metadata subroutines
2797        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2798        my $prevmetavalue = $self->{'prevmetavalue'};
2799
2800        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2801       
2802    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2803        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2804       
2805        foreach my $metatable_rec ( @$metatable ) {
2806        $metaname  = $metatable_rec->{'metaname'};
2807        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2808        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2809            # make "accumulate" the default (less destructive, as won't actually
2810            # delete any existing values)
2811            $metamode = "accumulate";
2812        }
2813
2814        # No support for metapos and prevmetavalue in the JSON metatable substructure
2815        my $metapos = undef;
2816        my $prevmetavalue = undef;
2817        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2818       
2819        foreach my $metavalue ( @$metavals ) {
2820            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2821
2822            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2823            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2824            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2825            # metavalue for this metaname will be lost
2826            $metamode = "accumulate";
2827            }
2828        }
2829        }
2830    }       
2831    }
2832
2833    # always a success message
2834    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2835    $gsdl_cgi->generate_ok_message($mess);
2836}
2837
2838# always returns true (1)
2839sub set_import_metadata_entry
2840{
2841    my $self = shift @_;
2842    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2843
2844    my $info_mess = <<RAWEND;
2845****************************
2846  set_import_metadata_entry()
2847****************************
2848RAWEND
2849
2850    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2851    $info_mess .= " collect     = $collect\n"     if defined($collect);
2852    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
2853    $info_mess .= " arcinfo_doc_filename  = $arcinfo_doc_filename\n"  if defined($arcinfo_doc_filename);
2854    $info_mess .= " docid       = $docid\n"       if defined($docid);
2855    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
2856    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2857    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2858    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2859    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2860     
2861    $info_mess .= "****************************\n";
2862
2863    $gsdl_cgi->generate_message($info_mess);
2864
2865    # import works with metadata.xml which can have inherited metadata
2866    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2867    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2868    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2869    # what they're doing if they provide a metapos.
2870    if(defined $metapos) {
2871    print STDERR "@@@@ WARNING: metapos defined.\n";
2872    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2873    }
2874
2875    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2876    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2877    my $metadata_xml_file;
2878    my $import_filename = undef;
2879   
2880    if (defined $docid) {
2881    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2882    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2883
2884    # This now stores the full pathname
2885    $import_filename = $doc_rec->{'src-file'}->[0];
2886    $import_filename = &util::placeholders_to_abspath($import_filename);
2887
2888    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2889         # as the array version of the method doesn't support the -f parameter yet
2890    my $import_file  = $self->{'f'};
2891    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2892    }
2893   
2894    # figure out correct metadata.xml file [?]
2895    # Assuming the metadata.xml file is next to the source file
2896    # Note: This will not work if it is using the inherited metadata from the parent folder
2897    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2898    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2899   
2900    # If we're overriding everything, then $prevmetavalue=undefined and
2901    # $metamode=override combined with $metapos=undefined
2902    # in which case we need to remove all metavalues for the metaname at the given (sub)section
2903    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2904    if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2905##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2906    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2907
2908    }
2909
2910    # Edit the metadata.xml
2911    # Modified by Jeffrey from DL Consulting
2912    # Handle the case where there is one metadata.xml file for multiple FileSets
2913    # The XML filter needs to know whether it is in the right FileSet
2914    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2915    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2916    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2917                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2918    #return 0;
2919    return $metadata_xml_filename;
2920}
2921
2922sub _remove_import_metadata
2923{
2924    my $self = shift @_;
2925
2926    my $collect   = $self->{'collect'};
2927    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2928#   my $gsdlhome  = $self->{'gsdlhome'};
2929    my $infodbtype = $self->{'infodbtype'};
2930   
2931    # Obtain the collect dir
2932    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2933    my $site = $self->{'site'};
2934    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2935   
2936    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2937    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2938
2939    # look up additional args
2940    my $docid = $self->{'d'};
2941    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2942    {
2943        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2944    }
2945   
2946    my $metaname = $self->{'metaname'};
2947    my $metapos = $self->{'metapos'};
2948    my $metavalue = $self->{'metavalue'};
2949    if(defined $metavalue) {
2950        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2951    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2952        $metapos = 0;
2953    }
2954    my $metamode = $self->{'metamode'} || undef;
2955
2956    # import works with metadata.xml which can have inherited metadata
2957    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2958    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2959    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2960    # what they're doing if they provide a metapos.
2961    if(defined $metapos) {
2962        print STDERR "@@@@ WARNING: metapos defined.\n";
2963        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2964    }
2965   
2966    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2967    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2968    my $metadata_xml_file;
2969    my $import_filename = undef;
2970    if (defined $docid)
2971    {
2972        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2973        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2974
2975        # This now stores the full pathname
2976        $import_filename = $doc_rec->{'src-file'}->[0];
2977        $import_filename = &util::placeholders_to_abspath($import_filename);
2978    }
2979
2980    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2981    {
2982        $gsdl_cgi->generate_error("There is no metadata\n");
2983    }
2984   
2985    # figure out correct metadata.xml file [?]
2986    # Assuming the metadata.xml file is next to the source file
2987    # Note: This will not work if it is using the inherited metadata from the parent folder
2988    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2989    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2990   
2991    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2992   
2993    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2994    $mess .= "  $metaname";
2995    $mess .= " = $metavalue\n";
2996   
2997    $gsdl_cgi->generate_ok_message($mess);
2998
2999    #return $status; # in case calling functions have a use for this
3000}
3001
3002sub remove_import_metadata
3003{
3004    my $self = shift @_;
3005   
3006    my $username = $self->{'username'};
3007    my $collect   = $self->{'collect'};
3008    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3009   
3010    if ($baseaction::authentication_enabled) {
3011        # Ensure the user is allowed to edit this collection       
3012        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3013    }
3014
3015    # Make sure the collection isn't locked by someone else
3016    $self->lock_collection($username, $collect);
3017   
3018    $self->_remove_import_metadata(@_);
3019
3020    # Release the lock once it is done
3021    $self->unlock_collection($username, $collect);
3022
3023}
3024
3025sub remove_from_metadata_xml
3026{
3027    my $self = shift @_;
3028    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3029    # metamode generally has no meaning for removing meta, but is used by set_meta
3030    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3031
3032    # Set the call-back functions for the metadata tags
3033    my @rules =
3034    (
3035        _default => 'raw',
3036        'Metadata' => \&rfmxml_metadata,
3037        'FileName' => \&mxml_filename
3038    );
3039       
3040    my $parser = XML::Rules->new
3041    (
3042        rules => \@rules,
3043        style => 'filter',
3044        output_encoding => 'utf8',
3045     #normalisespaces => 1,
3046            stripspaces => 2|0|0 # ineffectual
3047    );
3048   
3049    my $xml_in = "";
3050    if (!open(MIN,"<$metadata_xml_filename"))
3051    {
3052        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3053    }
3054    else
3055    {
3056        # Read them in
3057        my $line;
3058        while (defined ($line=<MIN>)) {
3059            $xml_in .= $line;
3060        }
3061        close(MIN);
3062
3063        # Filter with the call-back functions
3064        my $xml_out = "";
3065
3066        my $MOUT;
3067        if (!open($MOUT,">$metadata_xml_filename")) {
3068            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3069        }
3070        else {
3071            binmode($MOUT,":utf8");
3072            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
3073            close($MOUT);       
3074        }
3075    }
3076}
3077
3078sub rfmxml_metadata
3079{
3080    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3081
3082    # metadata.xml does not handle subsections
3083
3084    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3085    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3086        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
3087    {
3088        if (!defined $parser->{'parameters'}->{'poscount'})
3089        {
3090            $parser->{'parameters'}->{'poscount'} = 0;
3091        }
3092        else
3093        {
3094            $parser->{'parameters'}->{'poscount'}++;
3095        }
3096
3097        # if overriding but no metapos, then clear all the meta for this metaname
3098        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
3099            return [];
3100        }
3101   
3102        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3103        {
3104            return [];
3105        }
3106        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3107        {
3108            return [];
3109        }       
3110    }
3111
3112    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3113    return [$tagname => $attrHash];
3114}
3115
3116sub _remove_live_metadata
3117{
3118    my $self = shift @_;
3119
3120    my $collect   = $self->{'collect'};
3121    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3122#    my $gsdlhome  = $self->{'gsdlhome'};
3123    my $infodbtype = $self->{'infodbtype'};
3124
3125    # Obtain the collect dir
3126    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3127    my $site = $self->{'site'};
3128    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3129
3130   
3131    # look up additional args
3132    my $docid     = $self->{'d'};
3133    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3134      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3135    }
3136   
3137    # Generate the dbkey
3138    my $metaname  = $self->{'metaname'};
3139    my $dbkey = "$docid.$metaname";
3140
3141    # To people who know $collect_tail please add some comments
3142    # Obtain the live gdbm_db path
3143    my $collect_tail = $collect;
3144    $collect_tail =~ s/^.*[\/|\\]//;
3145    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3146    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3147
3148    # Remove the key
3149    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3150    my $status = system($cmd);
3151    if ($status != 0) {
3152        # Catch error if gdbmdel failed
3153    my $mess = "Failed to set metadata key: $dbkey\n";
3154   
3155    $mess .= "PATH: $ENV{'PATH'}\n";
3156    $mess .= "cmd = $cmd\n";
3157    $mess .= "Exit status: $status\n";
3158    $mess .= "System Error Message: $!\n";
3159
3160    $gsdl_cgi->generate_error($mess);
3161    }
3162    else {
3163    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3164    }
3165
3166}
3167
3168sub remove_live_metadata
3169{
3170    my $self = shift @_;
3171
3172    my $username  = $self->{'username'};
3173    my $collect   = $self->{'collect'};
3174    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3175    my $gsdlhome  = $self->{'gsdlhome'};
3176   
3177    if ($baseaction::authentication_enabled) {
3178    # Ensure the user is allowed to edit this collection   
3179    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3180    }
3181
3182    # Make sure the collection isn't locked by someone else
3183    $self->lock_collection($username, $collect);
3184
3185    $self->_remove_live_metadata(@_);
3186
3187    $self->unlock_collection($username, $collect);
3188}
3189
3190sub remove_metadata
3191{
3192    my $self = shift @_;
3193
3194    my $where = $self->{'where'};
3195    if(!$where) {
3196    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3197    return;
3198    }
3199
3200    my $username  = $self->{'username'};
3201    my $collect   = $self->{'collect'};
3202    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3203
3204    if ($baseaction::authentication_enabled) {
3205    # Ensure the user is allowed to edit this collection   
3206    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3207    }
3208
3209    # Make sure the collection isn't locked by someone else
3210    $self->lock_collection($username, $collect);
3211
3212    # check which directories need to be processed, specified in $where as
3213    # any combination of import|archives|index|live
3214    if($where =~ m/import/) {
3215    $self->_remove_import_metadata(@_);     
3216    }
3217    if($where =~ m/archives/) {
3218    $self->_remove_archives_metadata(@_);       
3219   }
3220    if($where =~ m/index/) {
3221    $self->_remove_index_metadata(@_);     
3222    }
3223
3224    # Release the lock once it is done
3225    $self->unlock_collection($username, $collect);
3226}
3227
3228# the internal version, without authentication
3229sub _remove_index_metadata
3230{   
3231    my $self = shift @_;
3232
3233    my $collect   = $self->{'collect'};
3234    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3235#    my $gsdlhome  = $self->{'gsdlhome'};
3236    my $infodbtype = $self->{'infodbtype'};
3237   
3238    # Obtain the collect dir
3239    my $site = $self->{'site'};
3240    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3241    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3242
3243        # look up additional args
3244    my $docid     = $self->{'d'};
3245    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3246      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3247    }
3248    my $metaname  = $self->{'metaname'};
3249    my $metapos   = $self->{'metapos'};
3250    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
3251
3252    # To people who know $collect_tail please add some comments
3253    # Obtain the path to the database
3254    my $collect_tail = $collect;
3255    $collect_tail =~ s/^.*[\/|\\]//;
3256    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3257    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3258
3259    # Read the docid entry
3260    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3261
3262    # Check to make sure the key does exist
3263    if (!defined ($doc_rec->{$metaname})) {
3264        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3265    }
3266
3267    # Obtain the specified metadata pos
3268    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3269    if(!defined $metapos && !defined $metavalue) {
3270        $metapos = 0;
3271    }
3272   
3273
3274    # consider check key is defined before deleting?
3275    # Loop through the metadata array and ignore the specified position
3276    my $filtered_metadata = [];
3277    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
3278    for (my $i=0; $i<$num_metadata_vals; $i++) {
3279    my $metaval = shift(@{$doc_rec->{$metaname}});
3280
3281    if (!defined $metavalue && $i != $metapos) {
3282        push(@$filtered_metadata,$metaval);
3283    }
3284   
3285    if(defined $metavalue && !($metavalue eq $metaval))
3286    {
3287        push(@$filtered_metadata,$metaval);
3288    }
3289    }
3290    $doc_rec->{$metaname} = $filtered_metadata;
3291
3292    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3293    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3294
3295    if ($status != 0) {
3296    my $mess = "Failed to set metadata key: $docid\n";
3297   
3298    $mess .= "PATH: $ENV{'PATH'}\n";
3299    $mess .= "Exit status: $status\n";
3300    $mess .= "System Error Message: $!\n";
3301
3302    $gsdl_cgi->generate_error($mess);
3303    }
3304    else {
3305    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3306    $mess .= "  $metaname";
3307    $mess .= "->[$metapos]" if (defined $metapos);
3308    $mess .= " ($metavalue)" if (defined $metavalue);
3309
3310    $gsdl_cgi->generate_ok_message($mess);
3311    }
3312
3313    #return $status; # in case calling functions have a use for this
3314}
3315
3316sub remove_index_metadata
3317{
3318    my $self = shift @_;
3319
3320    my $username  = $self->{'username'};
3321    my $collect   = $self->{'collect'};
3322    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3323#    my $gsdlhome  = $self->{'gsdlhome'};
3324   
3325    if ($baseaction::authentication_enabled) {
3326    # Ensure the user is allowed to edit this collection   
3327    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3328    }
3329
3330    # Obtain the collect dir
3331    my $site = $self->{'site'};
3332    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3333    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3334
3335    # Make sure the collection isn't locked by someone else
3336    $self->lock_collection($username, $collect);
3337
3338    $self->_remove_index_metadata(@_);
3339
3340    # Release the lock once it is done
3341    $self->unlock_collection($username, $collect);
3342}
3343
3344
3345# Was trying to reused the codes, but the functions need to be broken
3346# down more before they can be reused, otherwise there will be too
3347# much overhead and duplicate process...
3348sub insert_metadata
3349{
3350    my $self = shift @_;
3351   
3352    my $username  = $self->{'username'};
3353    my $collect   = $self->{'collect'};
3354    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3355    my $gsdlhome  = $self->{'gsdlhome'};
3356    my $infodbtype = $self->{'infodbtype'};
3357   
3358    # If the import metadata and gdbm database have been updated, we
3359    # need to insert some notification to warn user that the the text
3360    # they see at the moment is not indexed and require a rebuild.
3361    my $rebuild_pending_macro = "_rebuildpendingmessage_";
3362
3363    if ($baseaction::authentication_enabled) {
3364    # Ensure the user is allowed to edit this collection
3365    $self->authenticate_user($username, $collect);
3366    }
3367
3368    # Obtain the collect and archive dir   
3369    my $site = $self->{'site'};
3370    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3371    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3372    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3373
3374    # Make sure the collection isn't locked by someone else
3375    $self->lock_collection($username, $collect);
3376   
3377    # Check additional args
3378    my $docid = $self->{'d'};
3379    if (!defined($docid)) {
3380    $gsdl_cgi->generate_error("No document id is specified: d=...");
3381    }
3382    my $metaname = $self->{'metaname'};
3383    if (!defined($metaname)) {
3384    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3385    }
3386    my $metavalue = $self->{'metavalue'};
3387    if (!defined($metavalue) || $metavalue eq "") {
3388    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3389    }
3390    # make "accumulate" the default (less destructive, as won't actually
3391    # delete any existing values)
3392    my $metamode = "accumulate";
3393
3394    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3395    my $metapos   = undef;
3396    my $prevmetavalue = undef;
3397
3398    #=======================================================================#
3399    # set_import_metadata [START]
3400    #=======================================================================#
3401    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3402    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3403    my $metadata_xml_file;
3404    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3405    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3406   
3407    # This now stores the full pathname
3408    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3409    $import_filename = &util::placeholders_to_abspath($import_filename);
3410   
3411    # figure out correct metadata.xml file [?]
3412    # Assuming the metadata.xml file is next to the source file
3413    # Note: This will not work if it is using the inherited metadata from the parent folder
3414    my ($import_tailname, $import_dirname)
3415    = File::Basename::fileparse($import_filename);
3416    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3417
3418    # Shane's escape characters
3419    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3420    $metavalue =~ s/\,/&#44;/g;
3421    $metavalue =~ s/\:/&#58;/g;
3422    $metavalue =~ s/\|/&#124;/g;
3423    $metavalue =~ s/\(/&#40;/g;
3424    $metavalue =~ s/\)/&#41;/g;
3425    $metavalue =~ s/\[/&#91;/g;
3426    $metavalue =~ s/\\/&#92;/g;
3427    $metavalue =~ s/\]/&#93;/g;
3428    $metavalue =~ s/\{/&#123;/g;
3429    $metavalue =~ s/\}/&#125;/g;
3430    $metavalue =~ s/\"/&#34;/g;
3431    $metavalue =~ s/\`/&#96;/g;
3432    $metavalue =~ s/\n/_newline_/g;
3433
3434    # Edit the metadata.xml
3435    # Modified by Jeffrey from DL Consulting
3436    # Handle the case where there is one metadata.xml file for multiple FileSets
3437    # The XML filter needs to know whether it is in the right FileSet
3438    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3439    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3440    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3441                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3442    #=======================================================================#
3443    # set_import_metadata [END]
3444    #=======================================================================#
3445
3446
3447    #=======================================================================#
3448    # set_metadata (accumulate version) [START]
3449    #=======================================================================#
3450    # To people who know $collect_tail please add some comments
3451    # Obtain path to the database
3452    my $collect_tail = $collect;
3453    $collect_tail =~ s/^.*[\/|\\]//;
3454    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3455    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3456
3457    # Read the docid entry
3458    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3459
3460    # Protect the quotes
3461    $metavalue =~ s/\"/\\\"/g;
3462
3463    # Adds the pending macro
3464    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3465
3466    # If the metadata doesn't exist, create a new one
3467    if (!defined($doc_rec->{$metaname})){   
3468    $doc_rec->{$metaname} = [ $macro_metavalue ];
3469    }
3470    # Else, let's acculumate the values
3471    else {
3472        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3473    }
3474
3475    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3476    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3477
3478    if ($status != 0) {
3479        # Catch error if gdbmget failed
3480    my $mess = "Failed to set metadata key: $docid\n";
3481   
3482    $mess .= "PATH: $ENV{'PATH'}\n";
3483    $mess .= "Exit status: $status\n";
3484    $mess .= "System Error Message: $!\n";
3485
3486    $gsdl_cgi->generate_error($mess);
3487    }
3488    else {
3489    my $mess = "insert-metadata successful: Key[$docid]\n";
3490    $mess .= "  [In metadata.xml] $metaname";
3491    $mess .= " = $metavalue\n";
3492    $mess .= "  [In database] $metaname";
3493    $mess .= " = $macro_metavalue\n";
3494    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3495        $gsdl_cgi->generate_ok_message($mess);
3496    }   
3497    #=======================================================================#
3498    # set_metadata (accumulate version) [END]
3499    #=======================================================================#
3500
3501    # Release the lock once it is done
3502    $self->unlock_collection($username, $collect);
3503}
3504
35051;
Note: See TracBrowser for help on using the browser.