root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 29099

Revision 29099, 119.3 KB (checked in by ak19, 5 years ago)

Cosmetic change after more important commit.

Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130        'optional-args'   => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131                     
132    "remove-archives-metadata" => {
133        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134        'optional-args'   => [ "metapos", "metavalue" ] },
135
136    "remove-live-metadata" => {
137        'compulsory-args' => [ "d", "metaname" ],
138        'optional-args'   => [ ] },
139
140    "remove-index-metadata" => {
141        'compulsory-args' => [ "d", "metaname" ],
142        'optional-args'   => [ "metapos", "metavalue" ] },
143
144    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145        'compulsory-args' => [ "d", "metaname" ],
146        'optional-args'   => [ "where", "metapos", "metavalue" ] },
147
148    #INSERT METHODS
149    "insert-metadata" => {
150        'compulsory-args' => [ "d", "metaname", "metavalue" ],
151        'optional-args'   => [ ] }
152};
153
154
155sub new
156{
157    my $class = shift (@_);
158    my ($gsdl_cgi,$iis6_mode) = @_;
159
160    # Treat metavalue specially.  To transmit this through a GET request
161    # the Javascript side has url-encoded it, so here we need to decode
162    # it before proceeding
163
164    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171    # need to do the same with prevmetavalue
172    my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
173    my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
174    my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
175
176    $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
177    $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
178
179    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
180
181    return bless $self, $class;
182}
183
184
185sub get_live_metadata
186{
187    my $self = shift @_;
188
189    my $username  = $self->{'username'};
190    my $collect   = $self->{'collect'};
191    my $gsdl_cgi  = $self->{'gsdl_cgi'};
192    my $gsdlhome  = $self->{'gsdlhome'};
193    my $infodbtype = $self->{'infodbtype'};
194   
195    # live metadata gets/saves value scoped (prefixed) by the current usename
196    # so (for now) let's not bother to enforce authentication
197
198    # Obtain the collect dir
199    my $site = $self->{'site'};
200    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
201    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
202
203    # No locking collection when getting metadata, only when setting it
204#    $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
205
206    # look up additional args
207    my $docid  = $self->{'d'};
208    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
209       $gsdl_cgi->generate_error("No docid (d=...) specified.");
210    }
211
212    # Generate the dbkey
213    my $metaname  = $self->{'metaname'};
214    my $dbkey = "$docid.$metaname";
215
216    # To people who know $collect_tail please add some comments
217    # Obtain path to the database
218    my $collect_tail = $collect;
219    $collect_tail =~ s/^.*[\/|\\]//;
220    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
221    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
222   
223    # Obtain the content of the key
224    my $cmd = "gdbmget $infodb_file_path $dbkey";
225    if (open(GIN,"$cmd |") == 0) {
226        # Catch error if gdbmget failed
227    my $mess = "Failed to get metadata key: $metaname\n";
228    $mess .= "$!\n";
229
230    $gsdl_cgi->generate_error($mess);
231    }
232    else {
233    binmode(GIN,":utf8");
234        # Read everything in and concatenate them into $metavalue
235    my $metavalue = "";
236    my $line;
237    while (defined ($line=<GIN>)) {
238        $metavalue .= $line;
239    }
240    close(GIN);
241    chomp($metavalue); # Get rid off the tailing newlines
242    $gsdl_cgi->generate_ok_message("$metavalue");
243    }
244
245    # Release the lock once it is done
246#    $self->unlock_collection($username, $collect);
247}
248
249# just calls the index version
250sub get_metadata
251{
252    my $self = shift @_;
253    $self->get_index_metadata(@_);
254}
255
256# JSON version that will get the requested metadata values
257# from the requested source (index, import, archives or live)
258# One of the params is a JSON string and the return value is JSON too
259# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
260sub get_metadata_array
261{
262    my $self = shift @_;
263
264    my $where = $self->{'where'};
265    if (!$where) { 
266    $where = "index"; # default behaviour is to get the values from index
267    }
268
269    # Only when setting metadata do we perform authentication and do we lock the collection,
270    # not when getting metadata
271
272    # for get_meta_array, the where param can only be ONE of import, archives, index, live
273    if($where =~ m/index/) {
274    $self->_get_index_metadata_array(@_);
275    }
276    elsif($where =~ m/archives/) {
277    $self->_get_archives_metadata_array(@_);
278    }
279    elsif($where =~ m/import/) {
280    $self->_get_import_metadata_array(@_);
281    }
282    elsif($where =~ m/live/) {
283        $self->_get_live_metadata_array(@_);
284    }
285}
286
287# Unused at present. Added for completion. Tested.
288sub _get_import_metadata_array {
289   
290    my $self = shift @_;
291
292    my $collect   = $self->{'collect'};
293    my $gsdl_cgi  = $self->{'gsdl_cgi'};
294    my $site = $self->{'site'};
295    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
296   
297    # look up additional args
298    my $infodbtype = $self->{'infodbtype'};
299   
300    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
301    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
302    my $json_str      = $self->{'json'};
303    my $doc_array = decode_json $json_str;
304
305    my $json_result_str = "[";
306    my $first_doc_rec = 1;
307    foreach my $doc_array_rec ( @$doc_array ) {
308   
309    my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
310   
311    if($first_doc_rec) {
312        $first_doc_rec = 0;
313    } else {
314        $json_result_str .= ",";
315    }
316    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
317
318    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
319    $json_result_str = $json_result_str . ",\"metatable\":[";
320
321    my $first_rec = 1;
322    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
323
324        # Read the docid entry     
325        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
326        # This now stores the full pathname
327        my $import_filename = $doc_rec->{'src-file'}->[0];
328        $import_filename = &util::placeholders_to_abspath($import_filename);
329
330        # figure out correct metadata.xml file [?]
331        # Assuming the metadata.xml file is next to the source file
332        # Note: This will not work if it is using the inherited metadata from the parent folder
333        my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
334        my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
335
336
337        if($first_rec) {
338        $first_rec = 0;
339        } else {
340        $json_result_str .= ",";       
341        }
342       
343        my $metaname  = $metatable_rec->{'metaname'};
344        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
345
346        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
347        if(!defined $metapos) {
348        $metapos = 0;
349        }
350
351        # Obtain the specified metadata value(s)
352        my $metavalue;
353
354        if($metapos ne "all") { # get the value at a single metapos
355        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
356
357        #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
358       
359        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
360
361        } else {
362        my $first_metaval = 1;
363        $metapos = 0;
364        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
365
366        while (defined $metavalue && $metavalue ne "") {
367            if($first_metaval) {       
368            $first_metaval = 0;
369            } else {
370            $json_result_str .= ",";
371            }
372       
373            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
374
375            $metapos++;
376            $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
377        }
378        }
379
380        $json_result_str .= "]}"; # close metavals array and metatable record
381    }
382   
383    $json_result_str .= "]}"; # close metatable array and docid record
384    }
385
386    $json_result_str .= "]"; # close array of docids
387    $gsdl_cgi->generate_ok_message($json_result_str."\n");
388}
389
390# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
391# For more information on the format of the output, see _get_index_metadata_array, which is in use.
392sub _get_archives_metadata_array {
393
394    my $self = shift @_;
395
396    my $collect   = $self->{'collect'};
397    my $gsdl_cgi  = $self->{'gsdl_cgi'};
398    my $site = $self->{'site'};
399    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
400
401    # look up additional args   
402    my $infodbtype = $self->{'infodbtype'};
403
404    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
405    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
406
407    my $json_str      = $self->{'json'};
408    my $doc_array = decode_json $json_str;
409
410    my $json_result_str = "[";
411    my $first_doc_rec = 1;
412    foreach my $doc_array_rec ( @$doc_array ) {
413   
414    my $docid     = $doc_array_rec->{'docid'};
415   
416    if($first_doc_rec) {
417        $first_doc_rec = 0;
418    } else {
419        $json_result_str .= ",";
420    }
421    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
422
423    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
424    $json_result_str = $json_result_str . ",\"metatable\":[";
425
426    my $first_rec = 1;
427    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
428
429        my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
430        $docid_secnum = "" if (!defined $docid_secnum);
431
432        # Read the docid entry     
433        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
434        # This now stores the full pathname
435        my $doc_filename = $doc_rec->{'doc-file'}->[0];
436        $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
437
438        if($first_rec) {
439        $first_rec = 0;
440        } else {
441        $json_result_str .= ",";       
442        }
443       
444        my $metaname  = $metatable_rec->{'metaname'};
445        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
446
447        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
448        if(!defined $metapos) {
449        $metapos = 0;
450        }
451
452
453        # Obtain the specified metadata value(s)
454        my $metavalue;
455
456        if($metapos ne "all") { # get the value at a single metapos
457
458        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
459        #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
460       
461        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
462
463        } else {
464        my $first_metaval = 1;
465        $metapos = 0;
466        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
467
468        while (defined $metavalue && $metavalue ne "") {
469            if($first_metaval) {       
470            $first_metaval = 0;
471            } else {
472            $json_result_str .= ",";
473            }
474       
475            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
476
477            $metapos++;
478            $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
479        }
480        }
481
482        $json_result_str .= "]}"; # close metavals array and metatable record
483    }
484   
485    $json_result_str .= "]}"; # close metatable array and docid record
486    }
487
488    $json_result_str .= "]"; # close array of docids
489    $gsdl_cgi->generate_ok_message($json_result_str."\n");
490}
491
492
493# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
494sub _get_live_metadata_array
495{
496    my $self = shift @_;
497
498    my $collect   = $self->{'collect'};
499    my $gsdl_cgi  = $self->{'gsdl_cgi'};
500    my $site = $self->{'site'};
501    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
502
503    # look up additional args   
504    my $infodbtype = $self->{'infodbtype'};
505   
506    # To people who know $collect_tail please add some comments
507    # Obtain the path to the database
508    my $collect_tail = $collect;
509    $collect_tail =~ s/^.*[\/|\\]//;
510    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
511    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
512
513    my $json_str      = $self->{'json'};
514    my $doc_array = decode_json $json_str;
515
516    my $json_result_str = "[";
517    my $first_doc_rec = 1;
518
519    foreach my $doc_array_rec ( @$doc_array ) {
520   
521    my $docid     = $doc_array_rec->{'docid'};
522   
523    if($first_doc_rec) {
524        $first_doc_rec = 0;
525    } else {
526        $json_result_str .= ",";
527    }
528    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
529   
530    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
531    $json_result_str = $json_result_str . ",\"metatable\":[";
532   
533    my $first_rec = 1;
534    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
535        if($first_rec) {
536        $first_rec = 0;
537        } else {
538        $json_result_str .= ",";       
539        }
540       
541        my $metaname  = $metatable_rec->{'metaname'};
542        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
543       
544        # Generate the dbkey
545        my $dbkey = "$docid.$metaname";
546       
547        # metapos for get_live_metadata is always assumed to be "all".
548        # It's always going to get all the lines of metavalues associated with a metaname
549        # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
550        #my $metapos = "all";
551        my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
552        my $metavalue = "";
553       
554        # Obtain the content of the key
555        my $cmd = "gdbmget $infodb_file_path $dbkey";
556        if (open(GIN,"$cmd |") != 0) { # Success.
557       
558        binmode(GIN,":utf8");
559        # Read everything in and concatenate them into $metavalue       
560        my $line;
561        my $first_metaval = 1;
562        my $pos = 0;
563        while (defined ($line=<GIN>)) {
564            chomp($line); # Get rid off the tailing newlines
565           
566            if($metapos eq "all") {
567            if($first_metaval) {       
568                $first_metaval = 0;
569            } else {
570                $json_result_str .= ",";
571            }           
572            $metavalue = $line;
573            $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
574            } elsif($metapos == $pos) {
575            $metavalue = $line;
576            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
577            last;
578            } # else, the current $pos is not the required $metapos
579            $pos += 1;
580        }
581        close(GIN);
582        } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
583       
584        $json_result_str .= "]}"; # close metavals array and metatable record
585    }
586   
587    $json_result_str .= "]}"; # close metatable array and docid record
588    }
589
590    $json_result_str .= "]"; # close array of docids
591   
592    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
593}
594
595
596# Takes a JSON string and returns a JSON string
597# Request string is of the form:
598# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
599# Resulting string is of the form:
600# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
601sub _get_index_metadata_array
602{
603    my $self = shift @_;
604
605    my $collect   = $self->{'collect'};
606    my $gsdl_cgi  = $self->{'gsdl_cgi'};
607    my $site = $self->{'site'};
608    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610    # look up additional args   
611    my $infodbtype = $self->{'infodbtype'};
612   
613    # To people who know $collect_tail please add some comments
614    # Obtain the path to the database
615    my $collect_tail = $collect;
616    $collect_tail =~ s/^.*[\/|\\]//;
617    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
618    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
619
620    my $json_str      = $self->{'json'};
621    my $doc_array = decode_json $json_str;
622
623    my $json_result_str = "[";
624    my $first_doc_rec = 1;
625
626    foreach my $doc_array_rec ( @$doc_array ) {
627   
628    my $docid     = $doc_array_rec->{'docid'};
629   
630    if($first_doc_rec) {
631        $first_doc_rec = 0;
632    } else {
633        $json_result_str .= ",";
634    }
635    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
636
637    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
638    $json_result_str = $json_result_str . ",\"metatable\":[";
639
640    my $first_rec = 1;
641    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
642        if($first_rec) {
643        $first_rec = 0;
644        } else {
645        $json_result_str .= ",";       
646        }
647       
648        my $metaname  = $metatable_rec->{'metaname'};
649        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
650
651        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
652        if(!defined $metapos) {
653        $metapos = 0;
654        }
655
656         # Read the docid entry
657        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
658 
659        # Basically loop through and unescape_html the values
660        foreach my $k (keys %$doc_rec) {
661        my @escaped_v = ();
662        foreach my $v (@{$doc_rec->{$k}}) {
663            my $ev = &ghtml::unescape_html($v);
664            push(@escaped_v, $ev);
665        }
666        $doc_rec->{$k} = \@escaped_v;
667        }
668
669        # Obtain the specified metadata value(s)
670        my $metavalue;
671
672        if($metapos ne "all") { # get the value at a single metapos
673
674        $metavalue = $doc_rec->{$metaname}->[$metapos];
675
676        # protect any double quotes and colons in the metavalue before putting it into JSON
677        $metavalue =~ s/\"/&quot;/g if defined $metavalue;
678        $metavalue =~ s/\:/&58;/g if defined $metavalue;
679
680        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
681
682        } else {
683        my $first_metaval = 1;
684        $metapos = 0;
685        $metavalue = $doc_rec->{$metaname}->[$metapos];
686
687        while (defined $metavalue) {
688
689            # protect any double quotes and colons in the metavalue before putting it into JSON
690            $metavalue =~ s/\"/&quot;/g;
691            $metavalue =~ s/\:/&58;/g;
692
693            if($first_metaval) {       
694            $first_metaval = 0;
695            } else {
696            $json_result_str .= ",";
697            }
698       
699            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
700
701            $metapos++;
702            $metavalue = $doc_rec->{$metaname}->[$metapos];
703        }
704        }
705
706        $json_result_str .= "]}"; # close metavals array and metatable record
707    }
708   
709    $json_result_str .= "]}"; # close metatable array and docid record
710    }
711
712    $json_result_str .= "]"; # close array of docids
713
714    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
715}
716
717
718sub get_index_metadata
719{
720    my $self = shift @_;
721
722    my $username  = $self->{'username'};
723    my $collect   = $self->{'collect'};
724    my $gsdl_cgi  = $self->{'gsdl_cgi'};
725    my $gsdlhome  = $self->{'gsdlhome'};
726
727    # Obtain the collect dir
728    my $site = $self->{'site'};
729    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
730    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
731
732    # look up additional args
733    my $docid     = $self->{'d'};
734    my $metaname  = $self->{'metaname'};
735    my $metapos   = $self->{'metapos'};
736    my $infodbtype = $self->{'infodbtype'};
737
738    # To people who know $collect_tail please add some comments
739    # Obtain path to the database
740    my $collect_tail = $collect;
741    $collect_tail =~ s/^.*[\/|\\]//;
742    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
743    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
744
745    # Read the docid entry
746    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
747 
748    # Basically loop through and unescape_html the values
749    foreach my $k (keys %$doc_rec) {
750    my @escaped_v = ();
751    foreach my $v (@{$doc_rec->{$k}}) {
752        my $ev = &ghtml::unescape_html($v);
753        push(@escaped_v, $ev);
754    }
755    $doc_rec->{$k} = \@escaped_v;
756    }
757
758    # Obtain the specified metadata value
759    $metapos = 0 if (!defined $metapos);
760    my $metavalue = $doc_rec->{$metaname}->[$metapos];
761    $gsdl_cgi->generate_ok_message("$metavalue");
762   
763}
764
765
766sub get_import_metadata
767{
768    my $self = shift @_;
769
770    my $username  = $self->{'username'};
771    my $collect   = $self->{'collect'};
772    my $gsdl_cgi  = $self->{'gsdl_cgi'};
773    my $gsdlhome  = $self->{'gsdlhome'};
774
775    # Obtain the collect dir
776    my $site = $self->{'site'};
777    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
778    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
779
780    # look up additional args
781    my $docid     = $self->{'d'};
782    my $metaname  = $self->{'metaname'};
783    my $metapos = $self->{'metapos'};
784    $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
785
786    my $infodbtype = $self->{'infodbtype'};
787    if (!defined $docid)
788    {
789        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
790    }
791
792    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
793    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
794    my $metadata_xml_file;
795    my $import_filename = undef;
796   
797
798    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
799    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
800    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
801
802    # This now stores the full pathname
803    $import_filename = $doc_rec->{'src-file'}->[0];
804    $import_filename = &util::placeholders_to_abspath($import_filename);
805
806    # figure out correct metadata.xml file [?]
807    # Assuming the metadata.xml file is next to the source file
808    # Note: This will not work if it is using the inherited metadata from the parent folder
809    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
810    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
811
812    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
813
814}
815
816sub get_metadata_from_metadata_xml
817{
818    my $self = shift @_;
819    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
820   
821    my @rules =
822    (
823        _default => 'raw',
824        'Metadata' => \&gfmxml_metadata,
825        'FileName' => \&mxml_filename
826    );
827       
828    my $parser = XML::Rules->new
829    (
830        rules => \@rules,
831        output_encoding => 'utf8'
832    );
833   
834    my $xml_in = "";
835    if (!open(MIN,"<$metadata_xml_filename"))
836    {
837        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
838    }
839    else
840    {
841        # Read them in
842        my $line;
843        while (defined ($line=<MIN>)) {
844            $xml_in .= $line;
845        }
846        close(MIN);
847
848        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
849       
850        if(defined $parser->{'pad'}->{'metavalue'})
851        {
852            return $parser->{'pad'}->{'metavalue'};
853        }
854        else
855        {
856            return "";
857        }
858    }
859}
860
861sub gfmxml_metadata
862{
863    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
864
865    # no subsection support yet in metadata.xml
866
867    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
868    {
869        if (!defined $parser->{'parameters'}->{'poscount'})
870        {
871            $parser->{'parameters'}->{'poscount'} = 0;
872        }
873        else
874        {
875            $parser->{'parameters'}->{'poscount'}++;
876        }
877   
878        # gets the first value by default, since metapos defaults to 0
879        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
880        {
881            if($parser->{'parameters'}->{'metapos'} > 0) {
882            print STDERR "@@@@ WARNING: non-zero metapos.\n";
883            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
884            }
885            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
886        }
887    }
888}
889
890sub get_archives_metadata
891{
892    my $self = shift @_;
893
894    my $username  = $self->{'username'};
895    my $collect   = $self->{'collect'};
896    my $gsdl_cgi  = $self->{'gsdl_cgi'};
897#   my $gsdlhome  = $self->{'gsdlhome'};
898    my $infodbtype = $self->{'infodbtype'};
899
900    # Obtain the collect dir
901    my $site = $self->{'site'};
902    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
903   
904    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
905
906    # look up additional args
907    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
908    $docid_secnum = "" if (!defined $docid_secnum);
909   
910    my $metaname = $self->{'metaname'};
911    my $metapos = $self->{'metapos'};
912    $metapos = 0 if (!defined $metapos);
913   
914    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
915    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
916
917    # This now stores the full pathname
918    my $doc_filename = $doc_rec->{'doc-file'}->[0];
919
920    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
921
922}
923
924sub get_metadata_from_archive_xml
925{
926    my $self = shift @_;
927    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
928   
929    my @start_rules = ('Section' => \&dxml_start_section);
930   
931    my @rules =
932    (
933        _default => 'raw',
934        'Metadata' => \&gfdxml_metadata
935    );
936       
937    my $parser = XML::Rules->new
938    (
939        start_rules => \@start_rules,
940        rules => \@rules,
941        output_encoding => 'utf8'
942    );
943   
944    my $xml_in = "";
945    if (!open(MIN,"<$doc_xml_filename"))
946    {
947        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
948    }
949    else
950    {
951        # Read them in
952        my $line;
953        while (defined ($line=<MIN>)) {
954            $xml_in .= $line;
955        }
956        close(MIN);
957
958        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
959       
960        if(defined $parser->{'pad'}->{'metavalue'})
961        {
962            return $parser->{'pad'}->{'metavalue'};
963        }
964        else
965        {
966            return "";
967        }
968    }
969}
970
971sub gfdxml_metadata
972{
973    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
974   
975    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
976    {
977        return;
978    }
979
980    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
981    {
982        if (!defined $parser->{'parameters'}->{'poscount'})
983        {
984            $parser->{'parameters'}->{'poscount'} = 0;
985        }
986        else
987        {
988            $parser->{'parameters'}->{'poscount'}++;
989        }
990    }
991
992    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
993    {   
994        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
995    }
996}
997
998sub _set_live_metadata
999{
1000    my $self = shift @_;
1001
1002    my $collect   = $self->{'collect'};
1003    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1004    #my $gsdlhome  = $self->{'gsdlhome'};
1005    my $infodbtype = $self->{'infodbtype'};
1006
1007    # Obtain the collect dir
1008    my $site = $self->{'site'};
1009    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1010    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1011
1012
1013    # look up additional args
1014    my $docid     = $self->{'d'};
1015    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1016      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1017    }
1018    my $metavalue = $self->{'metavalue'};
1019
1020    # Generate the dbkey   
1021    my $metaname  = $self->{'metaname'};
1022    my $dbkey = "$docid.$metaname";
1023
1024    # To people who know $collect_tail please add some comments
1025    # Obtain path to the database
1026    my $collect_tail = $collect;
1027    $collect_tail =~ s/^.*[\/|\\]//;
1028    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1029    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1030
1031    # Set the new value
1032    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1033    my $status = system($cmd);
1034    if ($status != 0) {
1035        # Catch error if gdbmget failed
1036    my $mess = "Failed to set metadata key: $dbkey\n";
1037
1038    $mess .= "PATH: $ENV{'PATH'}\n";
1039    $mess .= "cmd = $cmd\n";
1040    $mess .= "Exit status: $status\n";
1041    $mess .= "System Error Message: $!\n";
1042
1043    $gsdl_cgi->generate_error($mess);
1044    }
1045    else {
1046    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
1047    }
1048
1049    #return $status; # in case calling functions have any further use for this
1050}
1051
1052sub set_live_metadata
1053{
1054    my $self = shift @_;
1055
1056    my $username  = $self->{'username'};
1057    my $collect   = $self->{'collect'};
1058    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1059 
1060    if ($baseaction::authentication_enabled) {
1061    # Ensure the user is allowed to edit this collection   
1062    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1063    }
1064
1065    # Make sure the collection isn't locked by someone else
1066    $self->lock_collection($username, $collect);
1067
1068    $self->_set_live_metadata(@_);
1069
1070    # Release the lock once it is done
1071    $self->unlock_collection($username, $collect);
1072}
1073
1074sub set_index_metadata_entry
1075{
1076    print STDERR "1\n";
1077    my $self = shift @_;
1078    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
1079   
1080    # To people who know $collect_tail please add some comments
1081    # Obtain path to the database
1082    my $collect_tail = $collect;
1083    $collect_tail =~ s/^.*[\/|\\]//;
1084    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1085    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1086   
1087    print STDERR "2\n";
1088#   print STDERR "**** infodb file path = $infodb_file_path\n";
1089#   print STDERR "***** infodb type = $infodbtype\n";
1090   
1091    # Read the docid entry
1092    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1093   
1094    # Set the metadata value
1095    if (defined $metapos) {
1096    print STDERR "3\n";
1097    # if metamode=accumulate AND metapos, warn user and then use metapos
1098    if (defined $metamode && $metamode eq "accumulate") {
1099        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1100        print STDERR "**** Proceeding by using metapos\n";
1101    }
1102    $doc_rec->{$metaname}->[$metapos] = $metavalue;
1103    }
1104    elsif (defined $prevmetavalue) {
1105    print STDERR "4\n";
1106        my $array = $doc_rec->{$metaname};
1107        my $length = @$array;
1108
1109        my $found = 0;
1110        for (my $i = 0; $i < $length; $i++){
1111            if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1112                $doc_rec->{$metaname}->[$i] = $metavalue;
1113                $found = 1;
1114                last;               
1115            }
1116        }
1117
1118        if($found == 0){
1119            $doc_rec->{$metaname} = [ $metavalue ];
1120        }
1121    }
1122    elsif (defined $metamode && $metamode eq "override") {
1123    print STDERR "5\n";
1124    $doc_rec->{$metaname} = [ $metavalue ];
1125    }
1126    else { # default for index was to override, but because accumulate is less destructive,
1127    # and because accumulate is the default for archives and import, that's the new default for index too
1128    print STDERR "6\n";
1129    if(defined $doc_rec->{$metaname}) {
1130        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1131    } else {
1132        $doc_rec->{$metaname} = [ $metavalue ];
1133    }
1134    }
1135    print STDERR "6\n";
1136 
1137    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1138   
1139    return $status;
1140   
1141}
1142
1143sub _set_import_metadata
1144{
1145    my $self = shift @_;
1146
1147    my $collect   = $self->{'collect'};
1148    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1149    my $infodbtype = $self->{'infodbtype'};
1150#    my $gsdlhome  = $self->{'gsdlhome'};
1151     
1152    # Obtain the collect and archive dir   
1153    my $site = $self->{'site'};
1154    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1155    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1156    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1157   
1158    # look up additional args
1159    # want either d= or f=
1160    my $docid  = $self->{'d'};
1161    my ($docid_root,$docid_secnum);
1162    if(defined $docid) {   
1163    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
1164    # as yet no support for setting subsection metadata in metadata.xml
1165    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1166        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
1167        return;
1168    }
1169    }
1170
1171    my $import_file  = $self->{'f'};
1172    if ((!defined $docid) && (!defined $import_file)) {
1173    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1174    }
1175
1176    # Get the parameters and set default mode to "accumulate"
1177    my $metaname   = $self->{'metaname'};
1178    my $metavalue  = $self->{'metavalue'};
1179##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1180    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1181   
1182    my $metamode   = $self->{'metamode'};
1183    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1184    # make "accumulate" the default (less destructive, as won't actually
1185    # delete any existing values)
1186    $metamode = "accumulate";
1187    }
1188
1189    # adding metapos and prevmetavalue support to import_metadata subroutines
1190    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1191    my $prevmetavalue = $self->{'prevmetavalue'};
1192
1193    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1194    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1195
1196    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1197    $mess .= "  $metaname";
1198    $mess .= " = $metavalue";
1199    $mess .= " ($metamode)\n";
1200
1201    $gsdl_cgi->generate_ok_message($mess);
1202
1203    #return $status; # in case calling functions have any further use for this
1204}
1205
1206# the version of set_index_meta that doesn't do authentication
1207sub _set_archives_metadata
1208{
1209    my $self = shift @_;
1210
1211    my $collect   = $self->{'collect'};
1212    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1213    my $infodbtype = $self->{'infodbtype'};
1214   
1215    # Obtain the collect and archive dir   
1216    my $site = $self->{'site'};
1217    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1218    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1219
1220    # look up additional args
1221    my $docid  = $self->{'d'};
1222    my $metaname   = $self->{'metaname'};
1223    my $metavalue  = $self->{'metavalue'};
1224    my $prevmetavalue = $self->{'prevmetavalue'};
1225   
1226    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1227                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
1228
1229    my $metamode   = $self->{'metamode'};
1230    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1231    # make "accumulate" the default (less destructive, as won't actually
1232    # delete any existing values)
1233    $metamode = "accumulate";
1234    }
1235
1236    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1237                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1238   
1239    if ($status == 0) {
1240    my $mess = "set-archives-metadata successful: Key[$docid]\n";
1241    $mess .= "  $metaname";
1242    $mess .= "->[$metapos]" if (defined $metapos);
1243    $mess .= " = $metavalue";
1244    $mess .= " ($metamode)\n";
1245   
1246    $gsdl_cgi->generate_ok_message($mess); 
1247    }
1248    else {
1249    my $mess .= "Failed to set archives metadata key: $docid\n";
1250    $mess .= "Exit status: $status\n";
1251    if(defined $self->{'error_msg'}) {
1252        $mess .= "Error Message: $self->{'error_msg'}\n";
1253    } else {
1254        $mess .= "System Error Message: $!\n";
1255    }
1256    $mess .= "-" x 20 . "\n";
1257   
1258    $gsdl_cgi->generate_error($mess);
1259    }
1260
1261    #return $status; # in case calling functions have any further use for this
1262}
1263
1264
1265# the version of set_index_meta that doesn't do authentication
1266sub _set_index_metadata
1267{
1268    print STDERR "START SET INDEX METADATA\n";
1269    my $self = shift @_;
1270
1271    my $collect   = $self->{'collect'};
1272    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1273
1274    my $site = $self->{'site'};
1275    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1276
1277    # look up additional args
1278    my $docid     = $self->{'d'};
1279    my $metaname  = $self->{'metaname'};
1280    my $metapos   = $self->{'metapos'}; # undef has meaning
1281    my $metavalue = $self->{'metavalue'};
1282    my $infodbtype = $self->{'infodbtype'};
1283    my $metamode  = $self->{'metamode'};
1284    my $prevmetavalue = $self->{'prevmetavalue'};
1285
1286    print STDERR "SETTING INDEX METADATA ENTRY\n";
1287    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1288    print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
1289    if ($status != 0) {
1290        # Catch error if set infodb entry failed
1291    my $mess = "Failed to set metadata key: $docid\n";
1292   
1293    $mess .= "PATH: $ENV{'PATH'}\n";
1294    $mess .= "Exit status: $status\n";
1295    $mess .= "System Error Message: $!\n";
1296   
1297    $gsdl_cgi->generate_error($mess);
1298    }
1299    else {
1300    my $mess = "set-index-metadata successful: Key[$docid]\n";
1301    $mess .= "  $metaname";
1302    $mess .= "->[$metapos]" if (defined $metapos);
1303    $mess .= " = $metavalue\n";
1304   
1305    $gsdl_cgi->generate_ok_message($mess);
1306    }
1307
1308    print STDERR "END SET INDEX METADATA\n";
1309    #return $status; # in case calling functions have any further use for this
1310}
1311
1312sub set_index_metadata
1313{
1314    my $self = shift @_;
1315
1316    my $username  = $self->{'username'};
1317    my $collect   = $self->{'collect'};
1318    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1319    #my $gsdlhome  = $self->{'gsdlhome'};
1320
1321    if ($baseaction::authentication_enabled) {
1322    # Ensure the user is allowed to edit this collection   
1323    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1324    }
1325
1326    my $site = $self->{'site'};
1327    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1328   
1329    $gsdl_cgi->checked_chdir($collect_dir);
1330
1331    # Obtain the collect dir
1332    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1333
1334    # Make sure the collection isn't locked by someone else
1335    $self->lock_collection($username, $collect);
1336
1337    $self->_set_index_metadata(@_);
1338   
1339    # Release the lock once it is done
1340    $self->unlock_collection($username, $collect);
1341}
1342
1343# call this to set the metadata for a combination of dirs archives, import or index, or live
1344# if none specified, defaults to index which was the original behaviour of set_metadata.
1345sub set_metadata
1346{
1347    my $self = shift @_;
1348
1349    # Testing that not defining a variable, setting it to "" or to "  " all return false
1350    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1351
1352    my $where = $self->{'where'};
1353    if(!$where) {   
1354    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1355    return;
1356    }
1357
1358    # authenticate and lock collection once, even if processing multiple dirs
1359    my $username  = $self->{'username'};
1360    my $collect   = $self->{'collect'};
1361    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1362   
1363    if ($baseaction::authentication_enabled) {
1364    # Ensure the user is allowed to edit this collection   
1365    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1366    }
1367   
1368    if($where =~ m/index/) {
1369    my $site = $self->{'site'};
1370    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1371    $gsdl_cgi->checked_chdir($collect_dir);
1372    }
1373
1374    # Make sure the collection isn't locked by someone else
1375    $self->lock_collection($username, $collect);
1376
1377
1378    # now at last can set the metadata. $where can specify multiple
1379    # $where is of the form: import|archives|index, or a subset thereof
1380
1381    #my @whichdirs = split('\|', $where);
1382
1383    # just check whether $where contains import/archives/index/live in turn, and
1384    # for each case, process it accordingly
1385    if($where =~ m/import/) {
1386    $self->_set_import_metadata(@_);       
1387    }
1388
1389    if($where =~ m/archives/) {
1390
1391    # look up docID arg which is optional to set_metadata because it's optional
1392    # to set_import, but which is compulsory to set_archives_metadata
1393    my $docid     = $self->{'d'};
1394    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1395        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1396    }
1397    # we have a docid, so can set archives meta
1398    $self->_set_archives_metadata(@_); 
1399    }
1400
1401    if($where =~ m/index/) {
1402   
1403    # look up docID arg which is optional to set_metadata because it's optional
1404    # to set_import, but which is compulsory to set_archives_metadata
1405    my $docid     = $self->{'d'};
1406    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1407        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1408    }
1409    # we have a docid, so can set index meta
1410    $self->_set_index_metadata(@_);
1411    }   
1412
1413    if($where =~ m/live/) {
1414    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1415    }
1416
1417    # Release the lock once it is done
1418    $self->unlock_collection($username, $collect);
1419}
1420
1421sub set_metadata_array
1422{
1423    my $self = shift @_;
1424
1425    my $where = $self->{'where'};
1426    if(!$where) {   
1427    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1428    return;
1429    }
1430
1431    my $username  = $self->{'username'};
1432    my $collect   = $self->{'collect'};
1433    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1434
1435    if ($baseaction::authentication_enabled) {
1436    # Ensure the user is allowed to edit this collection
1437    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1438    }
1439
1440    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1441    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1442    my $site = $self->{'site'};
1443    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1444    $gsdl_cgi->checked_chdir($collect_dir);
1445
1446    # Make sure the collection isn't locked by someone else
1447    $self->lock_collection($username, $collect);
1448
1449    if($where =~ m/import/) {
1450    $self->_set_import_metadata_array(@_);
1451    }
1452    if($where =~ m/archives/) {
1453    $self->_set_archives_metadata_array(@_);
1454    }
1455    if($where =~ m/index/) {
1456    $self->_set_index_metadata_array(@_);
1457    }
1458    if($where =~ m/live/) {
1459        $self->_set_live_metadata_array(@_);
1460    }
1461
1462    # Release the lock once it is done
1463    $self->unlock_collection($username, $collect);
1464}
1465
1466sub _set_index_metadata_array
1467{
1468    my $self = shift @_;
1469
1470    my $collect   = $self->{'collect'};
1471    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1472    my $site = $self->{'site'};
1473    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1474
1475   
1476    # look up additional args
1477   
1478    my $infodbtype = $self->{'infodbtype'};
1479   
1480    my $json_str      = $self->{'json'};
1481    my $doc_array = decode_json $json_str;
1482   
1483   
1484    my $global_status = 0;
1485    my $global_mess = "";
1486   
1487    my @all_docids = ();
1488   
1489    foreach my $doc_array_rec ( @$doc_array ) {
1490   
1491    my $status = -1;
1492    my $docid     = $doc_array_rec->{'docid'};
1493   
1494    push(@all_docids,$docid);
1495
1496    my $metaname  = $doc_array_rec->{'metaname'};
1497    if(defined $metaname) {
1498        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1499        my $metavalue = $doc_array_rec->{'metavalue'};
1500        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1501
1502        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1503    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1504        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1505       
1506        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1507        $metaname  = $metatable_rec->{'metaname'};
1508        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1509        my $metapos = undef;
1510        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1511
1512        foreach my $metavalue ( @$metavals ) { # metavals is an array
1513            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1514            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1515            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1516            # metavalue for this metaname will be lost
1517            $metamode = "accumulate";
1518            }
1519        }           
1520        }
1521    }
1522   
1523    if ($status != 0) {
1524        # Catch error if set infodb entry failed
1525        $global_status = $status;
1526        $global_mess .= "Failed to set metadata key: $docid\n";
1527        $global_mess .= "Exit status: $status\n";
1528        $global_mess .= "System Error Message: $!\n";
1529        $global_mess .= "-" x 20;
1530    }
1531    }
1532   
1533    if ($global_status != 0) {
1534    $global_mess .= "PATH: $ENV{'PATH'}\n";
1535    $gsdl_cgi->generate_error($global_mess);
1536    }
1537    else {
1538    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1539    $gsdl_cgi->generate_ok_message($mess);
1540    }
1541}
1542
1543sub set_index_metadata_array
1544{
1545    my $self = shift @_;
1546
1547    my $username  = $self->{'username'};
1548    my $collect   = $self->{'collect'};
1549    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1550#    my $gsdlhome  = $self->{'gsdlhome'};
1551
1552    if ($baseaction::authentication_enabled) {
1553    # Ensure the user is allowed to edit this collection   
1554    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1555    }
1556
1557    my $site = $self->{'site'};
1558    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1559   
1560    $gsdl_cgi->checked_chdir($collect_dir);
1561
1562    # Obtain the collect dir
1563    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1564
1565    # Make sure the collection isn't locked by someone else
1566    $self->lock_collection($username, $collect);
1567
1568    $self->_set_index_metadata_array(@_);
1569
1570    # Release the lock once it is done
1571    $self->unlock_collection($username, $collect);
1572}
1573
1574# experimental, newly added in and untested
1575sub _set_live_metadata_array
1576{
1577    my $self = shift @_;
1578
1579    my $collect   = $self->{'collect'};
1580    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1581
1582    my $site = $self->{'site'};
1583    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1584
1585   
1586    # look up additional args
1587    my $infodbtype = $self->{'infodbtype'};
1588    # To people who know $collect_tail please add some comments
1589    # Obtain path to the database
1590    my $collect_tail = $collect;
1591    $collect_tail =~ s/^.*[\/|\\]//;
1592    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1593    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1594
1595   
1596    my $json_str      = $self->{'json'};
1597    my $doc_array = decode_json $json_str;
1598   
1599   
1600    my $global_status = 0;
1601    my $global_mess = "";
1602   
1603    my @all_docids = ();
1604
1605
1606    foreach my $doc_array_rec ( @$doc_array ) {
1607   
1608    my $status = -1;
1609    my $docid     = $doc_array_rec->{'docid'};
1610
1611    push(@all_docids,$docid);
1612
1613    my $metaname  = $doc_array_rec->{'metaname'};
1614    if(defined $metaname) {
1615        my $dbkey = "$docid.$metaname";
1616        my $metavalue = $doc_array_rec->{'metavalue'};
1617
1618        # Set the new value
1619        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1620        $status = system($cmd);
1621
1622    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1623        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1624        foreach my $metatable_rec ( @$metatable ) {
1625        $metaname  = $metatable_rec->{'metaname'};
1626        my $dbkey = "$docid.$metaname";
1627
1628        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1629        foreach my $metavalue ( @$metavals ) {
1630             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1631             $status = system($cmd);
1632        }
1633        }
1634       
1635    }
1636
1637    if ($status != 0) {
1638        # Catch error if gdbmget failed
1639        $global_status = $status;
1640        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1641        $global_mess .= "Exit status: $status\n";
1642        $global_mess .= "System Error Message: $!\n";
1643        $global_mess .= "-" x 20;
1644    }
1645    }
1646   
1647    if ($global_status != 0) {
1648    $global_mess .= "PATH: $ENV{'PATH'}\n";
1649    $gsdl_cgi->generate_error($global_mess);
1650    }
1651    else {
1652    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1653    $gsdl_cgi->generate_ok_message($mess);
1654    }
1655}
1656
1657sub set_live_metadata_array
1658{
1659    my $self = shift @_;
1660
1661    my $username  = $self->{'username'};
1662    my $collect   = $self->{'collect'};
1663    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1664
1665    if ($baseaction::authentication_enabled) {
1666    # Ensure the user is allowed to edit this collection   
1667    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1668    }
1669
1670    my $site = $self->{'site'};
1671    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1672   
1673    $gsdl_cgi->checked_chdir($collect_dir);
1674
1675    # Make sure the collection isn't locked by someone else
1676    $self->lock_collection($username, $collect);
1677
1678    $self->_set_live_metadata_array(@_);
1679
1680    # Release the lock once it is done
1681    $self->unlock_collection($username, $collect);
1682}
1683
1684
1685sub dxml_metadata
1686{
1687    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1688    my $metaname = $parser->{'parameters'}->{'metaname'};
1689    my $metamode = $parser->{'parameters'}->{'metamode'};
1690   
1691###!!!    print STDERR "**** Processing closing </Metadata> tag\n";
1692   
1693    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1694   
1695    # Find the right metadata tag and checks if we are going to
1696    # override it
1697    #
1698    # Note: This over writes the first metadata block it
1699    # encountered. If there are multiple Sections in the doc.xml, it
1700    # might not behave as you would expect
1701
1702    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1703##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1704##    print STDERR "**** metamode = $metamode\n";
1705   
1706    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1707    {
1708        my $name_attr = $attrHash->{'name'};
1709        if (($name_attr eq $metaname) && ($metamode eq "override"))
1710        {
1711            if (!defined $parser->{'parameters'}->{'poscount'})
1712            {
1713                $parser->{'parameters'}->{'poscount'} = 0;
1714            }
1715            else
1716            {
1717                $parser->{'parameters'}->{'poscount'}++;
1718            }
1719           
1720            if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1721            {
1722                ##print STDERR "#### got match!!\n";
1723                # Get the value and override the current value
1724                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1725                $attrHash->{'_content'} = $metavalue;
1726               
1727                # Don't want it to wipe out any other pieces of metadata
1728                $parser->{'parameters'}->{'metamode'} = "done";
1729            }
1730            elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1731            {
1732                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1733                $attrHash->{'_content'} = $metavalue;
1734                $parser->{'parameters'}->{'metamode'} = "done";
1735            }
1736        }
1737    }
1738
1739    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1740    return [$tagname => $attrHash];
1741}
1742
1743# This method exists purely for catching invalid section numbers that the client
1744# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1745# we've seen all the Sections in the doc.xml, and none of their section nums matched
1746# if the metamode has not been set to 'done' by then.
1747sub dxml_archive
1748{
1749    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1750    my $metamode = $parser->{'parameters'}->{'metamode'};
1751   
1752    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1753    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1754   
1755#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1756   
1757    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1758    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1759
1760    my $metaname = $parser->{'parameters'}->{'metaname'};
1761    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1762   
1763    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1764    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1765    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1766    print STDERR "\tin section $opt_doc_secnum.\n";
1767    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1768    }
1769   
1770    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1771    return [$tagname => $attrHash];
1772}
1773
1774sub dxml_description
1775{
1776    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1777    my $metamode = $parser->{'parameters'}->{'metamode'};
1778
1779    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1780    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1781   
1782###!!!  print STDERR "**** Processing a closing </Description> tag \n";
1783#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1784   
1785    # Accumulate the metadata
1786
1787    # We'll be accumulating metadata at this point if we haven't found and therefore
1788    # haven't processed the metadata yet.
1789    # For subsections, this means that if we're at a matching subsection, but haven't
1790    # found the correct metaname to override in that subsection, we accumulate it as new
1791    # meta in the subsection by adding it to the current description.
1792    # If there's no subsection info for the metadata, it will accumulate at the top level
1793    # section description if we hadn't found a matching metaname to override at this point.
1794
1795    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1796    # at the toplevel section. In the latter case, it means we want to process meta in the
1797    # toplevel section. So the eq check between the values below will work in all cases.
1798   
1799    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1800    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1801    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1802
1803    if ($curr_secnum eq $opt_doc_secnum
1804        && ($metamode eq "accumulate" || $metamode eq "override")) {
1805        if ($metamode eq "override") {
1806        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1807        }
1808
1809        # If we get to here and metamode is override, this means there
1810        # was no existing value to overide => treat as an append operation
1811       
1812        # Tack a new metadata tag on to the end of the <Metadata>+ block
1813        my $metaname = $parser->{'parameters'}->{'metaname'};
1814        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1815       
1816        my $metadata_attr = {
1817        '_content' => $metavalue,
1818        'name' => $metaname,
1819        'mode' => "accumulate"
1820        };
1821       
1822        my $append_metadata = [ "Metadata" => $metadata_attr ];
1823        my $description_content = $attrHash->{'_content'};
1824       
1825        print "Appending metadata to doc.xml\n";
1826       
1827        if (ref($description_content)) {
1828        # got some existing interesting nested content
1829        push(@$description_content, "    ", $append_metadata ,"\n        ");
1830        }
1831        else {
1832        #description_content is most likely a string such as "\n"
1833        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1834        }
1835       
1836        $parser->{'parameters'}->{'metamode'} = "done";
1837    }       
1838    else {
1839        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1840##      print STDERR "**** NOT ACCUMULATE?!? \n";
1841    }
1842
1843    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1844    return [$tagname => $attrHash];
1845}
1846
1847
1848sub dxml_start_section
1849{
1850    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1851
1852    my $new_depth = scalar(@$contextArray);
1853
1854#   print STDERR "**** START SECTION \n";
1855   
1856    if ($new_depth == 1) {
1857    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1858    $parser->{'parameters'}->{'curr_section_num'}   = "";
1859    }
1860
1861    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1862    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1863
1864    my $new_secnum;
1865
1866    if ($new_depth > $old_depth) {
1867    # child subsection
1868    $new_secnum = "$old_secnum.1";
1869    }
1870    elsif ($new_depth == $old_depth) {
1871    # sibling section => increase it's value by 1
1872    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1873    $tail_num++;
1874    $new_secnum = $old_secnum;
1875    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1876    }
1877    else {
1878    # back up to parent section => lopp off tail
1879    $new_secnum = $old_secnum;
1880    $new_secnum =~ s/\.\d+$//;
1881    }
1882
1883    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1884    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1885   
1886    1;
1887}
1888
1889sub edit_xml_file
1890{
1891    my $self = shift @_;
1892    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1893
1894    # use XML::Rules to add it in (read in and out again)
1895    my $parser = XML::Rules->new(start_rules     => $start_rules,
1896                 rules           => $rules,
1897                 style           => 'filter',
1898                                 output_encoding => 'utf8' );
1899
1900    my $xml_in = "";
1901    if (!open(MIN,"<$filename")) {
1902    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1903    }
1904    else {
1905        # Read all the text in
1906    my $line;
1907    while (defined ($line=<MIN>)) {
1908        $xml_in .= $line;
1909    }
1910    close(MIN);
1911   
1912    my $MOUT;   
1913    if (!open($MOUT,">$filename")) {
1914        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1915    }
1916    else {
1917        # Matched lines will get handled by the call backs
1918##      my $xml_out = "";
1919
1920        binmode($MOUT,":utf8");
1921        $parser->filter($xml_in,$MOUT, $options);
1922
1923#       binmode(MOUT,":utf8");
1924#       print MOUT $xml_out;
1925        close($MOUT);       
1926    }
1927    }
1928
1929    # copy across any custom error information that was stored during parsing
1930    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1931}
1932
1933sub edit_doc_xml
1934{
1935    my $self = shift @_;
1936    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1937
1938    my $info_mess = <<RAWEND;
1939****************************
1940  edit_doc_xml()
1941****************************
1942RAWEND
1943
1944    $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1945    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1946    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1947    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1948    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1949    $info_mess .= " opt_secnum  = $opt_secnum\n"  if defined($opt_secnum);
1950    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1951     
1952    $info_mess .= "****************************\n";
1953
1954    $gsdl_cgi->generate_message($info_mess);
1955   
1956    # To monitor which section/subsection number we are in
1957    my @start_rules =
1958    ( 'Section'    => \&dxml_start_section );
1959
1960    # use XML::Rules to add it in (read in and out again)
1961    # Set the call back functions
1962    my @rules =
1963    ( _default => 'raw',
1964      'Metadata'    => \&dxml_metadata,
1965      'Description' => \&dxml_description,
1966      'Archive'     => \&dxml_archive); # just for catching errors at end
1967     
1968    # Sets the parameters
1969    my $options = { 'metaname'  => $metaname,
1970            'metapos'   => $metapos,
1971            'metavalue' => $metavalue,
1972            'metamode'  => $metamode,
1973            'prevmetavalue' => $prevmetavalue };
1974           
1975    if (defined $opt_secnum) {
1976    $options->{'secnum'} = $opt_secnum;
1977    }
1978
1979    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1980}
1981
1982sub set_archives_metadata_entry
1983{
1984    my $self = shift @_;
1985    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1986
1987    my $info_mess = <<RAWEND;
1988****************************
1989  set_archives_metadata_entry()
1990****************************
1991RAWEND
1992
1993    $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1994    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1995    $info_mess .= " collect     = $collect\n"     if defined($collect);
1996    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
1997    $info_mess .= " docid       = $docid\n"       if defined($docid);
1998    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1999    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2000    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2001    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2002    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2003     
2004    $info_mess .= "****************************\n";
2005
2006    $gsdl_cgi->generate_message($info_mess);
2007   
2008    # Obtain the doc.xml path for the specified docID
2009    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2010
2011    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2012    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2013    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2014   
2015    # The $doc_xml_file is relative to the archives, and now let's get the full path
2016    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2017    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
2018
2019    # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
2020    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2021    # Thereafter, we will finally be setting the overriding metavalue for this metaname
2022    if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2023    # remove all values of $metaname metadata
2024    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
2025    }
2026    # Edit the doc.xml file with the specified metadata name, value and position.
2027    # TODO: there is a potential problem here as this edit_doc_xml function
2028    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2029    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2030   
2031    # dxml_metadata method ignores metapos if metamode anything other than override
2032    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
2033            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
2034
2035    # return 0; # return 0 for now to indicate no error
2036    return (defined $self->{'error_msg'}) ? 1 : 0;
2037}
2038
2039
2040sub set_archives_metadata
2041{
2042    my $self = shift @_;
2043
2044    my $username  = $self->{'username'};
2045    my $collect   = $self->{'collect'};
2046    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2047   
2048    if ($baseaction::authentication_enabled) {
2049    # Ensure the user is allowed to edit this collection
2050    $self->authenticate_user($username, $collect);
2051    }
2052
2053    # Make sure the collection isn't locked by someone else
2054    $self->lock_collection($username, $collect);
2055
2056    $self->_set_archives_metadata(@_);
2057
2058    # Release the lock once it is done
2059    $self->unlock_collection($username, $collect);
2060}
2061
2062sub _set_archives_metadata_array
2063{
2064    my $self = shift @_;
2065   
2066    my $collect   = $self->{'collect'};
2067    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2068    my $site = $self->{'site'};
2069    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2070
2071    # look up additional args
2072   
2073    my $infodbtype = $self->{'infodbtype'};
2074   
2075    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2076   
2077    my $json_str      = $self->{'json'};
2078    my $doc_array = decode_json $json_str;
2079   
2080   
2081    my $global_status = 0;
2082    my $global_mess = "";
2083   
2084    my @all_docids = ();
2085   
2086    foreach my $doc_array_rec ( @$doc_array ) {
2087    my $status    = -1;
2088    my $docid     = $doc_array_rec->{'docid'};
2089
2090    push(@all_docids,$docid);
2091   
2092    my $metaname  = $doc_array_rec->{'metaname'};
2093    if(defined $metaname) {
2094       
2095        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2096
2097        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2098        my $metavalue = $doc_array_rec->{'metavalue'};
2099        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2100       
2101       
2102        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2103        # make "accumulate" the default (less destructive, as it won't actually
2104        # delete any existing values)
2105        $metamode = "accumulate";
2106        }       
2107       
2108        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2109                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2110    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2111        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2112       
2113        foreach my $metatable_rec ( @$metatable ) {
2114        $metaname  = $metatable_rec->{'metaname'};
2115        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2116        my $metapos = undef;
2117        my $prevmetavalue = undef;
2118        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2119       
2120        foreach my $metavalue ( @$metavals ) {
2121            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2122                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2123           
2124            if($metamode eq "override") { # now, having overridden the metavalue for the first,
2125            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2126            # metavalue for this metaname will be lost
2127            $metamode = "accumulate";
2128            }
2129        }           
2130        }       
2131    }
2132       
2133    if ($status != 0) {
2134        # Catch error if set infodb entry failed
2135        $global_status = $status;
2136        $global_mess .= "Failed to set metadata key: $docid\n";
2137        $global_mess .= "Exit status: $status\n";
2138        $global_mess .= "System Error Message: $!\n";
2139        $global_mess .= "-" x 20 . "\n";
2140    }
2141    }
2142   
2143    if ($global_status != 0) {
2144    $global_mess .= "PATH: $ENV{'PATH'}\n";
2145    $gsdl_cgi->generate_error($global_mess);
2146    }
2147    else {
2148    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2149    $gsdl_cgi->generate_ok_message($mess);
2150    }
2151}
2152
2153sub set_archives_metadata_array
2154{
2155    my $self = shift @_;
2156
2157    my $username  = $self->{'username'};
2158    my $collect   = $self->{'collect'};
2159    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2160#    my $gsdlhome  = $self->{'gsdlhome'};
2161
2162    if ($baseaction::authentication_enabled) {
2163    # Ensure the user is allowed to edit this collection   
2164    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2165    }
2166
2167    my $site = $self->{'site'};
2168    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2169   
2170    $gsdl_cgi->checked_chdir($collect_dir);
2171
2172    # Obtain the collect dir
2173    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2174
2175    # Make sure the collection isn't locked by someone else
2176    $self->lock_collection($username, $collect);
2177
2178    $self->_set_archives_metadata_array(@_);
2179   
2180    # Release the lock once it is done
2181    $self->unlock_collection($username, $collect);
2182}
2183
2184sub _remove_archives_metadata
2185{
2186    my $self = shift @_;
2187
2188    my $collect   = $self->{'collect'};
2189    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2190#   my $gsdlhome  = $self->{'gsdlhome'};
2191    my $infodbtype = $self->{'infodbtype'};
2192   
2193    my $site = $self->{'site'};
2194       
2195    # Obtain the collect and archive dir   
2196    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2197   
2198    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2199
2200    # look up additional args
2201    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2202   
2203    my $metaname = $self->{'metaname'};
2204    my $metapos = $self->{'metapos'};
2205
2206    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2207
2208    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2209    # default to deleting the first metavalue for the given metaname
2210    # Beware that if both metapos AND metavalue are defined, both matches (if any)
2211    # seem to get deleted in one single remove_archives_meta action invocation.
2212    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2213    # deleted, both get deleted.
2214    if(!defined $metapos && !defined $metavalue) {
2215        $metapos = 0;
2216    }
2217
2218    my $metamode = $self->{'metamode'} || undef;
2219
2220    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2221    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2222
2223    # This now stores the full pathname
2224    my $doc_filename = $doc_rec->{'doc-file'}->[0];
2225
2226    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2227#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
2228   
2229    if ($status == 0)
2230    {
2231        my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
2232        $mess .= "  $metaname";
2233        $mess .= "->[$metapos]" if (defined $metapos);
2234        $mess .= " ($metavalue)" if (defined $metavalue);
2235        $gsdl_cgi->generate_ok_message($mess); 
2236    }
2237    else
2238    {
2239        my $mess .= "Failed to remove archives metadata key: $docid\n";
2240        $mess .= "Exit status: $status\n";
2241        $mess .= "System Error Message: $!\n";
2242        $mess .= "-" x 20 . "\n";
2243       
2244        $gsdl_cgi->generate_error($mess);
2245    }
2246   
2247    #return $status; # in case calling functions have a use for this
2248}
2249
2250sub remove_archives_metadata
2251{
2252    my $self = shift @_;
2253
2254    my $username  = $self->{'username'};
2255    my $collect   = $self->{'collect'};
2256    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2257   
2258    if ($baseaction::authentication_enabled)
2259    {
2260        # Ensure the user is allowed to edit this collection       
2261        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2262    }
2263
2264    # Make sure the collection isn't locked by someone else
2265    $self->lock_collection($username, $collect);
2266
2267    $self->_remove_archives_metadata(@_);
2268
2269    # Release the lock once it is done
2270    $self->unlock_collection($username, $collect);
2271}
2272
2273sub remove_from_doc_xml
2274{
2275    my $self = shift @_;
2276    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
2277   
2278    my @start_rules = ('Section' => \&dxml_start_section);
2279   
2280    # Set the call-back functions for the metadata tags
2281    my @rules =
2282    (
2283        _default => 'raw',
2284        'Metadata' => \&rfdxml_metadata
2285    );
2286       
2287    my $parser = XML::Rules->new
2288    (
2289        start_rules => \@start_rules,
2290        rules => \@rules,
2291        style => 'filter',
2292        output_encoding => 'utf8',
2293#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2294        stripspaces => 2|0|0 # ineffectual
2295    );
2296   
2297    my $status = 0;
2298    my $xml_in = "";
2299    if (!open(MIN,"<$doc_xml_filename"))
2300    {
2301        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2302        $status = 1;
2303    }
2304    else
2305    {
2306        # Read them in
2307        my $line;
2308        while (defined ($line=<MIN>)) {
2309            $xml_in .= $line;
2310        }
2311        close(MIN);
2312
2313        # Filter with the call-back functions
2314        my $xml_out = "";
2315
2316        my $MOUT;
2317        if (!open($MOUT,">$doc_xml_filename")) {
2318            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2319            $status = 1;
2320        }
2321        else {
2322            binmode($MOUT,":utf8");
2323            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
2324            close($MOUT);       
2325        }
2326    }
2327    return $status;
2328}
2329
2330sub rfdxml_metadata
2331{
2332    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2333
2334    # For comparisons, toplevel section is indicated by ""
2335    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2336    my $secid = $parser->{'parameters'}->{'secid'} || "";
2337
2338    if (!($secid eq $curr_sec_num))
2339    {
2340        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2341        return [$tagname => $attrHash];
2342    }
2343
2344    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2345    {
2346        if (!defined $parser->{'parameters'}->{'poscount'})
2347        {
2348            $parser->{'parameters'}->{'poscount'} = 0;
2349        }
2350        else
2351        {
2352            $parser->{'parameters'}->{'poscount'}++;
2353        }
2354       
2355        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2356        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {         
2357            return [];
2358        }
2359
2360        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2361        {   
2362            return [];
2363        }
2364       
2365        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2366        {   
2367            return [];
2368        }
2369    }
2370   
2371    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2372    return [$tagname => $attrHash];
2373}
2374
2375sub mxml_metadata
2376{
2377    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2378    my $metaname = $parser->{'parameters'}->{'metaname'};
2379    my $metamode = $parser->{'parameters'}->{'metamode'};
2380
2381    # Report error if we don't see FileName tag before this
2382    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2383   
2384    # Don't do anything if we are not in the right FileSet
2385    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2386    if ($file_regexp =~ /\.\*/) {
2387    # Only interested in a file_regexp if it specifies precisely one
2388    # file. 
2389    # So, skip anything with a .* in it as it is too general
2390##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2391    return [$tagname => $attrHash];
2392    }
2393    my $src_file = $parser->{'parameters'}->{'src_file'};
2394    if (!($src_file =~ /$file_regexp/)) {
2395    return [$tagname => $attrHash];
2396    }
2397##    print STDERR "*** mxl metamode = $metamode\n";
2398
2399    # Find the right metadata tag and checks if we are going to override it
2400    my $name_attr = $attrHash->{'name'};
2401    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2402
2403    # now metadata.xml functions need to keep track of metapos
2404    if (!defined $parser->{'parameters'}->{'poscount'})
2405    {
2406        $parser->{'parameters'}->{'poscount'} = 0;
2407    }
2408    else
2409    {
2410        $parser->{'parameters'}->{'poscount'}++;
2411    }
2412
2413    # If either the metapos or prevmetavalue is set,
2414        # get the value and override the current value
2415    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2416
2417    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2418    {
2419        $attrHash->{'_content'} = $metavalue;
2420
2421        ##  print STDERR "**** overriding metadata.xml\n";
2422       
2423        # Don't want it to wipe out any other pieces of metadata
2424        $parser->{'parameters'}->{'metamode'} = "done";
2425    }
2426    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2427    {
2428        $attrHash->{'_content'} = $metavalue;
2429        $parser->{'parameters'}->{'metamode'} = "done";
2430    }
2431    }
2432
2433    # mxml_description will process the metadata if metadata is accumulate,
2434    # or if we haven't found the metadata to override
2435
2436    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2437    return [$tagname => $attrHash];
2438}
2439
2440
2441sub mxml_description
2442{
2443    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2444    my $metamode = $parser->{'parameters'}->{'metamode'};   
2445
2446    # Failed... Report error if we don't see FileName tag before this
2447    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2448
2449    # Don't do anything if we are not in the right FileSet
2450    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2451    if ($file_regexp =~ m/\.\*/) {
2452    # Only interested in a file_regexp if it specifies precisely one
2453    # file. 
2454    # So, skip anything with a .* in it as it is too general
2455    return [$tagname => $attrHash];
2456    }
2457    my $src_file = $parser->{'parameters'}->{'src_file'};
2458   
2459    if (!($src_file =~ m/$file_regexp/)) {
2460    return [$tagname => $attrHash];
2461    }
2462
2463    # Accumulate the metadata block to the end of the description block
2464    # Note: This adds metadata block to all description blocks, so if there are
2465    # multiple FileSets, it will add to all of them
2466    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2467
2468    # if metamode was "override" but get to here then it failed to
2469    # find an item to override, in which case it should append its
2470    # value to the end, just like the "accumulate" mode
2471
2472    if ($metamode eq "override") {
2473        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2474    }
2475
2476    # tack a new metadata tag on to the end of the <Metadata>+ block
2477    my $metaname = $parser->{'parameters'}->{'metaname'};
2478    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2479   
2480    my $metadata_attr = { '_content' => $metavalue,
2481                  'name'     => $metaname,
2482                  'mode'     => "accumulate" };
2483
2484    my $append_metadata = [ "Metadata" => $metadata_attr ];
2485    my $description_content = $attrHash->{'_content'};
2486   
2487##  print STDERR "*** appending to metadata.xml\n";
2488
2489    # append the new metadata element to the end of the current
2490    # content contained inside this tag
2491    if (ref($description_content) eq "") {
2492        # => string or numeric literal
2493        # this is caused by a <Description> block has no <Metadata> child elements
2494        # => set up an empty array in '_content'
2495        $attrHash->{'_content'} = [ "\n" ];
2496        $description_content = $attrHash->{'_content'};
2497    }
2498
2499    push(@$description_content,"    ", $append_metadata ,"\n        ");
2500    $parser->{'parameters'}->{'metamode'} = "done";
2501    }
2502
2503    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2504    return [$tagname => $attrHash];
2505}
2506
2507
2508sub mxml_filename
2509{
2510    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2511
2512    # Store the filename of the Current Fileset
2513    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2514    # FileName tag must come before Description tag
2515    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2516
2517    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2518    return [$tagname => $attrHash];
2519}
2520
2521
2522sub mxml_fileset
2523{
2524    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2525
2526    # Initilise the current_file
2527    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2528    # FileName tag must come before Description tag
2529    $parser->{'parameters'}->{'current_file'} = "";
2530
2531    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2532    return [$tagname => $attrHash];
2533}
2534
2535sub mxml_directorymetadata
2536{
2537    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2538
2539    # if we haven't processed the metadata when we reach the end of metadata.xml
2540    # it's because there's no particular FileSet element whose FileName matched
2541    # In which case, add a new FileSet for this FileName
2542    my $metamode = $parser->{'parameters'}->{'metamode'};
2543    if($metamode ne "done") {
2544   
2545    if ($metamode eq "override") {
2546        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2547    }
2548
2549    # If we get to here and metamode is override, this means there
2550    # was no existing value to overide => treat as an append operation
2551   
2552    # Create a new FileSet element and append to DirectoryMetadata
2553    # <FileSet>
2554    # <FileName>src_file</FileName>
2555    # <Description>
2556    # <Metadata mode="" name="">metavalue</Metadata>
2557    # </Description>
2558    # </FileSet>
2559    my $src_file = $parser->{'parameters'}->{'src_file'};
2560    my $metaname = $parser->{'parameters'}->{'metaname'};
2561    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2562    my $metadata_attr = {
2563        '_content' => $metavalue,
2564        'name' => $metaname,
2565        'mode' => "accumulate"
2566    };
2567    my $append_metadata = [ "Metadata" => $metadata_attr ];
2568    my $description_attr = { '_content' => [ "\n\t\t   ", $append_metadata, "\n\t\t"] };
2569    my $description_element = [ "Description" => $description_attr ];
2570
2571    #_content is not an attribute, it's special and holds the children of this element
2572    # including the textnode value embedded in this element if any.
2573    my $filename_attr = {'_content' => $src_file};
2574    my $filename_element = [ "FileName" => $filename_attr ];
2575
2576    my $fileset_attr = {};
2577    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2578    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2579   
2580   
2581    # get children of dirmeta, and push the new FileSet element onto it
2582    print "Appending metadata to metadata.xml\n";
2583    my $dirmeta_content = $attrHash->{'_content'};
2584    if (ref($dirmeta_content)) {
2585        # got some existing interesting nested content
2586        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2587        push(@$dirmeta_content, "\t", $fileset ,"\n");
2588    }
2589    else {
2590        #description_content is most likely a string such as "\n"
2591        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2592        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2593    }   
2594
2595    $parser->{'parameters'}->{'metamode'} = "done";
2596    }
2597    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2598    return [$tagname => $attrHash];
2599}
2600
2601
2602sub edit_metadata_xml
2603{
2604    my $self = shift @_;
2605    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2606
2607    # Set the call-back functions for the metadata tags
2608    my @rules =
2609    ( _default => 'raw',
2610          'FileName' => \&mxml_filename,
2611      'Metadata' => \&mxml_metadata,
2612      'Description' => \&mxml_description,
2613          'FileSet' => \&mxml_fileset,
2614      'DirectoryMetadata' => \&mxml_directorymetadata);
2615
2616    # use XML::Rules to add it in (read in and out again)
2617    my $parser = XML::Rules->new(rules => \@rules,
2618                 style => 'filter',
2619                                 output_encoding => 'utf8',
2620                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2621
2622    if (!-e $metadata_xml_filename) {
2623   
2624        if (open(MOUT,">$metadata_xml_filename")) {
2625           
2626            my $src_file_re = &util::filename_to_regex($src_file);
2627            # shouldn't the following also be in the above utility routine??
2628            # $src_file_re =~ s/\./\\./g;
2629       
2630            print MOUT "<?xml version=\"1.0\"?>\n";
2631            print MOUT "<DirectoryMetadata>\n";
2632            print MOUT " <FileSet>\n";
2633            print MOUT "  <FileName>$src_file_re</FileName>\n";
2634            print MOUT "  <Description>\n";
2635            print MOUT "  </Description>\n";
2636            print MOUT " </FileSet>\n";
2637            print MOUT "</DirectoryMetadata>\n";
2638
2639            close(MOUT);
2640        }
2641        else {
2642            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2643        }
2644    }
2645   
2646   
2647    my $xml_in = "";
2648    if (!open(MIN,"<$metadata_xml_filename")) {
2649        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2650    }
2651    else {
2652        # Read them in
2653        my $line;
2654        while (defined ($line=<MIN>)) {
2655            $xml_in .= $line;
2656        }
2657        close(MIN);
2658
2659        # Filter with the call-back functions
2660        my $xml_out = "";
2661
2662        my $MOUT;
2663        if (!open($MOUT,">$metadata_xml_filename")) {
2664            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2665        }
2666        else {
2667            binmode($MOUT,":utf8");
2668
2669            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2670            # At the moment, I will just hack it!
2671            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2672            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2673            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2674            #print MOUT $xml_out;
2675
2676            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2677                              metapos => $metapos,
2678                      metavalue => $metavalue,
2679                      metamode => $metamode,
2680                      src_file => $src_file,
2681                      prevmetavalue => $prevmetavalue,
2682                      current_file => undef} );
2683            close($MOUT);       
2684        }
2685   }
2686}
2687
2688
2689sub set_import_metadata
2690{
2691    my $self = shift @_;
2692   
2693    my $username  = $self->{'username'};
2694    my $collect   = $self->{'collect'};
2695    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2696   
2697    if ($baseaction::authentication_enabled) {
2698    # Ensure the user is allowed to edit this collection
2699    $self->authenticate_user($username, $collect);
2700    }
2701
2702    # Make sure the collection isn't locked by someone else
2703    $self->lock_collection($username, $collect);
2704 
2705    $self->_set_import_metadata(@_);
2706
2707    # Release the lock once it is done
2708    $self->unlock_collection($username, $collect);
2709   
2710}
2711
2712sub set_import_metadata_array
2713{
2714    my $self = shift @_;
2715
2716    my $username  = $self->{'username'};
2717    my $collect   = $self->{'collect'};
2718    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2719#    my $gsdlhome  = $self->{'gsdlhome'};
2720
2721    if ($baseaction::authentication_enabled) {
2722    # Ensure the user is allowed to edit this collection   
2723    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2724    }
2725
2726    my $site = $self->{'site'};
2727    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2728   
2729    $gsdl_cgi->checked_chdir($collect_dir);
2730
2731    # Make sure the collection isn't locked by someone else
2732    $self->lock_collection($username, $collect);
2733
2734    $self->_set_import_metadata_array(@_);
2735
2736    # Release the lock once it is done
2737    $self->unlock_collection($username, $collect);
2738
2739}
2740
2741
2742sub _set_import_metadata_array
2743{
2744    my $self = shift @_;
2745
2746    my $collect   = $self->{'collect'};
2747    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2748
2749    my $site = $self->{'site'};
2750    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2751   
2752    # look up additional args
2753   
2754    my $infodbtype = $self->{'infodbtype'};
2755   
2756    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2757    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2758   
2759    my $json_str = $self->{'json'};
2760    my $doc_array = decode_json $json_str;
2761   
2762    my $global_status = 0;
2763    my $global_mess = "";
2764   
2765    my @all_docids = ();
2766   
2767    foreach my $doc_array_rec ( @$doc_array )
2768    {
2769    my $status = -1;
2770    my $docid = $doc_array_rec->{'docid'};
2771   
2772    my ($docid_root,$docid_secnum);
2773    if(defined $docid) {   
2774        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2775        # as yet no support for setting subsection metadata in metadata.xml
2776        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2777        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2778        next; # skip this docid in for loop
2779        }
2780    }
2781
2782    push(@all_docids,$docid); # docid_root rather
2783   
2784    my $metaname = $doc_array_rec->{'metaname'};
2785    if (defined $metaname) {
2786        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2787        my $metavalue = $doc_array_rec->{'metavalue'};
2788        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2789
2790        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2791        # make "accumulate" the default (less destructive, as won't actually
2792        # delete any existing values)
2793        $metamode = "accumulate";
2794        }
2795
2796        # adding metapos and prevmetavalue support to import_metadata subroutines
2797        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2798        my $prevmetavalue = $self->{'prevmetavalue'};
2799
2800        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2801       
2802    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2803        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2804       
2805        foreach my $metatable_rec ( @$metatable ) {
2806        $metaname  = $metatable_rec->{'metaname'};
2807        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2808        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2809            # make "accumulate" the default (less destructive, as won't actually
2810            # delete any existing values)
2811            $metamode = "accumulate";
2812        }
2813
2814        # No support for metapos and prevmetavalue in the JSON metatable substructure
2815        my $metapos = undef;
2816        my $prevmetavalue = undef;
2817        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2818       
2819        foreach my $metavalue ( @$metavals ) {
2820            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2821
2822            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2823            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2824            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2825            # metavalue for this metaname will be lost
2826            $metamode = "accumulate";
2827            }
2828        }
2829        }
2830    }       
2831    }
2832
2833    # always a success message
2834    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2835    $gsdl_cgi->generate_ok_message($mess);
2836}
2837
2838# always returns true (1)
2839sub set_import_metadata_entry
2840{
2841    my $self = shift @_;
2842    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2843
2844    my $info_mess = <<RAWEND;
2845****************************
2846  set_import_metadata_entry()
2847****************************
2848RAWEND
2849
2850    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2851    $info_mess .= " collect     = $collect\n"     if defined($collect);
2852    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
2853    $info_mess .= " arcinfo_doc_filename  = $arcinfo_doc_filename\n"  if defined($arcinfo_doc_filename);
2854    $info_mess .= " docid       = $docid\n"       if defined($docid);
2855    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
2856    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2857    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2858    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2859    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2860     
2861    $info_mess .= "****************************\n";
2862
2863    $gsdl_cgi->generate_message($info_mess);
2864
2865    # import works with metadata.xml which can have inherited metadata
2866    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2867    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2868    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2869    # what they're doing if they provide a metapos.
2870    if(defined $metapos) {
2871    print STDERR "@@@@ WARNING: metapos defined.\n";
2872    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2873    }
2874
2875    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2876    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2877    my $metadata_xml_file;
2878    my $import_filename = undef;
2879   
2880    if (defined $docid) {
2881    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2882    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2883
2884    # This now stores the full pathname
2885    $import_filename = $doc_rec->{'src-file'}->[0];
2886    $import_filename = &util::placeholders_to_abspath($import_filename);
2887
2888    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2889         # as the array version of the method doesn't support the -f parameter yet
2890    my $import_file  = $self->{'f'};
2891    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2892    }
2893   
2894    # figure out correct metadata.xml file [?]
2895    # Assuming the metadata.xml file is next to the source file
2896    # Note: This will not work if it is using the inherited metadata from the parent folder
2897    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2898    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2899   
2900    # If we're overriding everything, then $prevmetavalue=undefined and
2901    # $metamode=override combined with $metapos=undefined
2902    # in which case we need to remove all metavalues for the metaname at the given (sub)section
2903    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2904    if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2905##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2906    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2907
2908    }
2909
2910    # Edit the metadata.xml
2911    # Modified by Jeffrey from DL Consulting
2912    # Handle the case where there is one metadata.xml file for multiple FileSets
2913    # The XML filter needs to know whether it is in the right FileSet
2914    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2915    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2916    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2917                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2918    #return 0;
2919    return $metadata_xml_filename;
2920}
2921
2922sub _remove_import_metadata
2923{
2924    my $self = shift @_;
2925
2926    my $collect   = $self->{'collect'};
2927    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2928#   my $gsdlhome  = $self->{'gsdlhome'};
2929    my $infodbtype = $self->{'infodbtype'};
2930   
2931    # Obtain the collect dir
2932    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2933    my $site = $self->{'site'};
2934    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2935   
2936    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2937    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2938
2939    # look up additional args
2940    my $docid = $self->{'d'};
2941    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2942    {
2943        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2944    }
2945   
2946    my $metaname = $self->{'metaname'};
2947    my $metapos = $self->{'metapos'};
2948    my $metavalue = $self->{'metavalue'};
2949    if(defined $metavalue) {
2950        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2951    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2952        $metapos = 0;
2953    }
2954    my $metamode = $self->{'metamode'} || undef;
2955
2956    # import works with metadata.xml which can have inherited metadata
2957    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2958    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2959    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2960    # what they're doing if they provide a metapos.
2961    if(defined $metapos) {
2962        print STDERR "@@@@ WARNING: metapos defined.\n";
2963        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2964    }
2965   
2966    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2967    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2968    my $metadata_xml_file;
2969    my $import_filename = undef;
2970    if (defined $docid)
2971    {
2972        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2973        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2974
2975        # This now stores the full pathname
2976        $import_filename = $doc_rec->{'src-file'}->[0];
2977        $import_filename = &util::placeholders_to_abspath($import_filename);
2978    }
2979
2980    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2981    {
2982        $gsdl_cgi->generate_error("There is no metadata\n");
2983    }
2984   
2985    # figure out correct metadata.xml file [?]
2986    # Assuming the metadata.xml file is next to the source file
2987    # Note: This will not work if it is using the inherited metadata from the parent folder
2988    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2989    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2990   
2991    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2992   
2993    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2994    $mess .= "  $metaname";
2995    $mess .= " = $metavalue\n";
2996   
2997    $gsdl_cgi->generate_ok_message($mess);
2998
2999    #return $status; # in case calling functions have a use for this
3000}
3001
3002sub remove_import_metadata
3003{
3004    my $self = shift @_;
3005   
3006    my $username = $self->{'username'};
3007    my $collect   = $self->{'collect'};
3008    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3009   
3010    if ($baseaction::authentication_enabled) {
3011        # Ensure the user is allowed to edit this collection       
3012        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3013    }
3014
3015    # Make sure the collection isn't locked by someone else
3016    $self->lock_collection($username, $collect);
3017   
3018    $self->_remove_import_metadata(@_);
3019
3020    # Release the lock once it is done
3021    $self->unlock_collection($username, $collect);
3022
3023}
3024
3025sub remove_from_metadata_xml
3026{
3027    my $self = shift @_;
3028    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3029    # metamode generally has no meaning for removing meta, but is used by set_meta
3030    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3031
3032    # Set the call-back functions for the metadata tags
3033    my @rules =
3034    (
3035        _default => 'raw',
3036        'Metadata' => \&rfmxml_metadata,
3037        'FileName' => \&mxml_filename
3038    );
3039       
3040    my $parser = XML::Rules->new
3041    (
3042        rules => \@rules,
3043        style => 'filter',
3044        output_encoding => 'utf8',
3045     #normalisespaces => 1,
3046            stripspaces => 2|0|0 # ineffectual
3047    );
3048   
3049    my $xml_in = "";
3050    if (!open(MIN,"<$metadata_xml_filename"))
3051    {
3052        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3053    }
3054    else
3055    {
3056        # Read them in
3057        my $line;
3058        while (defined ($line=<MIN>)) {
3059            $xml_in .= $line;
3060        }
3061        close(MIN);
3062
3063        # Filter with the call-back functions
3064        my $xml_out = "";
3065
3066        my $MOUT;
3067        if (!open($MOUT,">$metadata_xml_filename")) {
3068            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3069        }
3070        else {
3071            binmode($MOUT,":utf8");
3072            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
3073            close($MOUT);       
3074        }
3075    }
3076}
3077
3078sub rfmxml_metadata
3079{
3080    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3081
3082    # metadata.xml does not handle subsections
3083
3084    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3085    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3086        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
3087    {
3088        if (!defined $parser->{'parameters'}->{'poscount'})
3089        {
3090            $parser->{'parameters'}->{'poscount'} = 0;
3091        }
3092        else
3093        {
3094            $parser->{'parameters'}->{'poscount'}++;
3095        }
3096
3097        # if overriding but no metapos, then clear all the meta for this metaname
3098        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
3099            return [];
3100        }
3101   
3102        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3103        {
3104            return [];
3105        }
3106        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3107        {
3108            return [];
3109        }       
3110    }
3111
3112    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3113    return [$tagname => $attrHash];
3114}
3115
3116sub _remove_live_metadata
3117{
3118    my $self = shift @_;
3119
3120    my $collect   = $self->{'collect'};
3121    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3122#    my $gsdlhome  = $self->{'gsdlhome'};
3123    my $infodbtype = $self->{'infodbtype'};
3124
3125    # Obtain the collect dir
3126    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3127    my $site = $self->{'site'};
3128    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3129
3130   
3131    # look up additional args
3132    my $docid     = $self->{'d'};
3133    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3134      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3135    }
3136   
3137    # Generate the dbkey
3138    my $metaname  = $self->{'metaname'};
3139    my $dbkey = "$docid.$metaname";
3140
3141    # To people who know $collect_tail please add some comments
3142    # Obtain the live gdbm_db path
3143    my $collect_tail = $collect;
3144    $collect_tail =~ s/^.*[\/|\\]//;
3145    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3146    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3147
3148    # Remove the key
3149    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3150    my $status = system($cmd);
3151    if ($status != 0) {
3152        # Catch error if gdbmdel failed
3153    my $mess = "Failed to set metadata key: $dbkey\n";
3154   
3155    $mess .= "PATH: $ENV{'PATH'}\n";
3156    $mess .= "cmd = $cmd\n";
3157    $mess .= "Exit status: $status\n";
3158    $mess .= "System Error Message: $!\n";
3159
3160    $gsdl_cgi->generate_error($mess);
3161    }
3162    else {
3163    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3164    }
3165
3166}
3167
3168sub remove_live_metadata
3169{
3170    my $self = shift @_;
3171
3172    my $username  = $self->{'username'};
3173    my $collect   = $self->{'collect'};
3174    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3175    my $gsdlhome  = $self->{'gsdlhome'};
3176   
3177    if ($baseaction::authentication_enabled) {
3178    # Ensure the user is allowed to edit this collection   
3179    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3180    }
3181
3182    # Make sure the collection isn't locked by someone else
3183    $self->lock_collection($username, $collect);
3184
3185    $self->_remove_live_metadata(@_);
3186
3187    $self->unlock_collection($username, $collect);
3188}
3189
3190sub remove_metadata
3191{
3192    my $self = shift @_;
3193
3194    my $where = $self->{'where'};
3195    if(!$where) {
3196    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3197    return;
3198    }
3199
3200    my $username  = $self->{'username'};
3201    my $collect   = $self->{'collect'};
3202    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3203
3204    if ($baseaction::authentication_enabled) {
3205    # Ensure the user is allowed to edit this collection   
3206    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3207    }
3208
3209    # Make sure the collection isn't locked by someone else
3210    $self->lock_collection($username, $collect);
3211
3212    # check which directories need to be processed, specified in $where as
3213    # any combination of import|archives|index|live
3214    if($where =~ m/import/) {
3215    $self->_remove_import_metadata(@_);     
3216    }
3217    if($where =~ m/archives/) {
3218    $self->_remove_archives_metadata(@_);       
3219   }
3220    if($where =~ m/index/) {
3221    $self->_remove_index_metadata(@_);     
3222    }
3223
3224    # Release the lock once it is done
3225    $self->unlock_collection($username, $collect);
3226}
3227
3228# the internal version, without authentication
3229sub _remove_index_metadata
3230{   
3231    my $self = shift @_;
3232
3233    my $collect   = $self->{'collect'};
3234    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3235#    my $gsdlhome  = $self->{'gsdlhome'};
3236    my $infodbtype = $self->{'infodbtype'};
3237   
3238    # Obtain the collect dir
3239    my $site = $self->{'site'};
3240    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3241    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3242
3243        # look up additional args
3244    my $docid     = $self->{'d'};
3245    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3246      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3247    }
3248    my $metaname  = $self->{'metaname'};
3249    my $metapos   = $self->{'metapos'};
3250    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
3251
3252    # To people who know $collect_tail please add some comments
3253    # Obtain the path to the database
3254    my $collect_tail = $collect;
3255    $collect_tail =~ s/^.*[\/|\\]//;
3256    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3257    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3258
3259    # Read the docid entry
3260    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3261
3262    # Check to make sure the key does exist
3263    if (!defined ($doc_rec->{$metaname})) {
3264        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3265    }
3266
3267    # Obtain the specified metadata pos
3268    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3269    if(!defined $metapos && !defined $metavalue) {
3270        $metapos = 0;
3271    }
3272   
3273
3274    # consider check key is defined before deleting?
3275    # Loop through the metadata array and ignore the specified position
3276    my $filtered_metadata = [];
3277    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
3278    for (my $i=0; $i<$num_metadata_vals; $i++) {
3279    my $metaval = shift(@{$doc_rec->{$metaname}});
3280
3281    if (!defined $metavalue && $i != $metapos) {
3282        push(@$filtered_metadata,$metaval);
3283    }
3284   
3285    if(defined $metavalue && !($metavalue eq $metaval))
3286    {
3287        push(@$filtered_metadata,$metaval);
3288    }
3289    }
3290    $doc_rec->{$metaname} = $filtered_metadata;
3291
3292    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3293    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3294
3295    if ($status != 0) {
3296    my $mess = "Failed to set metadata key: $docid\n";
3297   
3298    $mess .= "PATH: $ENV{'PATH'}\n";
3299    $mess .= "Exit status: $status\n";
3300    $mess .= "System Error Message: $!\n";
3301
3302    $gsdl_cgi->generate_error($mess);
3303    }
3304    else {
3305    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3306    $mess .= "  $metaname";
3307    $mess .= "->[$metapos]" if (defined $metapos);
3308    $mess .= " ($metavalue)" if (defined $metavalue);
3309
3310    $gsdl_cgi->generate_ok_message($mess);
3311    }
3312
3313    #return $status; # in case calling functions have a use for this
3314}
3315
3316sub remove_index_metadata
3317{
3318    my $self = shift @_;
3319
3320    my $username  = $self->{'username'};
3321    my $collect   = $self->{'collect'};
3322    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3323#    my $gsdlhome  = $self->{'gsdlhome'};
3324   
3325    if ($baseaction::authentication_enabled) {
3326    # Ensure the user is allowed to edit this collection   
3327    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3328    }
3329
3330    # Obtain the collect dir
3331    my $site = $self->{'site'};
3332    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3333    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3334
3335    # Make sure the collection isn't locked by someone else
3336    $self->lock_collection($username, $collect);
3337
3338    $self->_remove_index_metadata(@_);
3339
3340    # Release the lock once it is done
3341    $self->unlock_collection($username, $collect);
3342}
3343
3344
3345# Was trying to reused the codes, but the functions need to be broken
3346# down more before they can be reused, otherwise there will be too
3347# much overhead and duplicate process...
3348sub insert_metadata
3349{
3350    my $self = shift @_;
3351   
3352    my $username  = $self->{'username'};
3353    my $collect   = $self->{'collect'};
3354    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3355    my $gsdlhome  = $self->{'gsdlhome'};
3356    my $infodbtype = $self->{'infodbtype'};
3357   
3358    # If the import metadata and gdbm database have been updated, we
3359    # need to insert some notification to warn user that the the text
3360    # they see at the moment is not indexed and require a rebuild.
3361    my $rebuild_pending_macro = "_rebuildpendingmessage_";
3362
3363    if ($baseaction::authentication_enabled) {
3364    # Ensure the user is allowed to edit this collection
3365    $self->authenticate_user($username, $collect);
3366    }
3367
3368    # Obtain the collect and archive dir   
3369    my $site = $self->{'site'};
3370    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3371    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3372    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3373
3374    # Make sure the collection isn't locked by someone else
3375    $self->lock_collection($username, $collect);
3376   
3377    # Check additional args
3378    my $docid = $self->{'d'};
3379    if (!defined($docid)) {
3380    $gsdl_cgi->generate_error("No document id is specified: d=...");
3381    }
3382    my $metaname = $self->{'metaname'};
3383    if (!defined($metaname)) {
3384    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3385    }
3386    my $metavalue = $self->{'metavalue'};
3387    if (!defined($metavalue) || $metavalue eq "") {
3388    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3389    }
3390    # make "accumulate" the default (less destructive, as won't actually
3391    # delete any existing values)
3392    my $metamode = "accumulate";
3393
3394    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3395    my $metapos   = undef;
3396    my $prevmetavalue = undef;
3397
3398    #=======================================================================#
3399    # set_import_metadata [START]
3400    #=======================================================================#
3401    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3402    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3403    my $metadata_xml_file;
3404    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3405    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3406   
3407    # This now stores the full pathname
3408    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3409    $import_filename = &util::placeholders_to_abspath($import_filename);
3410   
3411    # figure out correct metadata.xml file [?]
3412    # Assuming the metadata.xml file is next to the source file
3413    # Note: This will not work if it is using the inherited metadata from the parent folder
3414    my ($import_tailname, $import_dirname)
3415    = File::Basename::fileparse($import_filename);
3416    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3417
3418    # Shane's escape characters
3419    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3420    $metavalue =~ s/\,/&#44;/g;
3421    $metavalue =~ s/\:/&#58;/g;
3422    $metavalue =~ s/\|/&#124;/g;
3423    $metavalue =~ s/\(/&#40;/g;
3424    $metavalue =~ s/\)/&#41;/g;
3425    $metavalue =~ s/\[/&#91;/g;
3426    $metavalue =~ s/\\/&#92;/g;
3427    $metavalue =~ s/\]/&#93;/g;
3428    $metavalue =~ s/\{/&#123;/g;
3429    $metavalue =~ s/\}/&#125;/g;
3430    $metavalue =~ s/\"/&#34;/g;
3431    $metavalue =~ s/\`/&#96;/g;
3432    $metavalue =~ s/\n/_newline_/g;
3433
3434    # Edit the metadata.xml
3435    # Modified by Jeffrey from DL Consulting
3436    # Handle the case where there is one metadata.xml file for multiple FileSets
3437    # The XML filter needs to know whether it is in the right FileSet
3438    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3439    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3440    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3441                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3442    #=======================================================================#
3443    # set_import_metadata [END]
3444    #=======================================================================#
3445
3446
3447    #=======================================================================#
3448    # set_metadata (accumulate version) [START]
3449    #=======================================================================#
3450    # To people who know $collect_tail please add some comments
3451    # Obtain path to the database
3452    my $collect_tail = $collect;
3453    $collect_tail =~ s/^.*[\/|\\]//;
3454    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3455    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3456
3457    # Read the docid entry
3458    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3459
3460    # Protect the quotes
3461    $metavalue =~ s/\"/\\\"/g;
3462
3463    # Adds the pending macro
3464    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3465
3466    # If the metadata doesn't exist, create a new one
3467    if (!defined($doc_rec->{$metaname})){   
3468    $doc_rec->{$metaname} = [ $macro_metavalue ];
3469    }
3470    # Else, let's acculumate the values
3471    else {
3472        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3473    }
3474
3475    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3476    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3477
3478    if ($status != 0) {
3479        # Catch error if gdbmget failed
3480    my $mess = "Failed to set metadata key: $docid\n";
3481   
3482    $mess .= "PATH: $ENV{'PATH'}\n";
3483    $mess .= "Exit status: $status\n";
3484    $mess .= "System Error Message: $!\n";
3485
3486    $gsdl_cgi->generate_error($mess);
3487    }
3488    else {
3489    my $mess = "insert-metadata successful: Key[$docid]\n";
3490    $mess .= "  [In metadata.xml] $metaname";
3491    $mess .= " = $metavalue\n";
3492    $mess .= "  [In database] $metaname";
3493    $mess .= " = $macro_metavalue\n";
3494    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3495        $gsdl_cgi->generate_ok_message($mess);
3496    }   
3497    #=======================================================================#
3498    # set_metadata (accumulate version) [END]
3499    #=======================================================================#
3500
3501    # Release the lock once it is done
3502    $self->unlock_collection($username, $collect);
3503}
3504
35051;
Note: See TracBrowser for help on using the browser.