root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 27313

Revision 27313, 106.0 KB (checked in by ak19, 6 years ago)

Now the existing user comments to be displayed are all retrieved in one go using the new get_metadata_array subroutine in metadataaction.pm via the new gsajaxapi method getMetadataArray which take a JSON string and return one. This loads user comments much faster, and doesn't get that much slower if the number of comments stored in the index database gets larger. 2 bugfixes to metadataaction.pm's recently added get_metadata_array subroutine: if no metapos supplied it defaults to 0 like the other get_meta functions instead of defaulting to the keyword 'all'. The fieldnames in the JSON string returned also needed to be inside double quotes in order to be successfully parsed back into a JSON object on the Javascript side. Replaced the old loadUserComments() javascript function in style.dm, which now calls the new gsajaxapi.getMetadataArray() post method.

Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130        'optional-args'   => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131                     
132    "remove-archives-metadata" => {
133        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134        'optional-args'   => [ "metapos", "metavalue" ] },
135
136    "remove-live-metadata" => {
137        'compulsory-args' => [ "d", "metaname" ],
138        'optional-args'   => [ ] },
139
140    "remove-index-metadata" => {
141        'compulsory-args' => [ "d", "metaname" ],
142        'optional-args'   => [ "metapos", "metavalue" ] },
143
144    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145        'compulsory-args' => [ "d", "metaname" ],
146        'optional-args'   => [ "where", "metapos", "metavalue" ] },
147
148    #INSERT METHODS
149    "insert-metadata" => {
150        'compulsory-args' => [ "d", "metaname", "metavalue" ],
151        'optional-args'   => [ ] }
152};
153
154
155sub new
156{
157    my $class = shift (@_);
158    my ($gsdl_cgi,$iis6_mode) = @_;
159
160    # Treat metavalue specially.  To transmit this through a GET request
161    # the Javascript side has url-encoded it, so here we need to decode
162    # it before proceeding
163
164    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173    return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179    my $self = shift @_;
180
181    my $username  = $self->{'username'};
182    my $collect   = $self->{'collect'};
183    my $gsdl_cgi  = $self->{'gsdl_cgi'};
184    my $gsdlhome  = $self->{'gsdlhome'};
185    my $infodbtype = $self->{'infodbtype'};
186   
187    # live metadata gets/saves value scoped (prefixed) by the current usename
188    # so (for now) let's not bother to enforce authentication
189
190    # Obtain the collect dir
191    my $site = $self->{'site'};
192    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
194
195    # Make sure the collection isn't locked by someone else
196    $self->lock_collection($username, $collect);
197
198    # look up additional args
199    my $docid  = $self->{'d'};
200    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
201       $gsdl_cgi->generate_error("No docid (d=...) specified.");
202    }
203
204    # Generate the dbkey
205    my $metaname  = $self->{'metaname'};
206    my $dbkey = "$docid.$metaname";
207
208    # To people who know $collect_tail please add some comments
209    # Obtain path to the database
210    my $collect_tail = $collect;
211    $collect_tail =~ s/^.*[\/|\\]//;
212    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
213    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
214   
215    # Obtain the content of the key
216    my $cmd = "gdbmget $infodb_file_path $dbkey";
217    if (open(GIN,"$cmd |") == 0) {
218        # Catch error if gdbmget failed
219    my $mess = "Failed to get metadata key: $metaname\n";
220    $mess .= "$!\n";
221
222    $gsdl_cgi->generate_error($mess);
223    }
224    else {
225    binmode(GIN,":utf8");
226        # Read everything in and concatenate them into $metavalue
227    my $metavalue = "";
228    my $line;
229    while (defined ($line=<GIN>)) {
230        $metavalue .= $line;
231    }
232    close(GIN);
233    chomp($metavalue); # Get rid off the tailing newlines
234    $gsdl_cgi->generate_ok_message("$metavalue");
235    }
236
237    # Release the lock once it is done
238    $self->unlock_collection($username, $collect);
239}
240
241# just calls the index version
242sub get_metadata
243{
244    my $self = shift @_;
245    $self->get_index_metadata(@_);
246}
247
248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254    my $self = shift @_;
255
256    my $where = $self->{'where'};
257    if (!$where) { 
258    $where = "index"; # default behaviour is to get the values from index
259    }
260
261    # Only when setting metadata do we perform authentication and do we lock the collection
262    # not when getting metadata
263
264    # for get_meta_array, the where param can only be ONE of import, archives, index, live
265    if($where =~ m/index/) {
266    $self->_get_index_metadata_array(@_);
267    }
268    # still to be implemented for archives, import and live
269}
270
271# takes a JSON string and returns a JSON string
272# resulting string is of the form:
273# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
274sub _get_index_metadata_array
275{
276    my $self = shift @_;
277
278    my $collect   = $self->{'collect'};
279    my $gsdl_cgi  = $self->{'gsdl_cgi'};
280    my $site = $self->{'site'};
281    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282
283    # look up additional args   
284    my $infodbtype = $self->{'infodbtype'};
285   
286    # To people who know $collect_tail please add some comments
287    # Obtain the path to the database
288    my $collect_tail = $collect;
289    $collect_tail =~ s/^.*[\/\\]//;
290    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
291    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
292
293    my $json_str      = $self->{'json'};
294    my $doc_array = decode_json $json_str;
295
296    my $json_result_str = "[";
297    my $first_doc_rec = 1;
298
299    foreach my $doc_array_rec ( @$doc_array ) {
300   
301    my $docid     = $doc_array_rec->{'docid'};
302   
303    if($first_doc_rec) {
304        $first_doc_rec = 0;
305    } else {
306        $json_result_str .= ",";
307    }
308    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
309
310    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
311    $json_result_str = $json_result_str . ",\"metatable\":[";
312
313    my $first_rec = 1;
314    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
315        if($first_rec) {
316        $first_rec = 0;
317        } else {
318        $json_result_str .= ",";       
319        }
320       
321        my $metaname  = $metatable_rec->{'metaname'};
322        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
323
324        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
325        if(!defined $metapos) {
326        $metapos = 0;
327        }
328
329         # Read the docid entry
330        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
331 
332        # Basically loop through and unescape_html the values
333        foreach my $k (keys %$doc_rec) {
334        my @escaped_v = ();
335        foreach my $v (@{$doc_rec->{$k}}) {
336            my $ev = &ghtml::unescape_html($v);
337            push(@escaped_v, $ev);
338        }
339        $doc_rec->{$k} = \@escaped_v;
340        }
341
342        # Obtain the specified metadata value(s)
343        my $metavalue;
344
345        if($metapos ne "all") { # get the value at a single metapos
346
347        $metavalue = $doc_rec->{$metaname}->[$metapos];     
348        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
349
350        } else {
351        my $first_metaval = 1;
352        $metapos = 0;
353        $metavalue = $doc_rec->{$metaname}->[$metapos];
354
355        while (defined $metavalue) {
356            if($first_metaval) {       
357            $first_metaval = 0;
358            } else {
359            $json_result_str .= ",";
360            }
361       
362            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
363
364            $metapos++;
365            $metavalue = $doc_rec->{$metaname}->[$metapos];         
366        }
367        }
368
369        $json_result_str .= "]}"; # close metavals array and metatable record
370    }
371   
372    $json_result_str .= "]}"; # close metatable array and docid record
373    }
374
375    $json_result_str .= "]"; # close array of docids
376
377    $gsdl_cgi->generate_ok_message($json_result_str);   
378}
379
380
381sub get_index_metadata
382{
383    my $self = shift @_;
384
385    my $username  = $self->{'username'};
386    my $collect   = $self->{'collect'};
387    my $gsdl_cgi  = $self->{'gsdl_cgi'};
388    my $gsdlhome  = $self->{'gsdlhome'};
389
390    # Authenticate user if it is enabled
391    if ($baseaction::authentication_enabled) {
392    # Ensure the user is allowed to edit this collection   
393    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
394    }
395
396    # Obtain the collect dir
397    my $site = $self->{'site'};
398    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
399    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
400
401    # Make sure the collection isn't locked by someone else
402    $self->lock_collection($username, $collect);
403
404    # look up additional args
405    my $docid     = $self->{'d'};
406    my $metaname  = $self->{'metaname'};
407    my $metapos   = $self->{'metapos'};
408    my $infodbtype = $self->{'infodbtype'};
409
410    # To people who know $collect_tail please add some comments
411    # Obtain path to the database
412    my $collect_tail = $collect;
413    $collect_tail =~ s/^.*[\/\\]//;
414    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
415    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
416
417    # Read the docid entry
418    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
419 
420    # Basically loop through and unescape_html the values
421    foreach my $k (keys %$doc_rec) {
422    my @escaped_v = ();
423    foreach my $v (@{$doc_rec->{$k}}) {
424        my $ev = &ghtml::unescape_html($v);
425        push(@escaped_v, $ev);
426    }
427    $doc_rec->{$k} = \@escaped_v;
428    }
429
430    # Obtain the specified metadata value
431    $metapos = 0 if (!defined $metapos);
432    my $metavalue = $doc_rec->{$metaname}->[$metapos];
433    $gsdl_cgi->generate_ok_message("$metavalue");
434   
435    # Release the lock once it is done
436    $self->unlock_collection($username, $collect);
437}
438
439
440sub get_import_metadata
441{
442    my $self = shift @_;
443
444    my $username  = $self->{'username'};
445    my $collect   = $self->{'collect'};
446    my $gsdl_cgi  = $self->{'gsdl_cgi'};
447    my $gsdlhome  = $self->{'gsdlhome'};
448
449    # Authenticate user if it is enabled
450    if ($baseaction::authentication_enabled) {
451        # Ensure the user is allowed to edit this collection
452        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
453    }
454
455    # Obtain the collect dir
456    my $site = $self->{'site'};
457    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
458    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
459
460    # Make sure the collection isn't locked by someone else
461    $self->lock_collection($username, $collect);
462
463    # look up additional args
464    my $docid     = $self->{'d'};
465    my $metaname  = $self->{'metaname'};
466    my $metapos = $self->{'metapos'};
467    $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
468
469    my $infodbtype = $self->{'infodbtype'};
470    if (!defined $docid)
471    {
472        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
473    }
474
475    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
476    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
477    my $metadata_xml_file;
478    my $import_filename = undef;
479   
480
481    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
482    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
483    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
484
485    # This now stores the full pathname
486    $import_filename = $doc_rec->{'src-file'}->[0];
487
488    # figure out correct metadata.xml file [?]
489    # Assuming the metadata.xml file is next to the source file
490    # Note: This will not work if it is using the inherited metadata from the parent folder
491    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
492    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
493
494    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
495
496    # Release the lock once it is done
497    $self->unlock_collection($username, $collect);
498}
499
500sub get_metadata_from_metadata_xml
501{
502    my $self = shift @_;
503    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
504   
505    my @rules =
506    (
507        _default => 'raw',
508        'Metadata' => \&gfmxml_metadata,
509        'FileName' => \&mxml_filename
510    );
511       
512    my $parser = XML::Rules->new
513    (
514        rules => \@rules,
515        output_encoding => 'utf8'
516    );
517   
518    my $xml_in = "";
519    if (!open(MIN,"<$metadata_xml_filename"))
520    {
521        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
522    }
523    else
524    {
525        # Read them in
526        my $line;
527        while (defined ($line=<MIN>)) {
528            $xml_in .= $line;
529        }
530        close(MIN);
531
532        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
533       
534        if(defined $parser->{'pad'}->{'metavalue'})
535        {
536            return $parser->{'pad'}->{'metavalue'};
537        }
538        else
539        {
540            return "";
541        }
542    }
543}
544
545sub gfmxml_metadata
546{
547    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
548
549    # no subsection support yet in metadata.xml
550
551    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
552    {
553        if (!defined $parser->{'parameters'}->{'poscount'})
554        {
555            $parser->{'parameters'}->{'poscount'} = 0;
556        }
557        else
558        {
559            $parser->{'parameters'}->{'poscount'}++;
560        }
561   
562        # gets the first value by default, since metapos defaults to 0
563        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
564        {
565            if($parser->{'parameters'}->{'metapos'} > 0) {
566            print STDERR "@@@@ WARNING: non-zero metapos.\n";
567            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
568            }
569            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
570        }
571    }
572}
573
574sub get_archives_metadata
575{
576    my $self = shift @_;
577
578    my $username  = $self->{'username'};
579    my $collect   = $self->{'collect'};
580    my $gsdl_cgi  = $self->{'gsdl_cgi'};
581    my $gsdlhome  = $self->{'gsdlhome'};
582    my $infodbtype = $self->{'infodbtype'};
583
584    # Authenticate user if it is enabled
585    if ($baseaction::authentication_enabled) {
586        # Ensure the user is allowed to edit this collection       
587        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
588    }
589
590    # Obtain the collect dir
591    my $site = $self->{'site'};
592    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
593   
594    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
595
596    # Make sure the collection isn't locked by someone else
597    $self->lock_collection($username, $collect);
598   
599    # look up additional args
600    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
601    $docid_secnum = "" if (!defined $docid_secnum);
602   
603    my $metaname = $self->{'metaname'};
604    my $metapos = $self->{'metapos'};
605    $metapos = 0 if (!defined $metapos);
606   
607    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
608    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
609
610    # This now stores the full pathname
611    my $doc_filename = $doc_rec->{'doc-file'}->[0];
612
613    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
614
615    # Release the lock once it is done
616    $self->unlock_collection($username, $collect);
617}
618
619sub get_metadata_from_archive_xml
620{
621    my $self = shift @_;
622    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
623   
624    my @start_rules = ('Section' => \&dxml_start_section);
625   
626    my @rules =
627    (
628        _default => 'raw',
629        'Metadata' => \&gfdxml_metadata
630    );
631       
632    my $parser = XML::Rules->new
633    (
634        start_rules => \@start_rules,
635        rules => \@rules,
636        output_encoding => 'utf8'
637    );
638   
639    my $xml_in = "";
640    if (!open(MIN,"<$doc_xml_filename"))
641    {
642        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
643    }
644    else
645    {
646        # Read them in
647        my $line;
648        while (defined ($line=<MIN>)) {
649            $xml_in .= $line;
650        }
651        close(MIN);
652
653        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
654       
655        if(defined $parser->{'pad'}->{'metavalue'})
656        {
657            return $parser->{'pad'}->{'metavalue'};
658        }
659        else
660        {
661            return "";
662        }
663    }
664}
665
666sub gfdxml_metadata
667{
668    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
669   
670    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
671    {
672        return;
673    }
674
675    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
676    {
677        if (!defined $parser->{'parameters'}->{'poscount'})
678        {
679            $parser->{'parameters'}->{'poscount'} = 0;
680        }
681        else
682        {
683            $parser->{'parameters'}->{'poscount'}++;
684        }
685    }
686
687    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
688    {   
689        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
690    }
691}
692
693sub _set_live_metadata
694{
695    my $self = shift @_;
696
697    my $collect   = $self->{'collect'};
698    my $gsdl_cgi  = $self->{'gsdl_cgi'};
699    #my $gsdlhome  = $self->{'gsdlhome'};
700    my $infodbtype = $self->{'infodbtype'};
701
702    # Obtain the collect dir
703    my $site = $self->{'site'};
704    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
705    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
706
707
708    # look up additional args
709    my $docid     = $self->{'d'};
710    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
711      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
712    }
713    my $metavalue = $self->{'metavalue'};
714
715    # Generate the dbkey   
716    my $metaname  = $self->{'metaname'};
717    my $dbkey = "$docid.$metaname";
718
719    # To people who know $collect_tail please add some comments
720    # Obtain path to the database
721    my $collect_tail = $collect;
722    $collect_tail =~ s/^.*[\/\\]//;
723    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
724    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
725
726    # Set the new value
727    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
728    my $status = system($cmd);
729    if ($status != 0) {
730        # Catch error if gdbmget failed
731    my $mess = "Failed to set metadata key: $dbkey\n";
732
733    $mess .= "PATH: $ENV{'PATH'}\n";
734    $mess .= "cmd = $cmd\n";
735    $mess .= "Exit status: $status\n";
736    $mess .= "System Error Message: $!\n";
737
738    $gsdl_cgi->generate_error($mess);
739    }
740    else {
741    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
742    }
743
744    #return $status; # in case calling functions have any further use for this
745}
746
747sub set_live_metadata
748{
749    my $self = shift @_;
750
751    my $username  = $self->{'username'};
752    my $collect   = $self->{'collect'};
753    my $gsdl_cgi  = $self->{'gsdl_cgi'};
754 
755    if ($baseaction::authentication_enabled) {
756    # Ensure the user is allowed to edit this collection   
757    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
758    }
759
760    # Make sure the collection isn't locked by someone else
761    $self->lock_collection($username, $collect);
762
763    $self->_set_live_metadata(@_);
764
765    # Release the lock once it is done
766    $self->unlock_collection($username, $collect);
767}
768
769sub set_index_metadata_entry
770{
771    my $self = shift @_;
772    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
773   
774    # To people who know $collect_tail please add some comments
775    # Obtain path to the database
776    my $collect_tail = $collect;
777    $collect_tail =~ s/^.*[\/\\]//;
778    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
779    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
780   
781#   print STDERR "**** infodb file path = $infodb_file_path\n";
782#   print STDERR "***** infodb type = $infodbtype\n";
783   
784    # Read the docid entry
785    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
786   
787    # Set the metadata value
788    if (defined $metapos) {
789    # if metamode=accumulate AND metapos, warn user and then use metapos
790    if (defined $metamode && $metamode eq "accumulate") {
791        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
792        print STDERR "**** Proceeding by using metapos\n";
793    }
794    $doc_rec->{$metaname}->[$metapos] = $metavalue;
795    }
796    elsif (defined $metamode && $metamode eq "override") {
797    $doc_rec->{$metaname} = [ $metavalue ];
798    }
799    else { # default for index was to override, but because accumulate is less destructive,
800    # and because accumulate is the default for archives and import, that's the new default for index too
801    if(defined $doc_rec->{$metaname}) {
802        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
803    } else {
804        $doc_rec->{$metaname} = [ $metavalue ];
805    }
806    }
807 
808    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
809   
810    return $status;
811   
812}
813
814sub _set_import_metadata
815{
816    my $self = shift @_;
817
818    my $collect   = $self->{'collect'};
819    my $gsdl_cgi  = $self->{'gsdl_cgi'};
820    my $infodbtype = $self->{'infodbtype'};
821#    my $gsdlhome  = $self->{'gsdlhome'};
822     
823    # Obtain the collect and archive dir   
824    my $site = $self->{'site'};
825    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
826    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
827    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
828   
829    # look up additional args
830    # want either d= or f=
831    my $docid  = $self->{'d'};
832    my ($docid_root,$docid_secnum);
833    if(defined $docid) {   
834    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
835    # as yet no support for setting subsection metadata in metadata.xml
836    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
837        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
838        return;
839    }
840    }
841
842    my $import_file  = $self->{'f'};
843    if ((!defined $docid) && (!defined $import_file)) {
844    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
845    }
846
847    # Get the parameters and set default mode to "accumulate"
848    my $metaname   = $self->{'metaname'};
849    my $metavalue  = $self->{'metavalue'};
850##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
851    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
852   
853    my $metamode   = $self->{'metamode'};
854    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
855    # make "accumulate" the default (less destructive, as won't actually
856    # delete any existing values)
857    $metamode = "accumulate";
858    }
859
860    # adding metapos and prevmetavalue support to import_metadata subroutines
861    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
862    my $prevmetavalue = $self->{'prevmetavalue'};
863
864    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
865    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
866
867    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
868    $mess .= "  $metaname";
869    $mess .= " = $metavalue";
870    $mess .= " ($metamode)\n";
871
872    $gsdl_cgi->generate_ok_message($mess);
873
874    #return $status; # in case calling functions have any further use for this
875}
876
877# the version of set_index_meta that doesn't do authentication
878sub _set_archives_metadata
879{
880    my $self = shift @_;
881
882    my $collect   = $self->{'collect'};
883    my $gsdl_cgi  = $self->{'gsdl_cgi'};
884    my $infodbtype = $self->{'infodbtype'};
885   
886    # Obtain the collect and archive dir   
887    my $site = $self->{'site'};
888    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
889    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
890
891    # look up additional args
892    my $docid  = $self->{'d'};
893    my $metaname   = $self->{'metaname'};
894    my $metavalue  = $self->{'metavalue'};
895    my $prevmetavalue = $self->{'prevmetavalue'};
896   
897    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
898                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
899
900    my $metamode   = $self->{'metamode'};
901    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
902    # make "accumulate" the default (less destructive, as won't actually
903    # delete any existing values)
904    $metamode = "accumulate";
905    }
906
907    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
908                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
909   
910    if ($status == 0) {
911    my $mess = "set-archives-metadata successful: Key[$docid]\n";
912    $mess .= "  $metaname";
913    $mess .= "->[$metapos]" if (defined $metapos);
914    $mess .= " = $metavalue";
915    $mess .= " ($metamode)\n";
916   
917    $gsdl_cgi->generate_ok_message($mess); 
918    }
919    else {
920    my $mess .= "Failed to set archives metadata key: $docid\n";
921    $mess .= "Exit status: $status\n";
922    if(defined $self->{'error_msg'}) {
923        $mess .= "Error Message: $self->{'error_msg'}\n";
924    } else {
925        $mess .= "System Error Message: $!\n";
926    }
927    $mess .= "-" x 20 . "\n";
928   
929    $gsdl_cgi->generate_error($mess);
930    }
931
932    #return $status; # in case calling functions have any further use for this
933}
934
935
936# the version of set_index_meta that doesn't do authentication
937sub _set_index_metadata
938{
939    my $self = shift @_;
940
941    my $collect   = $self->{'collect'};
942    my $gsdl_cgi  = $self->{'gsdl_cgi'};
943
944    my $site = $self->{'site'};
945    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
946
947    # look up additional args
948    my $docid     = $self->{'d'};
949    my $metaname  = $self->{'metaname'};
950    my $metapos   = $self->{'metapos'}; # undef has meaning
951    my $metavalue = $self->{'metavalue'};
952    my $infodbtype = $self->{'infodbtype'};
953    my $metamode  = $self->{'metamode'};   
954
955    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
956   
957    if ($status != 0) {
958        # Catch error if set infodb entry failed
959    my $mess = "Failed to set metadata key: $docid\n";
960   
961    $mess .= "PATH: $ENV{'PATH'}\n";
962    $mess .= "Exit status: $status\n";
963    $mess .= "System Error Message: $!\n";
964   
965    $gsdl_cgi->generate_error($mess);
966    }
967    else {
968    my $mess = "set-index-metadata successful: Key[$docid]\n";
969    $mess .= "  $metaname";
970    $mess .= "->[$metapos]" if (defined $metapos);
971    $mess .= " = $metavalue\n";
972   
973    $gsdl_cgi->generate_ok_message($mess);
974    }
975
976    #return $status; # in case calling functions have any further use for this
977}
978
979sub set_index_metadata
980{
981    my $self = shift @_;
982
983    my $username  = $self->{'username'};
984    my $collect   = $self->{'collect'};
985    my $gsdl_cgi  = $self->{'gsdl_cgi'};
986    #my $gsdlhome  = $self->{'gsdlhome'};
987
988    if ($baseaction::authentication_enabled) {
989    # Ensure the user is allowed to edit this collection   
990    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
991    }
992
993    my $site = $self->{'site'};
994    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
995   
996    $gsdl_cgi->checked_chdir($collect_dir);
997
998    # Obtain the collect dir
999    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1000
1001    # Make sure the collection isn't locked by someone else
1002    $self->lock_collection($username, $collect);
1003
1004    $self->_set_index_metadata(@_);
1005   
1006    # Release the lock once it is done
1007    $self->unlock_collection($username, $collect);
1008}
1009
1010# call this to set the metadata for a combination of dirs archives, import or index, or live
1011# if none specified, defaults to index which was the original behaviour of set_metadata.
1012sub set_metadata
1013{
1014    my $self = shift @_;
1015
1016    # Testing that not defining a variable, setting it to "" or to "  " all return false
1017    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1018
1019    my $where = $self->{'where'};
1020    if(!$where) {   
1021    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1022    return;
1023    }
1024
1025    # authenticate and lock collection once, even if processing multiple dirs
1026    my $username  = $self->{'username'};
1027    my $collect   = $self->{'collect'};
1028    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1029   
1030    if ($baseaction::authentication_enabled) {
1031    # Ensure the user is allowed to edit this collection   
1032    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1033    }
1034   
1035    if($where =~ m/index/) {
1036    my $site = $self->{'site'};
1037    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1038    $gsdl_cgi->checked_chdir($collect_dir);
1039    }
1040
1041    # Make sure the collection isn't locked by someone else
1042    $self->lock_collection($username, $collect);
1043
1044
1045    # now at last can set the metadata. $where can specify multiple
1046    # $where is of the form: import|archives|index, or a subset thereof
1047
1048    #my @whichdirs = split('\|', $where);
1049
1050    # just check whether $where contains import/archives/index/live in turn, and
1051    # for each case, process it accordingly
1052    if($where =~ m/import/) {
1053    $self->_set_import_metadata(@_);       
1054    }
1055
1056    if($where =~ m/archives/) {
1057
1058    # look up docID arg which is optional to set_metadata because it's optional
1059    # to set_import, but which is compulsory to set_archives_metadata
1060    my $docid     = $self->{'d'};
1061    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1062        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1063    }
1064    # we have a docid, so can set archives meta
1065    $self->_set_archives_metadata(@_); 
1066    }
1067
1068    if($where =~ m/index/) {
1069   
1070    # look up docID arg which is optional to set_metadata because it's optional
1071    # to set_import, but which is compulsory to set_archives_metadata
1072    my $docid     = $self->{'d'};
1073    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1074        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1075    }
1076    # we have a docid, so can set index meta
1077    $self->_set_index_metadata(@_);
1078    }   
1079
1080    if($where =~ m/live/) {
1081    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1082    }
1083
1084    # Release the lock once it is done
1085    $self->unlock_collection($username, $collect);
1086}
1087
1088sub set_metadata_array
1089{
1090    my $self = shift @_;
1091
1092    my $where = $self->{'where'};
1093    if(!$where) {   
1094    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1095    return;
1096    }
1097
1098    my $username  = $self->{'username'};
1099    my $collect   = $self->{'collect'};
1100    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1101
1102    if ($baseaction::authentication_enabled) {
1103    # Ensure the user is allowed to edit this collection
1104    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1105    }
1106
1107    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1108    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1109    my $site = $self->{'site'};
1110    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1111    $gsdl_cgi->checked_chdir($collect_dir);
1112
1113    # Make sure the collection isn't locked by someone else
1114    $self->lock_collection($username, $collect);
1115
1116    if($where =~ m/import/) {
1117    $self->_set_import_metadata_array(@_);
1118    }
1119    if($where =~ m/archives/) {
1120    $self->_set_archives_metadata_array(@_);
1121    }
1122    if($where =~ m/index/) {
1123    $self->_set_index_metadata_array(@_);
1124    }
1125    if($where =~ m/live/) {
1126        $self->_set_live_metadata_array(@_);
1127    }
1128
1129    # Release the lock once it is done
1130    $self->unlock_collection($username, $collect);
1131}
1132
1133sub _set_index_metadata_array
1134{
1135    my $self = shift @_;
1136
1137    my $collect   = $self->{'collect'};
1138    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1139    my $site = $self->{'site'};
1140    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1141
1142   
1143    # look up additional args
1144   
1145    my $infodbtype = $self->{'infodbtype'};
1146   
1147    my $json_str      = $self->{'json'};
1148    my $doc_array = decode_json $json_str;
1149   
1150   
1151    my $global_status = 0;
1152    my $global_mess = "";
1153   
1154    my @all_docids = ();
1155   
1156    foreach my $doc_array_rec ( @$doc_array ) {
1157   
1158    my $status = -1;
1159    my $docid     = $doc_array_rec->{'docid'};
1160   
1161    push(@all_docids,$docid);
1162
1163    my $metaname  = $doc_array_rec->{'metaname'};
1164    if(defined $metaname) {
1165        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1166        my $metavalue = $doc_array_rec->{'metavalue'};
1167        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1168
1169        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1170    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1171        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1172       
1173        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1174        $metaname  = $metatable_rec->{'metaname'};
1175        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1176        my $metapos = undef;
1177        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1178
1179        foreach my $metavalue ( @$metavals ) { # metavals is an array
1180            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1181            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1182            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1183            # metavalue for this metaname will be lost
1184            $metamode = "accumulate";
1185            }
1186        }           
1187        }
1188    }
1189   
1190    if ($status != 0) {
1191        # Catch error if set infodb entry failed
1192        $global_status = $status;
1193        $global_mess .= "Failed to set metadata key: $docid\n";
1194        $global_mess .= "Exit status: $status\n";
1195        $global_mess .= "System Error Message: $!\n";
1196        $global_mess .= "-" x 20;
1197    }
1198    }
1199   
1200    if ($global_status != 0) {
1201    $global_mess .= "PATH: $ENV{'PATH'}\n";
1202    $gsdl_cgi->generate_error($global_mess);
1203    }
1204    else {
1205    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1206    $gsdl_cgi->generate_ok_message($mess);
1207    }
1208}
1209
1210sub set_index_metadata_array
1211{
1212    my $self = shift @_;
1213
1214    my $username  = $self->{'username'};
1215    my $collect   = $self->{'collect'};
1216    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1217#    my $gsdlhome  = $self->{'gsdlhome'};
1218
1219    if ($baseaction::authentication_enabled) {
1220    # Ensure the user is allowed to edit this collection   
1221    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1222    }
1223
1224    my $site = $self->{'site'};
1225    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1226   
1227    $gsdl_cgi->checked_chdir($collect_dir);
1228
1229    # Obtain the collect dir
1230    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1231
1232    # Make sure the collection isn't locked by someone else
1233    $self->lock_collection($username, $collect);
1234
1235    $self->_set_index_metadata_array(@_);
1236
1237    # Release the lock once it is done
1238    $self->unlock_collection($username, $collect);
1239}
1240
1241# experimental, newly added in and untested
1242sub _set_live_metadata_array
1243{
1244    my $self = shift @_;
1245
1246    my $collect   = $self->{'collect'};
1247    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1248
1249    my $site = $self->{'site'};
1250    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1251
1252   
1253    # look up additional args
1254    my $infodbtype = $self->{'infodbtype'};
1255    # To people who know $collect_tail please add some comments
1256    # Obtain path to the database
1257    my $collect_tail = $collect;
1258    $collect_tail =~ s/^.*[\/\\]//;
1259    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1260    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1261
1262   
1263    my $json_str      = $self->{'json'};
1264    my $doc_array = decode_json $json_str;
1265   
1266   
1267    my $global_status = 0;
1268    my $global_mess = "";
1269   
1270    my @all_docids = ();
1271
1272
1273    foreach my $doc_array_rec ( @$doc_array ) {
1274   
1275    my $status = -1;
1276    my $docid     = $doc_array_rec->{'docid'};
1277
1278    push(@all_docids,$docid);
1279
1280    my $metaname  = $doc_array_rec->{'metaname'};
1281    if(defined $metaname) {
1282        my $dbkey = "$docid.$metaname";
1283        my $metavalue = $doc_array_rec->{'metavalue'};
1284
1285        # Set the new value
1286        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1287        $status = system($cmd);
1288
1289    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1290        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1291        foreach my $metatable_rec ( @$metatable ) {
1292        $metaname  = $metatable_rec->{'metaname'};
1293        my $dbkey = "$docid.$metaname";
1294
1295        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1296        foreach my $metavalue ( @$metavals ) {
1297             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1298             $status = system($cmd);
1299        }
1300        }
1301       
1302    }
1303
1304    if ($status != 0) {
1305        # Catch error if gdbmget failed
1306        $global_status = $status;
1307        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1308        $global_mess .= "Exit status: $status\n";
1309        $global_mess .= "System Error Message: $!\n";
1310        $global_mess .= "-" x 20;
1311    }
1312    }
1313   
1314    if ($global_status != 0) {
1315    $global_mess .= "PATH: $ENV{'PATH'}\n";
1316    $gsdl_cgi->generate_error($global_mess);
1317    }
1318    else {
1319    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1320    $gsdl_cgi->generate_ok_message($mess);
1321    }
1322}
1323
1324sub set_live_metadata_array
1325{
1326    my $self = shift @_;
1327
1328    my $username  = $self->{'username'};
1329    my $collect   = $self->{'collect'};
1330    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1331
1332    if ($baseaction::authentication_enabled) {
1333    # Ensure the user is allowed to edit this collection   
1334    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1335    }
1336
1337    my $site = $self->{'site'};
1338    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1339   
1340    $gsdl_cgi->checked_chdir($collect_dir);
1341
1342    # Make sure the collection isn't locked by someone else
1343    $self->lock_collection($username, $collect);
1344
1345    $self->_set_live_metadata_array(@_);
1346
1347    # Release the lock once it is done
1348    $self->unlock_collection($username, $collect);
1349}
1350
1351
1352sub dxml_metadata
1353{
1354    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1355    my $metaname = $parser->{'parameters'}->{'metaname'};
1356    my $metamode = $parser->{'parameters'}->{'metamode'};
1357   
1358    print STDERR "**** Processing closing </Metadata> tag\n";
1359   
1360    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1361   
1362    # Find the right metadata tag and checks if we are going to
1363    # override it
1364    #
1365    # Note: This over writes the first metadata block it
1366    # encountered. If there are multiple Sections in the doc.xml, it
1367    # might not behave as you would expect
1368
1369    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1370##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1371##    print STDERR "**** metamode = $metamode\n";
1372   
1373    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1374    {
1375        my $name_attr = $attrHash->{'name'};
1376        if (($name_attr eq $metaname) && ($metamode eq "override"))
1377        {
1378            if (!defined $parser->{'parameters'}->{'poscount'})
1379            {
1380                $parser->{'parameters'}->{'poscount'} = 0;
1381            }
1382            else
1383            {
1384                $parser->{'parameters'}->{'poscount'}++;
1385            }
1386           
1387            if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1388            {
1389                ##print STDERR "#### got match!!\n";
1390                # Get the value and override the current value
1391                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1392                $attrHash->{'_content'} = $metavalue;
1393               
1394                # Don't want it to wipe out any other pieces of metadata
1395                $parser->{'parameters'}->{'metamode'} = "done";
1396            }
1397            elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1398            {
1399                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1400                $attrHash->{'_content'} = $metavalue;
1401                $parser->{'parameters'}->{'metamode'} = "done";
1402            }
1403        }
1404    }
1405
1406    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1407    return [$tagname => $attrHash];
1408}
1409
1410# This method exists purely for catching invalid section numbers that the client
1411# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1412# we've seen all the Sections in the doc.xml, and none of their section nums matched
1413# if the metamode has not been set to 'done' by then.
1414sub dxml_archive
1415{
1416    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1417    my $metamode = $parser->{'parameters'}->{'metamode'};
1418   
1419    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1420    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1421   
1422#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1423   
1424    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1425    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1426
1427    my $metaname = $parser->{'parameters'}->{'metaname'};
1428    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1429   
1430    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1431    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1432    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1433    print STDERR "\tin section $opt_doc_secnum.\n";
1434    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1435    }
1436   
1437    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1438    return [$tagname => $attrHash];
1439}
1440
1441sub dxml_description
1442{
1443    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1444    my $metamode = $parser->{'parameters'}->{'metamode'};
1445
1446    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1447    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1448   
1449    print STDERR "**** Processing a closing </Description> tag \n";
1450#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1451   
1452    # Accumulate the metadata
1453
1454    # We'll be accumulating metadata at this point if we haven't found and therefore
1455    # haven't processed the metadata yet.
1456    # For subsections, this means that if we're at a matching subsection, but haven't
1457    # found the correct metaname to override in that subsection, we accumulate it as new
1458    # meta in the subsection by adding it to the current description.
1459    # If there's no subsection info for the metadata, it will accumulate at the top level
1460    # section description if we hadn't found a matching metaname to override at this point.
1461
1462    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1463    # at the toplevel section. In the latter case, it means we want to process meta in the
1464    # toplevel section. So the eq check between the values below will work in all cases.
1465   
1466    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1467    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1468    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1469
1470    if ($curr_secnum eq $opt_doc_secnum
1471        && ($metamode eq "accumulate" || $metamode eq "override")) {
1472        if ($metamode eq "override") {
1473        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1474        }
1475
1476        # If we get to here and metamode is override, this means there
1477        # was no existing value to overide => treat as an append operation
1478       
1479        # Tack a new metadata tag on to the end of the <Metadata>+ block
1480        my $metaname = $parser->{'parameters'}->{'metaname'};
1481        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1482       
1483        my $metadata_attr = {
1484        '_content' => $metavalue,
1485        'name' => $metaname,
1486        'mode' => "accumulate"
1487        };
1488       
1489        my $append_metadata = [ "Metadata" => $metadata_attr ];
1490        my $description_content = $attrHash->{'_content'};
1491       
1492        print "Appending metadata to doc.xml\n";
1493       
1494        if (ref($description_content)) {
1495        # got some existing interesting nested content
1496        push(@$description_content, "    ", $append_metadata ,"\n        ");
1497        }
1498        else {
1499        #description_content is most likely a string such as "\n"
1500        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1501        }
1502       
1503        $parser->{'parameters'}->{'metamode'} = "done";
1504    }       
1505    else {
1506        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1507##      print STDERR "**** NOT ACCUMULATE?!? \n";
1508    }
1509
1510    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1511    return [$tagname => $attrHash];
1512}
1513
1514
1515sub dxml_start_section
1516{
1517    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1518
1519    my $new_depth = scalar(@$contextArray);
1520
1521    print STDERR "**** START SECTION \n";
1522   
1523    if ($new_depth == 1) {
1524    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1525    $parser->{'parameters'}->{'curr_section_num'}   = "";
1526    }
1527
1528    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1529    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1530
1531    my $new_secnum;
1532
1533    if ($new_depth > $old_depth) {
1534    # child subsection
1535    $new_secnum = "$old_secnum.1";
1536    }
1537    elsif ($new_depth == $old_depth) {
1538    # sibling section => increase it's value by 1
1539    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1540    $tail_num++;
1541    $new_secnum = $old_secnum;
1542    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1543    }
1544    else {
1545    # back up to parent section => lopp off tail
1546    $new_secnum = $old_secnum;
1547    $new_secnum =~ s/\.\d+$//;
1548    }
1549
1550    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1551    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1552   
1553    1;
1554}
1555
1556sub edit_xml_file
1557{
1558    my $self = shift @_;
1559    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1560
1561    # use XML::Rules to add it in (read in and out again)
1562    my $parser = XML::Rules->new(start_rules     => $start_rules,
1563                 rules           => $rules,
1564                 style           => 'filter',
1565                                 output_encoding => 'utf8' );
1566
1567    my $xml_in = "";
1568    if (!open(MIN,"<$filename")) {
1569    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1570    }
1571    else {
1572        # Read all the text in
1573    my $line;
1574    while (defined ($line=<MIN>)) {
1575        $xml_in .= $line;
1576    }
1577    close(MIN);
1578   
1579    my $MOUT;   
1580    if (!open($MOUT,">$filename")) {
1581        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1582    }
1583    else {
1584        # Matched lines will get handled by the call backs
1585##      my $xml_out = "";
1586
1587        binmode($MOUT,":utf8");
1588        $parser->filter($xml_in,$MOUT, $options);
1589
1590#       binmode(MOUT,":utf8");
1591#       print MOUT $xml_out;
1592        close($MOUT);       
1593    }
1594    }
1595
1596    # copy across any custom error information that was stored during parsing
1597    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1598}
1599
1600sub edit_doc_xml
1601{
1602    my $self = shift @_;
1603    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1604
1605    my $info_mess = <<RAWEND;
1606****************************
1607  edit_doc_xml()
1608****************************
1609doc_xml_filename = $doc_xml_filename
1610metaname = $metaname
1611metavalue = $metavalue
1612metapos = $metapos
1613metamode = $metamode
1614opt_secnum = $opt_secnum
1615prevmetavalue = $prevmetavalue
1616****************************
1617RAWEND
1618
1619    $gsdl_cgi->generate_message($info_mess);
1620   
1621    # To monitor which section/subsection number we are in
1622    my @start_rules =
1623    ( 'Section'    => \&dxml_start_section );
1624
1625    # use XML::Rules to add it in (read in and out again)
1626    # Set the call back functions
1627    my @rules =
1628    ( _default => 'raw',
1629      'Metadata'    => \&dxml_metadata,
1630      'Description' => \&dxml_description,
1631      'Archive'     => \&dxml_archive); # just for catching errors at end
1632     
1633    # Sets the parameters
1634    my $options = { 'metaname'  => $metaname,
1635            'metapos'   => $metapos,
1636            'metavalue' => $metavalue,
1637            'metamode'  => $metamode,
1638            'prevmetavalue' => $prevmetavalue };
1639           
1640    if (defined $opt_secnum) {
1641    $options->{'secnum'} = $opt_secnum;
1642    }
1643
1644    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1645}
1646
1647sub set_archives_metadata_entry
1648{
1649    my $self = shift @_;
1650    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1651   
1652    my $info_mess = <<RAWEND;
1653****************************
1654  set_archives_metadata_entry()
1655****************************
1656archive_dir = $archive_dir
1657collect_dir = $collect_dir
1658collect = $collect
1659infodbtype = $infodbtype
1660docid = $docid
1661metaname = $metaname
1662metapos = $metapos
1663metavalue = $metavalue
1664metamode = $metamode
1665prevmetavalue = $prevmetavalue
1666****************************
1667RAWEND
1668
1669    $gsdl_cgi->generate_message($info_mess);
1670   
1671    # Obtain the doc.xml path for the specified docID
1672    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1673
1674    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1675    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1676    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1677   
1678    # The $doc_xml_file is relative to the archives, and now let's get the full path
1679    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1680    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1681
1682    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1683    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1684    # Thereafter, we will finally be setting the overriding metavalue for this metaname
1685    if(!defined $metapos && $metamode eq "override") {
1686    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1687    }
1688
1689    # Edit the doc.xml file with the specified metadata name, value and position.
1690    # TODO: there is a potential problem here as this edit_doc_xml function
1691    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1692    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1693   
1694    # dxml_metadata method ignores metapos if metamode anything other than override
1695    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1696            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1697
1698    # return 0; # return 0 for now to indicate no error
1699    return (defined $self->{'error_msg'}) ? 1 : 0;
1700}
1701
1702
1703sub set_archives_metadata
1704{
1705    my $self = shift @_;
1706
1707    my $username  = $self->{'username'};
1708    my $collect   = $self->{'collect'};
1709    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1710   
1711    if ($baseaction::authentication_enabled) {
1712    # Ensure the user is allowed to edit this collection
1713    $self->authenticate_user($username, $collect);
1714    }
1715
1716    # Make sure the collection isn't locked by someone else
1717    $self->lock_collection($username, $collect);
1718
1719    $self->_set_archives_metadata(@_);
1720
1721    # Release the lock once it is done
1722    $self->unlock_collection($username, $collect);
1723}
1724
1725sub _set_archives_metadata_array
1726{
1727    my $self = shift @_;
1728   
1729    my $collect   = $self->{'collect'};
1730    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1731    my $site = $self->{'site'};
1732    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1733
1734    # look up additional args
1735   
1736    my $infodbtype = $self->{'infodbtype'};
1737   
1738    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1739   
1740    my $json_str      = $self->{'json'};
1741    my $doc_array = decode_json $json_str;
1742   
1743   
1744    my $global_status = 0;
1745    my $global_mess = "";
1746   
1747    my @all_docids = ();
1748   
1749    foreach my $doc_array_rec ( @$doc_array ) {
1750    my $status    = -1;
1751    my $docid     = $doc_array_rec->{'docid'};
1752
1753    push(@all_docids,$docid);
1754   
1755    my $metaname  = $doc_array_rec->{'metaname'};
1756    if(defined $metaname) {
1757       
1758        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1759
1760        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1761        my $metavalue = $doc_array_rec->{'metavalue'};
1762        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1763       
1764       
1765        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1766        # make "accumulate" the default (less destructive, as it won't actually
1767        # delete any existing values)
1768        $metamode = "accumulate";
1769        }       
1770       
1771        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1772                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1773    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1774        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1775       
1776        foreach my $metatable_rec ( @$metatable ) {
1777        $metaname  = $metatable_rec->{'metaname'};
1778        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1779        my $metapos = undef;
1780        my $prevmetavalue = undef;
1781        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1782       
1783        foreach my $metavalue ( @$metavals ) {
1784            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1785                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1786           
1787            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1788            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1789            # metavalue for this metaname will be lost
1790            $metamode = "accumulate";
1791            }
1792        }           
1793        }       
1794    }
1795       
1796    if ($status != 0) {
1797        # Catch error if set infodb entry failed
1798        $global_status = $status;
1799        $global_mess .= "Failed to set metadata key: $docid\n";
1800        $global_mess .= "Exit status: $status\n";
1801        $global_mess .= "System Error Message: $!\n";
1802        $global_mess .= "-" x 20 . "\n";
1803    }
1804    }
1805   
1806    if ($global_status != 0) {
1807    $global_mess .= "PATH: $ENV{'PATH'}\n";
1808    $gsdl_cgi->generate_error($global_mess);
1809    }
1810    else {
1811    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1812    $gsdl_cgi->generate_ok_message($mess);
1813    }
1814}
1815
1816sub set_archives_metadata_array
1817{
1818    my $self = shift @_;
1819
1820    my $username  = $self->{'username'};
1821    my $collect   = $self->{'collect'};
1822    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1823#    my $gsdlhome  = $self->{'gsdlhome'};
1824
1825    if ($baseaction::authentication_enabled) {
1826    # Ensure the user is allowed to edit this collection   
1827    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1828    }
1829
1830    my $site = $self->{'site'};
1831    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1832   
1833    $gsdl_cgi->checked_chdir($collect_dir);
1834
1835    # Obtain the collect dir
1836    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1837
1838    # Make sure the collection isn't locked by someone else
1839    $self->lock_collection($username, $collect);
1840
1841    $self->_set_archives_metadata_array(@_);
1842   
1843    # Release the lock once it is done
1844    $self->unlock_collection($username, $collect);
1845}
1846
1847sub _remove_archives_metadata
1848{
1849    my $self = shift @_;
1850
1851    my $collect   = $self->{'collect'};
1852    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1853#   my $gsdlhome  = $self->{'gsdlhome'};
1854    my $infodbtype = $self->{'infodbtype'};
1855   
1856    my $site = $self->{'site'};
1857       
1858    # Obtain the collect and archive dir   
1859    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1860   
1861    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1862
1863    # look up additional args
1864    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1865   
1866    my $metaname = $self->{'metaname'};
1867    my $metapos = $self->{'metapos'};
1868
1869    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1870
1871    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1872    # default to deleting the first metavalue for the given metaname
1873    # Beware that if both metapos AND metavalue are defined, both matches (if any)
1874    # seem to get deleted in one single remove_archives_meta action invocation.
1875    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1876    # deleted, both get deleted.
1877    if(!defined $metapos && !defined $metavalue) {
1878        $metapos = 0;
1879    }
1880
1881    my $metamode = $self->{'metamode'} || undef;
1882
1883    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1884    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1885
1886    # This now stores the full pathname
1887    my $doc_filename = $doc_rec->{'doc-file'}->[0];
1888
1889    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1890#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1891   
1892    if ($status == 0)
1893    {
1894        my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1895        $mess .= "  $metaname";
1896        $mess .= "->[$metapos]" if (defined $metapos);
1897
1898        $gsdl_cgi->generate_ok_message($mess); 
1899    }
1900    else
1901    {
1902        my $mess .= "Failed to remove archives metadata key: $docid\n";
1903        $mess .= "Exit status: $status\n";
1904        $mess .= "System Error Message: $!\n";
1905        $mess .= "-" x 20 . "\n";
1906       
1907        $gsdl_cgi->generate_error($mess);
1908    }
1909   
1910    #return $status; # in case calling functions have a use for this
1911}
1912
1913sub remove_archives_metadata
1914{
1915    my $self = shift @_;
1916
1917    my $username  = $self->{'username'};
1918    my $collect   = $self->{'collect'};
1919    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1920   
1921    if ($baseaction::authentication_enabled)
1922    {
1923        # Ensure the user is allowed to edit this collection       
1924        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1925    }
1926
1927    # Make sure the collection isn't locked by someone else
1928    $self->lock_collection($username, $collect);
1929
1930    $self->_remove_archives_metadata(@_);
1931
1932    # Release the lock once it is done
1933    $self->unlock_collection($username, $collect);
1934}
1935
1936sub remove_from_doc_xml
1937{
1938    my $self = shift @_;
1939    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1940   
1941    my @start_rules = ('Section' => \&dxml_start_section);
1942   
1943    # Set the call-back functions for the metadata tags
1944    my @rules =
1945    (
1946        _default => 'raw',
1947        'Metadata' => \&rfdxml_metadata
1948    );
1949       
1950    my $parser = XML::Rules->new
1951    (
1952        start_rules => \@start_rules,
1953        rules => \@rules,
1954        style => 'filter',
1955        output_encoding => 'utf8',
1956#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1957        stripspaces => 2|0|0 # ineffectual
1958    );
1959   
1960    my $status = 0;
1961    my $xml_in = "";
1962    if (!open(MIN,"<$doc_xml_filename"))
1963    {
1964        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1965        $status = 1;
1966    }
1967    else
1968    {
1969        # Read them in
1970        my $line;
1971        while (defined ($line=<MIN>)) {
1972            $xml_in .= $line;
1973        }
1974        close(MIN);
1975
1976        # Filter with the call-back functions
1977        my $xml_out = "";
1978
1979        my $MOUT;
1980        if (!open($MOUT,">$doc_xml_filename")) {
1981            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1982            $status = 1;
1983        }
1984        else {
1985            binmode($MOUT,":utf8");
1986            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1987            close($MOUT);       
1988        }
1989    }
1990    return $status;
1991}
1992
1993sub rfdxml_metadata
1994{
1995    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1996
1997    # For comparisons, toplevel section is indicated by ""
1998    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1999    my $secid = $parser->{'parameters'}->{'secid'} || "";
2000
2001    if (!($secid eq $curr_sec_num))
2002    {
2003        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2004        return [$tagname => $attrHash];
2005    }
2006
2007    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2008    {
2009        if (!defined $parser->{'parameters'}->{'poscount'})
2010        {
2011            $parser->{'parameters'}->{'poscount'} = 0;
2012        }
2013        else
2014        {
2015            $parser->{'parameters'}->{'poscount'}++;
2016        }
2017       
2018        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2019        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {         
2020            return [];
2021        }
2022    }
2023
2024    if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2025    {   
2026        return [];
2027    }
2028
2029    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2030    {   
2031        return [];
2032    }
2033   
2034    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2035    return [$tagname => $attrHash];
2036}
2037
2038sub mxml_metadata
2039{
2040    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2041    my $metaname = $parser->{'parameters'}->{'metaname'};
2042    my $metamode = $parser->{'parameters'}->{'metamode'};
2043
2044    # Report error if we don't see FileName tag before this
2045    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2046   
2047    # Don't do anything if we are not in the right FileSet
2048    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2049    if ($file_regexp =~ /\.\*/) {
2050    # Only interested in a file_regexp if it specifies precisely one
2051    # file. 
2052    # So, skip anything with a .* in it as it is too general
2053##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2054    return [$tagname => $attrHash];
2055    }
2056    my $src_file = $parser->{'parameters'}->{'src_file'};
2057    if (!($src_file =~ /$file_regexp/)) {
2058    return [$tagname => $attrHash];
2059    }
2060##    print STDERR "*** mxl metamode = $metamode\n";
2061
2062    # Find the right metadata tag and checks if we are going to override it
2063    my $name_attr = $attrHash->{'name'};
2064    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2065
2066    # now metadata.xml functions need to keep track of metapos
2067    if (!defined $parser->{'parameters'}->{'poscount'})
2068    {
2069        $parser->{'parameters'}->{'poscount'} = 0;
2070    }
2071    else
2072    {
2073        $parser->{'parameters'}->{'poscount'}++;
2074    }
2075
2076    # If either the metapos or prevmetavalue is set,
2077        # get the value and override the current value
2078    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2079
2080    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2081    {
2082        $attrHash->{'_content'} = $metavalue;
2083
2084        ##  print STDERR "**** overriding metadata.xml\n";
2085       
2086        # Don't want it to wipe out any other pieces of metadata
2087        $parser->{'parameters'}->{'metamode'} = "done";
2088    }
2089    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2090    {
2091        $attrHash->{'_content'} = $metavalue;
2092        $parser->{'parameters'}->{'metamode'} = "done";
2093    }
2094    }
2095
2096    # mxml_description will process the metadata if metadata is accumulate,
2097    # or if we haven't found the metadata to override
2098
2099    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2100    return [$tagname => $attrHash];
2101}
2102
2103
2104sub mxml_description
2105{
2106    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2107    my $metamode = $parser->{'parameters'}->{'metamode'};   
2108
2109    # Failed... Report error if we don't see FileName tag before this
2110    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2111
2112    # Don't do anything if we are not in the right FileSet
2113    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2114    if ($file_regexp =~ m/\.\*/) {
2115    # Only interested in a file_regexp if it specifies precisely one
2116    # file. 
2117    # So, skip anything with a .* in it as it is too general
2118    return [$tagname => $attrHash];
2119    }
2120    my $src_file = $parser->{'parameters'}->{'src_file'};
2121   
2122    if (!($src_file =~ m/$file_regexp/)) {
2123    return [$tagname => $attrHash];
2124    }
2125
2126    # Accumulate the metadata block to the end of the description block
2127    # Note: This adds metadata block to all description blocks, so if there are
2128    # multiple FileSets, it will add to all of them
2129    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2130
2131    # if metamode was "override" but get to here then it failed to
2132    # find an item to override, in which case it should append its
2133    # value to the end, just like the "accumulate" mode
2134
2135    if ($metamode eq "override") {
2136        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2137    }
2138
2139    # tack a new metadata tag on to the end of the <Metadata>+ block
2140    my $metaname = $parser->{'parameters'}->{'metaname'};
2141    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2142   
2143    my $metadata_attr = { '_content' => $metavalue,
2144                  'name'     => $metaname,
2145                  'mode'     => "accumulate" };
2146
2147    my $append_metadata = [ "Metadata" => $metadata_attr ];
2148    my $description_content = $attrHash->{'_content'};
2149   
2150##  print STDERR "*** appending to metadata.xml\n";
2151
2152    # append the new metadata element to the end of the current
2153    # content contained inside this tag
2154    if (ref($description_content) eq "") {
2155        # => string or numeric literal
2156        # this is caused by a <Description> block has no <Metadata> child elements
2157        # => set up an empty array in '_content'
2158        $attrHash->{'_content'} = [ "\n" ];
2159        $description_content = $attrHash->{'_content'};
2160    }
2161
2162    push(@$description_content,"    ", $append_metadata ,"\n        ");
2163    $parser->{'parameters'}->{'metamode'} = "done";
2164    }
2165
2166    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2167    return [$tagname => $attrHash];
2168}
2169
2170
2171sub mxml_filename
2172{
2173    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2174
2175    # Store the filename of the Current Fileset
2176    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2177    # FileName tag must come before Description tag
2178    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2179
2180    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2181    return [$tagname => $attrHash];
2182}
2183
2184
2185sub mxml_fileset
2186{
2187    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2188
2189    # Initilise the current_file
2190    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2191    # FileName tag must come before Description tag
2192    $parser->{'parameters'}->{'current_file'} = "";
2193
2194    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2195    return [$tagname => $attrHash];
2196}
2197
2198sub mxml_directorymetadata
2199{
2200    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2201
2202    # if we haven't processed the metadata when we reach the end of metadata.xml
2203    # it's because there's no particular FileSet element whose FileName matched
2204    # In which case, add a new FileSet for this FileName
2205    my $metamode = $parser->{'parameters'}->{'metamode'};
2206    if($metamode ne "done") {
2207   
2208    if ($metamode eq "override") {
2209        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2210    }
2211
2212    # If we get to here and metamode is override, this means there
2213    # was no existing value to overide => treat as an append operation
2214   
2215    # Create a new FileSet element and append to DirectoryMetadata
2216    # <FileSet>
2217    # <FileName>src_file</FileName>
2218    # <Description>
2219    # <Metadata mode="" name="">metavalue</Metadata>
2220    # </Description>
2221    # </FileSet>
2222    my $src_file = $parser->{'parameters'}->{'src_file'};
2223    my $metaname = $parser->{'parameters'}->{'metaname'};
2224    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2225    my $metadata_attr = {
2226        '_content' => $metavalue,
2227        'name' => $metaname,
2228        'mode' => "accumulate"
2229    };
2230    my $append_metadata = [ "Metadata" => $metadata_attr ];
2231    my $description_attr->{'_content'} = [ "\n\t\t   ", $append_metadata, "\n\t\t"];
2232    my $description_element = [ "Description" => $description_attr ];
2233
2234    #_content is not an attribute, it's special and holds the children of this element
2235    # including the textnode value embedded in this element if any.
2236    my $filename_attr = {'_content' => $src_file};
2237    my $filename_element = [ "FileName" => $filename_attr ];
2238
2239    my $fileset_attr = {};
2240    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2241    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2242   
2243   
2244    # get children of dirmeta, and push the new FileSet element onto it
2245    print "Appending metadata to metadata.xml\n";
2246    my $dirmeta_content = $attrHash->{'_content'};
2247    if (ref($dirmeta_content)) {
2248        # got some existing interesting nested content
2249        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2250        push(@$dirmeta_content, "\t", $fileset ,"\n");
2251    }
2252    else {
2253        #description_content is most likely a string such as "\n"
2254        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2255        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2256    }   
2257
2258    $parser->{'parameters'}->{'metamode'} = "done";
2259    }
2260    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2261    return [$tagname => $attrHash];
2262}
2263
2264
2265sub edit_metadata_xml
2266{
2267    my $self = shift @_;
2268    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2269
2270    # Set the call-back functions for the metadata tags
2271    my @rules =
2272    ( _default => 'raw',
2273          'FileName' => \&mxml_filename,
2274      'Metadata' => \&mxml_metadata,
2275      'Description' => \&mxml_description,
2276          'FileSet' => \&mxml_fileset,
2277      'DirectoryMetadata' => \&mxml_directorymetadata);
2278
2279    # use XML::Rules to add it in (read in and out again)
2280    my $parser = XML::Rules->new(rules => \@rules,
2281                 style => 'filter',
2282                                 output_encoding => 'utf8',
2283                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2284
2285    if (!-e $metadata_xml_filename) {
2286   
2287        if (open(MOUT,">$metadata_xml_filename")) {
2288           
2289            my $src_file_re = &util::filename_to_regex($src_file);
2290            # shouldn't the following also be in the above utility routine??
2291            # $src_file_re =~ s/\./\\./g;
2292       
2293            print MOUT "<?xml version=\"1.0\"?>\n";
2294            print MOUT "<DirectoryMetadata>\n";
2295            print MOUT " <FileSet>\n";
2296            print MOUT "  <FileName>$src_file_re</FileName>\n";
2297            print MOUT "  <Description>\n";
2298            print MOUT "  </Description>\n";
2299            print MOUT " </FileSet>\n";
2300            print MOUT "</DirectoryMetadata>\n";
2301
2302            close(MOUT);
2303        }
2304        else {
2305            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2306        }
2307    }
2308   
2309   
2310    my $xml_in = "";
2311    if (!open(MIN,"<$metadata_xml_filename")) {
2312        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2313    }
2314    else {
2315        # Read them in
2316        my $line;
2317        while (defined ($line=<MIN>)) {
2318            $xml_in .= $line;
2319        }
2320        close(MIN);
2321
2322        # Filter with the call-back functions
2323        my $xml_out = "";
2324
2325        my $MOUT;
2326        if (!open($MOUT,">$metadata_xml_filename")) {
2327            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2328        }
2329        else {
2330            binmode($MOUT,":utf8");
2331
2332            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2333            # At the moment, I will just hack it!
2334            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2335            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2336            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2337            #print MOUT $xml_out;
2338
2339            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2340                              metapos => $metapos,
2341                      metavalue => $metavalue,
2342                      metamode => $metamode,
2343                      src_file => $src_file,
2344                      prevmetavalue => $prevmetavalue,
2345                      current_file => undef} );
2346            close($MOUT);       
2347        }
2348   }
2349}
2350
2351
2352sub set_import_metadata
2353{
2354    my $self = shift @_;
2355   
2356    my $username  = $self->{'username'};
2357    my $collect   = $self->{'collect'};
2358    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2359   
2360    if ($baseaction::authentication_enabled) {
2361    # Ensure the user is allowed to edit this collection
2362    $self->authenticate_user($username, $collect);
2363    }
2364
2365    # Make sure the collection isn't locked by someone else
2366    $self->lock_collection($username, $collect);
2367 
2368    $self->_set_import_metadata(@_);
2369
2370    # Release the lock once it is done
2371    $self->unlock_collection($username, $collect);
2372   
2373}
2374
2375sub set_import_metadata_array
2376{
2377    my $self = shift @_;
2378
2379    my $username  = $self->{'username'};
2380    my $collect   = $self->{'collect'};
2381    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2382#    my $gsdlhome  = $self->{'gsdlhome'};
2383
2384    if ($baseaction::authentication_enabled) {
2385    # Ensure the user is allowed to edit this collection   
2386    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2387    }
2388
2389    my $site = $self->{'site'};
2390    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2391   
2392    $gsdl_cgi->checked_chdir($collect_dir);
2393
2394    # Make sure the collection isn't locked by someone else
2395    $self->lock_collection($username, $collect);
2396
2397    $self->_set_import_metadata_array(@_);
2398
2399    # Release the lock once it is done
2400    $self->unlock_collection($username, $collect);
2401
2402}
2403
2404
2405sub _set_import_metadata_array
2406{
2407    my $self = shift @_;
2408
2409    my $collect   = $self->{'collect'};
2410    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2411
2412    my $site = $self->{'site'};
2413    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2414   
2415    # look up additional args
2416   
2417    my $infodbtype = $self->{'infodbtype'};
2418   
2419    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2420    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2421   
2422    my $json_str = $self->{'json'};
2423    my $doc_array = decode_json $json_str;
2424   
2425    my $global_status = 0;
2426    my $global_mess = "";
2427   
2428    my @all_docids = ();
2429   
2430    foreach my $doc_array_rec ( @$doc_array )
2431    {
2432    my $status = -1;
2433    my $docid = $doc_array_rec->{'docid'};
2434   
2435    my ($docid_root,$docid_secnum);
2436    if(defined $docid) {   
2437        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2438        # as yet no support for setting subsection metadata in metadata.xml
2439        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2440        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2441        next; # skip this docid in for loop
2442        }
2443    }
2444
2445    push(@all_docids,$docid); # docid_root rather
2446   
2447    my $metaname = $doc_array_rec->{'metaname'};
2448    if (defined $metaname) {
2449        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2450        my $metavalue = $doc_array_rec->{'metavalue'};
2451        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2452
2453        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2454        # make "accumulate" the default (less destructive, as won't actually
2455        # delete any existing values)
2456        $metamode = "accumulate";
2457        }
2458
2459        # adding metapos and prevmetavalue support to import_metadata subroutines
2460        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2461        my $prevmetavalue = $self->{'prevmetavalue'};
2462
2463        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2464       
2465    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2466        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2467       
2468        foreach my $metatable_rec ( @$metatable ) {
2469        $metaname  = $metatable_rec->{'metaname'};
2470        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2471        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2472            # make "accumulate" the default (less destructive, as won't actually
2473            # delete any existing values)
2474            $metamode = "accumulate";
2475        }
2476
2477        # No support for metapos and prevmetavalue in the JSON metatable substructure
2478        my $metapos = undef;
2479        my $prevmetavalue = undef;
2480        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2481       
2482        foreach my $metavalue ( @$metavals ) {
2483            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2484
2485            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2486            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2487            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2488            # metavalue for this metaname will be lost
2489            $metamode = "accumulate";
2490            }
2491        }
2492        }
2493    }       
2494    }
2495
2496    # always a success message
2497    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2498    $gsdl_cgi->generate_ok_message($mess);
2499}
2500
2501# always returns true (1)
2502sub set_import_metadata_entry
2503{
2504    my $self = shift @_;
2505    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2506
2507    my $info_mess = <<RAWEND;
2508****************************
2509  set_import_metadata_entry()
2510****************************
2511collect = $collect
2512collect_dir = $collect_dir
2513infodbtype = $infodbtype
2514arcinfo_doc_filename = $arcinfo_doc_filename
2515docid = $docid
2516metaname = $metaname
2517metapos = $metapos
2518metavalue = $metavalue
2519metamode = $metamode
2520prevmetavalue = $prevmetavalue
2521****************************
2522RAWEND
2523
2524    $gsdl_cgi->generate_message($info_mess);
2525
2526    # import works with metadata.xml which can have inherited metadata
2527    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2528    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2529    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2530    # what they're doing if they provide a metapos.
2531    if(defined $metapos) {
2532    print STDERR "@@@@ WARNING: metapos defined.\n";
2533    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2534    }
2535
2536    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2537    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2538    my $metadata_xml_file;
2539    my $import_filename = undef;
2540   
2541    if (defined $docid) {
2542    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2543    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2544
2545    # This now stores the full pathname
2546    $import_filename = $doc_rec->{'src-file'}->[0];
2547    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2548         # as the array version of the method doesn't support the -f parameter yet
2549    my $import_file  = $self->{'f'};
2550    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2551    }
2552   
2553    # figure out correct metadata.xml file [?]
2554    # Assuming the metadata.xml file is next to the source file
2555    # Note: This will not work if it is using the inherited metadata from the parent folder
2556    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2557    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2558   
2559    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2560    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2561    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2562    if(!defined $metapos && $metamode eq "override") {
2563##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2564    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2565
2566    }
2567
2568    # Edit the metadata.xml
2569    # Modified by Jeffrey from DL Consulting
2570    # Handle the case where there is one metadata.xml file for multiple FileSets
2571    # The XML filter needs to know whether it is in the right FileSet
2572    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2573    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2574    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2575                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2576    #return 0;
2577    return $metadata_xml_filename;
2578}
2579
2580sub _remove_import_metadata
2581{
2582    my $self = shift @_;
2583
2584    my $collect   = $self->{'collect'};
2585    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2586#   my $gsdlhome  = $self->{'gsdlhome'};
2587    my $infodbtype = $self->{'infodbtype'};
2588   
2589    # Obtain the collect dir
2590    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2591    my $site = $self->{'site'};
2592    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2593   
2594    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2595    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2596
2597    # look up additional args
2598    my $docid = $self->{'d'};
2599    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2600    {
2601        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2602    }
2603   
2604    my $metaname = $self->{'metaname'};
2605    my $metapos = $self->{'metapos'};
2606    my $metavalue = $self->{'metavalue'};
2607    if(defined $metavalue) {
2608        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2609    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2610        $metapos = 0;
2611    }
2612    my $metamode = $self->{'metamode'} || undef;
2613
2614    # import works with metadata.xml which can have inherited metadata
2615    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2616    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2617    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2618    # what they're doing if they provide a metapos.
2619    if(defined $metapos) {
2620        print STDERR "@@@@ WARNING: metapos defined.\n";
2621        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2622    }
2623   
2624    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2625    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2626    my $metadata_xml_file;
2627    my $import_filename = undef;
2628    if (defined $docid)
2629    {
2630        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2631        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2632
2633        # This now stores the full pathname
2634        $import_filename = $doc_rec->{'src-file'}->[0];
2635    }
2636
2637    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2638    {
2639        $gsdl_cgi->generate_error("There is no metadata\n");
2640    }
2641   
2642    # figure out correct metadata.xml file [?]
2643    # Assuming the metadata.xml file is next to the source file
2644    # Note: This will not work if it is using the inherited metadata from the parent folder
2645    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2646    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2647   
2648    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2649   
2650    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2651    $mess .= "  $metaname";
2652    $mess .= " = $metavalue\n";
2653   
2654    $gsdl_cgi->generate_ok_message($mess);
2655
2656    #return $status; # in case calling functions have a use for this
2657}
2658
2659sub remove_import_metadata
2660{
2661    my $self = shift @_;
2662   
2663    my $username = $self->{'username'};
2664    my $collect   = $self->{'collect'};
2665    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2666   
2667    if ($baseaction::authentication_enabled) {
2668        # Ensure the user is allowed to edit this collection       
2669        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2670    }
2671
2672    # Make sure the collection isn't locked by someone else
2673    $self->lock_collection($username, $collect);
2674   
2675    $self->_remove_import_metadata(@_);
2676
2677    # Release the lock once it is done
2678    $self->unlock_collection($username, $collect);
2679
2680}
2681
2682sub remove_from_metadata_xml
2683{
2684    my $self = shift @_;
2685    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2686    # metamode generally has no meaning for removing meta, but is used by set_meta
2687    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2688
2689    # Set the call-back functions for the metadata tags
2690    my @rules =
2691    (
2692        _default => 'raw',
2693        'Metadata' => \&rfmxml_metadata,
2694        'FileName' => \&mxml_filename
2695    );
2696       
2697    my $parser = XML::Rules->new
2698    (
2699        rules => \@rules,
2700        style => 'filter',
2701        output_encoding => 'utf8',
2702     #normalisespaces => 1,
2703            stripspaces => 2|0|0 # ineffectual
2704    );
2705   
2706    my $xml_in = "";
2707    if (!open(MIN,"<$metadata_xml_filename"))
2708    {
2709        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2710    }
2711    else
2712    {
2713        # Read them in
2714        my $line;
2715        while (defined ($line=<MIN>)) {
2716            $xml_in .= $line;
2717        }
2718        close(MIN);
2719
2720        # Filter with the call-back functions
2721        my $xml_out = "";
2722
2723        my $MOUT;
2724        if (!open($MOUT,">$metadata_xml_filename")) {
2725            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2726        }
2727        else {
2728            binmode($MOUT,":utf8");
2729            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2730            close($MOUT);       
2731        }
2732    }
2733}
2734
2735sub rfmxml_metadata
2736{
2737    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2738
2739    # metadata.xml does not handle subsections
2740
2741    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2742    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2743        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2744    {
2745        if (!defined $parser->{'parameters'}->{'poscount'})
2746        {
2747            $parser->{'parameters'}->{'poscount'} = 0;
2748        }
2749        else
2750        {
2751            $parser->{'parameters'}->{'poscount'}++;
2752        }
2753
2754        # if overriding but no metapos, then clear all the meta for this metaname
2755        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2756            return [];
2757        }
2758   
2759        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2760        {
2761            return [];
2762        }
2763
2764        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2765        {
2766            return [];
2767        }       
2768    }
2769
2770    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2771    return [$tagname => $attrHash];
2772}
2773
2774sub _remove_live_metadata
2775{
2776    my $self = shift @_;
2777
2778    my $collect   = $self->{'collect'};
2779    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2780#    my $gsdlhome  = $self->{'gsdlhome'};
2781    my $infodbtype = $self->{'infodbtype'};
2782
2783    # Obtain the collect dir
2784    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2785    my $site = $self->{'site'};
2786    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2787
2788   
2789    # look up additional args
2790    my $docid     = $self->{'d'};
2791    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2792      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2793    }
2794   
2795    # Generate the dbkey
2796    my $metaname  = $self->{'metaname'};
2797    my $dbkey = "$docid.$metaname";
2798
2799    # To people who know $collect_tail please add some comments
2800    # Obtain the live gdbm_db path
2801    my $collect_tail = $collect;
2802    $collect_tail =~ s/^.*[\/\\]//;
2803    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2804    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2805
2806    # Remove the key
2807    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2808    my $status = system($cmd);
2809    if ($status != 0) {
2810        # Catch error if gdbmdel failed
2811    my $mess = "Failed to set metadata key: $dbkey\n";
2812   
2813    $mess .= "PATH: $ENV{'PATH'}\n";
2814    $mess .= "cmd = $cmd\n";
2815    $mess .= "Exit status: $status\n";
2816    $mess .= "System Error Message: $!\n";
2817
2818    $gsdl_cgi->generate_error($mess);
2819    }
2820    else {
2821    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2822    }
2823
2824}
2825
2826sub remove_live_metadata
2827{
2828    my $self = shift @_;
2829
2830    my $username  = $self->{'username'};
2831    my $collect   = $self->{'collect'};
2832    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2833    my $gsdlhome  = $self->{'gsdlhome'};
2834   
2835    if ($baseaction::authentication_enabled) {
2836    # Ensure the user is allowed to edit this collection   
2837    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2838    }
2839
2840    # Make sure the collection isn't locked by someone else
2841    $self->lock_collection($username, $collect);
2842
2843    $self->_remove_live_metadata(@_);
2844
2845    $self->unlock_collection($username, $collect);
2846}
2847
2848sub remove_metadata
2849{
2850    my $self = shift @_;
2851
2852    my $where = $self->{'where'};
2853    if(!$where) {
2854    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2855    return;
2856    }
2857
2858    my $username  = $self->{'username'};
2859    my $collect   = $self->{'collect'};
2860    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2861
2862    if ($baseaction::authentication_enabled) {
2863    # Ensure the user is allowed to edit this collection   
2864    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2865    }
2866
2867    # Make sure the collection isn't locked by someone else
2868    $self->lock_collection($username, $collect);
2869
2870    # check which directories need to be processed, specified in $where as
2871    # any combination of import|archives|index|live
2872    if($where =~ m/import/) {
2873    $self->_remove_import_metadata(@_);     
2874    }
2875    if($where =~ m/archives/) {
2876    $self->_remove_archives_metadata(@_);       
2877   }
2878    if($where =~ m/index/) {
2879    $self->_remove_index_metadata(@_);     
2880    }
2881
2882    # Release the lock once it is done
2883    $self->unlock_collection($username, $collect);
2884}
2885
2886# the internal version, without authentication
2887sub _remove_index_metadata
2888{   
2889    my $self = shift @_;
2890
2891    my $collect   = $self->{'collect'};
2892    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2893#    my $gsdlhome  = $self->{'gsdlhome'};
2894    my $infodbtype = $self->{'infodbtype'};
2895   
2896    # Obtain the collect dir
2897    my $site = $self->{'site'};
2898    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2899    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2900
2901        # look up additional args
2902    my $docid     = $self->{'d'};
2903    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2904      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2905    }
2906    my $metaname  = $self->{'metaname'};
2907    my $metapos   = $self->{'metapos'};
2908    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2909
2910    # To people who know $collect_tail please add some comments
2911    # Obtain the path to the database
2912    my $collect_tail = $collect;
2913    $collect_tail =~ s/^.*[\/\\]//;
2914    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2915    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2916
2917    # Read the docid entry
2918    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2919
2920    # Check to make sure the key does exist
2921    if (!defined ($doc_rec->{$metaname})) {
2922        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2923    }
2924
2925    # Obtain the specified metadata pos
2926    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2927    if(!defined $metapos && !defined $metavalue) {
2928        $metapos = 0;
2929    }
2930   
2931
2932    # consider check key is defined before deleting?
2933    # Loop through the metadata array and ignore the specified position
2934    my $filtered_metadata = [];
2935    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
2936    for (my $i=0; $i<$num_metadata_vals; $i++) {
2937    my $metaval = shift(@{$doc_rec->{$metaname}});
2938
2939    if (!defined $metavalue && $i != $metapos) {
2940        push(@$filtered_metadata,$metaval);
2941    }
2942   
2943    if(defined $metavalue && !($metavalue eq $metaval))
2944    {
2945        push(@$filtered_metadata,$metaval);
2946    }
2947    }
2948    $doc_rec->{$metaname} = $filtered_metadata;
2949
2950    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2951    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2952
2953    if ($status != 0) {
2954    my $mess = "Failed to set metadata key: $docid\n";
2955   
2956    $mess .= "PATH: $ENV{'PATH'}\n";
2957    $mess .= "Exit status: $status\n";
2958    $mess .= "System Error Message: $!\n";
2959
2960    $gsdl_cgi->generate_error($mess);
2961    }
2962    else {
2963    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2964    $mess .= "  $metaname";
2965    $mess .= "->[$metapos]" if (defined $metapos);
2966
2967    $gsdl_cgi->generate_ok_message($mess);
2968    }
2969
2970    #return $status; # in case calling functions have a use for this
2971}
2972
2973sub remove_index_metadata
2974{
2975    my $self = shift @_;
2976
2977    my $username  = $self->{'username'};
2978    my $collect   = $self->{'collect'};
2979    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2980#    my $gsdlhome  = $self->{'gsdlhome'};
2981   
2982    if ($baseaction::authentication_enabled) {
2983    # Ensure the user is allowed to edit this collection   
2984    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2985    }
2986
2987    # Obtain the collect dir
2988    my $site = $self->{'site'};
2989    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2990    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2991
2992    # Make sure the collection isn't locked by someone else
2993    $self->lock_collection($username, $collect);
2994
2995    $self->_remove_index_metadata(@_);
2996
2997    # Release the lock once it is done
2998    $self->unlock_collection($username, $collect);
2999}
3000
3001
3002# Was trying to reused the codes, but the functions need to be broken
3003# down more before they can be reused, otherwise there will be too
3004# much overhead and duplicate process...
3005sub insert_metadata
3006{
3007    my $self = shift @_;
3008   
3009    my $username  = $self->{'username'};
3010    my $collect   = $self->{'collect'};
3011    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3012    my $gsdlhome  = $self->{'gsdlhome'};
3013    my $infodbtype = $self->{'infodbtype'};
3014   
3015    # If the import metadata and gdbm database have been updated, we
3016    # need to insert some notification to warn user that the the text
3017    # they see at the moment is not indexed and require a rebuild.
3018    my $rebuild_pending_macro = "_rebuildpendingmessage_";
3019
3020    if ($baseaction::authentication_enabled) {
3021    # Ensure the user is allowed to edit this collection
3022    $self->authenticate_user($username, $collect);
3023    }
3024
3025    # Obtain the collect and archive dir   
3026    my $site = $self->{'site'};
3027    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3028    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3029    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3030
3031    # Make sure the collection isn't locked by someone else
3032    $self->lock_collection($username, $collect);
3033   
3034    # Check additional args
3035    my $docid = $self->{'d'};
3036    if (!defined($docid)) {
3037    $gsdl_cgi->generate_error("No document id is specified: d=...");
3038    }
3039    my $metaname = $self->{'metaname'};
3040    if (!defined($metaname)) {
3041    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3042    }
3043    my $metavalue = $self->{'metavalue'};
3044    if (!defined($metavalue) || $metavalue eq "") {
3045    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3046    }
3047    # make "accumulate" the default (less destructive, as won't actually
3048    # delete any existing values)
3049    my $metamode = "accumulate";
3050
3051    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3052    my $metapos   = undef;
3053    my $prevmetavalue = undef;
3054
3055    #=======================================================================#
3056    # set_import_metadata [START]
3057    #=======================================================================#
3058    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3059    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3060    my $metadata_xml_file;
3061    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3062    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3063   
3064    # This now stores the full pathname
3065    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3066   
3067    # figure out correct metadata.xml file [?]
3068    # Assuming the metadata.xml file is next to the source file
3069    # Note: This will not work if it is using the inherited metadata from the parent folder
3070    my ($import_tailname, $import_dirname)
3071    = File::Basename::fileparse($import_filename);
3072    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3073
3074    # Shane's escape characters
3075    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3076    $metavalue =~ s/\,/&#44;/g;
3077    $metavalue =~ s/\:/&#58;/g;
3078    $metavalue =~ s/\|/&#124;/g;
3079    $metavalue =~ s/\(/&#40;/g;
3080    $metavalue =~ s/\)/&#41;/g;
3081    $metavalue =~ s/\[/&#91;/g;
3082    $metavalue =~ s/\\/&#92;/g;
3083    $metavalue =~ s/\]/&#93;/g;
3084    $metavalue =~ s/\{/&#123;/g;
3085    $metavalue =~ s/\}/&#125;/g;
3086    $metavalue =~ s/\"/&#34;/g;
3087    $metavalue =~ s/\`/&#96;/g;
3088    $metavalue =~ s/\n/_newline_/g;
3089
3090    # Edit the metadata.xml
3091    # Modified by Jeffrey from DL Consulting
3092    # Handle the case where there is one metadata.xml file for multiple FileSets
3093    # The XML filter needs to know whether it is in the right FileSet
3094    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3095    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3096    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3097                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3098    #=======================================================================#
3099    # set_import_metadata [END]
3100    #=======================================================================#
3101
3102
3103    #=======================================================================#
3104    # set_metadata (accumulate version) [START]
3105    #=======================================================================#
3106    # To people who know $collect_tail please add some comments
3107    # Obtain path to the database
3108    my $collect_tail = $collect;
3109    $collect_tail =~ s/^.*[\/\\]//;
3110    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3111    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3112
3113    # Read the docid entry
3114    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3115
3116    # Protect the quotes
3117    $metavalue =~ s/\"/\\\"/g;
3118
3119    # Adds the pending macro
3120    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3121
3122    # If the metadata doesn't exist, create a new one
3123    if (!defined($doc_rec->{$metaname})){   
3124    $doc_rec->{$metaname} = [ $macro_metavalue ];
3125    }
3126    # Else, let's acculumate the values
3127    else {
3128        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3129    }
3130
3131    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3132    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3133
3134    if ($status != 0) {
3135        # Catch error if gdbmget failed
3136    my $mess = "Failed to set metadata key: $docid\n";
3137   
3138    $mess .= "PATH: $ENV{'PATH'}\n";
3139    $mess .= "Exit status: $status\n";
3140    $mess .= "System Error Message: $!\n";
3141
3142    $gsdl_cgi->generate_error($mess);
3143    }
3144    else {
3145    my $mess = "insert-metadata successful: Key[$docid]\n";
3146    $mess .= "  [In metadata.xml] $metaname";
3147    $mess .= " = $metavalue\n";
3148    $mess .= "  [In database] $metaname";
3149    $mess .= " = $macro_metavalue\n";
3150    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3151        $gsdl_cgi->generate_ok_message($mess);
3152    }   
3153    #=======================================================================#
3154    # set_metadata (accumulate version) [END]
3155    #=======================================================================#
3156
3157    # Release the lock once it is done
3158    $self->unlock_collection($username, $collect);
3159}
3160
31611;
Note: See TracBrowser for help on using the browser.