root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 27314

Revision 27314, 104.7 KB (checked in by ak19, 6 years ago)

No authentication and collection-locking when getting metadata. (Only for setting metadata.)

Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130        'optional-args'   => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131                     
132    "remove-archives-metadata" => {
133        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134        'optional-args'   => [ "metapos", "metavalue" ] },
135
136    "remove-live-metadata" => {
137        'compulsory-args' => [ "d", "metaname" ],
138        'optional-args'   => [ ] },
139
140    "remove-index-metadata" => {
141        'compulsory-args' => [ "d", "metaname" ],
142        'optional-args'   => [ "metapos", "metavalue" ] },
143
144    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145        'compulsory-args' => [ "d", "metaname" ],
146        'optional-args'   => [ "where", "metapos", "metavalue" ] },
147
148    #INSERT METHODS
149    "insert-metadata" => {
150        'compulsory-args' => [ "d", "metaname", "metavalue" ],
151        'optional-args'   => [ ] }
152};
153
154
155sub new
156{
157    my $class = shift (@_);
158    my ($gsdl_cgi,$iis6_mode) = @_;
159
160    # Treat metavalue specially.  To transmit this through a GET request
161    # the Javascript side has url-encoded it, so here we need to decode
162    # it before proceeding
163
164    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173    return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179    my $self = shift @_;
180
181    my $username  = $self->{'username'};
182    my $collect   = $self->{'collect'};
183    my $gsdl_cgi  = $self->{'gsdl_cgi'};
184    my $gsdlhome  = $self->{'gsdlhome'};
185    my $infodbtype = $self->{'infodbtype'};
186   
187    # live metadata gets/saves value scoped (prefixed) by the current usename
188    # so (for now) let's not bother to enforce authentication
189
190    # Obtain the collect dir
191    my $site = $self->{'site'};
192    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
194
195    # No locking collection when getting metadata, only when setting it
196#    $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
197
198    # look up additional args
199    my $docid  = $self->{'d'};
200    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
201       $gsdl_cgi->generate_error("No docid (d=...) specified.");
202    }
203
204    # Generate the dbkey
205    my $metaname  = $self->{'metaname'};
206    my $dbkey = "$docid.$metaname";
207
208    # To people who know $collect_tail please add some comments
209    # Obtain path to the database
210    my $collect_tail = $collect;
211    $collect_tail =~ s/^.*[\/|\\]//;
212    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
213    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
214   
215    # Obtain the content of the key
216    my $cmd = "gdbmget $infodb_file_path $dbkey";
217    if (open(GIN,"$cmd |") == 0) {
218        # Catch error if gdbmget failed
219    my $mess = "Failed to get metadata key: $metaname\n";
220    $mess .= "$!\n";
221
222    $gsdl_cgi->generate_error($mess);
223    }
224    else {
225    binmode(GIN,":utf8");
226        # Read everything in and concatenate them into $metavalue
227    my $metavalue = "";
228    my $line;
229    while (defined ($line=<GIN>)) {
230        $metavalue .= $line;
231    }
232    close(GIN);
233    chomp($metavalue); # Get rid off the tailing newlines
234    $gsdl_cgi->generate_ok_message("$metavalue");
235    }
236
237    # Release the lock once it is done
238#    $self->unlock_collection($username, $collect);
239}
240
241# just calls the index version
242sub get_metadata
243{
244    my $self = shift @_;
245    $self->get_index_metadata(@_);
246}
247
248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254    my $self = shift @_;
255
256    my $where = $self->{'where'};
257    if (!$where) { 
258    $where = "index"; # default behaviour is to get the values from index
259    }
260
261    # Only when setting metadata do we perform authentication and do we lock the collection
262    # not when getting metadata
263
264    # for get_meta_array, the where param can only be ONE of import, archives, index, live
265    if($where =~ m/index/) {
266    $self->_get_index_metadata_array(@_);
267    }
268    # still to be implemented for archives, import and live
269}
270
271# takes a JSON string and returns a JSON string
272# resulting string is of the form:
273# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
274sub _get_index_metadata_array
275{
276    my $self = shift @_;
277
278    my $collect   = $self->{'collect'};
279    my $gsdl_cgi  = $self->{'gsdl_cgi'};
280    my $site = $self->{'site'};
281    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282
283    # look up additional args   
284    my $infodbtype = $self->{'infodbtype'};
285   
286    # To people who know $collect_tail please add some comments
287    # Obtain the path to the database
288    my $collect_tail = $collect;
289    $collect_tail =~ s/^.*[\/\\]//;
290    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
291    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
292
293    my $json_str      = $self->{'json'};
294    my $doc_array = decode_json $json_str;
295
296    my $json_result_str = "[";
297    my $first_doc_rec = 1;
298
299    foreach my $doc_array_rec ( @$doc_array ) {
300   
301    my $docid     = $doc_array_rec->{'docid'};
302   
303    if($first_doc_rec) {
304        $first_doc_rec = 0;
305    } else {
306        $json_result_str .= ",";
307    }
308    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
309
310    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
311    $json_result_str = $json_result_str . ",\"metatable\":[";
312
313    my $first_rec = 1;
314    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
315        if($first_rec) {
316        $first_rec = 0;
317        } else {
318        $json_result_str .= ",";       
319        }
320       
321        my $metaname  = $metatable_rec->{'metaname'};
322        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
323
324        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
325        if(!defined $metapos) {
326        $metapos = 0;
327        }
328
329         # Read the docid entry
330        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
331 
332        # Basically loop through and unescape_html the values
333        foreach my $k (keys %$doc_rec) {
334        my @escaped_v = ();
335        foreach my $v (@{$doc_rec->{$k}}) {
336            my $ev = &ghtml::unescape_html($v);
337            push(@escaped_v, $ev);
338        }
339        $doc_rec->{$k} = \@escaped_v;
340        }
341
342        # Obtain the specified metadata value(s)
343        my $metavalue;
344
345        if($metapos ne "all") { # get the value at a single metapos
346
347        $metavalue = $doc_rec->{$metaname}->[$metapos];     
348        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
349
350        } else {
351        my $first_metaval = 1;
352        $metapos = 0;
353        $metavalue = $doc_rec->{$metaname}->[$metapos];
354
355        while (defined $metavalue) {
356            if($first_metaval) {       
357            $first_metaval = 0;
358            } else {
359            $json_result_str .= ",";
360            }
361       
362            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
363
364            $metapos++;
365            $metavalue = $doc_rec->{$metaname}->[$metapos];         
366        }
367        }
368
369        $json_result_str .= "]}"; # close metavals array and metatable record
370    }
371   
372    $json_result_str .= "]}"; # close metatable array and docid record
373    }
374
375    $json_result_str .= "]"; # close array of docids
376
377    $gsdl_cgi->generate_ok_message($json_result_str);   
378}
379
380
381sub get_index_metadata
382{
383    my $self = shift @_;
384
385    my $username  = $self->{'username'};
386    my $collect   = $self->{'collect'};
387    my $gsdl_cgi  = $self->{'gsdl_cgi'};
388    my $gsdlhome  = $self->{'gsdlhome'};
389
390    # Obtain the collect dir
391    my $site = $self->{'site'};
392    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
393    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
394
395    # look up additional args
396    my $docid     = $self->{'d'};
397    my $metaname  = $self->{'metaname'};
398    my $metapos   = $self->{'metapos'};
399    my $infodbtype = $self->{'infodbtype'};
400
401    # To people who know $collect_tail please add some comments
402    # Obtain path to the database
403    my $collect_tail = $collect;
404    $collect_tail =~ s/^.*[\/\\]//;
405    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
406    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
407
408    # Read the docid entry
409    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
410 
411    # Basically loop through and unescape_html the values
412    foreach my $k (keys %$doc_rec) {
413    my @escaped_v = ();
414    foreach my $v (@{$doc_rec->{$k}}) {
415        my $ev = &ghtml::unescape_html($v);
416        push(@escaped_v, $ev);
417    }
418    $doc_rec->{$k} = \@escaped_v;
419    }
420
421    # Obtain the specified metadata value
422    $metapos = 0 if (!defined $metapos);
423    my $metavalue = $doc_rec->{$metaname}->[$metapos];
424    $gsdl_cgi->generate_ok_message("$metavalue");
425   
426}
427
428
429sub get_import_metadata
430{
431    my $self = shift @_;
432
433    my $username  = $self->{'username'};
434    my $collect   = $self->{'collect'};
435    my $gsdl_cgi  = $self->{'gsdl_cgi'};
436    my $gsdlhome  = $self->{'gsdlhome'};
437
438    # Obtain the collect dir
439    my $site = $self->{'site'};
440    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
441    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
442
443    # look up additional args
444    my $docid     = $self->{'d'};
445    my $metaname  = $self->{'metaname'};
446    my $metapos = $self->{'metapos'};
447    $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
448
449    my $infodbtype = $self->{'infodbtype'};
450    if (!defined $docid)
451    {
452        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
453    }
454
455    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
456    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
457    my $metadata_xml_file;
458    my $import_filename = undef;
459   
460
461    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
462    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
463    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
464
465    # This now stores the full pathname
466    $import_filename = $doc_rec->{'src-file'}->[0];
467
468    # figure out correct metadata.xml file [?]
469    # Assuming the metadata.xml file is next to the source file
470    # Note: This will not work if it is using the inherited metadata from the parent folder
471    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
472    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
473
474    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
475
476}
477
478sub get_metadata_from_metadata_xml
479{
480    my $self = shift @_;
481    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
482   
483    my @rules =
484    (
485        _default => 'raw',
486        'Metadata' => \&gfmxml_metadata,
487        'FileName' => \&mxml_filename
488    );
489       
490    my $parser = XML::Rules->new
491    (
492        rules => \@rules,
493        output_encoding => 'utf8'
494    );
495   
496    my $xml_in = "";
497    if (!open(MIN,"<$metadata_xml_filename"))
498    {
499        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
500    }
501    else
502    {
503        # Read them in
504        my $line;
505        while (defined ($line=<MIN>)) {
506            $xml_in .= $line;
507        }
508        close(MIN);
509
510        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
511       
512        if(defined $parser->{'pad'}->{'metavalue'})
513        {
514            return $parser->{'pad'}->{'metavalue'};
515        }
516        else
517        {
518            return "";
519        }
520    }
521}
522
523sub gfmxml_metadata
524{
525    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
526
527    # no subsection support yet in metadata.xml
528
529    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
530    {
531        if (!defined $parser->{'parameters'}->{'poscount'})
532        {
533            $parser->{'parameters'}->{'poscount'} = 0;
534        }
535        else
536        {
537            $parser->{'parameters'}->{'poscount'}++;
538        }
539   
540        # gets the first value by default, since metapos defaults to 0
541        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
542        {
543            if($parser->{'parameters'}->{'metapos'} > 0) {
544            print STDERR "@@@@ WARNING: non-zero metapos.\n";
545            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
546            }
547            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
548        }
549    }
550}
551
552sub get_archives_metadata
553{
554    my $self = shift @_;
555
556    my $username  = $self->{'username'};
557    my $collect   = $self->{'collect'};
558    my $gsdl_cgi  = $self->{'gsdl_cgi'};
559    my $gsdlhome  = $self->{'gsdlhome'};
560    my $infodbtype = $self->{'infodbtype'};
561
562    # Obtain the collect dir
563    my $site = $self->{'site'};
564    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
565   
566    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
567
568    # look up additional args
569    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
570    $docid_secnum = "" if (!defined $docid_secnum);
571   
572    my $metaname = $self->{'metaname'};
573    my $metapos = $self->{'metapos'};
574    $metapos = 0 if (!defined $metapos);
575   
576    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
577    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
578
579    # This now stores the full pathname
580    my $doc_filename = $doc_rec->{'doc-file'}->[0];
581
582    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
583
584}
585
586sub get_metadata_from_archive_xml
587{
588    my $self = shift @_;
589    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
590   
591    my @start_rules = ('Section' => \&dxml_start_section);
592   
593    my @rules =
594    (
595        _default => 'raw',
596        'Metadata' => \&gfdxml_metadata
597    );
598       
599    my $parser = XML::Rules->new
600    (
601        start_rules => \@start_rules,
602        rules => \@rules,
603        output_encoding => 'utf8'
604    );
605   
606    my $xml_in = "";
607    if (!open(MIN,"<$doc_xml_filename"))
608    {
609        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
610    }
611    else
612    {
613        # Read them in
614        my $line;
615        while (defined ($line=<MIN>)) {
616            $xml_in .= $line;
617        }
618        close(MIN);
619
620        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
621       
622        if(defined $parser->{'pad'}->{'metavalue'})
623        {
624            return $parser->{'pad'}->{'metavalue'};
625        }
626        else
627        {
628            return "";
629        }
630    }
631}
632
633sub gfdxml_metadata
634{
635    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
636   
637    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
638    {
639        return;
640    }
641
642    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
643    {
644        if (!defined $parser->{'parameters'}->{'poscount'})
645        {
646            $parser->{'parameters'}->{'poscount'} = 0;
647        }
648        else
649        {
650            $parser->{'parameters'}->{'poscount'}++;
651        }
652    }
653
654    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
655    {   
656        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
657    }
658}
659
660sub _set_live_metadata
661{
662    my $self = shift @_;
663
664    my $collect   = $self->{'collect'};
665    my $gsdl_cgi  = $self->{'gsdl_cgi'};
666    #my $gsdlhome  = $self->{'gsdlhome'};
667    my $infodbtype = $self->{'infodbtype'};
668
669    # Obtain the collect dir
670    my $site = $self->{'site'};
671    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
672    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
673
674
675    # look up additional args
676    my $docid     = $self->{'d'};
677    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
678      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
679    }
680    my $metavalue = $self->{'metavalue'};
681
682    # Generate the dbkey   
683    my $metaname  = $self->{'metaname'};
684    my $dbkey = "$docid.$metaname";
685
686    # To people who know $collect_tail please add some comments
687    # Obtain path to the database
688    my $collect_tail = $collect;
689    $collect_tail =~ s/^.*[\/\\]//;
690    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
691    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
692
693    # Set the new value
694    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
695    my $status = system($cmd);
696    if ($status != 0) {
697        # Catch error if gdbmget failed
698    my $mess = "Failed to set metadata key: $dbkey\n";
699
700    $mess .= "PATH: $ENV{'PATH'}\n";
701    $mess .= "cmd = $cmd\n";
702    $mess .= "Exit status: $status\n";
703    $mess .= "System Error Message: $!\n";
704
705    $gsdl_cgi->generate_error($mess);
706    }
707    else {
708    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
709    }
710
711    #return $status; # in case calling functions have any further use for this
712}
713
714sub set_live_metadata
715{
716    my $self = shift @_;
717
718    my $username  = $self->{'username'};
719    my $collect   = $self->{'collect'};
720    my $gsdl_cgi  = $self->{'gsdl_cgi'};
721 
722    if ($baseaction::authentication_enabled) {
723    # Ensure the user is allowed to edit this collection   
724    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
725    }
726
727    # Make sure the collection isn't locked by someone else
728    $self->lock_collection($username, $collect);
729
730    $self->_set_live_metadata(@_);
731
732    # Release the lock once it is done
733    $self->unlock_collection($username, $collect);
734}
735
736sub set_index_metadata_entry
737{
738    my $self = shift @_;
739    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
740   
741    # To people who know $collect_tail please add some comments
742    # Obtain path to the database
743    my $collect_tail = $collect;
744    $collect_tail =~ s/^.*[\/\\]//;
745    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
746    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
747   
748#   print STDERR "**** infodb file path = $infodb_file_path\n";
749#   print STDERR "***** infodb type = $infodbtype\n";
750   
751    # Read the docid entry
752    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
753   
754    # Set the metadata value
755    if (defined $metapos) {
756    # if metamode=accumulate AND metapos, warn user and then use metapos
757    if (defined $metamode && $metamode eq "accumulate") {
758        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
759        print STDERR "**** Proceeding by using metapos\n";
760    }
761    $doc_rec->{$metaname}->[$metapos] = $metavalue;
762    }
763    elsif (defined $metamode && $metamode eq "override") {
764    $doc_rec->{$metaname} = [ $metavalue ];
765    }
766    else { # default for index was to override, but because accumulate is less destructive,
767    # and because accumulate is the default for archives and import, that's the new default for index too
768    if(defined $doc_rec->{$metaname}) {
769        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
770    } else {
771        $doc_rec->{$metaname} = [ $metavalue ];
772    }
773    }
774 
775    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
776   
777    return $status;
778   
779}
780
781sub _set_import_metadata
782{
783    my $self = shift @_;
784
785    my $collect   = $self->{'collect'};
786    my $gsdl_cgi  = $self->{'gsdl_cgi'};
787    my $infodbtype = $self->{'infodbtype'};
788#    my $gsdlhome  = $self->{'gsdlhome'};
789     
790    # Obtain the collect and archive dir   
791    my $site = $self->{'site'};
792    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
793    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
794    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
795   
796    # look up additional args
797    # want either d= or f=
798    my $docid  = $self->{'d'};
799    my ($docid_root,$docid_secnum);
800    if(defined $docid) {   
801    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
802    # as yet no support for setting subsection metadata in metadata.xml
803    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
804        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
805        return;
806    }
807    }
808
809    my $import_file  = $self->{'f'};
810    if ((!defined $docid) && (!defined $import_file)) {
811    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
812    }
813
814    # Get the parameters and set default mode to "accumulate"
815    my $metaname   = $self->{'metaname'};
816    my $metavalue  = $self->{'metavalue'};
817##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
818    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
819   
820    my $metamode   = $self->{'metamode'};
821    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
822    # make "accumulate" the default (less destructive, as won't actually
823    # delete any existing values)
824    $metamode = "accumulate";
825    }
826
827    # adding metapos and prevmetavalue support to import_metadata subroutines
828    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
829    my $prevmetavalue = $self->{'prevmetavalue'};
830
831    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
832    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
833
834    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
835    $mess .= "  $metaname";
836    $mess .= " = $metavalue";
837    $mess .= " ($metamode)\n";
838
839    $gsdl_cgi->generate_ok_message($mess);
840
841    #return $status; # in case calling functions have any further use for this
842}
843
844# the version of set_index_meta that doesn't do authentication
845sub _set_archives_metadata
846{
847    my $self = shift @_;
848
849    my $collect   = $self->{'collect'};
850    my $gsdl_cgi  = $self->{'gsdl_cgi'};
851    my $infodbtype = $self->{'infodbtype'};
852   
853    # Obtain the collect and archive dir   
854    my $site = $self->{'site'};
855    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
856    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
857
858    # look up additional args
859    my $docid  = $self->{'d'};
860    my $metaname   = $self->{'metaname'};
861    my $metavalue  = $self->{'metavalue'};
862    my $prevmetavalue = $self->{'prevmetavalue'};
863   
864    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
865                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
866
867    my $metamode   = $self->{'metamode'};
868    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
869    # make "accumulate" the default (less destructive, as won't actually
870    # delete any existing values)
871    $metamode = "accumulate";
872    }
873
874    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
875                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
876   
877    if ($status == 0) {
878    my $mess = "set-archives-metadata successful: Key[$docid]\n";
879    $mess .= "  $metaname";
880    $mess .= "->[$metapos]" if (defined $metapos);
881    $mess .= " = $metavalue";
882    $mess .= " ($metamode)\n";
883   
884    $gsdl_cgi->generate_ok_message($mess); 
885    }
886    else {
887    my $mess .= "Failed to set archives metadata key: $docid\n";
888    $mess .= "Exit status: $status\n";
889    if(defined $self->{'error_msg'}) {
890        $mess .= "Error Message: $self->{'error_msg'}\n";
891    } else {
892        $mess .= "System Error Message: $!\n";
893    }
894    $mess .= "-" x 20 . "\n";
895   
896    $gsdl_cgi->generate_error($mess);
897    }
898
899    #return $status; # in case calling functions have any further use for this
900}
901
902
903# the version of set_index_meta that doesn't do authentication
904sub _set_index_metadata
905{
906    my $self = shift @_;
907
908    my $collect   = $self->{'collect'};
909    my $gsdl_cgi  = $self->{'gsdl_cgi'};
910
911    my $site = $self->{'site'};
912    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
913
914    # look up additional args
915    my $docid     = $self->{'d'};
916    my $metaname  = $self->{'metaname'};
917    my $metapos   = $self->{'metapos'}; # undef has meaning
918    my $metavalue = $self->{'metavalue'};
919    my $infodbtype = $self->{'infodbtype'};
920    my $metamode  = $self->{'metamode'};   
921
922    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
923   
924    if ($status != 0) {
925        # Catch error if set infodb entry failed
926    my $mess = "Failed to set metadata key: $docid\n";
927   
928    $mess .= "PATH: $ENV{'PATH'}\n";
929    $mess .= "Exit status: $status\n";
930    $mess .= "System Error Message: $!\n";
931   
932    $gsdl_cgi->generate_error($mess);
933    }
934    else {
935    my $mess = "set-index-metadata successful: Key[$docid]\n";
936    $mess .= "  $metaname";
937    $mess .= "->[$metapos]" if (defined $metapos);
938    $mess .= " = $metavalue\n";
939   
940    $gsdl_cgi->generate_ok_message($mess);
941    }
942
943    #return $status; # in case calling functions have any further use for this
944}
945
946sub set_index_metadata
947{
948    my $self = shift @_;
949
950    my $username  = $self->{'username'};
951    my $collect   = $self->{'collect'};
952    my $gsdl_cgi  = $self->{'gsdl_cgi'};
953    #my $gsdlhome  = $self->{'gsdlhome'};
954
955    if ($baseaction::authentication_enabled) {
956    # Ensure the user is allowed to edit this collection   
957    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
958    }
959
960    my $site = $self->{'site'};
961    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
962   
963    $gsdl_cgi->checked_chdir($collect_dir);
964
965    # Obtain the collect dir
966    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
967
968    # Make sure the collection isn't locked by someone else
969    $self->lock_collection($username, $collect);
970
971    $self->_set_index_metadata(@_);
972   
973    # Release the lock once it is done
974    $self->unlock_collection($username, $collect);
975}
976
977# call this to set the metadata for a combination of dirs archives, import or index, or live
978# if none specified, defaults to index which was the original behaviour of set_metadata.
979sub set_metadata
980{
981    my $self = shift @_;
982
983    # Testing that not defining a variable, setting it to "" or to "  " all return false
984    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
985
986    my $where = $self->{'where'};
987    if(!$where) {   
988    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
989    return;
990    }
991
992    # authenticate and lock collection once, even if processing multiple dirs
993    my $username  = $self->{'username'};
994    my $collect   = $self->{'collect'};
995    my $gsdl_cgi  = $self->{'gsdl_cgi'};
996   
997    if ($baseaction::authentication_enabled) {
998    # Ensure the user is allowed to edit this collection   
999    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1000    }
1001   
1002    if($where =~ m/index/) {
1003    my $site = $self->{'site'};
1004    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1005    $gsdl_cgi->checked_chdir($collect_dir);
1006    }
1007
1008    # Make sure the collection isn't locked by someone else
1009    $self->lock_collection($username, $collect);
1010
1011
1012    # now at last can set the metadata. $where can specify multiple
1013    # $where is of the form: import|archives|index, or a subset thereof
1014
1015    #my @whichdirs = split('\|', $where);
1016
1017    # just check whether $where contains import/archives/index/live in turn, and
1018    # for each case, process it accordingly
1019    if($where =~ m/import/) {
1020    $self->_set_import_metadata(@_);       
1021    }
1022
1023    if($where =~ m/archives/) {
1024
1025    # look up docID arg which is optional to set_metadata because it's optional
1026    # to set_import, but which is compulsory to set_archives_metadata
1027    my $docid     = $self->{'d'};
1028    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1029        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1030    }
1031    # we have a docid, so can set archives meta
1032    $self->_set_archives_metadata(@_); 
1033    }
1034
1035    if($where =~ m/index/) {
1036   
1037    # look up docID arg which is optional to set_metadata because it's optional
1038    # to set_import, but which is compulsory to set_archives_metadata
1039    my $docid     = $self->{'d'};
1040    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1041        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1042    }
1043    # we have a docid, so can set index meta
1044    $self->_set_index_metadata(@_);
1045    }   
1046
1047    if($where =~ m/live/) {
1048    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1049    }
1050
1051    # Release the lock once it is done
1052    $self->unlock_collection($username, $collect);
1053}
1054
1055sub set_metadata_array
1056{
1057    my $self = shift @_;
1058
1059    my $where = $self->{'where'};
1060    if(!$where) {   
1061    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1062    return;
1063    }
1064
1065    my $username  = $self->{'username'};
1066    my $collect   = $self->{'collect'};
1067    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1068
1069    if ($baseaction::authentication_enabled) {
1070    # Ensure the user is allowed to edit this collection
1071    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1072    }
1073
1074    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1075    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1076    my $site = $self->{'site'};
1077    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1078    $gsdl_cgi->checked_chdir($collect_dir);
1079
1080    # Make sure the collection isn't locked by someone else
1081    $self->lock_collection($username, $collect);
1082
1083    if($where =~ m/import/) {
1084    $self->_set_import_metadata_array(@_);
1085    }
1086    if($where =~ m/archives/) {
1087    $self->_set_archives_metadata_array(@_);
1088    }
1089    if($where =~ m/index/) {
1090    $self->_set_index_metadata_array(@_);
1091    }
1092    if($where =~ m/live/) {
1093        $self->_set_live_metadata_array(@_);
1094    }
1095
1096    # Release the lock once it is done
1097    $self->unlock_collection($username, $collect);
1098}
1099
1100sub _set_index_metadata_array
1101{
1102    my $self = shift @_;
1103
1104    my $collect   = $self->{'collect'};
1105    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1106    my $site = $self->{'site'};
1107    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1108
1109   
1110    # look up additional args
1111   
1112    my $infodbtype = $self->{'infodbtype'};
1113   
1114    my $json_str      = $self->{'json'};
1115    my $doc_array = decode_json $json_str;
1116   
1117   
1118    my $global_status = 0;
1119    my $global_mess = "";
1120   
1121    my @all_docids = ();
1122   
1123    foreach my $doc_array_rec ( @$doc_array ) {
1124   
1125    my $status = -1;
1126    my $docid     = $doc_array_rec->{'docid'};
1127   
1128    push(@all_docids,$docid);
1129
1130    my $metaname  = $doc_array_rec->{'metaname'};
1131    if(defined $metaname) {
1132        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1133        my $metavalue = $doc_array_rec->{'metavalue'};
1134        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1135
1136        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1137    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1138        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1139       
1140        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1141        $metaname  = $metatable_rec->{'metaname'};
1142        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1143        my $metapos = undef;
1144        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1145
1146        foreach my $metavalue ( @$metavals ) { # metavals is an array
1147            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1148            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1149            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1150            # metavalue for this metaname will be lost
1151            $metamode = "accumulate";
1152            }
1153        }           
1154        }
1155    }
1156   
1157    if ($status != 0) {
1158        # Catch error if set infodb entry failed
1159        $global_status = $status;
1160        $global_mess .= "Failed to set metadata key: $docid\n";
1161        $global_mess .= "Exit status: $status\n";
1162        $global_mess .= "System Error Message: $!\n";
1163        $global_mess .= "-" x 20;
1164    }
1165    }
1166   
1167    if ($global_status != 0) {
1168    $global_mess .= "PATH: $ENV{'PATH'}\n";
1169    $gsdl_cgi->generate_error($global_mess);
1170    }
1171    else {
1172    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1173    $gsdl_cgi->generate_ok_message($mess);
1174    }
1175}
1176
1177sub set_index_metadata_array
1178{
1179    my $self = shift @_;
1180
1181    my $username  = $self->{'username'};
1182    my $collect   = $self->{'collect'};
1183    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1184#    my $gsdlhome  = $self->{'gsdlhome'};
1185
1186    if ($baseaction::authentication_enabled) {
1187    # Ensure the user is allowed to edit this collection   
1188    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1189    }
1190
1191    my $site = $self->{'site'};
1192    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1193   
1194    $gsdl_cgi->checked_chdir($collect_dir);
1195
1196    # Obtain the collect dir
1197    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1198
1199    # Make sure the collection isn't locked by someone else
1200    $self->lock_collection($username, $collect);
1201
1202    $self->_set_index_metadata_array(@_);
1203
1204    # Release the lock once it is done
1205    $self->unlock_collection($username, $collect);
1206}
1207
1208# experimental, newly added in and untested
1209sub _set_live_metadata_array
1210{
1211    my $self = shift @_;
1212
1213    my $collect   = $self->{'collect'};
1214    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1215
1216    my $site = $self->{'site'};
1217    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1218
1219   
1220    # look up additional args
1221    my $infodbtype = $self->{'infodbtype'};
1222    # To people who know $collect_tail please add some comments
1223    # Obtain path to the database
1224    my $collect_tail = $collect;
1225    $collect_tail =~ s/^.*[\/\\]//;
1226    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1227    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1228
1229   
1230    my $json_str      = $self->{'json'};
1231    my $doc_array = decode_json $json_str;
1232   
1233   
1234    my $global_status = 0;
1235    my $global_mess = "";
1236   
1237    my @all_docids = ();
1238
1239
1240    foreach my $doc_array_rec ( @$doc_array ) {
1241   
1242    my $status = -1;
1243    my $docid     = $doc_array_rec->{'docid'};
1244
1245    push(@all_docids,$docid);
1246
1247    my $metaname  = $doc_array_rec->{'metaname'};
1248    if(defined $metaname) {
1249        my $dbkey = "$docid.$metaname";
1250        my $metavalue = $doc_array_rec->{'metavalue'};
1251
1252        # Set the new value
1253        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1254        $status = system($cmd);
1255
1256    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1257        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1258        foreach my $metatable_rec ( @$metatable ) {
1259        $metaname  = $metatable_rec->{'metaname'};
1260        my $dbkey = "$docid.$metaname";
1261
1262        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1263        foreach my $metavalue ( @$metavals ) {
1264             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1265             $status = system($cmd);
1266        }
1267        }
1268       
1269    }
1270
1271    if ($status != 0) {
1272        # Catch error if gdbmget failed
1273        $global_status = $status;
1274        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1275        $global_mess .= "Exit status: $status\n";
1276        $global_mess .= "System Error Message: $!\n";
1277        $global_mess .= "-" x 20;
1278    }
1279    }
1280   
1281    if ($global_status != 0) {
1282    $global_mess .= "PATH: $ENV{'PATH'}\n";
1283    $gsdl_cgi->generate_error($global_mess);
1284    }
1285    else {
1286    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1287    $gsdl_cgi->generate_ok_message($mess);
1288    }
1289}
1290
1291sub set_live_metadata_array
1292{
1293    my $self = shift @_;
1294
1295    my $username  = $self->{'username'};
1296    my $collect   = $self->{'collect'};
1297    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1298
1299    if ($baseaction::authentication_enabled) {
1300    # Ensure the user is allowed to edit this collection   
1301    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1302    }
1303
1304    my $site = $self->{'site'};
1305    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1306   
1307    $gsdl_cgi->checked_chdir($collect_dir);
1308
1309    # Make sure the collection isn't locked by someone else
1310    $self->lock_collection($username, $collect);
1311
1312    $self->_set_live_metadata_array(@_);
1313
1314    # Release the lock once it is done
1315    $self->unlock_collection($username, $collect);
1316}
1317
1318
1319sub dxml_metadata
1320{
1321    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1322    my $metaname = $parser->{'parameters'}->{'metaname'};
1323    my $metamode = $parser->{'parameters'}->{'metamode'};
1324   
1325    print STDERR "**** Processing closing </Metadata> tag\n";
1326   
1327    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1328   
1329    # Find the right metadata tag and checks if we are going to
1330    # override it
1331    #
1332    # Note: This over writes the first metadata block it
1333    # encountered. If there are multiple Sections in the doc.xml, it
1334    # might not behave as you would expect
1335
1336    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1337##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1338##    print STDERR "**** metamode = $metamode\n";
1339   
1340    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1341    {
1342        my $name_attr = $attrHash->{'name'};
1343        if (($name_attr eq $metaname) && ($metamode eq "override"))
1344        {
1345            if (!defined $parser->{'parameters'}->{'poscount'})
1346            {
1347                $parser->{'parameters'}->{'poscount'} = 0;
1348            }
1349            else
1350            {
1351                $parser->{'parameters'}->{'poscount'}++;
1352            }
1353           
1354            if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1355            {
1356                ##print STDERR "#### got match!!\n";
1357                # Get the value and override the current value
1358                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1359                $attrHash->{'_content'} = $metavalue;
1360               
1361                # Don't want it to wipe out any other pieces of metadata
1362                $parser->{'parameters'}->{'metamode'} = "done";
1363            }
1364            elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1365            {
1366                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1367                $attrHash->{'_content'} = $metavalue;
1368                $parser->{'parameters'}->{'metamode'} = "done";
1369            }
1370        }
1371    }
1372
1373    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1374    return [$tagname => $attrHash];
1375}
1376
1377# This method exists purely for catching invalid section numbers that the client
1378# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1379# we've seen all the Sections in the doc.xml, and none of their section nums matched
1380# if the metamode has not been set to 'done' by then.
1381sub dxml_archive
1382{
1383    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1384    my $metamode = $parser->{'parameters'}->{'metamode'};
1385   
1386    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1387    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1388   
1389#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1390   
1391    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1392    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1393
1394    my $metaname = $parser->{'parameters'}->{'metaname'};
1395    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1396   
1397    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1398    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1399    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1400    print STDERR "\tin section $opt_doc_secnum.\n";
1401    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1402    }
1403   
1404    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1405    return [$tagname => $attrHash];
1406}
1407
1408sub dxml_description
1409{
1410    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1411    my $metamode = $parser->{'parameters'}->{'metamode'};
1412
1413    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1414    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1415   
1416    print STDERR "**** Processing a closing </Description> tag \n";
1417#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1418   
1419    # Accumulate the metadata
1420
1421    # We'll be accumulating metadata at this point if we haven't found and therefore
1422    # haven't processed the metadata yet.
1423    # For subsections, this means that if we're at a matching subsection, but haven't
1424    # found the correct metaname to override in that subsection, we accumulate it as new
1425    # meta in the subsection by adding it to the current description.
1426    # If there's no subsection info for the metadata, it will accumulate at the top level
1427    # section description if we hadn't found a matching metaname to override at this point.
1428
1429    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1430    # at the toplevel section. In the latter case, it means we want to process meta in the
1431    # toplevel section. So the eq check between the values below will work in all cases.
1432   
1433    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1434    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1435    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1436
1437    if ($curr_secnum eq $opt_doc_secnum
1438        && ($metamode eq "accumulate" || $metamode eq "override")) {
1439        if ($metamode eq "override") {
1440        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1441        }
1442
1443        # If we get to here and metamode is override, this means there
1444        # was no existing value to overide => treat as an append operation
1445       
1446        # Tack a new metadata tag on to the end of the <Metadata>+ block
1447        my $metaname = $parser->{'parameters'}->{'metaname'};
1448        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1449       
1450        my $metadata_attr = {
1451        '_content' => $metavalue,
1452        'name' => $metaname,
1453        'mode' => "accumulate"
1454        };
1455       
1456        my $append_metadata = [ "Metadata" => $metadata_attr ];
1457        my $description_content = $attrHash->{'_content'};
1458       
1459        print "Appending metadata to doc.xml\n";
1460       
1461        if (ref($description_content)) {
1462        # got some existing interesting nested content
1463        push(@$description_content, "    ", $append_metadata ,"\n        ");
1464        }
1465        else {
1466        #description_content is most likely a string such as "\n"
1467        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1468        }
1469       
1470        $parser->{'parameters'}->{'metamode'} = "done";
1471    }       
1472    else {
1473        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1474##      print STDERR "**** NOT ACCUMULATE?!? \n";
1475    }
1476
1477    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1478    return [$tagname => $attrHash];
1479}
1480
1481
1482sub dxml_start_section
1483{
1484    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1485
1486    my $new_depth = scalar(@$contextArray);
1487
1488    print STDERR "**** START SECTION \n";
1489   
1490    if ($new_depth == 1) {
1491    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1492    $parser->{'parameters'}->{'curr_section_num'}   = "";
1493    }
1494
1495    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1496    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1497
1498    my $new_secnum;
1499
1500    if ($new_depth > $old_depth) {
1501    # child subsection
1502    $new_secnum = "$old_secnum.1";
1503    }
1504    elsif ($new_depth == $old_depth) {
1505    # sibling section => increase it's value by 1
1506    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1507    $tail_num++;
1508    $new_secnum = $old_secnum;
1509    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1510    }
1511    else {
1512    # back up to parent section => lopp off tail
1513    $new_secnum = $old_secnum;
1514    $new_secnum =~ s/\.\d+$//;
1515    }
1516
1517    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1518    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1519   
1520    1;
1521}
1522
1523sub edit_xml_file
1524{
1525    my $self = shift @_;
1526    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1527
1528    # use XML::Rules to add it in (read in and out again)
1529    my $parser = XML::Rules->new(start_rules     => $start_rules,
1530                 rules           => $rules,
1531                 style           => 'filter',
1532                                 output_encoding => 'utf8' );
1533
1534    my $xml_in = "";
1535    if (!open(MIN,"<$filename")) {
1536    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1537    }
1538    else {
1539        # Read all the text in
1540    my $line;
1541    while (defined ($line=<MIN>)) {
1542        $xml_in .= $line;
1543    }
1544    close(MIN);
1545   
1546    my $MOUT;   
1547    if (!open($MOUT,">$filename")) {
1548        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1549    }
1550    else {
1551        # Matched lines will get handled by the call backs
1552##      my $xml_out = "";
1553
1554        binmode($MOUT,":utf8");
1555        $parser->filter($xml_in,$MOUT, $options);
1556
1557#       binmode(MOUT,":utf8");
1558#       print MOUT $xml_out;
1559        close($MOUT);       
1560    }
1561    }
1562
1563    # copy across any custom error information that was stored during parsing
1564    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1565}
1566
1567sub edit_doc_xml
1568{
1569    my $self = shift @_;
1570    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1571
1572    my $info_mess = <<RAWEND;
1573****************************
1574  edit_doc_xml()
1575****************************
1576doc_xml_filename = $doc_xml_filename
1577metaname = $metaname
1578metavalue = $metavalue
1579metapos = $metapos
1580metamode = $metamode
1581opt_secnum = $opt_secnum
1582prevmetavalue = $prevmetavalue
1583****************************
1584RAWEND
1585
1586    $gsdl_cgi->generate_message($info_mess);
1587   
1588    # To monitor which section/subsection number we are in
1589    my @start_rules =
1590    ( 'Section'    => \&dxml_start_section );
1591
1592    # use XML::Rules to add it in (read in and out again)
1593    # Set the call back functions
1594    my @rules =
1595    ( _default => 'raw',
1596      'Metadata'    => \&dxml_metadata,
1597      'Description' => \&dxml_description,
1598      'Archive'     => \&dxml_archive); # just for catching errors at end
1599     
1600    # Sets the parameters
1601    my $options = { 'metaname'  => $metaname,
1602            'metapos'   => $metapos,
1603            'metavalue' => $metavalue,
1604            'metamode'  => $metamode,
1605            'prevmetavalue' => $prevmetavalue };
1606           
1607    if (defined $opt_secnum) {
1608    $options->{'secnum'} = $opt_secnum;
1609    }
1610
1611    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1612}
1613
1614sub set_archives_metadata_entry
1615{
1616    my $self = shift @_;
1617    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1618   
1619    my $info_mess = <<RAWEND;
1620****************************
1621  set_archives_metadata_entry()
1622****************************
1623archive_dir = $archive_dir
1624collect_dir = $collect_dir
1625collect = $collect
1626infodbtype = $infodbtype
1627docid = $docid
1628metaname = $metaname
1629metapos = $metapos
1630metavalue = $metavalue
1631metamode = $metamode
1632prevmetavalue = $prevmetavalue
1633****************************
1634RAWEND
1635
1636    $gsdl_cgi->generate_message($info_mess);
1637   
1638    # Obtain the doc.xml path for the specified docID
1639    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1640
1641    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1642    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1643    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1644   
1645    # The $doc_xml_file is relative to the archives, and now let's get the full path
1646    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1647    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1648
1649    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1650    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1651    # Thereafter, we will finally be setting the overriding metavalue for this metaname
1652    if(!defined $metapos && $metamode eq "override") {
1653    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1654    }
1655
1656    # Edit the doc.xml file with the specified metadata name, value and position.
1657    # TODO: there is a potential problem here as this edit_doc_xml function
1658    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1659    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1660   
1661    # dxml_metadata method ignores metapos if metamode anything other than override
1662    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1663            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1664
1665    # return 0; # return 0 for now to indicate no error
1666    return (defined $self->{'error_msg'}) ? 1 : 0;
1667}
1668
1669
1670sub set_archives_metadata
1671{
1672    my $self = shift @_;
1673
1674    my $username  = $self->{'username'};
1675    my $collect   = $self->{'collect'};
1676    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1677   
1678    if ($baseaction::authentication_enabled) {
1679    # Ensure the user is allowed to edit this collection
1680    $self->authenticate_user($username, $collect);
1681    }
1682
1683    # Make sure the collection isn't locked by someone else
1684    $self->lock_collection($username, $collect);
1685
1686    $self->_set_archives_metadata(@_);
1687
1688    # Release the lock once it is done
1689    $self->unlock_collection($username, $collect);
1690}
1691
1692sub _set_archives_metadata_array
1693{
1694    my $self = shift @_;
1695   
1696    my $collect   = $self->{'collect'};
1697    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1698    my $site = $self->{'site'};
1699    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1700
1701    # look up additional args
1702   
1703    my $infodbtype = $self->{'infodbtype'};
1704   
1705    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1706   
1707    my $json_str      = $self->{'json'};
1708    my $doc_array = decode_json $json_str;
1709   
1710   
1711    my $global_status = 0;
1712    my $global_mess = "";
1713   
1714    my @all_docids = ();
1715   
1716    foreach my $doc_array_rec ( @$doc_array ) {
1717    my $status    = -1;
1718    my $docid     = $doc_array_rec->{'docid'};
1719
1720    push(@all_docids,$docid);
1721   
1722    my $metaname  = $doc_array_rec->{'metaname'};
1723    if(defined $metaname) {
1724       
1725        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1726
1727        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1728        my $metavalue = $doc_array_rec->{'metavalue'};
1729        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1730       
1731       
1732        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1733        # make "accumulate" the default (less destructive, as it won't actually
1734        # delete any existing values)
1735        $metamode = "accumulate";
1736        }       
1737       
1738        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1739                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1740    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1741        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1742       
1743        foreach my $metatable_rec ( @$metatable ) {
1744        $metaname  = $metatable_rec->{'metaname'};
1745        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1746        my $metapos = undef;
1747        my $prevmetavalue = undef;
1748        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1749       
1750        foreach my $metavalue ( @$metavals ) {
1751            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1752                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1753           
1754            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1755            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1756            # metavalue for this metaname will be lost
1757            $metamode = "accumulate";
1758            }
1759        }           
1760        }       
1761    }
1762       
1763    if ($status != 0) {
1764        # Catch error if set infodb entry failed
1765        $global_status = $status;
1766        $global_mess .= "Failed to set metadata key: $docid\n";
1767        $global_mess .= "Exit status: $status\n";
1768        $global_mess .= "System Error Message: $!\n";
1769        $global_mess .= "-" x 20 . "\n";
1770    }
1771    }
1772   
1773    if ($global_status != 0) {
1774    $global_mess .= "PATH: $ENV{'PATH'}\n";
1775    $gsdl_cgi->generate_error($global_mess);
1776    }
1777    else {
1778    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1779    $gsdl_cgi->generate_ok_message($mess);
1780    }
1781}
1782
1783sub set_archives_metadata_array
1784{
1785    my $self = shift @_;
1786
1787    my $username  = $self->{'username'};
1788    my $collect   = $self->{'collect'};
1789    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1790#    my $gsdlhome  = $self->{'gsdlhome'};
1791
1792    if ($baseaction::authentication_enabled) {
1793    # Ensure the user is allowed to edit this collection   
1794    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1795    }
1796
1797    my $site = $self->{'site'};
1798    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1799   
1800    $gsdl_cgi->checked_chdir($collect_dir);
1801
1802    # Obtain the collect dir
1803    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1804
1805    # Make sure the collection isn't locked by someone else
1806    $self->lock_collection($username, $collect);
1807
1808    $self->_set_archives_metadata_array(@_);
1809   
1810    # Release the lock once it is done
1811    $self->unlock_collection($username, $collect);
1812}
1813
1814sub _remove_archives_metadata
1815{
1816    my $self = shift @_;
1817
1818    my $collect   = $self->{'collect'};
1819    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1820#   my $gsdlhome  = $self->{'gsdlhome'};
1821    my $infodbtype = $self->{'infodbtype'};
1822   
1823    my $site = $self->{'site'};
1824       
1825    # Obtain the collect and archive dir   
1826    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1827   
1828    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1829
1830    # look up additional args
1831    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1832   
1833    my $metaname = $self->{'metaname'};
1834    my $metapos = $self->{'metapos'};
1835
1836    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1837
1838    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1839    # default to deleting the first metavalue for the given metaname
1840    # Beware that if both metapos AND metavalue are defined, both matches (if any)
1841    # seem to get deleted in one single remove_archives_meta action invocation.
1842    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1843    # deleted, both get deleted.
1844    if(!defined $metapos && !defined $metavalue) {
1845        $metapos = 0;
1846    }
1847
1848    my $metamode = $self->{'metamode'} || undef;
1849
1850    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1851    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1852
1853    # This now stores the full pathname
1854    my $doc_filename = $doc_rec->{'doc-file'}->[0];
1855
1856    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1857#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1858   
1859    if ($status == 0)
1860    {
1861        my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1862        $mess .= "  $metaname";
1863        $mess .= "->[$metapos]" if (defined $metapos);
1864
1865        $gsdl_cgi->generate_ok_message($mess); 
1866    }
1867    else
1868    {
1869        my $mess .= "Failed to remove archives metadata key: $docid\n";
1870        $mess .= "Exit status: $status\n";
1871        $mess .= "System Error Message: $!\n";
1872        $mess .= "-" x 20 . "\n";
1873       
1874        $gsdl_cgi->generate_error($mess);
1875    }
1876   
1877    #return $status; # in case calling functions have a use for this
1878}
1879
1880sub remove_archives_metadata
1881{
1882    my $self = shift @_;
1883
1884    my $username  = $self->{'username'};
1885    my $collect   = $self->{'collect'};
1886    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1887   
1888    if ($baseaction::authentication_enabled)
1889    {
1890        # Ensure the user is allowed to edit this collection       
1891        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1892    }
1893
1894    # Make sure the collection isn't locked by someone else
1895    $self->lock_collection($username, $collect);
1896
1897    $self->_remove_archives_metadata(@_);
1898
1899    # Release the lock once it is done
1900    $self->unlock_collection($username, $collect);
1901}
1902
1903sub remove_from_doc_xml
1904{
1905    my $self = shift @_;
1906    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1907   
1908    my @start_rules = ('Section' => \&dxml_start_section);
1909   
1910    # Set the call-back functions for the metadata tags
1911    my @rules =
1912    (
1913        _default => 'raw',
1914        'Metadata' => \&rfdxml_metadata
1915    );
1916       
1917    my $parser = XML::Rules->new
1918    (
1919        start_rules => \@start_rules,
1920        rules => \@rules,
1921        style => 'filter',
1922        output_encoding => 'utf8',
1923#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1924        stripspaces => 2|0|0 # ineffectual
1925    );
1926   
1927    my $status = 0;
1928    my $xml_in = "";
1929    if (!open(MIN,"<$doc_xml_filename"))
1930    {
1931        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1932        $status = 1;
1933    }
1934    else
1935    {
1936        # Read them in
1937        my $line;
1938        while (defined ($line=<MIN>)) {
1939            $xml_in .= $line;
1940        }
1941        close(MIN);
1942
1943        # Filter with the call-back functions
1944        my $xml_out = "";
1945
1946        my $MOUT;
1947        if (!open($MOUT,">$doc_xml_filename")) {
1948            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1949            $status = 1;
1950        }
1951        else {
1952            binmode($MOUT,":utf8");
1953            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1954            close($MOUT);       
1955        }
1956    }
1957    return $status;
1958}
1959
1960sub rfdxml_metadata
1961{
1962    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1963
1964    # For comparisons, toplevel section is indicated by ""
1965    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1966    my $secid = $parser->{'parameters'}->{'secid'} || "";
1967
1968    if (!($secid eq $curr_sec_num))
1969    {
1970        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1971        return [$tagname => $attrHash];
1972    }
1973
1974    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1975    {
1976        if (!defined $parser->{'parameters'}->{'poscount'})
1977        {
1978            $parser->{'parameters'}->{'poscount'} = 0;
1979        }
1980        else
1981        {
1982            $parser->{'parameters'}->{'poscount'}++;
1983        }
1984       
1985        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1986        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {         
1987            return [];
1988        }
1989    }
1990
1991    if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1992    {   
1993        return [];
1994    }
1995
1996    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1997    {   
1998        return [];
1999    }
2000   
2001    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2002    return [$tagname => $attrHash];
2003}
2004
2005sub mxml_metadata
2006{
2007    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2008    my $metaname = $parser->{'parameters'}->{'metaname'};
2009    my $metamode = $parser->{'parameters'}->{'metamode'};
2010
2011    # Report error if we don't see FileName tag before this
2012    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2013   
2014    # Don't do anything if we are not in the right FileSet
2015    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2016    if ($file_regexp =~ /\.\*/) {
2017    # Only interested in a file_regexp if it specifies precisely one
2018    # file. 
2019    # So, skip anything with a .* in it as it is too general
2020##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2021    return [$tagname => $attrHash];
2022    }
2023    my $src_file = $parser->{'parameters'}->{'src_file'};
2024    if (!($src_file =~ /$file_regexp/)) {
2025    return [$tagname => $attrHash];
2026    }
2027##    print STDERR "*** mxl metamode = $metamode\n";
2028
2029    # Find the right metadata tag and checks if we are going to override it
2030    my $name_attr = $attrHash->{'name'};
2031    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2032
2033    # now metadata.xml functions need to keep track of metapos
2034    if (!defined $parser->{'parameters'}->{'poscount'})
2035    {
2036        $parser->{'parameters'}->{'poscount'} = 0;
2037    }
2038    else
2039    {
2040        $parser->{'parameters'}->{'poscount'}++;
2041    }
2042
2043    # If either the metapos or prevmetavalue is set,
2044        # get the value and override the current value
2045    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2046
2047    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2048    {
2049        $attrHash->{'_content'} = $metavalue;
2050
2051        ##  print STDERR "**** overriding metadata.xml\n";
2052       
2053        # Don't want it to wipe out any other pieces of metadata
2054        $parser->{'parameters'}->{'metamode'} = "done";
2055    }
2056    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2057    {
2058        $attrHash->{'_content'} = $metavalue;
2059        $parser->{'parameters'}->{'metamode'} = "done";
2060    }
2061    }
2062
2063    # mxml_description will process the metadata if metadata is accumulate,
2064    # or if we haven't found the metadata to override
2065
2066    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2067    return [$tagname => $attrHash];
2068}
2069
2070
2071sub mxml_description
2072{
2073    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2074    my $metamode = $parser->{'parameters'}->{'metamode'};   
2075
2076    # Failed... Report error if we don't see FileName tag before this
2077    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2078
2079    # Don't do anything if we are not in the right FileSet
2080    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2081    if ($file_regexp =~ m/\.\*/) {
2082    # Only interested in a file_regexp if it specifies precisely one
2083    # file. 
2084    # So, skip anything with a .* in it as it is too general
2085    return [$tagname => $attrHash];
2086    }
2087    my $src_file = $parser->{'parameters'}->{'src_file'};
2088   
2089    if (!($src_file =~ m/$file_regexp/)) {
2090    return [$tagname => $attrHash];
2091    }
2092
2093    # Accumulate the metadata block to the end of the description block
2094    # Note: This adds metadata block to all description blocks, so if there are
2095    # multiple FileSets, it will add to all of them
2096    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2097
2098    # if metamode was "override" but get to here then it failed to
2099    # find an item to override, in which case it should append its
2100    # value to the end, just like the "accumulate" mode
2101
2102    if ($metamode eq "override") {
2103        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2104    }
2105
2106    # tack a new metadata tag on to the end of the <Metadata>+ block
2107    my $metaname = $parser->{'parameters'}->{'metaname'};
2108    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2109   
2110    my $metadata_attr = { '_content' => $metavalue,
2111                  'name'     => $metaname,
2112                  'mode'     => "accumulate" };
2113
2114    my $append_metadata = [ "Metadata" => $metadata_attr ];
2115    my $description_content = $attrHash->{'_content'};
2116   
2117##  print STDERR "*** appending to metadata.xml\n";
2118
2119    # append the new metadata element to the end of the current
2120    # content contained inside this tag
2121    if (ref($description_content) eq "") {
2122        # => string or numeric literal
2123        # this is caused by a <Description> block has no <Metadata> child elements
2124        # => set up an empty array in '_content'
2125        $attrHash->{'_content'} = [ "\n" ];
2126        $description_content = $attrHash->{'_content'};
2127    }
2128
2129    push(@$description_content,"    ", $append_metadata ,"\n        ");
2130    $parser->{'parameters'}->{'metamode'} = "done";
2131    }
2132
2133    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2134    return [$tagname => $attrHash];
2135}
2136
2137
2138sub mxml_filename
2139{
2140    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2141
2142    # Store the filename of the Current Fileset
2143    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2144    # FileName tag must come before Description tag
2145    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2146
2147    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2148    return [$tagname => $attrHash];
2149}
2150
2151
2152sub mxml_fileset
2153{
2154    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2155
2156    # Initilise the current_file
2157    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2158    # FileName tag must come before Description tag
2159    $parser->{'parameters'}->{'current_file'} = "";
2160
2161    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2162    return [$tagname => $attrHash];
2163}
2164
2165sub mxml_directorymetadata
2166{
2167    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2168
2169    # if we haven't processed the metadata when we reach the end of metadata.xml
2170    # it's because there's no particular FileSet element whose FileName matched
2171    # In which case, add a new FileSet for this FileName
2172    my $metamode = $parser->{'parameters'}->{'metamode'};
2173    if($metamode ne "done") {
2174   
2175    if ($metamode eq "override") {
2176        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2177    }
2178
2179    # If we get to here and metamode is override, this means there
2180    # was no existing value to overide => treat as an append operation
2181   
2182    # Create a new FileSet element and append to DirectoryMetadata
2183    # <FileSet>
2184    # <FileName>src_file</FileName>
2185    # <Description>
2186    # <Metadata mode="" name="">metavalue</Metadata>
2187    # </Description>
2188    # </FileSet>
2189    my $src_file = $parser->{'parameters'}->{'src_file'};
2190    my $metaname = $parser->{'parameters'}->{'metaname'};
2191    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2192    my $metadata_attr = {
2193        '_content' => $metavalue,
2194        'name' => $metaname,
2195        'mode' => "accumulate"
2196    };
2197    my $append_metadata = [ "Metadata" => $metadata_attr ];
2198    my $description_attr->{'_content'} = [ "\n\t\t   ", $append_metadata, "\n\t\t"];
2199    my $description_element = [ "Description" => $description_attr ];
2200
2201    #_content is not an attribute, it's special and holds the children of this element
2202    # including the textnode value embedded in this element if any.
2203    my $filename_attr = {'_content' => $src_file};
2204    my $filename_element = [ "FileName" => $filename_attr ];
2205
2206    my $fileset_attr = {};
2207    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2208    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2209   
2210   
2211    # get children of dirmeta, and push the new FileSet element onto it
2212    print "Appending metadata to metadata.xml\n";
2213    my $dirmeta_content = $attrHash->{'_content'};
2214    if (ref($dirmeta_content)) {
2215        # got some existing interesting nested content
2216        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2217        push(@$dirmeta_content, "\t", $fileset ,"\n");
2218    }
2219    else {
2220        #description_content is most likely a string such as "\n"
2221        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2222        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2223    }   
2224
2225    $parser->{'parameters'}->{'metamode'} = "done";
2226    }
2227    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2228    return [$tagname => $attrHash];
2229}
2230
2231
2232sub edit_metadata_xml
2233{
2234    my $self = shift @_;
2235    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2236
2237    # Set the call-back functions for the metadata tags
2238    my @rules =
2239    ( _default => 'raw',
2240          'FileName' => \&mxml_filename,
2241      'Metadata' => \&mxml_metadata,
2242      'Description' => \&mxml_description,
2243          'FileSet' => \&mxml_fileset,
2244      'DirectoryMetadata' => \&mxml_directorymetadata);
2245
2246    # use XML::Rules to add it in (read in and out again)
2247    my $parser = XML::Rules->new(rules => \@rules,
2248                 style => 'filter',
2249                                 output_encoding => 'utf8',
2250                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2251
2252    if (!-e $metadata_xml_filename) {
2253   
2254        if (open(MOUT,">$metadata_xml_filename")) {
2255           
2256            my $src_file_re = &util::filename_to_regex($src_file);
2257            # shouldn't the following also be in the above utility routine??
2258            # $src_file_re =~ s/\./\\./g;
2259       
2260            print MOUT "<?xml version=\"1.0\"?>\n";
2261            print MOUT "<DirectoryMetadata>\n";
2262            print MOUT " <FileSet>\n";
2263            print MOUT "  <FileName>$src_file_re</FileName>\n";
2264            print MOUT "  <Description>\n";
2265            print MOUT "  </Description>\n";
2266            print MOUT " </FileSet>\n";
2267            print MOUT "</DirectoryMetadata>\n";
2268
2269            close(MOUT);
2270        }
2271        else {
2272            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2273        }
2274    }
2275   
2276   
2277    my $xml_in = "";
2278    if (!open(MIN,"<$metadata_xml_filename")) {
2279        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2280    }
2281    else {
2282        # Read them in
2283        my $line;
2284        while (defined ($line=<MIN>)) {
2285            $xml_in .= $line;
2286        }
2287        close(MIN);
2288
2289        # Filter with the call-back functions
2290        my $xml_out = "";
2291
2292        my $MOUT;
2293        if (!open($MOUT,">$metadata_xml_filename")) {
2294            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2295        }
2296        else {
2297            binmode($MOUT,":utf8");
2298
2299            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2300            # At the moment, I will just hack it!
2301            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2302            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2303            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2304            #print MOUT $xml_out;
2305
2306            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2307                              metapos => $metapos,
2308                      metavalue => $metavalue,
2309                      metamode => $metamode,
2310                      src_file => $src_file,
2311                      prevmetavalue => $prevmetavalue,
2312                      current_file => undef} );
2313            close($MOUT);       
2314        }
2315   }
2316}
2317
2318
2319sub set_import_metadata
2320{
2321    my $self = shift @_;
2322   
2323    my $username  = $self->{'username'};
2324    my $collect   = $self->{'collect'};
2325    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2326   
2327    if ($baseaction::authentication_enabled) {
2328    # Ensure the user is allowed to edit this collection
2329    $self->authenticate_user($username, $collect);
2330    }
2331
2332    # Make sure the collection isn't locked by someone else
2333    $self->lock_collection($username, $collect);
2334 
2335    $self->_set_import_metadata(@_);
2336
2337    # Release the lock once it is done
2338    $self->unlock_collection($username, $collect);
2339   
2340}
2341
2342sub set_import_metadata_array
2343{
2344    my $self = shift @_;
2345
2346    my $username  = $self->{'username'};
2347    my $collect   = $self->{'collect'};
2348    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2349#    my $gsdlhome  = $self->{'gsdlhome'};
2350
2351    if ($baseaction::authentication_enabled) {
2352    # Ensure the user is allowed to edit this collection   
2353    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2354    }
2355
2356    my $site = $self->{'site'};
2357    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2358   
2359    $gsdl_cgi->checked_chdir($collect_dir);
2360
2361    # Make sure the collection isn't locked by someone else
2362    $self->lock_collection($username, $collect);
2363
2364    $self->_set_import_metadata_array(@_);
2365
2366    # Release the lock once it is done
2367    $self->unlock_collection($username, $collect);
2368
2369}
2370
2371
2372sub _set_import_metadata_array
2373{
2374    my $self = shift @_;
2375
2376    my $collect   = $self->{'collect'};
2377    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2378
2379    my $site = $self->{'site'};
2380    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2381   
2382    # look up additional args
2383   
2384    my $infodbtype = $self->{'infodbtype'};
2385   
2386    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2387    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2388   
2389    my $json_str = $self->{'json'};
2390    my $doc_array = decode_json $json_str;
2391   
2392    my $global_status = 0;
2393    my $global_mess = "";
2394   
2395    my @all_docids = ();
2396   
2397    foreach my $doc_array_rec ( @$doc_array )
2398    {
2399    my $status = -1;
2400    my $docid = $doc_array_rec->{'docid'};
2401   
2402    my ($docid_root,$docid_secnum);
2403    if(defined $docid) {   
2404        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2405        # as yet no support for setting subsection metadata in metadata.xml
2406        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2407        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2408        next; # skip this docid in for loop
2409        }
2410    }
2411
2412    push(@all_docids,$docid); # docid_root rather
2413   
2414    my $metaname = $doc_array_rec->{'metaname'};
2415    if (defined $metaname) {
2416        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2417        my $metavalue = $doc_array_rec->{'metavalue'};
2418        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2419
2420        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2421        # make "accumulate" the default (less destructive, as won't actually
2422        # delete any existing values)
2423        $metamode = "accumulate";
2424        }
2425
2426        # adding metapos and prevmetavalue support to import_metadata subroutines
2427        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2428        my $prevmetavalue = $self->{'prevmetavalue'};
2429
2430        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2431       
2432    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2433        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2434       
2435        foreach my $metatable_rec ( @$metatable ) {
2436        $metaname  = $metatable_rec->{'metaname'};
2437        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2438        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2439            # make "accumulate" the default (less destructive, as won't actually
2440            # delete any existing values)
2441            $metamode = "accumulate";
2442        }
2443
2444        # No support for metapos and prevmetavalue in the JSON metatable substructure
2445        my $metapos = undef;
2446        my $prevmetavalue = undef;
2447        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2448       
2449        foreach my $metavalue ( @$metavals ) {
2450            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2451
2452            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2453            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2454            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2455            # metavalue for this metaname will be lost
2456            $metamode = "accumulate";
2457            }
2458        }
2459        }
2460    }       
2461    }
2462
2463    # always a success message
2464    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2465    $gsdl_cgi->generate_ok_message($mess);
2466}
2467
2468# always returns true (1)
2469sub set_import_metadata_entry
2470{
2471    my $self = shift @_;
2472    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2473
2474    my $info_mess = <<RAWEND;
2475****************************
2476  set_import_metadata_entry()
2477****************************
2478collect = $collect
2479collect_dir = $collect_dir
2480infodbtype = $infodbtype
2481arcinfo_doc_filename = $arcinfo_doc_filename
2482docid = $docid
2483metaname = $metaname
2484metapos = $metapos
2485metavalue = $metavalue
2486metamode = $metamode
2487prevmetavalue = $prevmetavalue
2488****************************
2489RAWEND
2490
2491    $gsdl_cgi->generate_message($info_mess);
2492
2493    # import works with metadata.xml which can have inherited metadata
2494    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2495    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2496    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2497    # what they're doing if they provide a metapos.
2498    if(defined $metapos) {
2499    print STDERR "@@@@ WARNING: metapos defined.\n";
2500    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2501    }
2502
2503    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2504    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2505    my $metadata_xml_file;
2506    my $import_filename = undef;
2507   
2508    if (defined $docid) {
2509    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2510    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2511
2512    # This now stores the full pathname
2513    $import_filename = $doc_rec->{'src-file'}->[0];
2514    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2515         # as the array version of the method doesn't support the -f parameter yet
2516    my $import_file  = $self->{'f'};
2517    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2518    }
2519   
2520    # figure out correct metadata.xml file [?]
2521    # Assuming the metadata.xml file is next to the source file
2522    # Note: This will not work if it is using the inherited metadata from the parent folder
2523    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2524    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2525   
2526    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2527    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2528    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2529    if(!defined $metapos && $metamode eq "override") {
2530##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2531    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2532
2533    }
2534
2535    # Edit the metadata.xml
2536    # Modified by Jeffrey from DL Consulting
2537    # Handle the case where there is one metadata.xml file for multiple FileSets
2538    # The XML filter needs to know whether it is in the right FileSet
2539    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2540    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2541    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2542                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2543    #return 0;
2544    return $metadata_xml_filename;
2545}
2546
2547sub _remove_import_metadata
2548{
2549    my $self = shift @_;
2550
2551    my $collect   = $self->{'collect'};
2552    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2553#   my $gsdlhome  = $self->{'gsdlhome'};
2554    my $infodbtype = $self->{'infodbtype'};
2555   
2556    # Obtain the collect dir
2557    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2558    my $site = $self->{'site'};
2559    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2560   
2561    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2562    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2563
2564    # look up additional args
2565    my $docid = $self->{'d'};
2566    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2567    {
2568        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2569    }
2570   
2571    my $metaname = $self->{'metaname'};
2572    my $metapos = $self->{'metapos'};
2573    my $metavalue = $self->{'metavalue'};
2574    if(defined $metavalue) {
2575        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2576    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2577        $metapos = 0;
2578    }
2579    my $metamode = $self->{'metamode'} || undef;
2580
2581    # import works with metadata.xml which can have inherited metadata
2582    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2583    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2584    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2585    # what they're doing if they provide a metapos.
2586    if(defined $metapos) {
2587        print STDERR "@@@@ WARNING: metapos defined.\n";
2588        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2589    }
2590   
2591    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2592    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2593    my $metadata_xml_file;
2594    my $import_filename = undef;
2595    if (defined $docid)
2596    {
2597        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2598        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2599
2600        # This now stores the full pathname
2601        $import_filename = $doc_rec->{'src-file'}->[0];
2602    }
2603
2604    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2605    {
2606        $gsdl_cgi->generate_error("There is no metadata\n");
2607    }
2608   
2609    # figure out correct metadata.xml file [?]
2610    # Assuming the metadata.xml file is next to the source file
2611    # Note: This will not work if it is using the inherited metadata from the parent folder
2612    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2613    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2614   
2615    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2616   
2617    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2618    $mess .= "  $metaname";
2619    $mess .= " = $metavalue\n";
2620   
2621    $gsdl_cgi->generate_ok_message($mess);
2622
2623    #return $status; # in case calling functions have a use for this
2624}
2625
2626sub remove_import_metadata
2627{
2628    my $self = shift @_;
2629   
2630    my $username = $self->{'username'};
2631    my $collect   = $self->{'collect'};
2632    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2633   
2634    if ($baseaction::authentication_enabled) {
2635        # Ensure the user is allowed to edit this collection       
2636        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2637    }
2638
2639    # Make sure the collection isn't locked by someone else
2640    $self->lock_collection($username, $collect);
2641   
2642    $self->_remove_import_metadata(@_);
2643
2644    # Release the lock once it is done
2645    $self->unlock_collection($username, $collect);
2646
2647}
2648
2649sub remove_from_metadata_xml
2650{
2651    my $self = shift @_;
2652    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2653    # metamode generally has no meaning for removing meta, but is used by set_meta
2654    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2655
2656    # Set the call-back functions for the metadata tags
2657    my @rules =
2658    (
2659        _default => 'raw',
2660        'Metadata' => \&rfmxml_metadata,
2661        'FileName' => \&mxml_filename
2662    );
2663       
2664    my $parser = XML::Rules->new
2665    (
2666        rules => \@rules,
2667        style => 'filter',
2668        output_encoding => 'utf8',
2669     #normalisespaces => 1,
2670            stripspaces => 2|0|0 # ineffectual
2671    );
2672   
2673    my $xml_in = "";
2674    if (!open(MIN,"<$metadata_xml_filename"))
2675    {
2676        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2677    }
2678    else
2679    {
2680        # Read them in
2681        my $line;
2682        while (defined ($line=<MIN>)) {
2683            $xml_in .= $line;
2684        }
2685        close(MIN);
2686
2687        # Filter with the call-back functions
2688        my $xml_out = "";
2689
2690        my $MOUT;
2691        if (!open($MOUT,">$metadata_xml_filename")) {
2692            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2693        }
2694        else {
2695            binmode($MOUT,":utf8");
2696            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2697            close($MOUT);       
2698        }
2699    }
2700}
2701
2702sub rfmxml_metadata
2703{
2704    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2705
2706    # metadata.xml does not handle subsections
2707
2708    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2709    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2710        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2711    {
2712        if (!defined $parser->{'parameters'}->{'poscount'})
2713        {
2714            $parser->{'parameters'}->{'poscount'} = 0;
2715        }
2716        else
2717        {
2718            $parser->{'parameters'}->{'poscount'}++;
2719        }
2720
2721        # if overriding but no metapos, then clear all the meta for this metaname
2722        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2723            return [];
2724        }
2725   
2726        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2727        {
2728            return [];
2729        }
2730
2731        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2732        {
2733            return [];
2734        }       
2735    }
2736
2737    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2738    return [$tagname => $attrHash];
2739}
2740
2741sub _remove_live_metadata
2742{
2743    my $self = shift @_;
2744
2745    my $collect   = $self->{'collect'};
2746    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2747#    my $gsdlhome  = $self->{'gsdlhome'};
2748    my $infodbtype = $self->{'infodbtype'};
2749
2750    # Obtain the collect dir
2751    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2752    my $site = $self->{'site'};
2753    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2754
2755   
2756    # look up additional args
2757    my $docid     = $self->{'d'};
2758    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2759      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2760    }
2761   
2762    # Generate the dbkey
2763    my $metaname  = $self->{'metaname'};
2764    my $dbkey = "$docid.$metaname";
2765
2766    # To people who know $collect_tail please add some comments
2767    # Obtain the live gdbm_db path
2768    my $collect_tail = $collect;
2769    $collect_tail =~ s/^.*[\/\\]//;
2770    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2771    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2772
2773    # Remove the key
2774    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2775    my $status = system($cmd);
2776    if ($status != 0) {
2777        # Catch error if gdbmdel failed
2778    my $mess = "Failed to set metadata key: $dbkey\n";
2779   
2780    $mess .= "PATH: $ENV{'PATH'}\n";
2781    $mess .= "cmd = $cmd\n";
2782    $mess .= "Exit status: $status\n";
2783    $mess .= "System Error Message: $!\n";
2784
2785    $gsdl_cgi->generate_error($mess);
2786    }
2787    else {
2788    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2789    }
2790
2791}
2792
2793sub remove_live_metadata
2794{
2795    my $self = shift @_;
2796
2797    my $username  = $self->{'username'};
2798    my $collect   = $self->{'collect'};
2799    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2800    my $gsdlhome  = $self->{'gsdlhome'};
2801   
2802    if ($baseaction::authentication_enabled) {
2803    # Ensure the user is allowed to edit this collection   
2804    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2805    }
2806
2807    # Make sure the collection isn't locked by someone else
2808    $self->lock_collection($username, $collect);
2809
2810    $self->_remove_live_metadata(@_);
2811
2812    $self->unlock_collection($username, $collect);
2813}
2814
2815sub remove_metadata
2816{
2817    my $self = shift @_;
2818
2819    my $where = $self->{'where'};
2820    if(!$where) {
2821    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2822    return;
2823    }
2824
2825    my $username  = $self->{'username'};
2826    my $collect   = $self->{'collect'};
2827    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2828
2829    if ($baseaction::authentication_enabled) {
2830    # Ensure the user is allowed to edit this collection   
2831    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2832    }
2833
2834    # Make sure the collection isn't locked by someone else
2835    $self->lock_collection($username, $collect);
2836
2837    # check which directories need to be processed, specified in $where as
2838    # any combination of import|archives|index|live
2839    if($where =~ m/import/) {
2840    $self->_remove_import_metadata(@_);     
2841    }
2842    if($where =~ m/archives/) {
2843    $self->_remove_archives_metadata(@_);       
2844   }
2845    if($where =~ m/index/) {
2846    $self->_remove_index_metadata(@_);     
2847    }
2848
2849    # Release the lock once it is done
2850    $self->unlock_collection($username, $collect);
2851}
2852
2853# the internal version, without authentication
2854sub _remove_index_metadata
2855{   
2856    my $self = shift @_;
2857
2858    my $collect   = $self->{'collect'};
2859    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2860#    my $gsdlhome  = $self->{'gsdlhome'};
2861    my $infodbtype = $self->{'infodbtype'};
2862   
2863    # Obtain the collect dir
2864    my $site = $self->{'site'};
2865    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2866    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2867
2868        # look up additional args
2869    my $docid     = $self->{'d'};
2870    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2871      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2872    }
2873    my $metaname  = $self->{'metaname'};
2874    my $metapos   = $self->{'metapos'};
2875    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2876
2877    # To people who know $collect_tail please add some comments
2878    # Obtain the path to the database
2879    my $collect_tail = $collect;
2880    $collect_tail =~ s/^.*[\/\\]//;
2881    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2882    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2883
2884    # Read the docid entry
2885    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2886
2887    # Check to make sure the key does exist
2888    if (!defined ($doc_rec->{$metaname})) {
2889        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2890    }
2891
2892    # Obtain the specified metadata pos
2893    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2894    if(!defined $metapos && !defined $metavalue) {
2895        $metapos = 0;
2896    }
2897   
2898
2899    # consider check key is defined before deleting?
2900    # Loop through the metadata array and ignore the specified position
2901    my $filtered_metadata = [];
2902    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
2903    for (my $i=0; $i<$num_metadata_vals; $i++) {
2904    my $metaval = shift(@{$doc_rec->{$metaname}});
2905
2906    if (!defined $metavalue && $i != $metapos) {
2907        push(@$filtered_metadata,$metaval);
2908    }
2909   
2910    if(defined $metavalue && !($metavalue eq $metaval))
2911    {
2912        push(@$filtered_metadata,$metaval);
2913    }
2914    }
2915    $doc_rec->{$metaname} = $filtered_metadata;
2916
2917    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2918    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2919
2920    if ($status != 0) {
2921    my $mess = "Failed to set metadata key: $docid\n";
2922   
2923    $mess .= "PATH: $ENV{'PATH'}\n";
2924    $mess .= "Exit status: $status\n";
2925    $mess .= "System Error Message: $!\n";
2926
2927    $gsdl_cgi->generate_error($mess);
2928    }
2929    else {
2930    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2931    $mess .= "  $metaname";
2932    $mess .= "->[$metapos]" if (defined $metapos);
2933
2934    $gsdl_cgi->generate_ok_message($mess);
2935    }
2936
2937    #return $status; # in case calling functions have a use for this
2938}
2939
2940sub remove_index_metadata
2941{
2942    my $self = shift @_;
2943
2944    my $username  = $self->{'username'};
2945    my $collect   = $self->{'collect'};
2946    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2947#    my $gsdlhome  = $self->{'gsdlhome'};
2948   
2949    if ($baseaction::authentication_enabled) {
2950    # Ensure the user is allowed to edit this collection   
2951    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2952    }
2953
2954    # Obtain the collect dir
2955    my $site = $self->{'site'};
2956    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2957    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2958
2959    # Make sure the collection isn't locked by someone else
2960    $self->lock_collection($username, $collect);
2961
2962    $self->_remove_index_metadata(@_);
2963
2964    # Release the lock once it is done
2965    $self->unlock_collection($username, $collect);
2966}
2967
2968
2969# Was trying to reused the codes, but the functions need to be broken
2970# down more before they can be reused, otherwise there will be too
2971# much overhead and duplicate process...
2972sub insert_metadata
2973{
2974    my $self = shift @_;
2975   
2976    my $username  = $self->{'username'};
2977    my $collect   = $self->{'collect'};
2978    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2979    my $gsdlhome  = $self->{'gsdlhome'};
2980    my $infodbtype = $self->{'infodbtype'};
2981   
2982    # If the import metadata and gdbm database have been updated, we
2983    # need to insert some notification to warn user that the the text
2984    # they see at the moment is not indexed and require a rebuild.
2985    my $rebuild_pending_macro = "_rebuildpendingmessage_";
2986
2987    if ($baseaction::authentication_enabled) {
2988    # Ensure the user is allowed to edit this collection
2989    $self->authenticate_user($username, $collect);
2990    }
2991
2992    # Obtain the collect and archive dir   
2993    my $site = $self->{'site'};
2994    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2995    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2996    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2997
2998    # Make sure the collection isn't locked by someone else
2999    $self->lock_collection($username, $collect);
3000   
3001    # Check additional args
3002    my $docid = $self->{'d'};
3003    if (!defined($docid)) {
3004    $gsdl_cgi->generate_error("No document id is specified: d=...");
3005    }
3006    my $metaname = $self->{'metaname'};
3007    if (!defined($metaname)) {
3008    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3009    }
3010    my $metavalue = $self->{'metavalue'};
3011    if (!defined($metavalue) || $metavalue eq "") {
3012    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3013    }
3014    # make "accumulate" the default (less destructive, as won't actually
3015    # delete any existing values)
3016    my $metamode = "accumulate";
3017
3018    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3019    my $metapos   = undef;
3020    my $prevmetavalue = undef;
3021
3022    #=======================================================================#
3023    # set_import_metadata [START]
3024    #=======================================================================#
3025    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3026    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3027    my $metadata_xml_file;
3028    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3029    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3030   
3031    # This now stores the full pathname
3032    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3033   
3034    # figure out correct metadata.xml file [?]
3035    # Assuming the metadata.xml file is next to the source file
3036    # Note: This will not work if it is using the inherited metadata from the parent folder
3037    my ($import_tailname, $import_dirname)
3038    = File::Basename::fileparse($import_filename);
3039    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3040
3041    # Shane's escape characters
3042    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3043    $metavalue =~ s/\,/&#44;/g;
3044    $metavalue =~ s/\:/&#58;/g;
3045    $metavalue =~ s/\|/&#124;/g;
3046    $metavalue =~ s/\(/&#40;/g;
3047    $metavalue =~ s/\)/&#41;/g;
3048    $metavalue =~ s/\[/&#91;/g;
3049    $metavalue =~ s/\\/&#92;/g;
3050    $metavalue =~ s/\]/&#93;/g;
3051    $metavalue =~ s/\{/&#123;/g;
3052    $metavalue =~ s/\}/&#125;/g;
3053    $metavalue =~ s/\"/&#34;/g;
3054    $metavalue =~ s/\`/&#96;/g;
3055    $metavalue =~ s/\n/_newline_/g;
3056
3057    # Edit the metadata.xml
3058    # Modified by Jeffrey from DL Consulting
3059    # Handle the case where there is one metadata.xml file for multiple FileSets
3060    # The XML filter needs to know whether it is in the right FileSet
3061    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3062    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3063    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3064                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3065    #=======================================================================#
3066    # set_import_metadata [END]
3067    #=======================================================================#
3068
3069
3070    #=======================================================================#
3071    # set_metadata (accumulate version) [START]
3072    #=======================================================================#
3073    # To people who know $collect_tail please add some comments
3074    # Obtain path to the database
3075    my $collect_tail = $collect;
3076    $collect_tail =~ s/^.*[\/\\]//;
3077    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3078    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3079
3080    # Read the docid entry
3081    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3082
3083    # Protect the quotes
3084    $metavalue =~ s/\"/\\\"/g;
3085
3086    # Adds the pending macro
3087    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3088
3089    # If the metadata doesn't exist, create a new one
3090    if (!defined($doc_rec->{$metaname})){   
3091    $doc_rec->{$metaname} = [ $macro_metavalue ];
3092    }
3093    # Else, let's acculumate the values
3094    else {
3095        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3096    }
3097
3098    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3099    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3100
3101    if ($status != 0) {
3102        # Catch error if gdbmget failed
3103    my $mess = "Failed to set metadata key: $docid\n";
3104   
3105    $mess .= "PATH: $ENV{'PATH'}\n";
3106    $mess .= "Exit status: $status\n";
3107    $mess .= "System Error Message: $!\n";
3108
3109    $gsdl_cgi->generate_error($mess);
3110    }
3111    else {
3112    my $mess = "insert-metadata successful: Key[$docid]\n";
3113    $mess .= "  [In metadata.xml] $metaname";
3114    $mess .= " = $metavalue\n";
3115    $mess .= "  [In database] $metaname";
3116    $mess .= " = $macro_metavalue\n";
3117    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3118        $gsdl_cgi->generate_ok_message($mess);
3119    }   
3120    #=======================================================================#
3121    # set_metadata (accumulate version) [END]
3122    #=======================================================================#
3123
3124    # Release the lock once it is done
3125    $self->unlock_collection($username, $collect);
3126}
3127
31281;
Note: See TracBrowser for help on using the browser.