root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 27312

Revision 27312, 105.5 KB (checked in by ak19, 7 years ago)

get_metadata_array which takes a JSON string and returns one, implemented for getting index metadata. archives, import and live will be implemented later after testing the current implementation against the javascript for retrieving user added comments.

Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130        'optional-args'   => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131                     
132    "remove-archives-metadata" => {
133        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134        'optional-args'   => [ "metapos", "metavalue" ] },
135
136    "remove-live-metadata" => {
137        'compulsory-args' => [ "d", "metaname" ],
138        'optional-args'   => [ ] },
139
140    "remove-index-metadata" => {
141        'compulsory-args' => [ "d", "metaname" ],
142        'optional-args'   => [ "metapos", "metavalue" ] },
143
144    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145        'compulsory-args' => [ "d", "metaname" ],
146        'optional-args'   => [ "where", "metapos", "metavalue" ] },
147
148    #INSERT METHODS
149    "insert-metadata" => {
150        'compulsory-args' => [ "d", "metaname", "metavalue" ],
151        'optional-args'   => [ ] }
152};
153
154
155sub new
156{
157    my $class = shift (@_);
158    my ($gsdl_cgi,$iis6_mode) = @_;
159
160    # Treat metavalue specially.  To transmit this through a GET request
161    # the Javascript side has url-encoded it, so here we need to decode
162    # it before proceeding
163
164    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173    return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179    my $self = shift @_;
180
181    my $username  = $self->{'username'};
182    my $collect   = $self->{'collect'};
183    my $gsdl_cgi  = $self->{'gsdl_cgi'};
184    my $gsdlhome  = $self->{'gsdlhome'};
185    my $infodbtype = $self->{'infodbtype'};
186   
187    # live metadata gets/saves value scoped (prefixed) by the current usename
188    # so (for now) let's not bother to enforce authentication
189
190    # Obtain the collect dir
191    my $site = $self->{'site'};
192    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
194
195    # Make sure the collection isn't locked by someone else
196    $self->lock_collection($username, $collect);
197
198    # look up additional args
199    my $docid  = $self->{'d'};
200    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
201       $gsdl_cgi->generate_error("No docid (d=...) specified.");
202    }
203
204    # Generate the dbkey
205    my $metaname  = $self->{'metaname'};
206    my $dbkey = "$docid.$metaname";
207
208    # To people who know $collect_tail please add some comments
209    # Obtain path to the database
210    my $collect_tail = $collect;
211    $collect_tail =~ s/^.*[\/|\\]//;
212    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
213    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
214   
215    # Obtain the content of the key
216    my $cmd = "gdbmget $infodb_file_path $dbkey";
217    if (open(GIN,"$cmd |") == 0) {
218        # Catch error if gdbmget failed
219    my $mess = "Failed to get metadata key: $metaname\n";
220    $mess .= "$!\n";
221
222    $gsdl_cgi->generate_error($mess);
223    }
224    else {
225    binmode(GIN,":utf8");
226        # Read everything in and concatenate them into $metavalue
227    my $metavalue = "";
228    my $line;
229    while (defined ($line=<GIN>)) {
230        $metavalue .= $line;
231    }
232    close(GIN);
233    chomp($metavalue); # Get rid off the tailing newlines
234    $gsdl_cgi->generate_ok_message("$metavalue");
235    }
236
237    # Release the lock once it is done
238    $self->unlock_collection($username, $collect);
239}
240
241# just calls the index version
242sub get_metadata
243{
244    my $self = shift @_;
245    $self->get_index_metadata(@_);
246}
247
248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254    my $self = shift @_;
255
256    my $where = $self->{'where'};
257    if (!$where) { 
258    $where = "index"; # default behaviour is to get the values from index
259    }
260
261    # Only when setting metadata do we perform authentication and do we lock the collection
262    # not when getting metadata
263
264    # for get_meta_array, the where param can only be ONE of import, archives, index, live
265    if($where =~ m/index/) {
266    $self->_get_index_metadata_array(@_);
267    }
268    # still to be implemented for archives, import and live
269}
270
271# takes a JSON string and returns a JSON string
272sub _get_index_metadata_array
273{
274    my $self = shift @_;
275
276    my $collect   = $self->{'collect'};
277    my $gsdl_cgi  = $self->{'gsdl_cgi'};
278    my $site = $self->{'site'};
279    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
280
281    # look up additional args   
282    my $infodbtype = $self->{'infodbtype'};
283   
284    # To people who know $collect_tail please add some comments
285    # Obtain the path to the database
286    my $collect_tail = $collect;
287    $collect_tail =~ s/^.*[\/\\]//;
288    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
289    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
290
291    my $json_str      = $self->{'json'};
292    my $doc_array = decode_json $json_str;
293
294    my $json_result_str = "[";
295    my $first_doc_rec = 1;
296
297    foreach my $doc_array_rec ( @$doc_array ) {
298   
299    my $docid     = $doc_array_rec->{'docid'};
300   
301    if($first_doc_rec) {
302        $first_doc_rec = 0;
303    } else {
304        $json_result_str .= ",";
305    }
306    $json_result_str = $json_result_str . "{docid:\"" . $docid . "\""; 
307
308    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
309    $json_result_str = $json_result_str . ",metatable:[";
310
311    my $first_rec = 1;
312    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
313        if($first_rec) {
314        $first_rec = 0;
315        } else {
316        $json_result_str .= ",";       
317        }
318       
319        my $metaname  = $metatable_rec->{'metaname'};
320        $json_result_str .= "{metaname:\"$metaname\",metavals:[";
321
322        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
323        if(!defined $metapos) {
324        $metapos = "all";
325        }
326
327         # Read the docid entry
328        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
329 
330        # Basically loop through and unescape_html the values
331        foreach my $k (keys %$doc_rec) {
332        my @escaped_v = ();
333        foreach my $v (@{$doc_rec->{$k}}) {
334            my $ev = &ghtml::unescape_html($v);
335            push(@escaped_v, $ev);
336        }
337        $doc_rec->{$k} = \@escaped_v;
338        }
339
340        # Obtain the specified metadata value(s)
341        my $metavalue;
342
343        if($metapos ne "all") { # get the value at a single metapos
344
345        $metavalue = $doc_rec->{$metaname}->[$metapos];     
346        $json_result_str .= "{metapos:\"$metapos\",metavalue:\"$metavalue\"}";
347
348        } else {
349        my $first_metaval = 1;
350        $metapos = 0;
351        $metavalue = $doc_rec->{$metaname}->[$metapos];
352
353        while (defined $metavalue) {
354            if($first_metaval) {       
355            $first_metaval = 0;
356            } else {
357            $json_result_str .= ",";
358            }
359       
360            $json_result_str .= "{metapos:\"$metapos\",metavalue:\"$metavalue\"}";
361
362            $metapos++;
363            $metavalue = $doc_rec->{$metaname}->[$metapos];         
364        }
365        }
366
367        $json_result_str .= "]}"; # close metavals array and metatable record
368    }
369   
370    $json_result_str .= "]}"; # close metatable array and docid record
371    }
372
373    $json_result_str .= "]"; # close array of docids
374
375    $gsdl_cgi->generate_ok_message($json_result_str);   
376}
377
378
379sub get_index_metadata
380{
381    my $self = shift @_;
382
383    my $username  = $self->{'username'};
384    my $collect   = $self->{'collect'};
385    my $gsdl_cgi  = $self->{'gsdl_cgi'};
386    my $gsdlhome  = $self->{'gsdlhome'};
387
388    # Authenticate user if it is enabled
389    if ($baseaction::authentication_enabled) {
390    # Ensure the user is allowed to edit this collection   
391    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
392    }
393
394    # Obtain the collect dir
395    my $site = $self->{'site'};
396    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
397    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
398
399    # Make sure the collection isn't locked by someone else
400    $self->lock_collection($username, $collect);
401
402    # look up additional args
403    my $docid     = $self->{'d'};
404    my $metaname  = $self->{'metaname'};
405    my $metapos   = $self->{'metapos'};
406    my $infodbtype = $self->{'infodbtype'};
407
408    # To people who know $collect_tail please add some comments
409    # Obtain path to the database
410    my $collect_tail = $collect;
411    $collect_tail =~ s/^.*[\/\\]//;
412    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
413    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
414
415    # Read the docid entry
416    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
417 
418    # Basically loop through and unescape_html the values
419    foreach my $k (keys %$doc_rec) {
420    my @escaped_v = ();
421    foreach my $v (@{$doc_rec->{$k}}) {
422        my $ev = &ghtml::unescape_html($v);
423        push(@escaped_v, $ev);
424    }
425    $doc_rec->{$k} = \@escaped_v;
426    }
427
428    # Obtain the specified metadata value
429    $metapos = 0 if (!defined $metapos);
430    my $metavalue = $doc_rec->{$metaname}->[$metapos];
431    $gsdl_cgi->generate_ok_message("$metavalue");
432   
433    # Release the lock once it is done
434    $self->unlock_collection($username, $collect);
435}
436
437
438sub get_import_metadata
439{
440    my $self = shift @_;
441
442    my $username  = $self->{'username'};
443    my $collect   = $self->{'collect'};
444    my $gsdl_cgi  = $self->{'gsdl_cgi'};
445    my $gsdlhome  = $self->{'gsdlhome'};
446
447    # Authenticate user if it is enabled
448    if ($baseaction::authentication_enabled) {
449        # Ensure the user is allowed to edit this collection
450        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
451    }
452
453    # Obtain the collect dir
454    my $site = $self->{'site'};
455    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
456    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
457
458    # Make sure the collection isn't locked by someone else
459    $self->lock_collection($username, $collect);
460
461    # look up additional args
462    my $docid     = $self->{'d'};
463    my $metaname  = $self->{'metaname'};
464    my $metapos = $self->{'metapos'};
465    $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
466
467    my $infodbtype = $self->{'infodbtype'};
468    if (!defined $docid)
469    {
470        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
471    }
472
473    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
474    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
475    my $metadata_xml_file;
476    my $import_filename = undef;
477   
478
479    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
480    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
481    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
482
483    # This now stores the full pathname
484    $import_filename = $doc_rec->{'src-file'}->[0];
485
486    # figure out correct metadata.xml file [?]
487    # Assuming the metadata.xml file is next to the source file
488    # Note: This will not work if it is using the inherited metadata from the parent folder
489    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
490    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
491
492    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
493
494    # Release the lock once it is done
495    $self->unlock_collection($username, $collect);
496}
497
498sub get_metadata_from_metadata_xml
499{
500    my $self = shift @_;
501    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
502   
503    my @rules =
504    (
505        _default => 'raw',
506        'Metadata' => \&gfmxml_metadata,
507        'FileName' => \&mxml_filename
508    );
509       
510    my $parser = XML::Rules->new
511    (
512        rules => \@rules,
513        output_encoding => 'utf8'
514    );
515   
516    my $xml_in = "";
517    if (!open(MIN,"<$metadata_xml_filename"))
518    {
519        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
520    }
521    else
522    {
523        # Read them in
524        my $line;
525        while (defined ($line=<MIN>)) {
526            $xml_in .= $line;
527        }
528        close(MIN);
529
530        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
531       
532        if(defined $parser->{'pad'}->{'metavalue'})
533        {
534            return $parser->{'pad'}->{'metavalue'};
535        }
536        else
537        {
538            return "";
539        }
540    }
541}
542
543sub gfmxml_metadata
544{
545    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
546
547    # no subsection support yet in metadata.xml
548
549    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
550    {
551        if (!defined $parser->{'parameters'}->{'poscount'})
552        {
553            $parser->{'parameters'}->{'poscount'} = 0;
554        }
555        else
556        {
557            $parser->{'parameters'}->{'poscount'}++;
558        }
559   
560        # gets the first value by default, since metapos defaults to 0
561        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
562        {
563            if($parser->{'parameters'}->{'metapos'} > 0) {
564            print STDERR "@@@@ WARNING: non-zero metapos.\n";
565            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
566            }
567            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
568        }
569    }
570}
571
572sub get_archives_metadata
573{
574    my $self = shift @_;
575
576    my $username  = $self->{'username'};
577    my $collect   = $self->{'collect'};
578    my $gsdl_cgi  = $self->{'gsdl_cgi'};
579    my $gsdlhome  = $self->{'gsdlhome'};
580    my $infodbtype = $self->{'infodbtype'};
581
582    # Authenticate user if it is enabled
583    if ($baseaction::authentication_enabled) {
584        # Ensure the user is allowed to edit this collection       
585        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
586    }
587
588    # Obtain the collect dir
589    my $site = $self->{'site'};
590    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
591   
592    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
593
594    # Make sure the collection isn't locked by someone else
595    $self->lock_collection($username, $collect);
596   
597    # look up additional args
598    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
599    $docid_secnum = "" if (!defined $docid_secnum);
600   
601    my $metaname = $self->{'metaname'};
602    my $metapos = $self->{'metapos'};
603    $metapos = 0 if (!defined $metapos);
604   
605    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
606    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
607
608    # This now stores the full pathname
609    my $doc_filename = $doc_rec->{'doc-file'}->[0];
610
611    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
612
613    # Release the lock once it is done
614    $self->unlock_collection($username, $collect);
615}
616
617sub get_metadata_from_archive_xml
618{
619    my $self = shift @_;
620    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
621   
622    my @start_rules = ('Section' => \&dxml_start_section);
623   
624    my @rules =
625    (
626        _default => 'raw',
627        'Metadata' => \&gfdxml_metadata
628    );
629       
630    my $parser = XML::Rules->new
631    (
632        start_rules => \@start_rules,
633        rules => \@rules,
634        output_encoding => 'utf8'
635    );
636   
637    my $xml_in = "";
638    if (!open(MIN,"<$doc_xml_filename"))
639    {
640        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
641    }
642    else
643    {
644        # Read them in
645        my $line;
646        while (defined ($line=<MIN>)) {
647            $xml_in .= $line;
648        }
649        close(MIN);
650
651        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
652       
653        if(defined $parser->{'pad'}->{'metavalue'})
654        {
655            return $parser->{'pad'}->{'metavalue'};
656        }
657        else
658        {
659            return "";
660        }
661    }
662}
663
664sub gfdxml_metadata
665{
666    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
667   
668    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
669    {
670        return;
671    }
672
673    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
674    {
675        if (!defined $parser->{'parameters'}->{'poscount'})
676        {
677            $parser->{'parameters'}->{'poscount'} = 0;
678        }
679        else
680        {
681            $parser->{'parameters'}->{'poscount'}++;
682        }
683    }
684
685    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
686    {   
687        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
688    }
689}
690
691sub _set_live_metadata
692{
693    my $self = shift @_;
694
695    my $collect   = $self->{'collect'};
696    my $gsdl_cgi  = $self->{'gsdl_cgi'};
697    #my $gsdlhome  = $self->{'gsdlhome'};
698    my $infodbtype = $self->{'infodbtype'};
699
700    # Obtain the collect dir
701    my $site = $self->{'site'};
702    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
703    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
704
705
706    # look up additional args
707    my $docid     = $self->{'d'};
708    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
709      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
710    }
711    my $metavalue = $self->{'metavalue'};
712
713    # Generate the dbkey   
714    my $metaname  = $self->{'metaname'};
715    my $dbkey = "$docid.$metaname";
716
717    # To people who know $collect_tail please add some comments
718    # Obtain path to the database
719    my $collect_tail = $collect;
720    $collect_tail =~ s/^.*[\/\\]//;
721    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
722    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
723
724    # Set the new value
725    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
726    my $status = system($cmd);
727    if ($status != 0) {
728        # Catch error if gdbmget failed
729    my $mess = "Failed to set metadata key: $dbkey\n";
730
731    $mess .= "PATH: $ENV{'PATH'}\n";
732    $mess .= "cmd = $cmd\n";
733    $mess .= "Exit status: $status\n";
734    $mess .= "System Error Message: $!\n";
735
736    $gsdl_cgi->generate_error($mess);
737    }
738    else {
739    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
740    }
741
742    #return $status; # in case calling functions have any further use for this
743}
744
745sub set_live_metadata
746{
747    my $self = shift @_;
748
749    my $username  = $self->{'username'};
750    my $collect   = $self->{'collect'};
751    my $gsdl_cgi  = $self->{'gsdl_cgi'};
752 
753    if ($baseaction::authentication_enabled) {
754    # Ensure the user is allowed to edit this collection   
755    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
756    }
757
758    # Make sure the collection isn't locked by someone else
759    $self->lock_collection($username, $collect);
760
761    $self->_set_live_metadata(@_);
762
763    # Release the lock once it is done
764    $self->unlock_collection($username, $collect);
765}
766
767sub set_index_metadata_entry
768{
769    my $self = shift @_;
770    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
771   
772    # To people who know $collect_tail please add some comments
773    # Obtain path to the database
774    my $collect_tail = $collect;
775    $collect_tail =~ s/^.*[\/\\]//;
776    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
777    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
778   
779#   print STDERR "**** infodb file path = $infodb_file_path\n";
780#   print STDERR "***** infodb type = $infodbtype\n";
781   
782    # Read the docid entry
783    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
784   
785    # Set the metadata value
786    if (defined $metapos) {
787    # if metamode=accumulate AND metapos, warn user and then use metapos
788    if (defined $metamode && $metamode eq "accumulate") {
789        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
790        print STDERR "**** Proceeding by using metapos\n";
791    }
792    $doc_rec->{$metaname}->[$metapos] = $metavalue;
793    }
794    elsif (defined $metamode && $metamode eq "override") {
795    $doc_rec->{$metaname} = [ $metavalue ];
796    }
797    else { # default for index was to override, but because accumulate is less destructive,
798    # and because accumulate is the default for archives and import, that's the new default for index too
799    if(defined $doc_rec->{$metaname}) {
800        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
801    } else {
802        $doc_rec->{$metaname} = [ $metavalue ];
803    }
804    }
805 
806    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
807   
808    return $status;
809   
810}
811
812sub _set_import_metadata
813{
814    my $self = shift @_;
815
816    my $collect   = $self->{'collect'};
817    my $gsdl_cgi  = $self->{'gsdl_cgi'};
818    my $infodbtype = $self->{'infodbtype'};
819#    my $gsdlhome  = $self->{'gsdlhome'};
820     
821    # Obtain the collect and archive dir   
822    my $site = $self->{'site'};
823    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
824    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
825    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
826   
827    # look up additional args
828    # want either d= or f=
829    my $docid  = $self->{'d'};
830    my ($docid_root,$docid_secnum);
831    if(defined $docid) {   
832    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
833    # as yet no support for setting subsection metadata in metadata.xml
834    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
835        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
836        return;
837    }
838    }
839
840    my $import_file  = $self->{'f'};
841    if ((!defined $docid) && (!defined $import_file)) {
842    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
843    }
844
845    # Get the parameters and set default mode to "accumulate"
846    my $metaname   = $self->{'metaname'};
847    my $metavalue  = $self->{'metavalue'};
848##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
849    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
850   
851    my $metamode   = $self->{'metamode'};
852    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
853    # make "accumulate" the default (less destructive, as won't actually
854    # delete any existing values)
855    $metamode = "accumulate";
856    }
857
858    # adding metapos and prevmetavalue support to import_metadata subroutines
859    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
860    my $prevmetavalue = $self->{'prevmetavalue'};
861
862    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
863    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
864
865    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
866    $mess .= "  $metaname";
867    $mess .= " = $metavalue";
868    $mess .= " ($metamode)\n";
869
870    $gsdl_cgi->generate_ok_message($mess);
871
872    #return $status; # in case calling functions have any further use for this
873}
874
875# the version of set_index_meta that doesn't do authentication
876sub _set_archives_metadata
877{
878    my $self = shift @_;
879
880    my $collect   = $self->{'collect'};
881    my $gsdl_cgi  = $self->{'gsdl_cgi'};
882    my $infodbtype = $self->{'infodbtype'};
883   
884    # Obtain the collect and archive dir   
885    my $site = $self->{'site'};
886    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
887    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
888
889    # look up additional args
890    my $docid  = $self->{'d'};
891    my $metaname   = $self->{'metaname'};
892    my $metavalue  = $self->{'metavalue'};
893    my $prevmetavalue = $self->{'prevmetavalue'};
894   
895    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
896                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
897
898    my $metamode   = $self->{'metamode'};
899    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
900    # make "accumulate" the default (less destructive, as won't actually
901    # delete any existing values)
902    $metamode = "accumulate";
903    }
904
905    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
906                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
907   
908    if ($status == 0) {
909    my $mess = "set-archives-metadata successful: Key[$docid]\n";
910    $mess .= "  $metaname";
911    $mess .= "->[$metapos]" if (defined $metapos);
912    $mess .= " = $metavalue";
913    $mess .= " ($metamode)\n";
914   
915    $gsdl_cgi->generate_ok_message($mess); 
916    }
917    else {
918    my $mess .= "Failed to set archives metadata key: $docid\n";
919    $mess .= "Exit status: $status\n";
920    if(defined $self->{'error_msg'}) {
921        $mess .= "Error Message: $self->{'error_msg'}\n";
922    } else {
923        $mess .= "System Error Message: $!\n";
924    }
925    $mess .= "-" x 20 . "\n";
926   
927    $gsdl_cgi->generate_error($mess);
928    }
929
930    #return $status; # in case calling functions have any further use for this
931}
932
933
934# the version of set_index_meta that doesn't do authentication
935sub _set_index_metadata
936{
937    my $self = shift @_;
938
939    my $collect   = $self->{'collect'};
940    my $gsdl_cgi  = $self->{'gsdl_cgi'};
941
942    my $site = $self->{'site'};
943    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
944
945    # look up additional args
946    my $docid     = $self->{'d'};
947    my $metaname  = $self->{'metaname'};
948    my $metapos   = $self->{'metapos'}; # undef has meaning
949    my $metavalue = $self->{'metavalue'};
950    my $infodbtype = $self->{'infodbtype'};
951    my $metamode  = $self->{'metamode'};   
952
953    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
954   
955    if ($status != 0) {
956        # Catch error if set infodb entry failed
957    my $mess = "Failed to set metadata key: $docid\n";
958   
959    $mess .= "PATH: $ENV{'PATH'}\n";
960    $mess .= "Exit status: $status\n";
961    $mess .= "System Error Message: $!\n";
962   
963    $gsdl_cgi->generate_error($mess);
964    }
965    else {
966    my $mess = "set-index-metadata successful: Key[$docid]\n";
967    $mess .= "  $metaname";
968    $mess .= "->[$metapos]" if (defined $metapos);
969    $mess .= " = $metavalue\n";
970   
971    $gsdl_cgi->generate_ok_message($mess);
972    }
973
974    #return $status; # in case calling functions have any further use for this
975}
976
977sub set_index_metadata
978{
979    my $self = shift @_;
980
981    my $username  = $self->{'username'};
982    my $collect   = $self->{'collect'};
983    my $gsdl_cgi  = $self->{'gsdl_cgi'};
984    #my $gsdlhome  = $self->{'gsdlhome'};
985
986    if ($baseaction::authentication_enabled) {
987    # Ensure the user is allowed to edit this collection   
988    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
989    }
990
991    my $site = $self->{'site'};
992    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
993   
994    $gsdl_cgi->checked_chdir($collect_dir);
995
996    # Obtain the collect dir
997    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
998
999    # Make sure the collection isn't locked by someone else
1000    $self->lock_collection($username, $collect);
1001
1002    $self->_set_index_metadata(@_);
1003   
1004    # Release the lock once it is done
1005    $self->unlock_collection($username, $collect);
1006}
1007
1008# call this to set the metadata for a combination of dirs archives, import or index, or live
1009# if none specified, defaults to index which was the original behaviour of set_metadata.
1010sub set_metadata
1011{
1012    my $self = shift @_;
1013
1014    # Testing that not defining a variable, setting it to "" or to "  " all return false
1015    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1016
1017    my $where = $self->{'where'};
1018    if(!$where) {   
1019    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1020    return;
1021    }
1022
1023    # authenticate and lock collection once, even if processing multiple dirs
1024    my $username  = $self->{'username'};
1025    my $collect   = $self->{'collect'};
1026    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1027   
1028    if ($baseaction::authentication_enabled) {
1029    # Ensure the user is allowed to edit this collection   
1030    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1031    }
1032   
1033    if($where =~ m/index/) {
1034    my $site = $self->{'site'};
1035    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1036    $gsdl_cgi->checked_chdir($collect_dir);
1037    }
1038
1039    # Make sure the collection isn't locked by someone else
1040    $self->lock_collection($username, $collect);
1041
1042
1043    # now at last can set the metadata. $where can specify multiple
1044    # $where is of the form: import|archives|index, or a subset thereof
1045
1046    #my @whichdirs = split('\|', $where);
1047
1048    # just check whether $where contains import/archives/index/live in turn, and
1049    # for each case, process it accordingly
1050    if($where =~ m/import/) {
1051    $self->_set_import_metadata(@_);       
1052    }
1053
1054    if($where =~ m/archives/) {
1055
1056    # look up docID arg which is optional to set_metadata because it's optional
1057    # to set_import, but which is compulsory to set_archives_metadata
1058    my $docid     = $self->{'d'};
1059    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1060        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1061    }
1062    # we have a docid, so can set archives meta
1063    $self->_set_archives_metadata(@_); 
1064    }
1065
1066    if($where =~ m/index/) {
1067   
1068    # look up docID arg which is optional to set_metadata because it's optional
1069    # to set_import, but which is compulsory to set_archives_metadata
1070    my $docid     = $self->{'d'};
1071    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1072        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1073    }
1074    # we have a docid, so can set index meta
1075    $self->_set_index_metadata(@_);
1076    }   
1077
1078    if($where =~ m/live/) {
1079    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1080    }
1081
1082    # Release the lock once it is done
1083    $self->unlock_collection($username, $collect);
1084}
1085
1086sub set_metadata_array
1087{
1088    my $self = shift @_;
1089
1090    my $where = $self->{'where'};
1091    if(!$where) {   
1092    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1093    return;
1094    }
1095
1096    my $username  = $self->{'username'};
1097    my $collect   = $self->{'collect'};
1098    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1099
1100    if ($baseaction::authentication_enabled) {
1101    # Ensure the user is allowed to edit this collection
1102    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1103    }
1104
1105    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1106    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1107    my $site = $self->{'site'};
1108    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1109    $gsdl_cgi->checked_chdir($collect_dir);
1110
1111    # Make sure the collection isn't locked by someone else
1112    $self->lock_collection($username, $collect);
1113
1114    if($where =~ m/import/) {
1115    $self->_set_import_metadata_array(@_);
1116    }
1117    if($where =~ m/archives/) {
1118    $self->_set_archives_metadata_array(@_);
1119    }
1120    if($where =~ m/index/) {
1121    $self->_set_index_metadata_array(@_);
1122    }
1123    if($where =~ m/live/) {
1124        $self->_set_live_metadata_array(@_);
1125    }
1126
1127    # Release the lock once it is done
1128    $self->unlock_collection($username, $collect);
1129}
1130
1131sub _set_index_metadata_array
1132{
1133    my $self = shift @_;
1134
1135    my $collect   = $self->{'collect'};
1136    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1137    my $site = $self->{'site'};
1138    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1139
1140   
1141    # look up additional args
1142   
1143    my $infodbtype = $self->{'infodbtype'};
1144   
1145    my $json_str      = $self->{'json'};
1146    my $doc_array = decode_json $json_str;
1147   
1148   
1149    my $global_status = 0;
1150    my $global_mess = "";
1151   
1152    my @all_docids = ();
1153   
1154    foreach my $doc_array_rec ( @$doc_array ) {
1155   
1156    my $status = -1;
1157    my $docid     = $doc_array_rec->{'docid'};
1158   
1159    push(@all_docids,$docid);
1160
1161    my $metaname  = $doc_array_rec->{'metaname'};
1162    if(defined $metaname) {
1163        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1164        my $metavalue = $doc_array_rec->{'metavalue'};
1165        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1166
1167        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1168    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1169        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1170       
1171        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1172        $metaname  = $metatable_rec->{'metaname'};
1173        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1174        my $metapos = undef;
1175        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1176
1177        foreach my $metavalue ( @$metavals ) { # metavals is an array
1178            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1179            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1180            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1181            # metavalue for this metaname will be lost
1182            $metamode = "accumulate";
1183            }
1184        }           
1185        }
1186    }
1187   
1188    if ($status != 0) {
1189        # Catch error if set infodb entry failed
1190        $global_status = $status;
1191        $global_mess .= "Failed to set metadata key: $docid\n";
1192        $global_mess .= "Exit status: $status\n";
1193        $global_mess .= "System Error Message: $!\n";
1194        $global_mess .= "-" x 20;
1195    }
1196    }
1197   
1198    if ($global_status != 0) {
1199    $global_mess .= "PATH: $ENV{'PATH'}\n";
1200    $gsdl_cgi->generate_error($global_mess);
1201    }
1202    else {
1203    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1204    $gsdl_cgi->generate_ok_message($mess);
1205    }
1206}
1207
1208sub set_index_metadata_array
1209{
1210    my $self = shift @_;
1211
1212    my $username  = $self->{'username'};
1213    my $collect   = $self->{'collect'};
1214    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1215#    my $gsdlhome  = $self->{'gsdlhome'};
1216
1217    if ($baseaction::authentication_enabled) {
1218    # Ensure the user is allowed to edit this collection   
1219    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1220    }
1221
1222    my $site = $self->{'site'};
1223    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1224   
1225    $gsdl_cgi->checked_chdir($collect_dir);
1226
1227    # Obtain the collect dir
1228    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1229
1230    # Make sure the collection isn't locked by someone else
1231    $self->lock_collection($username, $collect);
1232
1233    $self->_set_index_metadata_array(@_);
1234
1235    # Release the lock once it is done
1236    $self->unlock_collection($username, $collect);
1237}
1238
1239# experimental, newly added in and untested
1240sub _set_live_metadata_array
1241{
1242    my $self = shift @_;
1243
1244    my $collect   = $self->{'collect'};
1245    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1246
1247    my $site = $self->{'site'};
1248    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1249
1250   
1251    # look up additional args
1252    my $infodbtype = $self->{'infodbtype'};
1253    # To people who know $collect_tail please add some comments
1254    # Obtain path to the database
1255    my $collect_tail = $collect;
1256    $collect_tail =~ s/^.*[\/\\]//;
1257    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1258    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1259
1260   
1261    my $json_str      = $self->{'json'};
1262    my $doc_array = decode_json $json_str;
1263   
1264   
1265    my $global_status = 0;
1266    my $global_mess = "";
1267   
1268    my @all_docids = ();
1269
1270
1271    foreach my $doc_array_rec ( @$doc_array ) {
1272   
1273    my $status = -1;
1274    my $docid     = $doc_array_rec->{'docid'};
1275
1276    push(@all_docids,$docid);
1277
1278    my $metaname  = $doc_array_rec->{'metaname'};
1279    if(defined $metaname) {
1280        my $dbkey = "$docid.$metaname";
1281        my $metavalue = $doc_array_rec->{'metavalue'};
1282
1283        # Set the new value
1284        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1285        $status = system($cmd);
1286
1287    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1288        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1289        foreach my $metatable_rec ( @$metatable ) {
1290        $metaname  = $metatable_rec->{'metaname'};
1291        my $dbkey = "$docid.$metaname";
1292
1293        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1294        foreach my $metavalue ( @$metavals ) {
1295             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1296             $status = system($cmd);
1297        }
1298        }
1299       
1300    }
1301
1302    if ($status != 0) {
1303        # Catch error if gdbmget failed
1304        $global_status = $status;
1305        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1306        $global_mess .= "Exit status: $status\n";
1307        $global_mess .= "System Error Message: $!\n";
1308        $global_mess .= "-" x 20;
1309    }
1310    }
1311   
1312    if ($global_status != 0) {
1313    $global_mess .= "PATH: $ENV{'PATH'}\n";
1314    $gsdl_cgi->generate_error($global_mess);
1315    }
1316    else {
1317    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1318    $gsdl_cgi->generate_ok_message($mess);
1319    }
1320}
1321
1322sub set_live_metadata_array
1323{
1324    my $self = shift @_;
1325
1326    my $username  = $self->{'username'};
1327    my $collect   = $self->{'collect'};
1328    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1329
1330    if ($baseaction::authentication_enabled) {
1331    # Ensure the user is allowed to edit this collection   
1332    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1333    }
1334
1335    my $site = $self->{'site'};
1336    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1337   
1338    $gsdl_cgi->checked_chdir($collect_dir);
1339
1340    # Make sure the collection isn't locked by someone else
1341    $self->lock_collection($username, $collect);
1342
1343    $self->_set_live_metadata_array(@_);
1344
1345    # Release the lock once it is done
1346    $self->unlock_collection($username, $collect);
1347}
1348
1349
1350sub dxml_metadata
1351{
1352    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1353    my $metaname = $parser->{'parameters'}->{'metaname'};
1354    my $metamode = $parser->{'parameters'}->{'metamode'};
1355   
1356    print STDERR "**** Processing closing </Metadata> tag\n";
1357   
1358    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1359   
1360    # Find the right metadata tag and checks if we are going to
1361    # override it
1362    #
1363    # Note: This over writes the first metadata block it
1364    # encountered. If there are multiple Sections in the doc.xml, it
1365    # might not behave as you would expect
1366
1367    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1368##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1369##    print STDERR "**** metamode = $metamode\n";
1370   
1371    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1372    {
1373        my $name_attr = $attrHash->{'name'};
1374        if (($name_attr eq $metaname) && ($metamode eq "override"))
1375        {
1376            if (!defined $parser->{'parameters'}->{'poscount'})
1377            {
1378                $parser->{'parameters'}->{'poscount'} = 0;
1379            }
1380            else
1381            {
1382                $parser->{'parameters'}->{'poscount'}++;
1383            }
1384           
1385            if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1386            {
1387                ##print STDERR "#### got match!!\n";
1388                # Get the value and override the current value
1389                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1390                $attrHash->{'_content'} = $metavalue;
1391               
1392                # Don't want it to wipe out any other pieces of metadata
1393                $parser->{'parameters'}->{'metamode'} = "done";
1394            }
1395            elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1396            {
1397                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1398                $attrHash->{'_content'} = $metavalue;
1399                $parser->{'parameters'}->{'metamode'} = "done";
1400            }
1401        }
1402    }
1403
1404    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1405    return [$tagname => $attrHash];
1406}
1407
1408# This method exists purely for catching invalid section numbers that the client
1409# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1410# we've seen all the Sections in the doc.xml, and none of their section nums matched
1411# if the metamode has not been set to 'done' by then.
1412sub dxml_archive
1413{
1414    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1415    my $metamode = $parser->{'parameters'}->{'metamode'};
1416   
1417    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1418    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1419   
1420#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1421   
1422    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1423    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1424
1425    my $metaname = $parser->{'parameters'}->{'metaname'};
1426    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1427   
1428    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1429    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1430    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1431    print STDERR "\tin section $opt_doc_secnum.\n";
1432    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1433    }
1434   
1435    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1436    return [$tagname => $attrHash];
1437}
1438
1439sub dxml_description
1440{
1441    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1442    my $metamode = $parser->{'parameters'}->{'metamode'};
1443
1444    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1445    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1446   
1447    print STDERR "**** Processing a closing </Description> tag \n";
1448#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1449   
1450    # Accumulate the metadata
1451
1452    # We'll be accumulating metadata at this point if we haven't found and therefore
1453    # haven't processed the metadata yet.
1454    # For subsections, this means that if we're at a matching subsection, but haven't
1455    # found the correct metaname to override in that subsection, we accumulate it as new
1456    # meta in the subsection by adding it to the current description.
1457    # If there's no subsection info for the metadata, it will accumulate at the top level
1458    # section description if we hadn't found a matching metaname to override at this point.
1459
1460    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1461    # at the toplevel section. In the latter case, it means we want to process meta in the
1462    # toplevel section. So the eq check between the values below will work in all cases.
1463   
1464    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1465    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1466    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1467
1468    if ($curr_secnum eq $opt_doc_secnum
1469        && ($metamode eq "accumulate" || $metamode eq "override")) {
1470        if ($metamode eq "override") {
1471        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1472        }
1473
1474        # If we get to here and metamode is override, this means there
1475        # was no existing value to overide => treat as an append operation
1476       
1477        # Tack a new metadata tag on to the end of the <Metadata>+ block
1478        my $metaname = $parser->{'parameters'}->{'metaname'};
1479        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1480       
1481        my $metadata_attr = {
1482        '_content' => $metavalue,
1483        'name' => $metaname,
1484        'mode' => "accumulate"
1485        };
1486       
1487        my $append_metadata = [ "Metadata" => $metadata_attr ];
1488        my $description_content = $attrHash->{'_content'};
1489       
1490        print "Appending metadata to doc.xml\n";
1491       
1492        if (ref($description_content)) {
1493        # got some existing interesting nested content
1494        push(@$description_content, "    ", $append_metadata ,"\n        ");
1495        }
1496        else {
1497        #description_content is most likely a string such as "\n"
1498        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1499        }
1500       
1501        $parser->{'parameters'}->{'metamode'} = "done";
1502    }       
1503    else {
1504        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1505##      print STDERR "**** NOT ACCUMULATE?!? \n";
1506    }
1507
1508    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1509    return [$tagname => $attrHash];
1510}
1511
1512
1513sub dxml_start_section
1514{
1515    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1516
1517    my $new_depth = scalar(@$contextArray);
1518
1519    print STDERR "**** START SECTION \n";
1520   
1521    if ($new_depth == 1) {
1522    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1523    $parser->{'parameters'}->{'curr_section_num'}   = "";
1524    }
1525
1526    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1527    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1528
1529    my $new_secnum;
1530
1531    if ($new_depth > $old_depth) {
1532    # child subsection
1533    $new_secnum = "$old_secnum.1";
1534    }
1535    elsif ($new_depth == $old_depth) {
1536    # sibling section => increase it's value by 1
1537    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1538    $tail_num++;
1539    $new_secnum = $old_secnum;
1540    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1541    }
1542    else {
1543    # back up to parent section => lopp off tail
1544    $new_secnum = $old_secnum;
1545    $new_secnum =~ s/\.\d+$//;
1546    }
1547
1548    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1549    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1550   
1551    1;
1552}
1553
1554sub edit_xml_file
1555{
1556    my $self = shift @_;
1557    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1558
1559    # use XML::Rules to add it in (read in and out again)
1560    my $parser = XML::Rules->new(start_rules     => $start_rules,
1561                 rules           => $rules,
1562                 style           => 'filter',
1563                                 output_encoding => 'utf8' );
1564
1565    my $xml_in = "";
1566    if (!open(MIN,"<$filename")) {
1567    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1568    }
1569    else {
1570        # Read all the text in
1571    my $line;
1572    while (defined ($line=<MIN>)) {
1573        $xml_in .= $line;
1574    }
1575    close(MIN);
1576   
1577    my $MOUT;   
1578    if (!open($MOUT,">$filename")) {
1579        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1580    }
1581    else {
1582        # Matched lines will get handled by the call backs
1583##      my $xml_out = "";
1584
1585        binmode($MOUT,":utf8");
1586        $parser->filter($xml_in,$MOUT, $options);
1587
1588#       binmode(MOUT,":utf8");
1589#       print MOUT $xml_out;
1590        close($MOUT);       
1591    }
1592    }
1593
1594    # copy across any custom error information that was stored during parsing
1595    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1596}
1597
1598sub edit_doc_xml
1599{
1600    my $self = shift @_;
1601    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1602
1603    my $info_mess = <<RAWEND;
1604****************************
1605  edit_doc_xml()
1606****************************
1607doc_xml_filename = $doc_xml_filename
1608metaname = $metaname
1609metavalue = $metavalue
1610metapos = $metapos
1611metamode = $metamode
1612opt_secnum = $opt_secnum
1613prevmetavalue = $prevmetavalue
1614****************************
1615RAWEND
1616
1617    $gsdl_cgi->generate_message($info_mess);
1618   
1619    # To monitor which section/subsection number we are in
1620    my @start_rules =
1621    ( 'Section'    => \&dxml_start_section );
1622
1623    # use XML::Rules to add it in (read in and out again)
1624    # Set the call back functions
1625    my @rules =
1626    ( _default => 'raw',
1627      'Metadata'    => \&dxml_metadata,
1628      'Description' => \&dxml_description,
1629      'Archive'     => \&dxml_archive); # just for catching errors at end
1630     
1631    # Sets the parameters
1632    my $options = { 'metaname'  => $metaname,
1633            'metapos'   => $metapos,
1634            'metavalue' => $metavalue,
1635            'metamode'  => $metamode,
1636            'prevmetavalue' => $prevmetavalue };
1637           
1638    if (defined $opt_secnum) {
1639    $options->{'secnum'} = $opt_secnum;
1640    }
1641
1642    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1643}
1644
1645sub set_archives_metadata_entry
1646{
1647    my $self = shift @_;
1648    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1649   
1650    my $info_mess = <<RAWEND;
1651****************************
1652  set_archives_metadata_entry()
1653****************************
1654archive_dir = $archive_dir
1655collect_dir = $collect_dir
1656collect = $collect
1657infodbtype = $infodbtype
1658docid = $docid
1659metaname = $metaname
1660metapos = $metapos
1661metavalue = $metavalue
1662metamode = $metamode
1663prevmetavalue = $prevmetavalue
1664****************************
1665RAWEND
1666
1667    $gsdl_cgi->generate_message($info_mess);
1668   
1669    # Obtain the doc.xml path for the specified docID
1670    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1671
1672    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1673    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1674    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1675   
1676    # The $doc_xml_file is relative to the archives, and now let's get the full path
1677    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1678    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1679
1680    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1681    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1682    # Thereafter, we will finally be setting the overriding metavalue for this metaname
1683    if(!defined $metapos && $metamode eq "override") {
1684    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1685    }
1686
1687    # Edit the doc.xml file with the specified metadata name, value and position.
1688    # TODO: there is a potential problem here as this edit_doc_xml function
1689    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1690    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1691   
1692    # dxml_metadata method ignores metapos if metamode anything other than override
1693    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1694            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1695
1696    # return 0; # return 0 for now to indicate no error
1697    return (defined $self->{'error_msg'}) ? 1 : 0;
1698}
1699
1700
1701sub set_archives_metadata
1702{
1703    my $self = shift @_;
1704
1705    my $username  = $self->{'username'};
1706    my $collect   = $self->{'collect'};
1707    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1708   
1709    if ($baseaction::authentication_enabled) {
1710    # Ensure the user is allowed to edit this collection
1711    $self->authenticate_user($username, $collect);
1712    }
1713
1714    # Make sure the collection isn't locked by someone else
1715    $self->lock_collection($username, $collect);
1716
1717    $self->_set_archives_metadata(@_);
1718
1719    # Release the lock once it is done
1720    $self->unlock_collection($username, $collect);
1721}
1722
1723sub _set_archives_metadata_array
1724{
1725    my $self = shift @_;
1726   
1727    my $collect   = $self->{'collect'};
1728    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1729    my $site = $self->{'site'};
1730    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1731
1732    # look up additional args
1733   
1734    my $infodbtype = $self->{'infodbtype'};
1735   
1736    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1737   
1738    my $json_str      = $self->{'json'};
1739    my $doc_array = decode_json $json_str;
1740   
1741   
1742    my $global_status = 0;
1743    my $global_mess = "";
1744   
1745    my @all_docids = ();
1746   
1747    foreach my $doc_array_rec ( @$doc_array ) {
1748    my $status    = -1;
1749    my $docid     = $doc_array_rec->{'docid'};
1750
1751    push(@all_docids,$docid);
1752   
1753    my $metaname  = $doc_array_rec->{'metaname'};
1754    if(defined $metaname) {
1755       
1756        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1757
1758        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1759        my $metavalue = $doc_array_rec->{'metavalue'};
1760        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1761       
1762       
1763        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1764        # make "accumulate" the default (less destructive, as it won't actually
1765        # delete any existing values)
1766        $metamode = "accumulate";
1767        }       
1768       
1769        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1770                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1771    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1772        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1773       
1774        foreach my $metatable_rec ( @$metatable ) {
1775        $metaname  = $metatable_rec->{'metaname'};
1776        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1777        my $metapos = undef;
1778        my $prevmetavalue = undef;
1779        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1780       
1781        foreach my $metavalue ( @$metavals ) {
1782            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1783                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1784           
1785            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1786            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1787            # metavalue for this metaname will be lost
1788            $metamode = "accumulate";
1789            }
1790        }           
1791        }       
1792    }
1793       
1794    if ($status != 0) {
1795        # Catch error if set infodb entry failed
1796        $global_status = $status;
1797        $global_mess .= "Failed to set metadata key: $docid\n";
1798        $global_mess .= "Exit status: $status\n";
1799        $global_mess .= "System Error Message: $!\n";
1800        $global_mess .= "-" x 20 . "\n";
1801    }
1802    }
1803   
1804    if ($global_status != 0) {
1805    $global_mess .= "PATH: $ENV{'PATH'}\n";
1806    $gsdl_cgi->generate_error($global_mess);
1807    }
1808    else {
1809    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1810    $gsdl_cgi->generate_ok_message($mess);
1811    }
1812}
1813
1814sub set_archives_metadata_array
1815{
1816    my $self = shift @_;
1817
1818    my $username  = $self->{'username'};
1819    my $collect   = $self->{'collect'};
1820    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1821#    my $gsdlhome  = $self->{'gsdlhome'};
1822
1823    if ($baseaction::authentication_enabled) {
1824    # Ensure the user is allowed to edit this collection   
1825    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1826    }
1827
1828    my $site = $self->{'site'};
1829    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1830   
1831    $gsdl_cgi->checked_chdir($collect_dir);
1832
1833    # Obtain the collect dir
1834    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1835
1836    # Make sure the collection isn't locked by someone else
1837    $self->lock_collection($username, $collect);
1838
1839    $self->_set_archives_metadata_array(@_);
1840   
1841    # Release the lock once it is done
1842    $self->unlock_collection($username, $collect);
1843}
1844
1845sub _remove_archives_metadata
1846{
1847    my $self = shift @_;
1848
1849    my $collect   = $self->{'collect'};
1850    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1851#   my $gsdlhome  = $self->{'gsdlhome'};
1852    my $infodbtype = $self->{'infodbtype'};
1853   
1854    my $site = $self->{'site'};
1855       
1856    # Obtain the collect and archive dir   
1857    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1858   
1859    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1860
1861    # look up additional args
1862    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1863   
1864    my $metaname = $self->{'metaname'};
1865    my $metapos = $self->{'metapos'};
1866
1867    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1868
1869    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1870    # default to deleting the first metavalue for the given metaname
1871    # Beware that if both metapos AND metavalue are defined, both matches (if any)
1872    # seem to get deleted in one single remove_archives_meta action invocation.
1873    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1874    # deleted, both get deleted.
1875    if(!defined $metapos && !defined $metavalue) {
1876        $metapos = 0;
1877    }
1878
1879    my $metamode = $self->{'metamode'} || undef;
1880
1881    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1882    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1883
1884    # This now stores the full pathname
1885    my $doc_filename = $doc_rec->{'doc-file'}->[0];
1886
1887    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1888#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1889   
1890    if ($status == 0)
1891    {
1892        my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1893        $mess .= "  $metaname";
1894        $mess .= "->[$metapos]" if (defined $metapos);
1895
1896        $gsdl_cgi->generate_ok_message($mess); 
1897    }
1898    else
1899    {
1900        my $mess .= "Failed to remove archives metadata key: $docid\n";
1901        $mess .= "Exit status: $status\n";
1902        $mess .= "System Error Message: $!\n";
1903        $mess .= "-" x 20 . "\n";
1904       
1905        $gsdl_cgi->generate_error($mess);
1906    }
1907   
1908    #return $status; # in case calling functions have a use for this
1909}
1910
1911sub remove_archives_metadata
1912{
1913    my $self = shift @_;
1914
1915    my $username  = $self->{'username'};
1916    my $collect   = $self->{'collect'};
1917    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1918   
1919    if ($baseaction::authentication_enabled)
1920    {
1921        # Ensure the user is allowed to edit this collection       
1922        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1923    }
1924
1925    # Make sure the collection isn't locked by someone else
1926    $self->lock_collection($username, $collect);
1927
1928    $self->_remove_archives_metadata(@_);
1929
1930    # Release the lock once it is done
1931    $self->unlock_collection($username, $collect);
1932}
1933
1934sub remove_from_doc_xml
1935{
1936    my $self = shift @_;
1937    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1938   
1939    my @start_rules = ('Section' => \&dxml_start_section);
1940   
1941    # Set the call-back functions for the metadata tags
1942    my @rules =
1943    (
1944        _default => 'raw',
1945        'Metadata' => \&rfdxml_metadata
1946    );
1947       
1948    my $parser = XML::Rules->new
1949    (
1950        start_rules => \@start_rules,
1951        rules => \@rules,
1952        style => 'filter',
1953        output_encoding => 'utf8',
1954#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1955        stripspaces => 2|0|0 # ineffectual
1956    );
1957   
1958    my $status = 0;
1959    my $xml_in = "";
1960    if (!open(MIN,"<$doc_xml_filename"))
1961    {
1962        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1963        $status = 1;
1964    }
1965    else
1966    {
1967        # Read them in
1968        my $line;
1969        while (defined ($line=<MIN>)) {
1970            $xml_in .= $line;
1971        }
1972        close(MIN);
1973
1974        # Filter with the call-back functions
1975        my $xml_out = "";
1976
1977        my $MOUT;
1978        if (!open($MOUT,">$doc_xml_filename")) {
1979            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1980            $status = 1;
1981        }
1982        else {
1983            binmode($MOUT,":utf8");
1984            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1985            close($MOUT);       
1986        }
1987    }
1988    return $status;
1989}
1990
1991sub rfdxml_metadata
1992{
1993    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1994
1995    # For comparisons, toplevel section is indicated by ""
1996    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1997    my $secid = $parser->{'parameters'}->{'secid'} || "";
1998
1999    if (!($secid eq $curr_sec_num))
2000    {
2001        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2002        return [$tagname => $attrHash];
2003    }
2004
2005    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2006    {
2007        if (!defined $parser->{'parameters'}->{'poscount'})
2008        {
2009            $parser->{'parameters'}->{'poscount'} = 0;
2010        }
2011        else
2012        {
2013            $parser->{'parameters'}->{'poscount'}++;
2014        }
2015       
2016        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2017        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {         
2018            return [];
2019        }
2020    }
2021
2022    if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2023    {   
2024        return [];
2025    }
2026
2027    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2028    {   
2029        return [];
2030    }
2031   
2032    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2033    return [$tagname => $attrHash];
2034}
2035
2036sub mxml_metadata
2037{
2038    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2039    my $metaname = $parser->{'parameters'}->{'metaname'};
2040    my $metamode = $parser->{'parameters'}->{'metamode'};
2041
2042    # Report error if we don't see FileName tag before this
2043    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2044   
2045    # Don't do anything if we are not in the right FileSet
2046    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2047    if ($file_regexp =~ /\.\*/) {
2048    # Only interested in a file_regexp if it specifies precisely one
2049    # file. 
2050    # So, skip anything with a .* in it as it is too general
2051##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2052    return [$tagname => $attrHash];
2053    }
2054    my $src_file = $parser->{'parameters'}->{'src_file'};
2055    if (!($src_file =~ /$file_regexp/)) {
2056    return [$tagname => $attrHash];
2057    }
2058##    print STDERR "*** mxl metamode = $metamode\n";
2059
2060    # Find the right metadata tag and checks if we are going to override it
2061    my $name_attr = $attrHash->{'name'};
2062    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2063
2064    # now metadata.xml functions need to keep track of metapos
2065    if (!defined $parser->{'parameters'}->{'poscount'})
2066    {
2067        $parser->{'parameters'}->{'poscount'} = 0;
2068    }
2069    else
2070    {
2071        $parser->{'parameters'}->{'poscount'}++;
2072    }
2073
2074    # If either the metapos or prevmetavalue is set,
2075        # get the value and override the current value
2076    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2077
2078    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2079    {
2080        $attrHash->{'_content'} = $metavalue;
2081
2082        ##  print STDERR "**** overriding metadata.xml\n";
2083       
2084        # Don't want it to wipe out any other pieces of metadata
2085        $parser->{'parameters'}->{'metamode'} = "done";
2086    }
2087    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2088    {
2089        $attrHash->{'_content'} = $metavalue;
2090        $parser->{'parameters'}->{'metamode'} = "done";
2091    }
2092    }
2093
2094    # mxml_description will process the metadata if metadata is accumulate,
2095    # or if we haven't found the metadata to override
2096
2097    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2098    return [$tagname => $attrHash];
2099}
2100
2101
2102sub mxml_description
2103{
2104    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2105    my $metamode = $parser->{'parameters'}->{'metamode'};   
2106
2107    # Failed... Report error if we don't see FileName tag before this
2108    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2109
2110    # Don't do anything if we are not in the right FileSet
2111    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2112    if ($file_regexp =~ m/\.\*/) {
2113    # Only interested in a file_regexp if it specifies precisely one
2114    # file. 
2115    # So, skip anything with a .* in it as it is too general
2116    return [$tagname => $attrHash];
2117    }
2118    my $src_file = $parser->{'parameters'}->{'src_file'};
2119   
2120    if (!($src_file =~ m/$file_regexp/)) {
2121    return [$tagname => $attrHash];
2122    }
2123
2124    # Accumulate the metadata block to the end of the description block
2125    # Note: This adds metadata block to all description blocks, so if there are
2126    # multiple FileSets, it will add to all of them
2127    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2128
2129    # if metamode was "override" but get to here then it failed to
2130    # find an item to override, in which case it should append its
2131    # value to the end, just like the "accumulate" mode
2132
2133    if ($metamode eq "override") {
2134        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2135    }
2136
2137    # tack a new metadata tag on to the end of the <Metadata>+ block
2138    my $metaname = $parser->{'parameters'}->{'metaname'};
2139    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2140   
2141    my $metadata_attr = { '_content' => $metavalue,
2142                  'name'     => $metaname,
2143                  'mode'     => "accumulate" };
2144
2145    my $append_metadata = [ "Metadata" => $metadata_attr ];
2146    my $description_content = $attrHash->{'_content'};
2147   
2148##  print STDERR "*** appending to metadata.xml\n";
2149
2150    # append the new metadata element to the end of the current
2151    # content contained inside this tag
2152    if (ref($description_content) eq "") {
2153        # => string or numeric literal
2154        # this is caused by a <Description> block has no <Metadata> child elements
2155        # => set up an empty array in '_content'
2156        $attrHash->{'_content'} = [ "\n" ];
2157        $description_content = $attrHash->{'_content'};
2158    }
2159
2160    push(@$description_content,"    ", $append_metadata ,"\n        ");
2161    $parser->{'parameters'}->{'metamode'} = "done";
2162    }
2163
2164    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2165    return [$tagname => $attrHash];
2166}
2167
2168
2169sub mxml_filename
2170{
2171    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2172
2173    # Store the filename of the Current Fileset
2174    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2175    # FileName tag must come before Description tag
2176    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2177
2178    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2179    return [$tagname => $attrHash];
2180}
2181
2182
2183sub mxml_fileset
2184{
2185    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2186
2187    # Initilise the current_file
2188    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2189    # FileName tag must come before Description tag
2190    $parser->{'parameters'}->{'current_file'} = "";
2191
2192    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2193    return [$tagname => $attrHash];
2194}
2195
2196sub mxml_directorymetadata
2197{
2198    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2199
2200    # if we haven't processed the metadata when we reach the end of metadata.xml
2201    # it's because there's no particular FileSet element whose FileName matched
2202    # In which case, add a new FileSet for this FileName
2203    my $metamode = $parser->{'parameters'}->{'metamode'};
2204    if($metamode ne "done") {
2205   
2206    if ($metamode eq "override") {
2207        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2208    }
2209
2210    # If we get to here and metamode is override, this means there
2211    # was no existing value to overide => treat as an append operation
2212   
2213    # Create a new FileSet element and append to DirectoryMetadata
2214    # <FileSet>
2215    # <FileName>src_file</FileName>
2216    # <Description>
2217    # <Metadata mode="" name="">metavalue</Metadata>
2218    # </Description>
2219    # </FileSet>
2220    my $src_file = $parser->{'parameters'}->{'src_file'};
2221    my $metaname = $parser->{'parameters'}->{'metaname'};
2222    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2223    my $metadata_attr = {
2224        '_content' => $metavalue,
2225        'name' => $metaname,
2226        'mode' => "accumulate"
2227    };
2228    my $append_metadata = [ "Metadata" => $metadata_attr ];
2229    my $description_attr->{'_content'} = [ "\n\t\t   ", $append_metadata, "\n\t\t"];
2230    my $description_element = [ "Description" => $description_attr ];
2231
2232    #_content is not an attribute, it's special and holds the children of this element
2233    # including the textnode value embedded in this element if any.
2234    my $filename_attr = {'_content' => $src_file};
2235    my $filename_element = [ "FileName" => $filename_attr ];
2236
2237    my $fileset_attr = {};
2238    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2239    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2240   
2241   
2242    # get children of dirmeta, and push the new FileSet element onto it
2243    print "Appending metadata to metadata.xml\n";
2244    my $dirmeta_content = $attrHash->{'_content'};
2245    if (ref($dirmeta_content)) {
2246        # got some existing interesting nested content
2247        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2248        push(@$dirmeta_content, "\t", $fileset ,"\n");
2249    }
2250    else {
2251        #description_content is most likely a string such as "\n"
2252        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2253        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2254    }   
2255
2256    $parser->{'parameters'}->{'metamode'} = "done";
2257    }
2258    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2259    return [$tagname => $attrHash];
2260}
2261
2262
2263sub edit_metadata_xml
2264{
2265    my $self = shift @_;
2266    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2267
2268    # Set the call-back functions for the metadata tags
2269    my @rules =
2270    ( _default => 'raw',
2271          'FileName' => \&mxml_filename,
2272      'Metadata' => \&mxml_metadata,
2273      'Description' => \&mxml_description,
2274          'FileSet' => \&mxml_fileset,
2275      'DirectoryMetadata' => \&mxml_directorymetadata);
2276
2277    # use XML::Rules to add it in (read in and out again)
2278    my $parser = XML::Rules->new(rules => \@rules,
2279                 style => 'filter',
2280                                 output_encoding => 'utf8',
2281                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2282
2283    if (!-e $metadata_xml_filename) {
2284   
2285        if (open(MOUT,">$metadata_xml_filename")) {
2286           
2287            my $src_file_re = &util::filename_to_regex($src_file);
2288            # shouldn't the following also be in the above utility routine??
2289            # $src_file_re =~ s/\./\\./g;
2290       
2291            print MOUT "<?xml version=\"1.0\"?>\n";
2292            print MOUT "<DirectoryMetadata>\n";
2293            print MOUT " <FileSet>\n";
2294            print MOUT "  <FileName>$src_file_re</FileName>\n";
2295            print MOUT "  <Description>\n";
2296            print MOUT "  </Description>\n";
2297            print MOUT " </FileSet>\n";
2298            print MOUT "</DirectoryMetadata>\n";
2299
2300            close(MOUT);
2301        }
2302        else {
2303            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2304        }
2305    }
2306   
2307   
2308    my $xml_in = "";
2309    if (!open(MIN,"<$metadata_xml_filename")) {
2310        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2311    }
2312    else {
2313        # Read them in
2314        my $line;
2315        while (defined ($line=<MIN>)) {
2316            $xml_in .= $line;
2317        }
2318        close(MIN);
2319
2320        # Filter with the call-back functions
2321        my $xml_out = "";
2322
2323        my $MOUT;
2324        if (!open($MOUT,">$metadata_xml_filename")) {
2325            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2326        }
2327        else {
2328            binmode($MOUT,":utf8");
2329
2330            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2331            # At the moment, I will just hack it!
2332            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2333            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2334            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2335            #print MOUT $xml_out;
2336
2337            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2338                              metapos => $metapos,
2339                      metavalue => $metavalue,
2340                      metamode => $metamode,
2341                      src_file => $src_file,
2342                      prevmetavalue => $prevmetavalue,
2343                      current_file => undef} );
2344            close($MOUT);       
2345        }
2346   }
2347}
2348
2349
2350sub set_import_metadata
2351{
2352    my $self = shift @_;
2353   
2354    my $username  = $self->{'username'};
2355    my $collect   = $self->{'collect'};
2356    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2357   
2358    if ($baseaction::authentication_enabled) {
2359    # Ensure the user is allowed to edit this collection
2360    $self->authenticate_user($username, $collect);
2361    }
2362
2363    # Make sure the collection isn't locked by someone else
2364    $self->lock_collection($username, $collect);
2365 
2366    $self->_set_import_metadata(@_);
2367
2368    # Release the lock once it is done
2369    $self->unlock_collection($username, $collect);
2370   
2371}
2372
2373sub set_import_metadata_array
2374{
2375    my $self = shift @_;
2376
2377    my $username  = $self->{'username'};
2378    my $collect   = $self->{'collect'};
2379    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2380#    my $gsdlhome  = $self->{'gsdlhome'};
2381
2382    if ($baseaction::authentication_enabled) {
2383    # Ensure the user is allowed to edit this collection   
2384    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2385    }
2386
2387    my $site = $self->{'site'};
2388    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2389   
2390    $gsdl_cgi->checked_chdir($collect_dir);
2391
2392    # Make sure the collection isn't locked by someone else
2393    $self->lock_collection($username, $collect);
2394
2395    $self->_set_import_metadata_array(@_);
2396
2397    # Release the lock once it is done
2398    $self->unlock_collection($username, $collect);
2399
2400}
2401
2402
2403sub _set_import_metadata_array
2404{
2405    my $self = shift @_;
2406
2407    my $collect   = $self->{'collect'};
2408    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2409
2410    my $site = $self->{'site'};
2411    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2412   
2413    # look up additional args
2414   
2415    my $infodbtype = $self->{'infodbtype'};
2416   
2417    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2418    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2419   
2420    my $json_str = $self->{'json'};
2421    my $doc_array = decode_json $json_str;
2422   
2423    my $global_status = 0;
2424    my $global_mess = "";
2425   
2426    my @all_docids = ();
2427   
2428    foreach my $doc_array_rec ( @$doc_array )
2429    {
2430    my $status = -1;
2431    my $docid = $doc_array_rec->{'docid'};
2432   
2433    my ($docid_root,$docid_secnum);
2434    if(defined $docid) {   
2435        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2436        # as yet no support for setting subsection metadata in metadata.xml
2437        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2438        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2439        next; # skip this docid in for loop
2440        }
2441    }
2442
2443    push(@all_docids,$docid); # docid_root rather
2444   
2445    my $metaname = $doc_array_rec->{'metaname'};
2446    if (defined $metaname) {
2447        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2448        my $metavalue = $doc_array_rec->{'metavalue'};
2449        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2450
2451        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2452        # make "accumulate" the default (less destructive, as won't actually
2453        # delete any existing values)
2454        $metamode = "accumulate";
2455        }
2456
2457        # adding metapos and prevmetavalue support to import_metadata subroutines
2458        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2459        my $prevmetavalue = $self->{'prevmetavalue'};
2460
2461        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2462       
2463    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2464        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2465       
2466        foreach my $metatable_rec ( @$metatable ) {
2467        $metaname  = $metatable_rec->{'metaname'};
2468        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2469        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2470            # make "accumulate" the default (less destructive, as won't actually
2471            # delete any existing values)
2472            $metamode = "accumulate";
2473        }
2474
2475        # No support for metapos and prevmetavalue in the JSON metatable substructure
2476        my $metapos = undef;
2477        my $prevmetavalue = undef;
2478        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2479       
2480        foreach my $metavalue ( @$metavals ) {
2481            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2482
2483            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2484            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2485            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2486            # metavalue for this metaname will be lost
2487            $metamode = "accumulate";
2488            }
2489        }
2490        }
2491    }       
2492    }
2493
2494    # always a success message
2495    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2496    $gsdl_cgi->generate_ok_message($mess);
2497}
2498
2499# always returns true (1)
2500sub set_import_metadata_entry
2501{
2502    my $self = shift @_;
2503    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2504
2505    my $info_mess = <<RAWEND;
2506****************************
2507  set_import_metadata_entry()
2508****************************
2509collect = $collect
2510collect_dir = $collect_dir
2511infodbtype = $infodbtype
2512arcinfo_doc_filename = $arcinfo_doc_filename
2513docid = $docid
2514metaname = $metaname
2515metapos = $metapos
2516metavalue = $metavalue
2517metamode = $metamode
2518prevmetavalue = $prevmetavalue
2519****************************
2520RAWEND
2521
2522    $gsdl_cgi->generate_message($info_mess);
2523
2524    # import works with metadata.xml which can have inherited metadata
2525    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2526    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2527    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2528    # what they're doing if they provide a metapos.
2529    if(defined $metapos) {
2530    print STDERR "@@@@ WARNING: metapos defined.\n";
2531    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2532    }
2533
2534    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2535    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2536    my $metadata_xml_file;
2537    my $import_filename = undef;
2538   
2539    if (defined $docid) {
2540    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2541    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2542
2543    # This now stores the full pathname
2544    $import_filename = $doc_rec->{'src-file'}->[0];
2545    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2546         # as the array version of the method doesn't support the -f parameter yet
2547    my $import_file  = $self->{'f'};
2548    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2549    }
2550   
2551    # figure out correct metadata.xml file [?]
2552    # Assuming the metadata.xml file is next to the source file
2553    # Note: This will not work if it is using the inherited metadata from the parent folder
2554    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2555    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2556   
2557    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2558    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2559    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2560    if(!defined $metapos && $metamode eq "override") {
2561##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2562    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2563
2564    }
2565
2566    # Edit the metadata.xml
2567    # Modified by Jeffrey from DL Consulting
2568    # Handle the case where there is one metadata.xml file for multiple FileSets
2569    # The XML filter needs to know whether it is in the right FileSet
2570    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2571    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2572    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2573                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2574    #return 0;
2575    return $metadata_xml_filename;
2576}
2577
2578sub _remove_import_metadata
2579{
2580    my $self = shift @_;
2581
2582    my $collect   = $self->{'collect'};
2583    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2584#   my $gsdlhome  = $self->{'gsdlhome'};
2585    my $infodbtype = $self->{'infodbtype'};
2586   
2587    # Obtain the collect dir
2588    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2589    my $site = $self->{'site'};
2590    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2591   
2592    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2593    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2594
2595    # look up additional args
2596    my $docid = $self->{'d'};
2597    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2598    {
2599        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2600    }
2601   
2602    my $metaname = $self->{'metaname'};
2603    my $metapos = $self->{'metapos'};
2604    my $metavalue = $self->{'metavalue'};
2605    if(defined $metavalue) {
2606        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2607    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2608        $metapos = 0;
2609    }
2610    my $metamode = $self->{'metamode'} || undef;
2611
2612    # import works with metadata.xml which can have inherited metadata
2613    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2614    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2615    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2616    # what they're doing if they provide a metapos.
2617    if(defined $metapos) {
2618        print STDERR "@@@@ WARNING: metapos defined.\n";
2619        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2620    }
2621   
2622    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2623    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2624    my $metadata_xml_file;
2625    my $import_filename = undef;
2626    if (defined $docid)
2627    {
2628        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2629        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2630
2631        # This now stores the full pathname
2632        $import_filename = $doc_rec->{'src-file'}->[0];
2633    }
2634
2635    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2636    {
2637        $gsdl_cgi->generate_error("There is no metadata\n");
2638    }
2639   
2640    # figure out correct metadata.xml file [?]
2641    # Assuming the metadata.xml file is next to the source file
2642    # Note: This will not work if it is using the inherited metadata from the parent folder
2643    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2644    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2645   
2646    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2647   
2648    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2649    $mess .= "  $metaname";
2650    $mess .= " = $metavalue\n";
2651   
2652    $gsdl_cgi->generate_ok_message($mess);
2653
2654    #return $status; # in case calling functions have a use for this
2655}
2656
2657sub remove_import_metadata
2658{
2659    my $self = shift @_;
2660   
2661    my $username = $self->{'username'};
2662    my $collect   = $self->{'collect'};
2663    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2664   
2665    if ($baseaction::authentication_enabled) {
2666        # Ensure the user is allowed to edit this collection       
2667        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2668    }
2669
2670    # Make sure the collection isn't locked by someone else
2671    $self->lock_collection($username, $collect);
2672   
2673    $self->_remove_import_metadata(@_);
2674
2675    # Release the lock once it is done
2676    $self->unlock_collection($username, $collect);
2677
2678}
2679
2680sub remove_from_metadata_xml
2681{
2682    my $self = shift @_;
2683    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2684    # metamode generally has no meaning for removing meta, but is used by set_meta
2685    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2686
2687    # Set the call-back functions for the metadata tags
2688    my @rules =
2689    (
2690        _default => 'raw',
2691        'Metadata' => \&rfmxml_metadata,
2692        'FileName' => \&mxml_filename
2693    );
2694       
2695    my $parser = XML::Rules->new
2696    (
2697        rules => \@rules,
2698        style => 'filter',
2699        output_encoding => 'utf8',
2700     #normalisespaces => 1,
2701            stripspaces => 2|0|0 # ineffectual
2702    );
2703   
2704    my $xml_in = "";
2705    if (!open(MIN,"<$metadata_xml_filename"))
2706    {
2707        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2708    }
2709    else
2710    {
2711        # Read them in
2712        my $line;
2713        while (defined ($line=<MIN>)) {
2714            $xml_in .= $line;
2715        }
2716        close(MIN);
2717
2718        # Filter with the call-back functions
2719        my $xml_out = "";
2720
2721        my $MOUT;
2722        if (!open($MOUT,">$metadata_xml_filename")) {
2723            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2724        }
2725        else {
2726            binmode($MOUT,":utf8");
2727            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2728            close($MOUT);       
2729        }
2730    }
2731}
2732
2733sub rfmxml_metadata
2734{
2735    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2736
2737    # metadata.xml does not handle subsections
2738
2739    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2740    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2741        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2742    {
2743        if (!defined $parser->{'parameters'}->{'poscount'})
2744        {
2745            $parser->{'parameters'}->{'poscount'} = 0;
2746        }
2747        else
2748        {
2749            $parser->{'parameters'}->{'poscount'}++;
2750        }
2751
2752        # if overriding but no metapos, then clear all the meta for this metaname
2753        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2754            return [];
2755        }
2756   
2757        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2758        {
2759            return [];
2760        }
2761
2762        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2763        {
2764            return [];
2765        }       
2766    }
2767
2768    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2769    return [$tagname => $attrHash];
2770}
2771
2772sub _remove_live_metadata
2773{
2774    my $self = shift @_;
2775
2776    my $collect   = $self->{'collect'};
2777    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2778#    my $gsdlhome  = $self->{'gsdlhome'};
2779    my $infodbtype = $self->{'infodbtype'};
2780
2781    # Obtain the collect dir
2782    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2783    my $site = $self->{'site'};
2784    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2785
2786   
2787    # look up additional args
2788    my $docid     = $self->{'d'};
2789    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2790      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2791    }
2792   
2793    # Generate the dbkey
2794    my $metaname  = $self->{'metaname'};
2795    my $dbkey = "$docid.$metaname";
2796
2797    # To people who know $collect_tail please add some comments
2798    # Obtain the live gdbm_db path
2799    my $collect_tail = $collect;
2800    $collect_tail =~ s/^.*[\/\\]//;
2801    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2802    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2803
2804    # Remove the key
2805    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2806    my $status = system($cmd);
2807    if ($status != 0) {
2808        # Catch error if gdbmdel failed
2809    my $mess = "Failed to set metadata key: $dbkey\n";
2810   
2811    $mess .= "PATH: $ENV{'PATH'}\n";
2812    $mess .= "cmd = $cmd\n";
2813    $mess .= "Exit status: $status\n";
2814    $mess .= "System Error Message: $!\n";
2815
2816    $gsdl_cgi->generate_error($mess);
2817    }
2818    else {
2819    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2820    }
2821
2822}
2823
2824sub remove_live_metadata
2825{
2826    my $self = shift @_;
2827
2828    my $username  = $self->{'username'};
2829    my $collect   = $self->{'collect'};
2830    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2831    my $gsdlhome  = $self->{'gsdlhome'};
2832   
2833    if ($baseaction::authentication_enabled) {
2834    # Ensure the user is allowed to edit this collection   
2835    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2836    }
2837
2838    # Make sure the collection isn't locked by someone else
2839    $self->lock_collection($username, $collect);
2840
2841    $self->_remove_live_metadata(@_);
2842
2843    $self->unlock_collection($username, $collect);
2844}
2845
2846sub remove_metadata
2847{
2848    my $self = shift @_;
2849
2850    my $where = $self->{'where'};
2851    if(!$where) {
2852    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2853    return;
2854    }
2855
2856    my $username  = $self->{'username'};
2857    my $collect   = $self->{'collect'};
2858    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2859
2860    if ($baseaction::authentication_enabled) {
2861    # Ensure the user is allowed to edit this collection   
2862    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2863    }
2864
2865    # Make sure the collection isn't locked by someone else
2866    $self->lock_collection($username, $collect);
2867
2868    # check which directories need to be processed, specified in $where as
2869    # any combination of import|archives|index|live
2870    if($where =~ m/import/) {
2871    $self->_remove_import_metadata(@_);     
2872    }
2873    if($where =~ m/archives/) {
2874    $self->_remove_archives_metadata(@_);       
2875   }
2876    if($where =~ m/index/) {
2877    $self->_remove_index_metadata(@_);     
2878    }
2879
2880    # Release the lock once it is done
2881    $self->unlock_collection($username, $collect);
2882}
2883
2884# the internal version, without authentication
2885sub _remove_index_metadata
2886{   
2887    my $self = shift @_;
2888
2889    my $collect   = $self->{'collect'};
2890    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2891#    my $gsdlhome  = $self->{'gsdlhome'};
2892    my $infodbtype = $self->{'infodbtype'};
2893   
2894    # Obtain the collect dir
2895    my $site = $self->{'site'};
2896    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2897    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2898
2899        # look up additional args
2900    my $docid     = $self->{'d'};
2901    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2902      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2903    }
2904    my $metaname  = $self->{'metaname'};
2905    my $metapos   = $self->{'metapos'};
2906    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2907
2908    # To people who know $collect_tail please add some comments
2909    # Obtain the path to the database
2910    my $collect_tail = $collect;
2911    $collect_tail =~ s/^.*[\/\\]//;
2912    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2913    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2914
2915    # Read the docid entry
2916    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2917
2918    # Check to make sure the key does exist
2919    if (!defined ($doc_rec->{$metaname})) {
2920        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2921    }
2922
2923    # Obtain the specified metadata pos
2924    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2925    if(!defined $metapos && !defined $metavalue) {
2926        $metapos = 0;
2927    }
2928   
2929
2930    # consider check key is defined before deleting?
2931    # Loop through the metadata array and ignore the specified position
2932    my $filtered_metadata = [];
2933    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
2934    for (my $i=0; $i<$num_metadata_vals; $i++) {
2935    my $metaval = shift(@{$doc_rec->{$metaname}});
2936
2937    if (!defined $metavalue && $i != $metapos) {
2938        push(@$filtered_metadata,$metaval);
2939    }
2940   
2941    if(defined $metavalue && !($metavalue eq $metaval))
2942    {
2943        push(@$filtered_metadata,$metaval);
2944    }
2945    }
2946    $doc_rec->{$metaname} = $filtered_metadata;
2947
2948    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2949    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2950
2951    if ($status != 0) {
2952    my $mess = "Failed to set metadata key: $docid\n";
2953   
2954    $mess .= "PATH: $ENV{'PATH'}\n";
2955    $mess .= "Exit status: $status\n";
2956    $mess .= "System Error Message: $!\n";
2957
2958    $gsdl_cgi->generate_error($mess);
2959    }
2960    else {
2961    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2962    $mess .= "  $metaname";
2963    $mess .= "->[$metapos]" if (defined $metapos);
2964
2965    $gsdl_cgi->generate_ok_message($mess);
2966    }
2967
2968    #return $status; # in case calling functions have a use for this
2969}
2970
2971sub remove_index_metadata
2972{
2973    my $self = shift @_;
2974
2975    my $username  = $self->{'username'};
2976    my $collect   = $self->{'collect'};
2977    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2978#    my $gsdlhome  = $self->{'gsdlhome'};
2979   
2980    if ($baseaction::authentication_enabled) {
2981    # Ensure the user is allowed to edit this collection   
2982    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2983    }
2984
2985    # Obtain the collect dir
2986    my $site = $self->{'site'};
2987    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2988    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2989
2990    # Make sure the collection isn't locked by someone else
2991    $self->lock_collection($username, $collect);
2992
2993    $self->_remove_index_metadata(@_);
2994
2995    # Release the lock once it is done
2996    $self->unlock_collection($username, $collect);
2997}
2998
2999
3000# Was trying to reused the codes, but the functions need to be broken
3001# down more before they can be reused, otherwise there will be too
3002# much overhead and duplicate process...
3003sub insert_metadata
3004{
3005    my $self = shift @_;
3006   
3007    my $username  = $self->{'username'};
3008    my $collect   = $self->{'collect'};
3009    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3010    my $gsdlhome  = $self->{'gsdlhome'};
3011    my $infodbtype = $self->{'infodbtype'};
3012   
3013    # If the import metadata and gdbm database have been updated, we
3014    # need to insert some notification to warn user that the the text
3015    # they see at the moment is not indexed and require a rebuild.
3016    my $rebuild_pending_macro = "_rebuildpendingmessage_";
3017
3018    if ($baseaction::authentication_enabled) {
3019    # Ensure the user is allowed to edit this collection
3020    $self->authenticate_user($username, $collect);
3021    }
3022
3023    # Obtain the collect and archive dir   
3024    my $site = $self->{'site'};
3025    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3026    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3027    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3028
3029    # Make sure the collection isn't locked by someone else
3030    $self->lock_collection($username, $collect);
3031   
3032    # Check additional args
3033    my $docid = $self->{'d'};
3034    if (!defined($docid)) {
3035    $gsdl_cgi->generate_error("No document id is specified: d=...");
3036    }
3037    my $metaname = $self->{'metaname'};
3038    if (!defined($metaname)) {
3039    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3040    }
3041    my $metavalue = $self->{'metavalue'};
3042    if (!defined($metavalue) || $metavalue eq "") {
3043    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3044    }
3045    # make "accumulate" the default (less destructive, as won't actually
3046    # delete any existing values)
3047    my $metamode = "accumulate";
3048
3049    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3050    my $metapos   = undef;
3051    my $prevmetavalue = undef;
3052
3053    #=======================================================================#
3054    # set_import_metadata [START]
3055    #=======================================================================#
3056    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3057    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3058    my $metadata_xml_file;
3059    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3060    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3061   
3062    # This now stores the full pathname
3063    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3064   
3065    # figure out correct metadata.xml file [?]
3066    # Assuming the metadata.xml file is next to the source file
3067    # Note: This will not work if it is using the inherited metadata from the parent folder
3068    my ($import_tailname, $import_dirname)
3069    = File::Basename::fileparse($import_filename);
3070    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3071
3072    # Shane's escape characters
3073    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3074    $metavalue =~ s/\,/&#44;/g;
3075    $metavalue =~ s/\:/&#58;/g;
3076    $metavalue =~ s/\|/&#124;/g;
3077    $metavalue =~ s/\(/&#40;/g;
3078    $metavalue =~ s/\)/&#41;/g;
3079    $metavalue =~ s/\[/&#91;/g;
3080    $metavalue =~ s/\\/&#92;/g;
3081    $metavalue =~ s/\]/&#93;/g;
3082    $metavalue =~ s/\{/&#123;/g;
3083    $metavalue =~ s/\}/&#125;/g;
3084    $metavalue =~ s/\"/&#34;/g;
3085    $metavalue =~ s/\`/&#96;/g;
3086    $metavalue =~ s/\n/_newline_/g;
3087
3088    # Edit the metadata.xml
3089    # Modified by Jeffrey from DL Consulting
3090    # Handle the case where there is one metadata.xml file for multiple FileSets
3091    # The XML filter needs to know whether it is in the right FileSet
3092    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3093    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3094    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3095                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3096    #=======================================================================#
3097    # set_import_metadata [END]
3098    #=======================================================================#
3099
3100
3101    #=======================================================================#
3102    # set_metadata (accumulate version) [START]
3103    #=======================================================================#
3104    # To people who know $collect_tail please add some comments
3105    # Obtain path to the database
3106    my $collect_tail = $collect;
3107    $collect_tail =~ s/^.*[\/\\]//;
3108    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3109    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3110
3111    # Read the docid entry
3112    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3113
3114    # Protect the quotes
3115    $metavalue =~ s/\"/\\\"/g;
3116
3117    # Adds the pending macro
3118    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3119
3120    # If the metadata doesn't exist, create a new one
3121    if (!defined($doc_rec->{$metaname})){   
3122    $doc_rec->{$metaname} = [ $macro_metavalue ];
3123    }
3124    # Else, let's acculumate the values
3125    else {
3126        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3127    }
3128
3129    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3130    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3131
3132    if ($status != 0) {
3133        # Catch error if gdbmget failed
3134    my $mess = "Failed to set metadata key: $docid\n";
3135   
3136    $mess .= "PATH: $ENV{'PATH'}\n";
3137    $mess .= "Exit status: $status\n";
3138    $mess .= "System Error Message: $!\n";
3139
3140    $gsdl_cgi->generate_error($mess);
3141    }
3142    else {
3143    my $mess = "insert-metadata successful: Key[$docid]\n";
3144    $mess .= "  [In metadata.xml] $metaname";
3145    $mess .= " = $metavalue\n";
3146    $mess .= "  [In database] $metaname";
3147    $mess .= " = $macro_metavalue\n";
3148    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3149        $gsdl_cgi->generate_ok_message($mess);
3150    }   
3151    #=======================================================================#
3152    # set_metadata (accumulate version) [END]
3153    #=======================================================================#
3154
3155    # Release the lock once it is done
3156    $self->unlock_collection($username, $collect);
3157}
3158
31591;
Note: See TracBrowser for help on using the browser.