root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 27315

Revision 27315, 104.9 KB (checked in by ak19, 7 years ago)

No authentication and collection-locking when getting metadata. (Only for setting metadata.)

Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130        'optional-args'   => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131                     
132    "remove-archives-metadata" => {
133        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134        'optional-args'   => [ "metapos", "metavalue" ] },
135
136    "remove-live-metadata" => {
137        'compulsory-args' => [ "d", "metaname" ],
138        'optional-args'   => [ ] },
139
140    "remove-index-metadata" => {
141        'compulsory-args' => [ "d", "metaname" ],
142        'optional-args'   => [ "metapos", "metavalue" ] },
143
144    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145        'compulsory-args' => [ "d", "metaname" ],
146        'optional-args'   => [ "where", "metapos", "metavalue" ] },
147
148    #INSERT METHODS
149    "insert-metadata" => {
150        'compulsory-args' => [ "d", "metaname", "metavalue" ],
151        'optional-args'   => [ ] }
152};
153
154
155sub new
156{
157    my $class = shift (@_);
158    my ($gsdl_cgi,$iis6_mode) = @_;
159
160    # Treat metavalue specially.  To transmit this through a GET request
161    # the Javascript side has url-encoded it, so here we need to decode
162    # it before proceeding
163
164    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173    return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179    my $self = shift @_;
180
181    my $username  = $self->{'username'};
182    my $collect   = $self->{'collect'};
183    my $gsdl_cgi  = $self->{'gsdl_cgi'};
184    my $gsdlhome  = $self->{'gsdlhome'};
185    my $infodbtype = $self->{'infodbtype'};
186   
187    # live metadata gets/saves value scoped (prefixed) by the current usename
188    # so (for now) let's not bother to enforce authentication
189
190    # Obtain the collect dir
191    my $site = $self->{'site'};
192    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
194
195    # No locking collection when getting metadata, only when setting it
196#    $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
197
198    # look up additional args
199    my $docid  = $self->{'d'};
200    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
201       $gsdl_cgi->generate_error("No docid (d=...) specified.");
202    }
203
204    # Generate the dbkey
205    my $metaname  = $self->{'metaname'};
206    my $dbkey = "$docid.$metaname";
207
208    # To people who know $collect_tail please add some comments
209    # Obtain path to the database
210    my $collect_tail = $collect;
211    $collect_tail =~ s/^.*[\/|\\]//;
212    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
213    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
214   
215    # Obtain the content of the key
216    my $cmd = "gdbmget $infodb_file_path $dbkey";
217    if (open(GIN,"$cmd |") == 0) {
218        # Catch error if gdbmget failed
219    my $mess = "Failed to get metadata key: $metaname\n";
220    $mess .= "$!\n";
221
222    $gsdl_cgi->generate_error($mess);
223    }
224    else {
225    binmode(GIN,":utf8");
226        # Read everything in and concatenate them into $metavalue
227    my $metavalue = "";
228    my $line;
229    while (defined ($line=<GIN>)) {
230        $metavalue .= $line;
231    }
232    close(GIN);
233    chomp($metavalue); # Get rid off the tailing newlines
234    $gsdl_cgi->generate_ok_message("$metavalue");
235    }
236
237    # Release the lock once it is done
238#    $self->unlock_collection($username, $collect);
239}
240
241# just calls the index version
242sub get_metadata
243{
244    my $self = shift @_;
245    $self->get_index_metadata(@_);
246}
247
248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254    my $self = shift @_;
255
256    my $where = $self->{'where'};
257    if (!$where) { 
258    $where = "index"; # default behaviour is to get the values from index
259    }
260
261    # Only when setting metadata do we perform authentication and do we lock the collection
262    # not when getting metadata
263
264    # for get_meta_array, the where param can only be ONE of import, archives, index, live
265    if($where =~ m/index/) {
266    $self->_get_index_metadata_array(@_);
267    }
268    elsif($where =~ m/archives/) {
269#   $self->_get_archives_metadata_array(@_);
270    }
271    elsif($where =~ m/import/) {
272#   $self->_get_import_metadata_array(@_);
273    }
274    elsif($where =~ m/live/) {
275#       $self->_get_live_metadata_array(@_);
276    }
277}
278
279# takes a JSON string and returns a JSON string
280# resulting string is of the form:
281# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
282sub _get_index_metadata_array
283{
284    my $self = shift @_;
285
286    my $collect   = $self->{'collect'};
287    my $gsdl_cgi  = $self->{'gsdl_cgi'};
288    my $site = $self->{'site'};
289    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
290
291    # look up additional args   
292    my $infodbtype = $self->{'infodbtype'};
293   
294    # To people who know $collect_tail please add some comments
295    # Obtain the path to the database
296    my $collect_tail = $collect;
297    $collect_tail =~ s/^.*[\/\\]//;
298    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
299    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
300
301    my $json_str      = $self->{'json'};
302    my $doc_array = decode_json $json_str;
303
304    my $json_result_str = "[";
305    my $first_doc_rec = 1;
306
307    foreach my $doc_array_rec ( @$doc_array ) {
308   
309    my $docid     = $doc_array_rec->{'docid'};
310   
311    if($first_doc_rec) {
312        $first_doc_rec = 0;
313    } else {
314        $json_result_str .= ",";
315    }
316    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
317
318    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
319    $json_result_str = $json_result_str . ",\"metatable\":[";
320
321    my $first_rec = 1;
322    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
323        if($first_rec) {
324        $first_rec = 0;
325        } else {
326        $json_result_str .= ",";       
327        }
328       
329        my $metaname  = $metatable_rec->{'metaname'};
330        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
331
332        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
333        if(!defined $metapos) {
334        $metapos = 0;
335        }
336
337         # Read the docid entry
338        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
339 
340        # Basically loop through and unescape_html the values
341        foreach my $k (keys %$doc_rec) {
342        my @escaped_v = ();
343        foreach my $v (@{$doc_rec->{$k}}) {
344            my $ev = &ghtml::unescape_html($v);
345            push(@escaped_v, $ev);
346        }
347        $doc_rec->{$k} = \@escaped_v;
348        }
349
350        # Obtain the specified metadata value(s)
351        my $metavalue;
352
353        if($metapos ne "all") { # get the value at a single metapos
354
355        $metavalue = $doc_rec->{$metaname}->[$metapos];     
356        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
357
358        } else {
359        my $first_metaval = 1;
360        $metapos = 0;
361        $metavalue = $doc_rec->{$metaname}->[$metapos];
362
363        while (defined $metavalue) {
364            if($first_metaval) {       
365            $first_metaval = 0;
366            } else {
367            $json_result_str .= ",";
368            }
369       
370            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
371
372            $metapos++;
373            $metavalue = $doc_rec->{$metaname}->[$metapos];         
374        }
375        }
376
377        $json_result_str .= "]}"; # close metavals array and metatable record
378    }
379   
380    $json_result_str .= "]}"; # close metatable array and docid record
381    }
382
383    $json_result_str .= "]"; # close array of docids
384
385    $gsdl_cgi->generate_ok_message($json_result_str);   
386}
387
388
389sub get_index_metadata
390{
391    my $self = shift @_;
392
393    my $username  = $self->{'username'};
394    my $collect   = $self->{'collect'};
395    my $gsdl_cgi  = $self->{'gsdl_cgi'};
396    my $gsdlhome  = $self->{'gsdlhome'};
397
398    # Obtain the collect dir
399    my $site = $self->{'site'};
400    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
401    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
402
403    # look up additional args
404    my $docid     = $self->{'d'};
405    my $metaname  = $self->{'metaname'};
406    my $metapos   = $self->{'metapos'};
407    my $infodbtype = $self->{'infodbtype'};
408
409    # To people who know $collect_tail please add some comments
410    # Obtain path to the database
411    my $collect_tail = $collect;
412    $collect_tail =~ s/^.*[\/\\]//;
413    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
414    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
415
416    # Read the docid entry
417    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
418 
419    # Basically loop through and unescape_html the values
420    foreach my $k (keys %$doc_rec) {
421    my @escaped_v = ();
422    foreach my $v (@{$doc_rec->{$k}}) {
423        my $ev = &ghtml::unescape_html($v);
424        push(@escaped_v, $ev);
425    }
426    $doc_rec->{$k} = \@escaped_v;
427    }
428
429    # Obtain the specified metadata value
430    $metapos = 0 if (!defined $metapos);
431    my $metavalue = $doc_rec->{$metaname}->[$metapos];
432    $gsdl_cgi->generate_ok_message("$metavalue");
433   
434}
435
436
437sub get_import_metadata
438{
439    my $self = shift @_;
440
441    my $username  = $self->{'username'};
442    my $collect   = $self->{'collect'};
443    my $gsdl_cgi  = $self->{'gsdl_cgi'};
444    my $gsdlhome  = $self->{'gsdlhome'};
445
446    # Obtain the collect dir
447    my $site = $self->{'site'};
448    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
449    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
450
451    # look up additional args
452    my $docid     = $self->{'d'};
453    my $metaname  = $self->{'metaname'};
454    my $metapos = $self->{'metapos'};
455    $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
456
457    my $infodbtype = $self->{'infodbtype'};
458    if (!defined $docid)
459    {
460        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
461    }
462
463    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
464    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
465    my $metadata_xml_file;
466    my $import_filename = undef;
467   
468
469    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
470    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
471    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
472
473    # This now stores the full pathname
474    $import_filename = $doc_rec->{'src-file'}->[0];
475
476    # figure out correct metadata.xml file [?]
477    # Assuming the metadata.xml file is next to the source file
478    # Note: This will not work if it is using the inherited metadata from the parent folder
479    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
480    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
481
482    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
483
484}
485
486sub get_metadata_from_metadata_xml
487{
488    my $self = shift @_;
489    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
490   
491    my @rules =
492    (
493        _default => 'raw',
494        'Metadata' => \&gfmxml_metadata,
495        'FileName' => \&mxml_filename
496    );
497       
498    my $parser = XML::Rules->new
499    (
500        rules => \@rules,
501        output_encoding => 'utf8'
502    );
503   
504    my $xml_in = "";
505    if (!open(MIN,"<$metadata_xml_filename"))
506    {
507        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
508    }
509    else
510    {
511        # Read them in
512        my $line;
513        while (defined ($line=<MIN>)) {
514            $xml_in .= $line;
515        }
516        close(MIN);
517
518        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
519       
520        if(defined $parser->{'pad'}->{'metavalue'})
521        {
522            return $parser->{'pad'}->{'metavalue'};
523        }
524        else
525        {
526            return "";
527        }
528    }
529}
530
531sub gfmxml_metadata
532{
533    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
534
535    # no subsection support yet in metadata.xml
536
537    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
538    {
539        if (!defined $parser->{'parameters'}->{'poscount'})
540        {
541            $parser->{'parameters'}->{'poscount'} = 0;
542        }
543        else
544        {
545            $parser->{'parameters'}->{'poscount'}++;
546        }
547   
548        # gets the first value by default, since metapos defaults to 0
549        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
550        {
551            if($parser->{'parameters'}->{'metapos'} > 0) {
552            print STDERR "@@@@ WARNING: non-zero metapos.\n";
553            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
554            }
555            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
556        }
557    }
558}
559
560sub get_archives_metadata
561{
562    my $self = shift @_;
563
564    my $username  = $self->{'username'};
565    my $collect   = $self->{'collect'};
566    my $gsdl_cgi  = $self->{'gsdl_cgi'};
567    my $gsdlhome  = $self->{'gsdlhome'};
568    my $infodbtype = $self->{'infodbtype'};
569
570    # Obtain the collect dir
571    my $site = $self->{'site'};
572    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
573   
574    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
575
576    # look up additional args
577    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
578    $docid_secnum = "" if (!defined $docid_secnum);
579   
580    my $metaname = $self->{'metaname'};
581    my $metapos = $self->{'metapos'};
582    $metapos = 0 if (!defined $metapos);
583   
584    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
585    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
586
587    # This now stores the full pathname
588    my $doc_filename = $doc_rec->{'doc-file'}->[0];
589
590    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
591
592}
593
594sub get_metadata_from_archive_xml
595{
596    my $self = shift @_;
597    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
598   
599    my @start_rules = ('Section' => \&dxml_start_section);
600   
601    my @rules =
602    (
603        _default => 'raw',
604        'Metadata' => \&gfdxml_metadata
605    );
606       
607    my $parser = XML::Rules->new
608    (
609        start_rules => \@start_rules,
610        rules => \@rules,
611        output_encoding => 'utf8'
612    );
613   
614    my $xml_in = "";
615    if (!open(MIN,"<$doc_xml_filename"))
616    {
617        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
618    }
619    else
620    {
621        # Read them in
622        my $line;
623        while (defined ($line=<MIN>)) {
624            $xml_in .= $line;
625        }
626        close(MIN);
627
628        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
629       
630        if(defined $parser->{'pad'}->{'metavalue'})
631        {
632            return $parser->{'pad'}->{'metavalue'};
633        }
634        else
635        {
636            return "";
637        }
638    }
639}
640
641sub gfdxml_metadata
642{
643    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
644   
645    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
646    {
647        return;
648    }
649
650    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
651    {
652        if (!defined $parser->{'parameters'}->{'poscount'})
653        {
654            $parser->{'parameters'}->{'poscount'} = 0;
655        }
656        else
657        {
658            $parser->{'parameters'}->{'poscount'}++;
659        }
660    }
661
662    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
663    {   
664        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
665    }
666}
667
668sub _set_live_metadata
669{
670    my $self = shift @_;
671
672    my $collect   = $self->{'collect'};
673    my $gsdl_cgi  = $self->{'gsdl_cgi'};
674    #my $gsdlhome  = $self->{'gsdlhome'};
675    my $infodbtype = $self->{'infodbtype'};
676
677    # Obtain the collect dir
678    my $site = $self->{'site'};
679    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
680    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
681
682
683    # look up additional args
684    my $docid     = $self->{'d'};
685    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
686      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
687    }
688    my $metavalue = $self->{'metavalue'};
689
690    # Generate the dbkey   
691    my $metaname  = $self->{'metaname'};
692    my $dbkey = "$docid.$metaname";
693
694    # To people who know $collect_tail please add some comments
695    # Obtain path to the database
696    my $collect_tail = $collect;
697    $collect_tail =~ s/^.*[\/\\]//;
698    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
699    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
700
701    # Set the new value
702    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
703    my $status = system($cmd);
704    if ($status != 0) {
705        # Catch error if gdbmget failed
706    my $mess = "Failed to set metadata key: $dbkey\n";
707
708    $mess .= "PATH: $ENV{'PATH'}\n";
709    $mess .= "cmd = $cmd\n";
710    $mess .= "Exit status: $status\n";
711    $mess .= "System Error Message: $!\n";
712
713    $gsdl_cgi->generate_error($mess);
714    }
715    else {
716    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
717    }
718
719    #return $status; # in case calling functions have any further use for this
720}
721
722sub set_live_metadata
723{
724    my $self = shift @_;
725
726    my $username  = $self->{'username'};
727    my $collect   = $self->{'collect'};
728    my $gsdl_cgi  = $self->{'gsdl_cgi'};
729 
730    if ($baseaction::authentication_enabled) {
731    # Ensure the user is allowed to edit this collection   
732    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
733    }
734
735    # Make sure the collection isn't locked by someone else
736    $self->lock_collection($username, $collect);
737
738    $self->_set_live_metadata(@_);
739
740    # Release the lock once it is done
741    $self->unlock_collection($username, $collect);
742}
743
744sub set_index_metadata_entry
745{
746    my $self = shift @_;
747    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
748   
749    # To people who know $collect_tail please add some comments
750    # Obtain path to the database
751    my $collect_tail = $collect;
752    $collect_tail =~ s/^.*[\/\\]//;
753    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
754    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
755   
756#   print STDERR "**** infodb file path = $infodb_file_path\n";
757#   print STDERR "***** infodb type = $infodbtype\n";
758   
759    # Read the docid entry
760    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
761   
762    # Set the metadata value
763    if (defined $metapos) {
764    # if metamode=accumulate AND metapos, warn user and then use metapos
765    if (defined $metamode && $metamode eq "accumulate") {
766        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
767        print STDERR "**** Proceeding by using metapos\n";
768    }
769    $doc_rec->{$metaname}->[$metapos] = $metavalue;
770    }
771    elsif (defined $metamode && $metamode eq "override") {
772    $doc_rec->{$metaname} = [ $metavalue ];
773    }
774    else { # default for index was to override, but because accumulate is less destructive,
775    # and because accumulate is the default for archives and import, that's the new default for index too
776    if(defined $doc_rec->{$metaname}) {
777        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
778    } else {
779        $doc_rec->{$metaname} = [ $metavalue ];
780    }
781    }
782 
783    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
784   
785    return $status;
786   
787}
788
789sub _set_import_metadata
790{
791    my $self = shift @_;
792
793    my $collect   = $self->{'collect'};
794    my $gsdl_cgi  = $self->{'gsdl_cgi'};
795    my $infodbtype = $self->{'infodbtype'};
796#    my $gsdlhome  = $self->{'gsdlhome'};
797     
798    # Obtain the collect and archive dir   
799    my $site = $self->{'site'};
800    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
801    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
802    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
803   
804    # look up additional args
805    # want either d= or f=
806    my $docid  = $self->{'d'};
807    my ($docid_root,$docid_secnum);
808    if(defined $docid) {   
809    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
810    # as yet no support for setting subsection metadata in metadata.xml
811    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
812        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
813        return;
814    }
815    }
816
817    my $import_file  = $self->{'f'};
818    if ((!defined $docid) && (!defined $import_file)) {
819    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
820    }
821
822    # Get the parameters and set default mode to "accumulate"
823    my $metaname   = $self->{'metaname'};
824    my $metavalue  = $self->{'metavalue'};
825##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
826    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
827   
828    my $metamode   = $self->{'metamode'};
829    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
830    # make "accumulate" the default (less destructive, as won't actually
831    # delete any existing values)
832    $metamode = "accumulate";
833    }
834
835    # adding metapos and prevmetavalue support to import_metadata subroutines
836    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
837    my $prevmetavalue = $self->{'prevmetavalue'};
838
839    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
840    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
841
842    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
843    $mess .= "  $metaname";
844    $mess .= " = $metavalue";
845    $mess .= " ($metamode)\n";
846
847    $gsdl_cgi->generate_ok_message($mess);
848
849    #return $status; # in case calling functions have any further use for this
850}
851
852# the version of set_index_meta that doesn't do authentication
853sub _set_archives_metadata
854{
855    my $self = shift @_;
856
857    my $collect   = $self->{'collect'};
858    my $gsdl_cgi  = $self->{'gsdl_cgi'};
859    my $infodbtype = $self->{'infodbtype'};
860   
861    # Obtain the collect and archive dir   
862    my $site = $self->{'site'};
863    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
864    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
865
866    # look up additional args
867    my $docid  = $self->{'d'};
868    my $metaname   = $self->{'metaname'};
869    my $metavalue  = $self->{'metavalue'};
870    my $prevmetavalue = $self->{'prevmetavalue'};
871   
872    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
873                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
874
875    my $metamode   = $self->{'metamode'};
876    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
877    # make "accumulate" the default (less destructive, as won't actually
878    # delete any existing values)
879    $metamode = "accumulate";
880    }
881
882    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
883                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
884   
885    if ($status == 0) {
886    my $mess = "set-archives-metadata successful: Key[$docid]\n";
887    $mess .= "  $metaname";
888    $mess .= "->[$metapos]" if (defined $metapos);
889    $mess .= " = $metavalue";
890    $mess .= " ($metamode)\n";
891   
892    $gsdl_cgi->generate_ok_message($mess); 
893    }
894    else {
895    my $mess .= "Failed to set archives metadata key: $docid\n";
896    $mess .= "Exit status: $status\n";
897    if(defined $self->{'error_msg'}) {
898        $mess .= "Error Message: $self->{'error_msg'}\n";
899    } else {
900        $mess .= "System Error Message: $!\n";
901    }
902    $mess .= "-" x 20 . "\n";
903   
904    $gsdl_cgi->generate_error($mess);
905    }
906
907    #return $status; # in case calling functions have any further use for this
908}
909
910
911# the version of set_index_meta that doesn't do authentication
912sub _set_index_metadata
913{
914    my $self = shift @_;
915
916    my $collect   = $self->{'collect'};
917    my $gsdl_cgi  = $self->{'gsdl_cgi'};
918
919    my $site = $self->{'site'};
920    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
921
922    # look up additional args
923    my $docid     = $self->{'d'};
924    my $metaname  = $self->{'metaname'};
925    my $metapos   = $self->{'metapos'}; # undef has meaning
926    my $metavalue = $self->{'metavalue'};
927    my $infodbtype = $self->{'infodbtype'};
928    my $metamode  = $self->{'metamode'};   
929
930    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
931   
932    if ($status != 0) {
933        # Catch error if set infodb entry failed
934    my $mess = "Failed to set metadata key: $docid\n";
935   
936    $mess .= "PATH: $ENV{'PATH'}\n";
937    $mess .= "Exit status: $status\n";
938    $mess .= "System Error Message: $!\n";
939   
940    $gsdl_cgi->generate_error($mess);
941    }
942    else {
943    my $mess = "set-index-metadata successful: Key[$docid]\n";
944    $mess .= "  $metaname";
945    $mess .= "->[$metapos]" if (defined $metapos);
946    $mess .= " = $metavalue\n";
947   
948    $gsdl_cgi->generate_ok_message($mess);
949    }
950
951    #return $status; # in case calling functions have any further use for this
952}
953
954sub set_index_metadata
955{
956    my $self = shift @_;
957
958    my $username  = $self->{'username'};
959    my $collect   = $self->{'collect'};
960    my $gsdl_cgi  = $self->{'gsdl_cgi'};
961    #my $gsdlhome  = $self->{'gsdlhome'};
962
963    if ($baseaction::authentication_enabled) {
964    # Ensure the user is allowed to edit this collection   
965    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
966    }
967
968    my $site = $self->{'site'};
969    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
970   
971    $gsdl_cgi->checked_chdir($collect_dir);
972
973    # Obtain the collect dir
974    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
975
976    # Make sure the collection isn't locked by someone else
977    $self->lock_collection($username, $collect);
978
979    $self->_set_index_metadata(@_);
980   
981    # Release the lock once it is done
982    $self->unlock_collection($username, $collect);
983}
984
985# call this to set the metadata for a combination of dirs archives, import or index, or live
986# if none specified, defaults to index which was the original behaviour of set_metadata.
987sub set_metadata
988{
989    my $self = shift @_;
990
991    # Testing that not defining a variable, setting it to "" or to "  " all return false
992    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
993
994    my $where = $self->{'where'};
995    if(!$where) {   
996    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
997    return;
998    }
999
1000    # authenticate and lock collection once, even if processing multiple dirs
1001    my $username  = $self->{'username'};
1002    my $collect   = $self->{'collect'};
1003    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1004   
1005    if ($baseaction::authentication_enabled) {
1006    # Ensure the user is allowed to edit this collection   
1007    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1008    }
1009   
1010    if($where =~ m/index/) {
1011    my $site = $self->{'site'};
1012    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1013    $gsdl_cgi->checked_chdir($collect_dir);
1014    }
1015
1016    # Make sure the collection isn't locked by someone else
1017    $self->lock_collection($username, $collect);
1018
1019
1020    # now at last can set the metadata. $where can specify multiple
1021    # $where is of the form: import|archives|index, or a subset thereof
1022
1023    #my @whichdirs = split('\|', $where);
1024
1025    # just check whether $where contains import/archives/index/live in turn, and
1026    # for each case, process it accordingly
1027    if($where =~ m/import/) {
1028    $self->_set_import_metadata(@_);       
1029    }
1030
1031    if($where =~ m/archives/) {
1032
1033    # look up docID arg which is optional to set_metadata because it's optional
1034    # to set_import, but which is compulsory to set_archives_metadata
1035    my $docid     = $self->{'d'};
1036    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1037        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1038    }
1039    # we have a docid, so can set archives meta
1040    $self->_set_archives_metadata(@_); 
1041    }
1042
1043    if($where =~ m/index/) {
1044   
1045    # look up docID arg which is optional to set_metadata because it's optional
1046    # to set_import, but which is compulsory to set_archives_metadata
1047    my $docid     = $self->{'d'};
1048    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1049        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1050    }
1051    # we have a docid, so can set index meta
1052    $self->_set_index_metadata(@_);
1053    }   
1054
1055    if($where =~ m/live/) {
1056    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1057    }
1058
1059    # Release the lock once it is done
1060    $self->unlock_collection($username, $collect);
1061}
1062
1063sub set_metadata_array
1064{
1065    my $self = shift @_;
1066
1067    my $where = $self->{'where'};
1068    if(!$where) {   
1069    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1070    return;
1071    }
1072
1073    my $username  = $self->{'username'};
1074    my $collect   = $self->{'collect'};
1075    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1076
1077    if ($baseaction::authentication_enabled) {
1078    # Ensure the user is allowed to edit this collection
1079    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1080    }
1081
1082    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1083    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1084    my $site = $self->{'site'};
1085    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1086    $gsdl_cgi->checked_chdir($collect_dir);
1087
1088    # Make sure the collection isn't locked by someone else
1089    $self->lock_collection($username, $collect);
1090
1091    if($where =~ m/import/) {
1092    $self->_set_import_metadata_array(@_);
1093    }
1094    if($where =~ m/archives/) {
1095    $self->_set_archives_metadata_array(@_);
1096    }
1097    if($where =~ m/index/) {
1098    $self->_set_index_metadata_array(@_);
1099    }
1100    if($where =~ m/live/) {
1101        $self->_set_live_metadata_array(@_);
1102    }
1103
1104    # Release the lock once it is done
1105    $self->unlock_collection($username, $collect);
1106}
1107
1108sub _set_index_metadata_array
1109{
1110    my $self = shift @_;
1111
1112    my $collect   = $self->{'collect'};
1113    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1114    my $site = $self->{'site'};
1115    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1116
1117   
1118    # look up additional args
1119   
1120    my $infodbtype = $self->{'infodbtype'};
1121   
1122    my $json_str      = $self->{'json'};
1123    my $doc_array = decode_json $json_str;
1124   
1125   
1126    my $global_status = 0;
1127    my $global_mess = "";
1128   
1129    my @all_docids = ();
1130   
1131    foreach my $doc_array_rec ( @$doc_array ) {
1132   
1133    my $status = -1;
1134    my $docid     = $doc_array_rec->{'docid'};
1135   
1136    push(@all_docids,$docid);
1137
1138    my $metaname  = $doc_array_rec->{'metaname'};
1139    if(defined $metaname) {
1140        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1141        my $metavalue = $doc_array_rec->{'metavalue'};
1142        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1143
1144        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1145    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1146        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1147       
1148        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1149        $metaname  = $metatable_rec->{'metaname'};
1150        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1151        my $metapos = undef;
1152        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1153
1154        foreach my $metavalue ( @$metavals ) { # metavals is an array
1155            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1156            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1157            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1158            # metavalue for this metaname will be lost
1159            $metamode = "accumulate";
1160            }
1161        }           
1162        }
1163    }
1164   
1165    if ($status != 0) {
1166        # Catch error if set infodb entry failed
1167        $global_status = $status;
1168        $global_mess .= "Failed to set metadata key: $docid\n";
1169        $global_mess .= "Exit status: $status\n";
1170        $global_mess .= "System Error Message: $!\n";
1171        $global_mess .= "-" x 20;
1172    }
1173    }
1174   
1175    if ($global_status != 0) {
1176    $global_mess .= "PATH: $ENV{'PATH'}\n";
1177    $gsdl_cgi->generate_error($global_mess);
1178    }
1179    else {
1180    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1181    $gsdl_cgi->generate_ok_message($mess);
1182    }
1183}
1184
1185sub set_index_metadata_array
1186{
1187    my $self = shift @_;
1188
1189    my $username  = $self->{'username'};
1190    my $collect   = $self->{'collect'};
1191    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1192#    my $gsdlhome  = $self->{'gsdlhome'};
1193
1194    if ($baseaction::authentication_enabled) {
1195    # Ensure the user is allowed to edit this collection   
1196    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1197    }
1198
1199    my $site = $self->{'site'};
1200    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1201   
1202    $gsdl_cgi->checked_chdir($collect_dir);
1203
1204    # Obtain the collect dir
1205    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1206
1207    # Make sure the collection isn't locked by someone else
1208    $self->lock_collection($username, $collect);
1209
1210    $self->_set_index_metadata_array(@_);
1211
1212    # Release the lock once it is done
1213    $self->unlock_collection($username, $collect);
1214}
1215
1216# experimental, newly added in and untested
1217sub _set_live_metadata_array
1218{
1219    my $self = shift @_;
1220
1221    my $collect   = $self->{'collect'};
1222    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1223
1224    my $site = $self->{'site'};
1225    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1226
1227   
1228    # look up additional args
1229    my $infodbtype = $self->{'infodbtype'};
1230    # To people who know $collect_tail please add some comments
1231    # Obtain path to the database
1232    my $collect_tail = $collect;
1233    $collect_tail =~ s/^.*[\/\\]//;
1234    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1235    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1236
1237   
1238    my $json_str      = $self->{'json'};
1239    my $doc_array = decode_json $json_str;
1240   
1241   
1242    my $global_status = 0;
1243    my $global_mess = "";
1244   
1245    my @all_docids = ();
1246
1247
1248    foreach my $doc_array_rec ( @$doc_array ) {
1249   
1250    my $status = -1;
1251    my $docid     = $doc_array_rec->{'docid'};
1252
1253    push(@all_docids,$docid);
1254
1255    my $metaname  = $doc_array_rec->{'metaname'};
1256    if(defined $metaname) {
1257        my $dbkey = "$docid.$metaname";
1258        my $metavalue = $doc_array_rec->{'metavalue'};
1259
1260        # Set the new value
1261        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1262        $status = system($cmd);
1263
1264    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1265        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1266        foreach my $metatable_rec ( @$metatable ) {
1267        $metaname  = $metatable_rec->{'metaname'};
1268        my $dbkey = "$docid.$metaname";
1269
1270        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1271        foreach my $metavalue ( @$metavals ) {
1272             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1273             $status = system($cmd);
1274        }
1275        }
1276       
1277    }
1278
1279    if ($status != 0) {
1280        # Catch error if gdbmget failed
1281        $global_status = $status;
1282        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1283        $global_mess .= "Exit status: $status\n";
1284        $global_mess .= "System Error Message: $!\n";
1285        $global_mess .= "-" x 20;
1286    }
1287    }
1288   
1289    if ($global_status != 0) {
1290    $global_mess .= "PATH: $ENV{'PATH'}\n";
1291    $gsdl_cgi->generate_error($global_mess);
1292    }
1293    else {
1294    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1295    $gsdl_cgi->generate_ok_message($mess);
1296    }
1297}
1298
1299sub set_live_metadata_array
1300{
1301    my $self = shift @_;
1302
1303    my $username  = $self->{'username'};
1304    my $collect   = $self->{'collect'};
1305    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1306
1307    if ($baseaction::authentication_enabled) {
1308    # Ensure the user is allowed to edit this collection   
1309    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1310    }
1311
1312    my $site = $self->{'site'};
1313    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1314   
1315    $gsdl_cgi->checked_chdir($collect_dir);
1316
1317    # Make sure the collection isn't locked by someone else
1318    $self->lock_collection($username, $collect);
1319
1320    $self->_set_live_metadata_array(@_);
1321
1322    # Release the lock once it is done
1323    $self->unlock_collection($username, $collect);
1324}
1325
1326
1327sub dxml_metadata
1328{
1329    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1330    my $metaname = $parser->{'parameters'}->{'metaname'};
1331    my $metamode = $parser->{'parameters'}->{'metamode'};
1332   
1333    print STDERR "**** Processing closing </Metadata> tag\n";
1334   
1335    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1336   
1337    # Find the right metadata tag and checks if we are going to
1338    # override it
1339    #
1340    # Note: This over writes the first metadata block it
1341    # encountered. If there are multiple Sections in the doc.xml, it
1342    # might not behave as you would expect
1343
1344    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1345##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1346##    print STDERR "**** metamode = $metamode\n";
1347   
1348    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1349    {
1350        my $name_attr = $attrHash->{'name'};
1351        if (($name_attr eq $metaname) && ($metamode eq "override"))
1352        {
1353            if (!defined $parser->{'parameters'}->{'poscount'})
1354            {
1355                $parser->{'parameters'}->{'poscount'} = 0;
1356            }
1357            else
1358            {
1359                $parser->{'parameters'}->{'poscount'}++;
1360            }
1361           
1362            if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1363            {
1364                ##print STDERR "#### got match!!\n";
1365                # Get the value and override the current value
1366                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1367                $attrHash->{'_content'} = $metavalue;
1368               
1369                # Don't want it to wipe out any other pieces of metadata
1370                $parser->{'parameters'}->{'metamode'} = "done";
1371            }
1372            elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1373            {
1374                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1375                $attrHash->{'_content'} = $metavalue;
1376                $parser->{'parameters'}->{'metamode'} = "done";
1377            }
1378        }
1379    }
1380
1381    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1382    return [$tagname => $attrHash];
1383}
1384
1385# This method exists purely for catching invalid section numbers that the client
1386# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1387# we've seen all the Sections in the doc.xml, and none of their section nums matched
1388# if the metamode has not been set to 'done' by then.
1389sub dxml_archive
1390{
1391    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1392    my $metamode = $parser->{'parameters'}->{'metamode'};
1393   
1394    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1395    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1396   
1397#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1398   
1399    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1400    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1401
1402    my $metaname = $parser->{'parameters'}->{'metaname'};
1403    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1404   
1405    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1406    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1407    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1408    print STDERR "\tin section $opt_doc_secnum.\n";
1409    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1410    }
1411   
1412    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1413    return [$tagname => $attrHash];
1414}
1415
1416sub dxml_description
1417{
1418    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1419    my $metamode = $parser->{'parameters'}->{'metamode'};
1420
1421    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1422    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1423   
1424    print STDERR "**** Processing a closing </Description> tag \n";
1425#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1426   
1427    # Accumulate the metadata
1428
1429    # We'll be accumulating metadata at this point if we haven't found and therefore
1430    # haven't processed the metadata yet.
1431    # For subsections, this means that if we're at a matching subsection, but haven't
1432    # found the correct metaname to override in that subsection, we accumulate it as new
1433    # meta in the subsection by adding it to the current description.
1434    # If there's no subsection info for the metadata, it will accumulate at the top level
1435    # section description if we hadn't found a matching metaname to override at this point.
1436
1437    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1438    # at the toplevel section. In the latter case, it means we want to process meta in the
1439    # toplevel section. So the eq check between the values below will work in all cases.
1440   
1441    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1442    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1443    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1444
1445    if ($curr_secnum eq $opt_doc_secnum
1446        && ($metamode eq "accumulate" || $metamode eq "override")) {
1447        if ($metamode eq "override") {
1448        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
1449        }
1450
1451        # If we get to here and metamode is override, this means there
1452        # was no existing value to overide => treat as an append operation
1453       
1454        # Tack a new metadata tag on to the end of the <Metadata>+ block
1455        my $metaname = $parser->{'parameters'}->{'metaname'};
1456        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1457       
1458        my $metadata_attr = {
1459        '_content' => $metavalue,
1460        'name' => $metaname,
1461        'mode' => "accumulate"
1462        };
1463       
1464        my $append_metadata = [ "Metadata" => $metadata_attr ];
1465        my $description_content = $attrHash->{'_content'};
1466       
1467        print "Appending metadata to doc.xml\n";
1468       
1469        if (ref($description_content)) {
1470        # got some existing interesting nested content
1471        push(@$description_content, "    ", $append_metadata ,"\n        ");
1472        }
1473        else {
1474        #description_content is most likely a string such as "\n"
1475        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1476        }
1477       
1478        $parser->{'parameters'}->{'metamode'} = "done";
1479    }       
1480    else {
1481        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1482##      print STDERR "**** NOT ACCUMULATE?!? \n";
1483    }
1484
1485    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1486    return [$tagname => $attrHash];
1487}
1488
1489
1490sub dxml_start_section
1491{
1492    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1493
1494    my $new_depth = scalar(@$contextArray);
1495
1496    print STDERR "**** START SECTION \n";
1497   
1498    if ($new_depth == 1) {
1499    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1500    $parser->{'parameters'}->{'curr_section_num'}   = "";
1501    }
1502
1503    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1504    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1505
1506    my $new_secnum;
1507
1508    if ($new_depth > $old_depth) {
1509    # child subsection
1510    $new_secnum = "$old_secnum.1";
1511    }
1512    elsif ($new_depth == $old_depth) {
1513    # sibling section => increase it's value by 1
1514    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1515    $tail_num++;
1516    $new_secnum = $old_secnum;
1517    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1518    }
1519    else {
1520    # back up to parent section => lopp off tail
1521    $new_secnum = $old_secnum;
1522    $new_secnum =~ s/\.\d+$//;
1523    }
1524
1525    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1526    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1527   
1528    1;
1529}
1530
1531sub edit_xml_file
1532{
1533    my $self = shift @_;
1534    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1535
1536    # use XML::Rules to add it in (read in and out again)
1537    my $parser = XML::Rules->new(start_rules     => $start_rules,
1538                 rules           => $rules,
1539                 style           => 'filter',
1540                                 output_encoding => 'utf8' );
1541
1542    my $xml_in = "";
1543    if (!open(MIN,"<$filename")) {
1544    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1545    }
1546    else {
1547        # Read all the text in
1548    my $line;
1549    while (defined ($line=<MIN>)) {
1550        $xml_in .= $line;
1551    }
1552    close(MIN);
1553   
1554    my $MOUT;   
1555    if (!open($MOUT,">$filename")) {
1556        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1557    }
1558    else {
1559        # Matched lines will get handled by the call backs
1560##      my $xml_out = "";
1561
1562        binmode($MOUT,":utf8");
1563        $parser->filter($xml_in,$MOUT, $options);
1564
1565#       binmode(MOUT,":utf8");
1566#       print MOUT $xml_out;
1567        close($MOUT);       
1568    }
1569    }
1570
1571    # copy across any custom error information that was stored during parsing
1572    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1573}
1574
1575sub edit_doc_xml
1576{
1577    my $self = shift @_;
1578    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1579
1580    my $info_mess = <<RAWEND;
1581****************************
1582  edit_doc_xml()
1583****************************
1584doc_xml_filename = $doc_xml_filename
1585metaname = $metaname
1586metavalue = $metavalue
1587metapos = $metapos
1588metamode = $metamode
1589opt_secnum = $opt_secnum
1590prevmetavalue = $prevmetavalue
1591****************************
1592RAWEND
1593
1594    $gsdl_cgi->generate_message($info_mess);
1595   
1596    # To monitor which section/subsection number we are in
1597    my @start_rules =
1598    ( 'Section'    => \&dxml_start_section );
1599
1600    # use XML::Rules to add it in (read in and out again)
1601    # Set the call back functions
1602    my @rules =
1603    ( _default => 'raw',
1604      'Metadata'    => \&dxml_metadata,
1605      'Description' => \&dxml_description,
1606      'Archive'     => \&dxml_archive); # just for catching errors at end
1607     
1608    # Sets the parameters
1609    my $options = { 'metaname'  => $metaname,
1610            'metapos'   => $metapos,
1611            'metavalue' => $metavalue,
1612            'metamode'  => $metamode,
1613            'prevmetavalue' => $prevmetavalue };
1614           
1615    if (defined $opt_secnum) {
1616    $options->{'secnum'} = $opt_secnum;
1617    }
1618
1619    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1620}
1621
1622sub set_archives_metadata_entry
1623{
1624    my $self = shift @_;
1625    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1626   
1627    my $info_mess = <<RAWEND;
1628****************************
1629  set_archives_metadata_entry()
1630****************************
1631archive_dir = $archive_dir
1632collect_dir = $collect_dir
1633collect = $collect
1634infodbtype = $infodbtype
1635docid = $docid
1636metaname = $metaname
1637metapos = $metapos
1638metavalue = $metavalue
1639metamode = $metamode
1640prevmetavalue = $prevmetavalue
1641****************************
1642RAWEND
1643
1644    $gsdl_cgi->generate_message($info_mess);
1645   
1646    # Obtain the doc.xml path for the specified docID
1647    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1648
1649    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1650    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1651    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1652   
1653    # The $doc_xml_file is relative to the archives, and now let's get the full path
1654    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
1655    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1656
1657    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1658    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1659    # Thereafter, we will finally be setting the overriding metavalue for this metaname
1660    if(!defined $metapos && $metamode eq "override") {
1661    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1662    }
1663
1664    # Edit the doc.xml file with the specified metadata name, value and position.
1665    # TODO: there is a potential problem here as this edit_doc_xml function
1666    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1667    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1668   
1669    # dxml_metadata method ignores metapos if metamode anything other than override
1670    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1671            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1672
1673    # return 0; # return 0 for now to indicate no error
1674    return (defined $self->{'error_msg'}) ? 1 : 0;
1675}
1676
1677
1678sub set_archives_metadata
1679{
1680    my $self = shift @_;
1681
1682    my $username  = $self->{'username'};
1683    my $collect   = $self->{'collect'};
1684    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1685   
1686    if ($baseaction::authentication_enabled) {
1687    # Ensure the user is allowed to edit this collection
1688    $self->authenticate_user($username, $collect);
1689    }
1690
1691    # Make sure the collection isn't locked by someone else
1692    $self->lock_collection($username, $collect);
1693
1694    $self->_set_archives_metadata(@_);
1695
1696    # Release the lock once it is done
1697    $self->unlock_collection($username, $collect);
1698}
1699
1700sub _set_archives_metadata_array
1701{
1702    my $self = shift @_;
1703   
1704    my $collect   = $self->{'collect'};
1705    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1706    my $site = $self->{'site'};
1707    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1708
1709    # look up additional args
1710   
1711    my $infodbtype = $self->{'infodbtype'};
1712   
1713    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1714   
1715    my $json_str      = $self->{'json'};
1716    my $doc_array = decode_json $json_str;
1717   
1718   
1719    my $global_status = 0;
1720    my $global_mess = "";
1721   
1722    my @all_docids = ();
1723   
1724    foreach my $doc_array_rec ( @$doc_array ) {
1725    my $status    = -1;
1726    my $docid     = $doc_array_rec->{'docid'};
1727
1728    push(@all_docids,$docid);
1729   
1730    my $metaname  = $doc_array_rec->{'metaname'};
1731    if(defined $metaname) {
1732       
1733        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1734
1735        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1736        my $metavalue = $doc_array_rec->{'metavalue'};
1737        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1738       
1739       
1740        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1741        # make "accumulate" the default (less destructive, as it won't actually
1742        # delete any existing values)
1743        $metamode = "accumulate";
1744        }       
1745       
1746        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1747                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1748    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1749        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1750       
1751        foreach my $metatable_rec ( @$metatable ) {
1752        $metaname  = $metatable_rec->{'metaname'};
1753        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1754        my $metapos = undef;
1755        my $prevmetavalue = undef;
1756        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1757       
1758        foreach my $metavalue ( @$metavals ) {
1759            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1760                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1761           
1762            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1763            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1764            # metavalue for this metaname will be lost
1765            $metamode = "accumulate";
1766            }
1767        }           
1768        }       
1769    }
1770       
1771    if ($status != 0) {
1772        # Catch error if set infodb entry failed
1773        $global_status = $status;
1774        $global_mess .= "Failed to set metadata key: $docid\n";
1775        $global_mess .= "Exit status: $status\n";
1776        $global_mess .= "System Error Message: $!\n";
1777        $global_mess .= "-" x 20 . "\n";
1778    }
1779    }
1780   
1781    if ($global_status != 0) {
1782    $global_mess .= "PATH: $ENV{'PATH'}\n";
1783    $gsdl_cgi->generate_error($global_mess);
1784    }
1785    else {
1786    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1787    $gsdl_cgi->generate_ok_message($mess);
1788    }
1789}
1790
1791sub set_archives_metadata_array
1792{
1793    my $self = shift @_;
1794
1795    my $username  = $self->{'username'};
1796    my $collect   = $self->{'collect'};
1797    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1798#    my $gsdlhome  = $self->{'gsdlhome'};
1799
1800    if ($baseaction::authentication_enabled) {
1801    # Ensure the user is allowed to edit this collection   
1802    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1803    }
1804
1805    my $site = $self->{'site'};
1806    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1807   
1808    $gsdl_cgi->checked_chdir($collect_dir);
1809
1810    # Obtain the collect dir
1811    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1812
1813    # Make sure the collection isn't locked by someone else
1814    $self->lock_collection($username, $collect);
1815
1816    $self->_set_archives_metadata_array(@_);
1817   
1818    # Release the lock once it is done
1819    $self->unlock_collection($username, $collect);
1820}
1821
1822sub _remove_archives_metadata
1823{
1824    my $self = shift @_;
1825
1826    my $collect   = $self->{'collect'};
1827    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1828#   my $gsdlhome  = $self->{'gsdlhome'};
1829    my $infodbtype = $self->{'infodbtype'};
1830   
1831    my $site = $self->{'site'};
1832       
1833    # Obtain the collect and archive dir   
1834    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1835   
1836    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1837
1838    # look up additional args
1839    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1840   
1841    my $metaname = $self->{'metaname'};
1842    my $metapos = $self->{'metapos'};
1843
1844    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1845
1846    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1847    # default to deleting the first metavalue for the given metaname
1848    # Beware that if both metapos AND metavalue are defined, both matches (if any)
1849    # seem to get deleted in one single remove_archives_meta action invocation.
1850    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1851    # deleted, both get deleted.
1852    if(!defined $metapos && !defined $metavalue) {
1853        $metapos = 0;
1854    }
1855
1856    my $metamode = $self->{'metamode'} || undef;
1857
1858    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1859    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1860
1861    # This now stores the full pathname
1862    my $doc_filename = $doc_rec->{'doc-file'}->[0];
1863
1864    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1865#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1866   
1867    if ($status == 0)
1868    {
1869        my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1870        $mess .= "  $metaname";
1871        $mess .= "->[$metapos]" if (defined $metapos);
1872
1873        $gsdl_cgi->generate_ok_message($mess); 
1874    }
1875    else
1876    {
1877        my $mess .= "Failed to remove archives metadata key: $docid\n";
1878        $mess .= "Exit status: $status\n";
1879        $mess .= "System Error Message: $!\n";
1880        $mess .= "-" x 20 . "\n";
1881       
1882        $gsdl_cgi->generate_error($mess);
1883    }
1884   
1885    #return $status; # in case calling functions have a use for this
1886}
1887
1888sub remove_archives_metadata
1889{
1890    my $self = shift @_;
1891
1892    my $username  = $self->{'username'};
1893    my $collect   = $self->{'collect'};
1894    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1895   
1896    if ($baseaction::authentication_enabled)
1897    {
1898        # Ensure the user is allowed to edit this collection       
1899        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1900    }
1901
1902    # Make sure the collection isn't locked by someone else
1903    $self->lock_collection($username, $collect);
1904
1905    $self->_remove_archives_metadata(@_);
1906
1907    # Release the lock once it is done
1908    $self->unlock_collection($username, $collect);
1909}
1910
1911sub remove_from_doc_xml
1912{
1913    my $self = shift @_;
1914    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1915   
1916    my @start_rules = ('Section' => \&dxml_start_section);
1917   
1918    # Set the call-back functions for the metadata tags
1919    my @rules =
1920    (
1921        _default => 'raw',
1922        'Metadata' => \&rfdxml_metadata
1923    );
1924       
1925    my $parser = XML::Rules->new
1926    (
1927        start_rules => \@start_rules,
1928        rules => \@rules,
1929        style => 'filter',
1930        output_encoding => 'utf8',
1931#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1932        stripspaces => 2|0|0 # ineffectual
1933    );
1934   
1935    my $status = 0;
1936    my $xml_in = "";
1937    if (!open(MIN,"<$doc_xml_filename"))
1938    {
1939        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1940        $status = 1;
1941    }
1942    else
1943    {
1944        # Read them in
1945        my $line;
1946        while (defined ($line=<MIN>)) {
1947            $xml_in .= $line;
1948        }
1949        close(MIN);
1950
1951        # Filter with the call-back functions
1952        my $xml_out = "";
1953
1954        my $MOUT;
1955        if (!open($MOUT,">$doc_xml_filename")) {
1956            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1957            $status = 1;
1958        }
1959        else {
1960            binmode($MOUT,":utf8");
1961            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1962            close($MOUT);       
1963        }
1964    }
1965    return $status;
1966}
1967
1968sub rfdxml_metadata
1969{
1970    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1971
1972    # For comparisons, toplevel section is indicated by ""
1973    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1974    my $secid = $parser->{'parameters'}->{'secid'} || "";
1975
1976    if (!($secid eq $curr_sec_num))
1977    {
1978        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1979        return [$tagname => $attrHash];
1980    }
1981
1982    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1983    {
1984        if (!defined $parser->{'parameters'}->{'poscount'})
1985        {
1986            $parser->{'parameters'}->{'poscount'} = 0;
1987        }
1988        else
1989        {
1990            $parser->{'parameters'}->{'poscount'}++;
1991        }
1992       
1993        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1994        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {         
1995            return [];
1996        }
1997    }
1998
1999    if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2000    {   
2001        return [];
2002    }
2003
2004    if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2005    {   
2006        return [];
2007    }
2008   
2009    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2010    return [$tagname => $attrHash];
2011}
2012
2013sub mxml_metadata
2014{
2015    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2016    my $metaname = $parser->{'parameters'}->{'metaname'};
2017    my $metamode = $parser->{'parameters'}->{'metamode'};
2018
2019    # Report error if we don't see FileName tag before this
2020    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2021   
2022    # Don't do anything if we are not in the right FileSet
2023    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2024    if ($file_regexp =~ /\.\*/) {
2025    # Only interested in a file_regexp if it specifies precisely one
2026    # file. 
2027    # So, skip anything with a .* in it as it is too general
2028##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2029    return [$tagname => $attrHash];
2030    }
2031    my $src_file = $parser->{'parameters'}->{'src_file'};
2032    if (!($src_file =~ /$file_regexp/)) {
2033    return [$tagname => $attrHash];
2034    }
2035##    print STDERR "*** mxl metamode = $metamode\n";
2036
2037    # Find the right metadata tag and checks if we are going to override it
2038    my $name_attr = $attrHash->{'name'};
2039    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2040
2041    # now metadata.xml functions need to keep track of metapos
2042    if (!defined $parser->{'parameters'}->{'poscount'})
2043    {
2044        $parser->{'parameters'}->{'poscount'} = 0;
2045    }
2046    else
2047    {
2048        $parser->{'parameters'}->{'poscount'}++;
2049    }
2050
2051    # If either the metapos or prevmetavalue is set,
2052        # get the value and override the current value
2053    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2054
2055    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2056    {
2057        $attrHash->{'_content'} = $metavalue;
2058
2059        ##  print STDERR "**** overriding metadata.xml\n";
2060       
2061        # Don't want it to wipe out any other pieces of metadata
2062        $parser->{'parameters'}->{'metamode'} = "done";
2063    }
2064    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2065    {
2066        $attrHash->{'_content'} = $metavalue;
2067        $parser->{'parameters'}->{'metamode'} = "done";
2068    }
2069    }
2070
2071    # mxml_description will process the metadata if metadata is accumulate,
2072    # or if we haven't found the metadata to override
2073
2074    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2075    return [$tagname => $attrHash];
2076}
2077
2078
2079sub mxml_description
2080{
2081    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2082    my $metamode = $parser->{'parameters'}->{'metamode'};   
2083
2084    # Failed... Report error if we don't see FileName tag before this
2085    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2086
2087    # Don't do anything if we are not in the right FileSet
2088    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2089    if ($file_regexp =~ m/\.\*/) {
2090    # Only interested in a file_regexp if it specifies precisely one
2091    # file. 
2092    # So, skip anything with a .* in it as it is too general
2093    return [$tagname => $attrHash];
2094    }
2095    my $src_file = $parser->{'parameters'}->{'src_file'};
2096   
2097    if (!($src_file =~ m/$file_regexp/)) {
2098    return [$tagname => $attrHash];
2099    }
2100
2101    # Accumulate the metadata block to the end of the description block
2102    # Note: This adds metadata block to all description blocks, so if there are
2103    # multiple FileSets, it will add to all of them
2104    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2105
2106    # if metamode was "override" but get to here then it failed to
2107    # find an item to override, in which case it should append its
2108    # value to the end, just like the "accumulate" mode
2109
2110    if ($metamode eq "override") {
2111        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2112    }
2113
2114    # tack a new metadata tag on to the end of the <Metadata>+ block
2115    my $metaname = $parser->{'parameters'}->{'metaname'};
2116    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2117   
2118    my $metadata_attr = { '_content' => $metavalue,
2119                  'name'     => $metaname,
2120                  'mode'     => "accumulate" };
2121
2122    my $append_metadata = [ "Metadata" => $metadata_attr ];
2123    my $description_content = $attrHash->{'_content'};
2124   
2125##  print STDERR "*** appending to metadata.xml\n";
2126
2127    # append the new metadata element to the end of the current
2128    # content contained inside this tag
2129    if (ref($description_content) eq "") {
2130        # => string or numeric literal
2131        # this is caused by a <Description> block has no <Metadata> child elements
2132        # => set up an empty array in '_content'
2133        $attrHash->{'_content'} = [ "\n" ];
2134        $description_content = $attrHash->{'_content'};
2135    }
2136
2137    push(@$description_content,"    ", $append_metadata ,"\n        ");
2138    $parser->{'parameters'}->{'metamode'} = "done";
2139    }
2140
2141    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2142    return [$tagname => $attrHash];
2143}
2144
2145
2146sub mxml_filename
2147{
2148    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2149
2150    # Store the filename of the Current Fileset
2151    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2152    # FileName tag must come before Description tag
2153    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2154
2155    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2156    return [$tagname => $attrHash];
2157}
2158
2159
2160sub mxml_fileset
2161{
2162    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2163
2164    # Initilise the current_file
2165    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2166    # FileName tag must come before Description tag
2167    $parser->{'parameters'}->{'current_file'} = "";
2168
2169    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2170    return [$tagname => $attrHash];
2171}
2172
2173sub mxml_directorymetadata
2174{
2175    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2176
2177    # if we haven't processed the metadata when we reach the end of metadata.xml
2178    # it's because there's no particular FileSet element whose FileName matched
2179    # In which case, add a new FileSet for this FileName
2180    my $metamode = $parser->{'parameters'}->{'metamode'};
2181    if($metamode ne "done") {
2182   
2183    if ($metamode eq "override") {
2184        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2185    }
2186
2187    # If we get to here and metamode is override, this means there
2188    # was no existing value to overide => treat as an append operation
2189   
2190    # Create a new FileSet element and append to DirectoryMetadata
2191    # <FileSet>
2192    # <FileName>src_file</FileName>
2193    # <Description>
2194    # <Metadata mode="" name="">metavalue</Metadata>
2195    # </Description>
2196    # </FileSet>
2197    my $src_file = $parser->{'parameters'}->{'src_file'};
2198    my $metaname = $parser->{'parameters'}->{'metaname'};
2199    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2200    my $metadata_attr = {
2201        '_content' => $metavalue,
2202        'name' => $metaname,
2203        'mode' => "accumulate"
2204    };
2205    my $append_metadata = [ "Metadata" => $metadata_attr ];
2206    my $description_attr->{'_content'} = [ "\n\t\t   ", $append_metadata, "\n\t\t"];
2207    my $description_element = [ "Description" => $description_attr ];
2208
2209    #_content is not an attribute, it's special and holds the children of this element
2210    # including the textnode value embedded in this element if any.
2211    my $filename_attr = {'_content' => $src_file};
2212    my $filename_element = [ "FileName" => $filename_attr ];
2213
2214    my $fileset_attr = {};
2215    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2216    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2217   
2218   
2219    # get children of dirmeta, and push the new FileSet element onto it
2220    print "Appending metadata to metadata.xml\n";
2221    my $dirmeta_content = $attrHash->{'_content'};
2222    if (ref($dirmeta_content)) {
2223        # got some existing interesting nested content
2224        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2225        push(@$dirmeta_content, "\t", $fileset ,"\n");
2226    }
2227    else {
2228        #description_content is most likely a string such as "\n"
2229        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2230        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2231    }   
2232
2233    $parser->{'parameters'}->{'metamode'} = "done";
2234    }
2235    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2236    return [$tagname => $attrHash];
2237}
2238
2239
2240sub edit_metadata_xml
2241{
2242    my $self = shift @_;
2243    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2244
2245    # Set the call-back functions for the metadata tags
2246    my @rules =
2247    ( _default => 'raw',
2248          'FileName' => \&mxml_filename,
2249      'Metadata' => \&mxml_metadata,
2250      'Description' => \&mxml_description,
2251          'FileSet' => \&mxml_fileset,
2252      'DirectoryMetadata' => \&mxml_directorymetadata);
2253
2254    # use XML::Rules to add it in (read in and out again)
2255    my $parser = XML::Rules->new(rules => \@rules,
2256                 style => 'filter',
2257                                 output_encoding => 'utf8',
2258                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2259
2260    if (!-e $metadata_xml_filename) {
2261   
2262        if (open(MOUT,">$metadata_xml_filename")) {
2263           
2264            my $src_file_re = &util::filename_to_regex($src_file);
2265            # shouldn't the following also be in the above utility routine??
2266            # $src_file_re =~ s/\./\\./g;
2267       
2268            print MOUT "<?xml version=\"1.0\"?>\n";
2269            print MOUT "<DirectoryMetadata>\n";
2270            print MOUT " <FileSet>\n";
2271            print MOUT "  <FileName>$src_file_re</FileName>\n";
2272            print MOUT "  <Description>\n";
2273            print MOUT "  </Description>\n";
2274            print MOUT " </FileSet>\n";
2275            print MOUT "</DirectoryMetadata>\n";
2276
2277            close(MOUT);
2278        }
2279        else {
2280            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2281        }
2282    }
2283   
2284   
2285    my $xml_in = "";
2286    if (!open(MIN,"<$metadata_xml_filename")) {
2287        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2288    }
2289    else {
2290        # Read them in
2291        my $line;
2292        while (defined ($line=<MIN>)) {
2293            $xml_in .= $line;
2294        }
2295        close(MIN);
2296
2297        # Filter with the call-back functions
2298        my $xml_out = "";
2299
2300        my $MOUT;
2301        if (!open($MOUT,">$metadata_xml_filename")) {
2302            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2303        }
2304        else {
2305            binmode($MOUT,":utf8");
2306
2307            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2308            # At the moment, I will just hack it!
2309            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2310            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2311            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2312            #print MOUT $xml_out;
2313
2314            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2315                              metapos => $metapos,
2316                      metavalue => $metavalue,
2317                      metamode => $metamode,
2318                      src_file => $src_file,
2319                      prevmetavalue => $prevmetavalue,
2320                      current_file => undef} );
2321            close($MOUT);       
2322        }
2323   }
2324}
2325
2326
2327sub set_import_metadata
2328{
2329    my $self = shift @_;
2330   
2331    my $username  = $self->{'username'};
2332    my $collect   = $self->{'collect'};
2333    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2334   
2335    if ($baseaction::authentication_enabled) {
2336    # Ensure the user is allowed to edit this collection
2337    $self->authenticate_user($username, $collect);
2338    }
2339
2340    # Make sure the collection isn't locked by someone else
2341    $self->lock_collection($username, $collect);
2342 
2343    $self->_set_import_metadata(@_);
2344
2345    # Release the lock once it is done
2346    $self->unlock_collection($username, $collect);
2347   
2348}
2349
2350sub set_import_metadata_array
2351{
2352    my $self = shift @_;
2353
2354    my $username  = $self->{'username'};
2355    my $collect   = $self->{'collect'};
2356    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2357#    my $gsdlhome  = $self->{'gsdlhome'};
2358
2359    if ($baseaction::authentication_enabled) {
2360    # Ensure the user is allowed to edit this collection   
2361    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2362    }
2363
2364    my $site = $self->{'site'};
2365    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2366   
2367    $gsdl_cgi->checked_chdir($collect_dir);
2368
2369    # Make sure the collection isn't locked by someone else
2370    $self->lock_collection($username, $collect);
2371
2372    $self->_set_import_metadata_array(@_);
2373
2374    # Release the lock once it is done
2375    $self->unlock_collection($username, $collect);
2376
2377}
2378
2379
2380sub _set_import_metadata_array
2381{
2382    my $self = shift @_;
2383
2384    my $collect   = $self->{'collect'};
2385    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2386
2387    my $site = $self->{'site'};
2388    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2389   
2390    # look up additional args
2391   
2392    my $infodbtype = $self->{'infodbtype'};
2393   
2394    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2395    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2396   
2397    my $json_str = $self->{'json'};
2398    my $doc_array = decode_json $json_str;
2399   
2400    my $global_status = 0;
2401    my $global_mess = "";
2402   
2403    my @all_docids = ();
2404   
2405    foreach my $doc_array_rec ( @$doc_array )
2406    {
2407    my $status = -1;
2408    my $docid = $doc_array_rec->{'docid'};
2409   
2410    my ($docid_root,$docid_secnum);
2411    if(defined $docid) {   
2412        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2413        # as yet no support for setting subsection metadata in metadata.xml
2414        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2415        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2416        next; # skip this docid in for loop
2417        }
2418    }
2419
2420    push(@all_docids,$docid); # docid_root rather
2421   
2422    my $metaname = $doc_array_rec->{'metaname'};
2423    if (defined $metaname) {
2424        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2425        my $metavalue = $doc_array_rec->{'metavalue'};
2426        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2427
2428        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2429        # make "accumulate" the default (less destructive, as won't actually
2430        # delete any existing values)
2431        $metamode = "accumulate";
2432        }
2433
2434        # adding metapos and prevmetavalue support to import_metadata subroutines
2435        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2436        my $prevmetavalue = $self->{'prevmetavalue'};
2437
2438        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2439       
2440    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2441        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2442       
2443        foreach my $metatable_rec ( @$metatable ) {
2444        $metaname  = $metatable_rec->{'metaname'};
2445        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2446        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2447            # make "accumulate" the default (less destructive, as won't actually
2448            # delete any existing values)
2449            $metamode = "accumulate";
2450        }
2451
2452        # No support for metapos and prevmetavalue in the JSON metatable substructure
2453        my $metapos = undef;
2454        my $prevmetavalue = undef;
2455        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2456       
2457        foreach my $metavalue ( @$metavals ) {
2458            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2459
2460            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2461            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2462            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2463            # metavalue for this metaname will be lost
2464            $metamode = "accumulate";
2465            }
2466        }
2467        }
2468    }       
2469    }
2470
2471    # always a success message
2472    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2473    $gsdl_cgi->generate_ok_message($mess);
2474}
2475
2476# always returns true (1)
2477sub set_import_metadata_entry
2478{
2479    my $self = shift @_;
2480    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2481
2482    my $info_mess = <<RAWEND;
2483****************************
2484  set_import_metadata_entry()
2485****************************
2486collect = $collect
2487collect_dir = $collect_dir
2488infodbtype = $infodbtype
2489arcinfo_doc_filename = $arcinfo_doc_filename
2490docid = $docid
2491metaname = $metaname
2492metapos = $metapos
2493metavalue = $metavalue
2494metamode = $metamode
2495prevmetavalue = $prevmetavalue
2496****************************
2497RAWEND
2498
2499    $gsdl_cgi->generate_message($info_mess);
2500
2501    # import works with metadata.xml which can have inherited metadata
2502    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2503    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2504    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2505    # what they're doing if they provide a metapos.
2506    if(defined $metapos) {
2507    print STDERR "@@@@ WARNING: metapos defined.\n";
2508    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2509    }
2510
2511    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2512    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2513    my $metadata_xml_file;
2514    my $import_filename = undef;
2515   
2516    if (defined $docid) {
2517    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2518    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2519
2520    # This now stores the full pathname
2521    $import_filename = $doc_rec->{'src-file'}->[0];
2522    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2523         # as the array version of the method doesn't support the -f parameter yet
2524    my $import_file  = $self->{'f'};
2525    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2526    }
2527   
2528    # figure out correct metadata.xml file [?]
2529    # Assuming the metadata.xml file is next to the source file
2530    # Note: This will not work if it is using the inherited metadata from the parent folder
2531    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2532    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2533   
2534    # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2535    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2536    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2537    if(!defined $metapos && $metamode eq "override") {
2538##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2539    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2540
2541    }
2542
2543    # Edit the metadata.xml
2544    # Modified by Jeffrey from DL Consulting
2545    # Handle the case where there is one metadata.xml file for multiple FileSets
2546    # The XML filter needs to know whether it is in the right FileSet
2547    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2548    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2549    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2550                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2551    #return 0;
2552    return $metadata_xml_filename;
2553}
2554
2555sub _remove_import_metadata
2556{
2557    my $self = shift @_;
2558
2559    my $collect   = $self->{'collect'};
2560    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2561#   my $gsdlhome  = $self->{'gsdlhome'};
2562    my $infodbtype = $self->{'infodbtype'};
2563   
2564    # Obtain the collect dir
2565    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2566    my $site = $self->{'site'};
2567    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2568   
2569    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2570    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2571
2572    # look up additional args
2573    my $docid = $self->{'d'};
2574    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2575    {
2576        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2577    }
2578   
2579    my $metaname = $self->{'metaname'};
2580    my $metapos = $self->{'metapos'};
2581    my $metavalue = $self->{'metavalue'};
2582    if(defined $metavalue) {
2583        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2584    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2585        $metapos = 0;
2586    }
2587    my $metamode = $self->{'metamode'} || undef;
2588
2589    # import works with metadata.xml which can have inherited metadata
2590    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2591    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2592    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2593    # what they're doing if they provide a metapos.
2594    if(defined $metapos) {
2595        print STDERR "@@@@ WARNING: metapos defined.\n";
2596        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2597    }
2598   
2599    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2600    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2601    my $metadata_xml_file;
2602    my $import_filename = undef;
2603    if (defined $docid)
2604    {
2605        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2606        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2607
2608        # This now stores the full pathname
2609        $import_filename = $doc_rec->{'src-file'}->[0];
2610    }
2611
2612    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2613    {
2614        $gsdl_cgi->generate_error("There is no metadata\n");
2615    }
2616   
2617    # figure out correct metadata.xml file [?]
2618    # Assuming the metadata.xml file is next to the source file
2619    # Note: This will not work if it is using the inherited metadata from the parent folder
2620    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2621    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2622   
2623    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2624   
2625    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2626    $mess .= "  $metaname";
2627    $mess .= " = $metavalue\n";
2628   
2629    $gsdl_cgi->generate_ok_message($mess);
2630
2631    #return $status; # in case calling functions have a use for this
2632}
2633
2634sub remove_import_metadata
2635{
2636    my $self = shift @_;
2637   
2638    my $username = $self->{'username'};
2639    my $collect   = $self->{'collect'};
2640    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2641   
2642    if ($baseaction::authentication_enabled) {
2643        # Ensure the user is allowed to edit this collection       
2644        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2645    }
2646
2647    # Make sure the collection isn't locked by someone else
2648    $self->lock_collection($username, $collect);
2649   
2650    $self->_remove_import_metadata(@_);
2651
2652    # Release the lock once it is done
2653    $self->unlock_collection($username, $collect);
2654
2655}
2656
2657sub remove_from_metadata_xml
2658{
2659    my $self = shift @_;
2660    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2661    # metamode generally has no meaning for removing meta, but is used by set_meta
2662    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2663
2664    # Set the call-back functions for the metadata tags
2665    my @rules =
2666    (
2667        _default => 'raw',
2668        'Metadata' => \&rfmxml_metadata,
2669        'FileName' => \&mxml_filename
2670    );
2671       
2672    my $parser = XML::Rules->new
2673    (
2674        rules => \@rules,
2675        style => 'filter',
2676        output_encoding => 'utf8',
2677     #normalisespaces => 1,
2678            stripspaces => 2|0|0 # ineffectual
2679    );
2680   
2681    my $xml_in = "";
2682    if (!open(MIN,"<$metadata_xml_filename"))
2683    {
2684        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2685    }
2686    else
2687    {
2688        # Read them in
2689        my $line;
2690        while (defined ($line=<MIN>)) {
2691            $xml_in .= $line;
2692        }
2693        close(MIN);
2694
2695        # Filter with the call-back functions
2696        my $xml_out = "";
2697
2698        my $MOUT;
2699        if (!open($MOUT,">$metadata_xml_filename")) {
2700            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2701        }
2702        else {
2703            binmode($MOUT,":utf8");
2704            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2705            close($MOUT);       
2706        }
2707    }
2708}
2709
2710sub rfmxml_metadata
2711{
2712    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2713
2714    # metadata.xml does not handle subsections
2715
2716    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2717    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2718        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2719    {
2720        if (!defined $parser->{'parameters'}->{'poscount'})
2721        {
2722            $parser->{'parameters'}->{'poscount'} = 0;
2723        }
2724        else
2725        {
2726            $parser->{'parameters'}->{'poscount'}++;
2727        }
2728
2729        # if overriding but no metapos, then clear all the meta for this metaname
2730        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2731            return [];
2732        }
2733   
2734        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2735        {
2736            return [];
2737        }
2738
2739        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2740        {
2741            return [];
2742        }       
2743    }
2744
2745    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2746    return [$tagname => $attrHash];
2747}
2748
2749sub _remove_live_metadata
2750{
2751    my $self = shift @_;
2752
2753    my $collect   = $self->{'collect'};
2754    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2755#    my $gsdlhome  = $self->{'gsdlhome'};
2756    my $infodbtype = $self->{'infodbtype'};
2757
2758    # Obtain the collect dir
2759    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2760    my $site = $self->{'site'};
2761    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2762
2763   
2764    # look up additional args
2765    my $docid     = $self->{'d'};
2766    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2767      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2768    }
2769   
2770    # Generate the dbkey
2771    my $metaname  = $self->{'metaname'};
2772    my $dbkey = "$docid.$metaname";
2773
2774    # To people who know $collect_tail please add some comments
2775    # Obtain the live gdbm_db path
2776    my $collect_tail = $collect;
2777    $collect_tail =~ s/^.*[\/\\]//;
2778    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2779    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2780
2781    # Remove the key
2782    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2783    my $status = system($cmd);
2784    if ($status != 0) {
2785        # Catch error if gdbmdel failed
2786    my $mess = "Failed to set metadata key: $dbkey\n";
2787   
2788    $mess .= "PATH: $ENV{'PATH'}\n";
2789    $mess .= "cmd = $cmd\n";
2790    $mess .= "Exit status: $status\n";
2791    $mess .= "System Error Message: $!\n";
2792
2793    $gsdl_cgi->generate_error($mess);
2794    }
2795    else {
2796    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2797    }
2798
2799}
2800
2801sub remove_live_metadata
2802{
2803    my $self = shift @_;
2804
2805    my $username  = $self->{'username'};
2806    my $collect   = $self->{'collect'};
2807    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2808    my $gsdlhome  = $self->{'gsdlhome'};
2809   
2810    if ($baseaction::authentication_enabled) {
2811    # Ensure the user is allowed to edit this collection   
2812    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2813    }
2814
2815    # Make sure the collection isn't locked by someone else
2816    $self->lock_collection($username, $collect);
2817
2818    $self->_remove_live_metadata(@_);
2819
2820    $self->unlock_collection($username, $collect);
2821}
2822
2823sub remove_metadata
2824{
2825    my $self = shift @_;
2826
2827    my $where = $self->{'where'};
2828    if(!$where) {
2829    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2830    return;
2831    }
2832
2833    my $username  = $self->{'username'};
2834    my $collect   = $self->{'collect'};
2835    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2836
2837    if ($baseaction::authentication_enabled) {
2838    # Ensure the user is allowed to edit this collection   
2839    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2840    }
2841
2842    # Make sure the collection isn't locked by someone else
2843    $self->lock_collection($username, $collect);
2844
2845    # check which directories need to be processed, specified in $where as
2846    # any combination of import|archives|index|live
2847    if($where =~ m/import/) {
2848    $self->_remove_import_metadata(@_);     
2849    }
2850    if($where =~ m/archives/) {
2851    $self->_remove_archives_metadata(@_);       
2852   }
2853    if($where =~ m/index/) {
2854    $self->_remove_index_metadata(@_);     
2855    }
2856
2857    # Release the lock once it is done
2858    $self->unlock_collection($username, $collect);
2859}
2860
2861# the internal version, without authentication
2862sub _remove_index_metadata
2863{   
2864    my $self = shift @_;
2865
2866    my $collect   = $self->{'collect'};
2867    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2868#    my $gsdlhome  = $self->{'gsdlhome'};
2869    my $infodbtype = $self->{'infodbtype'};
2870   
2871    # Obtain the collect dir
2872    my $site = $self->{'site'};
2873    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2874    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2875
2876        # look up additional args
2877    my $docid     = $self->{'d'};
2878    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2879      $gsdl_cgi->generate_error("No docid (d=...) specified.");
2880    }
2881    my $metaname  = $self->{'metaname'};
2882    my $metapos   = $self->{'metapos'};
2883    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2884
2885    # To people who know $collect_tail please add some comments
2886    # Obtain the path to the database
2887    my $collect_tail = $collect;
2888    $collect_tail =~ s/^.*[\/\\]//;
2889    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2890    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2891
2892    # Read the docid entry
2893    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2894
2895    # Check to make sure the key does exist
2896    if (!defined ($doc_rec->{$metaname})) {
2897        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2898    }
2899
2900    # Obtain the specified metadata pos
2901    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2902    if(!defined $metapos && !defined $metavalue) {
2903        $metapos = 0;
2904    }
2905   
2906
2907    # consider check key is defined before deleting?
2908    # Loop through the metadata array and ignore the specified position
2909    my $filtered_metadata = [];
2910    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
2911    for (my $i=0; $i<$num_metadata_vals; $i++) {
2912    my $metaval = shift(@{$doc_rec->{$metaname}});
2913
2914    if (!defined $metavalue && $i != $metapos) {
2915        push(@$filtered_metadata,$metaval);
2916    }
2917   
2918    if(defined $metavalue && !($metavalue eq $metaval))
2919    {
2920        push(@$filtered_metadata,$metaval);
2921    }
2922    }
2923    $doc_rec->{$metaname} = $filtered_metadata;
2924
2925    ## Use the dbutil set_entry method instead of assuming the database is gdbm
2926    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2927
2928    if ($status != 0) {
2929    my $mess = "Failed to set metadata key: $docid\n";
2930   
2931    $mess .= "PATH: $ENV{'PATH'}\n";
2932    $mess .= "Exit status: $status\n";
2933    $mess .= "System Error Message: $!\n";
2934
2935    $gsdl_cgi->generate_error($mess);
2936    }
2937    else {
2938    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2939    $mess .= "  $metaname";
2940    $mess .= "->[$metapos]" if (defined $metapos);
2941
2942    $gsdl_cgi->generate_ok_message($mess);
2943    }
2944
2945    #return $status; # in case calling functions have a use for this
2946}
2947
2948sub remove_index_metadata
2949{
2950    my $self = shift @_;
2951
2952    my $username  = $self->{'username'};
2953    my $collect   = $self->{'collect'};
2954    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2955#    my $gsdlhome  = $self->{'gsdlhome'};
2956   
2957    if ($baseaction::authentication_enabled) {
2958    # Ensure the user is allowed to edit this collection   
2959    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2960    }
2961
2962    # Obtain the collect dir
2963    my $site = $self->{'site'};
2964    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2965    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2966
2967    # Make sure the collection isn't locked by someone else
2968    $self->lock_collection($username, $collect);
2969
2970    $self->_remove_index_metadata(@_);
2971
2972    # Release the lock once it is done
2973    $self->unlock_collection($username, $collect);
2974}
2975
2976
2977# Was trying to reused the codes, but the functions need to be broken
2978# down more before they can be reused, otherwise there will be too
2979# much overhead and duplicate process...
2980sub insert_metadata
2981{
2982    my $self = shift @_;
2983   
2984    my $username  = $self->{'username'};
2985    my $collect   = $self->{'collect'};
2986    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2987    my $gsdlhome  = $self->{'gsdlhome'};
2988    my $infodbtype = $self->{'infodbtype'};
2989   
2990    # If the import metadata and gdbm database have been updated, we
2991    # need to insert some notification to warn user that the the text
2992    # they see at the moment is not indexed and require a rebuild.
2993    my $rebuild_pending_macro = "_rebuildpendingmessage_";
2994
2995    if ($baseaction::authentication_enabled) {
2996    # Ensure the user is allowed to edit this collection
2997    $self->authenticate_user($username, $collect);
2998    }
2999
3000    # Obtain the collect and archive dir   
3001    my $site = $self->{'site'};
3002    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3003    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3004    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3005
3006    # Make sure the collection isn't locked by someone else
3007    $self->lock_collection($username, $collect);
3008   
3009    # Check additional args
3010    my $docid = $self->{'d'};
3011    if (!defined($docid)) {
3012    $gsdl_cgi->generate_error("No document id is specified: d=...");
3013    }
3014    my $metaname = $self->{'metaname'};
3015    if (!defined($metaname)) {
3016    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3017    }
3018    my $metavalue = $self->{'metavalue'};
3019    if (!defined($metavalue) || $metavalue eq "") {
3020    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3021    }
3022    # make "accumulate" the default (less destructive, as won't actually
3023    # delete any existing values)
3024    my $metamode = "accumulate";
3025
3026    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3027    my $metapos   = undef;
3028    my $prevmetavalue = undef;
3029
3030    #=======================================================================#
3031    # set_import_metadata [START]
3032    #=======================================================================#
3033    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3034    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3035    my $metadata_xml_file;
3036    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3037    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3038   
3039    # This now stores the full pathname
3040    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3041   
3042    # figure out correct metadata.xml file [?]
3043    # Assuming the metadata.xml file is next to the source file
3044    # Note: This will not work if it is using the inherited metadata from the parent folder
3045    my ($import_tailname, $import_dirname)
3046    = File::Basename::fileparse($import_filename);
3047    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3048
3049    # Shane's escape characters
3050    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3051    $metavalue =~ s/\,/&#44;/g;
3052    $metavalue =~ s/\:/&#58;/g;
3053    $metavalue =~ s/\|/&#124;/g;
3054    $metavalue =~ s/\(/&#40;/g;
3055    $metavalue =~ s/\)/&#41;/g;
3056    $metavalue =~ s/\[/&#91;/g;
3057    $metavalue =~ s/\\/&#92;/g;
3058    $metavalue =~ s/\]/&#93;/g;
3059    $metavalue =~ s/\{/&#123;/g;
3060    $metavalue =~ s/\}/&#125;/g;
3061    $metavalue =~ s/\"/&#34;/g;
3062    $metavalue =~ s/\`/&#96;/g;
3063    $metavalue =~ s/\n/_newline_/g;
3064
3065    # Edit the metadata.xml
3066    # Modified by Jeffrey from DL Consulting
3067    # Handle the case where there is one metadata.xml file for multiple FileSets
3068    # The XML filter needs to know whether it is in the right FileSet
3069    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3070    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3071    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3072                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3073    #=======================================================================#
3074    # set_import_metadata [END]
3075    #=======================================================================#
3076
3077
3078    #=======================================================================#
3079    # set_metadata (accumulate version) [START]
3080    #=======================================================================#
3081    # To people who know $collect_tail please add some comments
3082    # Obtain path to the database
3083    my $collect_tail = $collect;
3084    $collect_tail =~ s/^.*[\/\\]//;
3085    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3086    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3087
3088    # Read the docid entry
3089    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3090
3091    # Protect the quotes
3092    $metavalue =~ s/\"/\\\"/g;
3093
3094    # Adds the pending macro
3095    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3096
3097    # If the metadata doesn't exist, create a new one
3098    if (!defined($doc_rec->{$metaname})){   
3099    $doc_rec->{$metaname} = [ $macro_metavalue ];
3100    }
3101    # Else, let's acculumate the values
3102    else {
3103        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3104    }
3105
3106    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3107    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3108
3109    if ($status != 0) {
3110        # Catch error if gdbmget failed
3111    my $mess = "Failed to set metadata key: $docid\n";
3112   
3113    $mess .= "PATH: $ENV{'PATH'}\n";
3114    $mess .= "Exit status: $status\n";
3115    $mess .= "System Error Message: $!\n";
3116
3117    $gsdl_cgi->generate_error($mess);
3118    }
3119    else {
3120    my $mess = "insert-metadata successful: Key[$docid]\n";
3121    $mess .= "  [In metadata.xml] $metaname";
3122    $mess .= " = $metavalue\n";
3123    $mess .= "  [In database] $metaname";
3124    $mess .= " = $macro_metavalue\n";
3125    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3126        $gsdl_cgi->generate_ok_message($mess);
3127    }   
3128    #=======================================================================#
3129    # set_metadata (accumulate version) [END]
3130    #=======================================================================#
3131
3132    # Release the lock once it is done
3133    $self->unlock_collection($username, $collect);
3134}
3135
31361;
Note: See TracBrowser for help on using the browser.