root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 30058

Revision 30058, 119.6 KB (checked in by davidb, 5 years ago)

More careful checking/testing of parameters so an override is performed (in a particular case), rather than automatically switching to accumulate

Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40    require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47    #GET METHODS
48    "get-import-metadata" => {
49        'compulsory-args' => [ "d", "metaname" ],
50        'optional-args'   => [ "metapos" ] },
51
52    "get-archives-metadata" => {
53        'compulsory-args' => [ "d", "metaname" ],
54        'optional-args'   => [ "metapos" ] },
55   
56    "get-index-metadata" => {
57        'compulsory-args' => [ "d", "metaname" ],
58        'optional-args'   => [ "metapos" ] },
59
60    "get-metadata" => { # alias for get-index-metadata
61        'compulsory-args' => [ "d", "metaname" ],
62        'optional-args'   => [ "metapos" ] },
63
64    "get-live-metadata" => {
65        'compulsory-args' => [ "d", "metaname" ],
66        'optional-args'   => [ ] },
67
68    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69        'compulsory-args' => [ "json" ],
70        'optional-args'   => [ "where" ],
71        'help-string' => [
72        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73        ]},
74
75    #SET METHODS
76    "set-live-metadata" => {
77        'compulsory-args' => [ "d", "metaname", "metavalue" ],
78        'optional-args'   => [ ] },
79
80    "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81        'compulsory-args' => [ "metaname", "metavalue" ],
82        'optional-args'   => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84    "set-index-metadata" => {
85        'compulsory-args' => [ "d", "metaname", "metavalue" ],
86        'optional-args'   => [ "metapos", "metamode" ] },
87
88    "set-archives-metadata" => {
89        'compulsory-args' => [ "d", "metaname", "metavalue" ],
90        'optional-args'   => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91   
92    "set-import-metadata" => {
93        'compulsory-args' => [ "metaname", "metavalue" ],
94        'optional-args'   => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95                 
96    #SET METHODS (ARRAY)
97    "set-metadata-array" => {
98        'compulsory-args' => [ "where", "json" ],
99        'optional-args'   => [ ],
100        'help-string' => [
101        'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102       
103        'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108       
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110                     
111    "set-archives-metadata-array" => {
112        'compulsory-args' => [ "json" ],
113        'optional-args'   => [ ] },
114       
115    "set-import-metadata-array" => {
116        'compulsory-args' => [ "json" ],
117        'optional-args'   => [ ] },
118
119    "set-index-metadata-array" => {
120        'compulsory-args' => [ "json" ],
121        'optional-args'   => [ ] },
122   
123    "set-live-metadata-array" => {
124        'compulsory-args' => [ "json" ],
125        'optional-args'   => [ ] },
126       
127    #REMOVE METHODS
128    "remove-import-metadata" => {
129        'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130        'optional-args'   => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131                     
132    "remove-archives-metadata" => {
133        'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134        'optional-args'   => [ "metapos", "metavalue" ] },
135
136    "remove-live-metadata" => {
137        'compulsory-args' => [ "d", "metaname" ],
138        'optional-args'   => [ ] },
139
140    "remove-index-metadata" => {
141        'compulsory-args' => [ "d", "metaname" ],
142        'optional-args'   => [ "metapos", "metavalue" ] },
143
144    "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145        'compulsory-args' => [ "d", "metaname" ],
146        'optional-args'   => [ "where", "metapos", "metavalue" ] },
147
148    #INSERT METHODS
149    "insert-metadata" => {
150        'compulsory-args' => [ "d", "metaname", "metavalue" ],
151        'optional-args'   => [ ] }
152};
153
154
155sub new
156{
157    my $class = shift (@_);
158    my ($gsdl_cgi,$iis6_mode) = @_;
159
160    # Treat metavalue specially.  To transmit this through a GET request
161    # the Javascript side has url-encoded it, so here we need to decode
162    # it before proceeding
163
164    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171    # need to do the same with prevmetavalue
172    my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
173    my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
174    my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
175
176    $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
177    $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
178
179    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
180
181    return bless $self, $class;
182}
183
184
185sub get_live_metadata
186{
187    my $self = shift @_;
188
189    my $username  = $self->{'username'};
190    my $collect   = $self->{'collect'};
191    my $gsdl_cgi  = $self->{'gsdl_cgi'};
192    my $gsdlhome  = $self->{'gsdlhome'};
193    my $infodbtype = $self->{'infodbtype'};
194   
195    # live metadata gets/saves value scoped (prefixed) by the current usename
196    # so (for now) let's not bother to enforce authentication
197
198    # Obtain the collect dir
199    my $site = $self->{'site'};
200    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
201    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
202
203    # No locking collection when getting metadata, only when setting it
204#    $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
205
206    # look up additional args
207    my $docid  = $self->{'d'};
208    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
209       $gsdl_cgi->generate_error("No docid (d=...) specified.");
210    }
211
212    # Generate the dbkey
213    my $metaname  = $self->{'metaname'};
214    my $dbkey = "$docid.$metaname";
215
216    # To people who know $collect_tail please add some comments
217    # Obtain path to the database
218    my $collect_tail = $collect;
219    $collect_tail =~ s/^.*[\/|\\]//;
220    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
221    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
222   
223    # Obtain the content of the key
224    my $cmd = "gdbmget $infodb_file_path $dbkey";
225    if (open(GIN,"$cmd |") == 0) {
226        # Catch error if gdbmget failed
227    my $mess = "Failed to get metadata key: $metaname\n";
228    $mess .= "$!\n";
229
230    $gsdl_cgi->generate_error($mess);
231    }
232    else {
233    binmode(GIN,":utf8");
234        # Read everything in and concatenate them into $metavalue
235    my $metavalue = "";
236    my $line;
237    while (defined ($line=<GIN>)) {
238        $metavalue .= $line;
239    }
240    close(GIN);
241    chomp($metavalue); # Get rid off the tailing newlines
242    $gsdl_cgi->generate_ok_message("$metavalue");
243    }
244
245    # Release the lock once it is done
246#    $self->unlock_collection($username, $collect);
247}
248
249# just calls the index version
250sub get_metadata
251{
252    my $self = shift @_;
253    $self->get_index_metadata(@_);
254}
255
256# JSON version that will get the requested metadata values
257# from the requested source (index, import, archives or live)
258# One of the params is a JSON string and the return value is JSON too
259# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
260sub get_metadata_array
261{
262    my $self = shift @_;
263
264    my $where = $self->{'where'};
265    if (!$where) { 
266    $where = "index"; # default behaviour is to get the values from index
267    }
268
269    # Only when setting metadata do we perform authentication and do we lock the collection,
270    # not when getting metadata
271
272    # for get_meta_array, the where param can only be ONE of import, archives, index, live
273    if($where =~ m/index/) {
274    $self->_get_index_metadata_array(@_);
275    }
276    elsif($where =~ m/archives/) {
277    $self->_get_archives_metadata_array(@_);
278    }
279    elsif($where =~ m/import/) {
280    $self->_get_import_metadata_array(@_);
281    }
282    elsif($where =~ m/live/) {
283        $self->_get_live_metadata_array(@_);
284    }
285}
286
287# Unused at present. Added for completion. Tested.
288sub _get_import_metadata_array {
289   
290    my $self = shift @_;
291
292    my $collect   = $self->{'collect'};
293    my $gsdl_cgi  = $self->{'gsdl_cgi'};
294    my $site = $self->{'site'};
295    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
296   
297    # look up additional args
298    my $infodbtype = $self->{'infodbtype'};
299   
300    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
301    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
302    my $json_str      = $self->{'json'};
303    my $doc_array = decode_json $json_str;
304
305    my $json_result_str = "[";
306    my $first_doc_rec = 1;
307    foreach my $doc_array_rec ( @$doc_array ) {
308   
309    my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
310   
311    if($first_doc_rec) {
312        $first_doc_rec = 0;
313    } else {
314        $json_result_str .= ",";
315    }
316    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
317
318    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
319    $json_result_str = $json_result_str . ",\"metatable\":[";
320
321    my $first_rec = 1;
322    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
323
324        # Read the docid entry     
325        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
326        # This now stores the full pathname
327        my $import_filename = $doc_rec->{'src-file'}->[0];
328        $import_filename = &util::placeholders_to_abspath($import_filename);
329
330        # figure out correct metadata.xml file [?]
331        # Assuming the metadata.xml file is next to the source file
332        # Note: This will not work if it is using the inherited metadata from the parent folder
333        my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
334        my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
335
336
337        if($first_rec) {
338        $first_rec = 0;
339        } else {
340        $json_result_str .= ",";       
341        }
342       
343        my $metaname  = $metatable_rec->{'metaname'};
344        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
345
346        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
347        if(!defined $metapos) {
348        $metapos = 0;
349        }
350
351        # Obtain the specified metadata value(s)
352        my $metavalue;
353
354        if($metapos ne "all") { # get the value at a single metapos
355        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
356
357        #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
358       
359        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
360
361        } else {
362        my $first_metaval = 1;
363        $metapos = 0;
364        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
365
366        while (defined $metavalue && $metavalue ne "") {
367            if($first_metaval) {       
368            $first_metaval = 0;
369            } else {
370            $json_result_str .= ",";
371            }
372       
373            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
374
375            $metapos++;
376            $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
377        }
378        }
379
380        $json_result_str .= "]}"; # close metavals array and metatable record
381    }
382   
383    $json_result_str .= "]}"; # close metatable array and docid record
384    }
385
386    $json_result_str .= "]"; # close array of docids
387    $gsdl_cgi->generate_ok_message($json_result_str."\n");
388}
389
390# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
391# For more information on the format of the output, see _get_index_metadata_array, which is in use.
392sub _get_archives_metadata_array {
393
394    my $self = shift @_;
395
396    my $collect   = $self->{'collect'};
397    my $gsdl_cgi  = $self->{'gsdl_cgi'};
398    my $site = $self->{'site'};
399    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
400
401    # look up additional args   
402    my $infodbtype = $self->{'infodbtype'};
403
404    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
405    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
406
407    my $json_str      = $self->{'json'};
408    my $doc_array = decode_json $json_str;
409
410    my $json_result_str = "[";
411    my $first_doc_rec = 1;
412    foreach my $doc_array_rec ( @$doc_array ) {
413   
414    my $docid     = $doc_array_rec->{'docid'};
415   
416    if($first_doc_rec) {
417        $first_doc_rec = 0;
418    } else {
419        $json_result_str .= ",";
420    }
421    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
422
423    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
424    $json_result_str = $json_result_str . ",\"metatable\":[";
425
426    my $first_rec = 1;
427    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
428
429        my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
430        $docid_secnum = "" if (!defined $docid_secnum);
431
432        # Read the docid entry     
433        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
434        # This now stores the full pathname
435        my $doc_filename = $doc_rec->{'doc-file'}->[0];
436        $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
437
438        if($first_rec) {
439        $first_rec = 0;
440        } else {
441        $json_result_str .= ",";       
442        }
443       
444        my $metaname  = $metatable_rec->{'metaname'};
445        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
446
447        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
448        if(!defined $metapos) {
449        $metapos = 0;
450        }
451
452
453        # Obtain the specified metadata value(s)
454        my $metavalue;
455
456        if($metapos ne "all") { # get the value at a single metapos
457
458        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
459        #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
460       
461        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
462
463        } else {
464        my $first_metaval = 1;
465        $metapos = 0;
466        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
467
468        while (defined $metavalue && $metavalue ne "") {
469            if($first_metaval) {       
470            $first_metaval = 0;
471            } else {
472            $json_result_str .= ",";
473            }
474       
475            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
476
477            $metapos++;
478            $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
479        }
480        }
481
482        $json_result_str .= "]}"; # close metavals array and metatable record
483    }
484   
485    $json_result_str .= "]}"; # close metatable array and docid record
486    }
487
488    $json_result_str .= "]"; # close array of docids
489    $gsdl_cgi->generate_ok_message($json_result_str."\n");
490}
491
492
493# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
494sub _get_live_metadata_array
495{
496    my $self = shift @_;
497
498    my $collect   = $self->{'collect'};
499    my $gsdl_cgi  = $self->{'gsdl_cgi'};
500    my $site = $self->{'site'};
501    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
502
503    # look up additional args   
504    my $infodbtype = $self->{'infodbtype'};
505   
506    # To people who know $collect_tail please add some comments
507    # Obtain the path to the database
508    my $collect_tail = $collect;
509    $collect_tail =~ s/^.*[\/|\\]//;
510    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
511    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
512
513    my $json_str      = $self->{'json'};
514    my $doc_array = decode_json $json_str;
515
516    my $json_result_str = "[";
517    my $first_doc_rec = 1;
518
519    foreach my $doc_array_rec ( @$doc_array ) {
520   
521    my $docid     = $doc_array_rec->{'docid'};
522   
523    if($first_doc_rec) {
524        $first_doc_rec = 0;
525    } else {
526        $json_result_str .= ",";
527    }
528    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
529   
530    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
531    $json_result_str = $json_result_str . ",\"metatable\":[";
532   
533    my $first_rec = 1;
534    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
535        if($first_rec) {
536        $first_rec = 0;
537        } else {
538        $json_result_str .= ",";       
539        }
540       
541        my $metaname  = $metatable_rec->{'metaname'};
542        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
543       
544        # Generate the dbkey
545        my $dbkey = "$docid.$metaname";
546       
547        # metapos for get_live_metadata is always assumed to be "all".
548        # It's always going to get all the lines of metavalues associated with a metaname
549        # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
550        #my $metapos = "all";
551        my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
552        my $metavalue = "";
553       
554        # Obtain the content of the key
555        my $cmd = "gdbmget $infodb_file_path $dbkey";
556        if (open(GIN,"$cmd |") != 0) { # Success.
557       
558        binmode(GIN,":utf8");
559        # Read everything in and concatenate them into $metavalue       
560        my $line;
561        my $first_metaval = 1;
562        my $pos = 0;
563        while (defined ($line=<GIN>)) {
564            chomp($line); # Get rid off the tailing newlines
565           
566            if($metapos eq "all") {
567            if($first_metaval) {       
568                $first_metaval = 0;
569            } else {
570                $json_result_str .= ",";
571            }           
572            $metavalue = $line;
573            $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
574            } elsif($metapos == $pos) {
575            $metavalue = $line;
576            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
577            last;
578            } # else, the current $pos is not the required $metapos
579            $pos += 1;
580        }
581        close(GIN);
582        } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
583       
584        $json_result_str .= "]}"; # close metavals array and metatable record
585    }
586   
587    $json_result_str .= "]}"; # close metatable array and docid record
588    }
589
590    $json_result_str .= "]"; # close array of docids
591   
592    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
593}
594
595
596# Takes a JSON string and returns a JSON string
597# Request string is of the form:
598# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
599# Resulting string is of the form:
600# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
601sub _get_index_metadata_array
602{
603    my $self = shift @_;
604
605    my $collect   = $self->{'collect'};
606    my $gsdl_cgi  = $self->{'gsdl_cgi'};
607    my $site = $self->{'site'};
608    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610    # look up additional args   
611    my $infodbtype = $self->{'infodbtype'};
612   
613    # To people who know $collect_tail please add some comments
614    # Obtain the path to the database
615    my $collect_tail = $collect;
616    $collect_tail =~ s/^.*[\/|\\]//;
617    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
618    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
619
620    my $json_str      = $self->{'json'};
621    my $doc_array = decode_json $json_str;
622
623    my $json_result_str = "[";
624    my $first_doc_rec = 1;
625
626    foreach my $doc_array_rec ( @$doc_array ) {
627   
628    my $docid     = $doc_array_rec->{'docid'};
629   
630    if($first_doc_rec) {
631        $first_doc_rec = 0;
632    } else {
633        $json_result_str .= ",";
634    }
635    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
636
637    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
638    $json_result_str = $json_result_str . ",\"metatable\":[";
639
640    my $first_rec = 1;
641    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
642        if($first_rec) {
643        $first_rec = 0;
644        } else {
645        $json_result_str .= ",";       
646        }
647       
648        my $metaname  = $metatable_rec->{'metaname'};
649        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
650
651        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
652        if(!defined $metapos) {
653        $metapos = 0;
654        }
655
656         # Read the docid entry
657        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
658 
659        # Basically loop through and unescape_html the values
660        foreach my $k (keys %$doc_rec) {
661        my @escaped_v = ();
662        foreach my $v (@{$doc_rec->{$k}}) {
663            my $ev = &ghtml::unescape_html($v);
664            push(@escaped_v, $ev);
665        }
666        $doc_rec->{$k} = \@escaped_v;
667        }
668
669        # Obtain the specified metadata value(s)
670        my $metavalue;
671
672        if($metapos ne "all") { # get the value at a single metapos
673
674        $metavalue = $doc_rec->{$metaname}->[$metapos];
675
676        # protect any double quotes and colons in the metavalue before putting it into JSON
677        $metavalue =~ s/\"/&quot;/g if defined $metavalue;
678        $metavalue =~ s/\:/&58;/g if defined $metavalue;
679
680        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
681
682        } else {
683        my $first_metaval = 1;
684        $metapos = 0;
685        $metavalue = $doc_rec->{$metaname}->[$metapos];
686
687        while (defined $metavalue) {
688
689            # protect any double quotes and colons in the metavalue before putting it into JSON
690            $metavalue =~ s/\"/&quot;/g;
691            $metavalue =~ s/\:/&58;/g;
692
693            if($first_metaval) {       
694            $first_metaval = 0;
695            } else {
696            $json_result_str .= ",";
697            }
698       
699            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
700
701            $metapos++;
702            $metavalue = $doc_rec->{$metaname}->[$metapos];
703        }
704        }
705
706        $json_result_str .= "]}"; # close metavals array and metatable record
707    }
708   
709    $json_result_str .= "]}"; # close metatable array and docid record
710    }
711
712    $json_result_str .= "]"; # close array of docids
713
714    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
715}
716
717
718sub get_index_metadata
719{
720    my $self = shift @_;
721
722    my $username  = $self->{'username'};
723    my $collect   = $self->{'collect'};
724    my $gsdl_cgi  = $self->{'gsdl_cgi'};
725    my $gsdlhome  = $self->{'gsdlhome'};
726
727    # Obtain the collect dir
728    my $site = $self->{'site'};
729    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
730    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
731
732    # look up additional args
733    my $docid     = $self->{'d'};
734    my $metaname  = $self->{'metaname'};
735    my $metapos   = $self->{'metapos'};
736    my $infodbtype = $self->{'infodbtype'};
737
738    # To people who know $collect_tail please add some comments
739    # Obtain path to the database
740    my $collect_tail = $collect;
741    $collect_tail =~ s/^.*[\/|\\]//;
742    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
743    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
744
745    # Read the docid entry
746    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
747 
748    # Basically loop through and unescape_html the values
749    foreach my $k (keys %$doc_rec) {
750    my @escaped_v = ();
751    foreach my $v (@{$doc_rec->{$k}}) {
752        my $ev = &ghtml::unescape_html($v);
753        push(@escaped_v, $ev);
754    }
755    $doc_rec->{$k} = \@escaped_v;
756    }
757
758    # Obtain the specified metadata value
759    $metapos = 0 if (!defined $metapos);
760    my $metavalue = $doc_rec->{$metaname}->[$metapos];
761    $gsdl_cgi->generate_ok_message("$metavalue");
762   
763}
764
765
766sub get_import_metadata
767{
768    my $self = shift @_;
769
770    my $username  = $self->{'username'};
771    my $collect   = $self->{'collect'};
772    my $gsdl_cgi  = $self->{'gsdl_cgi'};
773    my $gsdlhome  = $self->{'gsdlhome'};
774
775    # Obtain the collect dir
776    my $site = $self->{'site'};
777    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
778    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
779
780    # look up additional args
781    my $docid     = $self->{'d'};
782    my $metaname  = $self->{'metaname'};
783    my $metapos = $self->{'metapos'};
784    $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
785
786    my $infodbtype = $self->{'infodbtype'};
787    if (!defined $docid)
788    {
789        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
790    }
791
792    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
793    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
794    my $metadata_xml_file;
795    my $import_filename = undef;
796   
797
798    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
799    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
800    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
801
802    # This now stores the full pathname
803    $import_filename = $doc_rec->{'src-file'}->[0];
804    $import_filename = &util::placeholders_to_abspath($import_filename);
805
806    # figure out correct metadata.xml file [?]
807    # Assuming the metadata.xml file is next to the source file
808    # Note: This will not work if it is using the inherited metadata from the parent folder
809    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
810    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
811
812    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
813
814}
815
816sub get_metadata_from_metadata_xml
817{
818    my $self = shift @_;
819    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
820   
821    my @rules =
822    (
823        _default => 'raw',
824        'Metadata' => \&gfmxml_metadata,
825        'FileName' => \&mxml_filename
826    );
827       
828    my $parser = XML::Rules->new
829    (
830        rules => \@rules,
831        output_encoding => 'utf8'
832    );
833   
834    my $xml_in = "";
835    if (!open(MIN,"<$metadata_xml_filename"))
836    {
837        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
838    }
839    else
840    {
841        # Read them in
842        my $line;
843        while (defined ($line=<MIN>)) {
844            $xml_in .= $line;
845        }
846        close(MIN);
847
848        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
849       
850        if(defined $parser->{'pad'}->{'metavalue'})
851        {
852            return $parser->{'pad'}->{'metavalue'};
853        }
854        else
855        {
856            return "";
857        }
858    }
859}
860
861sub gfmxml_metadata
862{
863    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
864
865    # no subsection support yet in metadata.xml
866
867    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
868    {
869        if (!defined $parser->{'parameters'}->{'poscount'})
870        {
871            $parser->{'parameters'}->{'poscount'} = 0;
872        }
873        else
874        {
875            $parser->{'parameters'}->{'poscount'}++;
876        }
877   
878        # gets the first value by default, since metapos defaults to 0
879        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
880        {
881            if($parser->{'parameters'}->{'metapos'} > 0) {
882            print STDERR "@@@@ WARNING: non-zero metapos.\n";
883            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
884            }
885            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
886        }
887    }
888}
889
890sub get_archives_metadata
891{
892    my $self = shift @_;
893
894    my $username  = $self->{'username'};
895    my $collect   = $self->{'collect'};
896    my $gsdl_cgi  = $self->{'gsdl_cgi'};
897#   my $gsdlhome  = $self->{'gsdlhome'};
898    my $infodbtype = $self->{'infodbtype'};
899
900    # Obtain the collect dir
901    my $site = $self->{'site'};
902    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
903   
904    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
905
906    # look up additional args
907    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
908    $docid_secnum = "" if (!defined $docid_secnum);
909   
910    my $metaname = $self->{'metaname'};
911    my $metapos = $self->{'metapos'};
912    $metapos = 0 if (!defined $metapos);
913   
914    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
915    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
916
917    # This now stores the full pathname
918    my $doc_filename = $doc_rec->{'doc-file'}->[0];
919
920    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
921
922}
923
924sub get_metadata_from_archive_xml
925{
926    my $self = shift @_;
927    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
928   
929    my @start_rules = ('Section' => \&dxml_start_section);
930   
931    my @rules =
932    (
933        _default => 'raw',
934        'Metadata' => \&gfdxml_metadata
935    );
936       
937    my $parser = XML::Rules->new
938    (
939        start_rules => \@start_rules,
940        rules => \@rules,
941        output_encoding => 'utf8'
942    );
943   
944    my $xml_in = "";
945    if (!open(MIN,"<$doc_xml_filename"))
946    {
947        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
948    }
949    else
950    {
951        # Read them in
952        my $line;
953        while (defined ($line=<MIN>)) {
954            $xml_in .= $line;
955        }
956        close(MIN);
957
958        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
959       
960        if(defined $parser->{'pad'}->{'metavalue'})
961        {
962            return $parser->{'pad'}->{'metavalue'};
963        }
964        else
965        {
966            return "";
967        }
968    }
969}
970
971sub gfdxml_metadata
972{
973    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
974   
975    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
976    {
977        return;
978    }
979
980    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
981    {
982        if (!defined $parser->{'parameters'}->{'poscount'})
983        {
984            $parser->{'parameters'}->{'poscount'} = 0;
985        }
986        else
987        {
988            $parser->{'parameters'}->{'poscount'}++;
989        }
990    }
991
992    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
993    {   
994        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
995    }
996}
997
998sub _set_live_metadata
999{
1000    my $self = shift @_;
1001
1002    my $collect   = $self->{'collect'};
1003    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1004    #my $gsdlhome  = $self->{'gsdlhome'};
1005    my $infodbtype = $self->{'infodbtype'};
1006
1007    # Obtain the collect dir
1008    my $site = $self->{'site'};
1009    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1010    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1011
1012
1013    # look up additional args
1014    my $docid     = $self->{'d'};
1015    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1016      $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1017    }
1018    my $metavalue = $self->{'metavalue'};
1019
1020    # Generate the dbkey   
1021    my $metaname  = $self->{'metaname'};
1022    my $dbkey = "$docid.$metaname";
1023
1024    # To people who know $collect_tail please add some comments
1025    # Obtain path to the database
1026    my $collect_tail = $collect;
1027    $collect_tail =~ s/^.*[\/|\\]//;
1028    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1029    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1030
1031    # Set the new value
1032    my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1033    my $status = system($cmd);
1034    if ($status != 0) {
1035        # Catch error if gdbmget failed
1036    my $mess = "Failed to set metadata key: $dbkey\n";
1037
1038    $mess .= "PATH: $ENV{'PATH'}\n";
1039    $mess .= "cmd = $cmd\n";
1040    $mess .= "Exit status: $status\n";
1041    $mess .= "System Error Message: $!\n";
1042
1043    $gsdl_cgi->generate_error($mess);
1044    }
1045    else {
1046    $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
1047    }
1048
1049    #return $status; # in case calling functions have any further use for this
1050}
1051
1052sub set_live_metadata
1053{
1054    my $self = shift @_;
1055
1056    my $username  = $self->{'username'};
1057    my $collect   = $self->{'collect'};
1058    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1059 
1060    if ($baseaction::authentication_enabled) {
1061    # Ensure the user is allowed to edit this collection   
1062    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1063    }
1064
1065    # Make sure the collection isn't locked by someone else
1066    $self->lock_collection($username, $collect);
1067
1068    $self->_set_live_metadata(@_);
1069
1070    # Release the lock once it is done
1071    $self->unlock_collection($username, $collect);
1072}
1073
1074sub set_index_metadata_entry
1075{
1076    print STDERR "1\n";
1077    my $self = shift @_;
1078    my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
1079   
1080    # To people who know $collect_tail please add some comments
1081    # Obtain path to the database
1082    my $collect_tail = $collect;
1083    $collect_tail =~ s/^.*[\/|\\]//;
1084    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1085    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1086   
1087    print STDERR "2\n";
1088#   print STDERR "**** infodb file path = $infodb_file_path\n";
1089#   print STDERR "***** infodb type = $infodbtype\n";
1090   
1091    # Read the docid entry
1092    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1093   
1094    # Set the metadata value
1095    if (defined $metapos) {
1096    print STDERR "3\n";
1097    # if metamode=accumulate AND metapos, warn user and then use metapos
1098    if (defined $metamode && $metamode eq "accumulate") {
1099        print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1100        print STDERR "**** Proceeding by using metapos\n";
1101    }
1102    $doc_rec->{$metaname}->[$metapos] = $metavalue;
1103    }
1104    elsif (defined $prevmetavalue) {
1105    print STDERR "4\n";
1106        my $array = $doc_rec->{$metaname};
1107        my $length = @$array;
1108
1109        my $found = 0;
1110        for (my $i = 0; $i < $length; $i++){
1111            if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1112                $doc_rec->{$metaname}->[$i] = $metavalue;
1113                $found = 1;
1114                last;               
1115            }
1116        }
1117
1118        if($found == 0){
1119            $doc_rec->{$metaname} = [ $metavalue ];
1120        }
1121    }
1122    elsif (defined $metamode && $metamode eq "override") {
1123    print STDERR "5\n";
1124    $doc_rec->{$metaname} = [ $metavalue ];
1125    }
1126    else { # default for index was to override, but because accumulate is less destructive,
1127    # and because accumulate is the default for archives and import, that's the new default for index too
1128    print STDERR "6\n";
1129    if(defined $doc_rec->{$metaname}) {
1130        push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1131    } else {
1132        $doc_rec->{$metaname} = [ $metavalue ];
1133    }
1134    }
1135    print STDERR "6\n";
1136 
1137    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1138   
1139    return $status;
1140   
1141}
1142
1143sub _set_import_metadata
1144{
1145    my $self = shift @_;
1146
1147    my $collect   = $self->{'collect'};
1148    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1149    my $infodbtype = $self->{'infodbtype'};
1150#    my $gsdlhome  = $self->{'gsdlhome'};
1151     
1152    # Obtain the collect and archive dir   
1153    my $site = $self->{'site'};
1154    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1155    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1156    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1157   
1158    # look up additional args
1159    # want either d= or f=
1160    my $docid  = $self->{'d'};
1161    my ($docid_root,$docid_secnum);
1162    if(defined $docid) {   
1163    ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
1164    # as yet no support for setting subsection metadata in metadata.xml
1165    if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1166        $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
1167        return;
1168    }
1169    }
1170
1171    my $import_file  = $self->{'f'};
1172    if ((!defined $docid) && (!defined $import_file)) {
1173    $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1174    }
1175
1176    # Get the parameters and set default mode to "accumulate"
1177    my $metaname   = $self->{'metaname'};
1178    my $metavalue  = $self->{'metavalue'};
1179##    $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1180    $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1181   
1182    my $metamode   = $self->{'metamode'};
1183    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1184    # make "accumulate" the default (less destructive, as won't actually
1185    # delete any existing values)
1186    $metamode = "accumulate";
1187    }
1188
1189    # adding metapos and prevmetavalue support to import_metadata subroutines
1190    my $metapos   = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1191    my $prevmetavalue = $self->{'prevmetavalue'};
1192
1193    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1194    my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1195
1196    my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1197    $mess .= "  $metaname";
1198    $mess .= " = $metavalue";
1199    $mess .= " ($metamode)\n";
1200
1201    $gsdl_cgi->generate_ok_message($mess);
1202
1203    #return $status; # in case calling functions have any further use for this
1204}
1205
1206# the version of set_index_meta that doesn't do authentication
1207sub _set_archives_metadata
1208{
1209    my $self = shift @_;
1210
1211    my $collect   = $self->{'collect'};
1212    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1213    my $infodbtype = $self->{'infodbtype'};
1214   
1215    # Obtain the collect and archive dir   
1216    my $site = $self->{'site'};
1217    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1218    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1219
1220    # look up additional args
1221    my $docid  = $self->{'d'};
1222    my $metaname   = $self->{'metaname'};
1223    my $metavalue  = $self->{'metavalue'};
1224    my $prevmetavalue = $self->{'prevmetavalue'};
1225   
1226    my $metapos    = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1227                                  # Don't append "|| undef", since if metapos=0 it will then be set to undef
1228
1229    my $metamode   = $self->{'metamode'};
1230    if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1231    # make "accumulate" the default (less destructive, as won't actually
1232    # delete any existing values)
1233    $metamode = "accumulate";
1234    }
1235
1236    my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1237                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1238   
1239    if ($status == 0) {
1240    my $mess = "set-archives-metadata successful: Key[$docid]\n";
1241    $mess .= "  $metaname";
1242    $mess .= "->[$metapos]" if (defined $metapos);
1243    $mess .= " = $metavalue";
1244    $mess .= " ($metamode)\n";
1245   
1246    $gsdl_cgi->generate_ok_message($mess); 
1247    }
1248    else {
1249    my $mess .= "Failed to set archives metadata key: $docid\n";
1250    $mess .= "Exit status: $status\n";
1251    if(defined $self->{'error_msg'}) {
1252        $mess .= "Error Message: $self->{'error_msg'}\n";
1253    } else {
1254        $mess .= "System Error Message: $!\n";
1255    }
1256    $mess .= "-" x 20 . "\n";
1257   
1258    $gsdl_cgi->generate_error($mess);
1259    }
1260
1261    #return $status; # in case calling functions have any further use for this
1262}
1263
1264
1265# the version of set_index_meta that doesn't do authentication
1266sub _set_index_metadata
1267{
1268    print STDERR "START SET INDEX METADATA\n";
1269    my $self = shift @_;
1270
1271    my $collect   = $self->{'collect'};
1272    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1273
1274    my $site = $self->{'site'};
1275    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1276
1277    # look up additional args
1278    my $docid     = $self->{'d'};
1279    my $metaname  = $self->{'metaname'};
1280    my $metapos   = $self->{'metapos'}; # undef has meaning
1281    my $metavalue = $self->{'metavalue'};
1282    my $infodbtype = $self->{'infodbtype'};
1283    my $metamode  = $self->{'metamode'};
1284    my $prevmetavalue = $self->{'prevmetavalue'};
1285
1286    print STDERR "SETTING INDEX METADATA ENTRY\n";
1287    my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1288    print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
1289    if ($status != 0) {
1290        # Catch error if set infodb entry failed
1291    my $mess = "Failed to set metadata key: $docid\n";
1292   
1293    $mess .= "PATH: $ENV{'PATH'}\n";
1294    $mess .= "Exit status: $status\n";
1295    $mess .= "System Error Message: $!\n";
1296   
1297    $gsdl_cgi->generate_error($mess);
1298    }
1299    else {
1300    my $mess = "set-index-metadata successful: Key[$docid]\n";
1301    $mess .= "  $metaname";
1302    $mess .= "->[$metapos]" if (defined $metapos);
1303    $mess .= " = $metavalue\n";
1304   
1305    $gsdl_cgi->generate_ok_message($mess);
1306    }
1307
1308    print STDERR "END SET INDEX METADATA\n";
1309    #return $status; # in case calling functions have any further use for this
1310}
1311
1312sub set_index_metadata
1313{
1314    my $self = shift @_;
1315
1316    my $username  = $self->{'username'};
1317    my $collect   = $self->{'collect'};
1318    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1319    #my $gsdlhome  = $self->{'gsdlhome'};
1320
1321    if ($baseaction::authentication_enabled) {
1322    # Ensure the user is allowed to edit this collection   
1323    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1324    }
1325
1326    my $site = $self->{'site'};
1327    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1328   
1329    $gsdl_cgi->checked_chdir($collect_dir);
1330
1331    # Obtain the collect dir
1332    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1333
1334    # Make sure the collection isn't locked by someone else
1335    $self->lock_collection($username, $collect);
1336
1337    $self->_set_index_metadata(@_);
1338   
1339    # Release the lock once it is done
1340    $self->unlock_collection($username, $collect);
1341}
1342
1343# call this to set the metadata for a combination of dirs archives, import or index, or live
1344# if none specified, defaults to index which was the original behaviour of set_metadata.
1345sub set_metadata
1346{
1347    my $self = shift @_;
1348
1349    # Testing that not defining a variable, setting it to "" or to "  " all return false
1350    # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1351
1352    my $where = $self->{'where'};
1353    if(!$where) {   
1354    $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1355    return;
1356    }
1357
1358    # authenticate and lock collection once, even if processing multiple dirs
1359    my $username  = $self->{'username'};
1360    my $collect   = $self->{'collect'};
1361    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1362   
1363    if ($baseaction::authentication_enabled) {
1364    # Ensure the user is allowed to edit this collection   
1365    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1366    }
1367   
1368    if($where =~ m/index/) {
1369    my $site = $self->{'site'};
1370    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1371    $gsdl_cgi->checked_chdir($collect_dir);
1372    }
1373
1374    # Make sure the collection isn't locked by someone else
1375    $self->lock_collection($username, $collect);
1376
1377
1378    # now at last can set the metadata. $where can specify multiple
1379    # $where is of the form: import|archives|index, or a subset thereof
1380
1381    #my @whichdirs = split('\|', $where);
1382
1383    # just check whether $where contains import/archives/index/live in turn, and
1384    # for each case, process it accordingly
1385    if($where =~ m/import/) {
1386    $self->_set_import_metadata(@_);       
1387    }
1388
1389    if($where =~ m/archives/) {
1390
1391    # look up docID arg which is optional to set_metadata because it's optional
1392    # to set_import, but which is compulsory to set_archives_metadata
1393    my $docid     = $self->{'d'};
1394    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1395        $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1396    }
1397    # we have a docid, so can set archives meta
1398    $self->_set_archives_metadata(@_); 
1399    }
1400
1401    if($where =~ m/index/) {
1402   
1403    # look up docID arg which is optional to set_metadata because it's optional
1404    # to set_import, but which is compulsory to set_archives_metadata
1405    my $docid     = $self->{'d'};
1406    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1407        $gsdl_cgi->generate_error("No docid (d=...) specified.");
1408    }
1409    # we have a docid, so can set index meta
1410    $self->_set_index_metadata(@_);
1411    }   
1412
1413    if($where =~ m/live/) {
1414    $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1415    }
1416
1417    # Release the lock once it is done
1418    $self->unlock_collection($username, $collect);
1419}
1420
1421sub set_metadata_array
1422{
1423    my $self = shift @_;
1424
1425    my $where = $self->{'where'};
1426    if(!$where) {   
1427    $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1428    return;
1429    }
1430
1431    my $username  = $self->{'username'};
1432    my $collect   = $self->{'collect'};
1433    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1434
1435    if ($baseaction::authentication_enabled) {
1436    # Ensure the user is allowed to edit this collection
1437    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1438    }
1439
1440    # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1441    # But including the stmt during this code reorganisation to preserve as-is what used to happen
1442    my $site = $self->{'site'};
1443    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1444    $gsdl_cgi->checked_chdir($collect_dir);
1445
1446    # Make sure the collection isn't locked by someone else
1447    $self->lock_collection($username, $collect);
1448
1449    if($where =~ m/import/) {
1450    $self->_set_import_metadata_array(@_);
1451    }
1452    if($where =~ m/archives/) {
1453    $self->_set_archives_metadata_array(@_);
1454    }
1455    if($where =~ m/index/) {
1456    $self->_set_index_metadata_array(@_);
1457    }
1458    if($where =~ m/live/) {
1459        $self->_set_live_metadata_array(@_);
1460    }
1461
1462    # Release the lock once it is done
1463    $self->unlock_collection($username, $collect);
1464}
1465
1466sub _set_index_metadata_array
1467{
1468    my $self = shift @_;
1469
1470    my $collect   = $self->{'collect'};
1471    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1472    my $site = $self->{'site'};
1473    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1474
1475   
1476    # look up additional args
1477   
1478    my $infodbtype = $self->{'infodbtype'};
1479   
1480    my $json_str      = $self->{'json'};
1481    my $doc_array = decode_json $json_str;
1482   
1483   
1484    my $global_status = 0;
1485    my $global_mess = "";
1486   
1487    my @all_docids = ();
1488   
1489    foreach my $doc_array_rec ( @$doc_array ) {
1490   
1491    my $status = -1;
1492    my $docid     = $doc_array_rec->{'docid'};
1493   
1494    push(@all_docids,$docid);
1495
1496    my $metaname  = $doc_array_rec->{'metaname'};
1497    if(defined $metaname) {
1498        my $metapos   = $doc_array_rec->{'metapos'}; # can legitimately be undef
1499        my $metavalue = $doc_array_rec->{'metavalue'};
1500        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1501
1502        $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1503    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1504        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1505       
1506        foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1507        $metaname  = $metatable_rec->{'metaname'};
1508        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1509        my $metapos = undef;
1510        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1511
1512        foreach my $metavalue ( @$metavals ) { # metavals is an array
1513            $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1514            if($metamode eq "override") { # now, having overridden the metavalue for the first,
1515            # need to accumulate subsequent metavals for this metaname, else the just-assigned
1516            # metavalue for this metaname will be lost
1517            $metamode = "accumulate";
1518            }
1519        }           
1520        }
1521    }
1522   
1523    if ($status != 0) {
1524        # Catch error if set infodb entry failed
1525        $global_status = $status;
1526        $global_mess .= "Failed to set metadata key: $docid\n";
1527        $global_mess .= "Exit status: $status\n";
1528        $global_mess .= "System Error Message: $!\n";
1529        $global_mess .= "-" x 20;
1530    }
1531    }
1532   
1533    if ($global_status != 0) {
1534    $global_mess .= "PATH: $ENV{'PATH'}\n";
1535    $gsdl_cgi->generate_error($global_mess);
1536    }
1537    else {
1538    my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1539    $gsdl_cgi->generate_ok_message($mess);
1540    }
1541}
1542
1543sub set_index_metadata_array
1544{
1545    my $self = shift @_;
1546
1547    my $username  = $self->{'username'};
1548    my $collect   = $self->{'collect'};
1549    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1550#    my $gsdlhome  = $self->{'gsdlhome'};
1551
1552    if ($baseaction::authentication_enabled) {
1553    # Ensure the user is allowed to edit this collection   
1554    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1555    }
1556
1557    my $site = $self->{'site'};
1558    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1559   
1560    $gsdl_cgi->checked_chdir($collect_dir);
1561
1562    # Obtain the collect dir
1563    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1564
1565    # Make sure the collection isn't locked by someone else
1566    $self->lock_collection($username, $collect);
1567
1568    $self->_set_index_metadata_array(@_);
1569
1570    # Release the lock once it is done
1571    $self->unlock_collection($username, $collect);
1572}
1573
1574# experimental, newly added in and untested
1575sub _set_live_metadata_array
1576{
1577    my $self = shift @_;
1578
1579    my $collect   = $self->{'collect'};
1580    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1581
1582    my $site = $self->{'site'};
1583    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1584
1585   
1586    # look up additional args
1587    my $infodbtype = $self->{'infodbtype'};
1588    # To people who know $collect_tail please add some comments
1589    # Obtain path to the database
1590    my $collect_tail = $collect;
1591    $collect_tail =~ s/^.*[\/|\\]//;
1592    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1593    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1594
1595   
1596    my $json_str      = $self->{'json'};
1597    my $doc_array = decode_json $json_str;
1598   
1599   
1600    my $global_status = 0;
1601    my $global_mess = "";
1602   
1603    my @all_docids = ();
1604
1605
1606    foreach my $doc_array_rec ( @$doc_array ) {
1607   
1608    my $status = -1;
1609    my $docid     = $doc_array_rec->{'docid'};
1610
1611    push(@all_docids,$docid);
1612
1613    my $metaname  = $doc_array_rec->{'metaname'};
1614    if(defined $metaname) {
1615        my $dbkey = "$docid.$metaname";
1616        my $metavalue = $doc_array_rec->{'metavalue'};
1617
1618        # Set the new value
1619        my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1620        $status = system($cmd);
1621
1622    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1623        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1624        foreach my $metatable_rec ( @$metatable ) {
1625        $metaname  = $metatable_rec->{'metaname'};
1626        my $dbkey = "$docid.$metaname";
1627
1628        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1629        foreach my $metavalue ( @$metavals ) {
1630             my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1631             $status = system($cmd);
1632        }
1633        }
1634       
1635    }
1636
1637    if ($status != 0) {
1638        # Catch error if gdbmget failed
1639        $global_status = $status;
1640        $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1641        $global_mess .= "Exit status: $status\n";
1642        $global_mess .= "System Error Message: $!\n";
1643        $global_mess .= "-" x 20;
1644    }
1645    }
1646   
1647    if ($global_status != 0) {
1648    $global_mess .= "PATH: $ENV{'PATH'}\n";
1649    $gsdl_cgi->generate_error($global_mess);
1650    }
1651    else {
1652    my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1653    $gsdl_cgi->generate_ok_message($mess);
1654    }
1655}
1656
1657sub set_live_metadata_array
1658{
1659    my $self = shift @_;
1660
1661    my $username  = $self->{'username'};
1662    my $collect   = $self->{'collect'};
1663    my $gsdl_cgi  = $self->{'gsdl_cgi'};
1664
1665    if ($baseaction::authentication_enabled) {
1666    # Ensure the user is allowed to edit this collection   
1667    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1668    }
1669
1670    my $site = $self->{'site'};
1671    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1672   
1673    $gsdl_cgi->checked_chdir($collect_dir);
1674
1675    # Make sure the collection isn't locked by someone else
1676    $self->lock_collection($username, $collect);
1677
1678    $self->_set_live_metadata_array(@_);
1679
1680    # Release the lock once it is done
1681    $self->unlock_collection($username, $collect);
1682}
1683
1684
1685sub dxml_metadata
1686{
1687    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1688    my $metaname = $parser->{'parameters'}->{'metaname'};
1689    my $metamode = $parser->{'parameters'}->{'metamode'};
1690   
1691###!!!    print STDERR "**** Processing closing </Metadata> tag\n";
1692   
1693    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1694   
1695    # Find the right metadata tag and checks if we are going to
1696    # override it
1697    #
1698    # Note: This over writes the first metadata block it
1699    # encountered. If there are multiple Sections in the doc.xml, it
1700    # might not behave as you would expect
1701
1702    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1703##    print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1704##    print STDERR "**** metamode = $metamode\n";
1705   
1706    if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1707    {
1708        my $name_attr = $attrHash->{'name'};
1709        # print STDOUT "*** testing: $name_attr eq $metaname ?   and  $metamode eq override ?\n";
1710        if (($name_attr eq $metaname) && ($metamode eq "override"))
1711        {
1712            if (!defined $parser->{'parameters'}->{'poscount'})
1713            {
1714                $parser->{'parameters'}->{'poscount'} = 0;
1715            }
1716            else
1717            {
1718                $parser->{'parameters'}->{'poscount'}++;
1719            }
1720           
1721            if ((defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1722                || (!defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == 0))
1723               
1724            {
1725                ##print STDERR "#### got match!!\n";
1726                # Get the value and override the current value
1727                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1728                $attrHash->{'_content'} = $metavalue;
1729               
1730                # Don't want it to wipe out any other pieces of metadata
1731                $parser->{'parameters'}->{'metamode'} = "done";
1732            }
1733            elsif (defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1734            {
1735                my $metavalue = $parser->{'parameters'}->{'metavalue'};
1736                $attrHash->{'_content'} = $metavalue;
1737                $parser->{'parameters'}->{'metamode'} = "done";
1738            }
1739        }
1740    }
1741
1742    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1743    return [$tagname => $attrHash];
1744}
1745
1746# This method exists purely for catching invalid section numbers that the client
1747# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1748# we've seen all the Sections in the doc.xml, and none of their section nums matched
1749# if the metamode has not been set to 'done' by then.
1750sub dxml_archive
1751{
1752    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1753    my $metamode = $parser->{'parameters'}->{'metamode'};
1754   
1755    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1756    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1757   
1758#    print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1759   
1760    if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1761    print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1762
1763    my $metaname = $parser->{'parameters'}->{'metaname'};
1764    my $metavalue = $parser->{'parameters'}->{'metavalue'};
1765   
1766    print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1767    print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1768    print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1769    print STDERR "\tin section $opt_doc_secnum.\n";
1770    $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1771    }
1772   
1773    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1774    return [$tagname => $attrHash];
1775}
1776
1777sub dxml_description
1778{
1779    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1780    my $metamode = $parser->{'parameters'}->{'metamode'};
1781
1782    my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1783    my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1784   
1785###!!!  print STDERR "**** Processing a closing </Description> tag \n";
1786#   print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1787   
1788    # Accumulate the metadata
1789
1790    # We'll be accumulating metadata at this point if we haven't found and therefore
1791    # haven't processed the metadata yet.
1792    # For subsections, this means that if we're at a matching subsection, but haven't
1793    # found the correct metaname to override in that subsection, we accumulate it as new
1794    # meta in the subsection by adding it to the current description.
1795    # If there's no subsection info for the metadata, it will accumulate at the top level
1796    # section description if we hadn't found a matching metaname to override at this point.
1797
1798    # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1799    # at the toplevel section. In the latter case, it means we want to process meta in the
1800    # toplevel section. So the eq check between the values below will work in all cases.
1801   
1802    # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1803    # this document has been provided. In that case, the metadata for that opt_doc_secnum
1804    # won't get attached/accumulated to any part of the doc, not even its top-level section.
1805
1806    if ($curr_secnum eq $opt_doc_secnum
1807        && ($metamode eq "accumulate" || $metamode eq "override")) {
1808        if ($metamode eq "override") {
1809        print "Got to end of <Description> block. No metadata value to override.  Switching 'metamode' to accumulate\n";
1810        }
1811
1812        # If we get to here and metamode is override, this means there
1813        # was no existing value to overide => treat as an append operation
1814       
1815        # Tack a new metadata tag on to the end of the <Metadata>+ block
1816        my $metaname = $parser->{'parameters'}->{'metaname'};
1817        my $metavalue = $parser->{'parameters'}->{'metavalue'};
1818       
1819        my $metadata_attr = {
1820        '_content' => $metavalue,
1821        'name' => $metaname,
1822        'mode' => "accumulate"
1823        };
1824       
1825        my $append_metadata = [ "Metadata" => $metadata_attr ];
1826        my $description_content = $attrHash->{'_content'};
1827       
1828        print "Appending metadata to doc.xml\n";
1829       
1830        if (ref($description_content)) {
1831        # got some existing interesting nested content
1832        push(@$description_content, "    ", $append_metadata ,"\n        ");
1833        }
1834        else {
1835        #description_content is most likely a string such as "\n"
1836        $attrHash->{'_content'} = [$description_content, "    ", $append_metadata ,"\n" ];
1837        }
1838       
1839        $parser->{'parameters'}->{'metamode'} = "done";
1840    }       
1841    else {
1842        # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1843##      print STDERR "**** NOT ACCUMULATE?!? \n";
1844    }
1845
1846    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1847    return [$tagname => $attrHash];
1848}
1849
1850
1851sub dxml_start_section
1852{
1853    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1854
1855    my $new_depth = scalar(@$contextArray);
1856
1857#   print STDERR "**** START SECTION \n";
1858   
1859    if ($new_depth == 1) {
1860    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1861    $parser->{'parameters'}->{'curr_section_num'}   = "";
1862    }
1863
1864    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1865    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1866
1867    my $new_secnum;
1868
1869    if ($new_depth > $old_depth) {
1870    # child subsection
1871    $new_secnum = "$old_secnum.1";
1872    }
1873    elsif ($new_depth == $old_depth) {
1874    # sibling section => increase it's value by 1
1875    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1876    $tail_num++;
1877    $new_secnum = $old_secnum;
1878    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1879    }
1880    else {
1881    # back up to parent section => lopp off tail
1882    $new_secnum = $old_secnum;
1883    $new_secnum =~ s/\.\d+$//;
1884    }
1885
1886    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1887    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1888   
1889    1;
1890}
1891
1892sub edit_xml_file
1893{
1894    my $self = shift @_;
1895    my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1896
1897    # use XML::Rules to add it in (read in and out again)
1898    my $parser = XML::Rules->new(start_rules     => $start_rules,
1899                 rules           => $rules,
1900                 style           => 'filter',
1901                                 output_encoding => 'utf8' );
1902
1903    my $xml_in = "";
1904    if (!open(MIN,"<$filename")) {
1905    $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1906    }
1907    else {
1908        # Read all the text in
1909    my $line;
1910    while (defined ($line=<MIN>)) {
1911        $xml_in .= $line;
1912    }
1913    close(MIN);
1914   
1915    my $MOUT;   
1916    if (!open($MOUT,">$filename")) {
1917        $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1918    }
1919    else {
1920        # Matched lines will get handled by the call backs
1921##      my $xml_out = "";
1922
1923        binmode($MOUT,":utf8");
1924        $parser->filter($xml_in,$MOUT, $options);
1925
1926#       binmode(MOUT,":utf8");
1927#       print MOUT $xml_out;
1928        close($MOUT);       
1929    }
1930    }
1931
1932    # copy across any custom error information that was stored during parsing
1933    $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});   
1934}
1935
1936sub edit_doc_xml
1937{
1938    my $self = shift @_;
1939    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1940
1941    my $info_mess = <<RAWEND;
1942****************************
1943  edit_doc_xml()
1944****************************
1945RAWEND
1946
1947    $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1948    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
1949    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
1950    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
1951    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
1952    $info_mess .= " opt_secnum  = $opt_secnum\n"  if defined($opt_secnum);
1953    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1954     
1955    $info_mess .= "****************************\n";
1956
1957    $gsdl_cgi->generate_message($info_mess);
1958   
1959    # To monitor which section/subsection number we are in
1960    my @start_rules =
1961    ( 'Section'    => \&dxml_start_section );
1962
1963    # use XML::Rules to add it in (read in and out again)
1964    # Set the call back functions
1965    my @rules =
1966    ( _default => 'raw',
1967      'Metadata'    => \&dxml_metadata,
1968      'Description' => \&dxml_description,
1969      'Archive'     => \&dxml_archive); # just for catching errors at end
1970     
1971    # Sets the parameters
1972    my $options = { 'metaname'  => $metaname,
1973            'metapos'   => $metapos,
1974            'metavalue' => $metavalue,
1975            'metamode'  => $metamode,
1976            'prevmetavalue' => $prevmetavalue };
1977           
1978    if (defined $opt_secnum) {
1979    $options->{'secnum'} = $opt_secnum;
1980    }
1981
1982    $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1983}
1984
1985sub set_archives_metadata_entry
1986{
1987    my $self = shift @_;
1988    my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1989
1990    my $info_mess = <<RAWEND;
1991****************************
1992  set_archives_metadata_entry()
1993****************************
1994RAWEND
1995
1996    $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1997    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1998    $info_mess .= " collect     = $collect\n"     if defined($collect);
1999    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
2000    $info_mess .= " docid       = $docid\n"       if defined($docid);
2001    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
2002    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2003    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2004    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2005    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2006     
2007    $info_mess .= "****************************\n";
2008
2009    $gsdl_cgi->generate_message($info_mess);
2010   
2011    # Obtain the doc.xml path for the specified docID
2012    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2013
2014    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2015    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2016    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2017   
2018    # The $doc_xml_file is relative to the archives, and now let's get the full path
2019    my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2020    my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
2021
2022    # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
2023    # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2024    # Thereafter, we will finally be setting the overriding metavalue for this metaname
2025    if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2026    # remove all values of $metaname metadata
2027    $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
2028    }
2029    # Edit the doc.xml file with the specified metadata name, value and position.
2030    # TODO: there is a potential problem here as this edit_doc_xml function
2031    # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2032    # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2033   
2034    # dxml_metadata method ignores metapos if metamode anything other than override
2035    $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
2036            $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
2037
2038    # return 0; # return 0 for now to indicate no error
2039    return (defined $self->{'error_msg'}) ? 1 : 0;
2040}
2041
2042
2043sub set_archives_metadata
2044{
2045    my $self = shift @_;
2046
2047    my $username  = $self->{'username'};
2048    my $collect   = $self->{'collect'};
2049    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2050   
2051    if ($baseaction::authentication_enabled) {
2052    # Ensure the user is allowed to edit this collection
2053    $self->authenticate_user($username, $collect);
2054    }
2055
2056    # Make sure the collection isn't locked by someone else
2057    $self->lock_collection($username, $collect);
2058
2059    $self->_set_archives_metadata(@_);
2060
2061    # Release the lock once it is done
2062    $self->unlock_collection($username, $collect);
2063}
2064
2065sub _set_archives_metadata_array
2066{
2067    my $self = shift @_;
2068   
2069    my $collect   = $self->{'collect'};
2070    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2071    my $site = $self->{'site'};
2072    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2073
2074    # look up additional args
2075   
2076    my $infodbtype = $self->{'infodbtype'};
2077   
2078    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2079   
2080    my $json_str      = $self->{'json'};
2081    my $doc_array = decode_json $json_str;
2082   
2083   
2084    my $global_status = 0;
2085    my $global_mess = "";
2086   
2087    my @all_docids = ();
2088   
2089    foreach my $doc_array_rec ( @$doc_array ) {
2090    my $status    = -1;
2091    my $docid     = $doc_array_rec->{'docid'};
2092
2093    push(@all_docids,$docid);
2094   
2095    my $metaname  = $doc_array_rec->{'metaname'};
2096    if(defined $metaname) {
2097       
2098        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2099
2100        my $metamode  = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2101        my $metavalue = $doc_array_rec->{'metavalue'};
2102        my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2103       
2104       
2105        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2106        # make "accumulate" the default (less destructive, as it won't actually
2107        # delete any existing values)
2108        $metamode = "accumulate";
2109        }       
2110       
2111        $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2112                $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2113    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2114        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2115       
2116        foreach my $metatable_rec ( @$metatable ) {
2117        $metaname  = $metatable_rec->{'metaname'};
2118        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2119        my $metapos = undef;
2120        my $prevmetavalue = undef;
2121        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2122       
2123        foreach my $metavalue ( @$metavals ) {
2124            $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2125                                 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2126           
2127            if($metamode eq "override") { # now, having overridden the metavalue for the first,
2128            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2129            # metavalue for this metaname will be lost
2130            $metamode = "accumulate";
2131            }
2132        }           
2133        }       
2134    }
2135       
2136    if ($status != 0) {
2137        # Catch error if set infodb entry failed
2138        $global_status = $status;
2139        $global_mess .= "Failed to set metadata key: $docid\n";
2140        $global_mess .= "Exit status: $status\n";
2141        $global_mess .= "System Error Message: $!\n";
2142        $global_mess .= "-" x 20 . "\n";
2143    }
2144    }
2145   
2146    if ($global_status != 0) {
2147    $global_mess .= "PATH: $ENV{'PATH'}\n";
2148    $gsdl_cgi->generate_error($global_mess);
2149    }
2150    else {
2151    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2152    $gsdl_cgi->generate_ok_message($mess);
2153    }
2154}
2155
2156sub set_archives_metadata_array
2157{
2158    my $self = shift @_;
2159
2160    my $username  = $self->{'username'};
2161    my $collect   = $self->{'collect'};
2162    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2163#    my $gsdlhome  = $self->{'gsdlhome'};
2164
2165    if ($baseaction::authentication_enabled) {
2166    # Ensure the user is allowed to edit this collection   
2167    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2168    }
2169
2170    my $site = $self->{'site'};
2171    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2172   
2173    $gsdl_cgi->checked_chdir($collect_dir);
2174
2175    # Obtain the collect dir
2176    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2177
2178    # Make sure the collection isn't locked by someone else
2179    $self->lock_collection($username, $collect);
2180
2181    $self->_set_archives_metadata_array(@_);
2182   
2183    # Release the lock once it is done
2184    $self->unlock_collection($username, $collect);
2185}
2186
2187sub _remove_archives_metadata
2188{
2189    my $self = shift @_;
2190
2191    my $collect   = $self->{'collect'};
2192    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2193#   my $gsdlhome  = $self->{'gsdlhome'};
2194    my $infodbtype = $self->{'infodbtype'};
2195   
2196    my $site = $self->{'site'};
2197       
2198    # Obtain the collect and archive dir   
2199    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2200   
2201    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2202
2203    # look up additional args
2204    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2205   
2206    my $metaname = $self->{'metaname'};
2207    my $metapos = $self->{'metapos'};
2208
2209    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2210
2211    # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2212    # default to deleting the first metavalue for the given metaname
2213    # Beware that if both metapos AND metavalue are defined, both matches (if any)
2214    # seem to get deleted in one single remove_archives_meta action invocation.
2215    # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2216    # deleted, both get deleted.
2217    if(!defined $metapos && !defined $metavalue) {
2218        $metapos = 0;
2219    }
2220
2221    my $metamode = $self->{'metamode'} || undef;
2222
2223    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2224    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2225
2226    # This now stores the full pathname
2227    my $doc_filename = $doc_rec->{'doc-file'}->[0];
2228
2229    my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2230#   my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
2231   
2232    if ($status == 0)
2233    {
2234        my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
2235        $mess .= "  $metaname";
2236        $mess .= "->[$metapos]" if (defined $metapos);
2237        $mess .= " ($metavalue)" if (defined $metavalue);
2238        $gsdl_cgi->generate_ok_message($mess); 
2239    }
2240    else
2241    {
2242        my $mess .= "Failed to remove archives metadata key: $docid\n";
2243        $mess .= "Exit status: $status\n";
2244        $mess .= "System Error Message: $!\n";
2245        $mess .= "-" x 20 . "\n";
2246       
2247        $gsdl_cgi->generate_error($mess);
2248    }
2249   
2250    #return $status; # in case calling functions have a use for this
2251}
2252
2253sub remove_archives_metadata
2254{
2255    my $self = shift @_;
2256
2257    my $username  = $self->{'username'};
2258    my $collect   = $self->{'collect'};
2259    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2260   
2261    if ($baseaction::authentication_enabled)
2262    {
2263        # Ensure the user is allowed to edit this collection       
2264        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2265    }
2266
2267    # Make sure the collection isn't locked by someone else
2268    $self->lock_collection($username, $collect);
2269
2270    $self->_remove_archives_metadata(@_);
2271
2272    # Release the lock once it is done
2273    $self->unlock_collection($username, $collect);
2274}
2275
2276sub remove_from_doc_xml
2277{
2278    my $self = shift @_;
2279    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
2280   
2281    my @start_rules = ('Section' => \&dxml_start_section);
2282   
2283    # Set the call-back functions for the metadata tags
2284    my @rules =
2285    (
2286        _default => 'raw',
2287        'Metadata' => \&rfdxml_metadata
2288    );
2289       
2290    my $parser = XML::Rules->new
2291    (
2292        start_rules => \@start_rules,
2293        rules => \@rules,
2294        style => 'filter',
2295        output_encoding => 'utf8',
2296#    normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2297        stripspaces => 2|0|0 # ineffectual
2298    );
2299   
2300    my $status = 0;
2301    my $xml_in = "";
2302    if (!open(MIN,"<$doc_xml_filename"))
2303    {
2304        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2305        $status = 1;
2306    }
2307    else
2308    {
2309        # Read them in
2310        my $line;
2311        while (defined ($line=<MIN>)) {
2312            $xml_in .= $line;
2313        }
2314        close(MIN);
2315
2316        # Filter with the call-back functions
2317        my $xml_out = "";
2318
2319        my $MOUT;
2320        if (!open($MOUT,">$doc_xml_filename")) {
2321            $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2322            $status = 1;
2323        }
2324        else {
2325            binmode($MOUT,":utf8");
2326            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
2327            close($MOUT);       
2328        }
2329    }
2330    return $status;
2331}
2332
2333sub rfdxml_metadata
2334{
2335    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2336
2337    # For comparisons, toplevel section is indicated by ""
2338    my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2339    my $secid = $parser->{'parameters'}->{'secid'} || "";
2340
2341    if (!($secid eq $curr_sec_num))
2342    {
2343        # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2344        return [$tagname => $attrHash];
2345    }
2346
2347    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2348    {
2349        if (!defined $parser->{'parameters'}->{'poscount'})
2350        {
2351            $parser->{'parameters'}->{'poscount'} = 0;
2352        }
2353        else
2354        {
2355            $parser->{'parameters'}->{'poscount'}++;
2356        }
2357       
2358        # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2359        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {         
2360            return [];
2361        }
2362
2363        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2364        {   
2365            return [];
2366        }
2367       
2368        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2369        {   
2370            return [];
2371        }
2372    }
2373   
2374    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2375    return [$tagname => $attrHash];
2376}
2377
2378sub mxml_metadata
2379{
2380    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2381    my $metaname = $parser->{'parameters'}->{'metaname'};
2382    my $metamode = $parser->{'parameters'}->{'metamode'};
2383
2384    # Report error if we don't see FileName tag before this
2385    die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2386   
2387    # Don't do anything if we are not in the right FileSet
2388    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2389    if ($file_regexp =~ /\.\*/) {
2390    # Only interested in a file_regexp if it specifies precisely one
2391    # file. 
2392    # So, skip anything with a .* in it as it is too general
2393##  print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2394    return [$tagname => $attrHash];
2395    }
2396    my $src_file = $parser->{'parameters'}->{'src_file'};
2397    if (!($src_file =~ /$file_regexp/)) {
2398    return [$tagname => $attrHash];
2399    }
2400##    print STDERR "*** mxl metamode = $metamode\n";
2401
2402    # Find the right metadata tag and checks if we are going to override it
2403    my $name_attr = $attrHash->{'name'};
2404    if (($name_attr eq $metaname) && ($metamode eq "override")) {
2405
2406    # now metadata.xml functions need to keep track of metapos
2407    if (!defined $parser->{'parameters'}->{'poscount'})
2408    {
2409        $parser->{'parameters'}->{'poscount'} = 0;
2410    }
2411    else
2412    {
2413        $parser->{'parameters'}->{'poscount'}++;
2414    }
2415
2416    # If either the metapos or prevmetavalue is set,
2417        # get the value and override the current value
2418    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2419
2420    if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2421    {
2422        $attrHash->{'_content'} = $metavalue;
2423
2424        ##  print STDERR "**** overriding metadata.xml\n";
2425       
2426        # Don't want it to wipe out any other pieces of metadata
2427        $parser->{'parameters'}->{'metamode'} = "done";
2428    }
2429    elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2430    {
2431        $attrHash->{'_content'} = $metavalue;
2432        $parser->{'parameters'}->{'metamode'} = "done";
2433    }
2434    }
2435
2436    # mxml_description will process the metadata if metadata is accumulate,
2437    # or if we haven't found the metadata to override
2438
2439    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2440    return [$tagname => $attrHash];
2441}
2442
2443
2444sub mxml_description
2445{
2446    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2447    my $metamode = $parser->{'parameters'}->{'metamode'};   
2448
2449    # Failed... Report error if we don't see FileName tag before this
2450    die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2451
2452    # Don't do anything if we are not in the right FileSet
2453    my $file_regexp = $parser->{'parameters'}->{'current_file'};
2454    if ($file_regexp =~ m/\.\*/) {
2455    # Only interested in a file_regexp if it specifies precisely one
2456    # file. 
2457    # So, skip anything with a .* in it as it is too general
2458    return [$tagname => $attrHash];
2459    }
2460    my $src_file = $parser->{'parameters'}->{'src_file'};
2461   
2462    if (!($src_file =~ m/$file_regexp/)) {
2463    return [$tagname => $attrHash];
2464    }
2465
2466    # Accumulate the metadata block to the end of the description block
2467    # Note: This adds metadata block to all description blocks, so if there are
2468    # multiple FileSets, it will add to all of them
2469    if (($metamode eq "accumulate") || ($metamode eq "override")) {
2470
2471    # if metamode was "override" but get to here then it failed to
2472    # find an item to override, in which case it should append its
2473    # value to the end, just like the "accumulate" mode
2474
2475    if ($metamode eq "override") {
2476        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2477    }
2478
2479    # tack a new metadata tag on to the end of the <Metadata>+ block
2480    my $metaname = $parser->{'parameters'}->{'metaname'};
2481    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2482   
2483    my $metadata_attr = { '_content' => $metavalue,
2484                  'name'     => $metaname,
2485                  'mode'     => "accumulate" };
2486
2487    my $append_metadata = [ "Metadata" => $metadata_attr ];
2488    my $description_content = $attrHash->{'_content'};
2489   
2490##  print STDERR "*** appending to metadata.xml\n";
2491
2492    # append the new metadata element to the end of the current
2493    # content contained inside this tag
2494    if (ref($description_content) eq "") {
2495        # => string or numeric literal
2496        # this is caused by a <Description> block has no <Metadata> child elements
2497        # => set up an empty array in '_content'
2498        $attrHash->{'_content'} = [ "\n" ];
2499        $description_content = $attrHash->{'_content'};
2500    }
2501
2502    push(@$description_content,"    ", $append_metadata ,"\n        ");
2503    $parser->{'parameters'}->{'metamode'} = "done";
2504    }
2505
2506    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2507    return [$tagname => $attrHash];
2508}
2509
2510
2511sub mxml_filename
2512{
2513    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2514
2515    # Store the filename of the Current Fileset
2516    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2517    # FileName tag must come before Description tag
2518    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2519
2520    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2521    return [$tagname => $attrHash];
2522}
2523
2524
2525sub mxml_fileset
2526{
2527    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2528
2529    # Initilise the current_file
2530    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2531    # FileName tag must come before Description tag
2532    $parser->{'parameters'}->{'current_file'} = "";
2533
2534    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2535    return [$tagname => $attrHash];
2536}
2537
2538sub mxml_directorymetadata
2539{
2540    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2541
2542    # if we haven't processed the metadata when we reach the end of metadata.xml
2543    # it's because there's no particular FileSet element whose FileName matched
2544    # In which case, add a new FileSet for this FileName
2545    my $metamode = $parser->{'parameters'}->{'metamode'};
2546    if($metamode ne "done") {
2547   
2548    if ($metamode eq "override") {
2549        print "No metadata value to override.  Switching 'metamode' to accumulate\n";
2550    }
2551
2552    # If we get to here and metamode is override, this means there
2553    # was no existing value to overide => treat as an append operation
2554   
2555    # Create a new FileSet element and append to DirectoryMetadata
2556    # <FileSet>
2557    # <FileName>src_file</FileName>
2558    # <Description>
2559    # <Metadata mode="" name="">metavalue</Metadata>
2560    # </Description>
2561    # </FileSet>
2562    my $src_file = $parser->{'parameters'}->{'src_file'};
2563    my $metaname = $parser->{'parameters'}->{'metaname'};
2564    my $metavalue = $parser->{'parameters'}->{'metavalue'};
2565    my $metadata_attr = {
2566        '_content' => $metavalue,
2567        'name' => $metaname,
2568        'mode' => "accumulate"
2569    };
2570    my $append_metadata = [ "Metadata" => $metadata_attr ];
2571    my $description_attr = { '_content' => [ "\n\t\t   ", $append_metadata, "\n\t\t"] };
2572    my $description_element = [ "Description" => $description_attr ];
2573
2574    #_content is not an attribute, it's special and holds the children of this element
2575    # including the textnode value embedded in this element if any.
2576    my $filename_attr = {'_content' => $src_file};
2577    my $filename_element = [ "FileName" => $filename_attr ];
2578
2579    my $fileset_attr = {};
2580    $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2581    my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2582   
2583   
2584    # get children of dirmeta, and push the new FileSet element onto it
2585    print "Appending metadata to metadata.xml\n";
2586    my $dirmeta_content = $attrHash->{'_content'};
2587    if (ref($dirmeta_content)) {
2588        # got some existing interesting nested content
2589        #push(@$dirmeta_content, "    ", $fileset ,"\n        ");
2590        push(@$dirmeta_content, "\t", $fileset ,"\n");
2591    }
2592    else {
2593        #description_content is most likely a string such as "\n"
2594        #$attrHash->{'_content'} = [$dirmeta_content, "    ", $fileset ,"\n" ];
2595        $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2596    }   
2597
2598    $parser->{'parameters'}->{'metamode'} = "done";
2599    }
2600    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2601    return [$tagname => $attrHash];
2602}
2603
2604
2605sub edit_metadata_xml
2606{
2607    my $self = shift @_;
2608    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2609
2610    # Set the call-back functions for the metadata tags
2611    my @rules =
2612    ( _default => 'raw',
2613          'FileName' => \&mxml_filename,
2614      'Metadata' => \&mxml_metadata,
2615      'Description' => \&mxml_description,
2616          'FileSet' => \&mxml_fileset,
2617      'DirectoryMetadata' => \&mxml_directorymetadata);
2618
2619    # use XML::Rules to add it in (read in and out again)
2620    my $parser = XML::Rules->new(rules => \@rules,
2621                 style => 'filter',
2622                                 output_encoding => 'utf8',
2623                 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2624
2625    if (!-e $metadata_xml_filename) {
2626   
2627        if (open(MOUT,">$metadata_xml_filename")) {
2628           
2629            my $src_file_re = &util::filename_to_regex($src_file);
2630            # shouldn't the following also be in the above utility routine??
2631            # $src_file_re =~ s/\./\\./g;
2632       
2633            print MOUT "<?xml version=\"1.0\"?>\n";
2634            print MOUT "<DirectoryMetadata>\n";
2635            print MOUT " <FileSet>\n";
2636            print MOUT "  <FileName>$src_file_re</FileName>\n";
2637            print MOUT "  <Description>\n";
2638            print MOUT "  </Description>\n";
2639            print MOUT " </FileSet>\n";
2640            print MOUT "</DirectoryMetadata>\n";
2641
2642            close(MOUT);
2643        }
2644        else {
2645            $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2646        }
2647    }
2648   
2649   
2650    my $xml_in = "";
2651    if (!open(MIN,"<$metadata_xml_filename")) {
2652        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2653    }
2654    else {
2655        # Read them in
2656        my $line;
2657        while (defined ($line=<MIN>)) {
2658            $xml_in .= $line;
2659        }
2660        close(MIN);
2661
2662        # Filter with the call-back functions
2663        my $xml_out = "";
2664
2665        my $MOUT;
2666        if (!open($MOUT,">$metadata_xml_filename")) {
2667            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2668        }
2669        else {
2670            binmode($MOUT,":utf8");
2671
2672            # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2673            # At the moment, I will just hack it!
2674            #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2675            #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2676            #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2677            #print MOUT $xml_out;
2678
2679            $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2680                              metapos => $metapos,
2681                      metavalue => $metavalue,
2682                      metamode => $metamode,
2683                      src_file => $src_file,
2684                      prevmetavalue => $prevmetavalue,
2685                      current_file => undef} );
2686            close($MOUT);       
2687        }
2688   }
2689}
2690
2691
2692sub set_import_metadata
2693{
2694    my $self = shift @_;
2695   
2696    my $username  = $self->{'username'};
2697    my $collect   = $self->{'collect'};
2698    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2699   
2700    if ($baseaction::authentication_enabled) {
2701    # Ensure the user is allowed to edit this collection
2702    $self->authenticate_user($username, $collect);
2703    }
2704
2705    # Make sure the collection isn't locked by someone else
2706    $self->lock_collection($username, $collect);
2707 
2708    $self->_set_import_metadata(@_);
2709
2710    # Release the lock once it is done
2711    $self->unlock_collection($username, $collect);
2712   
2713}
2714
2715sub set_import_metadata_array
2716{
2717    my $self = shift @_;
2718
2719    my $username  = $self->{'username'};
2720    my $collect   = $self->{'collect'};
2721    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2722#    my $gsdlhome  = $self->{'gsdlhome'};
2723
2724    if ($baseaction::authentication_enabled) {
2725    # Ensure the user is allowed to edit this collection   
2726    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2727    }
2728
2729    my $site = $self->{'site'};
2730    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2731   
2732    $gsdl_cgi->checked_chdir($collect_dir);
2733
2734    # Make sure the collection isn't locked by someone else
2735    $self->lock_collection($username, $collect);
2736
2737    $self->_set_import_metadata_array(@_);
2738
2739    # Release the lock once it is done
2740    $self->unlock_collection($username, $collect);
2741
2742}
2743
2744
2745sub _set_import_metadata_array
2746{
2747    my $self = shift @_;
2748
2749    my $collect   = $self->{'collect'};
2750    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2751
2752    my $site = $self->{'site'};
2753    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2754   
2755    # look up additional args
2756   
2757    my $infodbtype = $self->{'infodbtype'};
2758   
2759    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");   
2760    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2761   
2762    my $json_str = $self->{'json'};
2763    my $doc_array = decode_json $json_str;
2764   
2765    my $global_status = 0;
2766    my $global_mess = "";
2767   
2768    my @all_docids = ();
2769   
2770    foreach my $doc_array_rec ( @$doc_array )
2771    {
2772    my $status = -1;
2773    my $docid = $doc_array_rec->{'docid'};
2774   
2775    my ($docid_root,$docid_secnum);
2776    if(defined $docid) {   
2777        ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);   
2778        # as yet no support for setting subsection metadata in metadata.xml
2779        if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2780        $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2781        next; # skip this docid in for loop
2782        }
2783    }
2784
2785    push(@all_docids,$docid); # docid_root rather
2786   
2787    my $metaname = $doc_array_rec->{'metaname'};
2788    if (defined $metaname) {
2789        my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2790        my $metavalue = $doc_array_rec->{'metavalue'};
2791        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2792
2793        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2794        # make "accumulate" the default (less destructive, as won't actually
2795        # delete any existing values)
2796        $metamode = "accumulate";
2797        }
2798
2799        # adding metapos and prevmetavalue support to import_metadata subroutines
2800        my $metapos   = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2801        my $prevmetavalue = $self->{'prevmetavalue'};
2802
2803        $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2804       
2805    } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2806        my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2807       
2808        foreach my $metatable_rec ( @$metatable ) {
2809        $metaname  = $metatable_rec->{'metaname'};
2810        my $metamode  = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2811        if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2812            # make "accumulate" the default (less destructive, as won't actually
2813            # delete any existing values)
2814            $metamode = "accumulate";
2815        }
2816
2817        # No support for metapos and prevmetavalue in the JSON metatable substructure
2818        my $metapos = undef;
2819        my $prevmetavalue = undef;
2820        my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2821       
2822        foreach my $metavalue ( @$metavals ) {
2823            $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2824
2825            $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2826            if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2827            # need to accumulate subsequent metavals for this metaname, else the just-assigned
2828            # metavalue for this metaname will be lost
2829            $metamode = "accumulate";
2830            }
2831        }
2832        }
2833    }       
2834    }
2835
2836    # always a success message
2837    my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2838    $gsdl_cgi->generate_ok_message($mess);
2839}
2840
2841# always returns true (1)
2842sub set_import_metadata_entry
2843{
2844    my $self = shift @_;
2845    my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2846
2847    my $info_mess = <<RAWEND;
2848****************************
2849  set_import_metadata_entry()
2850****************************
2851RAWEND
2852
2853    $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2854    $info_mess .= " collect     = $collect\n"     if defined($collect);
2855    $info_mess .= " infodbtype  = $infodbtype\n"  if defined($infodbtype);
2856    $info_mess .= " arcinfo_doc_filename  = $arcinfo_doc_filename\n"  if defined($arcinfo_doc_filename);
2857    $info_mess .= " docid       = $docid\n"       if defined($docid);
2858    $info_mess .= " metaname    = $metaname\n"    if defined($metaname);
2859    $info_mess .= " metapos     = $metapos\n"     if defined($metapos);
2860    $info_mess .= " metavalue   = $metavalue\n"   if defined($metavalue);
2861    $info_mess .= " metamode    = $metamode\n"    if defined($metamode);
2862    $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2863     
2864    $info_mess .= "****************************\n";
2865
2866    $gsdl_cgi->generate_message($info_mess);
2867
2868    # import works with metadata.xml which can have inherited metadata
2869    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2870    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2871    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2872    # what they're doing if they provide a metapos.
2873    if(defined $metapos) {
2874    print STDERR "@@@@ WARNING: metapos defined.\n";
2875    print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2876    }
2877
2878    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2879    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2880    my $metadata_xml_file;
2881    my $import_filename = undef;
2882   
2883    if (defined $docid) {
2884    # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2885    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2886
2887    # This now stores the full pathname
2888    $import_filename = $doc_rec->{'src-file'}->[0];
2889    $import_filename = &util::placeholders_to_abspath($import_filename);
2890
2891    } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2892         # as the array version of the method doesn't support the -f parameter yet
2893    my $import_file  = $self->{'f'};
2894    $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2895    }
2896   
2897    # figure out correct metadata.xml file [?]
2898    # Assuming the metadata.xml file is next to the source file
2899    # Note: This will not work if it is using the inherited metadata from the parent folder
2900    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2901    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2902   
2903    # If we're overriding everything, then $prevmetavalue=undefined and
2904    # $metamode=override combined with $metapos=undefined
2905    # in which case we need to remove all metavalues for the metaname at the given (sub)section
2906    # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2907    if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2908##  print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2909    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2910
2911    }
2912
2913    # Edit the metadata.xml
2914    # Modified by Jeffrey from DL Consulting
2915    # Handle the case where there is one metadata.xml file for multiple FileSets
2916    # The XML filter needs to know whether it is in the right FileSet
2917    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2918    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2919    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2920                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2921    #return 0;
2922    return $metadata_xml_filename;
2923}
2924
2925sub _remove_import_metadata
2926{
2927    my $self = shift @_;
2928
2929    my $collect   = $self->{'collect'};
2930    my $gsdl_cgi  = $self->{'gsdl_cgi'};
2931#   my $gsdlhome  = $self->{'gsdlhome'};
2932    my $infodbtype = $self->{'infodbtype'};
2933   
2934    # Obtain the collect dir
2935    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2936    my $site = $self->{'site'};
2937    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2938   
2939    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2940    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2941
2942    # look up additional args
2943    my $docid = $self->{'d'};
2944    if ((!defined $docid) || ($docid =~ m/^\s*$/))
2945    {
2946        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2947    }
2948   
2949    my $metaname = $self->{'metaname'};
2950    my $metapos = $self->{'metapos'};
2951    my $metavalue = $self->{'metavalue'};
2952    if(defined $metavalue) {
2953        $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2954    } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2955        $metapos = 0;
2956    }
2957    my $metamode = $self->{'metamode'} || undef;
2958
2959    # import works with metadata.xml which can have inherited metadata
2960    # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2961    # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2962    # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2963    # what they're doing if they provide a metapos.
2964    if(defined $metapos) {
2965        print STDERR "@@@@ WARNING: metapos defined.\n";
2966        print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2967    }
2968   
2969    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2970    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2971    my $metadata_xml_file;
2972    my $import_filename = undef;
2973    if (defined $docid)
2974    {
2975        my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2976        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2977
2978        # This now stores the full pathname
2979        $import_filename = $doc_rec->{'src-file'}->[0];
2980        $import_filename = &util::placeholders_to_abspath($import_filename);
2981    }
2982
2983    if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2984    {
2985        $gsdl_cgi->generate_error("There is no metadata\n");
2986    }
2987   
2988    # figure out correct metadata.xml file [?]
2989    # Assuming the metadata.xml file is next to the source file
2990    # Note: This will not work if it is using the inherited metadata from the parent folder
2991    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2992    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2993   
2994    $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2995   
2996    my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2997    $mess .= "  $metaname";
2998    $mess .= " = $metavalue\n";
2999   
3000    $gsdl_cgi->generate_ok_message($mess);
3001
3002    #return $status; # in case calling functions have a use for this
3003}
3004
3005sub remove_import_metadata
3006{
3007    my $self = shift @_;
3008   
3009    my $username = $self->{'username'};
3010    my $collect   = $self->{'collect'};
3011    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3012   
3013    if ($baseaction::authentication_enabled) {
3014        # Ensure the user is allowed to edit this collection       
3015        $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3016    }
3017
3018    # Make sure the collection isn't locked by someone else
3019    $self->lock_collection($username, $collect);
3020   
3021    $self->_remove_import_metadata(@_);
3022
3023    # Release the lock once it is done
3024    $self->unlock_collection($username, $collect);
3025
3026}
3027
3028sub remove_from_metadata_xml
3029{
3030    my $self = shift @_;
3031    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3032    # metamode generally has no meaning for removing meta, but is used by set_meta
3033    # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3034
3035    # Set the call-back functions for the metadata tags
3036    my @rules =
3037    (
3038        _default => 'raw',
3039        'Metadata' => \&rfmxml_metadata,
3040        'FileName' => \&mxml_filename
3041    );
3042       
3043    my $parser = XML::Rules->new
3044    (
3045        rules => \@rules,
3046        style => 'filter',
3047        output_encoding => 'utf8',
3048     #normalisespaces => 1,
3049            stripspaces => 2|0|0 # ineffectual
3050    );
3051   
3052    my $xml_in = "";
3053    if (!open(MIN,"<$metadata_xml_filename"))
3054    {
3055        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3056    }
3057    else
3058    {
3059        # Read them in
3060        my $line;
3061        while (defined ($line=<MIN>)) {
3062            $xml_in .= $line;
3063        }
3064        close(MIN);
3065
3066        # Filter with the call-back functions
3067        my $xml_out = "";
3068
3069        my $MOUT;
3070        if (!open($MOUT,">$metadata_xml_filename")) {
3071            $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3072        }
3073        else {
3074            binmode($MOUT,":utf8");
3075            $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
3076            close($MOUT);       
3077        }
3078    }
3079}
3080
3081sub rfmxml_metadata
3082{
3083    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3084
3085    # metadata.xml does not handle subsections
3086
3087    # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3088    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3089        && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
3090    {
3091        if (!defined $parser->{'parameters'}->{'poscount'})
3092        {
3093            $parser->{'parameters'}->{'poscount'} = 0;
3094        }
3095        else
3096        {
3097            $parser->{'parameters'}->{'poscount'}++;
3098        }
3099
3100        # if overriding but no metapos, then clear all the meta for this metaname
3101        if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
3102            return [];
3103        }
3104   
3105        if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3106        {
3107            return [];
3108        }
3109        if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3110        {
3111            return [];
3112        }       
3113    }
3114
3115    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3116    return [$tagname => $attrHash];
3117}
3118
3119sub _remove_live_metadata
3120{
3121    my $self = shift @_;
3122
3123    my $collect   = $self->{'collect'};
3124    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3125#    my $gsdlhome  = $self->{'gsdlhome'};
3126    my $infodbtype = $self->{'infodbtype'};
3127
3128    # Obtain the collect dir
3129    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3130    my $site = $self->{'site'};
3131    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3132
3133   
3134    # look up additional args
3135    my $docid     = $self->{'d'};
3136    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3137      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3138    }
3139   
3140    # Generate the dbkey
3141    my $metaname  = $self->{'metaname'};
3142    my $dbkey = "$docid.$metaname";
3143
3144    # To people who know $collect_tail please add some comments
3145    # Obtain the live gdbm_db path
3146    my $collect_tail = $collect;
3147    $collect_tail =~ s/^.*[\/|\\]//;
3148    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3149    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3150
3151    # Remove the key
3152    my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3153    my $status = system($cmd);
3154    if ($status != 0) {
3155        # Catch error if gdbmdel failed
3156    my $mess = "Failed to set metadata key: $dbkey\n";
3157   
3158    $mess .= "PATH: $ENV{'PATH'}\n";
3159    $mess .= "cmd = $cmd\n";
3160    $mess .= "Exit status: $status\n";
3161    $mess .= "System Error Message: $!\n";
3162
3163    $gsdl_cgi->generate_error($mess);
3164    }
3165    else {
3166    $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3167    }
3168
3169}
3170
3171sub remove_live_metadata
3172{
3173    my $self = shift @_;
3174
3175    my $username  = $self->{'username'};
3176    my $collect   = $self->{'collect'};
3177    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3178    my $gsdlhome  = $self->{'gsdlhome'};
3179   
3180    if ($baseaction::authentication_enabled) {
3181    # Ensure the user is allowed to edit this collection   
3182    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3183    }
3184
3185    # Make sure the collection isn't locked by someone else
3186    $self->lock_collection($username, $collect);
3187
3188    $self->_remove_live_metadata(@_);
3189
3190    $self->unlock_collection($username, $collect);
3191}
3192
3193sub remove_metadata
3194{
3195    my $self = shift @_;
3196
3197    my $where = $self->{'where'};
3198    if(!$where) {
3199    $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3200    return;
3201    }
3202
3203    my $username  = $self->{'username'};
3204    my $collect   = $self->{'collect'};
3205    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3206
3207    if ($baseaction::authentication_enabled) {
3208    # Ensure the user is allowed to edit this collection   
3209    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3210    }
3211
3212    # Make sure the collection isn't locked by someone else
3213    $self->lock_collection($username, $collect);
3214
3215    # check which directories need to be processed, specified in $where as
3216    # any combination of import|archives|index|live
3217    if($where =~ m/import/) {
3218    $self->_remove_import_metadata(@_);     
3219    }
3220    if($where =~ m/archives/) {
3221    $self->_remove_archives_metadata(@_);       
3222   }
3223    if($where =~ m/index/) {
3224    $self->_remove_index_metadata(@_);     
3225    }
3226
3227    # Release the lock once it is done
3228    $self->unlock_collection($username, $collect);
3229}
3230
3231# the internal version, without authentication
3232sub _remove_index_metadata
3233{   
3234    my $self = shift @_;
3235
3236    my $collect   = $self->{'collect'};
3237    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3238#    my $gsdlhome  = $self->{'gsdlhome'};
3239    my $infodbtype = $self->{'infodbtype'};
3240   
3241    # Obtain the collect dir
3242    my $site = $self->{'site'};
3243    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3244    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3245
3246        # look up additional args
3247    my $docid     = $self->{'d'};
3248    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3249      $gsdl_cgi->generate_error("No docid (d=...) specified.");
3250    }
3251    my $metaname  = $self->{'metaname'};
3252    my $metapos   = $self->{'metapos'};
3253    my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
3254
3255    # To people who know $collect_tail please add some comments
3256    # Obtain the path to the database
3257    my $collect_tail = $collect;
3258    $collect_tail =~ s/^.*[\/|\\]//;
3259    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3260    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3261
3262    # Read the docid entry
3263    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3264
3265    # Check to make sure the key does exist
3266    if (!defined ($doc_rec->{$metaname})) {
3267        $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3268    }
3269
3270    # Obtain the specified metadata pos
3271    # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3272    if(!defined $metapos && !defined $metavalue) {
3273        $metapos = 0;
3274    }
3275   
3276
3277    # consider check key is defined before deleting?
3278    # Loop through the metadata array and ignore the specified position
3279    my $filtered_metadata = [];
3280    my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});   
3281    for (my $i=0; $i<$num_metadata_vals; $i++) {
3282    my $metaval = shift(@{$doc_rec->{$metaname}});
3283
3284    if (!defined $metavalue && $i != $metapos) {
3285        push(@$filtered_metadata,$metaval);
3286    }
3287   
3288    if(defined $metavalue && !($metavalue eq $metaval))
3289    {
3290        push(@$filtered_metadata,$metaval);
3291    }
3292    }
3293    $doc_rec->{$metaname} = $filtered_metadata;
3294
3295    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3296    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3297
3298    if ($status != 0) {
3299    my $mess = "Failed to set metadata key: $docid\n";
3300   
3301    $mess .= "PATH: $ENV{'PATH'}\n";
3302    $mess .= "Exit status: $status\n";
3303    $mess .= "System Error Message: $!\n";
3304
3305    $gsdl_cgi->generate_error($mess);
3306    }
3307    else {
3308    my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3309    $mess .= "  $metaname";
3310    $mess .= "->[$metapos]" if (defined $metapos);
3311    $mess .= " ($metavalue)" if (defined $metavalue);
3312
3313    $gsdl_cgi->generate_ok_message($mess);
3314    }
3315
3316    #return $status; # in case calling functions have a use for this
3317}
3318
3319sub remove_index_metadata
3320{
3321    my $self = shift @_;
3322
3323    my $username  = $self->{'username'};
3324    my $collect   = $self->{'collect'};
3325    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3326#    my $gsdlhome  = $self->{'gsdlhome'};
3327   
3328    if ($baseaction::authentication_enabled) {
3329    # Ensure the user is allowed to edit this collection   
3330    $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3331    }
3332
3333    # Obtain the collect dir
3334    my $site = $self->{'site'};
3335    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3336    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3337
3338    # Make sure the collection isn't locked by someone else
3339    $self->lock_collection($username, $collect);
3340
3341    $self->_remove_index_metadata(@_);
3342
3343    # Release the lock once it is done
3344    $self->unlock_collection($username, $collect);
3345}
3346
3347
3348# Was trying to reused the codes, but the functions need to be broken
3349# down more before they can be reused, otherwise there will be too
3350# much overhead and duplicate process...
3351sub insert_metadata
3352{
3353    my $self = shift @_;
3354   
3355    my $username  = $self->{'username'};
3356    my $collect   = $self->{'collect'};
3357    my $gsdl_cgi  = $self->{'gsdl_cgi'};
3358    my $gsdlhome  = $self->{'gsdlhome'};
3359    my $infodbtype = $self->{'infodbtype'};
3360   
3361    # If the import metadata and gdbm database have been updated, we
3362    # need to insert some notification to warn user that the the text
3363    # they see at the moment is not indexed and require a rebuild.
3364    my $rebuild_pending_macro = "_rebuildpendingmessage_";
3365
3366    if ($baseaction::authentication_enabled) {
3367    # Ensure the user is allowed to edit this collection
3368    $self->authenticate_user($username, $collect);
3369    }
3370
3371    # Obtain the collect and archive dir   
3372    my $site = $self->{'site'};
3373    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3374    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3375    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3376
3377    # Make sure the collection isn't locked by someone else
3378    $self->lock_collection($username, $collect);
3379   
3380    # Check additional args
3381    my $docid = $self->{'d'};
3382    if (!defined($docid)) {
3383    $gsdl_cgi->generate_error("No document id is specified: d=...");
3384    }
3385    my $metaname = $self->{'metaname'};
3386    if (!defined($metaname)) {
3387    $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3388    }
3389    my $metavalue = $self->{'metavalue'};
3390    if (!defined($metavalue) || $metavalue eq "") {
3391    $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3392    }
3393    # make "accumulate" the default (less destructive, as won't actually
3394    # delete any existing values)
3395    my $metamode = "accumulate";
3396
3397    # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3398    my $metapos   = undef;
3399    my $prevmetavalue = undef;
3400
3401    #=======================================================================#
3402    # set_import_metadata [START]
3403    #=======================================================================#
3404    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3405    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3406    my $metadata_xml_file;
3407    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3408    my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3409   
3410    # This now stores the full pathname
3411    my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3412    $import_filename = &util::placeholders_to_abspath($import_filename);
3413   
3414    # figure out correct metadata.xml file [?]
3415    # Assuming the metadata.xml file is next to the source file
3416    # Note: This will not work if it is using the inherited metadata from the parent folder
3417    my ($import_tailname, $import_dirname)
3418    = File::Basename::fileparse($import_filename);
3419    my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3420
3421    # Shane's escape characters
3422    $metavalue = pack "U0C*", unpack "C*", $metavalue;
3423    $metavalue =~ s/\,/&#44;/g;
3424    $metavalue =~ s/\:/&#58;/g;
3425    $metavalue =~ s/\|/&#124;/g;
3426    $metavalue =~ s/\(/&#40;/g;
3427    $metavalue =~ s/\)/&#41;/g;
3428    $metavalue =~ s/\[/&#91;/g;
3429    $metavalue =~ s/\\/&#92;/g;
3430    $metavalue =~ s/\]/&#93;/g;
3431    $metavalue =~ s/\{/&#123;/g;
3432    $metavalue =~ s/\}/&#125;/g;
3433    $metavalue =~ s/\"/&#34;/g;
3434    $metavalue =~ s/\`/&#96;/g;
3435    $metavalue =~ s/\n/_newline_/g;
3436
3437    # Edit the metadata.xml
3438    # Modified by Jeffrey from DL Consulting
3439    # Handle the case where there is one metadata.xml file for multiple FileSets
3440    # The XML filter needs to know whether it is in the right FileSet
3441    # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3442    # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3443    $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3444                 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3445    #=======================================================================#
3446    # set_import_metadata [END]
3447    #=======================================================================#
3448
3449
3450    #=======================================================================#
3451    # set_metadata (accumulate version) [START]
3452    #=======================================================================#
3453    # To people who know $collect_tail please add some comments
3454    # Obtain path to the database
3455    my $collect_tail = $collect;
3456    $collect_tail =~ s/^.*[\/|\\]//;
3457    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3458    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3459
3460    # Read the docid entry
3461    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3462
3463    # Protect the quotes
3464    $metavalue =~ s/\"/\\\"/g;
3465
3466    # Adds the pending macro
3467    my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3468
3469    # If the metadata doesn't exist, create a new one
3470    if (!defined($doc_rec->{$metaname})){   
3471    $doc_rec->{$metaname} = [ $macro_metavalue ];
3472    }
3473    # Else, let's acculumate the values
3474    else {
3475        push(@{$doc_rec->{$metaname}},$macro_metavalue);
3476    }
3477
3478    ## Use the dbutil set_entry method instead of assuming the database is gdbm
3479    my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3480
3481    if ($status != 0) {
3482        # Catch error if gdbmget failed
3483    my $mess = "Failed to set metadata key: $docid\n";
3484   
3485    $mess .= "PATH: $ENV{'PATH'}\n";
3486    $mess .= "Exit status: $status\n";
3487    $mess .= "System Error Message: $!\n";
3488
3489    $gsdl_cgi->generate_error($mess);
3490    }
3491    else {
3492    my $mess = "insert-metadata successful: Key[$docid]\n";
3493    $mess .= "  [In metadata.xml] $metaname";
3494    $mess .= " = $metavalue\n";
3495    $mess .= "  [In database] $metaname";
3496    $mess .= " = $macro_metavalue\n";
3497    $mess .= "  The new text has not been indexed, rebuilding collection is required\n";
3498        $gsdl_cgi->generate_ok_message($mess);
3499    }   
3500    #=======================================================================#
3501    # set_metadata (accumulate version) [END]
3502    #=======================================================================#
3503
3504    # Release the lock once it is done
3505    $self->unlock_collection($username, $collect);
3506}
3507
35081;
Note: See TracBrowser for help on using the browser.