source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 31589

Last change on this file since 31589 was 31589, checked in by ak19, 7 years ago

Bugfixes I think. Noticed that remove-archives-meta was removing the existing two metavalues for a metaname instead of a specific one despite being given a specific metavalue to remove. This seems to have happened because metapos was not undefined yet did not have a proper value (being the empty string or a string of empty chars), and so metapos was being set to 0. In that case the metavalue at metapos 0 AND the metavalue specified were both being removed. I then noticed the same issue going on with remove-index-meta too, and then noticed that set-index-meta wasn't working the way I expected it to either. All of these had to do with incoming args like metapos and metavalue only being tested for undefinedness rather than being tested for whether they were defined yet consisted of a string of one or more empty spaces.

File size: 121.6 KB
Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ "metapos" ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "where" ],
71 'help-string' => [
72 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73 ]},
74
75 #SET METHODS
76 "set-live-metadata" => {
77 'compulsory-args' => [ "d", "metaname", "metavalue" ],
78 'optional-args' => [ ] },
79
80 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81 'compulsory-args' => [ "metaname", "metavalue" ],
82 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84 "set-index-metadata" => {
85 'compulsory-args' => [ "d", "metaname", "metavalue" ],
86 'optional-args' => [ "metapos", "metamode" ] },
87
88 "set-archives-metadata" => {
89 'compulsory-args' => [ "d", "metaname", "metavalue" ],
90 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91
92 "set-import-metadata" => {
93 'compulsory-args' => [ "metaname", "metavalue" ],
94 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95
96 #SET METHODS (ARRAY)
97 "set-metadata-array" => {
98 'compulsory-args' => [ "where", "json" ],
99 'optional-args' => [ ],
100 'help-string' => [
101 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102
103 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110
111 "set-archives-metadata-array" => {
112 'compulsory-args' => [ "json" ],
113 'optional-args' => [ ] },
114
115 "set-import-metadata-array" => {
116 'compulsory-args' => [ "json" ],
117 'optional-args' => [ ] },
118
119 "set-index-metadata-array" => {
120 'compulsory-args' => [ "json" ],
121 'optional-args' => [ ] },
122
123 "set-live-metadata-array" => {
124 'compulsory-args' => [ "json" ],
125 'optional-args' => [ ] },
126
127 #REMOVE METHODS
128 "remove-import-metadata" => {
129 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
130 'optional-args' => [ "metapos", "metavalue", "metamode" ] }, # only provide metapos arg for SIMPLE collections.
131# Metavalue is now an optional arg for remove_import_metadata() based on what the implementation did, which allowed metavalue to be undefined, and if so, used metapos.
132
133 "remove-archives-metadata" => {
134 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
135 'optional-args' => [ "metapos", "metavalue", "metamode" ] },
136
137 "remove-live-metadata" => {
138 'compulsory-args' => [ "d", "metaname" ],
139 'optional-args' => [ ] },
140
141 "remove-index-metadata" => {
142 'compulsory-args' => [ "d", "metaname" ],
143 'optional-args' => [ "metapos", "metavalue" ] },
144
145 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
146 'compulsory-args' => [ "d", "metaname" ],
147 'optional-args' => [ "where", "metapos", "metavalue", "metamode" ] }, # metamode is optional since remove-metadata can call any of remove_import_meta and remove_archives_meta, remove_index_meta, of which the first two accept metamode as an optional param
148
149 #INSERT METHODS
150 "insert-metadata" => {
151 'compulsory-args' => [ "d", "metaname", "metavalue" ],
152 'optional-args' => [ ] }
153};
154
155
156sub new
157{
158 my $class = shift (@_);
159 my ($gsdl_cgi,$iis6_mode) = @_;
160
161 # Treat metavalue specially. To transmit this through a GET request
162 # the Javascript side has url-encoded it, so here we need to decode
163 # it before proceeding
164
165 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
166 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
167 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
168
169 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
170 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
171
172 # need to do the same with prevmetavalue
173 my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
174 my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
175 my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
176
177 $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
178 $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
179
180 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
181
182 return bless $self, $class;
183}
184
185
186sub get_live_metadata
187{
188 my $self = shift @_;
189
190 my $username = $self->{'username'};
191 my $collect = $self->{'collect'};
192 my $gsdl_cgi = $self->{'gsdl_cgi'};
193 my $gsdlhome = $self->{'gsdlhome'};
194 my $infodbtype = $self->{'infodbtype'};
195
196 # live metadata gets/saves value scoped (prefixed) by the current usename
197 # so (for now) let's not bother to enforce authentication
198
199 # Obtain the collect dir
200 my $site = $self->{'site'};
201 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
202 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
203
204 # No locking collection when getting metadata, only when setting it
205# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
206
207 # look up additional args
208 my $docid = $self->{'d'};
209 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
210 $gsdl_cgi->generate_error("No docid (d=...) specified.");
211 }
212
213 # Generate the dbkey
214 my $metaname = $self->{'metaname'};
215 my $dbkey = "$docid.$metaname";
216
217 # To people who know $collect_tail please add some comments
218 # Obtain path to the database
219 my $collect_tail = $collect;
220 $collect_tail =~ s/^.*[\/|\\]//;
221 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
222 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
223
224 # Obtain the content of the key
225 my $cmd = "gdbmget $infodb_file_path $dbkey";
226 if (open(GIN,"$cmd |") == 0) {
227 # Catch error if gdbmget failed
228 my $mess = "Failed to get metadata key: $metaname\n";
229 $mess .= "$!\n";
230
231 $gsdl_cgi->generate_error($mess);
232 }
233 else {
234 binmode(GIN,":utf8");
235 # Read everything in and concatenate them into $metavalue
236 my $metavalue = "";
237 my $line;
238 while (defined ($line=<GIN>)) {
239 $metavalue .= $line;
240 }
241 close(GIN);
242 chomp($metavalue); # Get rid off the tailing newlines
243 $gsdl_cgi->generate_ok_message("$metavalue");
244 }
245
246 # Release the lock once it is done
247# $self->unlock_collection($username, $collect);
248}
249
250# just calls the index version
251sub get_metadata
252{
253 my $self = shift @_;
254 $self->get_index_metadata(@_);
255}
256
257# JSON version that will get the requested metadata values
258# from the requested source (index, import, archives or live)
259# One of the params is a JSON string and the return value is JSON too
260# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
261sub get_metadata_array
262{
263 my $self = shift @_;
264
265 my $where = $self->{'where'};
266 if (!$where || ($where =~ m/^\s*$/)) { # 0, "0", "" and undef are all false. All else is true.
267 # What is truth in perl: http://www.berkeleyinternet.com/perl/node11.html
268 # and http://www.perlmonks.org/?node_id=33638
269
270 $where = "index"; # default behaviour is to get the values from index
271 }
272
273 # Only when setting metadata do we perform authentication and do we lock the collection,
274 # not when getting metadata
275
276 # for get_meta_array, the where param can only be ONE of import, archives, index, live
277 if($where =~ m/index/) {
278 $self->_get_index_metadata_array(@_);
279 }
280 elsif($where =~ m/archives/) {
281 $self->_get_archives_metadata_array(@_);
282 }
283 elsif($where =~ m/import/) {
284 $self->_get_import_metadata_array(@_);
285 }
286 elsif($where =~ m/live/) {
287 $self->_get_live_metadata_array(@_);
288 }
289}
290
291# Unused at present. Added for completion. Tested.
292sub _get_import_metadata_array {
293
294 my $self = shift @_;
295
296 my $collect = $self->{'collect'};
297 my $gsdl_cgi = $self->{'gsdl_cgi'};
298 my $site = $self->{'site'};
299 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
300
301 # look up additional args
302 my $infodbtype = $self->{'infodbtype'};
303
304 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
305 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
306 my $json_str = $self->{'json'};
307 my $doc_array = decode_json $json_str;
308
309 my $json_result_str = "[";
310 my $first_doc_rec = 1;
311 foreach my $doc_array_rec ( @$doc_array ) {
312
313 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
314
315 if($first_doc_rec) {
316 $first_doc_rec = 0;
317 } else {
318 $json_result_str .= ",";
319 }
320 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
321
322 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
323 $json_result_str = $json_result_str . ",\"metatable\":[";
324
325 my $first_rec = 1;
326 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
327
328 # Read the docid entry
329 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
330 # This now stores the full pathname
331 my $import_filename = $doc_rec->{'src-file'}->[0];
332 $import_filename = &util::placeholders_to_abspath($import_filename);
333
334 # figure out correct metadata.xml file [?]
335 # Assuming the metadata.xml file is next to the source file
336 # Note: This will not work if it is using the inherited metadata from the parent folder
337 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
338 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
339
340
341 if($first_rec) {
342 $first_rec = 0;
343 } else {
344 $json_result_str .= ",";
345 }
346
347 my $metaname = $metatable_rec->{'metaname'};
348 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
349
350 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
351 if(!defined $metapos) {
352 $metapos = 0;
353 }
354
355 # Obtain the specified metadata value(s)
356 my $metavalue;
357
358 if($metapos ne "all") { # get the value at a single metapos
359 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
360
361 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
362
363 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
364
365 } else {
366 my $first_metaval = 1;
367 $metapos = 0;
368 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
369
370 while (defined $metavalue && $metavalue ne "") {
371 if($first_metaval) {
372 $first_metaval = 0;
373 } else {
374 $json_result_str .= ",";
375 }
376
377 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
378
379 $metapos++;
380 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
381 }
382 }
383
384 $json_result_str .= "]}"; # close metavals array and metatable record
385 }
386
387 $json_result_str .= "]}"; # close metatable array and docid record
388 }
389
390 $json_result_str .= "]"; # close array of docids
391 $gsdl_cgi->generate_ok_message($json_result_str."\n");
392}
393
394# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
395# For more information on the format of the output, see _get_index_metadata_array, which is in use.
396sub _get_archives_metadata_array {
397
398 my $self = shift @_;
399
400 my $collect = $self->{'collect'};
401 my $gsdl_cgi = $self->{'gsdl_cgi'};
402 my $site = $self->{'site'};
403 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
404
405 # look up additional args
406 my $infodbtype = $self->{'infodbtype'};
407
408 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
409 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410
411 my $json_str = $self->{'json'};
412 my $doc_array = decode_json $json_str;
413
414 my $json_result_str = "[";
415 my $first_doc_rec = 1;
416 foreach my $doc_array_rec ( @$doc_array ) {
417
418 my $docid = $doc_array_rec->{'docid'};
419
420 if($first_doc_rec) {
421 $first_doc_rec = 0;
422 } else {
423 $json_result_str .= ",";
424 }
425 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
426
427 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
428 $json_result_str = $json_result_str . ",\"metatable\":[";
429
430 my $first_rec = 1;
431 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
432
433 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
434 $docid_secnum = "" if (!defined $docid_secnum);
435
436 # Read the docid entry
437 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
438 # This now stores the full pathname
439 my $doc_filename = $doc_rec->{'doc-file'}->[0];
440 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
441
442 if($first_rec) {
443 $first_rec = 0;
444 } else {
445 $json_result_str .= ",";
446 }
447
448 my $metaname = $metatable_rec->{'metaname'};
449 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
450
451 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
452 if(!defined $metapos) {
453 $metapos = 0;
454 }
455
456
457 # Obtain the specified metadata value(s)
458 my $metavalue;
459
460 if($metapos ne "all") { # get the value at a single metapos
461
462 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
463 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
464
465 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
466
467 } else {
468 my $first_metaval = 1;
469 $metapos = 0;
470 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
471
472 while (defined $metavalue && $metavalue ne "") {
473 if($first_metaval) {
474 $first_metaval = 0;
475 } else {
476 $json_result_str .= ",";
477 }
478
479 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
480
481 $metapos++;
482 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
483 }
484 }
485
486 $json_result_str .= "]}"; # close metavals array and metatable record
487 }
488
489 $json_result_str .= "]}"; # close metatable array and docid record
490 }
491
492 $json_result_str .= "]"; # close array of docids
493 $gsdl_cgi->generate_ok_message($json_result_str."\n");
494}
495
496
497# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
498sub _get_live_metadata_array
499{
500 my $self = shift @_;
501
502 my $collect = $self->{'collect'};
503 my $gsdl_cgi = $self->{'gsdl_cgi'};
504 my $site = $self->{'site'};
505 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
506
507 # look up additional args
508 my $infodbtype = $self->{'infodbtype'};
509
510 # To people who know $collect_tail please add some comments
511 # Obtain the path to the database
512 my $collect_tail = $collect;
513 $collect_tail =~ s/^.*[\/|\\]//;
514 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
515 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
516
517 my $json_str = $self->{'json'};
518 my $doc_array = decode_json $json_str;
519
520 my $json_result_str = "[";
521 my $first_doc_rec = 1;
522
523 foreach my $doc_array_rec ( @$doc_array ) {
524
525 my $docid = $doc_array_rec->{'docid'};
526
527 if($first_doc_rec) {
528 $first_doc_rec = 0;
529 } else {
530 $json_result_str .= ",";
531 }
532 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
533
534 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
535 $json_result_str = $json_result_str . ",\"metatable\":[";
536
537 my $first_rec = 1;
538 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
539 if($first_rec) {
540 $first_rec = 0;
541 } else {
542 $json_result_str .= ",";
543 }
544
545 my $metaname = $metatable_rec->{'metaname'};
546 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
547
548 # Generate the dbkey
549 my $dbkey = "$docid.$metaname";
550
551 # metapos for get_live_metadata is always assumed to be "all".
552 # It's always going to get all the lines of metavalues associated with a metaname
553 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
554 #my $metapos = "all";
555 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
556 my $metavalue = "";
557
558 # Obtain the content of the key
559 my $cmd = "gdbmget $infodb_file_path $dbkey";
560 if (open(GIN,"$cmd |") != 0) { # Success.
561
562 binmode(GIN,":utf8");
563 # Read everything in and concatenate them into $metavalue
564 my $line;
565 my $first_metaval = 1;
566 my $pos = 0;
567 while (defined ($line=<GIN>)) {
568 chomp($line); # Get rid off the tailing newlines
569
570 if($metapos eq "all") {
571 if($first_metaval) {
572 $first_metaval = 0;
573 } else {
574 $json_result_str .= ",";
575 }
576 $metavalue = $line;
577 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
578 } elsif($metapos == $pos) {
579 $metavalue = $line;
580 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
581 last;
582 } # else, the current $pos is not the required $metapos
583 $pos += 1;
584 }
585 close(GIN);
586 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
587
588 $json_result_str .= "]}"; # close metavals array and metatable record
589 }
590
591 $json_result_str .= "]}"; # close metatable array and docid record
592 }
593
594 $json_result_str .= "]"; # close array of docids
595
596 $gsdl_cgi->generate_ok_message($json_result_str."\n");
597}
598
599
600# Takes a JSON string and returns a JSON string
601# Request string is of the form:
602# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
603# Resulting string is of the form:
604# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
605sub _get_index_metadata_array
606{
607 my $self = shift @_;
608
609 my $collect = $self->{'collect'};
610 my $gsdl_cgi = $self->{'gsdl_cgi'};
611 my $site = $self->{'site'};
612 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
613
614 # look up additional args
615 my $infodbtype = $self->{'infodbtype'};
616
617 # To people who know $collect_tail please add some comments
618 # Obtain the path to the database
619 my $collect_tail = $collect;
620 $collect_tail =~ s/^.*[\/|\\]//;
621 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
622 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
623
624 my $json_str = $self->{'json'};
625 my $doc_array = decode_json $json_str;
626
627 my $json_result_str = "[";
628 my $first_doc_rec = 1;
629
630 foreach my $doc_array_rec ( @$doc_array ) {
631
632 my $docid = $doc_array_rec->{'docid'};
633
634 if($first_doc_rec) {
635 $first_doc_rec = 0;
636 } else {
637 $json_result_str .= ",";
638 }
639 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
640
641 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
642 $json_result_str = $json_result_str . ",\"metatable\":[";
643
644 my $first_rec = 1;
645 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
646 if($first_rec) {
647 $first_rec = 0;
648 } else {
649 $json_result_str .= ",";
650 }
651
652 my $metaname = $metatable_rec->{'metaname'};
653 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
654
655 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
656 if(!defined $metapos) {
657 $metapos = 0;
658 }
659
660 # Read the docid entry
661 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
662
663 # Basically loop through and unescape_html the values
664 foreach my $k (keys %$doc_rec) {
665 my @escaped_v = ();
666 foreach my $v (@{$doc_rec->{$k}}) {
667 my $ev = &ghtml::unescape_html($v);
668 push(@escaped_v, $ev);
669 }
670 $doc_rec->{$k} = \@escaped_v;
671 }
672
673 # Obtain the specified metadata value(s)
674 my $metavalue;
675
676 if($metapos ne "all") { # get the value at a single metapos
677
678 $metavalue = $doc_rec->{$metaname}->[$metapos];
679
680 # protect any double quotes and colons in the metavalue before putting it into JSON
681 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
682 $metavalue =~ s/\:/&58;/g if defined $metavalue;
683
684 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
685
686 } else {
687 my $first_metaval = 1;
688 $metapos = 0;
689 $metavalue = $doc_rec->{$metaname}->[$metapos];
690
691 while (defined $metavalue) {
692
693 # protect any double quotes and colons in the metavalue before putting it into JSON
694 $metavalue =~ s/\"/&quot;/g;
695 $metavalue =~ s/\:/&58;/g;
696
697 if($first_metaval) {
698 $first_metaval = 0;
699 } else {
700 $json_result_str .= ",";
701 }
702
703 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
704
705 $metapos++;
706 $metavalue = $doc_rec->{$metaname}->[$metapos];
707 }
708 }
709
710 $json_result_str .= "]}"; # close metavals array and metatable record
711 }
712
713 $json_result_str .= "]}"; # close metatable array and docid record
714 }
715
716 $json_result_str .= "]"; # close array of docids
717
718 $gsdl_cgi->generate_ok_message($json_result_str."\n");
719}
720
721
722sub get_index_metadata
723{
724 my $self = shift @_;
725
726 my $username = $self->{'username'};
727 my $collect = $self->{'collect'};
728 my $gsdl_cgi = $self->{'gsdl_cgi'};
729 my $gsdlhome = $self->{'gsdlhome'};
730
731 # Obtain the collect dir
732 my $site = $self->{'site'};
733 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
734 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
735
736 # look up additional args
737 my $docid = $self->{'d'};
738 my $metaname = $self->{'metaname'};
739 my $metapos = $self->{'metapos'};
740 my $infodbtype = $self->{'infodbtype'};
741
742 # To people who know $collect_tail please add some comments
743 # Obtain path to the database
744 my $collect_tail = $collect;
745 $collect_tail =~ s/^.*[\/|\\]//;
746 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
747 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
748
749 # Read the docid entry
750 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
751
752 # Basically loop through and unescape_html the values
753 foreach my $k (keys %$doc_rec) {
754 my @escaped_v = ();
755 foreach my $v (@{$doc_rec->{$k}}) {
756 my $ev = &ghtml::unescape_html($v);
757 push(@escaped_v, $ev);
758 }
759 $doc_rec->{$k} = \@escaped_v;
760 }
761
762 # Obtain the specified metadata value
763 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
764 my $metavalue = $doc_rec->{$metaname}->[$metapos];
765 $gsdl_cgi->generate_ok_message("$metavalue");
766
767}
768
769
770sub get_import_metadata
771{
772 my $self = shift @_;
773
774 my $username = $self->{'username'};
775 my $collect = $self->{'collect'};
776 my $gsdl_cgi = $self->{'gsdl_cgi'};
777 my $gsdlhome = $self->{'gsdlhome'};
778
779 # Obtain the collect dir
780 my $site = $self->{'site'};
781 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
782 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
783
784 # look up additional args
785 my $docid = $self->{'d'};
786 my $metaname = $self->{'metaname'};
787 my $metapos = $self->{'metapos'};
788 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/)); # gets the first value by default since metapos defaults to 0
789
790 my $infodbtype = $self->{'infodbtype'};
791 if (!defined $docid)
792 {
793 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
794 }
795
796 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
797 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
798 my $metadata_xml_file;
799 my $import_filename = undef;
800
801
802 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
803 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
804 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
805
806 # This now stores the full pathname
807 $import_filename = $doc_rec->{'src-file'}->[0];
808 $import_filename = &util::placeholders_to_abspath($import_filename);
809
810 # figure out correct metadata.xml file [?]
811 # Assuming the metadata.xml file is next to the source file
812 # Note: This will not work if it is using the inherited metadata from the parent folder
813 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
814 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
815
816 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
817
818}
819
820sub get_metadata_from_metadata_xml
821{
822 my $self = shift @_;
823 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
824
825 my @rules =
826 (
827 _default => 'raw',
828 'Metadata' => \&gfmxml_metadata,
829 'FileName' => \&mxml_filename
830 );
831
832 my $parser = XML::Rules->new
833 (
834 rules => \@rules,
835 output_encoding => 'utf8'
836 );
837
838 my $xml_in = "";
839 if (!open(MIN,"<$metadata_xml_filename"))
840 {
841 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
842 }
843 else
844 {
845 # Read them in
846 my $line;
847 while (defined ($line=<MIN>)) {
848 $xml_in .= $line;
849 }
850 close(MIN);
851
852 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
853
854 if(defined $parser->{'pad'}->{'metavalue'})
855 {
856 return $parser->{'pad'}->{'metavalue'};
857 }
858 else
859 {
860 return "";
861 }
862 }
863}
864
865sub gfmxml_metadata
866{
867 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
868
869 # no subsection support yet in metadata.xml
870
871 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
872 {
873 if (!defined $parser->{'parameters'}->{'poscount'})
874 {
875 $parser->{'parameters'}->{'poscount'} = 0;
876 }
877 else
878 {
879 $parser->{'parameters'}->{'poscount'}++;
880 }
881
882 # gets the first value by default, since metapos defaults to 0
883 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
884 {
885 if($parser->{'parameters'}->{'metapos'} > 0) {
886 print STDERR "@@@@ WARNING: non-zero metapos.\n";
887 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
888 }
889 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
890 }
891 }
892}
893
894sub get_archives_metadata
895{
896 my $self = shift @_;
897
898 my $username = $self->{'username'};
899 my $collect = $self->{'collect'};
900 my $gsdl_cgi = $self->{'gsdl_cgi'};
901# my $gsdlhome = $self->{'gsdlhome'};
902 my $infodbtype = $self->{'infodbtype'};
903
904 # Obtain the collect dir
905 my $site = $self->{'site'};
906 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
907
908 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
909
910 # look up additional args
911 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
912 $docid_secnum = "" if (!defined $docid_secnum);
913
914 my $metaname = $self->{'metaname'};
915 my $metapos = $self->{'metapos'};
916 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
917
918 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
919 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
920
921 # This now stores the full pathname
922 my $doc_filename = $doc_rec->{'doc-file'}->[0];
923
924 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
925
926}
927
928sub get_metadata_from_archive_xml
929{
930 my $self = shift @_;
931 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
932
933 my @start_rules = ('Section' => \&dxml_start_section);
934
935 my @rules =
936 (
937 _default => 'raw',
938 'Metadata' => \&gfdxml_metadata
939 );
940
941 my $parser = XML::Rules->new
942 (
943 start_rules => \@start_rules,
944 rules => \@rules,
945 output_encoding => 'utf8'
946 );
947
948 my $xml_in = "";
949 if (!open(MIN,"<$doc_xml_filename"))
950 {
951 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
952 }
953 else
954 {
955 # Read them in
956 my $line;
957 while (defined ($line=<MIN>)) {
958 $xml_in .= $line;
959 }
960 close(MIN);
961
962 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
963
964 if(defined $parser->{'pad'}->{'metavalue'})
965 {
966 return $parser->{'pad'}->{'metavalue'};
967 }
968 else
969 {
970 return "";
971 }
972 }
973}
974
975sub gfdxml_metadata
976{
977 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
978
979 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
980 {
981 return;
982 }
983
984 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
985 {
986 if (!defined $parser->{'parameters'}->{'poscount'})
987 {
988 $parser->{'parameters'}->{'poscount'} = 0;
989 }
990 else
991 {
992 $parser->{'parameters'}->{'poscount'}++;
993 }
994 }
995
996 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
997 {
998 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
999 }
1000}
1001
1002sub _set_live_metadata
1003{
1004 my $self = shift @_;
1005
1006 my $collect = $self->{'collect'};
1007 my $gsdl_cgi = $self->{'gsdl_cgi'};
1008 #my $gsdlhome = $self->{'gsdlhome'};
1009 my $infodbtype = $self->{'infodbtype'};
1010
1011 # Obtain the collect dir
1012 my $site = $self->{'site'};
1013 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1014 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1015
1016
1017 # look up additional args
1018 my $docid = $self->{'d'};
1019 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1020 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1021 }
1022 my $metavalue = $self->{'metavalue'};
1023
1024 # Generate the dbkey
1025 my $metaname = $self->{'metaname'};
1026 my $dbkey = "$docid.$metaname";
1027
1028 # To people who know $collect_tail please add some comments
1029 # Obtain path to the database
1030 my $collect_tail = $collect;
1031 $collect_tail =~ s/^.*[\/|\\]//;
1032 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1033 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1034
1035 # Set the new value
1036 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1037 my $status = system($cmd);
1038 if ($status != 0) {
1039 # Catch error if gdbmget failed
1040 my $mess = "Failed to set metadata key: $dbkey\n";
1041
1042 $mess .= "PATH: $ENV{'PATH'}\n";
1043 $mess .= "cmd = $cmd\n";
1044 $mess .= "Exit status: $status\n";
1045 $mess .= "System Error Message: $!\n";
1046
1047 $gsdl_cgi->generate_error($mess);
1048 }
1049 else {
1050 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
1051 }
1052
1053 #return $status; # in case calling functions have any further use for this
1054}
1055
1056sub set_live_metadata
1057{
1058 my $self = shift @_;
1059
1060 my $username = $self->{'username'};
1061 my $collect = $self->{'collect'};
1062 my $gsdl_cgi = $self->{'gsdl_cgi'};
1063
1064 if ($baseaction::authentication_enabled) {
1065 # Ensure the user is allowed to edit this collection
1066 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1067 }
1068
1069 # Make sure the collection isn't locked by someone else
1070 $self->lock_collection($username, $collect);
1071
1072 $self->_set_live_metadata(@_);
1073
1074 # Release the lock once it is done
1075 $self->unlock_collection($username, $collect);
1076}
1077
1078sub set_index_metadata_entry
1079{
1080 print STDERR "1\n";
1081 my $self = shift @_;
1082 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
1083
1084 # To people who know $collect_tail please add some comments
1085 # Obtain path to the database
1086 my $collect_tail = $collect;
1087 $collect_tail =~ s/^.*[\/|\\]//;
1088 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1089 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1090
1091 print STDERR "2\n";
1092# print STDERR "**** infodb file path = $infodb_file_path\n";
1093# print STDERR "***** infodb type = $infodbtype\n";
1094
1095 # Read the docid entry
1096 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1097
1098 # Set the metadata value
1099 if (defined $metapos) {
1100 print STDERR "3\n";
1101 # if metamode=accumulate AND metapos, warn user and then use metapos
1102 if (defined $metamode && $metamode eq "accumulate") {
1103 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1104 print STDERR "**** Proceeding by using metapos\n";
1105 }
1106 $doc_rec->{$metaname}->[$metapos] = $metavalue;
1107 }
1108 elsif (defined $prevmetavalue) {
1109 print STDERR "4\n";
1110 my $array = $doc_rec->{$metaname};
1111 my $length = @$array;
1112
1113 my $found = 0;
1114 for (my $i = 0; $i < $length; $i++){
1115 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1116 $doc_rec->{$metaname}->[$i] = $metavalue;
1117 $found = 1;
1118 last;
1119 }
1120 }
1121
1122 if($found == 0){
1123 $doc_rec->{$metaname} = [ $metavalue ];
1124 }
1125 }
1126 elsif (defined $metamode && $metamode eq "override") {
1127 print STDERR "5\n";
1128 $doc_rec->{$metaname} = [ $metavalue ];
1129 }
1130 else { # default for index was to override, but because accumulate is less destructive,
1131 # and because accumulate is the default for archives and import, that's the new default for index too
1132 print STDERR "6\n";
1133 if(defined $doc_rec->{$metaname}) {
1134 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1135 } else {
1136 $doc_rec->{$metaname} = [ $metavalue ];
1137 }
1138 }
1139 print STDERR "6\n";
1140
1141 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1142
1143 return $status;
1144
1145}
1146
1147sub _set_import_metadata
1148{
1149 my $self = shift @_;
1150
1151 my $collect = $self->{'collect'};
1152 my $gsdl_cgi = $self->{'gsdl_cgi'};
1153 my $infodbtype = $self->{'infodbtype'};
1154# my $gsdlhome = $self->{'gsdlhome'};
1155
1156 # Obtain the collect and archive dir
1157 my $site = $self->{'site'};
1158 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1159 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1160 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1161
1162 # look up additional args
1163 # want either d= or f=
1164 my $docid = $self->{'d'};
1165 my ($docid_root,$docid_secnum);
1166 if(defined $docid) {
1167 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1168 # as yet no support for setting subsection metadata in metadata.xml
1169 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1170 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
1171 return;
1172 }
1173 }
1174
1175 my $import_file = $self->{'f'};
1176 if ((!defined $docid || $docid =~ m/^\s*$/) && (!defined $import_file || $import_file =~ m/^\s*$/)) {
1177 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified."); # at least d or f must be specified
1178 }
1179
1180 # Get the parameters and set default mode to "accumulate"
1181 my $metaname = $self->{'metaname'};
1182 my $metavalue = $self->{'metavalue'};
1183## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1184 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1185
1186 my $metamode = $self->{'metamode'};
1187 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1188 # make "accumulate" the default (less destructive, as it won't actually
1189 # delete any existing values)
1190 $metamode = "accumulate";
1191 }
1192
1193 # adding metapos and prevmetavalue support to import_metadata subroutines
1194 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1195 my $prevmetavalue = $self->{'prevmetavalue'};
1196 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1197 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
1198
1199 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1200 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1201
1202 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1203 $mess .= " $metaname";
1204 $mess .= " = $metavalue";
1205 $mess .= " ($metamode)\n";
1206
1207 $gsdl_cgi->generate_ok_message($mess);
1208
1209 #return $status; # in case calling functions have any further use for this
1210}
1211
1212# the version of set_index_meta that doesn't do authentication
1213sub _set_archives_metadata
1214{
1215 my $self = shift @_;
1216
1217 my $collect = $self->{'collect'};
1218 my $gsdl_cgi = $self->{'gsdl_cgi'};
1219 my $infodbtype = $self->{'infodbtype'};
1220
1221 # Obtain the collect and archive dir
1222 my $site = $self->{'site'};
1223 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1224 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1225
1226 # look up additional args
1227 my $docid = $self->{'d'};
1228 my $metaname = $self->{'metaname'};
1229 my $metavalue = $self->{'metavalue'};
1230 my $prevmetavalue = $self->{'prevmetavalue'};
1231
1232 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1233 # Don't append "|| undef", since if metapos=0 it will then be set to undef
1234
1235 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1236 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
1237
1238 my $metamode = $self->{'metamode'};
1239 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1240 # make "accumulate" the default (less destructive, as it won't actually
1241 # delete any existing values)
1242 $metamode = "accumulate";
1243 }
1244
1245 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1246 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1247
1248 if ($status == 0) {
1249 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1250 $mess .= " $metaname";
1251 $mess .= "->[$metapos]" if (defined $metapos);
1252 $mess .= " = $metavalue";
1253 $mess .= " ($metamode)\n";
1254
1255 $gsdl_cgi->generate_ok_message($mess);
1256 }
1257 else {
1258 my $mess .= "Failed to set archives metadata key: $docid\n";
1259 $mess .= "Exit status: $status\n";
1260 if(defined $self->{'error_msg'}) {
1261 $mess .= "Error Message: $self->{'error_msg'}\n";
1262 } else {
1263 $mess .= "System Error Message: $!\n";
1264 }
1265 $mess .= "-" x 20 . "\n";
1266
1267 $gsdl_cgi->generate_error($mess);
1268 }
1269
1270 #return $status; # in case calling functions have any further use for this
1271}
1272
1273
1274# the version of set_index_meta that doesn't do authentication
1275sub _set_index_metadata
1276{
1277 print STDERR "START SET INDEX METADATA\n";
1278 my $self = shift @_;
1279
1280 my $collect = $self->{'collect'};
1281 my $gsdl_cgi = $self->{'gsdl_cgi'};
1282
1283 my $site = $self->{'site'};
1284 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1285
1286 # look up additional args
1287 my $docid = $self->{'d'};
1288 my $metaname = $self->{'metaname'};
1289 my $metapos = $self->{'metapos'}; # undef has meaning
1290 my $metavalue = $self->{'metavalue'};
1291 my $infodbtype = $self->{'infodbtype'};
1292 my $metamode = $self->{'metamode'};
1293 my $prevmetavalue = $self->{'prevmetavalue'};
1294
1295 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
1296 $prevmetavalue = undef if(defined $prevmetavalue && ($prevmetavalue =~ m/^\s*$/));
1297
1298 print STDERR "SETTING INDEX METADATA ENTRY\n";
1299 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1300 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
1301 if ($status != 0) {
1302 # Catch error if set infodb entry failed
1303 my $mess = "Failed to set metadata key: $docid\n";
1304
1305 $mess .= "PATH: $ENV{'PATH'}\n";
1306 $mess .= "Exit status: $status\n";
1307 $mess .= "System Error Message: $!\n";
1308
1309 $gsdl_cgi->generate_error($mess);
1310 }
1311 else {
1312 my $mess = "set-index-metadata successful: Key[$docid]\n";
1313 $mess .= " $metaname";
1314 $mess .= "->[$metapos]" if (defined $metapos);
1315 $mess .= " = $metavalue\n";
1316
1317 $gsdl_cgi->generate_ok_message($mess);
1318 }
1319
1320 print STDERR "END SET INDEX METADATA\n";
1321 #return $status; # in case calling functions have any further use for this
1322}
1323
1324sub set_index_metadata
1325{
1326 my $self = shift @_;
1327
1328 my $username = $self->{'username'};
1329 my $collect = $self->{'collect'};
1330 my $gsdl_cgi = $self->{'gsdl_cgi'};
1331 #my $gsdlhome = $self->{'gsdlhome'};
1332
1333 if ($baseaction::authentication_enabled) {
1334 # Ensure the user is allowed to edit this collection
1335 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1336 }
1337
1338 my $site = $self->{'site'};
1339 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1340
1341 $gsdl_cgi->checked_chdir($collect_dir);
1342
1343 # Obtain the collect dir
1344 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1345
1346 # Make sure the collection isn't locked by someone else
1347 $self->lock_collection($username, $collect);
1348
1349 $self->_set_index_metadata(@_);
1350
1351 # Release the lock once it is done
1352 $self->unlock_collection($username, $collect);
1353}
1354
1355# call this to set the metadata for a combination of dirs archives, import or index, or live
1356# if none specified, defaults to index which was the original behaviour of set_metadata.
1357sub set_metadata
1358{
1359 my $self = shift @_;
1360
1361 # Testing that not defining a variable, setting it to "" or to " " all return false
1362 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1363
1364 my $where = $self->{'where'};
1365 if(!$where || ($where =~ m/^\s*$/)) {
1366 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1367 return;
1368 }
1369
1370 # authenticate and lock collection once, even if processing multiple dirs
1371 my $username = $self->{'username'};
1372 my $collect = $self->{'collect'};
1373 my $gsdl_cgi = $self->{'gsdl_cgi'};
1374
1375 if ($baseaction::authentication_enabled) {
1376 # Ensure the user is allowed to edit this collection
1377 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1378 }
1379
1380 if($where =~ m/index/) {
1381 my $site = $self->{'site'};
1382 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1383 $gsdl_cgi->checked_chdir($collect_dir);
1384 }
1385
1386 # Make sure the collection isn't locked by someone else
1387 $self->lock_collection($username, $collect);
1388
1389
1390 # now at last can set the metadata. $where can specify multiple
1391 # $where is of the form: import|archives|index, or a subset thereof
1392
1393 #my @whichdirs = split('\|', $where);
1394
1395 # just check whether $where contains import/archives/index/live in turn, and
1396 # for each case, process it accordingly
1397 if($where =~ m/import/) {
1398 $self->_set_import_metadata(@_);
1399 }
1400
1401 if($where =~ m/archives/) {
1402
1403 # look up docID arg which is optional to set_metadata because it's optional
1404 # to set_import, but which is compulsory to set_archives_metadata
1405 my $docid = $self->{'d'};
1406 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1407 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1408 }
1409 # we have a docid, so can set archives meta
1410 $self->_set_archives_metadata(@_);
1411 }
1412
1413 if($where =~ m/index/) {
1414
1415 # look up docID arg which is optional to set_metadata because it's optional
1416 # to set_import, but which is compulsory to set_archives_metadata
1417 my $docid = $self->{'d'};
1418 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1419 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1420 }
1421 # we have a docid, so can set index meta
1422 $self->_set_index_metadata(@_);
1423 }
1424
1425 if($where =~ m/live/) {
1426 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1427 }
1428
1429 # Release the lock once it is done
1430 $self->unlock_collection($username, $collect);
1431}
1432
1433sub set_metadata_array
1434{
1435 my $self = shift @_;
1436
1437 my $where = $self->{'where'};
1438 if(!$where || ($where =~ m/^\s*$/)) {
1439 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1440 return;
1441 }
1442
1443 my $username = $self->{'username'};
1444 my $collect = $self->{'collect'};
1445 my $gsdl_cgi = $self->{'gsdl_cgi'};
1446
1447 if ($baseaction::authentication_enabled) {
1448 # Ensure the user is allowed to edit this collection
1449 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1450 }
1451
1452 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1453 # But including the stmt during this code reorganisation to preserve as-is what used to happen
1454 my $site = $self->{'site'};
1455 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1456 $gsdl_cgi->checked_chdir($collect_dir);
1457
1458 # Make sure the collection isn't locked by someone else
1459 $self->lock_collection($username, $collect);
1460
1461 if($where =~ m/import/) {
1462 $self->_set_import_metadata_array(@_);
1463 }
1464 if($where =~ m/archives/) {
1465 $self->_set_archives_metadata_array(@_);
1466 }
1467 if($where =~ m/index/) {
1468 $self->_set_index_metadata_array(@_);
1469 }
1470 if($where =~ m/live/) {
1471 $self->_set_live_metadata_array(@_);
1472 }
1473
1474 # Release the lock once it is done
1475 $self->unlock_collection($username, $collect);
1476}
1477
1478sub _set_index_metadata_array
1479{
1480 my $self = shift @_;
1481
1482 my $collect = $self->{'collect'};
1483 my $gsdl_cgi = $self->{'gsdl_cgi'};
1484 my $site = $self->{'site'};
1485 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1486
1487
1488 # look up additional args
1489
1490 my $infodbtype = $self->{'infodbtype'};
1491
1492 my $json_str = $self->{'json'};
1493 my $doc_array = decode_json $json_str;
1494
1495
1496 my $global_status = 0;
1497 my $global_mess = "";
1498
1499 my @all_docids = ();
1500
1501 foreach my $doc_array_rec ( @$doc_array ) {
1502
1503 my $status = -1;
1504 my $docid = $doc_array_rec->{'docid'};
1505
1506 push(@all_docids,$docid);
1507
1508 my $metaname = $doc_array_rec->{'metaname'};
1509 if(defined $metaname) {
1510 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
1511 my $metavalue = $doc_array_rec->{'metavalue'};
1512 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1513
1514 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1515 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1516 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1517
1518 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1519 $metaname = $metatable_rec->{'metaname'};
1520 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1521 my $metapos = undef;
1522 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1523
1524 foreach my $metavalue ( @$metavals ) { # metavals is an array
1525 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1526 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1527 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1528 # metavalue for this metaname will be lost
1529 $metamode = "accumulate";
1530 }
1531 }
1532 }
1533 }
1534
1535 if ($status != 0) {
1536 # Catch error if set infodb entry failed
1537 $global_status = $status;
1538 $global_mess .= "Failed to set metadata key: $docid\n";
1539 $global_mess .= "Exit status: $status\n";
1540 $global_mess .= "System Error Message: $!\n";
1541 $global_mess .= "-" x 20;
1542 }
1543 }
1544
1545 if ($global_status != 0) {
1546 $global_mess .= "PATH: $ENV{'PATH'}\n";
1547 $gsdl_cgi->generate_error($global_mess);
1548 }
1549 else {
1550 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1551 $gsdl_cgi->generate_ok_message($mess);
1552 }
1553}
1554
1555sub set_index_metadata_array
1556{
1557 my $self = shift @_;
1558
1559 my $username = $self->{'username'};
1560 my $collect = $self->{'collect'};
1561 my $gsdl_cgi = $self->{'gsdl_cgi'};
1562# my $gsdlhome = $self->{'gsdlhome'};
1563
1564 if ($baseaction::authentication_enabled) {
1565 # Ensure the user is allowed to edit this collection
1566 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1567 }
1568
1569 my $site = $self->{'site'};
1570 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1571
1572 $gsdl_cgi->checked_chdir($collect_dir);
1573
1574 # Obtain the collect dir
1575 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1576
1577 # Make sure the collection isn't locked by someone else
1578 $self->lock_collection($username, $collect);
1579
1580 $self->_set_index_metadata_array(@_);
1581
1582 # Release the lock once it is done
1583 $self->unlock_collection($username, $collect);
1584}
1585
1586# experimental, newly added in and untested
1587sub _set_live_metadata_array
1588{
1589 my $self = shift @_;
1590
1591 my $collect = $self->{'collect'};
1592 my $gsdl_cgi = $self->{'gsdl_cgi'};
1593
1594 my $site = $self->{'site'};
1595 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1596
1597
1598 # look up additional args
1599 my $infodbtype = $self->{'infodbtype'};
1600 # To people who know $collect_tail please add some comments
1601 # Obtain path to the database
1602 my $collect_tail = $collect;
1603 $collect_tail =~ s/^.*[\/|\\]//;
1604 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1605 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1606
1607
1608 my $json_str = $self->{'json'};
1609 my $doc_array = decode_json $json_str;
1610
1611
1612 my $global_status = 0;
1613 my $global_mess = "";
1614
1615 my @all_docids = ();
1616
1617
1618 foreach my $doc_array_rec ( @$doc_array ) {
1619
1620 my $status = -1;
1621 my $docid = $doc_array_rec->{'docid'};
1622
1623 push(@all_docids,$docid);
1624
1625 my $metaname = $doc_array_rec->{'metaname'};
1626 if(defined $metaname) {
1627 my $dbkey = "$docid.$metaname";
1628 my $metavalue = $doc_array_rec->{'metavalue'};
1629
1630 # Set the new value
1631 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1632 $status = system($cmd);
1633
1634 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1635 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1636 foreach my $metatable_rec ( @$metatable ) {
1637 $metaname = $metatable_rec->{'metaname'};
1638 my $dbkey = "$docid.$metaname";
1639
1640 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1641 foreach my $metavalue ( @$metavals ) {
1642 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1643 $status = system($cmd);
1644 }
1645 }
1646
1647 }
1648
1649 if ($status != 0) {
1650 # Catch error if gdbmget failed
1651 $global_status = $status;
1652 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1653 $global_mess .= "Exit status: $status\n";
1654 $global_mess .= "System Error Message: $!\n";
1655 $global_mess .= "-" x 20;
1656 }
1657 }
1658
1659 if ($global_status != 0) {
1660 $global_mess .= "PATH: $ENV{'PATH'}\n";
1661 $gsdl_cgi->generate_error($global_mess);
1662 }
1663 else {
1664 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1665 $gsdl_cgi->generate_ok_message($mess);
1666 }
1667}
1668
1669sub set_live_metadata_array
1670{
1671 my $self = shift @_;
1672
1673 my $username = $self->{'username'};
1674 my $collect = $self->{'collect'};
1675 my $gsdl_cgi = $self->{'gsdl_cgi'};
1676
1677 if ($baseaction::authentication_enabled) {
1678 # Ensure the user is allowed to edit this collection
1679 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1680 }
1681
1682 my $site = $self->{'site'};
1683 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1684
1685 $gsdl_cgi->checked_chdir($collect_dir);
1686
1687 # Make sure the collection isn't locked by someone else
1688 $self->lock_collection($username, $collect);
1689
1690 $self->_set_live_metadata_array(@_);
1691
1692 # Release the lock once it is done
1693 $self->unlock_collection($username, $collect);
1694}
1695
1696
1697sub dxml_metadata
1698{
1699 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1700 my $metaname = $parser->{'parameters'}->{'metaname'};
1701 my $metamode = $parser->{'parameters'}->{'metamode'};
1702
1703###!!! print STDERR "**** Processing closing </Metadata> tag\n";
1704
1705 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1706
1707 # Find the right metadata tag and checks if we are going to
1708 # override it
1709 #
1710 # Note: This over writes the first metadata block it
1711 # encountered. If there are multiple Sections in the doc.xml, it
1712 # might not behave as you would expect
1713
1714 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1715## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1716## print STDERR "**** metamode = $metamode\n";
1717
1718 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1719 {
1720 my $name_attr = $attrHash->{'name'};
1721 # print STDOUT "*** testing: $name_attr eq $metaname ? and $metamode eq override ?\n";
1722 if (($name_attr eq $metaname) && ($metamode eq "override"))
1723 {
1724 if (!defined $parser->{'parameters'}->{'poscount'})
1725 {
1726 $parser->{'parameters'}->{'poscount'} = 0;
1727 }
1728 else
1729 {
1730 $parser->{'parameters'}->{'poscount'}++;
1731 }
1732
1733 if ((defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1734 || (!defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == 0))
1735
1736 {
1737 ##print STDERR "#### got match!!\n";
1738 # Get the value and override the current value
1739 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1740 $attrHash->{'_content'} = $metavalue;
1741
1742 # Don't want it to wipe out any other pieces of metadata
1743 $parser->{'parameters'}->{'metamode'} = "done";
1744 }
1745 elsif (defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1746 {
1747 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1748 $attrHash->{'_content'} = $metavalue;
1749 $parser->{'parameters'}->{'metamode'} = "done";
1750 }
1751 }
1752 }
1753
1754 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1755 return [$tagname => $attrHash];
1756}
1757
1758# This method exists purely for catching invalid section numbers that the client
1759# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1760# we've seen all the Sections in the doc.xml, and none of their section nums matched
1761# if the metamode has not been set to 'done' by then.
1762sub dxml_archive
1763{
1764 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1765 my $metamode = $parser->{'parameters'}->{'metamode'};
1766
1767 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1768 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1769
1770# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1771
1772 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1773 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1774
1775 my $metaname = $parser->{'parameters'}->{'metaname'};
1776 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1777
1778 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1779 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1780 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1781 print STDERR "\tin section $opt_doc_secnum.\n";
1782 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1783 }
1784
1785 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1786 return [$tagname => $attrHash];
1787}
1788
1789sub dxml_description
1790{
1791 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1792 my $metamode = $parser->{'parameters'}->{'metamode'};
1793
1794 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1795 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1796
1797###!!! print STDERR "**** Processing a closing </Description> tag \n";
1798# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1799
1800 # Accumulate the metadata
1801
1802 # We'll be accumulating metadata at this point if we haven't found and therefore
1803 # haven't processed the metadata yet.
1804 # For subsections, this means that if we're at a matching subsection, but haven't
1805 # found the correct metaname to override in that subsection, we accumulate it as new
1806 # meta in the subsection by adding it to the current description.
1807 # If there's no subsection info for the metadata, it will accumulate at the top level
1808 # section description if we hadn't found a matching metaname to override at this point.
1809
1810 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1811 # at the toplevel section. In the latter case, it means we want to process meta in the
1812 # toplevel section. So the eq check between the values below will work in all cases.
1813
1814 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1815 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1816 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1817
1818 if ($curr_secnum eq $opt_doc_secnum
1819 && ($metamode eq "accumulate" || $metamode eq "override")) {
1820 if ($metamode eq "override") {
1821 print "Got to end of <Description> block. No metadata value to override. Switching 'metamode' to accumulate\n";
1822 }
1823
1824 # If we get to here and metamode is override, this means there
1825 # was no existing value to overide => treat as an append operation
1826
1827 # Tack a new metadata tag on to the end of the <Metadata>+ block
1828 my $metaname = $parser->{'parameters'}->{'metaname'};
1829 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1830
1831 my $metadata_attr = {
1832 '_content' => $metavalue,
1833 'name' => $metaname,
1834 'mode' => "accumulate"
1835 };
1836
1837 my $append_metadata = [ "Metadata" => $metadata_attr ];
1838 my $description_content = $attrHash->{'_content'};
1839
1840 print "Appending metadata to doc.xml\n";
1841
1842 if (ref($description_content)) {
1843 # got some existing interesting nested content
1844 push(@$description_content, " ", $append_metadata ,"\n ");
1845 }
1846 else {
1847 #description_content is most likely a string such as "\n"
1848 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1849 }
1850
1851 $parser->{'parameters'}->{'metamode'} = "done";
1852 }
1853 else {
1854 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1855## print STDERR "**** NOT ACCUMULATE?!? \n";
1856 }
1857
1858 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1859 return [$tagname => $attrHash];
1860}
1861
1862
1863sub dxml_start_section
1864{
1865 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1866
1867 my $new_depth = scalar(@$contextArray);
1868
1869# print STDERR "**** START SECTION \n";
1870
1871 if ($new_depth == 1) {
1872 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1873 $parser->{'parameters'}->{'curr_section_num'} = "";
1874 }
1875
1876 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1877 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1878
1879 my $new_secnum;
1880
1881 if ($new_depth > $old_depth) {
1882 # child subsection
1883 $new_secnum = "$old_secnum.1";
1884 }
1885 elsif ($new_depth == $old_depth) {
1886 # sibling section => increase it's value by 1
1887 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1888 $tail_num++;
1889 $new_secnum = $old_secnum;
1890 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1891 }
1892 else {
1893 # back up to parent section => lopp off tail
1894 $new_secnum = $old_secnum;
1895 $new_secnum =~ s/\.\d+$//;
1896 }
1897
1898 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1899 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1900
1901 1;
1902}
1903
1904sub edit_xml_file
1905{
1906 my $self = shift @_;
1907 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1908
1909 # use XML::Rules to add it in (read in and out again)
1910 my $parser = XML::Rules->new(start_rules => $start_rules,
1911 rules => $rules,
1912 style => 'filter',
1913 output_encoding => 'utf8' );
1914
1915 my $xml_in = "";
1916 if (!open(MIN,"<$filename")) {
1917 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1918 }
1919 else {
1920 # Read all the text in
1921 my $line;
1922 while (defined ($line=<MIN>)) {
1923 $xml_in .= $line;
1924 }
1925 close(MIN);
1926
1927 my $MOUT;
1928 if (!open($MOUT,">$filename")) {
1929 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1930 }
1931 else {
1932 # Matched lines will get handled by the call backs
1933## my $xml_out = "";
1934
1935 binmode($MOUT,":utf8");
1936 $parser->filter($xml_in,$MOUT, $options);
1937
1938# binmode(MOUT,":utf8");
1939# print MOUT $xml_out;
1940 close($MOUT);
1941 }
1942 }
1943
1944 # copy across any custom error information that was stored during parsing
1945 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1946}
1947
1948sub edit_doc_xml
1949{
1950 my $self = shift @_;
1951 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1952
1953 my $info_mess = <<RAWEND;
1954****************************
1955 edit_doc_xml()
1956****************************
1957RAWEND
1958
1959 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1960 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1961 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1962 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1963 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1964 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1965 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1966
1967 $info_mess .= "****************************\n";
1968
1969 $gsdl_cgi->generate_message($info_mess);
1970
1971 # To monitor which section/subsection number we are in
1972 my @start_rules =
1973 ( 'Section' => \&dxml_start_section );
1974
1975 # use XML::Rules to add it in (read in and out again)
1976 # Set the call back functions
1977 my @rules =
1978 ( _default => 'raw',
1979 'Metadata' => \&dxml_metadata,
1980 'Description' => \&dxml_description,
1981 'Archive' => \&dxml_archive); # just for catching errors at end
1982
1983 # Sets the parameters
1984 my $options = { 'metaname' => $metaname,
1985 'metapos' => $metapos,
1986 'metavalue' => $metavalue,
1987 'metamode' => $metamode,
1988 'prevmetavalue' => $prevmetavalue };
1989
1990 if (defined $opt_secnum) {
1991 $options->{'secnum'} = $opt_secnum;
1992 }
1993
1994 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1995}
1996
1997sub set_archives_metadata_entry
1998{
1999 my $self = shift @_;
2000 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
2001
2002 my $info_mess = <<RAWEND;
2003****************************
2004 set_archives_metadata_entry()
2005****************************
2006RAWEND
2007
2008 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
2009 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2010 $info_mess .= " collect = $collect\n" if defined($collect);
2011 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2012 $info_mess .= " docid = $docid\n" if defined($docid);
2013 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2014 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2015 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2016 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2017 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2018
2019 $info_mess .= "****************************\n";
2020
2021 $gsdl_cgi->generate_message($info_mess);
2022
2023 # Obtain the doc.xml path for the specified docID
2024 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2025
2026 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2027 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2028 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2029
2030 # The $doc_xml_file is relative to the archives, and now let's get the full path
2031 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
2032 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
2033
2034 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
2035 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2036 # Thereafter, we will finally be setting the overriding metavalue for this metaname
2037 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2038 # remove all values of $metaname metadata
2039 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
2040 }
2041 # Edit the doc.xml file with the specified metadata name, value and position.
2042 # TODO: there is a potential problem here as this edit_doc_xml function
2043 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2044 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2045
2046 # dxml_metadata method ignores metapos if metamode anything other than override
2047 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
2048 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
2049
2050 # return 0; # return 0 for now to indicate no error
2051 return (defined $self->{'error_msg'}) ? 1 : 0;
2052}
2053
2054
2055sub set_archives_metadata
2056{
2057 my $self = shift @_;
2058
2059 my $username = $self->{'username'};
2060 my $collect = $self->{'collect'};
2061 my $gsdl_cgi = $self->{'gsdl_cgi'};
2062
2063 if ($baseaction::authentication_enabled) {
2064 # Ensure the user is allowed to edit this collection
2065 $self->authenticate_user($username, $collect);
2066 }
2067
2068 # Make sure the collection isn't locked by someone else
2069 $self->lock_collection($username, $collect);
2070
2071 $self->_set_archives_metadata(@_);
2072
2073 # Release the lock once it is done
2074 $self->unlock_collection($username, $collect);
2075}
2076
2077sub _set_archives_metadata_array
2078{
2079 my $self = shift @_;
2080
2081 my $collect = $self->{'collect'};
2082 my $gsdl_cgi = $self->{'gsdl_cgi'};
2083 my $site = $self->{'site'};
2084 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2085
2086 # look up additional args
2087
2088 my $infodbtype = $self->{'infodbtype'};
2089
2090 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2091
2092 my $json_str = $self->{'json'};
2093 my $doc_array = decode_json $json_str;
2094
2095
2096 my $global_status = 0;
2097 my $global_mess = "";
2098
2099 my @all_docids = ();
2100
2101 foreach my $doc_array_rec ( @$doc_array ) {
2102 my $status = -1;
2103 my $docid = $doc_array_rec->{'docid'};
2104
2105 push(@all_docids,$docid);
2106
2107 my $metaname = $doc_array_rec->{'metaname'};
2108 if(defined $metaname) {
2109
2110 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2111
2112 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2113 my $metavalue = $doc_array_rec->{'metavalue'};
2114 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2115
2116
2117 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2118 # make "accumulate" the default (less destructive, as it won't actually
2119 # delete any existing values)
2120 $metamode = "accumulate";
2121 }
2122
2123 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2124 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2125 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2126 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2127
2128 foreach my $metatable_rec ( @$metatable ) {
2129 $metaname = $metatable_rec->{'metaname'};
2130 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2131 my $metapos = undef;
2132 my $prevmetavalue = undef;
2133 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2134
2135 foreach my $metavalue ( @$metavals ) {
2136 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2137 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2138
2139 if($metamode eq "override") { # now, having overridden the metavalue for the first,
2140 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2141 # metavalue for this metaname will be lost
2142 $metamode = "accumulate";
2143 }
2144 }
2145 }
2146 }
2147
2148 if ($status != 0) {
2149 # Catch error if set infodb entry failed
2150 $global_status = $status;
2151 $global_mess .= "Failed to set metadata key: $docid\n";
2152 $global_mess .= "Exit status: $status\n";
2153 $global_mess .= "System Error Message: $!\n";
2154 $global_mess .= "-" x 20 . "\n";
2155 }
2156 }
2157
2158 if ($global_status != 0) {
2159 $global_mess .= "PATH: $ENV{'PATH'}\n";
2160 $gsdl_cgi->generate_error($global_mess);
2161 }
2162 else {
2163 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2164 $gsdl_cgi->generate_ok_message($mess);
2165 }
2166}
2167
2168sub set_archives_metadata_array
2169{
2170 my $self = shift @_;
2171
2172 my $username = $self->{'username'};
2173 my $collect = $self->{'collect'};
2174 my $gsdl_cgi = $self->{'gsdl_cgi'};
2175# my $gsdlhome = $self->{'gsdlhome'};
2176
2177 if ($baseaction::authentication_enabled) {
2178 # Ensure the user is allowed to edit this collection
2179 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2180 }
2181
2182 my $site = $self->{'site'};
2183 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2184
2185 $gsdl_cgi->checked_chdir($collect_dir);
2186
2187 # Obtain the collect dir
2188 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2189
2190 # Make sure the collection isn't locked by someone else
2191 $self->lock_collection($username, $collect);
2192
2193 $self->_set_archives_metadata_array(@_);
2194
2195 # Release the lock once it is done
2196 $self->unlock_collection($username, $collect);
2197}
2198
2199sub _remove_archives_metadata
2200{
2201 my $self = shift @_;
2202
2203 my $collect = $self->{'collect'};
2204 my $gsdl_cgi = $self->{'gsdl_cgi'};
2205# my $gsdlhome = $self->{'gsdlhome'};
2206 my $infodbtype = $self->{'infodbtype'};
2207
2208 my $site = $self->{'site'};
2209
2210 # Obtain the collect and archive dir
2211 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2212
2213 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2214
2215 # look up additional args
2216 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2217
2218 my $metaname = $self->{'metaname'};
2219 my $metapos = $self->{'metapos'};
2220 my $metavalue = $self->{'metavalue'};
2221
2222 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2223 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
2224
2225 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2226 # default to deleting the first metavalue for the given metaname
2227 # Beware that if both metapos AND metavalue are defined, both matches (if any)
2228 # seem to get deleted in one single remove_archives_meta action invocation.
2229 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2230 # deleted, both get deleted.
2231 if(!defined $metapos && !defined $metavalue) {
2232 $metapos = 0;
2233 }
2234
2235 my $metamode = $self->{'metamode'};
2236 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2237
2238 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2239 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2240
2241 # This now stores the full pathname
2242 my $doc_filename = $doc_rec->{'doc-file'}->[0];
2243
2244 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2245# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
2246
2247 if ($status == 0)
2248 {
2249 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
2250 $mess .= " $metaname";
2251 $mess .= "->[$metapos]" if (defined $metapos);
2252 $mess .= " ($metavalue)" if (defined $metavalue);
2253 $gsdl_cgi->generate_ok_message($mess);
2254 }
2255 else
2256 {
2257 my $mess .= "Failed to remove archives metadata key: $docid\n";
2258 $mess .= "Exit status: $status\n";
2259 $mess .= "System Error Message: $!\n";
2260 $mess .= "-" x 20 . "\n";
2261
2262 $gsdl_cgi->generate_error($mess);
2263 }
2264
2265 #return $status; # in case calling functions have a use for this
2266}
2267
2268sub remove_archives_metadata
2269{
2270 my $self = shift @_;
2271
2272 my $username = $self->{'username'};
2273 my $collect = $self->{'collect'};
2274 my $gsdl_cgi = $self->{'gsdl_cgi'};
2275
2276 if ($baseaction::authentication_enabled)
2277 {
2278 # Ensure the user is allowed to edit this collection
2279 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2280 }
2281
2282 # Make sure the collection isn't locked by someone else
2283 $self->lock_collection($username, $collect);
2284
2285 $self->_remove_archives_metadata(@_);
2286
2287 # Release the lock once it is done
2288 $self->unlock_collection($username, $collect);
2289}
2290
2291sub remove_from_doc_xml
2292{
2293 my $self = shift @_;
2294 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
2295
2296 my @start_rules = ('Section' => \&dxml_start_section);
2297
2298 # Set the call-back functions for the metadata tags
2299 my @rules =
2300 (
2301 _default => 'raw',
2302 'Metadata' => \&rfdxml_metadata
2303 );
2304
2305 my $parser = XML::Rules->new
2306 (
2307 start_rules => \@start_rules,
2308 rules => \@rules,
2309 style => 'filter',
2310 output_encoding => 'utf8',
2311# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2312 stripspaces => 2|0|0 # ineffectual
2313 );
2314
2315 my $status = 0;
2316 my $xml_in = "";
2317 if (!open(MIN,"<$doc_xml_filename"))
2318 {
2319 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2320 $status = 1;
2321 }
2322 else
2323 {
2324 # Read them in
2325 my $line;
2326 while (defined ($line=<MIN>)) {
2327 $xml_in .= $line;
2328 }
2329 close(MIN);
2330
2331 # Filter with the call-back functions
2332 my $xml_out = "";
2333
2334 my $MOUT;
2335 if (!open($MOUT,">$doc_xml_filename")) {
2336 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2337 $status = 1;
2338 }
2339 else {
2340 binmode($MOUT,":utf8");
2341 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
2342 close($MOUT);
2343 }
2344 }
2345 return $status;
2346}
2347
2348sub rfdxml_metadata
2349{
2350 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2351
2352 # For comparisons, toplevel section is indicated by ""
2353 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2354 my $secid = $parser->{'parameters'}->{'secid'} || "";
2355
2356 if (!($secid eq $curr_sec_num))
2357 {
2358 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2359 return [$tagname => $attrHash];
2360 }
2361
2362 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2363 {
2364 if (!defined $parser->{'parameters'}->{'poscount'})
2365 {
2366 $parser->{'parameters'}->{'poscount'} = 0;
2367 }
2368 else
2369 {
2370 $parser->{'parameters'}->{'poscount'}++;
2371 }
2372
2373 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2374 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
2375 return [];
2376 }
2377
2378 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2379 {
2380 return [];
2381 }
2382
2383 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2384 {
2385 return [];
2386 }
2387 }
2388
2389 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2390 return [$tagname => $attrHash];
2391}
2392
2393sub mxml_metadata
2394{
2395 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2396 my $metaname = $parser->{'parameters'}->{'metaname'};
2397 my $metamode = $parser->{'parameters'}->{'metamode'};
2398
2399 # Report error if we don't see FileName tag before this
2400 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2401
2402 # Don't do anything if we are not in the right FileSet
2403 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2404 if ($file_regexp =~ /\.\*/) {
2405 # Only interested in a file_regexp if it specifies precisely one
2406 # file.
2407 # So, skip anything with a .* in it as it is too general
2408## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2409 return [$tagname => $attrHash];
2410 }
2411 my $src_file = $parser->{'parameters'}->{'src_file'};
2412 if (!($src_file =~ /$file_regexp/)) {
2413 return [$tagname => $attrHash];
2414 }
2415## print STDERR "*** mxl metamode = $metamode\n";
2416
2417 # Find the right metadata tag and checks if we are going to override it
2418 my $name_attr = $attrHash->{'name'};
2419 if (($name_attr eq $metaname) && ($metamode eq "override")) {
2420
2421 # now metadata.xml functions need to keep track of metapos
2422 if (!defined $parser->{'parameters'}->{'poscount'})
2423 {
2424 $parser->{'parameters'}->{'poscount'} = 0;
2425 }
2426 else
2427 {
2428 $parser->{'parameters'}->{'poscount'}++;
2429 }
2430
2431 # If either the metapos or prevmetavalue is set,
2432 # get the value and override the current value
2433 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2434
2435 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2436 {
2437 $attrHash->{'_content'} = $metavalue;
2438
2439 ## print STDERR "**** overriding metadata.xml\n";
2440
2441 # Don't want it to wipe out any other pieces of metadata
2442 $parser->{'parameters'}->{'metamode'} = "done";
2443 }
2444 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2445 {
2446 $attrHash->{'_content'} = $metavalue;
2447 $parser->{'parameters'}->{'metamode'} = "done";
2448 }
2449 }
2450
2451 # mxml_description will process the metadata if metadata is accumulate,
2452 # or if we haven't found the metadata to override
2453
2454 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2455 return [$tagname => $attrHash];
2456}
2457
2458
2459sub mxml_description
2460{
2461 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2462 my $metamode = $parser->{'parameters'}->{'metamode'};
2463
2464 # Failed... Report error if we don't see FileName tag before this
2465 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2466
2467 # Don't do anything if we are not in the right FileSet
2468 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2469 if ($file_regexp =~ m/\.\*/) {
2470 # Only interested in a file_regexp if it specifies precisely one
2471 # file.
2472 # So, skip anything with a .* in it as it is too general
2473 return [$tagname => $attrHash];
2474 }
2475 my $src_file = $parser->{'parameters'}->{'src_file'};
2476
2477 if (!($src_file =~ m/$file_regexp/)) {
2478 return [$tagname => $attrHash];
2479 }
2480
2481 # Accumulate the metadata block to the end of the description block
2482 # Note: This adds metadata block to all description blocks, so if there are
2483 # multiple FileSets, it will add to all of them
2484 if (($metamode eq "accumulate") || ($metamode eq "override")) {
2485
2486 # if metamode was "override" but get to here then it failed to
2487 # find an item to override, in which case it should append its
2488 # value to the end, just like the "accumulate" mode
2489
2490 if ($metamode eq "override") {
2491 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2492 }
2493
2494 # tack a new metadata tag on to the end of the <Metadata>+ block
2495 my $metaname = $parser->{'parameters'}->{'metaname'};
2496 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2497
2498 my $metadata_attr = { '_content' => $metavalue,
2499 'name' => $metaname,
2500 'mode' => "accumulate" };
2501
2502 my $append_metadata = [ "Metadata" => $metadata_attr ];
2503 my $description_content = $attrHash->{'_content'};
2504
2505## print STDERR "*** appending to metadata.xml\n";
2506
2507 # append the new metadata element to the end of the current
2508 # content contained inside this tag
2509 if (ref($description_content) eq "") {
2510 # => string or numeric literal
2511 # this is caused by a <Description> block has no <Metadata> child elements
2512 # => set up an empty array in '_content'
2513 $attrHash->{'_content'} = [ "\n" ];
2514 $description_content = $attrHash->{'_content'};
2515 }
2516
2517 push(@$description_content," ", $append_metadata ,"\n ");
2518 $parser->{'parameters'}->{'metamode'} = "done";
2519 }
2520
2521 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2522 return [$tagname => $attrHash];
2523}
2524
2525
2526sub mxml_filename
2527{
2528 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2529
2530 # Store the filename of the Current Fileset
2531 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2532 # FileName tag must come before Description tag
2533 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2534
2535 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2536 return [$tagname => $attrHash];
2537}
2538
2539
2540sub mxml_fileset
2541{
2542 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2543
2544 # Initilise the current_file
2545 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2546 # FileName tag must come before Description tag
2547 $parser->{'parameters'}->{'current_file'} = "";
2548
2549 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2550 return [$tagname => $attrHash];
2551}
2552
2553sub mxml_directorymetadata
2554{
2555 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2556
2557 # if we haven't processed the metadata when we reach the end of metadata.xml
2558 # it's because there's no particular FileSet element whose FileName matched
2559 # In which case, add a new FileSet for this FileName
2560 my $metamode = $parser->{'parameters'}->{'metamode'};
2561 if($metamode ne "done") {
2562
2563 if ($metamode eq "override") {
2564 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2565 }
2566
2567 # If we get to here and metamode is override, this means there
2568 # was no existing value to overide => treat as an append operation
2569
2570 # Create a new FileSet element and append to DirectoryMetadata
2571 # <FileSet>
2572 # <FileName>src_file</FileName>
2573 # <Description>
2574 # <Metadata mode="" name="">metavalue</Metadata>
2575 # </Description>
2576 # </FileSet>
2577 my $src_file = $parser->{'parameters'}->{'src_file'};
2578 my $metaname = $parser->{'parameters'}->{'metaname'};
2579 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2580 my $metadata_attr = {
2581 '_content' => $metavalue,
2582 'name' => $metaname,
2583 'mode' => "accumulate"
2584 };
2585 my $append_metadata = [ "Metadata" => $metadata_attr ];
2586 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
2587 my $description_element = [ "Description" => $description_attr ];
2588
2589 #_content is not an attribute, it's special and holds the children of this element
2590 # including the textnode value embedded in this element if any.
2591 my $filename_attr = {'_content' => $src_file};
2592 my $filename_element = [ "FileName" => $filename_attr ];
2593
2594 my $fileset_attr = {};
2595 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2596 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2597
2598
2599 # get children of dirmeta, and push the new FileSet element onto it
2600 print "Appending metadata to metadata.xml\n";
2601 my $dirmeta_content = $attrHash->{'_content'};
2602 if (ref($dirmeta_content)) {
2603 # got some existing interesting nested content
2604 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2605 push(@$dirmeta_content, "\t", $fileset ,"\n");
2606 }
2607 else {
2608 #description_content is most likely a string such as "\n"
2609 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2610 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2611 }
2612
2613 $parser->{'parameters'}->{'metamode'} = "done";
2614 }
2615 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2616 return [$tagname => $attrHash];
2617}
2618
2619
2620sub edit_metadata_xml
2621{
2622 my $self = shift @_;
2623 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2624
2625 # Set the call-back functions for the metadata tags
2626 my @rules =
2627 ( _default => 'raw',
2628 'FileName' => \&mxml_filename,
2629 'Metadata' => \&mxml_metadata,
2630 'Description' => \&mxml_description,
2631 'FileSet' => \&mxml_fileset,
2632 'DirectoryMetadata' => \&mxml_directorymetadata);
2633
2634 # use XML::Rules to add it in (read in and out again)
2635 my $parser = XML::Rules->new(rules => \@rules,
2636 style => 'filter',
2637 output_encoding => 'utf8',
2638 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2639
2640 if (!-e $metadata_xml_filename) {
2641
2642 if (open(MOUT,">$metadata_xml_filename")) {
2643
2644 my $src_file_re = &util::filename_to_regex($src_file);
2645 # shouldn't the following also be in the above utility routine??
2646 # $src_file_re =~ s/\./\\./g;
2647
2648 print MOUT "<?xml version=\"1.0\"?>\n";
2649 print MOUT "<DirectoryMetadata>\n";
2650 print MOUT " <FileSet>\n";
2651 print MOUT " <FileName>$src_file_re</FileName>\n";
2652 print MOUT " <Description>\n";
2653 print MOUT " </Description>\n";
2654 print MOUT " </FileSet>\n";
2655 print MOUT "</DirectoryMetadata>\n";
2656
2657 close(MOUT);
2658 }
2659 else {
2660 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2661 }
2662 }
2663
2664
2665 my $xml_in = "";
2666 if (!open(MIN,"<$metadata_xml_filename")) {
2667 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2668 }
2669 else {
2670 # Read them in
2671 my $line;
2672 while (defined ($line=<MIN>)) {
2673 $xml_in .= $line;
2674 }
2675 close(MIN);
2676
2677 # Filter with the call-back functions
2678 my $xml_out = "";
2679
2680 my $MOUT;
2681 if (!open($MOUT,">$metadata_xml_filename")) {
2682 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2683 }
2684 else {
2685 binmode($MOUT,":utf8");
2686
2687 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2688 # At the moment, I will just hack it!
2689 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2690 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2691 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2692 #print MOUT $xml_out;
2693
2694 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2695 metapos => $metapos,
2696 metavalue => $metavalue,
2697 metamode => $metamode,
2698 src_file => $src_file,
2699 prevmetavalue => $prevmetavalue,
2700 current_file => undef} );
2701 close($MOUT);
2702 }
2703 }
2704}
2705
2706
2707sub set_import_metadata
2708{
2709 my $self = shift @_;
2710
2711 my $username = $self->{'username'};
2712 my $collect = $self->{'collect'};
2713 my $gsdl_cgi = $self->{'gsdl_cgi'};
2714
2715 if ($baseaction::authentication_enabled) {
2716 # Ensure the user is allowed to edit this collection
2717 $self->authenticate_user($username, $collect);
2718 }
2719
2720 # Make sure the collection isn't locked by someone else
2721 $self->lock_collection($username, $collect);
2722
2723 $self->_set_import_metadata(@_);
2724
2725 # Release the lock once it is done
2726 $self->unlock_collection($username, $collect);
2727
2728}
2729
2730sub set_import_metadata_array
2731{
2732 my $self = shift @_;
2733
2734 my $username = $self->{'username'};
2735 my $collect = $self->{'collect'};
2736 my $gsdl_cgi = $self->{'gsdl_cgi'};
2737# my $gsdlhome = $self->{'gsdlhome'};
2738
2739 if ($baseaction::authentication_enabled) {
2740 # Ensure the user is allowed to edit this collection
2741 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2742 }
2743
2744 my $site = $self->{'site'};
2745 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2746
2747 $gsdl_cgi->checked_chdir($collect_dir);
2748
2749 # Make sure the collection isn't locked by someone else
2750 $self->lock_collection($username, $collect);
2751
2752 $self->_set_import_metadata_array(@_);
2753
2754 # Release the lock once it is done
2755 $self->unlock_collection($username, $collect);
2756
2757}
2758
2759
2760sub _set_import_metadata_array
2761{
2762 my $self = shift @_;
2763
2764 my $collect = $self->{'collect'};
2765 my $gsdl_cgi = $self->{'gsdl_cgi'};
2766
2767 my $site = $self->{'site'};
2768 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2769
2770 # look up additional args
2771
2772 my $infodbtype = $self->{'infodbtype'};
2773
2774 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2775 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2776
2777 my $json_str = $self->{'json'};
2778 my $doc_array = decode_json $json_str;
2779
2780 my $global_status = 0;
2781 my $global_mess = "";
2782
2783 my @all_docids = ();
2784
2785 foreach my $doc_array_rec ( @$doc_array )
2786 {
2787 my $status = -1;
2788 my $docid = $doc_array_rec->{'docid'};
2789
2790 my ($docid_root,$docid_secnum);
2791 if(defined $docid) {
2792 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2793 # as yet no support for setting subsection metadata in metadata.xml
2794 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2795 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2796 next; # skip this docid in for loop
2797 }
2798 }
2799
2800 push(@all_docids,$docid); # docid_root rather
2801
2802 my $metaname = $doc_array_rec->{'metaname'};
2803 if (defined $metaname) {
2804 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2805 my $metavalue = $doc_array_rec->{'metavalue'};
2806 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2807
2808 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2809 # make "accumulate" the default (less destructive, as it won't actually
2810 # delete any existing values)
2811 $metamode = "accumulate";
2812 }
2813
2814 # adding metapos and prevmetavalue support to import_metadata subroutines
2815 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2816 my $prevmetavalue = $self->{'prevmetavalue'};
2817
2818 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2819
2820 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2821 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2822
2823 foreach my $metatable_rec ( @$metatable ) {
2824 $metaname = $metatable_rec->{'metaname'};
2825 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2826 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2827 # make "accumulate" the default (less destructive, as it won't actually
2828 # delete any existing values)
2829 $metamode = "accumulate";
2830 }
2831
2832 # No support for metapos and prevmetavalue in the JSON metatable substructure
2833 my $metapos = undef;
2834 my $prevmetavalue = undef;
2835 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2836
2837 foreach my $metavalue ( @$metavals ) {
2838 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2839
2840 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2841 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2842 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2843 # metavalue for this metaname will be lost
2844 $metamode = "accumulate";
2845 }
2846 }
2847 }
2848 }
2849 }
2850
2851 # always a success message
2852 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2853 $gsdl_cgi->generate_ok_message($mess);
2854}
2855
2856# always returns true (1)
2857sub set_import_metadata_entry
2858{
2859 my $self = shift @_;
2860 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2861
2862 my $info_mess = <<RAWEND;
2863****************************
2864 set_import_metadata_entry()
2865****************************
2866RAWEND
2867
2868 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2869 $info_mess .= " collect = $collect\n" if defined($collect);
2870 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2871 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2872 $info_mess .= " docid = $docid\n" if defined($docid);
2873 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2874 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2875 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2876 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2877 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2878
2879 $info_mess .= "****************************\n";
2880
2881 $gsdl_cgi->generate_message($info_mess);
2882
2883 # import works with metadata.xml which can have inherited metadata
2884 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2885 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2886 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2887 # what they're doing if they provide a metapos.
2888 if(defined $metapos) {
2889 print STDERR "@@@@ WARNING: metapos defined.\n";
2890 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2891 }
2892
2893 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2894 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2895 my $metadata_xml_file;
2896 my $import_filename = undef;
2897
2898 if (defined $docid) {
2899 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2900 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2901
2902 # This now stores the full pathname
2903 $import_filename = $doc_rec->{'src-file'}->[0];
2904 $import_filename = &util::placeholders_to_abspath($import_filename);
2905
2906 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2907 # as the array version of the method doesn't support the -f parameter yet
2908 my $import_file = $self->{'f'};
2909 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2910 }
2911
2912 # figure out correct metadata.xml file [?]
2913 # Assuming the metadata.xml file is next to the source file
2914 # Note: This will not work if it is using the inherited metadata from the parent folder
2915 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2916 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2917
2918 # If we're overriding everything, then $prevmetavalue=undefined and
2919 # $metamode=override combined with $metapos=undefined
2920 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2921 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2922 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2923## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2924 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2925
2926 }
2927
2928 # Edit the metadata.xml
2929 # Modified by Jeffrey from DL Consulting
2930 # Handle the case where there is one metadata.xml file for multiple FileSets
2931 # The XML filter needs to know whether it is in the right FileSet
2932 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2933 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2934 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2935 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2936 #return 0;
2937 return $metadata_xml_filename;
2938}
2939
2940sub _remove_import_metadata
2941{
2942 my $self = shift @_;
2943
2944 my $collect = $self->{'collect'};
2945 my $gsdl_cgi = $self->{'gsdl_cgi'};
2946# my $gsdlhome = $self->{'gsdlhome'};
2947 my $infodbtype = $self->{'infodbtype'};
2948
2949 # Obtain the collect dir
2950 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2951 my $site = $self->{'site'};
2952 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2953
2954 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2955 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2956
2957 # look up additional args
2958 my $docid = $self->{'d'};
2959 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2960 {
2961 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2962 }
2963
2964 my $metaname = $self->{'metaname'};
2965 my $metapos = $self->{'metapos'};
2966 my $metavalue = $self->{'metavalue'};
2967
2968 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
2969 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/));
2970
2971 if(defined $metavalue) { # metavalue is now a compulsory arg for remove_import_metadata()
2972 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2973 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2974 $metapos = 0;
2975 }
2976 my $metamode = $self->{'metamode'};
2977 $metamode = undef if(defined $metamode && ($metamode =~ m/^\s*$/));
2978
2979 # import works with metadata.xml which can have inherited metadata
2980 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2981 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2982 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2983 # what they're doing if they provide a metapos.
2984 if(defined $metapos) {
2985 print STDERR "@@@@ WARNING: metapos defined.\n";
2986 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2987 }
2988
2989 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2990 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2991 my $metadata_xml_file;
2992 my $import_filename = undef;
2993 if (defined $docid)
2994 {
2995 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2996 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2997
2998 # This now stores the full pathname
2999 $import_filename = $doc_rec->{'src-file'}->[0];
3000 $import_filename = &util::placeholders_to_abspath($import_filename);
3001 }
3002
3003 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
3004 {
3005 $gsdl_cgi->generate_error("There is no metadata\n");
3006 }
3007
3008 # figure out correct metadata.xml file [?]
3009 # Assuming the metadata.xml file is next to the source file
3010 # Note: This will not work if it is using the inherited metadata from the parent folder
3011 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
3012 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3013
3014 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
3015
3016 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
3017 $mess .= " $metaname";
3018 $mess .= " = $metavalue\n";
3019
3020 $gsdl_cgi->generate_ok_message($mess);
3021
3022 #return $status; # in case calling functions have a use for this
3023}
3024
3025sub remove_import_metadata
3026{
3027 my $self = shift @_;
3028
3029 my $username = $self->{'username'};
3030 my $collect = $self->{'collect'};
3031 my $gsdl_cgi = $self->{'gsdl_cgi'};
3032
3033 if ($baseaction::authentication_enabled) {
3034 # Ensure the user is allowed to edit this collection
3035 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3036 }
3037
3038 # Make sure the collection isn't locked by someone else
3039 $self->lock_collection($username, $collect);
3040
3041 $self->_remove_import_metadata(@_);
3042
3043 # Release the lock once it is done
3044 $self->unlock_collection($username, $collect);
3045
3046}
3047
3048sub remove_from_metadata_xml
3049{
3050 my $self = shift @_;
3051 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3052 # metamode generally has no meaning for removing meta, but is used by set_meta
3053 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3054
3055 # Set the call-back functions for the metadata tags
3056 my @rules =
3057 (
3058 _default => 'raw',
3059 'Metadata' => \&rfmxml_metadata,
3060 'FileName' => \&mxml_filename
3061 );
3062
3063 my $parser = XML::Rules->new
3064 (
3065 rules => \@rules,
3066 style => 'filter',
3067 output_encoding => 'utf8',
3068 #normalisespaces => 1,
3069 stripspaces => 2|0|0 # ineffectual
3070 );
3071
3072 my $xml_in = "";
3073 if (!open(MIN,"<$metadata_xml_filename"))
3074 {
3075 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3076 }
3077 else
3078 {
3079 # Read them in
3080 my $line;
3081 while (defined ($line=<MIN>)) {
3082 $xml_in .= $line;
3083 }
3084 close(MIN);
3085
3086 # Filter with the call-back functions
3087 my $xml_out = "";
3088
3089 my $MOUT;
3090 if (!open($MOUT,">$metadata_xml_filename")) {
3091 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3092 }
3093 else {
3094 binmode($MOUT,":utf8");
3095 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
3096 close($MOUT);
3097 }
3098 }
3099}
3100
3101sub rfmxml_metadata
3102{
3103 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3104
3105 # metadata.xml does not handle subsections
3106
3107 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3108 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3109 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
3110 {
3111 if (!defined $parser->{'parameters'}->{'poscount'})
3112 {
3113 $parser->{'parameters'}->{'poscount'} = 0;
3114 }
3115 else
3116 {
3117 $parser->{'parameters'}->{'poscount'}++;
3118 }
3119
3120 # if overriding but no metapos, then clear all the meta for this metaname
3121 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
3122 return [];
3123 }
3124
3125 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3126 {
3127 return [];
3128 }
3129 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3130 {
3131 return [];
3132 }
3133 }
3134
3135 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3136 return [$tagname => $attrHash];
3137}
3138
3139sub _remove_live_metadata
3140{
3141 my $self = shift @_;
3142
3143 my $collect = $self->{'collect'};
3144 my $gsdl_cgi = $self->{'gsdl_cgi'};
3145# my $gsdlhome = $self->{'gsdlhome'};
3146 my $infodbtype = $self->{'infodbtype'};
3147
3148 # Obtain the collect dir
3149 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3150 my $site = $self->{'site'};
3151 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3152
3153
3154 # look up additional args
3155 my $docid = $self->{'d'};
3156 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3157 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3158 }
3159
3160 # Generate the dbkey
3161 my $metaname = $self->{'metaname'};
3162 my $dbkey = "$docid.$metaname";
3163
3164 # To people who know $collect_tail please add some comments
3165 # Obtain the live gdbm_db path
3166 my $collect_tail = $collect;
3167 $collect_tail =~ s/^.*[\/|\\]//;
3168 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3169 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3170
3171 # Remove the key
3172 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3173 my $status = system($cmd);
3174 if ($status != 0) {
3175 # Catch error if gdbmdel failed
3176 my $mess = "Failed to set metadata key: $dbkey\n";
3177
3178 $mess .= "PATH: $ENV{'PATH'}\n";
3179 $mess .= "cmd = $cmd\n";
3180 $mess .= "Exit status: $status\n";
3181 $mess .= "System Error Message: $!\n";
3182
3183 $gsdl_cgi->generate_error($mess);
3184 }
3185 else {
3186 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3187 }
3188
3189}
3190
3191sub remove_live_metadata
3192{
3193 my $self = shift @_;
3194
3195 my $username = $self->{'username'};
3196 my $collect = $self->{'collect'};
3197 my $gsdl_cgi = $self->{'gsdl_cgi'};
3198 my $gsdlhome = $self->{'gsdlhome'};
3199
3200 if ($baseaction::authentication_enabled) {
3201 # Ensure the user is allowed to edit this collection
3202 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3203 }
3204
3205 # Make sure the collection isn't locked by someone else
3206 $self->lock_collection($username, $collect);
3207
3208 $self->_remove_live_metadata(@_);
3209
3210 $self->unlock_collection($username, $collect);
3211}
3212
3213sub remove_metadata
3214{
3215 my $self = shift @_;
3216
3217 my $where = $self->{'where'};
3218 if(!$where || ($where =~ m/^\s*$/)) {
3219 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3220 return;
3221 }
3222
3223 my $username = $self->{'username'};
3224 my $collect = $self->{'collect'};
3225 my $gsdl_cgi = $self->{'gsdl_cgi'};
3226
3227 if ($baseaction::authentication_enabled) {
3228 # Ensure the user is allowed to edit this collection
3229 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3230 }
3231
3232 # Make sure the collection isn't locked by someone else
3233 $self->lock_collection($username, $collect);
3234
3235 # check which directories need to be processed, specified in $where as
3236 # any combination of import|archives|index|live
3237 if($where =~ m/import/) {
3238 $self->_remove_import_metadata(@_);
3239 }
3240 if($where =~ m/archives/) {
3241 $self->_remove_archives_metadata(@_);
3242 }
3243 if($where =~ m/index/) {
3244 $self->_remove_index_metadata(@_);
3245 }
3246
3247 # Release the lock once it is done
3248 $self->unlock_collection($username, $collect);
3249}
3250
3251# the internal version, without authentication
3252sub _remove_index_metadata
3253{
3254 my $self = shift @_;
3255
3256 my $collect = $self->{'collect'};
3257 my $gsdl_cgi = $self->{'gsdl_cgi'};
3258# my $gsdlhome = $self->{'gsdlhome'};
3259 my $infodbtype = $self->{'infodbtype'};
3260
3261 # Obtain the collect dir
3262 my $site = $self->{'site'};
3263 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3264 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3265
3266 # look up additional args
3267 my $docid = $self->{'d'};
3268 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3269 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3270 }
3271 my $metaname = $self->{'metaname'};
3272 my $metapos = $self->{'metapos'};
3273 my $metavalue = $self->{'metavalue'};
3274
3275 $metapos = undef if(defined $metapos && ($metapos =~ m/^\s*$/));
3276 $metavalue = undef if(defined $metavalue && ($metavalue =~ m/^\s*$/)); # necessary to force fallback to undef here
3277
3278 # To people who know $collect_tail please add some comments
3279 # -> In collection groups, I think collect_tailname is the subcollection name,
3280 # e.g. colgroup-name/col-tail-name
3281 # Obtain the path to the database
3282 my $collect_tail = $collect;
3283 $collect_tail =~ s/^.*[\/|\\]//;
3284 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3285 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3286
3287 # Read the docid entry
3288 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3289
3290 # Check to make sure the key does exist
3291 if (!defined ($doc_rec->{$metaname})) {
3292 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3293 }
3294
3295 # Obtain the specified metadata pos
3296 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3297 if(!defined $metapos && !defined $metavalue) {
3298 $metapos = 0;
3299 }
3300
3301
3302 # consider check key is defined before deleting?
3303 # Loop through the metadata array and ignore the specified position
3304 my $filtered_metadata = [];
3305 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
3306 for (my $i=0; $i<$num_metadata_vals; $i++) {
3307 my $metaval = shift(@{$doc_rec->{$metaname}});
3308
3309 if (!defined $metavalue && $i != $metapos) {
3310 push(@$filtered_metadata,$metaval);
3311 }
3312
3313 if(defined $metavalue && !($metavalue eq $metaval))
3314 {
3315 push(@$filtered_metadata,$metaval);
3316 }
3317 }
3318 $doc_rec->{$metaname} = $filtered_metadata;
3319
3320 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3321 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3322
3323 if ($status != 0) {
3324 my $mess = "Failed to set metadata key: $docid\n";
3325
3326 $mess .= "PATH: $ENV{'PATH'}\n";
3327 $mess .= "Exit status: $status\n";
3328 $mess .= "System Error Message: $!\n";
3329
3330 $gsdl_cgi->generate_error($mess);
3331 }
3332 else {
3333 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3334 $mess .= " $metaname";
3335 $mess .= "->[$metapos]" if (defined $metapos);
3336 $mess .= " ($metavalue)" if (defined $metavalue);
3337
3338 $gsdl_cgi->generate_ok_message($mess);
3339 }
3340
3341 #return $status; # in case calling functions have a use for this
3342}
3343
3344sub remove_index_metadata
3345{
3346 my $self = shift @_;
3347
3348 my $username = $self->{'username'};
3349 my $collect = $self->{'collect'};
3350 my $gsdl_cgi = $self->{'gsdl_cgi'};
3351# my $gsdlhome = $self->{'gsdlhome'};
3352
3353 if ($baseaction::authentication_enabled) {
3354 # Ensure the user is allowed to edit this collection
3355 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3356 }
3357
3358 # Obtain the collect dir
3359 my $site = $self->{'site'};
3360 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3361 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3362
3363 # Make sure the collection isn't locked by someone else
3364 $self->lock_collection($username, $collect);
3365
3366 $self->_remove_index_metadata(@_);
3367
3368 # Release the lock once it is done
3369 $self->unlock_collection($username, $collect);
3370}
3371
3372
3373# Was trying to reused the codes, but the functions need to be broken
3374# down more before they can be reused, otherwise there will be too
3375# much overhead and duplicate process...
3376sub insert_metadata
3377{
3378 my $self = shift @_;
3379
3380 my $username = $self->{'username'};
3381 my $collect = $self->{'collect'};
3382 my $gsdl_cgi = $self->{'gsdl_cgi'};
3383 my $gsdlhome = $self->{'gsdlhome'};
3384 my $infodbtype = $self->{'infodbtype'};
3385
3386 # If the import metadata and gdbm database have been updated, we
3387 # need to insert some notification to warn user that the the text
3388 # they see at the moment is not indexed and require a rebuild.
3389 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3390
3391 if ($baseaction::authentication_enabled) {
3392 # Ensure the user is allowed to edit this collection
3393 $self->authenticate_user($username, $collect);
3394 }
3395
3396 # Obtain the collect and archive dir
3397 my $site = $self->{'site'};
3398 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3399 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3400 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3401
3402 # Make sure the collection isn't locked by someone else
3403 $self->lock_collection($username, $collect);
3404
3405 # Check additional args
3406 my $docid = $self->{'d'};
3407 if (!defined($docid) || ($docid =~ m/^\s*$/)) {
3408 $gsdl_cgi->generate_error("No document id is specified: d=...");
3409 }
3410 my $metaname = $self->{'metaname'};
3411 if (!defined($metaname) || ($metaname =~ m/^\s*$/)) {
3412 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3413 }
3414 my $metavalue = $self->{'metavalue'};
3415 if (!defined($metavalue) || ($metavalue =~ m/^\s*$/)) {
3416 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3417 }
3418 # make "accumulate" the default (less destructive, as it won't actually
3419 # delete any existing values)
3420 my $metamode = "accumulate";
3421
3422 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3423 my $metapos = undef;
3424 my $prevmetavalue = undef;
3425
3426 #=======================================================================#
3427 # set_import_metadata [START]
3428 #=======================================================================#
3429 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3430 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3431 my $metadata_xml_file;
3432 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3433 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3434
3435 # This now stores the full pathname
3436 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3437 $import_filename = &util::placeholders_to_abspath($import_filename);
3438
3439 # figure out correct metadata.xml file [?]
3440 # Assuming the metadata.xml file is next to the source file
3441 # Note: This will not work if it is using the inherited metadata from the parent folder
3442 my ($import_tailname, $import_dirname)
3443 = File::Basename::fileparse($import_filename);
3444 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3445
3446 # Shane's escape characters
3447 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3448 $metavalue =~ s/\,/&#44;/g;
3449 $metavalue =~ s/\:/&#58;/g;
3450 $metavalue =~ s/\|/&#124;/g;
3451 $metavalue =~ s/\(/&#40;/g;
3452 $metavalue =~ s/\)/&#41;/g;
3453 $metavalue =~ s/\[/&#91;/g;
3454 $metavalue =~ s/\\/&#92;/g;
3455 $metavalue =~ s/\]/&#93;/g;
3456 $metavalue =~ s/\{/&#123;/g;
3457 $metavalue =~ s/\}/&#125;/g;
3458 $metavalue =~ s/\"/&#34;/g;
3459 $metavalue =~ s/\`/&#96;/g;
3460 $metavalue =~ s/\n/_newline_/g;
3461
3462 # Edit the metadata.xml
3463 # Modified by Jeffrey from DL Consulting
3464 # Handle the case where there is one metadata.xml file for multiple FileSets
3465 # The XML filter needs to know whether it is in the right FileSet
3466 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3467 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3468 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3469 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3470 #=======================================================================#
3471 # set_import_metadata [END]
3472 #=======================================================================#
3473
3474
3475 #=======================================================================#
3476 # set_metadata (accumulate version) [START]
3477 #=======================================================================#
3478 # To people who know $collect_tail please add some comments
3479 # Obtain path to the database
3480 my $collect_tail = $collect;
3481 $collect_tail =~ s/^.*[\/|\\]//;
3482 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3483 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3484
3485 # Read the docid entry
3486 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3487
3488 # Protect the quotes
3489 $metavalue =~ s/\"/\\\"/g;
3490
3491 # Adds the pending macro
3492 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3493
3494 # If the metadata doesn't exist, create a new one
3495 if (!defined($doc_rec->{$metaname})){
3496 $doc_rec->{$metaname} = [ $macro_metavalue ];
3497 }
3498 # Else, let's acculumate the values
3499 else {
3500 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3501 }
3502
3503 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3504 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3505
3506 if ($status != 0) {
3507 # Catch error if gdbmget failed
3508 my $mess = "Failed to set metadata key: $docid\n";
3509
3510 $mess .= "PATH: $ENV{'PATH'}\n";
3511 $mess .= "Exit status: $status\n";
3512 $mess .= "System Error Message: $!\n";
3513
3514 $gsdl_cgi->generate_error($mess);
3515 }
3516 else {
3517 my $mess = "insert-metadata successful: Key[$docid]\n";
3518 $mess .= " [In metadata.xml] $metaname";
3519 $mess .= " = $metavalue\n";
3520 $mess .= " [In database] $metaname";
3521 $mess .= " = $macro_metavalue\n";
3522 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3523 $gsdl_cgi->generate_ok_message($mess);
3524 }
3525 #=======================================================================#
3526 # set_metadata (accumulate version) [END]
3527 #=======================================================================#
3528
3529 # Release the lock once it is done
3530 $self->unlock_collection($username, $collect);
3531}
3532
35331;
Note: See TracBrowser for help on using the repository browser.