source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 29099

Last change on this file since 29099 was 29099, checked in by ak19, 10 years ago

Cosmetic change after more important commit.

File size: 119.3 KB
Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ "metapos" ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "where" ],
71 'help-string' => [
72 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73 ]},
74
75 #SET METHODS
76 "set-live-metadata" => {
77 'compulsory-args' => [ "d", "metaname", "metavalue" ],
78 'optional-args' => [ ] },
79
80 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81 'compulsory-args' => [ "metaname", "metavalue" ],
82 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84 "set-index-metadata" => {
85 'compulsory-args' => [ "d", "metaname", "metavalue" ],
86 'optional-args' => [ "metapos", "metamode" ] },
87
88 "set-archives-metadata" => {
89 'compulsory-args' => [ "d", "metaname", "metavalue" ],
90 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91
92 "set-import-metadata" => {
93 'compulsory-args' => [ "metaname", "metavalue" ],
94 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95
96 #SET METHODS (ARRAY)
97 "set-metadata-array" => {
98 'compulsory-args' => [ "where", "json" ],
99 'optional-args' => [ ],
100 'help-string' => [
101 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102
103 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110
111 "set-archives-metadata-array" => {
112 'compulsory-args' => [ "json" ],
113 'optional-args' => [ ] },
114
115 "set-import-metadata-array" => {
116 'compulsory-args' => [ "json" ],
117 'optional-args' => [ ] },
118
119 "set-index-metadata-array" => {
120 'compulsory-args' => [ "json" ],
121 'optional-args' => [ ] },
122
123 "set-live-metadata-array" => {
124 'compulsory-args' => [ "json" ],
125 'optional-args' => [ ] },
126
127 #REMOVE METHODS
128 "remove-import-metadata" => {
129 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131
132 "remove-archives-metadata" => {
133 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134 'optional-args' => [ "metapos", "metavalue" ] },
135
136 "remove-live-metadata" => {
137 'compulsory-args' => [ "d", "metaname" ],
138 'optional-args' => [ ] },
139
140 "remove-index-metadata" => {
141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "metapos", "metavalue" ] },
143
144 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145 'compulsory-args' => [ "d", "metaname" ],
146 'optional-args' => [ "where", "metapos", "metavalue" ] },
147
148 #INSERT METHODS
149 "insert-metadata" => {
150 'compulsory-args' => [ "d", "metaname", "metavalue" ],
151 'optional-args' => [ ] }
152};
153
154
155sub new
156{
157 my $class = shift (@_);
158 my ($gsdl_cgi,$iis6_mode) = @_;
159
160 # Treat metavalue specially. To transmit this through a GET request
161 # the Javascript side has url-encoded it, so here we need to decode
162 # it before proceeding
163
164 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171 # need to do the same with prevmetavalue
172 my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
173 my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
174 my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
175
176 $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
177 $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
178
179 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
180
181 return bless $self, $class;
182}
183
184
185sub get_live_metadata
186{
187 my $self = shift @_;
188
189 my $username = $self->{'username'};
190 my $collect = $self->{'collect'};
191 my $gsdl_cgi = $self->{'gsdl_cgi'};
192 my $gsdlhome = $self->{'gsdlhome'};
193 my $infodbtype = $self->{'infodbtype'};
194
195 # live metadata gets/saves value scoped (prefixed) by the current usename
196 # so (for now) let's not bother to enforce authentication
197
198 # Obtain the collect dir
199 my $site = $self->{'site'};
200 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
201 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
202
203 # No locking collection when getting metadata, only when setting it
204# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
205
206 # look up additional args
207 my $docid = $self->{'d'};
208 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
209 $gsdl_cgi->generate_error("No docid (d=...) specified.");
210 }
211
212 # Generate the dbkey
213 my $metaname = $self->{'metaname'};
214 my $dbkey = "$docid.$metaname";
215
216 # To people who know $collect_tail please add some comments
217 # Obtain path to the database
218 my $collect_tail = $collect;
219 $collect_tail =~ s/^.*[\/|\\]//;
220 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
221 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
222
223 # Obtain the content of the key
224 my $cmd = "gdbmget $infodb_file_path $dbkey";
225 if (open(GIN,"$cmd |") == 0) {
226 # Catch error if gdbmget failed
227 my $mess = "Failed to get metadata key: $metaname\n";
228 $mess .= "$!\n";
229
230 $gsdl_cgi->generate_error($mess);
231 }
232 else {
233 binmode(GIN,":utf8");
234 # Read everything in and concatenate them into $metavalue
235 my $metavalue = "";
236 my $line;
237 while (defined ($line=<GIN>)) {
238 $metavalue .= $line;
239 }
240 close(GIN);
241 chomp($metavalue); # Get rid off the tailing newlines
242 $gsdl_cgi->generate_ok_message("$metavalue");
243 }
244
245 # Release the lock once it is done
246# $self->unlock_collection($username, $collect);
247}
248
249# just calls the index version
250sub get_metadata
251{
252 my $self = shift @_;
253 $self->get_index_metadata(@_);
254}
255
256# JSON version that will get the requested metadata values
257# from the requested source (index, import, archives or live)
258# One of the params is a JSON string and the return value is JSON too
259# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
260sub get_metadata_array
261{
262 my $self = shift @_;
263
264 my $where = $self->{'where'};
265 if (!$where) {
266 $where = "index"; # default behaviour is to get the values from index
267 }
268
269 # Only when setting metadata do we perform authentication and do we lock the collection,
270 # not when getting metadata
271
272 # for get_meta_array, the where param can only be ONE of import, archives, index, live
273 if($where =~ m/index/) {
274 $self->_get_index_metadata_array(@_);
275 }
276 elsif($where =~ m/archives/) {
277 $self->_get_archives_metadata_array(@_);
278 }
279 elsif($where =~ m/import/) {
280 $self->_get_import_metadata_array(@_);
281 }
282 elsif($where =~ m/live/) {
283 $self->_get_live_metadata_array(@_);
284 }
285}
286
287# Unused at present. Added for completion. Tested.
288sub _get_import_metadata_array {
289
290 my $self = shift @_;
291
292 my $collect = $self->{'collect'};
293 my $gsdl_cgi = $self->{'gsdl_cgi'};
294 my $site = $self->{'site'};
295 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
296
297 # look up additional args
298 my $infodbtype = $self->{'infodbtype'};
299
300 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
301 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
302 my $json_str = $self->{'json'};
303 my $doc_array = decode_json $json_str;
304
305 my $json_result_str = "[";
306 my $first_doc_rec = 1;
307 foreach my $doc_array_rec ( @$doc_array ) {
308
309 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
310
311 if($first_doc_rec) {
312 $first_doc_rec = 0;
313 } else {
314 $json_result_str .= ",";
315 }
316 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
317
318 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
319 $json_result_str = $json_result_str . ",\"metatable\":[";
320
321 my $first_rec = 1;
322 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
323
324 # Read the docid entry
325 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
326 # This now stores the full pathname
327 my $import_filename = $doc_rec->{'src-file'}->[0];
328 $import_filename = &util::placeholders_to_abspath($import_filename);
329
330 # figure out correct metadata.xml file [?]
331 # Assuming the metadata.xml file is next to the source file
332 # Note: This will not work if it is using the inherited metadata from the parent folder
333 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
334 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
335
336
337 if($first_rec) {
338 $first_rec = 0;
339 } else {
340 $json_result_str .= ",";
341 }
342
343 my $metaname = $metatable_rec->{'metaname'};
344 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
345
346 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
347 if(!defined $metapos) {
348 $metapos = 0;
349 }
350
351 # Obtain the specified metadata value(s)
352 my $metavalue;
353
354 if($metapos ne "all") { # get the value at a single metapos
355 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
356
357 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
358
359 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
360
361 } else {
362 my $first_metaval = 1;
363 $metapos = 0;
364 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
365
366 while (defined $metavalue && $metavalue ne "") {
367 if($first_metaval) {
368 $first_metaval = 0;
369 } else {
370 $json_result_str .= ",";
371 }
372
373 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
374
375 $metapos++;
376 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
377 }
378 }
379
380 $json_result_str .= "]}"; # close metavals array and metatable record
381 }
382
383 $json_result_str .= "]}"; # close metatable array and docid record
384 }
385
386 $json_result_str .= "]"; # close array of docids
387 $gsdl_cgi->generate_ok_message($json_result_str."\n");
388}
389
390# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
391# For more information on the format of the output, see _get_index_metadata_array, which is in use.
392sub _get_archives_metadata_array {
393
394 my $self = shift @_;
395
396 my $collect = $self->{'collect'};
397 my $gsdl_cgi = $self->{'gsdl_cgi'};
398 my $site = $self->{'site'};
399 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
400
401 # look up additional args
402 my $infodbtype = $self->{'infodbtype'};
403
404 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
405 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
406
407 my $json_str = $self->{'json'};
408 my $doc_array = decode_json $json_str;
409
410 my $json_result_str = "[";
411 my $first_doc_rec = 1;
412 foreach my $doc_array_rec ( @$doc_array ) {
413
414 my $docid = $doc_array_rec->{'docid'};
415
416 if($first_doc_rec) {
417 $first_doc_rec = 0;
418 } else {
419 $json_result_str .= ",";
420 }
421 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
422
423 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
424 $json_result_str = $json_result_str . ",\"metatable\":[";
425
426 my $first_rec = 1;
427 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
428
429 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
430 $docid_secnum = "" if (!defined $docid_secnum);
431
432 # Read the docid entry
433 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
434 # This now stores the full pathname
435 my $doc_filename = $doc_rec->{'doc-file'}->[0];
436 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
437
438 if($first_rec) {
439 $first_rec = 0;
440 } else {
441 $json_result_str .= ",";
442 }
443
444 my $metaname = $metatable_rec->{'metaname'};
445 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
446
447 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
448 if(!defined $metapos) {
449 $metapos = 0;
450 }
451
452
453 # Obtain the specified metadata value(s)
454 my $metavalue;
455
456 if($metapos ne "all") { # get the value at a single metapos
457
458 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
459 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
460
461 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
462
463 } else {
464 my $first_metaval = 1;
465 $metapos = 0;
466 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
467
468 while (defined $metavalue && $metavalue ne "") {
469 if($first_metaval) {
470 $first_metaval = 0;
471 } else {
472 $json_result_str .= ",";
473 }
474
475 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
476
477 $metapos++;
478 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
479 }
480 }
481
482 $json_result_str .= "]}"; # close metavals array and metatable record
483 }
484
485 $json_result_str .= "]}"; # close metatable array and docid record
486 }
487
488 $json_result_str .= "]"; # close array of docids
489 $gsdl_cgi->generate_ok_message($json_result_str."\n");
490}
491
492
493# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
494sub _get_live_metadata_array
495{
496 my $self = shift @_;
497
498 my $collect = $self->{'collect'};
499 my $gsdl_cgi = $self->{'gsdl_cgi'};
500 my $site = $self->{'site'};
501 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
502
503 # look up additional args
504 my $infodbtype = $self->{'infodbtype'};
505
506 # To people who know $collect_tail please add some comments
507 # Obtain the path to the database
508 my $collect_tail = $collect;
509 $collect_tail =~ s/^.*[\/|\\]//;
510 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
511 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
512
513 my $json_str = $self->{'json'};
514 my $doc_array = decode_json $json_str;
515
516 my $json_result_str = "[";
517 my $first_doc_rec = 1;
518
519 foreach my $doc_array_rec ( @$doc_array ) {
520
521 my $docid = $doc_array_rec->{'docid'};
522
523 if($first_doc_rec) {
524 $first_doc_rec = 0;
525 } else {
526 $json_result_str .= ",";
527 }
528 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
529
530 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
531 $json_result_str = $json_result_str . ",\"metatable\":[";
532
533 my $first_rec = 1;
534 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
535 if($first_rec) {
536 $first_rec = 0;
537 } else {
538 $json_result_str .= ",";
539 }
540
541 my $metaname = $metatable_rec->{'metaname'};
542 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
543
544 # Generate the dbkey
545 my $dbkey = "$docid.$metaname";
546
547 # metapos for get_live_metadata is always assumed to be "all".
548 # It's always going to get all the lines of metavalues associated with a metaname
549 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
550 #my $metapos = "all";
551 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
552 my $metavalue = "";
553
554 # Obtain the content of the key
555 my $cmd = "gdbmget $infodb_file_path $dbkey";
556 if (open(GIN,"$cmd |") != 0) { # Success.
557
558 binmode(GIN,":utf8");
559 # Read everything in and concatenate them into $metavalue
560 my $line;
561 my $first_metaval = 1;
562 my $pos = 0;
563 while (defined ($line=<GIN>)) {
564 chomp($line); # Get rid off the tailing newlines
565
566 if($metapos eq "all") {
567 if($first_metaval) {
568 $first_metaval = 0;
569 } else {
570 $json_result_str .= ",";
571 }
572 $metavalue = $line;
573 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
574 } elsif($metapos == $pos) {
575 $metavalue = $line;
576 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
577 last;
578 } # else, the current $pos is not the required $metapos
579 $pos += 1;
580 }
581 close(GIN);
582 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
583
584 $json_result_str .= "]}"; # close metavals array and metatable record
585 }
586
587 $json_result_str .= "]}"; # close metatable array and docid record
588 }
589
590 $json_result_str .= "]"; # close array of docids
591
592 $gsdl_cgi->generate_ok_message($json_result_str."\n");
593}
594
595
596# Takes a JSON string and returns a JSON string
597# Request string is of the form:
598# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
599# Resulting string is of the form:
600# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
601sub _get_index_metadata_array
602{
603 my $self = shift @_;
604
605 my $collect = $self->{'collect'};
606 my $gsdl_cgi = $self->{'gsdl_cgi'};
607 my $site = $self->{'site'};
608 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610 # look up additional args
611 my $infodbtype = $self->{'infodbtype'};
612
613 # To people who know $collect_tail please add some comments
614 # Obtain the path to the database
615 my $collect_tail = $collect;
616 $collect_tail =~ s/^.*[\/|\\]//;
617 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
618 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
619
620 my $json_str = $self->{'json'};
621 my $doc_array = decode_json $json_str;
622
623 my $json_result_str = "[";
624 my $first_doc_rec = 1;
625
626 foreach my $doc_array_rec ( @$doc_array ) {
627
628 my $docid = $doc_array_rec->{'docid'};
629
630 if($first_doc_rec) {
631 $first_doc_rec = 0;
632 } else {
633 $json_result_str .= ",";
634 }
635 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
636
637 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
638 $json_result_str = $json_result_str . ",\"metatable\":[";
639
640 my $first_rec = 1;
641 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
642 if($first_rec) {
643 $first_rec = 0;
644 } else {
645 $json_result_str .= ",";
646 }
647
648 my $metaname = $metatable_rec->{'metaname'};
649 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
650
651 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
652 if(!defined $metapos) {
653 $metapos = 0;
654 }
655
656 # Read the docid entry
657 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
658
659 # Basically loop through and unescape_html the values
660 foreach my $k (keys %$doc_rec) {
661 my @escaped_v = ();
662 foreach my $v (@{$doc_rec->{$k}}) {
663 my $ev = &ghtml::unescape_html($v);
664 push(@escaped_v, $ev);
665 }
666 $doc_rec->{$k} = \@escaped_v;
667 }
668
669 # Obtain the specified metadata value(s)
670 my $metavalue;
671
672 if($metapos ne "all") { # get the value at a single metapos
673
674 $metavalue = $doc_rec->{$metaname}->[$metapos];
675
676 # protect any double quotes and colons in the metavalue before putting it into JSON
677 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
678 $metavalue =~ s/\:/&58;/g if defined $metavalue;
679
680 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
681
682 } else {
683 my $first_metaval = 1;
684 $metapos = 0;
685 $metavalue = $doc_rec->{$metaname}->[$metapos];
686
687 while (defined $metavalue) {
688
689 # protect any double quotes and colons in the metavalue before putting it into JSON
690 $metavalue =~ s/\"/&quot;/g;
691 $metavalue =~ s/\:/&58;/g;
692
693 if($first_metaval) {
694 $first_metaval = 0;
695 } else {
696 $json_result_str .= ",";
697 }
698
699 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
700
701 $metapos++;
702 $metavalue = $doc_rec->{$metaname}->[$metapos];
703 }
704 }
705
706 $json_result_str .= "]}"; # close metavals array and metatable record
707 }
708
709 $json_result_str .= "]}"; # close metatable array and docid record
710 }
711
712 $json_result_str .= "]"; # close array of docids
713
714 $gsdl_cgi->generate_ok_message($json_result_str."\n");
715}
716
717
718sub get_index_metadata
719{
720 my $self = shift @_;
721
722 my $username = $self->{'username'};
723 my $collect = $self->{'collect'};
724 my $gsdl_cgi = $self->{'gsdl_cgi'};
725 my $gsdlhome = $self->{'gsdlhome'};
726
727 # Obtain the collect dir
728 my $site = $self->{'site'};
729 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
730 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
731
732 # look up additional args
733 my $docid = $self->{'d'};
734 my $metaname = $self->{'metaname'};
735 my $metapos = $self->{'metapos'};
736 my $infodbtype = $self->{'infodbtype'};
737
738 # To people who know $collect_tail please add some comments
739 # Obtain path to the database
740 my $collect_tail = $collect;
741 $collect_tail =~ s/^.*[\/|\\]//;
742 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
743 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
744
745 # Read the docid entry
746 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
747
748 # Basically loop through and unescape_html the values
749 foreach my $k (keys %$doc_rec) {
750 my @escaped_v = ();
751 foreach my $v (@{$doc_rec->{$k}}) {
752 my $ev = &ghtml::unescape_html($v);
753 push(@escaped_v, $ev);
754 }
755 $doc_rec->{$k} = \@escaped_v;
756 }
757
758 # Obtain the specified metadata value
759 $metapos = 0 if (!defined $metapos);
760 my $metavalue = $doc_rec->{$metaname}->[$metapos];
761 $gsdl_cgi->generate_ok_message("$metavalue");
762
763}
764
765
766sub get_import_metadata
767{
768 my $self = shift @_;
769
770 my $username = $self->{'username'};
771 my $collect = $self->{'collect'};
772 my $gsdl_cgi = $self->{'gsdl_cgi'};
773 my $gsdlhome = $self->{'gsdlhome'};
774
775 # Obtain the collect dir
776 my $site = $self->{'site'};
777 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
778 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
779
780 # look up additional args
781 my $docid = $self->{'d'};
782 my $metaname = $self->{'metaname'};
783 my $metapos = $self->{'metapos'};
784 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
785
786 my $infodbtype = $self->{'infodbtype'};
787 if (!defined $docid)
788 {
789 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
790 }
791
792 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
793 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
794 my $metadata_xml_file;
795 my $import_filename = undef;
796
797
798 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
799 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
800 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
801
802 # This now stores the full pathname
803 $import_filename = $doc_rec->{'src-file'}->[0];
804 $import_filename = &util::placeholders_to_abspath($import_filename);
805
806 # figure out correct metadata.xml file [?]
807 # Assuming the metadata.xml file is next to the source file
808 # Note: This will not work if it is using the inherited metadata from the parent folder
809 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
810 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
811
812 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
813
814}
815
816sub get_metadata_from_metadata_xml
817{
818 my $self = shift @_;
819 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
820
821 my @rules =
822 (
823 _default => 'raw',
824 'Metadata' => \&gfmxml_metadata,
825 'FileName' => \&mxml_filename
826 );
827
828 my $parser = XML::Rules->new
829 (
830 rules => \@rules,
831 output_encoding => 'utf8'
832 );
833
834 my $xml_in = "";
835 if (!open(MIN,"<$metadata_xml_filename"))
836 {
837 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
838 }
839 else
840 {
841 # Read them in
842 my $line;
843 while (defined ($line=<MIN>)) {
844 $xml_in .= $line;
845 }
846 close(MIN);
847
848 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
849
850 if(defined $parser->{'pad'}->{'metavalue'})
851 {
852 return $parser->{'pad'}->{'metavalue'};
853 }
854 else
855 {
856 return "";
857 }
858 }
859}
860
861sub gfmxml_metadata
862{
863 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
864
865 # no subsection support yet in metadata.xml
866
867 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
868 {
869 if (!defined $parser->{'parameters'}->{'poscount'})
870 {
871 $parser->{'parameters'}->{'poscount'} = 0;
872 }
873 else
874 {
875 $parser->{'parameters'}->{'poscount'}++;
876 }
877
878 # gets the first value by default, since metapos defaults to 0
879 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
880 {
881 if($parser->{'parameters'}->{'metapos'} > 0) {
882 print STDERR "@@@@ WARNING: non-zero metapos.\n";
883 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
884 }
885 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
886 }
887 }
888}
889
890sub get_archives_metadata
891{
892 my $self = shift @_;
893
894 my $username = $self->{'username'};
895 my $collect = $self->{'collect'};
896 my $gsdl_cgi = $self->{'gsdl_cgi'};
897# my $gsdlhome = $self->{'gsdlhome'};
898 my $infodbtype = $self->{'infodbtype'};
899
900 # Obtain the collect dir
901 my $site = $self->{'site'};
902 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
903
904 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
905
906 # look up additional args
907 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
908 $docid_secnum = "" if (!defined $docid_secnum);
909
910 my $metaname = $self->{'metaname'};
911 my $metapos = $self->{'metapos'};
912 $metapos = 0 if (!defined $metapos);
913
914 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
915 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
916
917 # This now stores the full pathname
918 my $doc_filename = $doc_rec->{'doc-file'}->[0];
919
920 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
921
922}
923
924sub get_metadata_from_archive_xml
925{
926 my $self = shift @_;
927 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
928
929 my @start_rules = ('Section' => \&dxml_start_section);
930
931 my @rules =
932 (
933 _default => 'raw',
934 'Metadata' => \&gfdxml_metadata
935 );
936
937 my $parser = XML::Rules->new
938 (
939 start_rules => \@start_rules,
940 rules => \@rules,
941 output_encoding => 'utf8'
942 );
943
944 my $xml_in = "";
945 if (!open(MIN,"<$doc_xml_filename"))
946 {
947 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
948 }
949 else
950 {
951 # Read them in
952 my $line;
953 while (defined ($line=<MIN>)) {
954 $xml_in .= $line;
955 }
956 close(MIN);
957
958 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
959
960 if(defined $parser->{'pad'}->{'metavalue'})
961 {
962 return $parser->{'pad'}->{'metavalue'};
963 }
964 else
965 {
966 return "";
967 }
968 }
969}
970
971sub gfdxml_metadata
972{
973 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
974
975 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
976 {
977 return;
978 }
979
980 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
981 {
982 if (!defined $parser->{'parameters'}->{'poscount'})
983 {
984 $parser->{'parameters'}->{'poscount'} = 0;
985 }
986 else
987 {
988 $parser->{'parameters'}->{'poscount'}++;
989 }
990 }
991
992 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
993 {
994 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
995 }
996}
997
998sub _set_live_metadata
999{
1000 my $self = shift @_;
1001
1002 my $collect = $self->{'collect'};
1003 my $gsdl_cgi = $self->{'gsdl_cgi'};
1004 #my $gsdlhome = $self->{'gsdlhome'};
1005 my $infodbtype = $self->{'infodbtype'};
1006
1007 # Obtain the collect dir
1008 my $site = $self->{'site'};
1009 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1010 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1011
1012
1013 # look up additional args
1014 my $docid = $self->{'d'};
1015 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1016 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1017 }
1018 my $metavalue = $self->{'metavalue'};
1019
1020 # Generate the dbkey
1021 my $metaname = $self->{'metaname'};
1022 my $dbkey = "$docid.$metaname";
1023
1024 # To people who know $collect_tail please add some comments
1025 # Obtain path to the database
1026 my $collect_tail = $collect;
1027 $collect_tail =~ s/^.*[\/|\\]//;
1028 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1029 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1030
1031 # Set the new value
1032 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1033 my $status = system($cmd);
1034 if ($status != 0) {
1035 # Catch error if gdbmget failed
1036 my $mess = "Failed to set metadata key: $dbkey\n";
1037
1038 $mess .= "PATH: $ENV{'PATH'}\n";
1039 $mess .= "cmd = $cmd\n";
1040 $mess .= "Exit status: $status\n";
1041 $mess .= "System Error Message: $!\n";
1042
1043 $gsdl_cgi->generate_error($mess);
1044 }
1045 else {
1046 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
1047 }
1048
1049 #return $status; # in case calling functions have any further use for this
1050}
1051
1052sub set_live_metadata
1053{
1054 my $self = shift @_;
1055
1056 my $username = $self->{'username'};
1057 my $collect = $self->{'collect'};
1058 my $gsdl_cgi = $self->{'gsdl_cgi'};
1059
1060 if ($baseaction::authentication_enabled) {
1061 # Ensure the user is allowed to edit this collection
1062 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1063 }
1064
1065 # Make sure the collection isn't locked by someone else
1066 $self->lock_collection($username, $collect);
1067
1068 $self->_set_live_metadata(@_);
1069
1070 # Release the lock once it is done
1071 $self->unlock_collection($username, $collect);
1072}
1073
1074sub set_index_metadata_entry
1075{
1076 print STDERR "1\n";
1077 my $self = shift @_;
1078 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
1079
1080 # To people who know $collect_tail please add some comments
1081 # Obtain path to the database
1082 my $collect_tail = $collect;
1083 $collect_tail =~ s/^.*[\/|\\]//;
1084 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1085 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1086
1087 print STDERR "2\n";
1088# print STDERR "**** infodb file path = $infodb_file_path\n";
1089# print STDERR "***** infodb type = $infodbtype\n";
1090
1091 # Read the docid entry
1092 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1093
1094 # Set the metadata value
1095 if (defined $metapos) {
1096 print STDERR "3\n";
1097 # if metamode=accumulate AND metapos, warn user and then use metapos
1098 if (defined $metamode && $metamode eq "accumulate") {
1099 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1100 print STDERR "**** Proceeding by using metapos\n";
1101 }
1102 $doc_rec->{$metaname}->[$metapos] = $metavalue;
1103 }
1104 elsif (defined $prevmetavalue) {
1105 print STDERR "4\n";
1106 my $array = $doc_rec->{$metaname};
1107 my $length = @$array;
1108
1109 my $found = 0;
1110 for (my $i = 0; $i < $length; $i++){
1111 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1112 $doc_rec->{$metaname}->[$i] = $metavalue;
1113 $found = 1;
1114 last;
1115 }
1116 }
1117
1118 if($found == 0){
1119 $doc_rec->{$metaname} = [ $metavalue ];
1120 }
1121 }
1122 elsif (defined $metamode && $metamode eq "override") {
1123 print STDERR "5\n";
1124 $doc_rec->{$metaname} = [ $metavalue ];
1125 }
1126 else { # default for index was to override, but because accumulate is less destructive,
1127 # and because accumulate is the default for archives and import, that's the new default for index too
1128 print STDERR "6\n";
1129 if(defined $doc_rec->{$metaname}) {
1130 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1131 } else {
1132 $doc_rec->{$metaname} = [ $metavalue ];
1133 }
1134 }
1135 print STDERR "6\n";
1136
1137 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1138
1139 return $status;
1140
1141}
1142
1143sub _set_import_metadata
1144{
1145 my $self = shift @_;
1146
1147 my $collect = $self->{'collect'};
1148 my $gsdl_cgi = $self->{'gsdl_cgi'};
1149 my $infodbtype = $self->{'infodbtype'};
1150# my $gsdlhome = $self->{'gsdlhome'};
1151
1152 # Obtain the collect and archive dir
1153 my $site = $self->{'site'};
1154 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1155 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1156 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1157
1158 # look up additional args
1159 # want either d= or f=
1160 my $docid = $self->{'d'};
1161 my ($docid_root,$docid_secnum);
1162 if(defined $docid) {
1163 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1164 # as yet no support for setting subsection metadata in metadata.xml
1165 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1166 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
1167 return;
1168 }
1169 }
1170
1171 my $import_file = $self->{'f'};
1172 if ((!defined $docid) && (!defined $import_file)) {
1173 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1174 }
1175
1176 # Get the parameters and set default mode to "accumulate"
1177 my $metaname = $self->{'metaname'};
1178 my $metavalue = $self->{'metavalue'};
1179## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1180 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1181
1182 my $metamode = $self->{'metamode'};
1183 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1184 # make "accumulate" the default (less destructive, as won't actually
1185 # delete any existing values)
1186 $metamode = "accumulate";
1187 }
1188
1189 # adding metapos and prevmetavalue support to import_metadata subroutines
1190 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1191 my $prevmetavalue = $self->{'prevmetavalue'};
1192
1193 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1194 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1195
1196 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1197 $mess .= " $metaname";
1198 $mess .= " = $metavalue";
1199 $mess .= " ($metamode)\n";
1200
1201 $gsdl_cgi->generate_ok_message($mess);
1202
1203 #return $status; # in case calling functions have any further use for this
1204}
1205
1206# the version of set_index_meta that doesn't do authentication
1207sub _set_archives_metadata
1208{
1209 my $self = shift @_;
1210
1211 my $collect = $self->{'collect'};
1212 my $gsdl_cgi = $self->{'gsdl_cgi'};
1213 my $infodbtype = $self->{'infodbtype'};
1214
1215 # Obtain the collect and archive dir
1216 my $site = $self->{'site'};
1217 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1218 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1219
1220 # look up additional args
1221 my $docid = $self->{'d'};
1222 my $metaname = $self->{'metaname'};
1223 my $metavalue = $self->{'metavalue'};
1224 my $prevmetavalue = $self->{'prevmetavalue'};
1225
1226 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1227 # Don't append "|| undef", since if metapos=0 it will then be set to undef
1228
1229 my $metamode = $self->{'metamode'};
1230 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1231 # make "accumulate" the default (less destructive, as won't actually
1232 # delete any existing values)
1233 $metamode = "accumulate";
1234 }
1235
1236 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1237 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1238
1239 if ($status == 0) {
1240 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1241 $mess .= " $metaname";
1242 $mess .= "->[$metapos]" if (defined $metapos);
1243 $mess .= " = $metavalue";
1244 $mess .= " ($metamode)\n";
1245
1246 $gsdl_cgi->generate_ok_message($mess);
1247 }
1248 else {
1249 my $mess .= "Failed to set archives metadata key: $docid\n";
1250 $mess .= "Exit status: $status\n";
1251 if(defined $self->{'error_msg'}) {
1252 $mess .= "Error Message: $self->{'error_msg'}\n";
1253 } else {
1254 $mess .= "System Error Message: $!\n";
1255 }
1256 $mess .= "-" x 20 . "\n";
1257
1258 $gsdl_cgi->generate_error($mess);
1259 }
1260
1261 #return $status; # in case calling functions have any further use for this
1262}
1263
1264
1265# the version of set_index_meta that doesn't do authentication
1266sub _set_index_metadata
1267{
1268 print STDERR "START SET INDEX METADATA\n";
1269 my $self = shift @_;
1270
1271 my $collect = $self->{'collect'};
1272 my $gsdl_cgi = $self->{'gsdl_cgi'};
1273
1274 my $site = $self->{'site'};
1275 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1276
1277 # look up additional args
1278 my $docid = $self->{'d'};
1279 my $metaname = $self->{'metaname'};
1280 my $metapos = $self->{'metapos'}; # undef has meaning
1281 my $metavalue = $self->{'metavalue'};
1282 my $infodbtype = $self->{'infodbtype'};
1283 my $metamode = $self->{'metamode'};
1284 my $prevmetavalue = $self->{'prevmetavalue'};
1285
1286 print STDERR "SETTING INDEX METADATA ENTRY\n";
1287 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1288 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
1289 if ($status != 0) {
1290 # Catch error if set infodb entry failed
1291 my $mess = "Failed to set metadata key: $docid\n";
1292
1293 $mess .= "PATH: $ENV{'PATH'}\n";
1294 $mess .= "Exit status: $status\n";
1295 $mess .= "System Error Message: $!\n";
1296
1297 $gsdl_cgi->generate_error($mess);
1298 }
1299 else {
1300 my $mess = "set-index-metadata successful: Key[$docid]\n";
1301 $mess .= " $metaname";
1302 $mess .= "->[$metapos]" if (defined $metapos);
1303 $mess .= " = $metavalue\n";
1304
1305 $gsdl_cgi->generate_ok_message($mess);
1306 }
1307
1308 print STDERR "END SET INDEX METADATA\n";
1309 #return $status; # in case calling functions have any further use for this
1310}
1311
1312sub set_index_metadata
1313{
1314 my $self = shift @_;
1315
1316 my $username = $self->{'username'};
1317 my $collect = $self->{'collect'};
1318 my $gsdl_cgi = $self->{'gsdl_cgi'};
1319 #my $gsdlhome = $self->{'gsdlhome'};
1320
1321 if ($baseaction::authentication_enabled) {
1322 # Ensure the user is allowed to edit this collection
1323 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1324 }
1325
1326 my $site = $self->{'site'};
1327 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1328
1329 $gsdl_cgi->checked_chdir($collect_dir);
1330
1331 # Obtain the collect dir
1332 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1333
1334 # Make sure the collection isn't locked by someone else
1335 $self->lock_collection($username, $collect);
1336
1337 $self->_set_index_metadata(@_);
1338
1339 # Release the lock once it is done
1340 $self->unlock_collection($username, $collect);
1341}
1342
1343# call this to set the metadata for a combination of dirs archives, import or index, or live
1344# if none specified, defaults to index which was the original behaviour of set_metadata.
1345sub set_metadata
1346{
1347 my $self = shift @_;
1348
1349 # Testing that not defining a variable, setting it to "" or to " " all return false
1350 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1351
1352 my $where = $self->{'where'};
1353 if(!$where) {
1354 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1355 return;
1356 }
1357
1358 # authenticate and lock collection once, even if processing multiple dirs
1359 my $username = $self->{'username'};
1360 my $collect = $self->{'collect'};
1361 my $gsdl_cgi = $self->{'gsdl_cgi'};
1362
1363 if ($baseaction::authentication_enabled) {
1364 # Ensure the user is allowed to edit this collection
1365 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1366 }
1367
1368 if($where =~ m/index/) {
1369 my $site = $self->{'site'};
1370 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1371 $gsdl_cgi->checked_chdir($collect_dir);
1372 }
1373
1374 # Make sure the collection isn't locked by someone else
1375 $self->lock_collection($username, $collect);
1376
1377
1378 # now at last can set the metadata. $where can specify multiple
1379 # $where is of the form: import|archives|index, or a subset thereof
1380
1381 #my @whichdirs = split('\|', $where);
1382
1383 # just check whether $where contains import/archives/index/live in turn, and
1384 # for each case, process it accordingly
1385 if($where =~ m/import/) {
1386 $self->_set_import_metadata(@_);
1387 }
1388
1389 if($where =~ m/archives/) {
1390
1391 # look up docID arg which is optional to set_metadata because it's optional
1392 # to set_import, but which is compulsory to set_archives_metadata
1393 my $docid = $self->{'d'};
1394 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1395 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1396 }
1397 # we have a docid, so can set archives meta
1398 $self->_set_archives_metadata(@_);
1399 }
1400
1401 if($where =~ m/index/) {
1402
1403 # look up docID arg which is optional to set_metadata because it's optional
1404 # to set_import, but which is compulsory to set_archives_metadata
1405 my $docid = $self->{'d'};
1406 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1407 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1408 }
1409 # we have a docid, so can set index meta
1410 $self->_set_index_metadata(@_);
1411 }
1412
1413 if($where =~ m/live/) {
1414 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1415 }
1416
1417 # Release the lock once it is done
1418 $self->unlock_collection($username, $collect);
1419}
1420
1421sub set_metadata_array
1422{
1423 my $self = shift @_;
1424
1425 my $where = $self->{'where'};
1426 if(!$where) {
1427 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1428 return;
1429 }
1430
1431 my $username = $self->{'username'};
1432 my $collect = $self->{'collect'};
1433 my $gsdl_cgi = $self->{'gsdl_cgi'};
1434
1435 if ($baseaction::authentication_enabled) {
1436 # Ensure the user is allowed to edit this collection
1437 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1438 }
1439
1440 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1441 # But including the stmt during this code reorganisation to preserve as-is what used to happen
1442 my $site = $self->{'site'};
1443 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1444 $gsdl_cgi->checked_chdir($collect_dir);
1445
1446 # Make sure the collection isn't locked by someone else
1447 $self->lock_collection($username, $collect);
1448
1449 if($where =~ m/import/) {
1450 $self->_set_import_metadata_array(@_);
1451 }
1452 if($where =~ m/archives/) {
1453 $self->_set_archives_metadata_array(@_);
1454 }
1455 if($where =~ m/index/) {
1456 $self->_set_index_metadata_array(@_);
1457 }
1458 if($where =~ m/live/) {
1459 $self->_set_live_metadata_array(@_);
1460 }
1461
1462 # Release the lock once it is done
1463 $self->unlock_collection($username, $collect);
1464}
1465
1466sub _set_index_metadata_array
1467{
1468 my $self = shift @_;
1469
1470 my $collect = $self->{'collect'};
1471 my $gsdl_cgi = $self->{'gsdl_cgi'};
1472 my $site = $self->{'site'};
1473 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1474
1475
1476 # look up additional args
1477
1478 my $infodbtype = $self->{'infodbtype'};
1479
1480 my $json_str = $self->{'json'};
1481 my $doc_array = decode_json $json_str;
1482
1483
1484 my $global_status = 0;
1485 my $global_mess = "";
1486
1487 my @all_docids = ();
1488
1489 foreach my $doc_array_rec ( @$doc_array ) {
1490
1491 my $status = -1;
1492 my $docid = $doc_array_rec->{'docid'};
1493
1494 push(@all_docids,$docid);
1495
1496 my $metaname = $doc_array_rec->{'metaname'};
1497 if(defined $metaname) {
1498 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
1499 my $metavalue = $doc_array_rec->{'metavalue'};
1500 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1501
1502 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1503 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1504 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1505
1506 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1507 $metaname = $metatable_rec->{'metaname'};
1508 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1509 my $metapos = undef;
1510 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1511
1512 foreach my $metavalue ( @$metavals ) { # metavals is an array
1513 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1514 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1515 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1516 # metavalue for this metaname will be lost
1517 $metamode = "accumulate";
1518 }
1519 }
1520 }
1521 }
1522
1523 if ($status != 0) {
1524 # Catch error if set infodb entry failed
1525 $global_status = $status;
1526 $global_mess .= "Failed to set metadata key: $docid\n";
1527 $global_mess .= "Exit status: $status\n";
1528 $global_mess .= "System Error Message: $!\n";
1529 $global_mess .= "-" x 20;
1530 }
1531 }
1532
1533 if ($global_status != 0) {
1534 $global_mess .= "PATH: $ENV{'PATH'}\n";
1535 $gsdl_cgi->generate_error($global_mess);
1536 }
1537 else {
1538 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1539 $gsdl_cgi->generate_ok_message($mess);
1540 }
1541}
1542
1543sub set_index_metadata_array
1544{
1545 my $self = shift @_;
1546
1547 my $username = $self->{'username'};
1548 my $collect = $self->{'collect'};
1549 my $gsdl_cgi = $self->{'gsdl_cgi'};
1550# my $gsdlhome = $self->{'gsdlhome'};
1551
1552 if ($baseaction::authentication_enabled) {
1553 # Ensure the user is allowed to edit this collection
1554 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1555 }
1556
1557 my $site = $self->{'site'};
1558 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1559
1560 $gsdl_cgi->checked_chdir($collect_dir);
1561
1562 # Obtain the collect dir
1563 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1564
1565 # Make sure the collection isn't locked by someone else
1566 $self->lock_collection($username, $collect);
1567
1568 $self->_set_index_metadata_array(@_);
1569
1570 # Release the lock once it is done
1571 $self->unlock_collection($username, $collect);
1572}
1573
1574# experimental, newly added in and untested
1575sub _set_live_metadata_array
1576{
1577 my $self = shift @_;
1578
1579 my $collect = $self->{'collect'};
1580 my $gsdl_cgi = $self->{'gsdl_cgi'};
1581
1582 my $site = $self->{'site'};
1583 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1584
1585
1586 # look up additional args
1587 my $infodbtype = $self->{'infodbtype'};
1588 # To people who know $collect_tail please add some comments
1589 # Obtain path to the database
1590 my $collect_tail = $collect;
1591 $collect_tail =~ s/^.*[\/|\\]//;
1592 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1593 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1594
1595
1596 my $json_str = $self->{'json'};
1597 my $doc_array = decode_json $json_str;
1598
1599
1600 my $global_status = 0;
1601 my $global_mess = "";
1602
1603 my @all_docids = ();
1604
1605
1606 foreach my $doc_array_rec ( @$doc_array ) {
1607
1608 my $status = -1;
1609 my $docid = $doc_array_rec->{'docid'};
1610
1611 push(@all_docids,$docid);
1612
1613 my $metaname = $doc_array_rec->{'metaname'};
1614 if(defined $metaname) {
1615 my $dbkey = "$docid.$metaname";
1616 my $metavalue = $doc_array_rec->{'metavalue'};
1617
1618 # Set the new value
1619 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1620 $status = system($cmd);
1621
1622 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1623 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1624 foreach my $metatable_rec ( @$metatable ) {
1625 $metaname = $metatable_rec->{'metaname'};
1626 my $dbkey = "$docid.$metaname";
1627
1628 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1629 foreach my $metavalue ( @$metavals ) {
1630 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1631 $status = system($cmd);
1632 }
1633 }
1634
1635 }
1636
1637 if ($status != 0) {
1638 # Catch error if gdbmget failed
1639 $global_status = $status;
1640 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1641 $global_mess .= "Exit status: $status\n";
1642 $global_mess .= "System Error Message: $!\n";
1643 $global_mess .= "-" x 20;
1644 }
1645 }
1646
1647 if ($global_status != 0) {
1648 $global_mess .= "PATH: $ENV{'PATH'}\n";
1649 $gsdl_cgi->generate_error($global_mess);
1650 }
1651 else {
1652 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1653 $gsdl_cgi->generate_ok_message($mess);
1654 }
1655}
1656
1657sub set_live_metadata_array
1658{
1659 my $self = shift @_;
1660
1661 my $username = $self->{'username'};
1662 my $collect = $self->{'collect'};
1663 my $gsdl_cgi = $self->{'gsdl_cgi'};
1664
1665 if ($baseaction::authentication_enabled) {
1666 # Ensure the user is allowed to edit this collection
1667 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1668 }
1669
1670 my $site = $self->{'site'};
1671 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1672
1673 $gsdl_cgi->checked_chdir($collect_dir);
1674
1675 # Make sure the collection isn't locked by someone else
1676 $self->lock_collection($username, $collect);
1677
1678 $self->_set_live_metadata_array(@_);
1679
1680 # Release the lock once it is done
1681 $self->unlock_collection($username, $collect);
1682}
1683
1684
1685sub dxml_metadata
1686{
1687 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1688 my $metaname = $parser->{'parameters'}->{'metaname'};
1689 my $metamode = $parser->{'parameters'}->{'metamode'};
1690
1691###!!! print STDERR "**** Processing closing </Metadata> tag\n";
1692
1693 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1694
1695 # Find the right metadata tag and checks if we are going to
1696 # override it
1697 #
1698 # Note: This over writes the first metadata block it
1699 # encountered. If there are multiple Sections in the doc.xml, it
1700 # might not behave as you would expect
1701
1702 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1703## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1704## print STDERR "**** metamode = $metamode\n";
1705
1706 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1707 {
1708 my $name_attr = $attrHash->{'name'};
1709 if (($name_attr eq $metaname) && ($metamode eq "override"))
1710 {
1711 if (!defined $parser->{'parameters'}->{'poscount'})
1712 {
1713 $parser->{'parameters'}->{'poscount'} = 0;
1714 }
1715 else
1716 {
1717 $parser->{'parameters'}->{'poscount'}++;
1718 }
1719
1720 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1721 {
1722 ##print STDERR "#### got match!!\n";
1723 # Get the value and override the current value
1724 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1725 $attrHash->{'_content'} = $metavalue;
1726
1727 # Don't want it to wipe out any other pieces of metadata
1728 $parser->{'parameters'}->{'metamode'} = "done";
1729 }
1730 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1731 {
1732 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1733 $attrHash->{'_content'} = $metavalue;
1734 $parser->{'parameters'}->{'metamode'} = "done";
1735 }
1736 }
1737 }
1738
1739 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1740 return [$tagname => $attrHash];
1741}
1742
1743# This method exists purely for catching invalid section numbers that the client
1744# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1745# we've seen all the Sections in the doc.xml, and none of their section nums matched
1746# if the metamode has not been set to 'done' by then.
1747sub dxml_archive
1748{
1749 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1750 my $metamode = $parser->{'parameters'}->{'metamode'};
1751
1752 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1753 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1754
1755# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1756
1757 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1758 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1759
1760 my $metaname = $parser->{'parameters'}->{'metaname'};
1761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1762
1763 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1764 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1765 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1766 print STDERR "\tin section $opt_doc_secnum.\n";
1767 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1768 }
1769
1770 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1771 return [$tagname => $attrHash];
1772}
1773
1774sub dxml_description
1775{
1776 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1777 my $metamode = $parser->{'parameters'}->{'metamode'};
1778
1779 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1780 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1781
1782###!!! print STDERR "**** Processing a closing </Description> tag \n";
1783# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1784
1785 # Accumulate the metadata
1786
1787 # We'll be accumulating metadata at this point if we haven't found and therefore
1788 # haven't processed the metadata yet.
1789 # For subsections, this means that if we're at a matching subsection, but haven't
1790 # found the correct metaname to override in that subsection, we accumulate it as new
1791 # meta in the subsection by adding it to the current description.
1792 # If there's no subsection info for the metadata, it will accumulate at the top level
1793 # section description if we hadn't found a matching metaname to override at this point.
1794
1795 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1796 # at the toplevel section. In the latter case, it means we want to process meta in the
1797 # toplevel section. So the eq check between the values below will work in all cases.
1798
1799 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1800 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1801 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1802
1803 if ($curr_secnum eq $opt_doc_secnum
1804 && ($metamode eq "accumulate" || $metamode eq "override")) {
1805 if ($metamode eq "override") {
1806 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1807 }
1808
1809 # If we get to here and metamode is override, this means there
1810 # was no existing value to overide => treat as an append operation
1811
1812 # Tack a new metadata tag on to the end of the <Metadata>+ block
1813 my $metaname = $parser->{'parameters'}->{'metaname'};
1814 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1815
1816 my $metadata_attr = {
1817 '_content' => $metavalue,
1818 'name' => $metaname,
1819 'mode' => "accumulate"
1820 };
1821
1822 my $append_metadata = [ "Metadata" => $metadata_attr ];
1823 my $description_content = $attrHash->{'_content'};
1824
1825 print "Appending metadata to doc.xml\n";
1826
1827 if (ref($description_content)) {
1828 # got some existing interesting nested content
1829 push(@$description_content, " ", $append_metadata ,"\n ");
1830 }
1831 else {
1832 #description_content is most likely a string such as "\n"
1833 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1834 }
1835
1836 $parser->{'parameters'}->{'metamode'} = "done";
1837 }
1838 else {
1839 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1840## print STDERR "**** NOT ACCUMULATE?!? \n";
1841 }
1842
1843 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1844 return [$tagname => $attrHash];
1845}
1846
1847
1848sub dxml_start_section
1849{
1850 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1851
1852 my $new_depth = scalar(@$contextArray);
1853
1854# print STDERR "**** START SECTION \n";
1855
1856 if ($new_depth == 1) {
1857 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1858 $parser->{'parameters'}->{'curr_section_num'} = "";
1859 }
1860
1861 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1862 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1863
1864 my $new_secnum;
1865
1866 if ($new_depth > $old_depth) {
1867 # child subsection
1868 $new_secnum = "$old_secnum.1";
1869 }
1870 elsif ($new_depth == $old_depth) {
1871 # sibling section => increase it's value by 1
1872 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1873 $tail_num++;
1874 $new_secnum = $old_secnum;
1875 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1876 }
1877 else {
1878 # back up to parent section => lopp off tail
1879 $new_secnum = $old_secnum;
1880 $new_secnum =~ s/\.\d+$//;
1881 }
1882
1883 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1884 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1885
1886 1;
1887}
1888
1889sub edit_xml_file
1890{
1891 my $self = shift @_;
1892 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1893
1894 # use XML::Rules to add it in (read in and out again)
1895 my $parser = XML::Rules->new(start_rules => $start_rules,
1896 rules => $rules,
1897 style => 'filter',
1898 output_encoding => 'utf8' );
1899
1900 my $xml_in = "";
1901 if (!open(MIN,"<$filename")) {
1902 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1903 }
1904 else {
1905 # Read all the text in
1906 my $line;
1907 while (defined ($line=<MIN>)) {
1908 $xml_in .= $line;
1909 }
1910 close(MIN);
1911
1912 my $MOUT;
1913 if (!open($MOUT,">$filename")) {
1914 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1915 }
1916 else {
1917 # Matched lines will get handled by the call backs
1918## my $xml_out = "";
1919
1920 binmode($MOUT,":utf8");
1921 $parser->filter($xml_in,$MOUT, $options);
1922
1923# binmode(MOUT,":utf8");
1924# print MOUT $xml_out;
1925 close($MOUT);
1926 }
1927 }
1928
1929 # copy across any custom error information that was stored during parsing
1930 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1931}
1932
1933sub edit_doc_xml
1934{
1935 my $self = shift @_;
1936 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1937
1938 my $info_mess = <<RAWEND;
1939****************************
1940 edit_doc_xml()
1941****************************
1942RAWEND
1943
1944 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1945 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1946 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1947 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1948 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1949 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1950 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1951
1952 $info_mess .= "****************************\n";
1953
1954 $gsdl_cgi->generate_message($info_mess);
1955
1956 # To monitor which section/subsection number we are in
1957 my @start_rules =
1958 ( 'Section' => \&dxml_start_section );
1959
1960 # use XML::Rules to add it in (read in and out again)
1961 # Set the call back functions
1962 my @rules =
1963 ( _default => 'raw',
1964 'Metadata' => \&dxml_metadata,
1965 'Description' => \&dxml_description,
1966 'Archive' => \&dxml_archive); # just for catching errors at end
1967
1968 # Sets the parameters
1969 my $options = { 'metaname' => $metaname,
1970 'metapos' => $metapos,
1971 'metavalue' => $metavalue,
1972 'metamode' => $metamode,
1973 'prevmetavalue' => $prevmetavalue };
1974
1975 if (defined $opt_secnum) {
1976 $options->{'secnum'} = $opt_secnum;
1977 }
1978
1979 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1980}
1981
1982sub set_archives_metadata_entry
1983{
1984 my $self = shift @_;
1985 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1986
1987 my $info_mess = <<RAWEND;
1988****************************
1989 set_archives_metadata_entry()
1990****************************
1991RAWEND
1992
1993 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1994 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1995 $info_mess .= " collect = $collect\n" if defined($collect);
1996 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1997 $info_mess .= " docid = $docid\n" if defined($docid);
1998 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1999 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2000 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2001 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2002 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2003
2004 $info_mess .= "****************************\n";
2005
2006 $gsdl_cgi->generate_message($info_mess);
2007
2008 # Obtain the doc.xml path for the specified docID
2009 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2010
2011 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2012 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2013 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2014
2015 # The $doc_xml_file is relative to the archives, and now let's get the full path
2016 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
2017 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
2018
2019 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
2020 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2021 # Thereafter, we will finally be setting the overriding metavalue for this metaname
2022 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2023 # remove all values of $metaname metadata
2024 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
2025 }
2026 # Edit the doc.xml file with the specified metadata name, value and position.
2027 # TODO: there is a potential problem here as this edit_doc_xml function
2028 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2029 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2030
2031 # dxml_metadata method ignores metapos if metamode anything other than override
2032 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
2033 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
2034
2035 # return 0; # return 0 for now to indicate no error
2036 return (defined $self->{'error_msg'}) ? 1 : 0;
2037}
2038
2039
2040sub set_archives_metadata
2041{
2042 my $self = shift @_;
2043
2044 my $username = $self->{'username'};
2045 my $collect = $self->{'collect'};
2046 my $gsdl_cgi = $self->{'gsdl_cgi'};
2047
2048 if ($baseaction::authentication_enabled) {
2049 # Ensure the user is allowed to edit this collection
2050 $self->authenticate_user($username, $collect);
2051 }
2052
2053 # Make sure the collection isn't locked by someone else
2054 $self->lock_collection($username, $collect);
2055
2056 $self->_set_archives_metadata(@_);
2057
2058 # Release the lock once it is done
2059 $self->unlock_collection($username, $collect);
2060}
2061
2062sub _set_archives_metadata_array
2063{
2064 my $self = shift @_;
2065
2066 my $collect = $self->{'collect'};
2067 my $gsdl_cgi = $self->{'gsdl_cgi'};
2068 my $site = $self->{'site'};
2069 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2070
2071 # look up additional args
2072
2073 my $infodbtype = $self->{'infodbtype'};
2074
2075 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2076
2077 my $json_str = $self->{'json'};
2078 my $doc_array = decode_json $json_str;
2079
2080
2081 my $global_status = 0;
2082 my $global_mess = "";
2083
2084 my @all_docids = ();
2085
2086 foreach my $doc_array_rec ( @$doc_array ) {
2087 my $status = -1;
2088 my $docid = $doc_array_rec->{'docid'};
2089
2090 push(@all_docids,$docid);
2091
2092 my $metaname = $doc_array_rec->{'metaname'};
2093 if(defined $metaname) {
2094
2095 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2096
2097 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2098 my $metavalue = $doc_array_rec->{'metavalue'};
2099 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2100
2101
2102 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2103 # make "accumulate" the default (less destructive, as it won't actually
2104 # delete any existing values)
2105 $metamode = "accumulate";
2106 }
2107
2108 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2109 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2110 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2111 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2112
2113 foreach my $metatable_rec ( @$metatable ) {
2114 $metaname = $metatable_rec->{'metaname'};
2115 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2116 my $metapos = undef;
2117 my $prevmetavalue = undef;
2118 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2119
2120 foreach my $metavalue ( @$metavals ) {
2121 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2122 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2123
2124 if($metamode eq "override") { # now, having overridden the metavalue for the first,
2125 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2126 # metavalue for this metaname will be lost
2127 $metamode = "accumulate";
2128 }
2129 }
2130 }
2131 }
2132
2133 if ($status != 0) {
2134 # Catch error if set infodb entry failed
2135 $global_status = $status;
2136 $global_mess .= "Failed to set metadata key: $docid\n";
2137 $global_mess .= "Exit status: $status\n";
2138 $global_mess .= "System Error Message: $!\n";
2139 $global_mess .= "-" x 20 . "\n";
2140 }
2141 }
2142
2143 if ($global_status != 0) {
2144 $global_mess .= "PATH: $ENV{'PATH'}\n";
2145 $gsdl_cgi->generate_error($global_mess);
2146 }
2147 else {
2148 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2149 $gsdl_cgi->generate_ok_message($mess);
2150 }
2151}
2152
2153sub set_archives_metadata_array
2154{
2155 my $self = shift @_;
2156
2157 my $username = $self->{'username'};
2158 my $collect = $self->{'collect'};
2159 my $gsdl_cgi = $self->{'gsdl_cgi'};
2160# my $gsdlhome = $self->{'gsdlhome'};
2161
2162 if ($baseaction::authentication_enabled) {
2163 # Ensure the user is allowed to edit this collection
2164 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2165 }
2166
2167 my $site = $self->{'site'};
2168 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2169
2170 $gsdl_cgi->checked_chdir($collect_dir);
2171
2172 # Obtain the collect dir
2173 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2174
2175 # Make sure the collection isn't locked by someone else
2176 $self->lock_collection($username, $collect);
2177
2178 $self->_set_archives_metadata_array(@_);
2179
2180 # Release the lock once it is done
2181 $self->unlock_collection($username, $collect);
2182}
2183
2184sub _remove_archives_metadata
2185{
2186 my $self = shift @_;
2187
2188 my $collect = $self->{'collect'};
2189 my $gsdl_cgi = $self->{'gsdl_cgi'};
2190# my $gsdlhome = $self->{'gsdlhome'};
2191 my $infodbtype = $self->{'infodbtype'};
2192
2193 my $site = $self->{'site'};
2194
2195 # Obtain the collect and archive dir
2196 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2197
2198 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2199
2200 # look up additional args
2201 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2202
2203 my $metaname = $self->{'metaname'};
2204 my $metapos = $self->{'metapos'};
2205
2206 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2207
2208 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2209 # default to deleting the first metavalue for the given metaname
2210 # Beware that if both metapos AND metavalue are defined, both matches (if any)
2211 # seem to get deleted in one single remove_archives_meta action invocation.
2212 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2213 # deleted, both get deleted.
2214 if(!defined $metapos && !defined $metavalue) {
2215 $metapos = 0;
2216 }
2217
2218 my $metamode = $self->{'metamode'} || undef;
2219
2220 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2221 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2222
2223 # This now stores the full pathname
2224 my $doc_filename = $doc_rec->{'doc-file'}->[0];
2225
2226 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2227# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
2228
2229 if ($status == 0)
2230 {
2231 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
2232 $mess .= " $metaname";
2233 $mess .= "->[$metapos]" if (defined $metapos);
2234 $mess .= " ($metavalue)" if (defined $metavalue);
2235 $gsdl_cgi->generate_ok_message($mess);
2236 }
2237 else
2238 {
2239 my $mess .= "Failed to remove archives metadata key: $docid\n";
2240 $mess .= "Exit status: $status\n";
2241 $mess .= "System Error Message: $!\n";
2242 $mess .= "-" x 20 . "\n";
2243
2244 $gsdl_cgi->generate_error($mess);
2245 }
2246
2247 #return $status; # in case calling functions have a use for this
2248}
2249
2250sub remove_archives_metadata
2251{
2252 my $self = shift @_;
2253
2254 my $username = $self->{'username'};
2255 my $collect = $self->{'collect'};
2256 my $gsdl_cgi = $self->{'gsdl_cgi'};
2257
2258 if ($baseaction::authentication_enabled)
2259 {
2260 # Ensure the user is allowed to edit this collection
2261 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2262 }
2263
2264 # Make sure the collection isn't locked by someone else
2265 $self->lock_collection($username, $collect);
2266
2267 $self->_remove_archives_metadata(@_);
2268
2269 # Release the lock once it is done
2270 $self->unlock_collection($username, $collect);
2271}
2272
2273sub remove_from_doc_xml
2274{
2275 my $self = shift @_;
2276 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
2277
2278 my @start_rules = ('Section' => \&dxml_start_section);
2279
2280 # Set the call-back functions for the metadata tags
2281 my @rules =
2282 (
2283 _default => 'raw',
2284 'Metadata' => \&rfdxml_metadata
2285 );
2286
2287 my $parser = XML::Rules->new
2288 (
2289 start_rules => \@start_rules,
2290 rules => \@rules,
2291 style => 'filter',
2292 output_encoding => 'utf8',
2293# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2294 stripspaces => 2|0|0 # ineffectual
2295 );
2296
2297 my $status = 0;
2298 my $xml_in = "";
2299 if (!open(MIN,"<$doc_xml_filename"))
2300 {
2301 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2302 $status = 1;
2303 }
2304 else
2305 {
2306 # Read them in
2307 my $line;
2308 while (defined ($line=<MIN>)) {
2309 $xml_in .= $line;
2310 }
2311 close(MIN);
2312
2313 # Filter with the call-back functions
2314 my $xml_out = "";
2315
2316 my $MOUT;
2317 if (!open($MOUT,">$doc_xml_filename")) {
2318 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2319 $status = 1;
2320 }
2321 else {
2322 binmode($MOUT,":utf8");
2323 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
2324 close($MOUT);
2325 }
2326 }
2327 return $status;
2328}
2329
2330sub rfdxml_metadata
2331{
2332 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2333
2334 # For comparisons, toplevel section is indicated by ""
2335 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2336 my $secid = $parser->{'parameters'}->{'secid'} || "";
2337
2338 if (!($secid eq $curr_sec_num))
2339 {
2340 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2341 return [$tagname => $attrHash];
2342 }
2343
2344 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2345 {
2346 if (!defined $parser->{'parameters'}->{'poscount'})
2347 {
2348 $parser->{'parameters'}->{'poscount'} = 0;
2349 }
2350 else
2351 {
2352 $parser->{'parameters'}->{'poscount'}++;
2353 }
2354
2355 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2356 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
2357 return [];
2358 }
2359
2360 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2361 {
2362 return [];
2363 }
2364
2365 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2366 {
2367 return [];
2368 }
2369 }
2370
2371 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2372 return [$tagname => $attrHash];
2373}
2374
2375sub mxml_metadata
2376{
2377 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2378 my $metaname = $parser->{'parameters'}->{'metaname'};
2379 my $metamode = $parser->{'parameters'}->{'metamode'};
2380
2381 # Report error if we don't see FileName tag before this
2382 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2383
2384 # Don't do anything if we are not in the right FileSet
2385 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2386 if ($file_regexp =~ /\.\*/) {
2387 # Only interested in a file_regexp if it specifies precisely one
2388 # file.
2389 # So, skip anything with a .* in it as it is too general
2390## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2391 return [$tagname => $attrHash];
2392 }
2393 my $src_file = $parser->{'parameters'}->{'src_file'};
2394 if (!($src_file =~ /$file_regexp/)) {
2395 return [$tagname => $attrHash];
2396 }
2397## print STDERR "*** mxl metamode = $metamode\n";
2398
2399 # Find the right metadata tag and checks if we are going to override it
2400 my $name_attr = $attrHash->{'name'};
2401 if (($name_attr eq $metaname) && ($metamode eq "override")) {
2402
2403 # now metadata.xml functions need to keep track of metapos
2404 if (!defined $parser->{'parameters'}->{'poscount'})
2405 {
2406 $parser->{'parameters'}->{'poscount'} = 0;
2407 }
2408 else
2409 {
2410 $parser->{'parameters'}->{'poscount'}++;
2411 }
2412
2413 # If either the metapos or prevmetavalue is set,
2414 # get the value and override the current value
2415 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2416
2417 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2418 {
2419 $attrHash->{'_content'} = $metavalue;
2420
2421 ## print STDERR "**** overriding metadata.xml\n";
2422
2423 # Don't want it to wipe out any other pieces of metadata
2424 $parser->{'parameters'}->{'metamode'} = "done";
2425 }
2426 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2427 {
2428 $attrHash->{'_content'} = $metavalue;
2429 $parser->{'parameters'}->{'metamode'} = "done";
2430 }
2431 }
2432
2433 # mxml_description will process the metadata if metadata is accumulate,
2434 # or if we haven't found the metadata to override
2435
2436 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2437 return [$tagname => $attrHash];
2438}
2439
2440
2441sub mxml_description
2442{
2443 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2444 my $metamode = $parser->{'parameters'}->{'metamode'};
2445
2446 # Failed... Report error if we don't see FileName tag before this
2447 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2448
2449 # Don't do anything if we are not in the right FileSet
2450 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2451 if ($file_regexp =~ m/\.\*/) {
2452 # Only interested in a file_regexp if it specifies precisely one
2453 # file.
2454 # So, skip anything with a .* in it as it is too general
2455 return [$tagname => $attrHash];
2456 }
2457 my $src_file = $parser->{'parameters'}->{'src_file'};
2458
2459 if (!($src_file =~ m/$file_regexp/)) {
2460 return [$tagname => $attrHash];
2461 }
2462
2463 # Accumulate the metadata block to the end of the description block
2464 # Note: This adds metadata block to all description blocks, so if there are
2465 # multiple FileSets, it will add to all of them
2466 if (($metamode eq "accumulate") || ($metamode eq "override")) {
2467
2468 # if metamode was "override" but get to here then it failed to
2469 # find an item to override, in which case it should append its
2470 # value to the end, just like the "accumulate" mode
2471
2472 if ($metamode eq "override") {
2473 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2474 }
2475
2476 # tack a new metadata tag on to the end of the <Metadata>+ block
2477 my $metaname = $parser->{'parameters'}->{'metaname'};
2478 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2479
2480 my $metadata_attr = { '_content' => $metavalue,
2481 'name' => $metaname,
2482 'mode' => "accumulate" };
2483
2484 my $append_metadata = [ "Metadata" => $metadata_attr ];
2485 my $description_content = $attrHash->{'_content'};
2486
2487## print STDERR "*** appending to metadata.xml\n";
2488
2489 # append the new metadata element to the end of the current
2490 # content contained inside this tag
2491 if (ref($description_content) eq "") {
2492 # => string or numeric literal
2493 # this is caused by a <Description> block has no <Metadata> child elements
2494 # => set up an empty array in '_content'
2495 $attrHash->{'_content'} = [ "\n" ];
2496 $description_content = $attrHash->{'_content'};
2497 }
2498
2499 push(@$description_content," ", $append_metadata ,"\n ");
2500 $parser->{'parameters'}->{'metamode'} = "done";
2501 }
2502
2503 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2504 return [$tagname => $attrHash];
2505}
2506
2507
2508sub mxml_filename
2509{
2510 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2511
2512 # Store the filename of the Current Fileset
2513 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2514 # FileName tag must come before Description tag
2515 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2516
2517 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2518 return [$tagname => $attrHash];
2519}
2520
2521
2522sub mxml_fileset
2523{
2524 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2525
2526 # Initilise the current_file
2527 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2528 # FileName tag must come before Description tag
2529 $parser->{'parameters'}->{'current_file'} = "";
2530
2531 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2532 return [$tagname => $attrHash];
2533}
2534
2535sub mxml_directorymetadata
2536{
2537 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2538
2539 # if we haven't processed the metadata when we reach the end of metadata.xml
2540 # it's because there's no particular FileSet element whose FileName matched
2541 # In which case, add a new FileSet for this FileName
2542 my $metamode = $parser->{'parameters'}->{'metamode'};
2543 if($metamode ne "done") {
2544
2545 if ($metamode eq "override") {
2546 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2547 }
2548
2549 # If we get to here and metamode is override, this means there
2550 # was no existing value to overide => treat as an append operation
2551
2552 # Create a new FileSet element and append to DirectoryMetadata
2553 # <FileSet>
2554 # <FileName>src_file</FileName>
2555 # <Description>
2556 # <Metadata mode="" name="">metavalue</Metadata>
2557 # </Description>
2558 # </FileSet>
2559 my $src_file = $parser->{'parameters'}->{'src_file'};
2560 my $metaname = $parser->{'parameters'}->{'metaname'};
2561 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2562 my $metadata_attr = {
2563 '_content' => $metavalue,
2564 'name' => $metaname,
2565 'mode' => "accumulate"
2566 };
2567 my $append_metadata = [ "Metadata" => $metadata_attr ];
2568 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
2569 my $description_element = [ "Description" => $description_attr ];
2570
2571 #_content is not an attribute, it's special and holds the children of this element
2572 # including the textnode value embedded in this element if any.
2573 my $filename_attr = {'_content' => $src_file};
2574 my $filename_element = [ "FileName" => $filename_attr ];
2575
2576 my $fileset_attr = {};
2577 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2578 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2579
2580
2581 # get children of dirmeta, and push the new FileSet element onto it
2582 print "Appending metadata to metadata.xml\n";
2583 my $dirmeta_content = $attrHash->{'_content'};
2584 if (ref($dirmeta_content)) {
2585 # got some existing interesting nested content
2586 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2587 push(@$dirmeta_content, "\t", $fileset ,"\n");
2588 }
2589 else {
2590 #description_content is most likely a string such as "\n"
2591 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2592 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2593 }
2594
2595 $parser->{'parameters'}->{'metamode'} = "done";
2596 }
2597 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2598 return [$tagname => $attrHash];
2599}
2600
2601
2602sub edit_metadata_xml
2603{
2604 my $self = shift @_;
2605 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2606
2607 # Set the call-back functions for the metadata tags
2608 my @rules =
2609 ( _default => 'raw',
2610 'FileName' => \&mxml_filename,
2611 'Metadata' => \&mxml_metadata,
2612 'Description' => \&mxml_description,
2613 'FileSet' => \&mxml_fileset,
2614 'DirectoryMetadata' => \&mxml_directorymetadata);
2615
2616 # use XML::Rules to add it in (read in and out again)
2617 my $parser = XML::Rules->new(rules => \@rules,
2618 style => 'filter',
2619 output_encoding => 'utf8',
2620 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2621
2622 if (!-e $metadata_xml_filename) {
2623
2624 if (open(MOUT,">$metadata_xml_filename")) {
2625
2626 my $src_file_re = &util::filename_to_regex($src_file);
2627 # shouldn't the following also be in the above utility routine??
2628 # $src_file_re =~ s/\./\\./g;
2629
2630 print MOUT "<?xml version=\"1.0\"?>\n";
2631 print MOUT "<DirectoryMetadata>\n";
2632 print MOUT " <FileSet>\n";
2633 print MOUT " <FileName>$src_file_re</FileName>\n";
2634 print MOUT " <Description>\n";
2635 print MOUT " </Description>\n";
2636 print MOUT " </FileSet>\n";
2637 print MOUT "</DirectoryMetadata>\n";
2638
2639 close(MOUT);
2640 }
2641 else {
2642 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2643 }
2644 }
2645
2646
2647 my $xml_in = "";
2648 if (!open(MIN,"<$metadata_xml_filename")) {
2649 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2650 }
2651 else {
2652 # Read them in
2653 my $line;
2654 while (defined ($line=<MIN>)) {
2655 $xml_in .= $line;
2656 }
2657 close(MIN);
2658
2659 # Filter with the call-back functions
2660 my $xml_out = "";
2661
2662 my $MOUT;
2663 if (!open($MOUT,">$metadata_xml_filename")) {
2664 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2665 }
2666 else {
2667 binmode($MOUT,":utf8");
2668
2669 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2670 # At the moment, I will just hack it!
2671 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2672 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2673 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2674 #print MOUT $xml_out;
2675
2676 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2677 metapos => $metapos,
2678 metavalue => $metavalue,
2679 metamode => $metamode,
2680 src_file => $src_file,
2681 prevmetavalue => $prevmetavalue,
2682 current_file => undef} );
2683 close($MOUT);
2684 }
2685 }
2686}
2687
2688
2689sub set_import_metadata
2690{
2691 my $self = shift @_;
2692
2693 my $username = $self->{'username'};
2694 my $collect = $self->{'collect'};
2695 my $gsdl_cgi = $self->{'gsdl_cgi'};
2696
2697 if ($baseaction::authentication_enabled) {
2698 # Ensure the user is allowed to edit this collection
2699 $self->authenticate_user($username, $collect);
2700 }
2701
2702 # Make sure the collection isn't locked by someone else
2703 $self->lock_collection($username, $collect);
2704
2705 $self->_set_import_metadata(@_);
2706
2707 # Release the lock once it is done
2708 $self->unlock_collection($username, $collect);
2709
2710}
2711
2712sub set_import_metadata_array
2713{
2714 my $self = shift @_;
2715
2716 my $username = $self->{'username'};
2717 my $collect = $self->{'collect'};
2718 my $gsdl_cgi = $self->{'gsdl_cgi'};
2719# my $gsdlhome = $self->{'gsdlhome'};
2720
2721 if ($baseaction::authentication_enabled) {
2722 # Ensure the user is allowed to edit this collection
2723 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2724 }
2725
2726 my $site = $self->{'site'};
2727 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2728
2729 $gsdl_cgi->checked_chdir($collect_dir);
2730
2731 # Make sure the collection isn't locked by someone else
2732 $self->lock_collection($username, $collect);
2733
2734 $self->_set_import_metadata_array(@_);
2735
2736 # Release the lock once it is done
2737 $self->unlock_collection($username, $collect);
2738
2739}
2740
2741
2742sub _set_import_metadata_array
2743{
2744 my $self = shift @_;
2745
2746 my $collect = $self->{'collect'};
2747 my $gsdl_cgi = $self->{'gsdl_cgi'};
2748
2749 my $site = $self->{'site'};
2750 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2751
2752 # look up additional args
2753
2754 my $infodbtype = $self->{'infodbtype'};
2755
2756 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2757 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2758
2759 my $json_str = $self->{'json'};
2760 my $doc_array = decode_json $json_str;
2761
2762 my $global_status = 0;
2763 my $global_mess = "";
2764
2765 my @all_docids = ();
2766
2767 foreach my $doc_array_rec ( @$doc_array )
2768 {
2769 my $status = -1;
2770 my $docid = $doc_array_rec->{'docid'};
2771
2772 my ($docid_root,$docid_secnum);
2773 if(defined $docid) {
2774 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2775 # as yet no support for setting subsection metadata in metadata.xml
2776 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2777 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2778 next; # skip this docid in for loop
2779 }
2780 }
2781
2782 push(@all_docids,$docid); # docid_root rather
2783
2784 my $metaname = $doc_array_rec->{'metaname'};
2785 if (defined $metaname) {
2786 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2787 my $metavalue = $doc_array_rec->{'metavalue'};
2788 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2789
2790 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2791 # make "accumulate" the default (less destructive, as won't actually
2792 # delete any existing values)
2793 $metamode = "accumulate";
2794 }
2795
2796 # adding metapos and prevmetavalue support to import_metadata subroutines
2797 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2798 my $prevmetavalue = $self->{'prevmetavalue'};
2799
2800 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2801
2802 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2803 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2804
2805 foreach my $metatable_rec ( @$metatable ) {
2806 $metaname = $metatable_rec->{'metaname'};
2807 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2808 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2809 # make "accumulate" the default (less destructive, as won't actually
2810 # delete any existing values)
2811 $metamode = "accumulate";
2812 }
2813
2814 # No support for metapos and prevmetavalue in the JSON metatable substructure
2815 my $metapos = undef;
2816 my $prevmetavalue = undef;
2817 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2818
2819 foreach my $metavalue ( @$metavals ) {
2820 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2821
2822 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2823 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2824 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2825 # metavalue for this metaname will be lost
2826 $metamode = "accumulate";
2827 }
2828 }
2829 }
2830 }
2831 }
2832
2833 # always a success message
2834 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2835 $gsdl_cgi->generate_ok_message($mess);
2836}
2837
2838# always returns true (1)
2839sub set_import_metadata_entry
2840{
2841 my $self = shift @_;
2842 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2843
2844 my $info_mess = <<RAWEND;
2845****************************
2846 set_import_metadata_entry()
2847****************************
2848RAWEND
2849
2850 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2851 $info_mess .= " collect = $collect\n" if defined($collect);
2852 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2853 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2854 $info_mess .= " docid = $docid\n" if defined($docid);
2855 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2856 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2857 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2858 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2859 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2860
2861 $info_mess .= "****************************\n";
2862
2863 $gsdl_cgi->generate_message($info_mess);
2864
2865 # import works with metadata.xml which can have inherited metadata
2866 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2867 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2868 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2869 # what they're doing if they provide a metapos.
2870 if(defined $metapos) {
2871 print STDERR "@@@@ WARNING: metapos defined.\n";
2872 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2873 }
2874
2875 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2876 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2877 my $metadata_xml_file;
2878 my $import_filename = undef;
2879
2880 if (defined $docid) {
2881 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2882 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2883
2884 # This now stores the full pathname
2885 $import_filename = $doc_rec->{'src-file'}->[0];
2886 $import_filename = &util::placeholders_to_abspath($import_filename);
2887
2888 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2889 # as the array version of the method doesn't support the -f parameter yet
2890 my $import_file = $self->{'f'};
2891 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2892 }
2893
2894 # figure out correct metadata.xml file [?]
2895 # Assuming the metadata.xml file is next to the source file
2896 # Note: This will not work if it is using the inherited metadata from the parent folder
2897 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2898 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2899
2900 # If we're overriding everything, then $prevmetavalue=undefined and
2901 # $metamode=override combined with $metapos=undefined
2902 # in which case we need to remove all metavalues for the metaname at the given (sub)section
2903 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2904 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2905## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2906 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2907
2908 }
2909
2910 # Edit the metadata.xml
2911 # Modified by Jeffrey from DL Consulting
2912 # Handle the case where there is one metadata.xml file for multiple FileSets
2913 # The XML filter needs to know whether it is in the right FileSet
2914 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2915 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2916 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2917 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2918 #return 0;
2919 return $metadata_xml_filename;
2920}
2921
2922sub _remove_import_metadata
2923{
2924 my $self = shift @_;
2925
2926 my $collect = $self->{'collect'};
2927 my $gsdl_cgi = $self->{'gsdl_cgi'};
2928# my $gsdlhome = $self->{'gsdlhome'};
2929 my $infodbtype = $self->{'infodbtype'};
2930
2931 # Obtain the collect dir
2932 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2933 my $site = $self->{'site'};
2934 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2935
2936 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2937 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2938
2939 # look up additional args
2940 my $docid = $self->{'d'};
2941 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2942 {
2943 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2944 }
2945
2946 my $metaname = $self->{'metaname'};
2947 my $metapos = $self->{'metapos'};
2948 my $metavalue = $self->{'metavalue'};
2949 if(defined $metavalue) {
2950 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2951 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2952 $metapos = 0;
2953 }
2954 my $metamode = $self->{'metamode'} || undef;
2955
2956 # import works with metadata.xml which can have inherited metadata
2957 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2958 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2959 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2960 # what they're doing if they provide a metapos.
2961 if(defined $metapos) {
2962 print STDERR "@@@@ WARNING: metapos defined.\n";
2963 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2964 }
2965
2966 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2967 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2968 my $metadata_xml_file;
2969 my $import_filename = undef;
2970 if (defined $docid)
2971 {
2972 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2973 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2974
2975 # This now stores the full pathname
2976 $import_filename = $doc_rec->{'src-file'}->[0];
2977 $import_filename = &util::placeholders_to_abspath($import_filename);
2978 }
2979
2980 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2981 {
2982 $gsdl_cgi->generate_error("There is no metadata\n");
2983 }
2984
2985 # figure out correct metadata.xml file [?]
2986 # Assuming the metadata.xml file is next to the source file
2987 # Note: This will not work if it is using the inherited metadata from the parent folder
2988 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2989 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2990
2991 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2992
2993 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2994 $mess .= " $metaname";
2995 $mess .= " = $metavalue\n";
2996
2997 $gsdl_cgi->generate_ok_message($mess);
2998
2999 #return $status; # in case calling functions have a use for this
3000}
3001
3002sub remove_import_metadata
3003{
3004 my $self = shift @_;
3005
3006 my $username = $self->{'username'};
3007 my $collect = $self->{'collect'};
3008 my $gsdl_cgi = $self->{'gsdl_cgi'};
3009
3010 if ($baseaction::authentication_enabled) {
3011 # Ensure the user is allowed to edit this collection
3012 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3013 }
3014
3015 # Make sure the collection isn't locked by someone else
3016 $self->lock_collection($username, $collect);
3017
3018 $self->_remove_import_metadata(@_);
3019
3020 # Release the lock once it is done
3021 $self->unlock_collection($username, $collect);
3022
3023}
3024
3025sub remove_from_metadata_xml
3026{
3027 my $self = shift @_;
3028 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3029 # metamode generally has no meaning for removing meta, but is used by set_meta
3030 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3031
3032 # Set the call-back functions for the metadata tags
3033 my @rules =
3034 (
3035 _default => 'raw',
3036 'Metadata' => \&rfmxml_metadata,
3037 'FileName' => \&mxml_filename
3038 );
3039
3040 my $parser = XML::Rules->new
3041 (
3042 rules => \@rules,
3043 style => 'filter',
3044 output_encoding => 'utf8',
3045 #normalisespaces => 1,
3046 stripspaces => 2|0|0 # ineffectual
3047 );
3048
3049 my $xml_in = "";
3050 if (!open(MIN,"<$metadata_xml_filename"))
3051 {
3052 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3053 }
3054 else
3055 {
3056 # Read them in
3057 my $line;
3058 while (defined ($line=<MIN>)) {
3059 $xml_in .= $line;
3060 }
3061 close(MIN);
3062
3063 # Filter with the call-back functions
3064 my $xml_out = "";
3065
3066 my $MOUT;
3067 if (!open($MOUT,">$metadata_xml_filename")) {
3068 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3069 }
3070 else {
3071 binmode($MOUT,":utf8");
3072 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
3073 close($MOUT);
3074 }
3075 }
3076}
3077
3078sub rfmxml_metadata
3079{
3080 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3081
3082 # metadata.xml does not handle subsections
3083
3084 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3085 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3086 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
3087 {
3088 if (!defined $parser->{'parameters'}->{'poscount'})
3089 {
3090 $parser->{'parameters'}->{'poscount'} = 0;
3091 }
3092 else
3093 {
3094 $parser->{'parameters'}->{'poscount'}++;
3095 }
3096
3097 # if overriding but no metapos, then clear all the meta for this metaname
3098 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
3099 return [];
3100 }
3101
3102 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3103 {
3104 return [];
3105 }
3106 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3107 {
3108 return [];
3109 }
3110 }
3111
3112 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3113 return [$tagname => $attrHash];
3114}
3115
3116sub _remove_live_metadata
3117{
3118 my $self = shift @_;
3119
3120 my $collect = $self->{'collect'};
3121 my $gsdl_cgi = $self->{'gsdl_cgi'};
3122# my $gsdlhome = $self->{'gsdlhome'};
3123 my $infodbtype = $self->{'infodbtype'};
3124
3125 # Obtain the collect dir
3126 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3127 my $site = $self->{'site'};
3128 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3129
3130
3131 # look up additional args
3132 my $docid = $self->{'d'};
3133 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3134 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3135 }
3136
3137 # Generate the dbkey
3138 my $metaname = $self->{'metaname'};
3139 my $dbkey = "$docid.$metaname";
3140
3141 # To people who know $collect_tail please add some comments
3142 # Obtain the live gdbm_db path
3143 my $collect_tail = $collect;
3144 $collect_tail =~ s/^.*[\/|\\]//;
3145 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3146 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3147
3148 # Remove the key
3149 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3150 my $status = system($cmd);
3151 if ($status != 0) {
3152 # Catch error if gdbmdel failed
3153 my $mess = "Failed to set metadata key: $dbkey\n";
3154
3155 $mess .= "PATH: $ENV{'PATH'}\n";
3156 $mess .= "cmd = $cmd\n";
3157 $mess .= "Exit status: $status\n";
3158 $mess .= "System Error Message: $!\n";
3159
3160 $gsdl_cgi->generate_error($mess);
3161 }
3162 else {
3163 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3164 }
3165
3166}
3167
3168sub remove_live_metadata
3169{
3170 my $self = shift @_;
3171
3172 my $username = $self->{'username'};
3173 my $collect = $self->{'collect'};
3174 my $gsdl_cgi = $self->{'gsdl_cgi'};
3175 my $gsdlhome = $self->{'gsdlhome'};
3176
3177 if ($baseaction::authentication_enabled) {
3178 # Ensure the user is allowed to edit this collection
3179 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3180 }
3181
3182 # Make sure the collection isn't locked by someone else
3183 $self->lock_collection($username, $collect);
3184
3185 $self->_remove_live_metadata(@_);
3186
3187 $self->unlock_collection($username, $collect);
3188}
3189
3190sub remove_metadata
3191{
3192 my $self = shift @_;
3193
3194 my $where = $self->{'where'};
3195 if(!$where) {
3196 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3197 return;
3198 }
3199
3200 my $username = $self->{'username'};
3201 my $collect = $self->{'collect'};
3202 my $gsdl_cgi = $self->{'gsdl_cgi'};
3203
3204 if ($baseaction::authentication_enabled) {
3205 # Ensure the user is allowed to edit this collection
3206 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3207 }
3208
3209 # Make sure the collection isn't locked by someone else
3210 $self->lock_collection($username, $collect);
3211
3212 # check which directories need to be processed, specified in $where as
3213 # any combination of import|archives|index|live
3214 if($where =~ m/import/) {
3215 $self->_remove_import_metadata(@_);
3216 }
3217 if($where =~ m/archives/) {
3218 $self->_remove_archives_metadata(@_);
3219 }
3220 if($where =~ m/index/) {
3221 $self->_remove_index_metadata(@_);
3222 }
3223
3224 # Release the lock once it is done
3225 $self->unlock_collection($username, $collect);
3226}
3227
3228# the internal version, without authentication
3229sub _remove_index_metadata
3230{
3231 my $self = shift @_;
3232
3233 my $collect = $self->{'collect'};
3234 my $gsdl_cgi = $self->{'gsdl_cgi'};
3235# my $gsdlhome = $self->{'gsdlhome'};
3236 my $infodbtype = $self->{'infodbtype'};
3237
3238 # Obtain the collect dir
3239 my $site = $self->{'site'};
3240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3241 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3242
3243 # look up additional args
3244 my $docid = $self->{'d'};
3245 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3246 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3247 }
3248 my $metaname = $self->{'metaname'};
3249 my $metapos = $self->{'metapos'};
3250 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
3251
3252 # To people who know $collect_tail please add some comments
3253 # Obtain the path to the database
3254 my $collect_tail = $collect;
3255 $collect_tail =~ s/^.*[\/|\\]//;
3256 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3257 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3258
3259 # Read the docid entry
3260 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3261
3262 # Check to make sure the key does exist
3263 if (!defined ($doc_rec->{$metaname})) {
3264 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3265 }
3266
3267 # Obtain the specified metadata pos
3268 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3269 if(!defined $metapos && !defined $metavalue) {
3270 $metapos = 0;
3271 }
3272
3273
3274 # consider check key is defined before deleting?
3275 # Loop through the metadata array and ignore the specified position
3276 my $filtered_metadata = [];
3277 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
3278 for (my $i=0; $i<$num_metadata_vals; $i++) {
3279 my $metaval = shift(@{$doc_rec->{$metaname}});
3280
3281 if (!defined $metavalue && $i != $metapos) {
3282 push(@$filtered_metadata,$metaval);
3283 }
3284
3285 if(defined $metavalue && !($metavalue eq $metaval))
3286 {
3287 push(@$filtered_metadata,$metaval);
3288 }
3289 }
3290 $doc_rec->{$metaname} = $filtered_metadata;
3291
3292 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3293 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3294
3295 if ($status != 0) {
3296 my $mess = "Failed to set metadata key: $docid\n";
3297
3298 $mess .= "PATH: $ENV{'PATH'}\n";
3299 $mess .= "Exit status: $status\n";
3300 $mess .= "System Error Message: $!\n";
3301
3302 $gsdl_cgi->generate_error($mess);
3303 }
3304 else {
3305 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3306 $mess .= " $metaname";
3307 $mess .= "->[$metapos]" if (defined $metapos);
3308 $mess .= " ($metavalue)" if (defined $metavalue);
3309
3310 $gsdl_cgi->generate_ok_message($mess);
3311 }
3312
3313 #return $status; # in case calling functions have a use for this
3314}
3315
3316sub remove_index_metadata
3317{
3318 my $self = shift @_;
3319
3320 my $username = $self->{'username'};
3321 my $collect = $self->{'collect'};
3322 my $gsdl_cgi = $self->{'gsdl_cgi'};
3323# my $gsdlhome = $self->{'gsdlhome'};
3324
3325 if ($baseaction::authentication_enabled) {
3326 # Ensure the user is allowed to edit this collection
3327 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3328 }
3329
3330 # Obtain the collect dir
3331 my $site = $self->{'site'};
3332 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3333 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3334
3335 # Make sure the collection isn't locked by someone else
3336 $self->lock_collection($username, $collect);
3337
3338 $self->_remove_index_metadata(@_);
3339
3340 # Release the lock once it is done
3341 $self->unlock_collection($username, $collect);
3342}
3343
3344
3345# Was trying to reused the codes, but the functions need to be broken
3346# down more before they can be reused, otherwise there will be too
3347# much overhead and duplicate process...
3348sub insert_metadata
3349{
3350 my $self = shift @_;
3351
3352 my $username = $self->{'username'};
3353 my $collect = $self->{'collect'};
3354 my $gsdl_cgi = $self->{'gsdl_cgi'};
3355 my $gsdlhome = $self->{'gsdlhome'};
3356 my $infodbtype = $self->{'infodbtype'};
3357
3358 # If the import metadata and gdbm database have been updated, we
3359 # need to insert some notification to warn user that the the text
3360 # they see at the moment is not indexed and require a rebuild.
3361 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3362
3363 if ($baseaction::authentication_enabled) {
3364 # Ensure the user is allowed to edit this collection
3365 $self->authenticate_user($username, $collect);
3366 }
3367
3368 # Obtain the collect and archive dir
3369 my $site = $self->{'site'};
3370 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3371 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3372 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3373
3374 # Make sure the collection isn't locked by someone else
3375 $self->lock_collection($username, $collect);
3376
3377 # Check additional args
3378 my $docid = $self->{'d'};
3379 if (!defined($docid)) {
3380 $gsdl_cgi->generate_error("No document id is specified: d=...");
3381 }
3382 my $metaname = $self->{'metaname'};
3383 if (!defined($metaname)) {
3384 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3385 }
3386 my $metavalue = $self->{'metavalue'};
3387 if (!defined($metavalue) || $metavalue eq "") {
3388 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3389 }
3390 # make "accumulate" the default (less destructive, as won't actually
3391 # delete any existing values)
3392 my $metamode = "accumulate";
3393
3394 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3395 my $metapos = undef;
3396 my $prevmetavalue = undef;
3397
3398 #=======================================================================#
3399 # set_import_metadata [START]
3400 #=======================================================================#
3401 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3402 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3403 my $metadata_xml_file;
3404 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3405 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3406
3407 # This now stores the full pathname
3408 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3409 $import_filename = &util::placeholders_to_abspath($import_filename);
3410
3411 # figure out correct metadata.xml file [?]
3412 # Assuming the metadata.xml file is next to the source file
3413 # Note: This will not work if it is using the inherited metadata from the parent folder
3414 my ($import_tailname, $import_dirname)
3415 = File::Basename::fileparse($import_filename);
3416 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3417
3418 # Shane's escape characters
3419 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3420 $metavalue =~ s/\,/&#44;/g;
3421 $metavalue =~ s/\:/&#58;/g;
3422 $metavalue =~ s/\|/&#124;/g;
3423 $metavalue =~ s/\(/&#40;/g;
3424 $metavalue =~ s/\)/&#41;/g;
3425 $metavalue =~ s/\[/&#91;/g;
3426 $metavalue =~ s/\\/&#92;/g;
3427 $metavalue =~ s/\]/&#93;/g;
3428 $metavalue =~ s/\{/&#123;/g;
3429 $metavalue =~ s/\}/&#125;/g;
3430 $metavalue =~ s/\"/&#34;/g;
3431 $metavalue =~ s/\`/&#96;/g;
3432 $metavalue =~ s/\n/_newline_/g;
3433
3434 # Edit the metadata.xml
3435 # Modified by Jeffrey from DL Consulting
3436 # Handle the case where there is one metadata.xml file for multiple FileSets
3437 # The XML filter needs to know whether it is in the right FileSet
3438 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3439 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3440 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3441 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3442 #=======================================================================#
3443 # set_import_metadata [END]
3444 #=======================================================================#
3445
3446
3447 #=======================================================================#
3448 # set_metadata (accumulate version) [START]
3449 #=======================================================================#
3450 # To people who know $collect_tail please add some comments
3451 # Obtain path to the database
3452 my $collect_tail = $collect;
3453 $collect_tail =~ s/^.*[\/|\\]//;
3454 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3455 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3456
3457 # Read the docid entry
3458 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3459
3460 # Protect the quotes
3461 $metavalue =~ s/\"/\\\"/g;
3462
3463 # Adds the pending macro
3464 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3465
3466 # If the metadata doesn't exist, create a new one
3467 if (!defined($doc_rec->{$metaname})){
3468 $doc_rec->{$metaname} = [ $macro_metavalue ];
3469 }
3470 # Else, let's acculumate the values
3471 else {
3472 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3473 }
3474
3475 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3476 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3477
3478 if ($status != 0) {
3479 # Catch error if gdbmget failed
3480 my $mess = "Failed to set metadata key: $docid\n";
3481
3482 $mess .= "PATH: $ENV{'PATH'}\n";
3483 $mess .= "Exit status: $status\n";
3484 $mess .= "System Error Message: $!\n";
3485
3486 $gsdl_cgi->generate_error($mess);
3487 }
3488 else {
3489 my $mess = "insert-metadata successful: Key[$docid]\n";
3490 $mess .= " [In metadata.xml] $metaname";
3491 $mess .= " = $metavalue\n";
3492 $mess .= " [In database] $metaname";
3493 $mess .= " = $macro_metavalue\n";
3494 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3495 $gsdl_cgi->generate_ok_message($mess);
3496 }
3497 #=======================================================================#
3498 # set_metadata (accumulate version) [END]
3499 #=======================================================================#
3500
3501 # Release the lock once it is done
3502 $self->unlock_collection($username, $collect);
3503}
3504
35051;
Note: See TracBrowser for help on using the repository browser.