source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 29084

Last change on this file since 29084 was 29084, checked in by kjdon, 10 years ago

when editing metadata from the on line library (log in, and edit content), we are not passing in a metapos when changing a value. we are passing in the previous value. So need to test that metapos and prevvalue are both empty before deleting all values of a particular meta elem. Otherwise if we are editing a piece of metadata on line, then all the other values are deleted.

File size: 118.8 KB
Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ "metapos" ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "where" ],
71 'help-string' => [
72 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73 ]},
74
75 #SET METHODS
76 "set-live-metadata" => {
77 'compulsory-args' => [ "d", "metaname", "metavalue" ],
78 'optional-args' => [ ] },
79
80 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81 'compulsory-args' => [ "metaname", "metavalue" ],
82 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84 "set-index-metadata" => {
85 'compulsory-args' => [ "d", "metaname", "metavalue" ],
86 'optional-args' => [ "metapos", "metamode" ] },
87
88 "set-archives-metadata" => {
89 'compulsory-args' => [ "d", "metaname", "metavalue" ],
90 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91
92 "set-import-metadata" => {
93 'compulsory-args' => [ "metaname", "metavalue" ],
94 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95
96 #SET METHODS (ARRAY)
97 "set-metadata-array" => {
98 'compulsory-args' => [ "where", "json" ],
99 'optional-args' => [ ],
100 'help-string' => [
101 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102
103 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110
111 "set-archives-metadata-array" => {
112 'compulsory-args' => [ "json" ],
113 'optional-args' => [ ] },
114
115 "set-import-metadata-array" => {
116 'compulsory-args' => [ "json" ],
117 'optional-args' => [ ] },
118
119 "set-index-metadata-array" => {
120 'compulsory-args' => [ "json" ],
121 'optional-args' => [ ] },
122
123 "set-live-metadata-array" => {
124 'compulsory-args' => [ "json" ],
125 'optional-args' => [ ] },
126
127 #REMOVE METHODS
128 "remove-import-metadata" => {
129 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131
132 "remove-archives-metadata" => {
133 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134 'optional-args' => [ "metapos", "metavalue" ] },
135
136 "remove-live-metadata" => {
137 'compulsory-args' => [ "d", "metaname" ],
138 'optional-args' => [ ] },
139
140 "remove-index-metadata" => {
141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "metapos", "metavalue" ] },
143
144 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145 'compulsory-args' => [ "d", "metaname" ],
146 'optional-args' => [ "where", "metapos", "metavalue" ] },
147
148 #INSERT METHODS
149 "insert-metadata" => {
150 'compulsory-args' => [ "d", "metaname", "metavalue" ],
151 'optional-args' => [ ] }
152};
153
154
155sub new
156{
157 my $class = shift (@_);
158 my ($gsdl_cgi,$iis6_mode) = @_;
159
160 # Treat metavalue specially. To transmit this through a GET request
161 # the Javascript side has url-encoded it, so here we need to decode
162 # it before proceeding
163
164 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173 return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179 my $self = shift @_;
180
181 my $username = $self->{'username'};
182 my $collect = $self->{'collect'};
183 my $gsdl_cgi = $self->{'gsdl_cgi'};
184 my $gsdlhome = $self->{'gsdlhome'};
185 my $infodbtype = $self->{'infodbtype'};
186
187 # live metadata gets/saves value scoped (prefixed) by the current usename
188 # so (for now) let's not bother to enforce authentication
189
190 # Obtain the collect dir
191 my $site = $self->{'site'};
192 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
194
195 # No locking collection when getting metadata, only when setting it
196# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
197
198 # look up additional args
199 my $docid = $self->{'d'};
200 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
201 $gsdl_cgi->generate_error("No docid (d=...) specified.");
202 }
203
204 # Generate the dbkey
205 my $metaname = $self->{'metaname'};
206 my $dbkey = "$docid.$metaname";
207
208 # To people who know $collect_tail please add some comments
209 # Obtain path to the database
210 my $collect_tail = $collect;
211 $collect_tail =~ s/^.*[\/|\\]//;
212 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
213 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
214
215 # Obtain the content of the key
216 my $cmd = "gdbmget $infodb_file_path $dbkey";
217 if (open(GIN,"$cmd |") == 0) {
218 # Catch error if gdbmget failed
219 my $mess = "Failed to get metadata key: $metaname\n";
220 $mess .= "$!\n";
221
222 $gsdl_cgi->generate_error($mess);
223 }
224 else {
225 binmode(GIN,":utf8");
226 # Read everything in and concatenate them into $metavalue
227 my $metavalue = "";
228 my $line;
229 while (defined ($line=<GIN>)) {
230 $metavalue .= $line;
231 }
232 close(GIN);
233 chomp($metavalue); # Get rid off the tailing newlines
234 $gsdl_cgi->generate_ok_message("$metavalue");
235 }
236
237 # Release the lock once it is done
238# $self->unlock_collection($username, $collect);
239}
240
241# just calls the index version
242sub get_metadata
243{
244 my $self = shift @_;
245 $self->get_index_metadata(@_);
246}
247
248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254 my $self = shift @_;
255
256 my $where = $self->{'where'};
257 if (!$where) {
258 $where = "index"; # default behaviour is to get the values from index
259 }
260
261 # Only when setting metadata do we perform authentication and do we lock the collection,
262 # not when getting metadata
263
264 # for get_meta_array, the where param can only be ONE of import, archives, index, live
265 if($where =~ m/index/) {
266 $self->_get_index_metadata_array(@_);
267 }
268 elsif($where =~ m/archives/) {
269 $self->_get_archives_metadata_array(@_);
270 }
271 elsif($where =~ m/import/) {
272 $self->_get_import_metadata_array(@_);
273 }
274 elsif($where =~ m/live/) {
275 $self->_get_live_metadata_array(@_);
276 }
277}
278
279# Unused at present. Added for completion. Tested.
280sub _get_import_metadata_array {
281
282 my $self = shift @_;
283
284 my $collect = $self->{'collect'};
285 my $gsdl_cgi = $self->{'gsdl_cgi'};
286 my $site = $self->{'site'};
287 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
288
289 # look up additional args
290 my $infodbtype = $self->{'infodbtype'};
291
292 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
293 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
294 my $json_str = $self->{'json'};
295 my $doc_array = decode_json $json_str;
296
297 my $json_result_str = "[";
298 my $first_doc_rec = 1;
299 foreach my $doc_array_rec ( @$doc_array ) {
300
301 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
302
303 if($first_doc_rec) {
304 $first_doc_rec = 0;
305 } else {
306 $json_result_str .= ",";
307 }
308 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
309
310 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
311 $json_result_str = $json_result_str . ",\"metatable\":[";
312
313 my $first_rec = 1;
314 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
315
316 # Read the docid entry
317 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
318 # This now stores the full pathname
319 my $import_filename = $doc_rec->{'src-file'}->[0];
320 $import_filename = &util::placeholders_to_abspath($import_filename);
321
322 # figure out correct metadata.xml file [?]
323 # Assuming the metadata.xml file is next to the source file
324 # Note: This will not work if it is using the inherited metadata from the parent folder
325 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
326 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
327
328
329 if($first_rec) {
330 $first_rec = 0;
331 } else {
332 $json_result_str .= ",";
333 }
334
335 my $metaname = $metatable_rec->{'metaname'};
336 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
337
338 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
339 if(!defined $metapos) {
340 $metapos = 0;
341 }
342
343 # Obtain the specified metadata value(s)
344 my $metavalue;
345
346 if($metapos ne "all") { # get the value at a single metapos
347 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
348
349 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
350
351 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
352
353 } else {
354 my $first_metaval = 1;
355 $metapos = 0;
356 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
357
358 while (defined $metavalue && $metavalue ne "") {
359 if($first_metaval) {
360 $first_metaval = 0;
361 } else {
362 $json_result_str .= ",";
363 }
364
365 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
366
367 $metapos++;
368 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
369 }
370 }
371
372 $json_result_str .= "]}"; # close metavals array and metatable record
373 }
374
375 $json_result_str .= "]}"; # close metatable array and docid record
376 }
377
378 $json_result_str .= "]"; # close array of docids
379 $gsdl_cgi->generate_ok_message($json_result_str."\n");
380}
381
382# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
383# For more information on the format of the output, see _get_index_metadata_array, which is in use.
384sub _get_archives_metadata_array {
385
386 my $self = shift @_;
387
388 my $collect = $self->{'collect'};
389 my $gsdl_cgi = $self->{'gsdl_cgi'};
390 my $site = $self->{'site'};
391 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
392
393 # look up additional args
394 my $infodbtype = $self->{'infodbtype'};
395
396 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
397 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
398
399 my $json_str = $self->{'json'};
400 my $doc_array = decode_json $json_str;
401
402 my $json_result_str = "[";
403 my $first_doc_rec = 1;
404 foreach my $doc_array_rec ( @$doc_array ) {
405
406 my $docid = $doc_array_rec->{'docid'};
407
408 if($first_doc_rec) {
409 $first_doc_rec = 0;
410 } else {
411 $json_result_str .= ",";
412 }
413 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
414
415 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
416 $json_result_str = $json_result_str . ",\"metatable\":[";
417
418 my $first_rec = 1;
419 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
420
421 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
422 $docid_secnum = "" if (!defined $docid_secnum);
423
424 # Read the docid entry
425 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
426 # This now stores the full pathname
427 my $doc_filename = $doc_rec->{'doc-file'}->[0];
428 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
429
430 if($first_rec) {
431 $first_rec = 0;
432 } else {
433 $json_result_str .= ",";
434 }
435
436 my $metaname = $metatable_rec->{'metaname'};
437 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
438
439 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
440 if(!defined $metapos) {
441 $metapos = 0;
442 }
443
444
445 # Obtain the specified metadata value(s)
446 my $metavalue;
447
448 if($metapos ne "all") { # get the value at a single metapos
449
450 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
451 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
452
453 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
454
455 } else {
456 my $first_metaval = 1;
457 $metapos = 0;
458 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
459
460 while (defined $metavalue && $metavalue ne "") {
461 if($first_metaval) {
462 $first_metaval = 0;
463 } else {
464 $json_result_str .= ",";
465 }
466
467 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
468
469 $metapos++;
470 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
471 }
472 }
473
474 $json_result_str .= "]}"; # close metavals array and metatable record
475 }
476
477 $json_result_str .= "]}"; # close metatable array and docid record
478 }
479
480 $json_result_str .= "]"; # close array of docids
481 $gsdl_cgi->generate_ok_message($json_result_str."\n");
482}
483
484
485# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
486sub _get_live_metadata_array
487{
488 my $self = shift @_;
489
490 my $collect = $self->{'collect'};
491 my $gsdl_cgi = $self->{'gsdl_cgi'};
492 my $site = $self->{'site'};
493 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
494
495 # look up additional args
496 my $infodbtype = $self->{'infodbtype'};
497
498 # To people who know $collect_tail please add some comments
499 # Obtain the path to the database
500 my $collect_tail = $collect;
501 $collect_tail =~ s/^.*[\/|\\]//;
502 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
503 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
504
505 my $json_str = $self->{'json'};
506 my $doc_array = decode_json $json_str;
507
508 my $json_result_str = "[";
509 my $first_doc_rec = 1;
510
511 foreach my $doc_array_rec ( @$doc_array ) {
512
513 my $docid = $doc_array_rec->{'docid'};
514
515 if($first_doc_rec) {
516 $first_doc_rec = 0;
517 } else {
518 $json_result_str .= ",";
519 }
520 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
521
522 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
523 $json_result_str = $json_result_str . ",\"metatable\":[";
524
525 my $first_rec = 1;
526 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
527 if($first_rec) {
528 $first_rec = 0;
529 } else {
530 $json_result_str .= ",";
531 }
532
533 my $metaname = $metatable_rec->{'metaname'};
534 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
535
536 # Generate the dbkey
537 my $dbkey = "$docid.$metaname";
538
539 # metapos for get_live_metadata is always assumed to be "all".
540 # It's always going to get all the lines of metavalues associated with a metaname
541 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
542 #my $metapos = "all";
543 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
544 my $metavalue = "";
545
546 # Obtain the content of the key
547 my $cmd = "gdbmget $infodb_file_path $dbkey";
548 if (open(GIN,"$cmd |") != 0) { # Success.
549
550 binmode(GIN,":utf8");
551 # Read everything in and concatenate them into $metavalue
552 my $line;
553 my $first_metaval = 1;
554 my $pos = 0;
555 while (defined ($line=<GIN>)) {
556 chomp($line); # Get rid off the tailing newlines
557
558 if($metapos eq "all") {
559 if($first_metaval) {
560 $first_metaval = 0;
561 } else {
562 $json_result_str .= ",";
563 }
564 $metavalue = $line;
565 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
566 } elsif($metapos == $pos) {
567 $metavalue = $line;
568 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
569 last;
570 } # else, the current $pos is not the required $metapos
571 $pos += 1;
572 }
573 close(GIN);
574 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
575
576 $json_result_str .= "]}"; # close metavals array and metatable record
577 }
578
579 $json_result_str .= "]}"; # close metatable array and docid record
580 }
581
582 $json_result_str .= "]"; # close array of docids
583
584 $gsdl_cgi->generate_ok_message($json_result_str."\n");
585}
586
587
588# Takes a JSON string and returns a JSON string
589# Request string is of the form:
590# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
591# Resulting string is of the form:
592# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
593sub _get_index_metadata_array
594{
595 my $self = shift @_;
596
597 my $collect = $self->{'collect'};
598 my $gsdl_cgi = $self->{'gsdl_cgi'};
599 my $site = $self->{'site'};
600 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
601
602 # look up additional args
603 my $infodbtype = $self->{'infodbtype'};
604
605 # To people who know $collect_tail please add some comments
606 # Obtain the path to the database
607 my $collect_tail = $collect;
608 $collect_tail =~ s/^.*[\/|\\]//;
609 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
610 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
611
612 my $json_str = $self->{'json'};
613 my $doc_array = decode_json $json_str;
614
615 my $json_result_str = "[";
616 my $first_doc_rec = 1;
617
618 foreach my $doc_array_rec ( @$doc_array ) {
619
620 my $docid = $doc_array_rec->{'docid'};
621
622 if($first_doc_rec) {
623 $first_doc_rec = 0;
624 } else {
625 $json_result_str .= ",";
626 }
627 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
628
629 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
630 $json_result_str = $json_result_str . ",\"metatable\":[";
631
632 my $first_rec = 1;
633 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
634 if($first_rec) {
635 $first_rec = 0;
636 } else {
637 $json_result_str .= ",";
638 }
639
640 my $metaname = $metatable_rec->{'metaname'};
641 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
642
643 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
644 if(!defined $metapos) {
645 $metapos = 0;
646 }
647
648 # Read the docid entry
649 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
650
651 # Basically loop through and unescape_html the values
652 foreach my $k (keys %$doc_rec) {
653 my @escaped_v = ();
654 foreach my $v (@{$doc_rec->{$k}}) {
655 my $ev = &ghtml::unescape_html($v);
656 push(@escaped_v, $ev);
657 }
658 $doc_rec->{$k} = \@escaped_v;
659 }
660
661 # Obtain the specified metadata value(s)
662 my $metavalue;
663
664 if($metapos ne "all") { # get the value at a single metapos
665
666 $metavalue = $doc_rec->{$metaname}->[$metapos];
667
668 # protect any double quotes and colons in the metavalue before putting it into JSON
669 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
670 $metavalue =~ s/\:/&58;/g if defined $metavalue;
671
672 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
673
674 } else {
675 my $first_metaval = 1;
676 $metapos = 0;
677 $metavalue = $doc_rec->{$metaname}->[$metapos];
678
679 while (defined $metavalue) {
680
681 # protect any double quotes and colons in the metavalue before putting it into JSON
682 $metavalue =~ s/\"/&quot;/g;
683 $metavalue =~ s/\:/&58;/g;
684
685 if($first_metaval) {
686 $first_metaval = 0;
687 } else {
688 $json_result_str .= ",";
689 }
690
691 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
692
693 $metapos++;
694 $metavalue = $doc_rec->{$metaname}->[$metapos];
695 }
696 }
697
698 $json_result_str .= "]}"; # close metavals array and metatable record
699 }
700
701 $json_result_str .= "]}"; # close metatable array and docid record
702 }
703
704 $json_result_str .= "]"; # close array of docids
705
706 $gsdl_cgi->generate_ok_message($json_result_str."\n");
707}
708
709
710sub get_index_metadata
711{
712 my $self = shift @_;
713
714 my $username = $self->{'username'};
715 my $collect = $self->{'collect'};
716 my $gsdl_cgi = $self->{'gsdl_cgi'};
717 my $gsdlhome = $self->{'gsdlhome'};
718
719 # Obtain the collect dir
720 my $site = $self->{'site'};
721 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
722 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
723
724 # look up additional args
725 my $docid = $self->{'d'};
726 my $metaname = $self->{'metaname'};
727 my $metapos = $self->{'metapos'};
728 my $infodbtype = $self->{'infodbtype'};
729
730 # To people who know $collect_tail please add some comments
731 # Obtain path to the database
732 my $collect_tail = $collect;
733 $collect_tail =~ s/^.*[\/|\\]//;
734 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
735 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
736
737 # Read the docid entry
738 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
739
740 # Basically loop through and unescape_html the values
741 foreach my $k (keys %$doc_rec) {
742 my @escaped_v = ();
743 foreach my $v (@{$doc_rec->{$k}}) {
744 my $ev = &ghtml::unescape_html($v);
745 push(@escaped_v, $ev);
746 }
747 $doc_rec->{$k} = \@escaped_v;
748 }
749
750 # Obtain the specified metadata value
751 $metapos = 0 if (!defined $metapos);
752 my $metavalue = $doc_rec->{$metaname}->[$metapos];
753 $gsdl_cgi->generate_ok_message("$metavalue");
754
755}
756
757
758sub get_import_metadata
759{
760 my $self = shift @_;
761
762 my $username = $self->{'username'};
763 my $collect = $self->{'collect'};
764 my $gsdl_cgi = $self->{'gsdl_cgi'};
765 my $gsdlhome = $self->{'gsdlhome'};
766
767 # Obtain the collect dir
768 my $site = $self->{'site'};
769 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
770 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
771
772 # look up additional args
773 my $docid = $self->{'d'};
774 my $metaname = $self->{'metaname'};
775 my $metapos = $self->{'metapos'};
776 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
777
778 my $infodbtype = $self->{'infodbtype'};
779 if (!defined $docid)
780 {
781 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
782 }
783
784 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
785 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
786 my $metadata_xml_file;
787 my $import_filename = undef;
788
789
790 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
791 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
792 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
793
794 # This now stores the full pathname
795 $import_filename = $doc_rec->{'src-file'}->[0];
796 $import_filename = &util::placeholders_to_abspath($import_filename);
797
798 # figure out correct metadata.xml file [?]
799 # Assuming the metadata.xml file is next to the source file
800 # Note: This will not work if it is using the inherited metadata from the parent folder
801 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
802 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
803
804 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
805
806}
807
808sub get_metadata_from_metadata_xml
809{
810 my $self = shift @_;
811 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
812
813 my @rules =
814 (
815 _default => 'raw',
816 'Metadata' => \&gfmxml_metadata,
817 'FileName' => \&mxml_filename
818 );
819
820 my $parser = XML::Rules->new
821 (
822 rules => \@rules,
823 output_encoding => 'utf8'
824 );
825
826 my $xml_in = "";
827 if (!open(MIN,"<$metadata_xml_filename"))
828 {
829 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
830 }
831 else
832 {
833 # Read them in
834 my $line;
835 while (defined ($line=<MIN>)) {
836 $xml_in .= $line;
837 }
838 close(MIN);
839
840 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
841
842 if(defined $parser->{'pad'}->{'metavalue'})
843 {
844 return $parser->{'pad'}->{'metavalue'};
845 }
846 else
847 {
848 return "";
849 }
850 }
851}
852
853sub gfmxml_metadata
854{
855 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
856
857 # no subsection support yet in metadata.xml
858
859 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
860 {
861 if (!defined $parser->{'parameters'}->{'poscount'})
862 {
863 $parser->{'parameters'}->{'poscount'} = 0;
864 }
865 else
866 {
867 $parser->{'parameters'}->{'poscount'}++;
868 }
869
870 # gets the first value by default, since metapos defaults to 0
871 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
872 {
873 if($parser->{'parameters'}->{'metapos'} > 0) {
874 print STDERR "@@@@ WARNING: non-zero metapos.\n";
875 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
876 }
877 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
878 }
879 }
880}
881
882sub get_archives_metadata
883{
884 my $self = shift @_;
885
886 my $username = $self->{'username'};
887 my $collect = $self->{'collect'};
888 my $gsdl_cgi = $self->{'gsdl_cgi'};
889# my $gsdlhome = $self->{'gsdlhome'};
890 my $infodbtype = $self->{'infodbtype'};
891
892 # Obtain the collect dir
893 my $site = $self->{'site'};
894 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
895
896 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
897
898 # look up additional args
899 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
900 $docid_secnum = "" if (!defined $docid_secnum);
901
902 my $metaname = $self->{'metaname'};
903 my $metapos = $self->{'metapos'};
904 $metapos = 0 if (!defined $metapos);
905
906 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
907 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
908
909 # This now stores the full pathname
910 my $doc_filename = $doc_rec->{'doc-file'}->[0];
911
912 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
913
914}
915
916sub get_metadata_from_archive_xml
917{
918 my $self = shift @_;
919 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
920
921 my @start_rules = ('Section' => \&dxml_start_section);
922
923 my @rules =
924 (
925 _default => 'raw',
926 'Metadata' => \&gfdxml_metadata
927 );
928
929 my $parser = XML::Rules->new
930 (
931 start_rules => \@start_rules,
932 rules => \@rules,
933 output_encoding => 'utf8'
934 );
935
936 my $xml_in = "";
937 if (!open(MIN,"<$doc_xml_filename"))
938 {
939 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
940 }
941 else
942 {
943 # Read them in
944 my $line;
945 while (defined ($line=<MIN>)) {
946 $xml_in .= $line;
947 }
948 close(MIN);
949
950 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
951
952 if(defined $parser->{'pad'}->{'metavalue'})
953 {
954 return $parser->{'pad'}->{'metavalue'};
955 }
956 else
957 {
958 return "";
959 }
960 }
961}
962
963sub gfdxml_metadata
964{
965 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
966
967 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
968 {
969 return;
970 }
971
972 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
973 {
974 if (!defined $parser->{'parameters'}->{'poscount'})
975 {
976 $parser->{'parameters'}->{'poscount'} = 0;
977 }
978 else
979 {
980 $parser->{'parameters'}->{'poscount'}++;
981 }
982 }
983
984 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
985 {
986 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
987 }
988}
989
990sub _set_live_metadata
991{
992 my $self = shift @_;
993
994 my $collect = $self->{'collect'};
995 my $gsdl_cgi = $self->{'gsdl_cgi'};
996 #my $gsdlhome = $self->{'gsdlhome'};
997 my $infodbtype = $self->{'infodbtype'};
998
999 # Obtain the collect dir
1000 my $site = $self->{'site'};
1001 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1002 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1003
1004
1005 # look up additional args
1006 my $docid = $self->{'d'};
1007 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1008 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1009 }
1010 my $metavalue = $self->{'metavalue'};
1011
1012 # Generate the dbkey
1013 my $metaname = $self->{'metaname'};
1014 my $dbkey = "$docid.$metaname";
1015
1016 # To people who know $collect_tail please add some comments
1017 # Obtain path to the database
1018 my $collect_tail = $collect;
1019 $collect_tail =~ s/^.*[\/|\\]//;
1020 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1021 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1022
1023 # Set the new value
1024 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1025 my $status = system($cmd);
1026 if ($status != 0) {
1027 # Catch error if gdbmget failed
1028 my $mess = "Failed to set metadata key: $dbkey\n";
1029
1030 $mess .= "PATH: $ENV{'PATH'}\n";
1031 $mess .= "cmd = $cmd\n";
1032 $mess .= "Exit status: $status\n";
1033 $mess .= "System Error Message: $!\n";
1034
1035 $gsdl_cgi->generate_error($mess);
1036 }
1037 else {
1038 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
1039 }
1040
1041 #return $status; # in case calling functions have any further use for this
1042}
1043
1044sub set_live_metadata
1045{
1046 my $self = shift @_;
1047
1048 my $username = $self->{'username'};
1049 my $collect = $self->{'collect'};
1050 my $gsdl_cgi = $self->{'gsdl_cgi'};
1051
1052 if ($baseaction::authentication_enabled) {
1053 # Ensure the user is allowed to edit this collection
1054 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1055 }
1056
1057 # Make sure the collection isn't locked by someone else
1058 $self->lock_collection($username, $collect);
1059
1060 $self->_set_live_metadata(@_);
1061
1062 # Release the lock once it is done
1063 $self->unlock_collection($username, $collect);
1064}
1065
1066sub set_index_metadata_entry
1067{
1068 print STDERR "1\n";
1069 my $self = shift @_;
1070 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
1071
1072 # To people who know $collect_tail please add some comments
1073 # Obtain path to the database
1074 my $collect_tail = $collect;
1075 $collect_tail =~ s/^.*[\/|\\]//;
1076 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1077 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1078
1079 print STDERR "2\n";
1080# print STDERR "**** infodb file path = $infodb_file_path\n";
1081# print STDERR "***** infodb type = $infodbtype\n";
1082
1083 # Read the docid entry
1084 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1085
1086 # Set the metadata value
1087 if (defined $metapos) {
1088 print STDERR "3\n";
1089 # if metamode=accumulate AND metapos, warn user and then use metapos
1090 if (defined $metamode && $metamode eq "accumulate") {
1091 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1092 print STDERR "**** Proceeding by using metapos\n";
1093 }
1094 $doc_rec->{$metaname}->[$metapos] = $metavalue;
1095 }
1096 elsif (defined $prevmetavalue) {
1097 print STDERR "4\n";
1098 my $array = $doc_rec->{$metaname};
1099 my $length = @$array;
1100
1101 my $found = 0;
1102 for (my $i = 0; $i < $length; $i++){
1103 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1104 $doc_rec->{$metaname}->[$i] = $metavalue;
1105 $found = 1;
1106 last;
1107 }
1108 }
1109
1110 if($found == 0){
1111 $doc_rec->{$metaname} = [ $metavalue ];
1112 }
1113 }
1114 elsif (defined $metamode && $metamode eq "override") {
1115 print STDERR "5\n";
1116 $doc_rec->{$metaname} = [ $metavalue ];
1117 }
1118 else { # default for index was to override, but because accumulate is less destructive,
1119 # and because accumulate is the default for archives and import, that's the new default for index too
1120 print STDERR "6\n";
1121 if(defined $doc_rec->{$metaname}) {
1122 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1123 } else {
1124 $doc_rec->{$metaname} = [ $metavalue ];
1125 }
1126 }
1127 print STDERR "6\n";
1128
1129 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1130
1131 return $status;
1132
1133}
1134
1135sub _set_import_metadata
1136{
1137 my $self = shift @_;
1138
1139 my $collect = $self->{'collect'};
1140 my $gsdl_cgi = $self->{'gsdl_cgi'};
1141 my $infodbtype = $self->{'infodbtype'};
1142# my $gsdlhome = $self->{'gsdlhome'};
1143
1144 # Obtain the collect and archive dir
1145 my $site = $self->{'site'};
1146 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1147 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1148 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1149
1150 # look up additional args
1151 # want either d= or f=
1152 my $docid = $self->{'d'};
1153 my ($docid_root,$docid_secnum);
1154 if(defined $docid) {
1155 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1156 # as yet no support for setting subsection metadata in metadata.xml
1157 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
1158 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
1159 return;
1160 }
1161 }
1162
1163 my $import_file = $self->{'f'};
1164 if ((!defined $docid) && (!defined $import_file)) {
1165 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1166 }
1167
1168 # Get the parameters and set default mode to "accumulate"
1169 my $metaname = $self->{'metaname'};
1170 my $metavalue = $self->{'metavalue'};
1171## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1172 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1173
1174 my $metamode = $self->{'metamode'};
1175 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1176 # make "accumulate" the default (less destructive, as won't actually
1177 # delete any existing values)
1178 $metamode = "accumulate";
1179 }
1180
1181 # adding metapos and prevmetavalue support to import_metadata subroutines
1182 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1183 my $prevmetavalue = $self->{'prevmetavalue'};
1184
1185 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1186 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
1187
1188 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1189 $mess .= " $metaname";
1190 $mess .= " = $metavalue";
1191 $mess .= " ($metamode)\n";
1192
1193 $gsdl_cgi->generate_ok_message($mess);
1194
1195 #return $status; # in case calling functions have any further use for this
1196}
1197
1198# the version of set_index_meta that doesn't do authentication
1199sub _set_archives_metadata
1200{
1201 my $self = shift @_;
1202
1203 my $collect = $self->{'collect'};
1204 my $gsdl_cgi = $self->{'gsdl_cgi'};
1205 my $infodbtype = $self->{'infodbtype'};
1206
1207 # Obtain the collect and archive dir
1208 my $site = $self->{'site'};
1209 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1210 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1211
1212 # look up additional args
1213 my $docid = $self->{'d'};
1214 my $metaname = $self->{'metaname'};
1215 my $metavalue = $self->{'metavalue'};
1216 my $prevmetavalue = $self->{'prevmetavalue'};
1217
1218 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1219 # Don't append "|| undef", since if metapos=0 it will then be set to undef
1220
1221 my $metamode = $self->{'metamode'};
1222 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1223 # make "accumulate" the default (less destructive, as won't actually
1224 # delete any existing values)
1225 $metamode = "accumulate";
1226 }
1227
1228 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1229 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1230
1231 if ($status == 0) {
1232 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1233 $mess .= " $metaname";
1234 $mess .= "->[$metapos]" if (defined $metapos);
1235 $mess .= " = $metavalue";
1236 $mess .= " ($metamode)\n";
1237
1238 $gsdl_cgi->generate_ok_message($mess);
1239 }
1240 else {
1241 my $mess .= "Failed to set archives metadata key: $docid\n";
1242 $mess .= "Exit status: $status\n";
1243 if(defined $self->{'error_msg'}) {
1244 $mess .= "Error Message: $self->{'error_msg'}\n";
1245 } else {
1246 $mess .= "System Error Message: $!\n";
1247 }
1248 $mess .= "-" x 20 . "\n";
1249
1250 $gsdl_cgi->generate_error($mess);
1251 }
1252
1253 #return $status; # in case calling functions have any further use for this
1254}
1255
1256
1257# the version of set_index_meta that doesn't do authentication
1258sub _set_index_metadata
1259{
1260 print STDERR "START SET INDEX METADATA\n";
1261 my $self = shift @_;
1262
1263 my $collect = $self->{'collect'};
1264 my $gsdl_cgi = $self->{'gsdl_cgi'};
1265
1266 my $site = $self->{'site'};
1267 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1268
1269 # look up additional args
1270 my $docid = $self->{'d'};
1271 my $metaname = $self->{'metaname'};
1272 my $metapos = $self->{'metapos'}; # undef has meaning
1273 my $metavalue = $self->{'metavalue'};
1274 my $infodbtype = $self->{'infodbtype'};
1275 my $metamode = $self->{'metamode'};
1276 my $prevmetavalue = $self->{'prevmetavalue'};
1277
1278 print STDERR "SETTING INDEX METADATA ENTRY\n";
1279 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1280 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
1281 if ($status != 0) {
1282 # Catch error if set infodb entry failed
1283 my $mess = "Failed to set metadata key: $docid\n";
1284
1285 $mess .= "PATH: $ENV{'PATH'}\n";
1286 $mess .= "Exit status: $status\n";
1287 $mess .= "System Error Message: $!\n";
1288
1289 $gsdl_cgi->generate_error($mess);
1290 }
1291 else {
1292 my $mess = "set-index-metadata successful: Key[$docid]\n";
1293 $mess .= " $metaname";
1294 $mess .= "->[$metapos]" if (defined $metapos);
1295 $mess .= " = $metavalue\n";
1296
1297 $gsdl_cgi->generate_ok_message($mess);
1298 }
1299
1300 print STDERR "END SET INDEX METADATA\n";
1301 #return $status; # in case calling functions have any further use for this
1302}
1303
1304sub set_index_metadata
1305{
1306 my $self = shift @_;
1307
1308 my $username = $self->{'username'};
1309 my $collect = $self->{'collect'};
1310 my $gsdl_cgi = $self->{'gsdl_cgi'};
1311 #my $gsdlhome = $self->{'gsdlhome'};
1312
1313 if ($baseaction::authentication_enabled) {
1314 # Ensure the user is allowed to edit this collection
1315 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1316 }
1317
1318 my $site = $self->{'site'};
1319 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1320
1321 $gsdl_cgi->checked_chdir($collect_dir);
1322
1323 # Obtain the collect dir
1324 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1325
1326 # Make sure the collection isn't locked by someone else
1327 $self->lock_collection($username, $collect);
1328
1329 $self->_set_index_metadata(@_);
1330
1331 # Release the lock once it is done
1332 $self->unlock_collection($username, $collect);
1333}
1334
1335# call this to set the metadata for a combination of dirs archives, import or index, or live
1336# if none specified, defaults to index which was the original behaviour of set_metadata.
1337sub set_metadata
1338{
1339 my $self = shift @_;
1340
1341 # Testing that not defining a variable, setting it to "" or to " " all return false
1342 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1343
1344 my $where = $self->{'where'};
1345 if(!$where) {
1346 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1347 return;
1348 }
1349
1350 # authenticate and lock collection once, even if processing multiple dirs
1351 my $username = $self->{'username'};
1352 my $collect = $self->{'collect'};
1353 my $gsdl_cgi = $self->{'gsdl_cgi'};
1354
1355 if ($baseaction::authentication_enabled) {
1356 # Ensure the user is allowed to edit this collection
1357 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1358 }
1359
1360 if($where =~ m/index/) {
1361 my $site = $self->{'site'};
1362 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1363 $gsdl_cgi->checked_chdir($collect_dir);
1364 }
1365
1366 # Make sure the collection isn't locked by someone else
1367 $self->lock_collection($username, $collect);
1368
1369
1370 # now at last can set the metadata. $where can specify multiple
1371 # $where is of the form: import|archives|index, or a subset thereof
1372
1373 #my @whichdirs = split('\|', $where);
1374
1375 # just check whether $where contains import/archives/index/live in turn, and
1376 # for each case, process it accordingly
1377 if($where =~ m/import/) {
1378 $self->_set_import_metadata(@_);
1379 }
1380
1381 if($where =~ m/archives/) {
1382
1383 # look up docID arg which is optional to set_metadata because it's optional
1384 # to set_import, but which is compulsory to set_archives_metadata
1385 my $docid = $self->{'d'};
1386 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1387 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1388 }
1389 # we have a docid, so can set archives meta
1390 $self->_set_archives_metadata(@_);
1391 }
1392
1393 if($where =~ m/index/) {
1394
1395 # look up docID arg which is optional to set_metadata because it's optional
1396 # to set_import, but which is compulsory to set_archives_metadata
1397 my $docid = $self->{'d'};
1398 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1399 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1400 }
1401 # we have a docid, so can set index meta
1402 $self->_set_index_metadata(@_);
1403 }
1404
1405 if($where =~ m/live/) {
1406 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1407 }
1408
1409 # Release the lock once it is done
1410 $self->unlock_collection($username, $collect);
1411}
1412
1413sub set_metadata_array
1414{
1415 my $self = shift @_;
1416
1417 my $where = $self->{'where'};
1418 if(!$where) {
1419 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1420 return;
1421 }
1422
1423 my $username = $self->{'username'};
1424 my $collect = $self->{'collect'};
1425 my $gsdl_cgi = $self->{'gsdl_cgi'};
1426
1427 if ($baseaction::authentication_enabled) {
1428 # Ensure the user is allowed to edit this collection
1429 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1430 }
1431
1432 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1433 # But including the stmt during this code reorganisation to preserve as-is what used to happen
1434 my $site = $self->{'site'};
1435 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1436 $gsdl_cgi->checked_chdir($collect_dir);
1437
1438 # Make sure the collection isn't locked by someone else
1439 $self->lock_collection($username, $collect);
1440
1441 if($where =~ m/import/) {
1442 $self->_set_import_metadata_array(@_);
1443 }
1444 if($where =~ m/archives/) {
1445 $self->_set_archives_metadata_array(@_);
1446 }
1447 if($where =~ m/index/) {
1448 $self->_set_index_metadata_array(@_);
1449 }
1450 if($where =~ m/live/) {
1451 $self->_set_live_metadata_array(@_);
1452 }
1453
1454 # Release the lock once it is done
1455 $self->unlock_collection($username, $collect);
1456}
1457
1458sub _set_index_metadata_array
1459{
1460 my $self = shift @_;
1461
1462 my $collect = $self->{'collect'};
1463 my $gsdl_cgi = $self->{'gsdl_cgi'};
1464 my $site = $self->{'site'};
1465 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1466
1467
1468 # look up additional args
1469
1470 my $infodbtype = $self->{'infodbtype'};
1471
1472 my $json_str = $self->{'json'};
1473 my $doc_array = decode_json $json_str;
1474
1475
1476 my $global_status = 0;
1477 my $global_mess = "";
1478
1479 my @all_docids = ();
1480
1481 foreach my $doc_array_rec ( @$doc_array ) {
1482
1483 my $status = -1;
1484 my $docid = $doc_array_rec->{'docid'};
1485
1486 push(@all_docids,$docid);
1487
1488 my $metaname = $doc_array_rec->{'metaname'};
1489 if(defined $metaname) {
1490 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
1491 my $metavalue = $doc_array_rec->{'metavalue'};
1492 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1493
1494 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1495 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1496 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1497
1498 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1499 $metaname = $metatable_rec->{'metaname'};
1500 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1501 my $metapos = undef;
1502 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1503
1504 foreach my $metavalue ( @$metavals ) { # metavals is an array
1505 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1506 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1507 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1508 # metavalue for this metaname will be lost
1509 $metamode = "accumulate";
1510 }
1511 }
1512 }
1513 }
1514
1515 if ($status != 0) {
1516 # Catch error if set infodb entry failed
1517 $global_status = $status;
1518 $global_mess .= "Failed to set metadata key: $docid\n";
1519 $global_mess .= "Exit status: $status\n";
1520 $global_mess .= "System Error Message: $!\n";
1521 $global_mess .= "-" x 20;
1522 }
1523 }
1524
1525 if ($global_status != 0) {
1526 $global_mess .= "PATH: $ENV{'PATH'}\n";
1527 $gsdl_cgi->generate_error($global_mess);
1528 }
1529 else {
1530 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1531 $gsdl_cgi->generate_ok_message($mess);
1532 }
1533}
1534
1535sub set_index_metadata_array
1536{
1537 my $self = shift @_;
1538
1539 my $username = $self->{'username'};
1540 my $collect = $self->{'collect'};
1541 my $gsdl_cgi = $self->{'gsdl_cgi'};
1542# my $gsdlhome = $self->{'gsdlhome'};
1543
1544 if ($baseaction::authentication_enabled) {
1545 # Ensure the user is allowed to edit this collection
1546 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1547 }
1548
1549 my $site = $self->{'site'};
1550 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1551
1552 $gsdl_cgi->checked_chdir($collect_dir);
1553
1554 # Obtain the collect dir
1555 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1556
1557 # Make sure the collection isn't locked by someone else
1558 $self->lock_collection($username, $collect);
1559
1560 $self->_set_index_metadata_array(@_);
1561
1562 # Release the lock once it is done
1563 $self->unlock_collection($username, $collect);
1564}
1565
1566# experimental, newly added in and untested
1567sub _set_live_metadata_array
1568{
1569 my $self = shift @_;
1570
1571 my $collect = $self->{'collect'};
1572 my $gsdl_cgi = $self->{'gsdl_cgi'};
1573
1574 my $site = $self->{'site'};
1575 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1576
1577
1578 # look up additional args
1579 my $infodbtype = $self->{'infodbtype'};
1580 # To people who know $collect_tail please add some comments
1581 # Obtain path to the database
1582 my $collect_tail = $collect;
1583 $collect_tail =~ s/^.*[\/|\\]//;
1584 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1585 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1586
1587
1588 my $json_str = $self->{'json'};
1589 my $doc_array = decode_json $json_str;
1590
1591
1592 my $global_status = 0;
1593 my $global_mess = "";
1594
1595 my @all_docids = ();
1596
1597
1598 foreach my $doc_array_rec ( @$doc_array ) {
1599
1600 my $status = -1;
1601 my $docid = $doc_array_rec->{'docid'};
1602
1603 push(@all_docids,$docid);
1604
1605 my $metaname = $doc_array_rec->{'metaname'};
1606 if(defined $metaname) {
1607 my $dbkey = "$docid.$metaname";
1608 my $metavalue = $doc_array_rec->{'metavalue'};
1609
1610 # Set the new value
1611 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1612 $status = system($cmd);
1613
1614 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1615 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1616 foreach my $metatable_rec ( @$metatable ) {
1617 $metaname = $metatable_rec->{'metaname'};
1618 my $dbkey = "$docid.$metaname";
1619
1620 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1621 foreach my $metavalue ( @$metavals ) {
1622 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1623 $status = system($cmd);
1624 }
1625 }
1626
1627 }
1628
1629 if ($status != 0) {
1630 # Catch error if gdbmget failed
1631 $global_status = $status;
1632 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1633 $global_mess .= "Exit status: $status\n";
1634 $global_mess .= "System Error Message: $!\n";
1635 $global_mess .= "-" x 20;
1636 }
1637 }
1638
1639 if ($global_status != 0) {
1640 $global_mess .= "PATH: $ENV{'PATH'}\n";
1641 $gsdl_cgi->generate_error($global_mess);
1642 }
1643 else {
1644 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1645 $gsdl_cgi->generate_ok_message($mess);
1646 }
1647}
1648
1649sub set_live_metadata_array
1650{
1651 my $self = shift @_;
1652
1653 my $username = $self->{'username'};
1654 my $collect = $self->{'collect'};
1655 my $gsdl_cgi = $self->{'gsdl_cgi'};
1656
1657 if ($baseaction::authentication_enabled) {
1658 # Ensure the user is allowed to edit this collection
1659 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1660 }
1661
1662 my $site = $self->{'site'};
1663 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1664
1665 $gsdl_cgi->checked_chdir($collect_dir);
1666
1667 # Make sure the collection isn't locked by someone else
1668 $self->lock_collection($username, $collect);
1669
1670 $self->_set_live_metadata_array(@_);
1671
1672 # Release the lock once it is done
1673 $self->unlock_collection($username, $collect);
1674}
1675
1676
1677sub dxml_metadata
1678{
1679 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1680 my $metaname = $parser->{'parameters'}->{'metaname'};
1681 my $metamode = $parser->{'parameters'}->{'metamode'};
1682
1683###!!! print STDERR "**** Processing closing </Metadata> tag\n";
1684
1685 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1686
1687 # Find the right metadata tag and checks if we are going to
1688 # override it
1689 #
1690 # Note: This over writes the first metadata block it
1691 # encountered. If there are multiple Sections in the doc.xml, it
1692 # might not behave as you would expect
1693
1694 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1695## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1696## print STDERR "**** metamode = $metamode\n";
1697
1698 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1699 {
1700 my $name_attr = $attrHash->{'name'};
1701 if (($name_attr eq $metaname) && ($metamode eq "override"))
1702 {
1703 if (!defined $parser->{'parameters'}->{'poscount'})
1704 {
1705 $parser->{'parameters'}->{'poscount'} = 0;
1706 }
1707 else
1708 {
1709 $parser->{'parameters'}->{'poscount'}++;
1710 }
1711
1712 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1713 {
1714 ##print STDERR "#### got match!!\n";
1715 # Get the value and override the current value
1716 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1717 $attrHash->{'_content'} = $metavalue;
1718
1719 # Don't want it to wipe out any other pieces of metadata
1720 $parser->{'parameters'}->{'metamode'} = "done";
1721 }
1722 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1723 {
1724 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1725 $attrHash->{'_content'} = $metavalue;
1726 $parser->{'parameters'}->{'metamode'} = "done";
1727 }
1728 }
1729 }
1730
1731 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1732 return [$tagname => $attrHash];
1733}
1734
1735# This method exists purely for catching invalid section numbers that the client
1736# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1737# we've seen all the Sections in the doc.xml, and none of their section nums matched
1738# if the metamode has not been set to 'done' by then.
1739sub dxml_archive
1740{
1741 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1742 my $metamode = $parser->{'parameters'}->{'metamode'};
1743
1744 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1745 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1746
1747# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1748
1749 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1750 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1751
1752 my $metaname = $parser->{'parameters'}->{'metaname'};
1753 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1754
1755 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1756 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1757 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1758 print STDERR "\tin section $opt_doc_secnum.\n";
1759 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1760 }
1761
1762 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1763 return [$tagname => $attrHash];
1764}
1765
1766sub dxml_description
1767{
1768 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1769 my $metamode = $parser->{'parameters'}->{'metamode'};
1770
1771 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1772 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1773
1774###!!! print STDERR "**** Processing a closing </Description> tag \n";
1775# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1776
1777 # Accumulate the metadata
1778
1779 # We'll be accumulating metadata at this point if we haven't found and therefore
1780 # haven't processed the metadata yet.
1781 # For subsections, this means that if we're at a matching subsection, but haven't
1782 # found the correct metaname to override in that subsection, we accumulate it as new
1783 # meta in the subsection by adding it to the current description.
1784 # If there's no subsection info for the metadata, it will accumulate at the top level
1785 # section description if we hadn't found a matching metaname to override at this point.
1786
1787 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1788 # at the toplevel section. In the latter case, it means we want to process meta in the
1789 # toplevel section. So the eq check between the values below will work in all cases.
1790
1791 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1792 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1793 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1794
1795 if ($curr_secnum eq $opt_doc_secnum
1796 && ($metamode eq "accumulate" || $metamode eq "override")) {
1797 if ($metamode eq "override") {
1798 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1799 }
1800
1801 # If we get to here and metamode is override, this means there
1802 # was no existing value to overide => treat as an append operation
1803
1804 # Tack a new metadata tag on to the end of the <Metadata>+ block
1805 my $metaname = $parser->{'parameters'}->{'metaname'};
1806 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1807
1808 my $metadata_attr = {
1809 '_content' => $metavalue,
1810 'name' => $metaname,
1811 'mode' => "accumulate"
1812 };
1813
1814 my $append_metadata = [ "Metadata" => $metadata_attr ];
1815 my $description_content = $attrHash->{'_content'};
1816
1817 print "Appending metadata to doc.xml\n";
1818
1819 if (ref($description_content)) {
1820 # got some existing interesting nested content
1821 push(@$description_content, " ", $append_metadata ,"\n ");
1822 }
1823 else {
1824 #description_content is most likely a string such as "\n"
1825 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1826 }
1827
1828 $parser->{'parameters'}->{'metamode'} = "done";
1829 }
1830 else {
1831 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1832## print STDERR "**** NOT ACCUMULATE?!? \n";
1833 }
1834
1835 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1836 return [$tagname => $attrHash];
1837}
1838
1839
1840sub dxml_start_section
1841{
1842 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1843
1844 my $new_depth = scalar(@$contextArray);
1845
1846# print STDERR "**** START SECTION \n";
1847
1848 if ($new_depth == 1) {
1849 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1850 $parser->{'parameters'}->{'curr_section_num'} = "";
1851 }
1852
1853 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1854 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1855
1856 my $new_secnum;
1857
1858 if ($new_depth > $old_depth) {
1859 # child subsection
1860 $new_secnum = "$old_secnum.1";
1861 }
1862 elsif ($new_depth == $old_depth) {
1863 # sibling section => increase it's value by 1
1864 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1865 $tail_num++;
1866 $new_secnum = $old_secnum;
1867 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1868 }
1869 else {
1870 # back up to parent section => lopp off tail
1871 $new_secnum = $old_secnum;
1872 $new_secnum =~ s/\.\d+$//;
1873 }
1874
1875 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1876 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1877
1878 1;
1879}
1880
1881sub edit_xml_file
1882{
1883 my $self = shift @_;
1884 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1885
1886 # use XML::Rules to add it in (read in and out again)
1887 my $parser = XML::Rules->new(start_rules => $start_rules,
1888 rules => $rules,
1889 style => 'filter',
1890 output_encoding => 'utf8' );
1891
1892 my $xml_in = "";
1893 if (!open(MIN,"<$filename")) {
1894 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1895 }
1896 else {
1897 # Read all the text in
1898 my $line;
1899 while (defined ($line=<MIN>)) {
1900 $xml_in .= $line;
1901 }
1902 close(MIN);
1903
1904 my $MOUT;
1905 if (!open($MOUT,">$filename")) {
1906 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1907 }
1908 else {
1909 # Matched lines will get handled by the call backs
1910## my $xml_out = "";
1911
1912 binmode($MOUT,":utf8");
1913 $parser->filter($xml_in,$MOUT, $options);
1914
1915# binmode(MOUT,":utf8");
1916# print MOUT $xml_out;
1917 close($MOUT);
1918 }
1919 }
1920
1921 # copy across any custom error information that was stored during parsing
1922 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1923}
1924
1925sub edit_doc_xml
1926{
1927 my $self = shift @_;
1928 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1929
1930 my $info_mess = <<RAWEND;
1931****************************
1932 edit_doc_xml()
1933****************************
1934RAWEND
1935
1936 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1937 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1938 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1939 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1940 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1941 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1942 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1943
1944 $info_mess .= "****************************\n";
1945
1946 $gsdl_cgi->generate_message($info_mess);
1947
1948 # To monitor which section/subsection number we are in
1949 my @start_rules =
1950 ( 'Section' => \&dxml_start_section );
1951
1952 # use XML::Rules to add it in (read in and out again)
1953 # Set the call back functions
1954 my @rules =
1955 ( _default => 'raw',
1956 'Metadata' => \&dxml_metadata,
1957 'Description' => \&dxml_description,
1958 'Archive' => \&dxml_archive); # just for catching errors at end
1959
1960 # Sets the parameters
1961 my $options = { 'metaname' => $metaname,
1962 'metapos' => $metapos,
1963 'metavalue' => $metavalue,
1964 'metamode' => $metamode,
1965 'prevmetavalue' => $prevmetavalue };
1966
1967 if (defined $opt_secnum) {
1968 $options->{'secnum'} = $opt_secnum;
1969 }
1970
1971 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1972}
1973
1974sub set_archives_metadata_entry
1975{
1976 my $self = shift @_;
1977 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1978
1979 my $info_mess = <<RAWEND;
1980****************************
1981 set_archives_metadata_entry()
1982****************************
1983RAWEND
1984
1985 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1986 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1987 $info_mess .= " collect = $collect\n" if defined($collect);
1988 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1989 $info_mess .= " docid = $docid\n" if defined($docid);
1990 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1991 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1992 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1993 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1994 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1995
1996 $info_mess .= "****************************\n";
1997
1998 $gsdl_cgi->generate_message($info_mess);
1999
2000 # Obtain the doc.xml path for the specified docID
2001 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2002
2003 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2004 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2005 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2006
2007 # The $doc_xml_file is relative to the archives, and now let's get the full path
2008 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
2009 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
2010
2011 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
2012 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2013 # Thereafter, we will finally be setting the overriding metavalue for this metaname
2014 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2015 # remove all values of $metaname metadata
2016 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
2017 }
2018 # Edit the doc.xml file with the specified metadata name, value and position.
2019 # TODO: there is a potential problem here as this edit_doc_xml function
2020 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2021 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2022
2023 # dxml_metadata method ignores metapos if metamode anything other than override
2024 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
2025 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
2026
2027 # return 0; # return 0 for now to indicate no error
2028 return (defined $self->{'error_msg'}) ? 1 : 0;
2029}
2030
2031
2032sub set_archives_metadata
2033{
2034 my $self = shift @_;
2035
2036 my $username = $self->{'username'};
2037 my $collect = $self->{'collect'};
2038 my $gsdl_cgi = $self->{'gsdl_cgi'};
2039
2040 if ($baseaction::authentication_enabled) {
2041 # Ensure the user is allowed to edit this collection
2042 $self->authenticate_user($username, $collect);
2043 }
2044
2045 # Make sure the collection isn't locked by someone else
2046 $self->lock_collection($username, $collect);
2047
2048 $self->_set_archives_metadata(@_);
2049
2050 # Release the lock once it is done
2051 $self->unlock_collection($username, $collect);
2052}
2053
2054sub _set_archives_metadata_array
2055{
2056 my $self = shift @_;
2057
2058 my $collect = $self->{'collect'};
2059 my $gsdl_cgi = $self->{'gsdl_cgi'};
2060 my $site = $self->{'site'};
2061 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2062
2063 # look up additional args
2064
2065 my $infodbtype = $self->{'infodbtype'};
2066
2067 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2068
2069 my $json_str = $self->{'json'};
2070 my $doc_array = decode_json $json_str;
2071
2072
2073 my $global_status = 0;
2074 my $global_mess = "";
2075
2076 my @all_docids = ();
2077
2078 foreach my $doc_array_rec ( @$doc_array ) {
2079 my $status = -1;
2080 my $docid = $doc_array_rec->{'docid'};
2081
2082 push(@all_docids,$docid);
2083
2084 my $metaname = $doc_array_rec->{'metaname'};
2085 if(defined $metaname) {
2086
2087 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2088
2089 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2090 my $metavalue = $doc_array_rec->{'metavalue'};
2091 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2092
2093
2094 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2095 # make "accumulate" the default (less destructive, as it won't actually
2096 # delete any existing values)
2097 $metamode = "accumulate";
2098 }
2099
2100 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2101 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2102 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2103 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2104
2105 foreach my $metatable_rec ( @$metatable ) {
2106 $metaname = $metatable_rec->{'metaname'};
2107 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2108 my $metapos = undef;
2109 my $prevmetavalue = undef;
2110 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2111
2112 foreach my $metavalue ( @$metavals ) {
2113 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2114 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2115
2116 if($metamode eq "override") { # now, having overridden the metavalue for the first,
2117 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2118 # metavalue for this metaname will be lost
2119 $metamode = "accumulate";
2120 }
2121 }
2122 }
2123 }
2124
2125 if ($status != 0) {
2126 # Catch error if set infodb entry failed
2127 $global_status = $status;
2128 $global_mess .= "Failed to set metadata key: $docid\n";
2129 $global_mess .= "Exit status: $status\n";
2130 $global_mess .= "System Error Message: $!\n";
2131 $global_mess .= "-" x 20 . "\n";
2132 }
2133 }
2134
2135 if ($global_status != 0) {
2136 $global_mess .= "PATH: $ENV{'PATH'}\n";
2137 $gsdl_cgi->generate_error($global_mess);
2138 }
2139 else {
2140 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2141 $gsdl_cgi->generate_ok_message($mess);
2142 }
2143}
2144
2145sub set_archives_metadata_array
2146{
2147 my $self = shift @_;
2148
2149 my $username = $self->{'username'};
2150 my $collect = $self->{'collect'};
2151 my $gsdl_cgi = $self->{'gsdl_cgi'};
2152# my $gsdlhome = $self->{'gsdlhome'};
2153
2154 if ($baseaction::authentication_enabled) {
2155 # Ensure the user is allowed to edit this collection
2156 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2157 }
2158
2159 my $site = $self->{'site'};
2160 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2161
2162 $gsdl_cgi->checked_chdir($collect_dir);
2163
2164 # Obtain the collect dir
2165 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2166
2167 # Make sure the collection isn't locked by someone else
2168 $self->lock_collection($username, $collect);
2169
2170 $self->_set_archives_metadata_array(@_);
2171
2172 # Release the lock once it is done
2173 $self->unlock_collection($username, $collect);
2174}
2175
2176sub _remove_archives_metadata
2177{
2178 my $self = shift @_;
2179
2180 my $collect = $self->{'collect'};
2181 my $gsdl_cgi = $self->{'gsdl_cgi'};
2182# my $gsdlhome = $self->{'gsdlhome'};
2183 my $infodbtype = $self->{'infodbtype'};
2184
2185 my $site = $self->{'site'};
2186
2187 # Obtain the collect and archive dir
2188 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2189
2190 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2191
2192 # look up additional args
2193 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2194
2195 my $metaname = $self->{'metaname'};
2196 my $metapos = $self->{'metapos'};
2197
2198 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2199
2200 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2201 # default to deleting the first metavalue for the given metaname
2202 # Beware that if both metapos AND metavalue are defined, both matches (if any)
2203 # seem to get deleted in one single remove_archives_meta action invocation.
2204 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2205 # deleted, both get deleted.
2206 if(!defined $metapos && !defined $metavalue) {
2207 $metapos = 0;
2208 }
2209
2210 my $metamode = $self->{'metamode'} || undef;
2211
2212 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2213 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2214
2215 # This now stores the full pathname
2216 my $doc_filename = $doc_rec->{'doc-file'}->[0];
2217
2218 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
2219# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
2220
2221 if ($status == 0)
2222 {
2223 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
2224 $mess .= " $metaname";
2225 $mess .= "->[$metapos]" if (defined $metapos);
2226
2227 $gsdl_cgi->generate_ok_message($mess);
2228 }
2229 else
2230 {
2231 my $mess .= "Failed to remove archives metadata key: $docid\n";
2232 $mess .= "Exit status: $status\n";
2233 $mess .= "System Error Message: $!\n";
2234 $mess .= "-" x 20 . "\n";
2235
2236 $gsdl_cgi->generate_error($mess);
2237 }
2238
2239 #return $status; # in case calling functions have a use for this
2240}
2241
2242sub remove_archives_metadata
2243{
2244 my $self = shift @_;
2245
2246 my $username = $self->{'username'};
2247 my $collect = $self->{'collect'};
2248 my $gsdl_cgi = $self->{'gsdl_cgi'};
2249
2250 if ($baseaction::authentication_enabled)
2251 {
2252 # Ensure the user is allowed to edit this collection
2253 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2254 }
2255
2256 # Make sure the collection isn't locked by someone else
2257 $self->lock_collection($username, $collect);
2258
2259 $self->_remove_archives_metadata(@_);
2260
2261 # Release the lock once it is done
2262 $self->unlock_collection($username, $collect);
2263}
2264
2265sub remove_from_doc_xml
2266{
2267 my $self = shift @_;
2268 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
2269
2270 my @start_rules = ('Section' => \&dxml_start_section);
2271
2272 # Set the call-back functions for the metadata tags
2273 my @rules =
2274 (
2275 _default => 'raw',
2276 'Metadata' => \&rfdxml_metadata
2277 );
2278
2279 my $parser = XML::Rules->new
2280 (
2281 start_rules => \@start_rules,
2282 rules => \@rules,
2283 style => 'filter',
2284 output_encoding => 'utf8',
2285# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2286 stripspaces => 2|0|0 # ineffectual
2287 );
2288
2289 my $status = 0;
2290 my $xml_in = "";
2291 if (!open(MIN,"<$doc_xml_filename"))
2292 {
2293 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2294 $status = 1;
2295 }
2296 else
2297 {
2298 # Read them in
2299 my $line;
2300 while (defined ($line=<MIN>)) {
2301 $xml_in .= $line;
2302 }
2303 close(MIN);
2304
2305 # Filter with the call-back functions
2306 my $xml_out = "";
2307
2308 my $MOUT;
2309 if (!open($MOUT,">$doc_xml_filename")) {
2310 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2311 $status = 1;
2312 }
2313 else {
2314 binmode($MOUT,":utf8");
2315 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
2316 close($MOUT);
2317 }
2318 }
2319 return $status;
2320}
2321
2322sub rfdxml_metadata
2323{
2324 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2325
2326 # For comparisons, toplevel section is indicated by ""
2327 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2328 my $secid = $parser->{'parameters'}->{'secid'} || "";
2329
2330 if (!($secid eq $curr_sec_num))
2331 {
2332 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2333 return [$tagname => $attrHash];
2334 }
2335
2336 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2337 {
2338 if (!defined $parser->{'parameters'}->{'poscount'})
2339 {
2340 $parser->{'parameters'}->{'poscount'} = 0;
2341 }
2342 else
2343 {
2344 $parser->{'parameters'}->{'poscount'}++;
2345 }
2346
2347 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2348 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2349 return [];
2350 }
2351 }
2352
2353 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2354 {
2355 return [];
2356 }
2357
2358 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2359 {
2360 return [];
2361 }
2362
2363 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2364 return [$tagname => $attrHash];
2365}
2366
2367sub mxml_metadata
2368{
2369 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2370 my $metaname = $parser->{'parameters'}->{'metaname'};
2371 my $metamode = $parser->{'parameters'}->{'metamode'};
2372
2373 # Report error if we don't see FileName tag before this
2374 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2375
2376 # Don't do anything if we are not in the right FileSet
2377 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2378 if ($file_regexp =~ /\.\*/) {
2379 # Only interested in a file_regexp if it specifies precisely one
2380 # file.
2381 # So, skip anything with a .* in it as it is too general
2382## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2383 return [$tagname => $attrHash];
2384 }
2385 my $src_file = $parser->{'parameters'}->{'src_file'};
2386 if (!($src_file =~ /$file_regexp/)) {
2387 return [$tagname => $attrHash];
2388 }
2389## print STDERR "*** mxl metamode = $metamode\n";
2390
2391 # Find the right metadata tag and checks if we are going to override it
2392 my $name_attr = $attrHash->{'name'};
2393 if (($name_attr eq $metaname) && ($metamode eq "override")) {
2394
2395 # now metadata.xml functions need to keep track of metapos
2396 if (!defined $parser->{'parameters'}->{'poscount'})
2397 {
2398 $parser->{'parameters'}->{'poscount'} = 0;
2399 }
2400 else
2401 {
2402 $parser->{'parameters'}->{'poscount'}++;
2403 }
2404
2405 # If either the metapos or prevmetavalue is set,
2406 # get the value and override the current value
2407 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2408
2409 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2410 {
2411 $attrHash->{'_content'} = $metavalue;
2412
2413 ## print STDERR "**** overriding metadata.xml\n";
2414
2415 # Don't want it to wipe out any other pieces of metadata
2416 $parser->{'parameters'}->{'metamode'} = "done";
2417 }
2418 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2419 {
2420 $attrHash->{'_content'} = $metavalue;
2421 $parser->{'parameters'}->{'metamode'} = "done";
2422 }
2423 }
2424
2425 # mxml_description will process the metadata if metadata is accumulate,
2426 # or if we haven't found the metadata to override
2427
2428 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2429 return [$tagname => $attrHash];
2430}
2431
2432
2433sub mxml_description
2434{
2435 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2436 my $metamode = $parser->{'parameters'}->{'metamode'};
2437
2438 # Failed... Report error if we don't see FileName tag before this
2439 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2440
2441 # Don't do anything if we are not in the right FileSet
2442 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2443 if ($file_regexp =~ m/\.\*/) {
2444 # Only interested in a file_regexp if it specifies precisely one
2445 # file.
2446 # So, skip anything with a .* in it as it is too general
2447 return [$tagname => $attrHash];
2448 }
2449 my $src_file = $parser->{'parameters'}->{'src_file'};
2450
2451 if (!($src_file =~ m/$file_regexp/)) {
2452 return [$tagname => $attrHash];
2453 }
2454
2455 # Accumulate the metadata block to the end of the description block
2456 # Note: This adds metadata block to all description blocks, so if there are
2457 # multiple FileSets, it will add to all of them
2458 if (($metamode eq "accumulate") || ($metamode eq "override")) {
2459
2460 # if metamode was "override" but get to here then it failed to
2461 # find an item to override, in which case it should append its
2462 # value to the end, just like the "accumulate" mode
2463
2464 if ($metamode eq "override") {
2465 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2466 }
2467
2468 # tack a new metadata tag on to the end of the <Metadata>+ block
2469 my $metaname = $parser->{'parameters'}->{'metaname'};
2470 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2471
2472 my $metadata_attr = { '_content' => $metavalue,
2473 'name' => $metaname,
2474 'mode' => "accumulate" };
2475
2476 my $append_metadata = [ "Metadata" => $metadata_attr ];
2477 my $description_content = $attrHash->{'_content'};
2478
2479## print STDERR "*** appending to metadata.xml\n";
2480
2481 # append the new metadata element to the end of the current
2482 # content contained inside this tag
2483 if (ref($description_content) eq "") {
2484 # => string or numeric literal
2485 # this is caused by a <Description> block has no <Metadata> child elements
2486 # => set up an empty array in '_content'
2487 $attrHash->{'_content'} = [ "\n" ];
2488 $description_content = $attrHash->{'_content'};
2489 }
2490
2491 push(@$description_content," ", $append_metadata ,"\n ");
2492 $parser->{'parameters'}->{'metamode'} = "done";
2493 }
2494
2495 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2496 return [$tagname => $attrHash];
2497}
2498
2499
2500sub mxml_filename
2501{
2502 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2503
2504 # Store the filename of the Current Fileset
2505 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2506 # FileName tag must come before Description tag
2507 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2508
2509 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2510 return [$tagname => $attrHash];
2511}
2512
2513
2514sub mxml_fileset
2515{
2516 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2517
2518 # Initilise the current_file
2519 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2520 # FileName tag must come before Description tag
2521 $parser->{'parameters'}->{'current_file'} = "";
2522
2523 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2524 return [$tagname => $attrHash];
2525}
2526
2527sub mxml_directorymetadata
2528{
2529 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2530
2531 # if we haven't processed the metadata when we reach the end of metadata.xml
2532 # it's because there's no particular FileSet element whose FileName matched
2533 # In which case, add a new FileSet for this FileName
2534 my $metamode = $parser->{'parameters'}->{'metamode'};
2535 if($metamode ne "done") {
2536
2537 if ($metamode eq "override") {
2538 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2539 }
2540
2541 # If we get to here and metamode is override, this means there
2542 # was no existing value to overide => treat as an append operation
2543
2544 # Create a new FileSet element and append to DirectoryMetadata
2545 # <FileSet>
2546 # <FileName>src_file</FileName>
2547 # <Description>
2548 # <Metadata mode="" name="">metavalue</Metadata>
2549 # </Description>
2550 # </FileSet>
2551 my $src_file = $parser->{'parameters'}->{'src_file'};
2552 my $metaname = $parser->{'parameters'}->{'metaname'};
2553 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2554 my $metadata_attr = {
2555 '_content' => $metavalue,
2556 'name' => $metaname,
2557 'mode' => "accumulate"
2558 };
2559 my $append_metadata = [ "Metadata" => $metadata_attr ];
2560 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
2561 my $description_element = [ "Description" => $description_attr ];
2562
2563 #_content is not an attribute, it's special and holds the children of this element
2564 # including the textnode value embedded in this element if any.
2565 my $filename_attr = {'_content' => $src_file};
2566 my $filename_element = [ "FileName" => $filename_attr ];
2567
2568 my $fileset_attr = {};
2569 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2570 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2571
2572
2573 # get children of dirmeta, and push the new FileSet element onto it
2574 print "Appending metadata to metadata.xml\n";
2575 my $dirmeta_content = $attrHash->{'_content'};
2576 if (ref($dirmeta_content)) {
2577 # got some existing interesting nested content
2578 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2579 push(@$dirmeta_content, "\t", $fileset ,"\n");
2580 }
2581 else {
2582 #description_content is most likely a string such as "\n"
2583 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2584 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2585 }
2586
2587 $parser->{'parameters'}->{'metamode'} = "done";
2588 }
2589 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2590 return [$tagname => $attrHash];
2591}
2592
2593
2594sub edit_metadata_xml
2595{
2596 my $self = shift @_;
2597 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2598
2599 # Set the call-back functions for the metadata tags
2600 my @rules =
2601 ( _default => 'raw',
2602 'FileName' => \&mxml_filename,
2603 'Metadata' => \&mxml_metadata,
2604 'Description' => \&mxml_description,
2605 'FileSet' => \&mxml_fileset,
2606 'DirectoryMetadata' => \&mxml_directorymetadata);
2607
2608 # use XML::Rules to add it in (read in and out again)
2609 my $parser = XML::Rules->new(rules => \@rules,
2610 style => 'filter',
2611 output_encoding => 'utf8',
2612 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2613
2614 if (!-e $metadata_xml_filename) {
2615
2616 if (open(MOUT,">$metadata_xml_filename")) {
2617
2618 my $src_file_re = &util::filename_to_regex($src_file);
2619 # shouldn't the following also be in the above utility routine??
2620 # $src_file_re =~ s/\./\\./g;
2621
2622 print MOUT "<?xml version=\"1.0\"?>\n";
2623 print MOUT "<DirectoryMetadata>\n";
2624 print MOUT " <FileSet>\n";
2625 print MOUT " <FileName>$src_file_re</FileName>\n";
2626 print MOUT " <Description>\n";
2627 print MOUT " </Description>\n";
2628 print MOUT " </FileSet>\n";
2629 print MOUT "</DirectoryMetadata>\n";
2630
2631 close(MOUT);
2632 }
2633 else {
2634 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2635 }
2636 }
2637
2638
2639 my $xml_in = "";
2640 if (!open(MIN,"<$metadata_xml_filename")) {
2641 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2642 }
2643 else {
2644 # Read them in
2645 my $line;
2646 while (defined ($line=<MIN>)) {
2647 $xml_in .= $line;
2648 }
2649 close(MIN);
2650
2651 # Filter with the call-back functions
2652 my $xml_out = "";
2653
2654 my $MOUT;
2655 if (!open($MOUT,">$metadata_xml_filename")) {
2656 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2657 }
2658 else {
2659 binmode($MOUT,":utf8");
2660
2661 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2662 # At the moment, I will just hack it!
2663 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2664 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2665 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2666 #print MOUT $xml_out;
2667
2668 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2669 metapos => $metapos,
2670 metavalue => $metavalue,
2671 metamode => $metamode,
2672 src_file => $src_file,
2673 prevmetavalue => $prevmetavalue,
2674 current_file => undef} );
2675 close($MOUT);
2676 }
2677 }
2678}
2679
2680
2681sub set_import_metadata
2682{
2683 my $self = shift @_;
2684
2685 my $username = $self->{'username'};
2686 my $collect = $self->{'collect'};
2687 my $gsdl_cgi = $self->{'gsdl_cgi'};
2688
2689 if ($baseaction::authentication_enabled) {
2690 # Ensure the user is allowed to edit this collection
2691 $self->authenticate_user($username, $collect);
2692 }
2693
2694 # Make sure the collection isn't locked by someone else
2695 $self->lock_collection($username, $collect);
2696
2697 $self->_set_import_metadata(@_);
2698
2699 # Release the lock once it is done
2700 $self->unlock_collection($username, $collect);
2701
2702}
2703
2704sub set_import_metadata_array
2705{
2706 my $self = shift @_;
2707
2708 my $username = $self->{'username'};
2709 my $collect = $self->{'collect'};
2710 my $gsdl_cgi = $self->{'gsdl_cgi'};
2711# my $gsdlhome = $self->{'gsdlhome'};
2712
2713 if ($baseaction::authentication_enabled) {
2714 # Ensure the user is allowed to edit this collection
2715 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2716 }
2717
2718 my $site = $self->{'site'};
2719 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2720
2721 $gsdl_cgi->checked_chdir($collect_dir);
2722
2723 # Make sure the collection isn't locked by someone else
2724 $self->lock_collection($username, $collect);
2725
2726 $self->_set_import_metadata_array(@_);
2727
2728 # Release the lock once it is done
2729 $self->unlock_collection($username, $collect);
2730
2731}
2732
2733
2734sub _set_import_metadata_array
2735{
2736 my $self = shift @_;
2737
2738 my $collect = $self->{'collect'};
2739 my $gsdl_cgi = $self->{'gsdl_cgi'};
2740
2741 my $site = $self->{'site'};
2742 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2743
2744 # look up additional args
2745
2746 my $infodbtype = $self->{'infodbtype'};
2747
2748 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2749 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2750
2751 my $json_str = $self->{'json'};
2752 my $doc_array = decode_json $json_str;
2753
2754 my $global_status = 0;
2755 my $global_mess = "";
2756
2757 my @all_docids = ();
2758
2759 foreach my $doc_array_rec ( @$doc_array )
2760 {
2761 my $status = -1;
2762 my $docid = $doc_array_rec->{'docid'};
2763
2764 my ($docid_root,$docid_secnum);
2765 if(defined $docid) {
2766 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2767 # as yet no support for setting subsection metadata in metadata.xml
2768 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2769 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2770 next; # skip this docid in for loop
2771 }
2772 }
2773
2774 push(@all_docids,$docid); # docid_root rather
2775
2776 my $metaname = $doc_array_rec->{'metaname'};
2777 if (defined $metaname) {
2778 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2779 my $metavalue = $doc_array_rec->{'metavalue'};
2780 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2781
2782 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2783 # make "accumulate" the default (less destructive, as won't actually
2784 # delete any existing values)
2785 $metamode = "accumulate";
2786 }
2787
2788 # adding metapos and prevmetavalue support to import_metadata subroutines
2789 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2790 my $prevmetavalue = $self->{'prevmetavalue'};
2791
2792 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2793
2794 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2795 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2796
2797 foreach my $metatable_rec ( @$metatable ) {
2798 $metaname = $metatable_rec->{'metaname'};
2799 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2800 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2801 # make "accumulate" the default (less destructive, as won't actually
2802 # delete any existing values)
2803 $metamode = "accumulate";
2804 }
2805
2806 # No support for metapos and prevmetavalue in the JSON metatable substructure
2807 my $metapos = undef;
2808 my $prevmetavalue = undef;
2809 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2810
2811 foreach my $metavalue ( @$metavals ) {
2812 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2813
2814 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2815 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2816 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2817 # metavalue for this metaname will be lost
2818 $metamode = "accumulate";
2819 }
2820 }
2821 }
2822 }
2823 }
2824
2825 # always a success message
2826 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2827 $gsdl_cgi->generate_ok_message($mess);
2828}
2829
2830# always returns true (1)
2831sub set_import_metadata_entry
2832{
2833 my $self = shift @_;
2834 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2835
2836 my $info_mess = <<RAWEND;
2837****************************
2838 set_import_metadata_entry()
2839****************************
2840RAWEND
2841
2842 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2843 $info_mess .= " collect = $collect\n" if defined($collect);
2844 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2845 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2846 $info_mess .= " docid = $docid\n" if defined($docid);
2847 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2848 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2849 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2850 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2851 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2852
2853 $info_mess .= "****************************\n";
2854
2855 $gsdl_cgi->generate_message($info_mess);
2856
2857 # import works with metadata.xml which can have inherited metadata
2858 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2859 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2860 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2861 # what they're doing if they provide a metapos.
2862 if(defined $metapos) {
2863 print STDERR "@@@@ WARNING: metapos defined.\n";
2864 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2865 }
2866
2867 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2868 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2869 my $metadata_xml_file;
2870 my $import_filename = undef;
2871
2872 if (defined $docid) {
2873 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2874 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2875
2876 # This now stores the full pathname
2877 $import_filename = $doc_rec->{'src-file'}->[0];
2878 $import_filename = &util::placeholders_to_abspath($import_filename);
2879
2880 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2881 # as the array version of the method doesn't support the -f parameter yet
2882 my $import_file = $self->{'f'};
2883 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2884 }
2885
2886 # figure out correct metadata.xml file [?]
2887 # Assuming the metadata.xml file is next to the source file
2888 # Note: This will not work if it is using the inherited metadata from the parent folder
2889 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2890 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2891
2892 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2893 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2894 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2895 if(!defined $metapos && $metamode eq "override") {
2896## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2897 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2898
2899 }
2900
2901 # Edit the metadata.xml
2902 # Modified by Jeffrey from DL Consulting
2903 # Handle the case where there is one metadata.xml file for multiple FileSets
2904 # The XML filter needs to know whether it is in the right FileSet
2905 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2906 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2907 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2908 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2909 #return 0;
2910 return $metadata_xml_filename;
2911}
2912
2913sub _remove_import_metadata
2914{
2915 my $self = shift @_;
2916
2917 my $collect = $self->{'collect'};
2918 my $gsdl_cgi = $self->{'gsdl_cgi'};
2919# my $gsdlhome = $self->{'gsdlhome'};
2920 my $infodbtype = $self->{'infodbtype'};
2921
2922 # Obtain the collect dir
2923 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2924 my $site = $self->{'site'};
2925 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2926
2927 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2928 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2929
2930 # look up additional args
2931 my $docid = $self->{'d'};
2932 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2933 {
2934 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2935 }
2936
2937 my $metaname = $self->{'metaname'};
2938 my $metapos = $self->{'metapos'};
2939 my $metavalue = $self->{'metavalue'};
2940 if(defined $metavalue) {
2941 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2942 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2943 $metapos = 0;
2944 }
2945 my $metamode = $self->{'metamode'} || undef;
2946
2947 # import works with metadata.xml which can have inherited metadata
2948 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2949 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2950 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2951 # what they're doing if they provide a metapos.
2952 if(defined $metapos) {
2953 print STDERR "@@@@ WARNING: metapos defined.\n";
2954 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2955 }
2956
2957 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2958 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2959 my $metadata_xml_file;
2960 my $import_filename = undef;
2961 if (defined $docid)
2962 {
2963 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2964 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2965
2966 # This now stores the full pathname
2967 $import_filename = $doc_rec->{'src-file'}->[0];
2968 $import_filename = &util::placeholders_to_abspath($import_filename);
2969 }
2970
2971 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2972 {
2973 $gsdl_cgi->generate_error("There is no metadata\n");
2974 }
2975
2976 # figure out correct metadata.xml file [?]
2977 # Assuming the metadata.xml file is next to the source file
2978 # Note: This will not work if it is using the inherited metadata from the parent folder
2979 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2980 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2981
2982 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2983
2984 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2985 $mess .= " $metaname";
2986 $mess .= " = $metavalue\n";
2987
2988 $gsdl_cgi->generate_ok_message($mess);
2989
2990 #return $status; # in case calling functions have a use for this
2991}
2992
2993sub remove_import_metadata
2994{
2995 my $self = shift @_;
2996
2997 my $username = $self->{'username'};
2998 my $collect = $self->{'collect'};
2999 my $gsdl_cgi = $self->{'gsdl_cgi'};
3000
3001 if ($baseaction::authentication_enabled) {
3002 # Ensure the user is allowed to edit this collection
3003 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3004 }
3005
3006 # Make sure the collection isn't locked by someone else
3007 $self->lock_collection($username, $collect);
3008
3009 $self->_remove_import_metadata(@_);
3010
3011 # Release the lock once it is done
3012 $self->unlock_collection($username, $collect);
3013
3014}
3015
3016sub remove_from_metadata_xml
3017{
3018 my $self = shift @_;
3019 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3020 # metamode generally has no meaning for removing meta, but is used by set_meta
3021 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3022
3023 # Set the call-back functions for the metadata tags
3024 my @rules =
3025 (
3026 _default => 'raw',
3027 'Metadata' => \&rfmxml_metadata,
3028 'FileName' => \&mxml_filename
3029 );
3030
3031 my $parser = XML::Rules->new
3032 (
3033 rules => \@rules,
3034 style => 'filter',
3035 output_encoding => 'utf8',
3036 #normalisespaces => 1,
3037 stripspaces => 2|0|0 # ineffectual
3038 );
3039
3040 my $xml_in = "";
3041 if (!open(MIN,"<$metadata_xml_filename"))
3042 {
3043 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3044 }
3045 else
3046 {
3047 # Read them in
3048 my $line;
3049 while (defined ($line=<MIN>)) {
3050 $xml_in .= $line;
3051 }
3052 close(MIN);
3053
3054 # Filter with the call-back functions
3055 my $xml_out = "";
3056
3057 my $MOUT;
3058 if (!open($MOUT,">$metadata_xml_filename")) {
3059 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3060 }
3061 else {
3062 binmode($MOUT,":utf8");
3063 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
3064 close($MOUT);
3065 }
3066 }
3067}
3068
3069sub rfmxml_metadata
3070{
3071 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3072
3073 # metadata.xml does not handle subsections
3074
3075 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3076 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3077 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
3078 {
3079 if (!defined $parser->{'parameters'}->{'poscount'})
3080 {
3081 $parser->{'parameters'}->{'poscount'} = 0;
3082 }
3083 else
3084 {
3085 $parser->{'parameters'}->{'poscount'}++;
3086 }
3087
3088 # if overriding but no metapos, then clear all the meta for this metaname
3089 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
3090 return [];
3091 }
3092
3093 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3094 {
3095 return [];
3096 }
3097
3098 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3099 {
3100 return [];
3101 }
3102 }
3103
3104 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3105 return [$tagname => $attrHash];
3106}
3107
3108sub _remove_live_metadata
3109{
3110 my $self = shift @_;
3111
3112 my $collect = $self->{'collect'};
3113 my $gsdl_cgi = $self->{'gsdl_cgi'};
3114# my $gsdlhome = $self->{'gsdlhome'};
3115 my $infodbtype = $self->{'infodbtype'};
3116
3117 # Obtain the collect dir
3118 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3119 my $site = $self->{'site'};
3120 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3121
3122
3123 # look up additional args
3124 my $docid = $self->{'d'};
3125 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3126 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3127 }
3128
3129 # Generate the dbkey
3130 my $metaname = $self->{'metaname'};
3131 my $dbkey = "$docid.$metaname";
3132
3133 # To people who know $collect_tail please add some comments
3134 # Obtain the live gdbm_db path
3135 my $collect_tail = $collect;
3136 $collect_tail =~ s/^.*[\/|\\]//;
3137 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3138 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
3139
3140 # Remove the key
3141 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
3142 my $status = system($cmd);
3143 if ($status != 0) {
3144 # Catch error if gdbmdel failed
3145 my $mess = "Failed to set metadata key: $dbkey\n";
3146
3147 $mess .= "PATH: $ENV{'PATH'}\n";
3148 $mess .= "cmd = $cmd\n";
3149 $mess .= "Exit status: $status\n";
3150 $mess .= "System Error Message: $!\n";
3151
3152 $gsdl_cgi->generate_error($mess);
3153 }
3154 else {
3155 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3156 }
3157
3158}
3159
3160sub remove_live_metadata
3161{
3162 my $self = shift @_;
3163
3164 my $username = $self->{'username'};
3165 my $collect = $self->{'collect'};
3166 my $gsdl_cgi = $self->{'gsdl_cgi'};
3167 my $gsdlhome = $self->{'gsdlhome'};
3168
3169 if ($baseaction::authentication_enabled) {
3170 # Ensure the user is allowed to edit this collection
3171 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3172 }
3173
3174 # Make sure the collection isn't locked by someone else
3175 $self->lock_collection($username, $collect);
3176
3177 $self->_remove_live_metadata(@_);
3178
3179 $self->unlock_collection($username, $collect);
3180}
3181
3182sub remove_metadata
3183{
3184 my $self = shift @_;
3185
3186 my $where = $self->{'where'};
3187 if(!$where) {
3188 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3189 return;
3190 }
3191
3192 my $username = $self->{'username'};
3193 my $collect = $self->{'collect'};
3194 my $gsdl_cgi = $self->{'gsdl_cgi'};
3195
3196 if ($baseaction::authentication_enabled) {
3197 # Ensure the user is allowed to edit this collection
3198 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3199 }
3200
3201 # Make sure the collection isn't locked by someone else
3202 $self->lock_collection($username, $collect);
3203
3204 # check which directories need to be processed, specified in $where as
3205 # any combination of import|archives|index|live
3206 if($where =~ m/import/) {
3207 $self->_remove_import_metadata(@_);
3208 }
3209 if($where =~ m/archives/) {
3210 $self->_remove_archives_metadata(@_);
3211 }
3212 if($where =~ m/index/) {
3213 $self->_remove_index_metadata(@_);
3214 }
3215
3216 # Release the lock once it is done
3217 $self->unlock_collection($username, $collect);
3218}
3219
3220# the internal version, without authentication
3221sub _remove_index_metadata
3222{
3223 my $self = shift @_;
3224
3225 my $collect = $self->{'collect'};
3226 my $gsdl_cgi = $self->{'gsdl_cgi'};
3227# my $gsdlhome = $self->{'gsdlhome'};
3228 my $infodbtype = $self->{'infodbtype'};
3229
3230 # Obtain the collect dir
3231 my $site = $self->{'site'};
3232 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3233 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3234
3235 # look up additional args
3236 my $docid = $self->{'d'};
3237 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3238 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3239 }
3240 my $metaname = $self->{'metaname'};
3241 my $metapos = $self->{'metapos'};
3242 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
3243
3244 # To people who know $collect_tail please add some comments
3245 # Obtain the path to the database
3246 my $collect_tail = $collect;
3247 $collect_tail =~ s/^.*[\/|\\]//;
3248 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3249 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3250
3251 # Read the docid entry
3252 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3253
3254 # Check to make sure the key does exist
3255 if (!defined ($doc_rec->{$metaname})) {
3256 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3257 }
3258
3259 # Obtain the specified metadata pos
3260 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3261 if(!defined $metapos && !defined $metavalue) {
3262 $metapos = 0;
3263 }
3264
3265
3266 # consider check key is defined before deleting?
3267 # Loop through the metadata array and ignore the specified position
3268 my $filtered_metadata = [];
3269 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
3270 for (my $i=0; $i<$num_metadata_vals; $i++) {
3271 my $metaval = shift(@{$doc_rec->{$metaname}});
3272
3273 if (!defined $metavalue && $i != $metapos) {
3274 push(@$filtered_metadata,$metaval);
3275 }
3276
3277 if(defined $metavalue && !($metavalue eq $metaval))
3278 {
3279 push(@$filtered_metadata,$metaval);
3280 }
3281 }
3282 $doc_rec->{$metaname} = $filtered_metadata;
3283
3284 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3285 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3286
3287 if ($status != 0) {
3288 my $mess = "Failed to set metadata key: $docid\n";
3289
3290 $mess .= "PATH: $ENV{'PATH'}\n";
3291 $mess .= "Exit status: $status\n";
3292 $mess .= "System Error Message: $!\n";
3293
3294 $gsdl_cgi->generate_error($mess);
3295 }
3296 else {
3297 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3298 $mess .= " $metaname";
3299 $mess .= "->[$metapos]" if (defined $metapos);
3300
3301 $gsdl_cgi->generate_ok_message($mess);
3302 }
3303
3304 #return $status; # in case calling functions have a use for this
3305}
3306
3307sub remove_index_metadata
3308{
3309 my $self = shift @_;
3310
3311 my $username = $self->{'username'};
3312 my $collect = $self->{'collect'};
3313 my $gsdl_cgi = $self->{'gsdl_cgi'};
3314# my $gsdlhome = $self->{'gsdlhome'};
3315
3316 if ($baseaction::authentication_enabled) {
3317 # Ensure the user is allowed to edit this collection
3318 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
3319 }
3320
3321 # Obtain the collect dir
3322 my $site = $self->{'site'};
3323 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3324 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3325
3326 # Make sure the collection isn't locked by someone else
3327 $self->lock_collection($username, $collect);
3328
3329 $self->_remove_index_metadata(@_);
3330
3331 # Release the lock once it is done
3332 $self->unlock_collection($username, $collect);
3333}
3334
3335
3336# Was trying to reused the codes, but the functions need to be broken
3337# down more before they can be reused, otherwise there will be too
3338# much overhead and duplicate process...
3339sub insert_metadata
3340{
3341 my $self = shift @_;
3342
3343 my $username = $self->{'username'};
3344 my $collect = $self->{'collect'};
3345 my $gsdl_cgi = $self->{'gsdl_cgi'};
3346 my $gsdlhome = $self->{'gsdlhome'};
3347 my $infodbtype = $self->{'infodbtype'};
3348
3349 # If the import metadata and gdbm database have been updated, we
3350 # need to insert some notification to warn user that the the text
3351 # they see at the moment is not indexed and require a rebuild.
3352 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3353
3354 if ($baseaction::authentication_enabled) {
3355 # Ensure the user is allowed to edit this collection
3356 $self->authenticate_user($username, $collect);
3357 }
3358
3359 # Obtain the collect and archive dir
3360 my $site = $self->{'site'};
3361 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3362 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3363 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3364
3365 # Make sure the collection isn't locked by someone else
3366 $self->lock_collection($username, $collect);
3367
3368 # Check additional args
3369 my $docid = $self->{'d'};
3370 if (!defined($docid)) {
3371 $gsdl_cgi->generate_error("No document id is specified: d=...");
3372 }
3373 my $metaname = $self->{'metaname'};
3374 if (!defined($metaname)) {
3375 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3376 }
3377 my $metavalue = $self->{'metavalue'};
3378 if (!defined($metavalue) || $metavalue eq "") {
3379 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3380 }
3381 # make "accumulate" the default (less destructive, as won't actually
3382 # delete any existing values)
3383 my $metamode = "accumulate";
3384
3385 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3386 my $metapos = undef;
3387 my $prevmetavalue = undef;
3388
3389 #=======================================================================#
3390 # set_import_metadata [START]
3391 #=======================================================================#
3392 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3393 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3394 my $metadata_xml_file;
3395 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3396 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3397
3398 # This now stores the full pathname
3399 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3400 $import_filename = &util::placeholders_to_abspath($import_filename);
3401
3402 # figure out correct metadata.xml file [?]
3403 # Assuming the metadata.xml file is next to the source file
3404 # Note: This will not work if it is using the inherited metadata from the parent folder
3405 my ($import_tailname, $import_dirname)
3406 = File::Basename::fileparse($import_filename);
3407 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3408
3409 # Shane's escape characters
3410 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3411 $metavalue =~ s/\,/&#44;/g;
3412 $metavalue =~ s/\:/&#58;/g;
3413 $metavalue =~ s/\|/&#124;/g;
3414 $metavalue =~ s/\(/&#40;/g;
3415 $metavalue =~ s/\)/&#41;/g;
3416 $metavalue =~ s/\[/&#91;/g;
3417 $metavalue =~ s/\\/&#92;/g;
3418 $metavalue =~ s/\]/&#93;/g;
3419 $metavalue =~ s/\{/&#123;/g;
3420 $metavalue =~ s/\}/&#125;/g;
3421 $metavalue =~ s/\"/&#34;/g;
3422 $metavalue =~ s/\`/&#96;/g;
3423 $metavalue =~ s/\n/_newline_/g;
3424
3425 # Edit the metadata.xml
3426 # Modified by Jeffrey from DL Consulting
3427 # Handle the case where there is one metadata.xml file for multiple FileSets
3428 # The XML filter needs to know whether it is in the right FileSet
3429 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3430 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3431 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3432 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3433 #=======================================================================#
3434 # set_import_metadata [END]
3435 #=======================================================================#
3436
3437
3438 #=======================================================================#
3439 # set_metadata (accumulate version) [START]
3440 #=======================================================================#
3441 # To people who know $collect_tail please add some comments
3442 # Obtain path to the database
3443 my $collect_tail = $collect;
3444 $collect_tail =~ s/^.*[\/|\\]//;
3445 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3446 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3447
3448 # Read the docid entry
3449 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3450
3451 # Protect the quotes
3452 $metavalue =~ s/\"/\\\"/g;
3453
3454 # Adds the pending macro
3455 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3456
3457 # If the metadata doesn't exist, create a new one
3458 if (!defined($doc_rec->{$metaname})){
3459 $doc_rec->{$metaname} = [ $macro_metavalue ];
3460 }
3461 # Else, let's acculumate the values
3462 else {
3463 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3464 }
3465
3466 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3467 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3468
3469 if ($status != 0) {
3470 # Catch error if gdbmget failed
3471 my $mess = "Failed to set metadata key: $docid\n";
3472
3473 $mess .= "PATH: $ENV{'PATH'}\n";
3474 $mess .= "Exit status: $status\n";
3475 $mess .= "System Error Message: $!\n";
3476
3477 $gsdl_cgi->generate_error($mess);
3478 }
3479 else {
3480 my $mess = "insert-metadata successful: Key[$docid]\n";
3481 $mess .= " [In metadata.xml] $metaname";
3482 $mess .= " = $metavalue\n";
3483 $mess .= " [In database] $metaname";
3484 $mess .= " = $macro_metavalue\n";
3485 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3486 $gsdl_cgi->generate_ok_message($mess);
3487 }
3488 #=======================================================================#
3489 # set_metadata (accumulate version) [END]
3490 #=======================================================================#
3491
3492 # Release the lock once it is done
3493 $self->unlock_collection($username, $collect);
3494}
3495
34961;
Note: See TracBrowser for help on using the repository browser.