source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27312

Last change on this file since 27312 was 27312, checked in by ak19, 8 years ago

get_metadata_array which takes a JSON string and returns one, implemented for getting index metadata. archives, import and live will be implemented later after testing the current implementation against the javascript for retrieving user added comments.

File size: 105.5 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ "metapos" ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "where" ],
71 'help-string' => [
72 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73 ]},
74
75 #SET METHODS
76 "set-live-metadata" => {
77 'compulsory-args' => [ "d", "metaname", "metavalue" ],
78 'optional-args' => [ ] },
79
80 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81 'compulsory-args' => [ "metaname", "metavalue" ],
82 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84 "set-index-metadata" => {
85 'compulsory-args' => [ "d", "metaname", "metavalue" ],
86 'optional-args' => [ "metapos", "metamode" ] },
87
88 "set-archives-metadata" => {
89 'compulsory-args' => [ "d", "metaname", "metavalue" ],
90 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91
92 "set-import-metadata" => {
93 'compulsory-args' => [ "metaname", "metavalue" ],
94 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95
96 #SET METHODS (ARRAY)
97 "set-metadata-array" => {
98 'compulsory-args' => [ "where", "json" ],
99 'optional-args' => [ ],
100 'help-string' => [
101 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102
103 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110
111 "set-archives-metadata-array" => {
112 'compulsory-args' => [ "json" ],
113 'optional-args' => [ ] },
114
115 "set-import-metadata-array" => {
116 'compulsory-args' => [ "json" ],
117 'optional-args' => [ ] },
118
119 "set-index-metadata-array" => {
120 'compulsory-args' => [ "json" ],
121 'optional-args' => [ ] },
122
123 "set-live-metadata-array" => {
124 'compulsory-args' => [ "json" ],
125 'optional-args' => [ ] },
126
127 #REMOVE METHODS
128 "remove-import-metadata" => {
129 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131
132 "remove-archives-metadata" => {
133 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134 'optional-args' => [ "metapos", "metavalue" ] },
135
136 "remove-live-metadata" => {
137 'compulsory-args' => [ "d", "metaname" ],
138 'optional-args' => [ ] },
139
140 "remove-index-metadata" => {
141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "metapos", "metavalue" ] },
143
144 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145 'compulsory-args' => [ "d", "metaname" ],
146 'optional-args' => [ "where", "metapos", "metavalue" ] },
147
148 #INSERT METHODS
149 "insert-metadata" => {
150 'compulsory-args' => [ "d", "metaname", "metavalue" ],
151 'optional-args' => [ ] }
152};
153
154
155sub new
156{
157 my $class = shift (@_);
158 my ($gsdl_cgi,$iis6_mode) = @_;
159
160 # Treat metavalue specially. To transmit this through a GET request
161 # the Javascript side has url-encoded it, so here we need to decode
162 # it before proceeding
163
164 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173 return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179 my $self = shift @_;
180
181 my $username = $self->{'username'};
182 my $collect = $self->{'collect'};
183 my $gsdl_cgi = $self->{'gsdl_cgi'};
184 my $gsdlhome = $self->{'gsdlhome'};
185 my $infodbtype = $self->{'infodbtype'};
186
187 # live metadata gets/saves value scoped (prefixed) by the current usename
188 # so (for now) let's not bother to enforce authentication
189
190 # Obtain the collect dir
191 my $site = $self->{'site'};
192 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
194
195 # Make sure the collection isn't locked by someone else
196 $self->lock_collection($username, $collect);
197
198 # look up additional args
199 my $docid = $self->{'d'};
200 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
201 $gsdl_cgi->generate_error("No docid (d=...) specified.");
202 }
203
204 # Generate the dbkey
205 my $metaname = $self->{'metaname'};
206 my $dbkey = "$docid.$metaname";
207
208 # To people who know $collect_tail please add some comments
209 # Obtain path to the database
210 my $collect_tail = $collect;
211 $collect_tail =~ s/^.*[\/|\\]//;
212 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
213 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
214
215 # Obtain the content of the key
216 my $cmd = "gdbmget $infodb_file_path $dbkey";
217 if (open(GIN,"$cmd |") == 0) {
218 # Catch error if gdbmget failed
219 my $mess = "Failed to get metadata key: $metaname\n";
220 $mess .= "$!\n";
221
222 $gsdl_cgi->generate_error($mess);
223 }
224 else {
225 binmode(GIN,":utf8");
226 # Read everything in and concatenate them into $metavalue
227 my $metavalue = "";
228 my $line;
229 while (defined ($line=<GIN>)) {
230 $metavalue .= $line;
231 }
232 close(GIN);
233 chomp($metavalue); # Get rid off the tailing newlines
234 $gsdl_cgi->generate_ok_message("$metavalue");
235 }
236
237 # Release the lock once it is done
238 $self->unlock_collection($username, $collect);
239}
240
241# just calls the index version
242sub get_metadata
243{
244 my $self = shift @_;
245 $self->get_index_metadata(@_);
246}
247
248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254 my $self = shift @_;
255
256 my $where = $self->{'where'};
257 if (!$where) {
258 $where = "index"; # default behaviour is to get the values from index
259 }
260
261 # Only when setting metadata do we perform authentication and do we lock the collection
262 # not when getting metadata
263
264 # for get_meta_array, the where param can only be ONE of import, archives, index, live
265 if($where =~ m/index/) {
266 $self->_get_index_metadata_array(@_);
267 }
268 # still to be implemented for archives, import and live
269}
270
271# takes a JSON string and returns a JSON string
272sub _get_index_metadata_array
273{
274 my $self = shift @_;
275
276 my $collect = $self->{'collect'};
277 my $gsdl_cgi = $self->{'gsdl_cgi'};
278 my $site = $self->{'site'};
279 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
280
281 # look up additional args
282 my $infodbtype = $self->{'infodbtype'};
283
284 # To people who know $collect_tail please add some comments
285 # Obtain the path to the database
286 my $collect_tail = $collect;
287 $collect_tail =~ s/^.*[\/\\]//;
288 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
289 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
290
291 my $json_str = $self->{'json'};
292 my $doc_array = decode_json $json_str;
293
294 my $json_result_str = "[";
295 my $first_doc_rec = 1;
296
297 foreach my $doc_array_rec ( @$doc_array ) {
298
299 my $docid = $doc_array_rec->{'docid'};
300
301 if($first_doc_rec) {
302 $first_doc_rec = 0;
303 } else {
304 $json_result_str .= ",";
305 }
306 $json_result_str = $json_result_str . "{docid:\"" . $docid . "\"";
307
308 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
309 $json_result_str = $json_result_str . ",metatable:[";
310
311 my $first_rec = 1;
312 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
313 if($first_rec) {
314 $first_rec = 0;
315 } else {
316 $json_result_str .= ",";
317 }
318
319 my $metaname = $metatable_rec->{'metaname'};
320 $json_result_str .= "{metaname:\"$metaname\",metavals:[";
321
322 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
323 if(!defined $metapos) {
324 $metapos = "all";
325 }
326
327 # Read the docid entry
328 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
329
330 # Basically loop through and unescape_html the values
331 foreach my $k (keys %$doc_rec) {
332 my @escaped_v = ();
333 foreach my $v (@{$doc_rec->{$k}}) {
334 my $ev = &ghtml::unescape_html($v);
335 push(@escaped_v, $ev);
336 }
337 $doc_rec->{$k} = \@escaped_v;
338 }
339
340 # Obtain the specified metadata value(s)
341 my $metavalue;
342
343 if($metapos ne "all") { # get the value at a single metapos
344
345 $metavalue = $doc_rec->{$metaname}->[$metapos];
346 $json_result_str .= "{metapos:\"$metapos\",metavalue:\"$metavalue\"}";
347
348 } else {
349 my $first_metaval = 1;
350 $metapos = 0;
351 $metavalue = $doc_rec->{$metaname}->[$metapos];
352
353 while (defined $metavalue) {
354 if($first_metaval) {
355 $first_metaval = 0;
356 } else {
357 $json_result_str .= ",";
358 }
359
360 $json_result_str .= "{metapos:\"$metapos\",metavalue:\"$metavalue\"}";
361
362 $metapos++;
363 $metavalue = $doc_rec->{$metaname}->[$metapos];
364 }
365 }
366
367 $json_result_str .= "]}"; # close metavals array and metatable record
368 }
369
370 $json_result_str .= "]}"; # close metatable array and docid record
371 }
372
373 $json_result_str .= "]"; # close array of docids
374
375 $gsdl_cgi->generate_ok_message($json_result_str);
376}
377
378
379sub get_index_metadata
380{
381 my $self = shift @_;
382
383 my $username = $self->{'username'};
384 my $collect = $self->{'collect'};
385 my $gsdl_cgi = $self->{'gsdl_cgi'};
386 my $gsdlhome = $self->{'gsdlhome'};
387
388 # Authenticate user if it is enabled
389 if ($baseaction::authentication_enabled) {
390 # Ensure the user is allowed to edit this collection
391 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
392 }
393
394 # Obtain the collect dir
395 my $site = $self->{'site'};
396 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
397 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
398
399 # Make sure the collection isn't locked by someone else
400 $self->lock_collection($username, $collect);
401
402 # look up additional args
403 my $docid = $self->{'d'};
404 my $metaname = $self->{'metaname'};
405 my $metapos = $self->{'metapos'};
406 my $infodbtype = $self->{'infodbtype'};
407
408 # To people who know $collect_tail please add some comments
409 # Obtain path to the database
410 my $collect_tail = $collect;
411 $collect_tail =~ s/^.*[\/\\]//;
412 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
413 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
414
415 # Read the docid entry
416 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
417
418 # Basically loop through and unescape_html the values
419 foreach my $k (keys %$doc_rec) {
420 my @escaped_v = ();
421 foreach my $v (@{$doc_rec->{$k}}) {
422 my $ev = &ghtml::unescape_html($v);
423 push(@escaped_v, $ev);
424 }
425 $doc_rec->{$k} = \@escaped_v;
426 }
427
428 # Obtain the specified metadata value
429 $metapos = 0 if (!defined $metapos);
430 my $metavalue = $doc_rec->{$metaname}->[$metapos];
431 $gsdl_cgi->generate_ok_message("$metavalue");
432
433 # Release the lock once it is done
434 $self->unlock_collection($username, $collect);
435}
436
437
438sub get_import_metadata
439{
440 my $self = shift @_;
441
442 my $username = $self->{'username'};
443 my $collect = $self->{'collect'};
444 my $gsdl_cgi = $self->{'gsdl_cgi'};
445 my $gsdlhome = $self->{'gsdlhome'};
446
447 # Authenticate user if it is enabled
448 if ($baseaction::authentication_enabled) {
449 # Ensure the user is allowed to edit this collection
450 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
451 }
452
453 # Obtain the collect dir
454 my $site = $self->{'site'};
455 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
456 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
457
458 # Make sure the collection isn't locked by someone else
459 $self->lock_collection($username, $collect);
460
461 # look up additional args
462 my $docid = $self->{'d'};
463 my $metaname = $self->{'metaname'};
464 my $metapos = $self->{'metapos'};
465 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
466
467 my $infodbtype = $self->{'infodbtype'};
468 if (!defined $docid)
469 {
470 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
471 }
472
473 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
474 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
475 my $metadata_xml_file;
476 my $import_filename = undef;
477
478
479 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
480 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
481 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
482
483 # This now stores the full pathname
484 $import_filename = $doc_rec->{'src-file'}->[0];
485
486 # figure out correct metadata.xml file [?]
487 # Assuming the metadata.xml file is next to the source file
488 # Note: This will not work if it is using the inherited metadata from the parent folder
489 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
490 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
491
492 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
493
494 # Release the lock once it is done
495 $self->unlock_collection($username, $collect);
496}
497
498sub get_metadata_from_metadata_xml
499{
500 my $self = shift @_;
501 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
502
503 my @rules =
504 (
505 _default => 'raw',
506 'Metadata' => \&gfmxml_metadata,
507 'FileName' => \&mxml_filename
508 );
509
510 my $parser = XML::Rules->new
511 (
512 rules => \@rules,
513 output_encoding => 'utf8'
514 );
515
516 my $xml_in = "";
517 if (!open(MIN,"<$metadata_xml_filename"))
518 {
519 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
520 }
521 else
522 {
523 # Read them in
524 my $line;
525 while (defined ($line=<MIN>)) {
526 $xml_in .= $line;
527 }
528 close(MIN);
529
530 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
531
532 if(defined $parser->{'pad'}->{'metavalue'})
533 {
534 return $parser->{'pad'}->{'metavalue'};
535 }
536 else
537 {
538 return "";
539 }
540 }
541}
542
543sub gfmxml_metadata
544{
545 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
546
547 # no subsection support yet in metadata.xml
548
549 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
550 {
551 if (!defined $parser->{'parameters'}->{'poscount'})
552 {
553 $parser->{'parameters'}->{'poscount'} = 0;
554 }
555 else
556 {
557 $parser->{'parameters'}->{'poscount'}++;
558 }
559
560 # gets the first value by default, since metapos defaults to 0
561 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
562 {
563 if($parser->{'parameters'}->{'metapos'} > 0) {
564 print STDERR "@@@@ WARNING: non-zero metapos.\n";
565 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
566 }
567 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
568 }
569 }
570}
571
572sub get_archives_metadata
573{
574 my $self = shift @_;
575
576 my $username = $self->{'username'};
577 my $collect = $self->{'collect'};
578 my $gsdl_cgi = $self->{'gsdl_cgi'};
579 my $gsdlhome = $self->{'gsdlhome'};
580 my $infodbtype = $self->{'infodbtype'};
581
582 # Authenticate user if it is enabled
583 if ($baseaction::authentication_enabled) {
584 # Ensure the user is allowed to edit this collection
585 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
586 }
587
588 # Obtain the collect dir
589 my $site = $self->{'site'};
590 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
591
592 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
593
594 # Make sure the collection isn't locked by someone else
595 $self->lock_collection($username, $collect);
596
597 # look up additional args
598 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
599 $docid_secnum = "" if (!defined $docid_secnum);
600
601 my $metaname = $self->{'metaname'};
602 my $metapos = $self->{'metapos'};
603 $metapos = 0 if (!defined $metapos);
604
605 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
606 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
607
608 # This now stores the full pathname
609 my $doc_filename = $doc_rec->{'doc-file'}->[0];
610
611 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
612
613 # Release the lock once it is done
614 $self->unlock_collection($username, $collect);
615}
616
617sub get_metadata_from_archive_xml
618{
619 my $self = shift @_;
620 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
621
622 my @start_rules = ('Section' => \&dxml_start_section);
623
624 my @rules =
625 (
626 _default => 'raw',
627 'Metadata' => \&gfdxml_metadata
628 );
629
630 my $parser = XML::Rules->new
631 (
632 start_rules => \@start_rules,
633 rules => \@rules,
634 output_encoding => 'utf8'
635 );
636
637 my $xml_in = "";
638 if (!open(MIN,"<$doc_xml_filename"))
639 {
640 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
641 }
642 else
643 {
644 # Read them in
645 my $line;
646 while (defined ($line=<MIN>)) {
647 $xml_in .= $line;
648 }
649 close(MIN);
650
651 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
652
653 if(defined $parser->{'pad'}->{'metavalue'})
654 {
655 return $parser->{'pad'}->{'metavalue'};
656 }
657 else
658 {
659 return "";
660 }
661 }
662}
663
664sub gfdxml_metadata
665{
666 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
667
668 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
669 {
670 return;
671 }
672
673 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
674 {
675 if (!defined $parser->{'parameters'}->{'poscount'})
676 {
677 $parser->{'parameters'}->{'poscount'} = 0;
678 }
679 else
680 {
681 $parser->{'parameters'}->{'poscount'}++;
682 }
683 }
684
685 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
686 {
687 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
688 }
689}
690
691sub _set_live_metadata
692{
693 my $self = shift @_;
694
695 my $collect = $self->{'collect'};
696 my $gsdl_cgi = $self->{'gsdl_cgi'};
697 #my $gsdlhome = $self->{'gsdlhome'};
698 my $infodbtype = $self->{'infodbtype'};
699
700 # Obtain the collect dir
701 my $site = $self->{'site'};
702 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
703 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
704
705
706 # look up additional args
707 my $docid = $self->{'d'};
708 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
709 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
710 }
711 my $metavalue = $self->{'metavalue'};
712
713 # Generate the dbkey
714 my $metaname = $self->{'metaname'};
715 my $dbkey = "$docid.$metaname";
716
717 # To people who know $collect_tail please add some comments
718 # Obtain path to the database
719 my $collect_tail = $collect;
720 $collect_tail =~ s/^.*[\/\\]//;
721 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
722 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
723
724 # Set the new value
725 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
726 my $status = system($cmd);
727 if ($status != 0) {
728 # Catch error if gdbmget failed
729 my $mess = "Failed to set metadata key: $dbkey\n";
730
731 $mess .= "PATH: $ENV{'PATH'}\n";
732 $mess .= "cmd = $cmd\n";
733 $mess .= "Exit status: $status\n";
734 $mess .= "System Error Message: $!\n";
735
736 $gsdl_cgi->generate_error($mess);
737 }
738 else {
739 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
740 }
741
742 #return $status; # in case calling functions have any further use for this
743}
744
745sub set_live_metadata
746{
747 my $self = shift @_;
748
749 my $username = $self->{'username'};
750 my $collect = $self->{'collect'};
751 my $gsdl_cgi = $self->{'gsdl_cgi'};
752
753 if ($baseaction::authentication_enabled) {
754 # Ensure the user is allowed to edit this collection
755 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
756 }
757
758 # Make sure the collection isn't locked by someone else
759 $self->lock_collection($username, $collect);
760
761 $self->_set_live_metadata(@_);
762
763 # Release the lock once it is done
764 $self->unlock_collection($username, $collect);
765}
766
767sub set_index_metadata_entry
768{
769 my $self = shift @_;
770 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
771
772 # To people who know $collect_tail please add some comments
773 # Obtain path to the database
774 my $collect_tail = $collect;
775 $collect_tail =~ s/^.*[\/\\]//;
776 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
777 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
778
779# print STDERR "**** infodb file path = $infodb_file_path\n";
780# print STDERR "***** infodb type = $infodbtype\n";
781
782 # Read the docid entry
783 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
784
785 # Set the metadata value
786 if (defined $metapos) {
787 # if metamode=accumulate AND metapos, warn user and then use metapos
788 if (defined $metamode && $metamode eq "accumulate") {
789 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
790 print STDERR "**** Proceeding by using metapos\n";
791 }
792 $doc_rec->{$metaname}->[$metapos] = $metavalue;
793 }
794 elsif (defined $metamode && $metamode eq "override") {
795 $doc_rec->{$metaname} = [ $metavalue ];
796 }
797 else { # default for index was to override, but because accumulate is less destructive,
798 # and because accumulate is the default for archives and import, that's the new default for index too
799 if(defined $doc_rec->{$metaname}) {
800 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
801 } else {
802 $doc_rec->{$metaname} = [ $metavalue ];
803 }
804 }
805
806 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
807
808 return $status;
809
810}
811
812sub _set_import_metadata
813{
814 my $self = shift @_;
815
816 my $collect = $self->{'collect'};
817 my $gsdl_cgi = $self->{'gsdl_cgi'};
818 my $infodbtype = $self->{'infodbtype'};
819# my $gsdlhome = $self->{'gsdlhome'};
820
821 # Obtain the collect and archive dir
822 my $site = $self->{'site'};
823 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
824 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
825 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
826
827 # look up additional args
828 # want either d= or f=
829 my $docid = $self->{'d'};
830 my ($docid_root,$docid_secnum);
831 if(defined $docid) {
832 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
833 # as yet no support for setting subsection metadata in metadata.xml
834 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
835 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
836 return;
837 }
838 }
839
840 my $import_file = $self->{'f'};
841 if ((!defined $docid) && (!defined $import_file)) {
842 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
843 }
844
845 # Get the parameters and set default mode to "accumulate"
846 my $metaname = $self->{'metaname'};
847 my $metavalue = $self->{'metavalue'};
848## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
849 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
850
851 my $metamode = $self->{'metamode'};
852 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
853 # make "accumulate" the default (less destructive, as won't actually
854 # delete any existing values)
855 $metamode = "accumulate";
856 }
857
858 # adding metapos and prevmetavalue support to import_metadata subroutines
859 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
860 my $prevmetavalue = $self->{'prevmetavalue'};
861
862 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
863 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
864
865 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
866 $mess .= " $metaname";
867 $mess .= " = $metavalue";
868 $mess .= " ($metamode)\n";
869
870 $gsdl_cgi->generate_ok_message($mess);
871
872 #return $status; # in case calling functions have any further use for this
873}
874
875# the version of set_index_meta that doesn't do authentication
876sub _set_archives_metadata
877{
878 my $self = shift @_;
879
880 my $collect = $self->{'collect'};
881 my $gsdl_cgi = $self->{'gsdl_cgi'};
882 my $infodbtype = $self->{'infodbtype'};
883
884 # Obtain the collect and archive dir
885 my $site = $self->{'site'};
886 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
887 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
888
889 # look up additional args
890 my $docid = $self->{'d'};
891 my $metaname = $self->{'metaname'};
892 my $metavalue = $self->{'metavalue'};
893 my $prevmetavalue = $self->{'prevmetavalue'};
894
895 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
896 # Don't append "|| undef", since if metapos=0 it will then be set to undef
897
898 my $metamode = $self->{'metamode'};
899 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
900 # make "accumulate" the default (less destructive, as won't actually
901 # delete any existing values)
902 $metamode = "accumulate";
903 }
904
905 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
906 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
907
908 if ($status == 0) {
909 my $mess = "set-archives-metadata successful: Key[$docid]\n";
910 $mess .= " $metaname";
911 $mess .= "->[$metapos]" if (defined $metapos);
912 $mess .= " = $metavalue";
913 $mess .= " ($metamode)\n";
914
915 $gsdl_cgi->generate_ok_message($mess);
916 }
917 else {
918 my $mess .= "Failed to set archives metadata key: $docid\n";
919 $mess .= "Exit status: $status\n";
920 if(defined $self->{'error_msg'}) {
921 $mess .= "Error Message: $self->{'error_msg'}\n";
922 } else {
923 $mess .= "System Error Message: $!\n";
924 }
925 $mess .= "-" x 20 . "\n";
926
927 $gsdl_cgi->generate_error($mess);
928 }
929
930 #return $status; # in case calling functions have any further use for this
931}
932
933
934# the version of set_index_meta that doesn't do authentication
935sub _set_index_metadata
936{
937 my $self = shift @_;
938
939 my $collect = $self->{'collect'};
940 my $gsdl_cgi = $self->{'gsdl_cgi'};
941
942 my $site = $self->{'site'};
943 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
944
945 # look up additional args
946 my $docid = $self->{'d'};
947 my $metaname = $self->{'metaname'};
948 my $metapos = $self->{'metapos'}; # undef has meaning
949 my $metavalue = $self->{'metavalue'};
950 my $infodbtype = $self->{'infodbtype'};
951 my $metamode = $self->{'metamode'};
952
953 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
954
955 if ($status != 0) {
956 # Catch error if set infodb entry failed
957 my $mess = "Failed to set metadata key: $docid\n";
958
959 $mess .= "PATH: $ENV{'PATH'}\n";
960 $mess .= "Exit status: $status\n";
961 $mess .= "System Error Message: $!\n";
962
963 $gsdl_cgi->generate_error($mess);
964 }
965 else {
966 my $mess = "set-index-metadata successful: Key[$docid]\n";
967 $mess .= " $metaname";
968 $mess .= "->[$metapos]" if (defined $metapos);
969 $mess .= " = $metavalue\n";
970
971 $gsdl_cgi->generate_ok_message($mess);
972 }
973
974 #return $status; # in case calling functions have any further use for this
975}
976
977sub set_index_metadata
978{
979 my $self = shift @_;
980
981 my $username = $self->{'username'};
982 my $collect = $self->{'collect'};
983 my $gsdl_cgi = $self->{'gsdl_cgi'};
984 #my $gsdlhome = $self->{'gsdlhome'};
985
986 if ($baseaction::authentication_enabled) {
987 # Ensure the user is allowed to edit this collection
988 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
989 }
990
991 my $site = $self->{'site'};
992 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
993
994 $gsdl_cgi->checked_chdir($collect_dir);
995
996 # Obtain the collect dir
997 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
998
999 # Make sure the collection isn't locked by someone else
1000 $self->lock_collection($username, $collect);
1001
1002 $self->_set_index_metadata(@_);
1003
1004 # Release the lock once it is done
1005 $self->unlock_collection($username, $collect);
1006}
1007
1008# call this to set the metadata for a combination of dirs archives, import or index, or live
1009# if none specified, defaults to index which was the original behaviour of set_metadata.
1010sub set_metadata
1011{
1012 my $self = shift @_;
1013
1014 # Testing that not defining a variable, setting it to "" or to " " all return false
1015 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1016
1017 my $where = $self->{'where'};
1018 if(!$where) {
1019 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1020 return;
1021 }
1022
1023 # authenticate and lock collection once, even if processing multiple dirs
1024 my $username = $self->{'username'};
1025 my $collect = $self->{'collect'};
1026 my $gsdl_cgi = $self->{'gsdl_cgi'};
1027
1028 if ($baseaction::authentication_enabled) {
1029 # Ensure the user is allowed to edit this collection
1030 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1031 }
1032
1033 if($where =~ m/index/) {
1034 my $site = $self->{'site'};
1035 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1036 $gsdl_cgi->checked_chdir($collect_dir);
1037 }
1038
1039 # Make sure the collection isn't locked by someone else
1040 $self->lock_collection($username, $collect);
1041
1042
1043 # now at last can set the metadata. $where can specify multiple
1044 # $where is of the form: import|archives|index, or a subset thereof
1045
1046 #my @whichdirs = split('\|', $where);
1047
1048 # just check whether $where contains import/archives/index/live in turn, and
1049 # for each case, process it accordingly
1050 if($where =~ m/import/) {
1051 $self->_set_import_metadata(@_);
1052 }
1053
1054 if($where =~ m/archives/) {
1055
1056 # look up docID arg which is optional to set_metadata because it's optional
1057 # to set_import, but which is compulsory to set_archives_metadata
1058 my $docid = $self->{'d'};
1059 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1060 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1061 }
1062 # we have a docid, so can set archives meta
1063 $self->_set_archives_metadata(@_);
1064 }
1065
1066 if($where =~ m/index/) {
1067
1068 # look up docID arg which is optional to set_metadata because it's optional
1069 # to set_import, but which is compulsory to set_archives_metadata
1070 my $docid = $self->{'d'};
1071 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1072 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1073 }
1074 # we have a docid, so can set index meta
1075 $self->_set_index_metadata(@_);
1076 }
1077
1078 if($where =~ m/live/) {
1079 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1080 }
1081
1082 # Release the lock once it is done
1083 $self->unlock_collection($username, $collect);
1084}
1085
1086sub set_metadata_array
1087{
1088 my $self = shift @_;
1089
1090 my $where = $self->{'where'};
1091 if(!$where) {
1092 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1093 return;
1094 }
1095
1096 my $username = $self->{'username'};
1097 my $collect = $self->{'collect'};
1098 my $gsdl_cgi = $self->{'gsdl_cgi'};
1099
1100 if ($baseaction::authentication_enabled) {
1101 # Ensure the user is allowed to edit this collection
1102 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1103 }
1104
1105 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1106 # But including the stmt during this code reorganisation to preserve as-is what used to happen
1107 my $site = $self->{'site'};
1108 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1109 $gsdl_cgi->checked_chdir($collect_dir);
1110
1111 # Make sure the collection isn't locked by someone else
1112 $self->lock_collection($username, $collect);
1113
1114 if($where =~ m/import/) {
1115 $self->_set_import_metadata_array(@_);
1116 }
1117 if($where =~ m/archives/) {
1118 $self->_set_archives_metadata_array(@_);
1119 }
1120 if($where =~ m/index/) {
1121 $self->_set_index_metadata_array(@_);
1122 }
1123 if($where =~ m/live/) {
1124 $self->_set_live_metadata_array(@_);
1125 }
1126
1127 # Release the lock once it is done
1128 $self->unlock_collection($username, $collect);
1129}
1130
1131sub _set_index_metadata_array
1132{
1133 my $self = shift @_;
1134
1135 my $collect = $self->{'collect'};
1136 my $gsdl_cgi = $self->{'gsdl_cgi'};
1137 my $site = $self->{'site'};
1138 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1139
1140
1141 # look up additional args
1142
1143 my $infodbtype = $self->{'infodbtype'};
1144
1145 my $json_str = $self->{'json'};
1146 my $doc_array = decode_json $json_str;
1147
1148
1149 my $global_status = 0;
1150 my $global_mess = "";
1151
1152 my @all_docids = ();
1153
1154 foreach my $doc_array_rec ( @$doc_array ) {
1155
1156 my $status = -1;
1157 my $docid = $doc_array_rec->{'docid'};
1158
1159 push(@all_docids,$docid);
1160
1161 my $metaname = $doc_array_rec->{'metaname'};
1162 if(defined $metaname) {
1163 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
1164 my $metavalue = $doc_array_rec->{'metavalue'};
1165 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1166
1167 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1168 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1169 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1170
1171 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1172 $metaname = $metatable_rec->{'metaname'};
1173 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1174 my $metapos = undef;
1175 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1176
1177 foreach my $metavalue ( @$metavals ) { # metavals is an array
1178 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1179 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1180 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1181 # metavalue for this metaname will be lost
1182 $metamode = "accumulate";
1183 }
1184 }
1185 }
1186 }
1187
1188 if ($status != 0) {
1189 # Catch error if set infodb entry failed
1190 $global_status = $status;
1191 $global_mess .= "Failed to set metadata key: $docid\n";
1192 $global_mess .= "Exit status: $status\n";
1193 $global_mess .= "System Error Message: $!\n";
1194 $global_mess .= "-" x 20;
1195 }
1196 }
1197
1198 if ($global_status != 0) {
1199 $global_mess .= "PATH: $ENV{'PATH'}\n";
1200 $gsdl_cgi->generate_error($global_mess);
1201 }
1202 else {
1203 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1204 $gsdl_cgi->generate_ok_message($mess);
1205 }
1206}
1207
1208sub set_index_metadata_array
1209{
1210 my $self = shift @_;
1211
1212 my $username = $self->{'username'};
1213 my $collect = $self->{'collect'};
1214 my $gsdl_cgi = $self->{'gsdl_cgi'};
1215# my $gsdlhome = $self->{'gsdlhome'};
1216
1217 if ($baseaction::authentication_enabled) {
1218 # Ensure the user is allowed to edit this collection
1219 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1220 }
1221
1222 my $site = $self->{'site'};
1223 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1224
1225 $gsdl_cgi->checked_chdir($collect_dir);
1226
1227 # Obtain the collect dir
1228 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1229
1230 # Make sure the collection isn't locked by someone else
1231 $self->lock_collection($username, $collect);
1232
1233 $self->_set_index_metadata_array(@_);
1234
1235 # Release the lock once it is done
1236 $self->unlock_collection($username, $collect);
1237}
1238
1239# experimental, newly added in and untested
1240sub _set_live_metadata_array
1241{
1242 my $self = shift @_;
1243
1244 my $collect = $self->{'collect'};
1245 my $gsdl_cgi = $self->{'gsdl_cgi'};
1246
1247 my $site = $self->{'site'};
1248 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1249
1250
1251 # look up additional args
1252 my $infodbtype = $self->{'infodbtype'};
1253 # To people who know $collect_tail please add some comments
1254 # Obtain path to the database
1255 my $collect_tail = $collect;
1256 $collect_tail =~ s/^.*[\/\\]//;
1257 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1258 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1259
1260
1261 my $json_str = $self->{'json'};
1262 my $doc_array = decode_json $json_str;
1263
1264
1265 my $global_status = 0;
1266 my $global_mess = "";
1267
1268 my @all_docids = ();
1269
1270
1271 foreach my $doc_array_rec ( @$doc_array ) {
1272
1273 my $status = -1;
1274 my $docid = $doc_array_rec->{'docid'};
1275
1276 push(@all_docids,$docid);
1277
1278 my $metaname = $doc_array_rec->{'metaname'};
1279 if(defined $metaname) {
1280 my $dbkey = "$docid.$metaname";
1281 my $metavalue = $doc_array_rec->{'metavalue'};
1282
1283 # Set the new value
1284 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1285 $status = system($cmd);
1286
1287 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1288 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1289 foreach my $metatable_rec ( @$metatable ) {
1290 $metaname = $metatable_rec->{'metaname'};
1291 my $dbkey = "$docid.$metaname";
1292
1293 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1294 foreach my $metavalue ( @$metavals ) {
1295 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1296 $status = system($cmd);
1297 }
1298 }
1299
1300 }
1301
1302 if ($status != 0) {
1303 # Catch error if gdbmget failed
1304 $global_status = $status;
1305 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1306 $global_mess .= "Exit status: $status\n";
1307 $global_mess .= "System Error Message: $!\n";
1308 $global_mess .= "-" x 20;
1309 }
1310 }
1311
1312 if ($global_status != 0) {
1313 $global_mess .= "PATH: $ENV{'PATH'}\n";
1314 $gsdl_cgi->generate_error($global_mess);
1315 }
1316 else {
1317 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1318 $gsdl_cgi->generate_ok_message($mess);
1319 }
1320}
1321
1322sub set_live_metadata_array
1323{
1324 my $self = shift @_;
1325
1326 my $username = $self->{'username'};
1327 my $collect = $self->{'collect'};
1328 my $gsdl_cgi = $self->{'gsdl_cgi'};
1329
1330 if ($baseaction::authentication_enabled) {
1331 # Ensure the user is allowed to edit this collection
1332 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1333 }
1334
1335 my $site = $self->{'site'};
1336 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1337
1338 $gsdl_cgi->checked_chdir($collect_dir);
1339
1340 # Make sure the collection isn't locked by someone else
1341 $self->lock_collection($username, $collect);
1342
1343 $self->_set_live_metadata_array(@_);
1344
1345 # Release the lock once it is done
1346 $self->unlock_collection($username, $collect);
1347}
1348
1349
1350sub dxml_metadata
1351{
1352 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1353 my $metaname = $parser->{'parameters'}->{'metaname'};
1354 my $metamode = $parser->{'parameters'}->{'metamode'};
1355
1356 print STDERR "**** Processing closing </Metadata> tag\n";
1357
1358 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1359
1360 # Find the right metadata tag and checks if we are going to
1361 # override it
1362 #
1363 # Note: This over writes the first metadata block it
1364 # encountered. If there are multiple Sections in the doc.xml, it
1365 # might not behave as you would expect
1366
1367 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1368## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1369## print STDERR "**** metamode = $metamode\n";
1370
1371 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1372 {
1373 my $name_attr = $attrHash->{'name'};
1374 if (($name_attr eq $metaname) && ($metamode eq "override"))
1375 {
1376 if (!defined $parser->{'parameters'}->{'poscount'})
1377 {
1378 $parser->{'parameters'}->{'poscount'} = 0;
1379 }
1380 else
1381 {
1382 $parser->{'parameters'}->{'poscount'}++;
1383 }
1384
1385 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1386 {
1387 ##print STDERR "#### got match!!\n";
1388 # Get the value and override the current value
1389 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1390 $attrHash->{'_content'} = $metavalue;
1391
1392 # Don't want it to wipe out any other pieces of metadata
1393 $parser->{'parameters'}->{'metamode'} = "done";
1394 }
1395 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1396 {
1397 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1398 $attrHash->{'_content'} = $metavalue;
1399 $parser->{'parameters'}->{'metamode'} = "done";
1400 }
1401 }
1402 }
1403
1404 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1405 return [$tagname => $attrHash];
1406}
1407
1408# This method exists purely for catching invalid section numbers that the client
1409# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1410# we've seen all the Sections in the doc.xml, and none of their section nums matched
1411# if the metamode has not been set to 'done' by then.
1412sub dxml_archive
1413{
1414 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1415 my $metamode = $parser->{'parameters'}->{'metamode'};
1416
1417 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1418 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1419
1420# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1421
1422 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1423 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1424
1425 my $metaname = $parser->{'parameters'}->{'metaname'};
1426 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1427
1428 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1429 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1430 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1431 print STDERR "\tin section $opt_doc_secnum.\n";
1432 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1433 }
1434
1435 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1436 return [$tagname => $attrHash];
1437}
1438
1439sub dxml_description
1440{
1441 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1442 my $metamode = $parser->{'parameters'}->{'metamode'};
1443
1444 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1445 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1446
1447 print STDERR "**** Processing a closing </Description> tag \n";
1448# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1449
1450 # Accumulate the metadata
1451
1452 # We'll be accumulating metadata at this point if we haven't found and therefore
1453 # haven't processed the metadata yet.
1454 # For subsections, this means that if we're at a matching subsection, but haven't
1455 # found the correct metaname to override in that subsection, we accumulate it as new
1456 # meta in the subsection by adding it to the current description.
1457 # If there's no subsection info for the metadata, it will accumulate at the top level
1458 # section description if we hadn't found a matching metaname to override at this point.
1459
1460 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1461 # at the toplevel section. In the latter case, it means we want to process meta in the
1462 # toplevel section. So the eq check between the values below will work in all cases.
1463
1464 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1465 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1466 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1467
1468 if ($curr_secnum eq $opt_doc_secnum
1469 && ($metamode eq "accumulate" || $metamode eq "override")) {
1470 if ($metamode eq "override") {
1471 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1472 }
1473
1474 # If we get to here and metamode is override, this means there
1475 # was no existing value to overide => treat as an append operation
1476
1477 # Tack a new metadata tag on to the end of the <Metadata>+ block
1478 my $metaname = $parser->{'parameters'}->{'metaname'};
1479 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1480
1481 my $metadata_attr = {
1482 '_content' => $metavalue,
1483 'name' => $metaname,
1484 'mode' => "accumulate"
1485 };
1486
1487 my $append_metadata = [ "Metadata" => $metadata_attr ];
1488 my $description_content = $attrHash->{'_content'};
1489
1490 print "Appending metadata to doc.xml\n";
1491
1492 if (ref($description_content)) {
1493 # got some existing interesting nested content
1494 push(@$description_content, " ", $append_metadata ,"\n ");
1495 }
1496 else {
1497 #description_content is most likely a string such as "\n"
1498 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1499 }
1500
1501 $parser->{'parameters'}->{'metamode'} = "done";
1502 }
1503 else {
1504 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1505## print STDERR "**** NOT ACCUMULATE?!? \n";
1506 }
1507
1508 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1509 return [$tagname => $attrHash];
1510}
1511
1512
1513sub dxml_start_section
1514{
1515 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1516
1517 my $new_depth = scalar(@$contextArray);
1518
1519 print STDERR "**** START SECTION \n";
1520
1521 if ($new_depth == 1) {
1522 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1523 $parser->{'parameters'}->{'curr_section_num'} = "";
1524 }
1525
1526 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1527 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1528
1529 my $new_secnum;
1530
1531 if ($new_depth > $old_depth) {
1532 # child subsection
1533 $new_secnum = "$old_secnum.1";
1534 }
1535 elsif ($new_depth == $old_depth) {
1536 # sibling section => increase it's value by 1
1537 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1538 $tail_num++;
1539 $new_secnum = $old_secnum;
1540 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1541 }
1542 else {
1543 # back up to parent section => lopp off tail
1544 $new_secnum = $old_secnum;
1545 $new_secnum =~ s/\.\d+$//;
1546 }
1547
1548 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1549 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1550
1551 1;
1552}
1553
1554sub edit_xml_file
1555{
1556 my $self = shift @_;
1557 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1558
1559 # use XML::Rules to add it in (read in and out again)
1560 my $parser = XML::Rules->new(start_rules => $start_rules,
1561 rules => $rules,
1562 style => 'filter',
1563 output_encoding => 'utf8' );
1564
1565 my $xml_in = "";
1566 if (!open(MIN,"<$filename")) {
1567 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1568 }
1569 else {
1570 # Read all the text in
1571 my $line;
1572 while (defined ($line=<MIN>)) {
1573 $xml_in .= $line;
1574 }
1575 close(MIN);
1576
1577 my $MOUT;
1578 if (!open($MOUT,">$filename")) {
1579 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1580 }
1581 else {
1582 # Matched lines will get handled by the call backs
1583## my $xml_out = "";
1584
1585 binmode($MOUT,":utf8");
1586 $parser->filter($xml_in,$MOUT, $options);
1587
1588# binmode(MOUT,":utf8");
1589# print MOUT $xml_out;
1590 close($MOUT);
1591 }
1592 }
1593
1594 # copy across any custom error information that was stored during parsing
1595 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1596}
1597
1598sub edit_doc_xml
1599{
1600 my $self = shift @_;
1601 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1602
1603 my $info_mess = <<RAWEND;
1604****************************
1605 edit_doc_xml()
1606****************************
1607doc_xml_filename = $doc_xml_filename
1608metaname = $metaname
1609metavalue = $metavalue
1610metapos = $metapos
1611metamode = $metamode
1612opt_secnum = $opt_secnum
1613prevmetavalue = $prevmetavalue
1614****************************
1615RAWEND
1616
1617 $gsdl_cgi->generate_message($info_mess);
1618
1619 # To monitor which section/subsection number we are in
1620 my @start_rules =
1621 ( 'Section' => \&dxml_start_section );
1622
1623 # use XML::Rules to add it in (read in and out again)
1624 # Set the call back functions
1625 my @rules =
1626 ( _default => 'raw',
1627 'Metadata' => \&dxml_metadata,
1628 'Description' => \&dxml_description,
1629 'Archive' => \&dxml_archive); # just for catching errors at end
1630
1631 # Sets the parameters
1632 my $options = { 'metaname' => $metaname,
1633 'metapos' => $metapos,
1634 'metavalue' => $metavalue,
1635 'metamode' => $metamode,
1636 'prevmetavalue' => $prevmetavalue };
1637
1638 if (defined $opt_secnum) {
1639 $options->{'secnum'} = $opt_secnum;
1640 }
1641
1642 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1643}
1644
1645sub set_archives_metadata_entry
1646{
1647 my $self = shift @_;
1648 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1649
1650 my $info_mess = <<RAWEND;
1651****************************
1652 set_archives_metadata_entry()
1653****************************
1654archive_dir = $archive_dir
1655collect_dir = $collect_dir
1656collect = $collect
1657infodbtype = $infodbtype
1658docid = $docid
1659metaname = $metaname
1660metapos = $metapos
1661metavalue = $metavalue
1662metamode = $metamode
1663prevmetavalue = $prevmetavalue
1664****************************
1665RAWEND
1666
1667 $gsdl_cgi->generate_message($info_mess);
1668
1669 # Obtain the doc.xml path for the specified docID
1670 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1671
1672 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1673 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1674 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1675
1676 # The $doc_xml_file is relative to the archives, and now let's get the full path
1677 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1678 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1679
1680 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1681 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1682 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1683 if(!defined $metapos && $metamode eq "override") {
1684 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1685 }
1686
1687 # Edit the doc.xml file with the specified metadata name, value and position.
1688 # TODO: there is a potential problem here as this edit_doc_xml function
1689 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1690 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1691
1692 # dxml_metadata method ignores metapos if metamode anything other than override
1693 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1694 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1695
1696 # return 0; # return 0 for now to indicate no error
1697 return (defined $self->{'error_msg'}) ? 1 : 0;
1698}
1699
1700
1701sub set_archives_metadata
1702{
1703 my $self = shift @_;
1704
1705 my $username = $self->{'username'};
1706 my $collect = $self->{'collect'};
1707 my $gsdl_cgi = $self->{'gsdl_cgi'};
1708
1709 if ($baseaction::authentication_enabled) {
1710 # Ensure the user is allowed to edit this collection
1711 $self->authenticate_user($username, $collect);
1712 }
1713
1714 # Make sure the collection isn't locked by someone else
1715 $self->lock_collection($username, $collect);
1716
1717 $self->_set_archives_metadata(@_);
1718
1719 # Release the lock once it is done
1720 $self->unlock_collection($username, $collect);
1721}
1722
1723sub _set_archives_metadata_array
1724{
1725 my $self = shift @_;
1726
1727 my $collect = $self->{'collect'};
1728 my $gsdl_cgi = $self->{'gsdl_cgi'};
1729 my $site = $self->{'site'};
1730 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1731
1732 # look up additional args
1733
1734 my $infodbtype = $self->{'infodbtype'};
1735
1736 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1737
1738 my $json_str = $self->{'json'};
1739 my $doc_array = decode_json $json_str;
1740
1741
1742 my $global_status = 0;
1743 my $global_mess = "";
1744
1745 my @all_docids = ();
1746
1747 foreach my $doc_array_rec ( @$doc_array ) {
1748 my $status = -1;
1749 my $docid = $doc_array_rec->{'docid'};
1750
1751 push(@all_docids,$docid);
1752
1753 my $metaname = $doc_array_rec->{'metaname'};
1754 if(defined $metaname) {
1755
1756 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1757
1758 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1759 my $metavalue = $doc_array_rec->{'metavalue'};
1760 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1761
1762
1763 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1764 # make "accumulate" the default (less destructive, as it won't actually
1765 # delete any existing values)
1766 $metamode = "accumulate";
1767 }
1768
1769 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1770 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1771 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1772 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1773
1774 foreach my $metatable_rec ( @$metatable ) {
1775 $metaname = $metatable_rec->{'metaname'};
1776 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1777 my $metapos = undef;
1778 my $prevmetavalue = undef;
1779 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1780
1781 foreach my $metavalue ( @$metavals ) {
1782 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1783 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1784
1785 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1786 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1787 # metavalue for this metaname will be lost
1788 $metamode = "accumulate";
1789 }
1790 }
1791 }
1792 }
1793
1794 if ($status != 0) {
1795 # Catch error if set infodb entry failed
1796 $global_status = $status;
1797 $global_mess .= "Failed to set metadata key: $docid\n";
1798 $global_mess .= "Exit status: $status\n";
1799 $global_mess .= "System Error Message: $!\n";
1800 $global_mess .= "-" x 20 . "\n";
1801 }
1802 }
1803
1804 if ($global_status != 0) {
1805 $global_mess .= "PATH: $ENV{'PATH'}\n";
1806 $gsdl_cgi->generate_error($global_mess);
1807 }
1808 else {
1809 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1810 $gsdl_cgi->generate_ok_message($mess);
1811 }
1812}
1813
1814sub set_archives_metadata_array
1815{
1816 my $self = shift @_;
1817
1818 my $username = $self->{'username'};
1819 my $collect = $self->{'collect'};
1820 my $gsdl_cgi = $self->{'gsdl_cgi'};
1821# my $gsdlhome = $self->{'gsdlhome'};
1822
1823 if ($baseaction::authentication_enabled) {
1824 # Ensure the user is allowed to edit this collection
1825 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1826 }
1827
1828 my $site = $self->{'site'};
1829 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1830
1831 $gsdl_cgi->checked_chdir($collect_dir);
1832
1833 # Obtain the collect dir
1834 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1835
1836 # Make sure the collection isn't locked by someone else
1837 $self->lock_collection($username, $collect);
1838
1839 $self->_set_archives_metadata_array(@_);
1840
1841 # Release the lock once it is done
1842 $self->unlock_collection($username, $collect);
1843}
1844
1845sub _remove_archives_metadata
1846{
1847 my $self = shift @_;
1848
1849 my $collect = $self->{'collect'};
1850 my $gsdl_cgi = $self->{'gsdl_cgi'};
1851# my $gsdlhome = $self->{'gsdlhome'};
1852 my $infodbtype = $self->{'infodbtype'};
1853
1854 my $site = $self->{'site'};
1855
1856 # Obtain the collect and archive dir
1857 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1858
1859 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1860
1861 # look up additional args
1862 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1863
1864 my $metaname = $self->{'metaname'};
1865 my $metapos = $self->{'metapos'};
1866
1867 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1868
1869 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1870 # default to deleting the first metavalue for the given metaname
1871 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1872 # seem to get deleted in one single remove_archives_meta action invocation.
1873 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1874 # deleted, both get deleted.
1875 if(!defined $metapos && !defined $metavalue) {
1876 $metapos = 0;
1877 }
1878
1879 my $metamode = $self->{'metamode'} || undef;
1880
1881 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1882 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1883
1884 # This now stores the full pathname
1885 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1886
1887 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1888# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1889
1890 if ($status == 0)
1891 {
1892 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1893 $mess .= " $metaname";
1894 $mess .= "->[$metapos]" if (defined $metapos);
1895
1896 $gsdl_cgi->generate_ok_message($mess);
1897 }
1898 else
1899 {
1900 my $mess .= "Failed to remove archives metadata key: $docid\n";
1901 $mess .= "Exit status: $status\n";
1902 $mess .= "System Error Message: $!\n";
1903 $mess .= "-" x 20 . "\n";
1904
1905 $gsdl_cgi->generate_error($mess);
1906 }
1907
1908 #return $status; # in case calling functions have a use for this
1909}
1910
1911sub remove_archives_metadata
1912{
1913 my $self = shift @_;
1914
1915 my $username = $self->{'username'};
1916 my $collect = $self->{'collect'};
1917 my $gsdl_cgi = $self->{'gsdl_cgi'};
1918
1919 if ($baseaction::authentication_enabled)
1920 {
1921 # Ensure the user is allowed to edit this collection
1922 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1923 }
1924
1925 # Make sure the collection isn't locked by someone else
1926 $self->lock_collection($username, $collect);
1927
1928 $self->_remove_archives_metadata(@_);
1929
1930 # Release the lock once it is done
1931 $self->unlock_collection($username, $collect);
1932}
1933
1934sub remove_from_doc_xml
1935{
1936 my $self = shift @_;
1937 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1938
1939 my @start_rules = ('Section' => \&dxml_start_section);
1940
1941 # Set the call-back functions for the metadata tags
1942 my @rules =
1943 (
1944 _default => 'raw',
1945 'Metadata' => \&rfdxml_metadata
1946 );
1947
1948 my $parser = XML::Rules->new
1949 (
1950 start_rules => \@start_rules,
1951 rules => \@rules,
1952 style => 'filter',
1953 output_encoding => 'utf8',
1954# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1955 stripspaces => 2|0|0 # ineffectual
1956 );
1957
1958 my $status = 0;
1959 my $xml_in = "";
1960 if (!open(MIN,"<$doc_xml_filename"))
1961 {
1962 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1963 $status = 1;
1964 }
1965 else
1966 {
1967 # Read them in
1968 my $line;
1969 while (defined ($line=<MIN>)) {
1970 $xml_in .= $line;
1971 }
1972 close(MIN);
1973
1974 # Filter with the call-back functions
1975 my $xml_out = "";
1976
1977 my $MOUT;
1978 if (!open($MOUT,">$doc_xml_filename")) {
1979 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1980 $status = 1;
1981 }
1982 else {
1983 binmode($MOUT,":utf8");
1984 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1985 close($MOUT);
1986 }
1987 }
1988 return $status;
1989}
1990
1991sub rfdxml_metadata
1992{
1993 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1994
1995 # For comparisons, toplevel section is indicated by ""
1996 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1997 my $secid = $parser->{'parameters'}->{'secid'} || "";
1998
1999 if (!($secid eq $curr_sec_num))
2000 {
2001 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2002 return [$tagname => $attrHash];
2003 }
2004
2005 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2006 {
2007 if (!defined $parser->{'parameters'}->{'poscount'})
2008 {
2009 $parser->{'parameters'}->{'poscount'} = 0;
2010 }
2011 else
2012 {
2013 $parser->{'parameters'}->{'poscount'}++;
2014 }
2015
2016 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2017 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2018 return [];
2019 }
2020 }
2021
2022 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2023 {
2024 return [];
2025 }
2026
2027 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2028 {
2029 return [];
2030 }
2031
2032 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2033 return [$tagname => $attrHash];
2034}
2035
2036sub mxml_metadata
2037{
2038 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2039 my $metaname = $parser->{'parameters'}->{'metaname'};
2040 my $metamode = $parser->{'parameters'}->{'metamode'};
2041
2042 # Report error if we don't see FileName tag before this
2043 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2044
2045 # Don't do anything if we are not in the right FileSet
2046 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2047 if ($file_regexp =~ /\.\*/) {
2048 # Only interested in a file_regexp if it specifies precisely one
2049 # file.
2050 # So, skip anything with a .* in it as it is too general
2051## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2052 return [$tagname => $attrHash];
2053 }
2054 my $src_file = $parser->{'parameters'}->{'src_file'};
2055 if (!($src_file =~ /$file_regexp/)) {
2056 return [$tagname => $attrHash];
2057 }
2058## print STDERR "*** mxl metamode = $metamode\n";
2059
2060 # Find the right metadata tag and checks if we are going to override it
2061 my $name_attr = $attrHash->{'name'};
2062 if (($name_attr eq $metaname) && ($metamode eq "override")) {
2063
2064 # now metadata.xml functions need to keep track of metapos
2065 if (!defined $parser->{'parameters'}->{'poscount'})
2066 {
2067 $parser->{'parameters'}->{'poscount'} = 0;
2068 }
2069 else
2070 {
2071 $parser->{'parameters'}->{'poscount'}++;
2072 }
2073
2074 # If either the metapos or prevmetavalue is set,
2075 # get the value and override the current value
2076 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2077
2078 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2079 {
2080 $attrHash->{'_content'} = $metavalue;
2081
2082 ## print STDERR "**** overriding metadata.xml\n";
2083
2084 # Don't want it to wipe out any other pieces of metadata
2085 $parser->{'parameters'}->{'metamode'} = "done";
2086 }
2087 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2088 {
2089 $attrHash->{'_content'} = $metavalue;
2090 $parser->{'parameters'}->{'metamode'} = "done";
2091 }
2092 }
2093
2094 # mxml_description will process the metadata if metadata is accumulate,
2095 # or if we haven't found the metadata to override
2096
2097 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2098 return [$tagname => $attrHash];
2099}
2100
2101
2102sub mxml_description
2103{
2104 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2105 my $metamode = $parser->{'parameters'}->{'metamode'};
2106
2107 # Failed... Report error if we don't see FileName tag before this
2108 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2109
2110 # Don't do anything if we are not in the right FileSet
2111 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2112 if ($file_regexp =~ m/\.\*/) {
2113 # Only interested in a file_regexp if it specifies precisely one
2114 # file.
2115 # So, skip anything with a .* in it as it is too general
2116 return [$tagname => $attrHash];
2117 }
2118 my $src_file = $parser->{'parameters'}->{'src_file'};
2119
2120 if (!($src_file =~ m/$file_regexp/)) {
2121 return [$tagname => $attrHash];
2122 }
2123
2124 # Accumulate the metadata block to the end of the description block
2125 # Note: This adds metadata block to all description blocks, so if there are
2126 # multiple FileSets, it will add to all of them
2127 if (($metamode eq "accumulate") || ($metamode eq "override")) {
2128
2129 # if metamode was "override" but get to here then it failed to
2130 # find an item to override, in which case it should append its
2131 # value to the end, just like the "accumulate" mode
2132
2133 if ($metamode eq "override") {
2134 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2135 }
2136
2137 # tack a new metadata tag on to the end of the <Metadata>+ block
2138 my $metaname = $parser->{'parameters'}->{'metaname'};
2139 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2140
2141 my $metadata_attr = { '_content' => $metavalue,
2142 'name' => $metaname,
2143 'mode' => "accumulate" };
2144
2145 my $append_metadata = [ "Metadata" => $metadata_attr ];
2146 my $description_content = $attrHash->{'_content'};
2147
2148## print STDERR "*** appending to metadata.xml\n";
2149
2150 # append the new metadata element to the end of the current
2151 # content contained inside this tag
2152 if (ref($description_content) eq "") {
2153 # => string or numeric literal
2154 # this is caused by a <Description> block has no <Metadata> child elements
2155 # => set up an empty array in '_content'
2156 $attrHash->{'_content'} = [ "\n" ];
2157 $description_content = $attrHash->{'_content'};
2158 }
2159
2160 push(@$description_content," ", $append_metadata ,"\n ");
2161 $parser->{'parameters'}->{'metamode'} = "done";
2162 }
2163
2164 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2165 return [$tagname => $attrHash];
2166}
2167
2168
2169sub mxml_filename
2170{
2171 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2172
2173 # Store the filename of the Current Fileset
2174 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2175 # FileName tag must come before Description tag
2176 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2177
2178 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2179 return [$tagname => $attrHash];
2180}
2181
2182
2183sub mxml_fileset
2184{
2185 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2186
2187 # Initilise the current_file
2188 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2189 # FileName tag must come before Description tag
2190 $parser->{'parameters'}->{'current_file'} = "";
2191
2192 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2193 return [$tagname => $attrHash];
2194}
2195
2196sub mxml_directorymetadata
2197{
2198 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2199
2200 # if we haven't processed the metadata when we reach the end of metadata.xml
2201 # it's because there's no particular FileSet element whose FileName matched
2202 # In which case, add a new FileSet for this FileName
2203 my $metamode = $parser->{'parameters'}->{'metamode'};
2204 if($metamode ne "done") {
2205
2206 if ($metamode eq "override") {
2207 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2208 }
2209
2210 # If we get to here and metamode is override, this means there
2211 # was no existing value to overide => treat as an append operation
2212
2213 # Create a new FileSet element and append to DirectoryMetadata
2214 # <FileSet>
2215 # <FileName>src_file</FileName>
2216 # <Description>
2217 # <Metadata mode="" name="">metavalue</Metadata>
2218 # </Description>
2219 # </FileSet>
2220 my $src_file = $parser->{'parameters'}->{'src_file'};
2221 my $metaname = $parser->{'parameters'}->{'metaname'};
2222 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2223 my $metadata_attr = {
2224 '_content' => $metavalue,
2225 'name' => $metaname,
2226 'mode' => "accumulate"
2227 };
2228 my $append_metadata = [ "Metadata" => $metadata_attr ];
2229 my $description_attr->{'_content'} = [ "\n\t\t ", $append_metadata, "\n\t\t"];
2230 my $description_element = [ "Description" => $description_attr ];
2231
2232 #_content is not an attribute, it's special and holds the children of this element
2233 # including the textnode value embedded in this element if any.
2234 my $filename_attr = {'_content' => $src_file};
2235 my $filename_element = [ "FileName" => $filename_attr ];
2236
2237 my $fileset_attr = {};
2238 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2239 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2240
2241
2242 # get children of dirmeta, and push the new FileSet element onto it
2243 print "Appending metadata to metadata.xml\n";
2244 my $dirmeta_content = $attrHash->{'_content'};
2245 if (ref($dirmeta_content)) {
2246 # got some existing interesting nested content
2247 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2248 push(@$dirmeta_content, "\t", $fileset ,"\n");
2249 }
2250 else {
2251 #description_content is most likely a string such as "\n"
2252 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2253 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2254 }
2255
2256 $parser->{'parameters'}->{'metamode'} = "done";
2257 }
2258 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2259 return [$tagname => $attrHash];
2260}
2261
2262
2263sub edit_metadata_xml
2264{
2265 my $self = shift @_;
2266 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2267
2268 # Set the call-back functions for the metadata tags
2269 my @rules =
2270 ( _default => 'raw',
2271 'FileName' => \&mxml_filename,
2272 'Metadata' => \&mxml_metadata,
2273 'Description' => \&mxml_description,
2274 'FileSet' => \&mxml_fileset,
2275 'DirectoryMetadata' => \&mxml_directorymetadata);
2276
2277 # use XML::Rules to add it in (read in and out again)
2278 my $parser = XML::Rules->new(rules => \@rules,
2279 style => 'filter',
2280 output_encoding => 'utf8',
2281 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2282
2283 if (!-e $metadata_xml_filename) {
2284
2285 if (open(MOUT,">$metadata_xml_filename")) {
2286
2287 my $src_file_re = &util::filename_to_regex($src_file);
2288 # shouldn't the following also be in the above utility routine??
2289 # $src_file_re =~ s/\./\\./g;
2290
2291 print MOUT "<?xml version=\"1.0\"?>\n";
2292 print MOUT "<DirectoryMetadata>\n";
2293 print MOUT " <FileSet>\n";
2294 print MOUT " <FileName>$src_file_re</FileName>\n";
2295 print MOUT " <Description>\n";
2296 print MOUT " </Description>\n";
2297 print MOUT " </FileSet>\n";
2298 print MOUT "</DirectoryMetadata>\n";
2299
2300 close(MOUT);
2301 }
2302 else {
2303 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2304 }
2305 }
2306
2307
2308 my $xml_in = "";
2309 if (!open(MIN,"<$metadata_xml_filename")) {
2310 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2311 }
2312 else {
2313 # Read them in
2314 my $line;
2315 while (defined ($line=<MIN>)) {
2316 $xml_in .= $line;
2317 }
2318 close(MIN);
2319
2320 # Filter with the call-back functions
2321 my $xml_out = "";
2322
2323 my $MOUT;
2324 if (!open($MOUT,">$metadata_xml_filename")) {
2325 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2326 }
2327 else {
2328 binmode($MOUT,":utf8");
2329
2330 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2331 # At the moment, I will just hack it!
2332 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2333 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2334 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2335 #print MOUT $xml_out;
2336
2337 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2338 metapos => $metapos,
2339 metavalue => $metavalue,
2340 metamode => $metamode,
2341 src_file => $src_file,
2342 prevmetavalue => $prevmetavalue,
2343 current_file => undef} );
2344 close($MOUT);
2345 }
2346 }
2347}
2348
2349
2350sub set_import_metadata
2351{
2352 my $self = shift @_;
2353
2354 my $username = $self->{'username'};
2355 my $collect = $self->{'collect'};
2356 my $gsdl_cgi = $self->{'gsdl_cgi'};
2357
2358 if ($baseaction::authentication_enabled) {
2359 # Ensure the user is allowed to edit this collection
2360 $self->authenticate_user($username, $collect);
2361 }
2362
2363 # Make sure the collection isn't locked by someone else
2364 $self->lock_collection($username, $collect);
2365
2366 $self->_set_import_metadata(@_);
2367
2368 # Release the lock once it is done
2369 $self->unlock_collection($username, $collect);
2370
2371}
2372
2373sub set_import_metadata_array
2374{
2375 my $self = shift @_;
2376
2377 my $username = $self->{'username'};
2378 my $collect = $self->{'collect'};
2379 my $gsdl_cgi = $self->{'gsdl_cgi'};
2380# my $gsdlhome = $self->{'gsdlhome'};
2381
2382 if ($baseaction::authentication_enabled) {
2383 # Ensure the user is allowed to edit this collection
2384 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2385 }
2386
2387 my $site = $self->{'site'};
2388 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2389
2390 $gsdl_cgi->checked_chdir($collect_dir);
2391
2392 # Make sure the collection isn't locked by someone else
2393 $self->lock_collection($username, $collect);
2394
2395 $self->_set_import_metadata_array(@_);
2396
2397 # Release the lock once it is done
2398 $self->unlock_collection($username, $collect);
2399
2400}
2401
2402
2403sub _set_import_metadata_array
2404{
2405 my $self = shift @_;
2406
2407 my $collect = $self->{'collect'};
2408 my $gsdl_cgi = $self->{'gsdl_cgi'};
2409
2410 my $site = $self->{'site'};
2411 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2412
2413 # look up additional args
2414
2415 my $infodbtype = $self->{'infodbtype'};
2416
2417 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2418 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2419
2420 my $json_str = $self->{'json'};
2421 my $doc_array = decode_json $json_str;
2422
2423 my $global_status = 0;
2424 my $global_mess = "";
2425
2426 my @all_docids = ();
2427
2428 foreach my $doc_array_rec ( @$doc_array )
2429 {
2430 my $status = -1;
2431 my $docid = $doc_array_rec->{'docid'};
2432
2433 my ($docid_root,$docid_secnum);
2434 if(defined $docid) {
2435 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2436 # as yet no support for setting subsection metadata in metadata.xml
2437 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2438 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2439 next; # skip this docid in for loop
2440 }
2441 }
2442
2443 push(@all_docids,$docid); # docid_root rather
2444
2445 my $metaname = $doc_array_rec->{'metaname'};
2446 if (defined $metaname) {
2447 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2448 my $metavalue = $doc_array_rec->{'metavalue'};
2449 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2450
2451 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2452 # make "accumulate" the default (less destructive, as won't actually
2453 # delete any existing values)
2454 $metamode = "accumulate";
2455 }
2456
2457 # adding metapos and prevmetavalue support to import_metadata subroutines
2458 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2459 my $prevmetavalue = $self->{'prevmetavalue'};
2460
2461 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2462
2463 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2464 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2465
2466 foreach my $metatable_rec ( @$metatable ) {
2467 $metaname = $metatable_rec->{'metaname'};
2468 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2469 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2470 # make "accumulate" the default (less destructive, as won't actually
2471 # delete any existing values)
2472 $metamode = "accumulate";
2473 }
2474
2475 # No support for metapos and prevmetavalue in the JSON metatable substructure
2476 my $metapos = undef;
2477 my $prevmetavalue = undef;
2478 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2479
2480 foreach my $metavalue ( @$metavals ) {
2481 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2482
2483 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2484 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2485 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2486 # metavalue for this metaname will be lost
2487 $metamode = "accumulate";
2488 }
2489 }
2490 }
2491 }
2492 }
2493
2494 # always a success message
2495 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2496 $gsdl_cgi->generate_ok_message($mess);
2497}
2498
2499# always returns true (1)
2500sub set_import_metadata_entry
2501{
2502 my $self = shift @_;
2503 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2504
2505 my $info_mess = <<RAWEND;
2506****************************
2507 set_import_metadata_entry()
2508****************************
2509collect = $collect
2510collect_dir = $collect_dir
2511infodbtype = $infodbtype
2512arcinfo_doc_filename = $arcinfo_doc_filename
2513docid = $docid
2514metaname = $metaname
2515metapos = $metapos
2516metavalue = $metavalue
2517metamode = $metamode
2518prevmetavalue = $prevmetavalue
2519****************************
2520RAWEND
2521
2522 $gsdl_cgi->generate_message($info_mess);
2523
2524 # import works with metadata.xml which can have inherited metadata
2525 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2526 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2527 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2528 # what they're doing if they provide a metapos.
2529 if(defined $metapos) {
2530 print STDERR "@@@@ WARNING: metapos defined.\n";
2531 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2532 }
2533
2534 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2535 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2536 my $metadata_xml_file;
2537 my $import_filename = undef;
2538
2539 if (defined $docid) {
2540 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2541 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2542
2543 # This now stores the full pathname
2544 $import_filename = $doc_rec->{'src-file'}->[0];
2545 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2546 # as the array version of the method doesn't support the -f parameter yet
2547 my $import_file = $self->{'f'};
2548 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2549 }
2550
2551 # figure out correct metadata.xml file [?]
2552 # Assuming the metadata.xml file is next to the source file
2553 # Note: This will not work if it is using the inherited metadata from the parent folder
2554 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2555 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2556
2557 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2558 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2559 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2560 if(!defined $metapos && $metamode eq "override") {
2561## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2562 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2563
2564 }
2565
2566 # Edit the metadata.xml
2567 # Modified by Jeffrey from DL Consulting
2568 # Handle the case where there is one metadata.xml file for multiple FileSets
2569 # The XML filter needs to know whether it is in the right FileSet
2570 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2571 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2572 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2573 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2574 #return 0;
2575 return $metadata_xml_filename;
2576}
2577
2578sub _remove_import_metadata
2579{
2580 my $self = shift @_;
2581
2582 my $collect = $self->{'collect'};
2583 my $gsdl_cgi = $self->{'gsdl_cgi'};
2584# my $gsdlhome = $self->{'gsdlhome'};
2585 my $infodbtype = $self->{'infodbtype'};
2586
2587 # Obtain the collect dir
2588 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2589 my $site = $self->{'site'};
2590 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2591
2592 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2593 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2594
2595 # look up additional args
2596 my $docid = $self->{'d'};
2597 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2598 {
2599 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2600 }
2601
2602 my $metaname = $self->{'metaname'};
2603 my $metapos = $self->{'metapos'};
2604 my $metavalue = $self->{'metavalue'};
2605 if(defined $metavalue) {
2606 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2607 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2608 $metapos = 0;
2609 }
2610 my $metamode = $self->{'metamode'} || undef;
2611
2612 # import works with metadata.xml which can have inherited metadata
2613 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2614 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2615 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2616 # what they're doing if they provide a metapos.
2617 if(defined $metapos) {
2618 print STDERR "@@@@ WARNING: metapos defined.\n";
2619 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2620 }
2621
2622 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2623 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2624 my $metadata_xml_file;
2625 my $import_filename = undef;
2626 if (defined $docid)
2627 {
2628 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2629 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2630
2631 # This now stores the full pathname
2632 $import_filename = $doc_rec->{'src-file'}->[0];
2633 }
2634
2635 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2636 {
2637 $gsdl_cgi->generate_error("There is no metadata\n");
2638 }
2639
2640 # figure out correct metadata.xml file [?]
2641 # Assuming the metadata.xml file is next to the source file
2642 # Note: This will not work if it is using the inherited metadata from the parent folder
2643 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2644 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2645
2646 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2647
2648 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2649 $mess .= " $metaname";
2650 $mess .= " = $metavalue\n";
2651
2652 $gsdl_cgi->generate_ok_message($mess);
2653
2654 #return $status; # in case calling functions have a use for this
2655}
2656
2657sub remove_import_metadata
2658{
2659 my $self = shift @_;
2660
2661 my $username = $self->{'username'};
2662 my $collect = $self->{'collect'};
2663 my $gsdl_cgi = $self->{'gsdl_cgi'};
2664
2665 if ($baseaction::authentication_enabled) {
2666 # Ensure the user is allowed to edit this collection
2667 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2668 }
2669
2670 # Make sure the collection isn't locked by someone else
2671 $self->lock_collection($username, $collect);
2672
2673 $self->_remove_import_metadata(@_);
2674
2675 # Release the lock once it is done
2676 $self->unlock_collection($username, $collect);
2677
2678}
2679
2680sub remove_from_metadata_xml
2681{
2682 my $self = shift @_;
2683 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2684 # metamode generally has no meaning for removing meta, but is used by set_meta
2685 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2686
2687 # Set the call-back functions for the metadata tags
2688 my @rules =
2689 (
2690 _default => 'raw',
2691 'Metadata' => \&rfmxml_metadata,
2692 'FileName' => \&mxml_filename
2693 );
2694
2695 my $parser = XML::Rules->new
2696 (
2697 rules => \@rules,
2698 style => 'filter',
2699 output_encoding => 'utf8',
2700 #normalisespaces => 1,
2701 stripspaces => 2|0|0 # ineffectual
2702 );
2703
2704 my $xml_in = "";
2705 if (!open(MIN,"<$metadata_xml_filename"))
2706 {
2707 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2708 }
2709 else
2710 {
2711 # Read them in
2712 my $line;
2713 while (defined ($line=<MIN>)) {
2714 $xml_in .= $line;
2715 }
2716 close(MIN);
2717
2718 # Filter with the call-back functions
2719 my $xml_out = "";
2720
2721 my $MOUT;
2722 if (!open($MOUT,">$metadata_xml_filename")) {
2723 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2724 }
2725 else {
2726 binmode($MOUT,":utf8");
2727 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2728 close($MOUT);
2729 }
2730 }
2731}
2732
2733sub rfmxml_metadata
2734{
2735 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2736
2737 # metadata.xml does not handle subsections
2738
2739 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2740 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2741 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2742 {
2743 if (!defined $parser->{'parameters'}->{'poscount'})
2744 {
2745 $parser->{'parameters'}->{'poscount'} = 0;
2746 }
2747 else
2748 {
2749 $parser->{'parameters'}->{'poscount'}++;
2750 }
2751
2752 # if overriding but no metapos, then clear all the meta for this metaname
2753 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2754 return [];
2755 }
2756
2757 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2758 {
2759 return [];
2760 }
2761
2762 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2763 {
2764 return [];
2765 }
2766 }
2767
2768 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2769 return [$tagname => $attrHash];
2770}
2771
2772sub _remove_live_metadata
2773{
2774 my $self = shift @_;
2775
2776 my $collect = $self->{'collect'};
2777 my $gsdl_cgi = $self->{'gsdl_cgi'};
2778# my $gsdlhome = $self->{'gsdlhome'};
2779 my $infodbtype = $self->{'infodbtype'};
2780
2781 # Obtain the collect dir
2782 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2783 my $site = $self->{'site'};
2784 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2785
2786
2787 # look up additional args
2788 my $docid = $self->{'d'};
2789 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2790 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2791 }
2792
2793 # Generate the dbkey
2794 my $metaname = $self->{'metaname'};
2795 my $dbkey = "$docid.$metaname";
2796
2797 # To people who know $collect_tail please add some comments
2798 # Obtain the live gdbm_db path
2799 my $collect_tail = $collect;
2800 $collect_tail =~ s/^.*[\/\\]//;
2801 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2802 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2803
2804 # Remove the key
2805 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2806 my $status = system($cmd);
2807 if ($status != 0) {
2808 # Catch error if gdbmdel failed
2809 my $mess = "Failed to set metadata key: $dbkey\n";
2810
2811 $mess .= "PATH: $ENV{'PATH'}\n";
2812 $mess .= "cmd = $cmd\n";
2813 $mess .= "Exit status: $status\n";
2814 $mess .= "System Error Message: $!\n";
2815
2816 $gsdl_cgi->generate_error($mess);
2817 }
2818 else {
2819 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2820 }
2821
2822}
2823
2824sub remove_live_metadata
2825{
2826 my $self = shift @_;
2827
2828 my $username = $self->{'username'};
2829 my $collect = $self->{'collect'};
2830 my $gsdl_cgi = $self->{'gsdl_cgi'};
2831 my $gsdlhome = $self->{'gsdlhome'};
2832
2833 if ($baseaction::authentication_enabled) {
2834 # Ensure the user is allowed to edit this collection
2835 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2836 }
2837
2838 # Make sure the collection isn't locked by someone else
2839 $self->lock_collection($username, $collect);
2840
2841 $self->_remove_live_metadata(@_);
2842
2843 $self->unlock_collection($username, $collect);
2844}
2845
2846sub remove_metadata
2847{
2848 my $self = shift @_;
2849
2850 my $where = $self->{'where'};
2851 if(!$where) {
2852 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2853 return;
2854 }
2855
2856 my $username = $self->{'username'};
2857 my $collect = $self->{'collect'};
2858 my $gsdl_cgi = $self->{'gsdl_cgi'};
2859
2860 if ($baseaction::authentication_enabled) {
2861 # Ensure the user is allowed to edit this collection
2862 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2863 }
2864
2865 # Make sure the collection isn't locked by someone else
2866 $self->lock_collection($username, $collect);
2867
2868 # check which directories need to be processed, specified in $where as
2869 # any combination of import|archives|index|live
2870 if($where =~ m/import/) {
2871 $self->_remove_import_metadata(@_);
2872 }
2873 if($where =~ m/archives/) {
2874 $self->_remove_archives_metadata(@_);
2875 }
2876 if($where =~ m/index/) {
2877 $self->_remove_index_metadata(@_);
2878 }
2879
2880 # Release the lock once it is done
2881 $self->unlock_collection($username, $collect);
2882}
2883
2884# the internal version, without authentication
2885sub _remove_index_metadata
2886{
2887 my $self = shift @_;
2888
2889 my $collect = $self->{'collect'};
2890 my $gsdl_cgi = $self->{'gsdl_cgi'};
2891# my $gsdlhome = $self->{'gsdlhome'};
2892 my $infodbtype = $self->{'infodbtype'};
2893
2894 # Obtain the collect dir
2895 my $site = $self->{'site'};
2896 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2897 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2898
2899 # look up additional args
2900 my $docid = $self->{'d'};
2901 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2902 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2903 }
2904 my $metaname = $self->{'metaname'};
2905 my $metapos = $self->{'metapos'};
2906 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2907
2908 # To people who know $collect_tail please add some comments
2909 # Obtain the path to the database
2910 my $collect_tail = $collect;
2911 $collect_tail =~ s/^.*[\/\\]//;
2912 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2913 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2914
2915 # Read the docid entry
2916 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2917
2918 # Check to make sure the key does exist
2919 if (!defined ($doc_rec->{$metaname})) {
2920 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2921 }
2922
2923 # Obtain the specified metadata pos
2924 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2925 if(!defined $metapos && !defined $metavalue) {
2926 $metapos = 0;
2927 }
2928
2929
2930 # consider check key is defined before deleting?
2931 # Loop through the metadata array and ignore the specified position
2932 my $filtered_metadata = [];
2933 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2934 for (my $i=0; $i<$num_metadata_vals; $i++) {
2935 my $metaval = shift(@{$doc_rec->{$metaname}});
2936
2937 if (!defined $metavalue && $i != $metapos) {
2938 push(@$filtered_metadata,$metaval);
2939 }
2940
2941 if(defined $metavalue && !($metavalue eq $metaval))
2942 {
2943 push(@$filtered_metadata,$metaval);
2944 }
2945 }
2946 $doc_rec->{$metaname} = $filtered_metadata;
2947
2948 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2949 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2950
2951 if ($status != 0) {
2952 my $mess = "Failed to set metadata key: $docid\n";
2953
2954 $mess .= "PATH: $ENV{'PATH'}\n";
2955 $mess .= "Exit status: $status\n";
2956 $mess .= "System Error Message: $!\n";
2957
2958 $gsdl_cgi->generate_error($mess);
2959 }
2960 else {
2961 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2962 $mess .= " $metaname";
2963 $mess .= "->[$metapos]" if (defined $metapos);
2964
2965 $gsdl_cgi->generate_ok_message($mess);
2966 }
2967
2968 #return $status; # in case calling functions have a use for this
2969}
2970
2971sub remove_index_metadata
2972{
2973 my $self = shift @_;
2974
2975 my $username = $self->{'username'};
2976 my $collect = $self->{'collect'};
2977 my $gsdl_cgi = $self->{'gsdl_cgi'};
2978# my $gsdlhome = $self->{'gsdlhome'};
2979
2980 if ($baseaction::authentication_enabled) {
2981 # Ensure the user is allowed to edit this collection
2982 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2983 }
2984
2985 # Obtain the collect dir
2986 my $site = $self->{'site'};
2987 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2988 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2989
2990 # Make sure the collection isn't locked by someone else
2991 $self->lock_collection($username, $collect);
2992
2993 $self->_remove_index_metadata(@_);
2994
2995 # Release the lock once it is done
2996 $self->unlock_collection($username, $collect);
2997}
2998
2999
3000# Was trying to reused the codes, but the functions need to be broken
3001# down more before they can be reused, otherwise there will be too
3002# much overhead and duplicate process...
3003sub insert_metadata
3004{
3005 my $self = shift @_;
3006
3007 my $username = $self->{'username'};
3008 my $collect = $self->{'collect'};
3009 my $gsdl_cgi = $self->{'gsdl_cgi'};
3010 my $gsdlhome = $self->{'gsdlhome'};
3011 my $infodbtype = $self->{'infodbtype'};
3012
3013 # If the import metadata and gdbm database have been updated, we
3014 # need to insert some notification to warn user that the the text
3015 # they see at the moment is not indexed and require a rebuild.
3016 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3017
3018 if ($baseaction::authentication_enabled) {
3019 # Ensure the user is allowed to edit this collection
3020 $self->authenticate_user($username, $collect);
3021 }
3022
3023 # Obtain the collect and archive dir
3024 my $site = $self->{'site'};
3025 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3026 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3027 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3028
3029 # Make sure the collection isn't locked by someone else
3030 $self->lock_collection($username, $collect);
3031
3032 # Check additional args
3033 my $docid = $self->{'d'};
3034 if (!defined($docid)) {
3035 $gsdl_cgi->generate_error("No document id is specified: d=...");
3036 }
3037 my $metaname = $self->{'metaname'};
3038 if (!defined($metaname)) {
3039 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3040 }
3041 my $metavalue = $self->{'metavalue'};
3042 if (!defined($metavalue) || $metavalue eq "") {
3043 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3044 }
3045 # make "accumulate" the default (less destructive, as won't actually
3046 # delete any existing values)
3047 my $metamode = "accumulate";
3048
3049 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3050 my $metapos = undef;
3051 my $prevmetavalue = undef;
3052
3053 #=======================================================================#
3054 # set_import_metadata [START]
3055 #=======================================================================#
3056 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3057 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3058 my $metadata_xml_file;
3059 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3060 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3061
3062 # This now stores the full pathname
3063 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3064
3065 # figure out correct metadata.xml file [?]
3066 # Assuming the metadata.xml file is next to the source file
3067 # Note: This will not work if it is using the inherited metadata from the parent folder
3068 my ($import_tailname, $import_dirname)
3069 = File::Basename::fileparse($import_filename);
3070 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3071
3072 # Shane's escape characters
3073 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3074 $metavalue =~ s/\,/&#44;/g;
3075 $metavalue =~ s/\:/&#58;/g;
3076 $metavalue =~ s/\|/&#124;/g;
3077 $metavalue =~ s/\(/&#40;/g;
3078 $metavalue =~ s/\)/&#41;/g;
3079 $metavalue =~ s/\[/&#91;/g;
3080 $metavalue =~ s/\\/&#92;/g;
3081 $metavalue =~ s/\]/&#93;/g;
3082 $metavalue =~ s/\{/&#123;/g;
3083 $metavalue =~ s/\}/&#125;/g;
3084 $metavalue =~ s/\"/&#34;/g;
3085 $metavalue =~ s/\`/&#96;/g;
3086 $metavalue =~ s/\n/_newline_/g;
3087
3088 # Edit the metadata.xml
3089 # Modified by Jeffrey from DL Consulting
3090 # Handle the case where there is one metadata.xml file for multiple FileSets
3091 # The XML filter needs to know whether it is in the right FileSet
3092 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3093 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3094 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3095 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3096 #=======================================================================#
3097 # set_import_metadata [END]
3098 #=======================================================================#
3099
3100
3101 #=======================================================================#
3102 # set_metadata (accumulate version) [START]
3103 #=======================================================================#
3104 # To people who know $collect_tail please add some comments
3105 # Obtain path to the database
3106 my $collect_tail = $collect;
3107 $collect_tail =~ s/^.*[\/\\]//;
3108 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3109 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3110
3111 # Read the docid entry
3112 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3113
3114 # Protect the quotes
3115 $metavalue =~ s/\"/\\\"/g;
3116
3117 # Adds the pending macro
3118 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3119
3120 # If the metadata doesn't exist, create a new one
3121 if (!defined($doc_rec->{$metaname})){
3122 $doc_rec->{$metaname} = [ $macro_metavalue ];
3123 }
3124 # Else, let's acculumate the values
3125 else {
3126 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3127 }
3128
3129 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3130 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3131
3132 if ($status != 0) {
3133 # Catch error if gdbmget failed
3134 my $mess = "Failed to set metadata key: $docid\n";
3135
3136 $mess .= "PATH: $ENV{'PATH'}\n";
3137 $mess .= "Exit status: $status\n";
3138 $mess .= "System Error Message: $!\n";
3139
3140 $gsdl_cgi->generate_error($mess);
3141 }
3142 else {
3143 my $mess = "insert-metadata successful: Key[$docid]\n";
3144 $mess .= " [In metadata.xml] $metaname";
3145 $mess .= " = $metavalue\n";
3146 $mess .= " [In database] $metaname";
3147 $mess .= " = $macro_metavalue\n";
3148 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3149 $gsdl_cgi->generate_ok_message($mess);
3150 }
3151 #=======================================================================#
3152 # set_metadata (accumulate version) [END]
3153 #=======================================================================#
3154
3155 # Release the lock once it is done
3156 $self->unlock_collection($username, $collect);
3157}
3158
31591;
Note: See TracBrowser for help on using the repository browser.