source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27261

Last change on this file since 27261 was 27261, checked in by ak19, 11 years ago

Adding in 2 basic JSON examples for the metadata-server.pl help/usage string that can be pasted in the browser. Still to add help strings for other metadata-server actions that take a JSON parameter.

File size: 100.5 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ "metapos" ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos", "metamode" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "where", "json" ],
92 'optional-args' => [ ],
93 'help-string' => [
94 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
95
96 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
97
98# The same examples rewritten for when running the metadata-server.pl script from the commandline:
99
100# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
101
102# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
103
104 "set-archives-metadata-array" => {
105 'compulsory-args' => [ "json" ],
106 'optional-args' => [ ] },
107
108 "set-import-metadata-array" => {
109 'compulsory-args' => [ "json" ],
110 'optional-args' => [ ] },
111
112 "set-index-metadata-array" => {
113 'compulsory-args' => [ "json" ],
114 'optional-args' => [ ] },
115
116 "set-live-metadata-array" => {
117 'compulsory-args' => [ "json" ],
118 'optional-args' => [ ] },
119
120 #REMOVE METHODS
121 "remove-import-metadata" => {
122 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
123 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
124
125 "remove-archives-metadata" => {
126 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
127 'optional-args' => [ "metapos", "metavalue" ] },
128
129 "remove-live-metadata" => {
130 'compulsory-args' => [ "d", "metaname" ],
131 'optional-args' => [ ] },
132
133 "remove-index-metadata" => {
134 'compulsory-args' => [ "d", "metaname" ],
135 'optional-args' => [ "metapos", "metavalue" ] },
136
137 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
138 'compulsory-args' => [ "d", "metaname" ],
139 'optional-args' => [ "where", "metapos", "metavalue" ] },
140
141 #INSERT METHODS
142 "insert-metadata" => {
143 'compulsory-args' => [ "d", "metaname", "metavalue" ],
144 'optional-args' => [ ] }
145};
146
147
148sub new
149{
150 my $class = shift (@_);
151 my ($gsdl_cgi,$iis6_mode) = @_;
152
153 # Treat metavalue specially. To transmit this through a GET request
154 # the Javascript side has url-encoded it, so here we need to decode
155 # it before proceeding
156
157 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
158 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
159 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
160
161 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
162 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
163
164 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
165
166 return bless $self, $class;
167}
168
169
170sub get_live_metadata
171{
172 my $self = shift @_;
173
174 my $username = $self->{'username'};
175 my $collect = $self->{'collect'};
176 my $gsdl_cgi = $self->{'gsdl_cgi'};
177 my $gsdlhome = $self->{'gsdlhome'};
178 my $infodbtype = $self->{'infodbtype'};
179
180 # live metadata gets/saves value scoped (prefixed) by the current usename
181 # so (for now) let's not bother to enforce authentication
182
183 # Obtain the collect dir
184 my $site = $self->{'site'};
185 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
186 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
187
188 # Make sure the collection isn't locked by someone else
189 $self->lock_collection($username, $collect);
190
191 # look up additional args
192 my $docid = $self->{'d'};
193 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
194 $gsdl_cgi->generate_error("No docid (d=...) specified.");
195 }
196
197 # Generate the dbkey
198 my $metaname = $self->{'metaname'};
199 my $dbkey = "$docid.$metaname";
200
201 # To people who know $collect_tail please add some comments
202 # Obtain path to the database
203 my $collect_tail = $collect;
204 $collect_tail =~ s/^.*[\/|\\]//;
205 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
206 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
207
208 # Obtain the content of the key
209 my $cmd = "gdbmget $infodb_file_path $dbkey";
210 if (open(GIN,"$cmd |") == 0) {
211 # Catch error if gdbmget failed
212 my $mess = "Failed to get metadata key: $metaname\n";
213 $mess .= "$!\n";
214
215 $gsdl_cgi->generate_error($mess);
216 }
217 else {
218 binmode(GIN,":utf8");
219 # Read everything in and concatenate them into $metavalue
220 my $metavalue = "";
221 my $line;
222 while (defined ($line=<GIN>)) {
223 $metavalue .= $line;
224 }
225 close(GIN);
226 chomp($metavalue); # Get rid off the tailing newlines
227 $gsdl_cgi->generate_ok_message("$metavalue");
228 }
229
230 # Release the lock once it is done
231 $self->unlock_collection($username, $collect);
232}
233
234# just calls the index version
235sub get_metadata
236{
237 my $self = shift @_;
238 $self->get_index_metadata(@_);
239}
240
241sub get_index_metadata
242{
243 my $self = shift @_;
244
245 my $username = $self->{'username'};
246 my $collect = $self->{'collect'};
247 my $gsdl_cgi = $self->{'gsdl_cgi'};
248 my $gsdlhome = $self->{'gsdlhome'};
249
250 # Authenticate user if it is enabled
251 if ($baseaction::authentication_enabled) {
252 # Ensure the user is allowed to edit this collection
253 &authenticate_user($gsdl_cgi, $username, $collect);
254 }
255
256 # Obtain the collect dir
257 my $site = $self->{'site'};
258 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
259 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
260
261 # Make sure the collection isn't locked by someone else
262 $self->lock_collection($username, $collect);
263
264 # look up additional args
265 my $docid = $self->{'d'};
266 my $metaname = $self->{'metaname'};
267 my $metapos = $self->{'metapos'};
268 my $infodbtype = $self->{'infodbtype'};
269
270 # To people who know $collect_tail please add some comments
271 # Obtain path to the database
272 my $collect_tail = $collect;
273 $collect_tail =~ s/^.*[\/\\]//;
274 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
275 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
276
277 # Read the docid entry
278 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
279
280 # Basically loop through and unescape_html the values
281 foreach my $k (keys %$doc_rec) {
282 my @escaped_v = ();
283 foreach my $v (@{$doc_rec->{$k}}) {
284 my $ev = &ghtml::unescape_html($v);
285 push(@escaped_v, $ev);
286 }
287 $doc_rec->{$k} = \@escaped_v;
288 }
289
290 # Obtain the specified metadata value
291 $metapos = 0 if (!defined $metapos);
292 my $metavalue = $doc_rec->{$metaname}->[$metapos];
293 $gsdl_cgi->generate_ok_message("$metavalue");
294
295 # Release the lock once it is done
296 $self->unlock_collection($username, $collect);
297}
298
299
300sub get_import_metadata
301{
302 my $self = shift @_;
303
304 my $username = $self->{'username'};
305 my $collect = $self->{'collect'};
306 my $gsdl_cgi = $self->{'gsdl_cgi'};
307 my $gsdlhome = $self->{'gsdlhome'};
308
309 # Authenticate user if it is enabled
310 if ($baseaction::authentication_enabled) {
311 # Ensure the user is allowed to edit this collection
312 &authenticate_user($gsdl_cgi, $username, $collect);
313 }
314
315 # Obtain the collect dir
316 my $site = $self->{'site'};
317 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
318 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
319
320 # Make sure the collection isn't locked by someone else
321 $self->lock_collection($username, $collect);
322
323 # look up additional args
324 my $docid = $self->{'d'};
325 my $metaname = $self->{'metaname'};
326 my $metapos = $self->{'metapos'};
327 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
328
329 my $infodbtype = $self->{'infodbtype'};
330 if (!defined $docid)
331 {
332 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
333 }
334
335 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
336 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
337 my $metadata_xml_file;
338 my $import_filename = undef;
339
340
341 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
342 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
343 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
344
345 # This now stores the full pathname
346 $import_filename = $doc_rec->{'src-file'}->[0];
347
348 # figure out correct metadata.xml file [?]
349 # Assuming the metadata.xml file is next to the source file
350 # Note: This will not work if it is using the inherited metadata from the parent folder
351 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
352 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
353
354 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
355
356 # Release the lock once it is done
357 $self->unlock_collection($username, $collect);
358}
359
360sub get_metadata_from_metadata_xml
361{
362 my $self = shift @_;
363 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
364
365 my @rules =
366 (
367 _default => 'raw',
368 'Metadata' => \&gfmxml_metadata,
369 'FileName' => \&mxml_filename
370 );
371
372 my $parser = XML::Rules->new
373 (
374 rules => \@rules,
375 output_encoding => 'utf8'
376 );
377
378 my $xml_in = "";
379 if (!open(MIN,"<$metadata_xml_filename"))
380 {
381 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
382 }
383 else
384 {
385 # Read them in
386 my $line;
387 while (defined ($line=<MIN>)) {
388 $xml_in .= $line;
389 }
390 close(MIN);
391
392 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
393
394 if(defined $parser->{'pad'}->{'metavalue'})
395 {
396 return $parser->{'pad'}->{'metavalue'};
397 }
398 else
399 {
400 return "";
401 }
402 }
403}
404
405sub gfmxml_metadata
406{
407 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
408
409 # no subsection support yet in metadata.xml
410
411 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
412 {
413 if (!defined $parser->{'parameters'}->{'poscount'})
414 {
415 $parser->{'parameters'}->{'poscount'} = 0;
416 }
417 else
418 {
419 $parser->{'parameters'}->{'poscount'}++;
420 }
421
422 # gets the first value by default, since metapos defaults to 0
423 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
424 {
425 if($parser->{'parameters'}->{'metapos'} > 0) {
426 print STDERR "@@@@ WARNING: non-zero metapos.\n";
427 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
428 }
429 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
430 }
431 }
432}
433
434sub get_archives_metadata
435{
436 my $self = shift @_;
437
438 my $username = $self->{'username'};
439 my $collect = $self->{'collect'};
440 my $gsdl_cgi = $self->{'gsdl_cgi'};
441 my $gsdlhome = $self->{'gsdlhome'};
442 my $infodbtype = $self->{'infodbtype'};
443
444 # Authenticate user if it is enabled
445 if ($baseaction::authentication_enabled) {
446 # Ensure the user is allowed to edit this collection
447 &authenticate_user($gsdl_cgi, $username, $collect);
448 }
449
450 # Obtain the collect dir
451 my $site = $self->{'site'};
452 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
453
454 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
455
456 # Make sure the collection isn't locked by someone else
457 $self->lock_collection($username, $collect);
458
459 # look up additional args
460 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
461 $docid_secnum = "" if (!defined $docid_secnum);
462
463 my $metaname = $self->{'metaname'};
464 my $metapos = $self->{'metapos'};
465 $metapos = 0 if (!defined $metapos);
466
467 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
468 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
469
470 # This now stores the full pathname
471 my $doc_filename = $doc_rec->{'doc-file'}->[0];
472
473 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
474
475 # Release the lock once it is done
476 $self->unlock_collection($username, $collect);
477}
478
479sub get_metadata_from_archive_xml
480{
481 my $self = shift @_;
482 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
483
484 my @start_rules = ('Section' => \&dxml_start_section);
485
486 my @rules =
487 (
488 _default => 'raw',
489 'Metadata' => \&gfdxml_metadata
490 );
491
492 my $parser = XML::Rules->new
493 (
494 start_rules => \@start_rules,
495 rules => \@rules,
496 output_encoding => 'utf8'
497 );
498
499 my $xml_in = "";
500 if (!open(MIN,"<$doc_xml_filename"))
501 {
502 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
503 }
504 else
505 {
506 # Read them in
507 my $line;
508 while (defined ($line=<MIN>)) {
509 $xml_in .= $line;
510 }
511 close(MIN);
512
513 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
514
515 if(defined $parser->{'pad'}->{'metavalue'})
516 {
517 return $parser->{'pad'}->{'metavalue'};
518 }
519 else
520 {
521 return "";
522 }
523 }
524}
525
526sub gfdxml_metadata
527{
528 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
529
530 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
531 {
532 return;
533 }
534
535 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
536 {
537 if (!defined $parser->{'parameters'}->{'poscount'})
538 {
539 $parser->{'parameters'}->{'poscount'} = 0;
540 }
541 else
542 {
543 $parser->{'parameters'}->{'poscount'}++;
544 }
545 }
546
547 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
548 {
549 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
550 }
551}
552
553sub _set_live_metadata
554{
555 my $self = shift @_;
556
557 my $collect = $self->{'collect'};
558 my $gsdl_cgi = $self->{'gsdl_cgi'};
559 #my $gsdlhome = $self->{'gsdlhome'};
560 my $infodbtype = $self->{'infodbtype'};
561
562 # Obtain the collect dir
563 my $site = $self->{'site'};
564 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
565 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
566
567
568 # look up additional args
569 my $docid = $self->{'d'};
570 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
571 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
572 }
573 my $metavalue = $self->{'metavalue'};
574
575 # Generate the dbkey
576 my $metaname = $self->{'metaname'};
577 my $dbkey = "$docid.$metaname";
578
579 # To people who know $collect_tail please add some comments
580 # Obtain path to the database
581 my $collect_tail = $collect;
582 $collect_tail =~ s/^.*[\/\\]//;
583 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
584 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
585
586 # Set the new value
587 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
588 my $status = system($cmd);
589 if ($status != 0) {
590 # Catch error if gdbmget failed
591 my $mess = "Failed to set metadata key: $dbkey\n";
592
593 $mess .= "PATH: $ENV{'PATH'}\n";
594 $mess .= "cmd = $cmd\n";
595 $mess .= "Exit status: $status\n";
596 $mess .= "System Error Message: $!\n";
597
598 $gsdl_cgi->generate_error($mess);
599 }
600 else {
601 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
602 }
603
604 #return $status; # in case calling functions have any further use for this
605}
606
607sub set_live_metadata
608{
609 my $self = shift @_;
610
611 my $username = $self->{'username'};
612 my $collect = $self->{'collect'};
613 my $gsdl_cgi = $self->{'gsdl_cgi'};
614
615 if ($baseaction::authentication_enabled) {
616 # Ensure the user is allowed to edit this collection
617 &authenticate_user($gsdl_cgi, $username, $collect);
618 }
619
620 # Make sure the collection isn't locked by someone else
621 $self->lock_collection($username, $collect);
622
623 $self->_set_live_metadata(@_);
624
625 # Release the lock once it is done
626 $self->unlock_collection($username, $collect);
627}
628
629sub set_index_metadata_entry
630{
631 my $self = shift @_;
632 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
633
634 # To people who know $collect_tail please add some comments
635 # Obtain path to the database
636 my $collect_tail = $collect;
637 $collect_tail =~ s/^.*[\/\\]//;
638 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
639 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
640
641# print STDERR "**** infodb file path = $infodb_file_path\n";
642# print STDERR "***** infodb type = $infodbtype\n";
643
644 # Read the docid entry
645 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
646
647 # Set the metadata value
648 if (defined $metapos) {
649 # if metamode=accumulate AND metapos, warn user and then use metapos
650 if (defined $metamode && $metamode eq "accumulate") {
651 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
652 print STDERR "**** Proceeding by using metapos\n";
653 }
654 $doc_rec->{$metaname}->[$metapos] = $metavalue;
655 }
656 elsif (defined $metamode && $metamode eq "override") {
657 $doc_rec->{$metaname} = [ $metavalue ];
658 }
659 else { # default for index was to override, but because accumulate is less destructive,
660 # and because accumulate is the default for archives and import, that's the new default for index too
661 if(defined $doc_rec->{$metaname}) {
662 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
663 } else {
664 $doc_rec->{$metaname} = [ $metavalue ];
665 }
666 }
667
668 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
669
670 return $status;
671
672}
673
674sub _set_import_metadata
675{
676 my $self = shift @_;
677
678 my $collect = $self->{'collect'};
679 my $gsdl_cgi = $self->{'gsdl_cgi'};
680 my $infodbtype = $self->{'infodbtype'};
681# my $gsdlhome = $self->{'gsdlhome'};
682
683 # Obtain the collect and archive dir
684 my $site = $self->{'site'};
685 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
686 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
687 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
688
689 # look up additional args
690 # want either d= or f=
691 my $docid = $self->{'d'};
692 my ($docid_root,$docid_secnum);
693 if(defined $docid) {
694 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
695 # as yet no support for setting subsection metadata in metadata.xml
696 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
697 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
698 return;
699 }
700 }
701
702 my $import_file = $self->{'f'};
703 if ((!defined $docid) && (!defined $import_file)) {
704 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
705 }
706
707 # Get the parameters and set default mode to "accumulate"
708 my $metaname = $self->{'metaname'};
709 my $metavalue = $self->{'metavalue'};
710## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
711 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
712
713 my $metamode = $self->{'metamode'};
714 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
715 # make "accumulate" the default (less destructive, as won't actually
716 # delete any existing values)
717 $metamode = "accumulate";
718 }
719
720 # adding metapos and prevmetavalue support to import_metadata subroutines
721 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
722 my $prevmetavalue = $self->{'prevmetavalue'};
723
724 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
725 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
726
727 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
728 $mess .= " $metaname";
729 $mess .= " = $metavalue";
730 $mess .= " ($metamode)\n";
731
732 $gsdl_cgi->generate_ok_message($mess);
733
734 #return $status; # in case calling functions have any further use for this
735}
736
737# the version of set_index_meta that doesn't do authentication
738sub _set_archives_metadata
739{
740 my $self = shift @_;
741
742 my $collect = $self->{'collect'};
743 my $gsdl_cgi = $self->{'gsdl_cgi'};
744 my $infodbtype = $self->{'infodbtype'};
745
746 # Obtain the collect and archive dir
747 my $site = $self->{'site'};
748 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
749 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
750
751 # look up additional args
752 my $docid = $self->{'d'};
753 my $metaname = $self->{'metaname'};
754 my $metavalue = $self->{'metavalue'};
755 my $prevmetavalue = $self->{'prevmetavalue'};
756
757 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
758 # Don't append "|| undef", since if metapos=0 it will then be set to undef
759
760 my $metamode = $self->{'metamode'};
761 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
762 # make "accumulate" the default (less destructive, as won't actually
763 # delete any existing values)
764 $metamode = "accumulate";
765 }
766
767 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
768 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
769
770 if ($status == 0) {
771 my $mess = "set-archives-metadata successful: Key[$docid]\n";
772 $mess .= " $metaname";
773 $mess .= "->[$metapos]" if (defined $metapos);
774 $mess .= " = $metavalue";
775 $mess .= " ($metamode)\n";
776
777 $gsdl_cgi->generate_ok_message($mess);
778 }
779 else {
780 my $mess .= "Failed to set archives metadata key: $docid\n";
781 $mess .= "Exit status: $status\n";
782 if(defined $self->{'error_msg'}) {
783 $mess .= "Error Message: $self->{'error_msg'}\n";
784 } else {
785 $mess .= "System Error Message: $!\n";
786 }
787 $mess .= "-" x 20 . "\n";
788
789 $gsdl_cgi->generate_error($mess);
790 }
791
792 #return $status; # in case calling functions have any further use for this
793}
794
795
796# the version of set_index_meta that doesn't do authentication
797sub _set_index_metadata
798{
799 my $self = shift @_;
800
801 my $collect = $self->{'collect'};
802 my $gsdl_cgi = $self->{'gsdl_cgi'};
803
804 my $site = $self->{'site'};
805 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
806
807 # look up additional args
808 my $docid = $self->{'d'};
809 my $metaname = $self->{'metaname'};
810 my $metapos = $self->{'metapos'}; # undef has meaning
811 my $metavalue = $self->{'metavalue'};
812 my $infodbtype = $self->{'infodbtype'};
813 my $metamode = $self->{'metamode'};
814
815 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
816
817 if ($status != 0) {
818 # Catch error if set infodb entry failed
819 my $mess = "Failed to set metadata key: $docid\n";
820
821 $mess .= "PATH: $ENV{'PATH'}\n";
822 $mess .= "Exit status: $status\n";
823 $mess .= "System Error Message: $!\n";
824
825 $gsdl_cgi->generate_error($mess);
826 }
827 else {
828 my $mess = "set-index-metadata successful: Key[$docid]\n";
829 $mess .= " $metaname";
830 $mess .= "->[$metapos]" if (defined $metapos);
831 $mess .= " = $metavalue\n";
832
833 $gsdl_cgi->generate_ok_message($mess);
834 }
835
836 #return $status; # in case calling functions have any further use for this
837}
838
839sub set_index_metadata
840{
841 my $self = shift @_;
842
843 my $username = $self->{'username'};
844 my $collect = $self->{'collect'};
845 my $gsdl_cgi = $self->{'gsdl_cgi'};
846 #my $gsdlhome = $self->{'gsdlhome'};
847
848 if ($baseaction::authentication_enabled) {
849 # Ensure the user is allowed to edit this collection
850 &authenticate_user($gsdl_cgi, $username, $collect);
851 }
852
853 my $site = $self->{'site'};
854 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
855
856 $gsdl_cgi->checked_chdir($collect_dir);
857
858 # Obtain the collect dir
859 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
860
861 # Make sure the collection isn't locked by someone else
862 $self->lock_collection($username, $collect);
863
864 $self->_set_index_metadata(@_);
865
866 # Release the lock once it is done
867 $self->unlock_collection($username, $collect);
868}
869
870# call this to set the metadata for a combination of dirs archives, import or index, or live
871# if none specified, defaults to index which was the original behaviour of set_metadata.
872sub set_metadata
873{
874 my $self = shift @_;
875
876 # Testing that not defining a variable, setting it to "" or to " " all return false
877 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
878
879 my $where = $self->{'where'};
880 if(!$where) {
881 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
882 return;
883 }
884
885 # authenticate and lock collection once, even if processing multiple dirs
886 my $username = $self->{'username'};
887 my $collect = $self->{'collect'};
888 my $gsdl_cgi = $self->{'gsdl_cgi'};
889
890 if ($baseaction::authentication_enabled) {
891 # Ensure the user is allowed to edit this collection
892 #&authenticate_user($gsdl_cgi, $username, $collect);
893 $self->authenticate_user($username, $collect);
894 }
895
896 if($where =~ m/index/) {
897 my $site = $self->{'site'};
898 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
899 $gsdl_cgi->checked_chdir($collect_dir);
900 }
901
902 # Make sure the collection isn't locked by someone else
903 $self->lock_collection($username, $collect);
904
905
906 # now at last can set the metadata. $where can specify multiple
907 # $where is of the form: import|archives|index, or a subset thereof
908
909 #my @whichdirs = split('\|', $where);
910
911 # just check whether $where contains import/archives/index/live in turn, and
912 # for each case, process it accordingly
913 if($where =~ m/import/) {
914 $self->_set_import_metadata(@_);
915 }
916
917 if($where =~ m/archives/) {
918
919 # look up docID arg which is optional to set_metadata because it's optional
920 # to set_import, but which is compulsory to set_archives_metadata
921 my $docid = $self->{'d'};
922 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
923 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
924 }
925 # we have a docid, so can set archives meta
926 $self->_set_archives_metadata(@_);
927 }
928
929 if($where =~ m/index/) {
930
931 # look up docID arg which is optional to set_metadata because it's optional
932 # to set_import, but which is compulsory to set_archives_metadata
933 my $docid = $self->{'d'};
934 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
935 $gsdl_cgi->generate_error("No docid (d=...) specified.");
936 }
937 # we have a docid, so can set index meta
938 $self->_set_index_metadata(@_);
939 }
940
941 if($where =~ m/live/) {
942 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
943 }
944
945 # Release the lock once it is done
946 $self->unlock_collection($username, $collect);
947}
948
949sub set_metadata_array
950{
951 my $self = shift @_;
952
953 my $where = $self->{'where'};
954 if(!$where) {
955 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
956 return;
957 }
958
959 my $username = $self->{'username'};
960 my $collect = $self->{'collect'};
961 my $gsdl_cgi = $self->{'gsdl_cgi'};
962
963 if ($baseaction::authentication_enabled) {
964 # Ensure the user is allowed to edit this collection
965 &authenticate_user($gsdl_cgi, $username, $collect);
966 }
967
968 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
969 # But including the stmt during this code reorganisation to preserve as-is what used to happen
970 my $site = $self->{'site'};
971 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
972 $gsdl_cgi->checked_chdir($collect_dir);
973
974 # Make sure the collection isn't locked by someone else
975 $self->lock_collection($username, $collect);
976
977 if($where =~ m/import/) {
978 $self->_set_import_metadata_array(@_);
979 }
980 if($where =~ m/archives/) {
981 $self->_set_archives_metadata_array(@_);
982 }
983 if($where =~ m/index/) {
984 $self->_set_index_metadata_array(@_);
985 }
986 if($where =~ m/live/) {
987 $self->_set_live_metadata_array(@_);
988 }
989
990 # Release the lock once it is done
991 $self->unlock_collection($username, $collect);
992}
993
994sub _set_index_metadata_array
995{
996 my $self = shift @_;
997
998 my $collect = $self->{'collect'};
999 my $gsdl_cgi = $self->{'gsdl_cgi'};
1000 my $site = $self->{'site'};
1001 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1002
1003
1004 # look up additional args
1005
1006 my $infodbtype = $self->{'infodbtype'};
1007
1008 my $json_str = $self->{'json'};
1009 my $doc_array = decode_json $json_str;
1010
1011
1012 my $global_status = 0;
1013 my $global_mess = "";
1014
1015 my @all_docids = ();
1016
1017 foreach my $doc_array_rec ( @$doc_array ) {
1018
1019 my $status = -1;
1020 my $docid = $doc_array_rec->{'docid'};
1021
1022 push(@all_docids,$docid);
1023
1024 my $metaname = $doc_array_rec->{'metaname'};
1025 if(defined $metaname) {
1026 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
1027 my $metavalue = $doc_array_rec->{'metavalue'};
1028 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1029
1030 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1031 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1032 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1033
1034 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1035 $metaname = $metatable_rec->{'metaname'};
1036 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1037 my $metapos = undef;
1038 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1039
1040 foreach my $metavalue ( @$metavals ) { # metavals is an array
1041 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1042 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1043 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1044 # metavalue for this metaname will be lost
1045 $metamode = "accumulate";
1046 }
1047 }
1048 }
1049 }
1050
1051 if ($status != 0) {
1052 # Catch error if set infodb entry failed
1053 $global_status = $status;
1054 $global_mess .= "Failed to set metadata key: $docid\n";
1055 $global_mess .= "Exit status: $status\n";
1056 $global_mess .= "System Error Message: $!\n";
1057 $global_mess .= "-" x 20;
1058 }
1059 }
1060
1061 if ($global_status != 0) {
1062 $global_mess .= "PATH: $ENV{'PATH'}\n";
1063 $gsdl_cgi->generate_error($global_mess);
1064 }
1065 else {
1066 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1067 $gsdl_cgi->generate_ok_message($mess);
1068 }
1069}
1070
1071sub set_index_metadata_array
1072{
1073 my $self = shift @_;
1074
1075 my $username = $self->{'username'};
1076 my $collect = $self->{'collect'};
1077 my $gsdl_cgi = $self->{'gsdl_cgi'};
1078# my $gsdlhome = $self->{'gsdlhome'};
1079
1080 if ($baseaction::authentication_enabled) {
1081 # Ensure the user is allowed to edit this collection
1082 &authenticate_user($gsdl_cgi, $username, $collect);
1083 }
1084
1085 my $site = $self->{'site'};
1086 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1087
1088 $gsdl_cgi->checked_chdir($collect_dir);
1089
1090 # Obtain the collect dir
1091 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1092
1093 # Make sure the collection isn't locked by someone else
1094 $self->lock_collection($username, $collect);
1095
1096 $self->_set_index_metadata_array(@_);
1097
1098 # Release the lock once it is done
1099 $self->unlock_collection($username, $collect);
1100}
1101
1102# experimental, newly added in and untested
1103sub _set_live_metadata_array
1104{
1105 my $self = shift @_;
1106
1107 my $collect = $self->{'collect'};
1108 my $gsdl_cgi = $self->{'gsdl_cgi'};
1109
1110 my $site = $self->{'site'};
1111 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1112
1113
1114 # look up additional args
1115 my $infodbtype = $self->{'infodbtype'};
1116 # To people who know $collect_tail please add some comments
1117 # Obtain path to the database
1118 my $collect_tail = $collect;
1119 $collect_tail =~ s/^.*[\/\\]//;
1120 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1121 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1122
1123
1124 my $json_str = $self->{'json'};
1125 my $doc_array = decode_json $json_str;
1126
1127
1128 my $global_status = 0;
1129 my $global_mess = "";
1130
1131 my @all_docids = ();
1132
1133
1134 foreach my $doc_array_rec ( @$doc_array ) {
1135
1136 my $status = -1;
1137 my $docid = $doc_array_rec->{'docid'};
1138
1139 push(@all_docids,$docid);
1140
1141 my $metaname = $doc_array_rec->{'metaname'};
1142 if(defined $metaname) {
1143 my $dbkey = "$docid.$metaname";
1144 my $metavalue = $doc_array_rec->{'metavalue'};
1145
1146 # Set the new value
1147 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1148 $status = system($cmd);
1149
1150 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1151 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1152 foreach my $metatable_rec ( @$metatable ) {
1153 $metaname = $metatable_rec->{'metaname'};
1154 my $dbkey = "$docid.$metaname";
1155
1156 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1157 foreach my $metavalue ( @$metavals ) {
1158 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1159 $status = system($cmd);
1160 }
1161 }
1162
1163 }
1164
1165 if ($status != 0) {
1166 # Catch error if gdbmget failed
1167 $global_status = $status;
1168 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1169 $global_mess .= "Exit status: $status\n";
1170 $global_mess .= "System Error Message: $!\n";
1171 $global_mess .= "-" x 20;
1172 }
1173 }
1174
1175 if ($global_status != 0) {
1176 $global_mess .= "PATH: $ENV{'PATH'}\n";
1177 $gsdl_cgi->generate_error($global_mess);
1178 }
1179 else {
1180 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1181 $gsdl_cgi->generate_ok_message($mess);
1182 }
1183}
1184
1185sub set_live_metadata_array
1186{
1187 my $self = shift @_;
1188
1189 my $username = $self->{'username'};
1190 my $collect = $self->{'collect'};
1191 my $gsdl_cgi = $self->{'gsdl_cgi'};
1192
1193 if ($baseaction::authentication_enabled) {
1194 # Ensure the user is allowed to edit this collection
1195 &authenticate_user($gsdl_cgi, $username, $collect);
1196 }
1197
1198 my $site = $self->{'site'};
1199 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1200
1201 $gsdl_cgi->checked_chdir($collect_dir);
1202
1203 # Make sure the collection isn't locked by someone else
1204 $self->lock_collection($username, $collect);
1205
1206 $self->_set_live_metadata_array(@_);
1207
1208 # Release the lock once it is done
1209 $self->unlock_collection($username, $collect);
1210}
1211
1212
1213sub dxml_metadata
1214{
1215 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1216 my $metaname = $parser->{'parameters'}->{'metaname'};
1217 my $metamode = $parser->{'parameters'}->{'metamode'};
1218
1219 print STDERR "**** Processing closing </Metadata> tag\n";
1220
1221 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1222
1223 # Find the right metadata tag and checks if we are going to
1224 # override it
1225 #
1226 # Note: This over writes the first metadata block it
1227 # encountered. If there are multiple Sections in the doc.xml, it
1228 # might not behave as you would expect
1229
1230 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1231## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1232## print STDERR "**** metamode = $metamode\n";
1233
1234 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1235 {
1236 my $name_attr = $attrHash->{'name'};
1237 if (($name_attr eq $metaname) && ($metamode eq "override"))
1238 {
1239 if (!defined $parser->{'parameters'}->{'poscount'})
1240 {
1241 $parser->{'parameters'}->{'poscount'} = 0;
1242 }
1243 else
1244 {
1245 $parser->{'parameters'}->{'poscount'}++;
1246 }
1247
1248 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1249 {
1250 ##print STDERR "#### got match!!\n";
1251 # Get the value and override the current value
1252 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1253 $attrHash->{'_content'} = $metavalue;
1254
1255 # Don't want it to wipe out any other pieces of metadata
1256 $parser->{'parameters'}->{'metamode'} = "done";
1257 }
1258 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1259 {
1260 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1261 $attrHash->{'_content'} = $metavalue;
1262 $parser->{'parameters'}->{'metamode'} = "done";
1263 }
1264 }
1265 }
1266
1267 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1268 return [$tagname => $attrHash];
1269}
1270
1271# This method exists purely for catching invalid section numbers that the client
1272# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1273# we've seen all the Sections in the doc.xml, and none of their section nums matched
1274# if the metamode has not been set to 'done' by then.
1275sub dxml_archive
1276{
1277 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1278 my $metamode = $parser->{'parameters'}->{'metamode'};
1279
1280 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1281 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1282
1283# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1284
1285 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1286 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1287
1288 my $metaname = $parser->{'parameters'}->{'metaname'};
1289 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1290
1291 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1292 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1293 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1294 print STDERR "\tin section $opt_doc_secnum.\n";
1295 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1296 }
1297
1298 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1299 return [$tagname => $attrHash];
1300}
1301
1302sub dxml_description
1303{
1304 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1305 my $metamode = $parser->{'parameters'}->{'metamode'};
1306
1307 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1308 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1309
1310 print STDERR "**** Processing a closing </Description> tag \n";
1311# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1312
1313 # Accumulate the metadata
1314
1315 # We'll be accumulating metadata at this point if we haven't found and therefore
1316 # haven't processed the metadata yet.
1317 # For subsections, this means that if we're at a matching subsection, but haven't
1318 # found the correct metaname to override in that subsection, we accumulate it as new
1319 # meta in the subsection by adding it to the current description.
1320 # If there's no subsection info for the metadata, it will accumulate at the top level
1321 # section description if we hadn't found a matching metaname to override at this point.
1322
1323 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1324 # at the toplevel section. In the latter case, it means we want to process meta in the
1325 # toplevel section. So the eq check between the values below will work in all cases.
1326
1327 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1328 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1329 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1330
1331 if ($curr_secnum eq $opt_doc_secnum
1332 && ($metamode eq "accumulate" || $metamode eq "override")) {
1333 if ($metamode eq "override") {
1334 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1335 }
1336
1337 # If we get to here and metamode is override, this means there
1338 # was no existing value to overide => treat as an append operation
1339
1340 # Tack a new metadata tag on to the end of the <Metadata>+ block
1341 my $metaname = $parser->{'parameters'}->{'metaname'};
1342 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1343
1344 my $metadata_attr = {
1345 '_content' => $metavalue,
1346 'name' => $metaname,
1347 'mode' => "accumulate"
1348 };
1349
1350 my $append_metadata = [ "Metadata" => $metadata_attr ];
1351 my $description_content = $attrHash->{'_content'};
1352
1353 print "Appending metadata to doc.xml\n";
1354
1355 if (ref($description_content)) {
1356 # got some existing interesting nested content
1357 push(@$description_content, " ", $append_metadata ,"\n ");
1358 }
1359 else {
1360 #description_content is most likely a string such as "\n"
1361 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1362 }
1363
1364 $parser->{'parameters'}->{'metamode'} = "done";
1365 }
1366 else {
1367 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1368## print STDERR "**** NOT ACCUMULATE?!? \n";
1369 }
1370
1371 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1372 return [$tagname => $attrHash];
1373}
1374
1375
1376sub dxml_start_section
1377{
1378 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1379
1380 my $new_depth = scalar(@$contextArray);
1381
1382 print STDERR "**** START SECTION \n";
1383
1384 if ($new_depth == 1) {
1385 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1386 $parser->{'parameters'}->{'curr_section_num'} = "";
1387 }
1388
1389 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1390 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1391
1392 my $new_secnum;
1393
1394 if ($new_depth > $old_depth) {
1395 # child subsection
1396 $new_secnum = "$old_secnum.1";
1397 }
1398 elsif ($new_depth == $old_depth) {
1399 # sibling section => increase it's value by 1
1400 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1401 $tail_num++;
1402 $new_secnum = $old_secnum;
1403 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1404 }
1405 else {
1406 # back up to parent section => lopp off tail
1407 $new_secnum = $old_secnum;
1408 $new_secnum =~ s/\.\d+$//;
1409 }
1410
1411 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1412 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1413
1414 1;
1415}
1416
1417sub edit_xml_file
1418{
1419 my $self = shift @_;
1420 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1421
1422 # use XML::Rules to add it in (read in and out again)
1423 my $parser = XML::Rules->new(start_rules => $start_rules,
1424 rules => $rules,
1425 style => 'filter',
1426 output_encoding => 'utf8' );
1427
1428 my $xml_in = "";
1429 if (!open(MIN,"<$filename")) {
1430 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1431 }
1432 else {
1433 # Read all the text in
1434 my $line;
1435 while (defined ($line=<MIN>)) {
1436 $xml_in .= $line;
1437 }
1438 close(MIN);
1439
1440 my $MOUT;
1441 if (!open($MOUT,">$filename")) {
1442 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1443 }
1444 else {
1445 # Matched lines will get handled by the call backs
1446## my $xml_out = "";
1447
1448 binmode($MOUT,":utf8");
1449 $parser->filter($xml_in,$MOUT, $options);
1450
1451# binmode(MOUT,":utf8");
1452# print MOUT $xml_out;
1453 close($MOUT);
1454 }
1455 }
1456
1457 # copy across any custom error information that was stored during parsing
1458 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1459}
1460
1461sub edit_doc_xml
1462{
1463 my $self = shift @_;
1464 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1465
1466 my $info_mess = <<RAWEND;
1467****************************
1468 edit_doc_xml()
1469****************************
1470doc_xml_filename = $doc_xml_filename
1471metaname = $metaname
1472metavalue = $metavalue
1473metapos = $metapos
1474metamode = $metamode
1475opt_secnum = $opt_secnum
1476prevmetavalue = $prevmetavalue
1477****************************
1478RAWEND
1479
1480 $gsdl_cgi->generate_message($info_mess);
1481
1482 # To monitor which section/subsection number we are in
1483 my @start_rules =
1484 ( 'Section' => \&dxml_start_section );
1485
1486 # use XML::Rules to add it in (read in and out again)
1487 # Set the call back functions
1488 my @rules =
1489 ( _default => 'raw',
1490 'Metadata' => \&dxml_metadata,
1491 'Description' => \&dxml_description,
1492 'Archive' => \&dxml_archive); # just for catching errors at end
1493
1494 # Sets the parameters
1495 my $options = { 'metaname' => $metaname,
1496 'metapos' => $metapos,
1497 'metavalue' => $metavalue,
1498 'metamode' => $metamode,
1499 'prevmetavalue' => $prevmetavalue };
1500
1501 if (defined $opt_secnum) {
1502 $options->{'secnum'} = $opt_secnum;
1503 }
1504
1505 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1506}
1507
1508sub set_archives_metadata_entry
1509{
1510 my $self = shift @_;
1511 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1512
1513 my $info_mess = <<RAWEND;
1514****************************
1515 set_archives_metadata_entry()
1516****************************
1517archive_dir = $archive_dir
1518collect_dir = $collect_dir
1519collect = $collect
1520infodbtype = $infodbtype
1521docid = $docid
1522metaname = $metaname
1523metapos = $metapos
1524metavalue = $metavalue
1525metamode = $metamode
1526prevmetavalue = $prevmetavalue
1527****************************
1528RAWEND
1529
1530 $gsdl_cgi->generate_message($info_mess);
1531
1532 # Obtain the doc.xml path for the specified docID
1533 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1534
1535 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1536 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1537 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1538
1539 # The $doc_xml_file is relative to the archives, and now let's get the full path
1540 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1541 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1542
1543 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1544 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1545 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1546 if(!defined $metapos && $metamode eq "override") {
1547 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1548 }
1549
1550 # Edit the doc.xml file with the specified metadata name, value and position.
1551 # TODO: there is a potential problem here as this edit_doc_xml function
1552 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1553 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1554
1555 # dxml_metadata method ignores metapos if metamode anything other than override
1556 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1557 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1558
1559 # return 0; # return 0 for now to indicate no error
1560 return (defined $self->{'error_msg'}) ? 1 : 0;
1561}
1562
1563
1564sub set_archives_metadata
1565{
1566 my $self = shift @_;
1567
1568 my $username = $self->{'username'};
1569 my $collect = $self->{'collect'};
1570 my $gsdl_cgi = $self->{'gsdl_cgi'};
1571
1572 if ($baseaction::authentication_enabled) {
1573 # Ensure the user is allowed to edit this collection
1574 $self->authenticate_user($username, $collect);
1575 }
1576
1577 # Make sure the collection isn't locked by someone else
1578 $self->lock_collection($username, $collect);
1579
1580 $self->_set_archives_metadata(@_);
1581
1582 # Release the lock once it is done
1583 $self->unlock_collection($username, $collect);
1584}
1585
1586sub _set_archives_metadata_array
1587{
1588 my $self = shift @_;
1589
1590 my $collect = $self->{'collect'};
1591 my $gsdl_cgi = $self->{'gsdl_cgi'};
1592 my $site = $self->{'site'};
1593 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1594
1595 # look up additional args
1596
1597 my $infodbtype = $self->{'infodbtype'};
1598
1599 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1600
1601 my $json_str = $self->{'json'};
1602 my $doc_array = decode_json $json_str;
1603
1604
1605 my $global_status = 0;
1606 my $global_mess = "";
1607
1608 my @all_docids = ();
1609
1610 foreach my $doc_array_rec ( @$doc_array ) {
1611 my $status = -1;
1612 my $docid = $doc_array_rec->{'docid'};
1613
1614 push(@all_docids,$docid);
1615
1616 my $metaname = $doc_array_rec->{'metaname'};
1617 if(defined $metaname) {
1618
1619 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1620
1621 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1622 my $metavalue = $doc_array_rec->{'metavalue'};
1623 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1624
1625
1626 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1627 # make "accumulate" the default (less destructive, as it won't actually
1628 # delete any existing values)
1629 $metamode = "accumulate";
1630 }
1631
1632 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1633 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1634 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1635 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1636
1637 foreach my $metatable_rec ( @$metatable ) {
1638 $metaname = $metatable_rec->{'metaname'};
1639 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1640 my $metapos = undef;
1641 my $prevmetavalue = undef;
1642 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1643
1644 foreach my $metavalue ( @$metavals ) {
1645 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1646 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1647
1648 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1649 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1650 # metavalue for this metaname will be lost
1651 $metamode = "accumulate";
1652 }
1653 }
1654 }
1655 }
1656
1657 if ($status != 0) {
1658 # Catch error if set infodb entry failed
1659 $global_status = $status;
1660 $global_mess .= "Failed to set metadata key: $docid\n";
1661 $global_mess .= "Exit status: $status\n";
1662 $global_mess .= "System Error Message: $!\n";
1663 $global_mess .= "-" x 20 . "\n";
1664 }
1665 }
1666
1667 if ($global_status != 0) {
1668 $global_mess .= "PATH: $ENV{'PATH'}\n";
1669 $gsdl_cgi->generate_error($global_mess);
1670 }
1671 else {
1672 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1673 $gsdl_cgi->generate_ok_message($mess);
1674 }
1675}
1676
1677sub set_archives_metadata_array
1678{
1679 my $self = shift @_;
1680
1681 my $username = $self->{'username'};
1682 my $collect = $self->{'collect'};
1683 my $gsdl_cgi = $self->{'gsdl_cgi'};
1684# my $gsdlhome = $self->{'gsdlhome'};
1685
1686 if ($baseaction::authentication_enabled) {
1687 # Ensure the user is allowed to edit this collection
1688 &authenticate_user($gsdl_cgi, $username, $collect);
1689 }
1690
1691 my $site = $self->{'site'};
1692 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1693
1694 $gsdl_cgi->checked_chdir($collect_dir);
1695
1696 # Obtain the collect dir
1697 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1698
1699 # Make sure the collection isn't locked by someone else
1700 $self->lock_collection($username, $collect);
1701
1702 $self->_set_archives_metadata_array(@_);
1703
1704 # Release the lock once it is done
1705 $self->unlock_collection($username, $collect);
1706}
1707
1708sub _remove_archives_metadata
1709{
1710 my $self = shift @_;
1711
1712 my $collect = $self->{'collect'};
1713 my $gsdl_cgi = $self->{'gsdl_cgi'};
1714# my $gsdlhome = $self->{'gsdlhome'};
1715 my $infodbtype = $self->{'infodbtype'};
1716
1717 my $site = $self->{'site'};
1718
1719 # Obtain the collect and archive dir
1720 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1721
1722 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1723
1724 # look up additional args
1725 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1726
1727 my $metaname = $self->{'metaname'};
1728 my $metapos = $self->{'metapos'};
1729
1730 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1731
1732 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1733 # default to deleting the first metavalue for the given metaname
1734 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1735 # seem to get deleted in one single remove_archives_meta action invocation.
1736 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1737 # deleted, both get deleted.
1738 if(!defined $metapos && !defined $metavalue) {
1739 $metapos = 0;
1740 }
1741
1742 my $metamode = $self->{'metamode'} || undef;
1743
1744 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1745 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1746
1747 # This now stores the full pathname
1748 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1749
1750 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1751# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1752
1753 if ($status == 0)
1754 {
1755 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1756 $mess .= " $metaname";
1757 $mess .= "->[$metapos]" if (defined $metapos);
1758
1759 $gsdl_cgi->generate_ok_message($mess);
1760 }
1761 else
1762 {
1763 my $mess .= "Failed to remove archives metadata key: $docid\n";
1764 $mess .= "Exit status: $status\n";
1765 $mess .= "System Error Message: $!\n";
1766 $mess .= "-" x 20 . "\n";
1767
1768 $gsdl_cgi->generate_error($mess);
1769 }
1770
1771 #return $status; # in case calling functions have a use for this
1772}
1773
1774sub remove_archives_metadata
1775{
1776 my $self = shift @_;
1777
1778 my $username = $self->{'username'};
1779 my $collect = $self->{'collect'};
1780 my $gsdl_cgi = $self->{'gsdl_cgi'};
1781
1782 if ($baseaction::authentication_enabled)
1783 {
1784 # Ensure the user is allowed to edit this collection
1785 &authenticate_user($gsdl_cgi, $username, $collect);
1786 }
1787
1788 # Make sure the collection isn't locked by someone else
1789 $self->lock_collection($username, $collect);
1790
1791 $self->_remove_archives_metadata(@_);
1792
1793 # Release the lock once it is done
1794 $self->unlock_collection($username, $collect);
1795}
1796
1797sub remove_from_doc_xml
1798{
1799 my $self = shift @_;
1800 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1801
1802 my @start_rules = ('Section' => \&dxml_start_section);
1803
1804 # Set the call-back functions for the metadata tags
1805 my @rules =
1806 (
1807 _default => 'raw',
1808 'Metadata' => \&rfdxml_metadata
1809 );
1810
1811 my $parser = XML::Rules->new
1812 (
1813 start_rules => \@start_rules,
1814 rules => \@rules,
1815 style => 'filter',
1816 output_encoding => 'utf8',
1817# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1818 stripspaces => 2|0|0 # ineffectual
1819 );
1820
1821 my $status = 0;
1822 my $xml_in = "";
1823 if (!open(MIN,"<$doc_xml_filename"))
1824 {
1825 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1826 $status = 1;
1827 }
1828 else
1829 {
1830 # Read them in
1831 my $line;
1832 while (defined ($line=<MIN>)) {
1833 $xml_in .= $line;
1834 }
1835 close(MIN);
1836
1837 # Filter with the call-back functions
1838 my $xml_out = "";
1839
1840 my $MOUT;
1841 if (!open($MOUT,">$doc_xml_filename")) {
1842 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1843 $status = 1;
1844 }
1845 else {
1846 binmode($MOUT,":utf8");
1847 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1848 close($MOUT);
1849 }
1850 }
1851 return $status;
1852}
1853
1854sub rfdxml_metadata
1855{
1856 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1857
1858 # For comparisons, toplevel section is indicated by ""
1859 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1860 my $secid = $parser->{'parameters'}->{'secid'} || "";
1861
1862 if (!($secid eq $curr_sec_num))
1863 {
1864 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1865 return [$tagname => $attrHash];
1866 }
1867
1868 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1869 {
1870 if (!defined $parser->{'parameters'}->{'poscount'})
1871 {
1872 $parser->{'parameters'}->{'poscount'} = 0;
1873 }
1874 else
1875 {
1876 $parser->{'parameters'}->{'poscount'}++;
1877 }
1878
1879 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
1880 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
1881 return [];
1882 }
1883 }
1884
1885 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1886 {
1887 return [];
1888 }
1889
1890 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1891 {
1892 return [];
1893 }
1894
1895 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1896 return [$tagname => $attrHash];
1897}
1898
1899sub mxml_metadata
1900{
1901 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1902 my $metaname = $parser->{'parameters'}->{'metaname'};
1903 my $metamode = $parser->{'parameters'}->{'metamode'};
1904
1905 # Report error if we don't see FileName tag before this
1906 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1907
1908 # Don't do anything if we are not in the right FileSet
1909 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1910 if ($file_regexp =~ /\.\*/) {
1911 # Only interested in a file_regexp if it specifies precisely one
1912 # file.
1913 # So, skip anything with a .* in it as it is too general
1914## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
1915 return [$tagname => $attrHash];
1916 }
1917 my $src_file = $parser->{'parameters'}->{'src_file'};
1918 if (!($src_file =~ /$file_regexp/)) {
1919 return [$tagname => $attrHash];
1920 }
1921## print STDERR "*** mxl metamode = $metamode\n";
1922
1923 # Find the right metadata tag and checks if we are going to override it
1924 my $name_attr = $attrHash->{'name'};
1925 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1926
1927 # now metadata.xml functions need to keep track of metapos
1928 if (!defined $parser->{'parameters'}->{'poscount'})
1929 {
1930 $parser->{'parameters'}->{'poscount'} = 0;
1931 }
1932 else
1933 {
1934 $parser->{'parameters'}->{'poscount'}++;
1935 }
1936
1937 # If either the metapos or prevmetavalue is set,
1938 # get the value and override the current value
1939 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1940
1941 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1942 {
1943 $attrHash->{'_content'} = $metavalue;
1944
1945 ## print STDERR "**** overriding metadata.xml\n";
1946
1947 # Don't want it to wipe out any other pieces of metadata
1948 $parser->{'parameters'}->{'metamode'} = "done";
1949 }
1950 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1951 {
1952 $attrHash->{'_content'} = $metavalue;
1953 $parser->{'parameters'}->{'metamode'} = "done";
1954 }
1955 }
1956
1957 # mxml_description will process the metadata if metadata is accumulate,
1958 # or if we haven't found the metadata to override
1959
1960 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1961 return [$tagname => $attrHash];
1962}
1963
1964
1965sub mxml_description
1966{
1967 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1968 my $metamode = $parser->{'parameters'}->{'metamode'};
1969
1970 # Failed... Report error if we don't see FileName tag before this
1971 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1972
1973 # Don't do anything if we are not in the right FileSet
1974 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1975 if ($file_regexp =~ m/\.\*/) {
1976 # Only interested in a file_regexp if it specifies precisely one
1977 # file.
1978 # So, skip anything with a .* in it as it is too general
1979 return [$tagname => $attrHash];
1980 }
1981 my $src_file = $parser->{'parameters'}->{'src_file'};
1982
1983 if (!($src_file =~ m/$file_regexp/)) {
1984 return [$tagname => $attrHash];
1985 }
1986
1987 # Accumulate the metadata block to the end of the description block
1988 # Note: This adds metadata block to all description blocks, so if there are
1989 # multiple FileSets, it will add to all of them
1990 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1991
1992 # if metamode was "override" but get to here then it failed to
1993 # find an item to override, in which case it should append its
1994 # value to the end, just like the "accumulate" mode
1995
1996 if ($metamode eq "override") {
1997 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1998 }
1999
2000 # tack a new metadata tag on to the end of the <Metadata>+ block
2001 my $metaname = $parser->{'parameters'}->{'metaname'};
2002 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2003
2004 my $metadata_attr = { '_content' => $metavalue,
2005 'name' => $metaname,
2006 'mode' => "accumulate" };
2007
2008 my $append_metadata = [ "Metadata" => $metadata_attr ];
2009 my $description_content = $attrHash->{'_content'};
2010
2011## print STDERR "*** appending to metadata.xml\n";
2012
2013 # append the new metadata element to the end of the current
2014 # content contained inside this tag
2015 if (ref($description_content) eq "") {
2016 # => string or numeric literal
2017 # this is caused by a <Description> block has no <Metadata> child elements
2018 # => set up an empty array in '_content'
2019 $attrHash->{'_content'} = [ "\n" ];
2020 $description_content = $attrHash->{'_content'};
2021 }
2022
2023 push(@$description_content," ", $append_metadata ,"\n ");
2024 $parser->{'parameters'}->{'metamode'} = "done";
2025 }
2026
2027 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2028 return [$tagname => $attrHash];
2029}
2030
2031
2032sub mxml_filename
2033{
2034 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2035
2036 # Store the filename of the Current Fileset
2037 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2038 # FileName tag must come before Description tag
2039 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2040
2041 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2042 return [$tagname => $attrHash];
2043}
2044
2045
2046sub mxml_fileset
2047{
2048 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2049
2050 # Initilise the current_file
2051 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2052 # FileName tag must come before Description tag
2053 $parser->{'parameters'}->{'current_file'} = "";
2054
2055 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2056 return [$tagname => $attrHash];
2057}
2058
2059sub mxml_directorymetadata
2060{
2061 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2062
2063 # if we haven't processed the metadata when we reach the end of metadata.xml
2064 # it's because there's no particular FileSet element whose FileName matched
2065 # In which case, add a new FileSet for this FileName
2066 my $metamode = $parser->{'parameters'}->{'metamode'};
2067 if($metamode ne "done") {
2068
2069 if ($metamode eq "override") {
2070 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2071 }
2072
2073 # If we get to here and metamode is override, this means there
2074 # was no existing value to overide => treat as an append operation
2075
2076 # Create a new FileSet element and append to DirectoryMetadata
2077 # <FileSet>
2078 # <FileName>src_file</FileName>
2079 # <Description>
2080 # <Metadata mode="" name="">metavalue</Metadata>
2081 # </Description>
2082 # </FileSet>
2083 my $src_file = $parser->{'parameters'}->{'src_file'};
2084 my $metaname = $parser->{'parameters'}->{'metaname'};
2085 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2086 my $metadata_attr = {
2087 '_content' => $metavalue,
2088 'name' => $metaname,
2089 'mode' => "accumulate"
2090 };
2091 my $append_metadata = [ "Metadata" => $metadata_attr ];
2092 my $description_attr->{'_content'} = [ "\n\t\t ", $append_metadata, "\n\t\t"];
2093 my $description_element = [ "Description" => $description_attr ];
2094
2095 #_content is not an attribute, it's special and holds the children of this element
2096 # including the textnode value embedded in this element if any.
2097 my $filename_attr = {'_content' => $src_file};
2098 my $filename_element = [ "FileName" => $filename_attr ];
2099
2100 my $fileset_attr = {};
2101 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2102 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2103
2104
2105 # get children of dirmeta, and push the new FileSet element onto it
2106 print "Appending metadata to metadata.xml\n";
2107 my $dirmeta_content = $attrHash->{'_content'};
2108 if (ref($dirmeta_content)) {
2109 # got some existing interesting nested content
2110 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2111 push(@$dirmeta_content, "\t", $fileset ,"\n");
2112 }
2113 else {
2114 #description_content is most likely a string such as "\n"
2115 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2116 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2117 }
2118
2119 $parser->{'parameters'}->{'metamode'} = "done";
2120 }
2121 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2122 return [$tagname => $attrHash];
2123}
2124
2125
2126sub edit_metadata_xml
2127{
2128 my $self = shift @_;
2129 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2130
2131 # Set the call-back functions for the metadata tags
2132 my @rules =
2133 ( _default => 'raw',
2134 'FileName' => \&mxml_filename,
2135 'Metadata' => \&mxml_metadata,
2136 'Description' => \&mxml_description,
2137 'FileSet' => \&mxml_fileset,
2138 'DirectoryMetadata' => \&mxml_directorymetadata);
2139
2140 # use XML::Rules to add it in (read in and out again)
2141 my $parser = XML::Rules->new(rules => \@rules,
2142 style => 'filter',
2143 output_encoding => 'utf8',
2144 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2145
2146 if (!-e $metadata_xml_filename) {
2147
2148 if (open(MOUT,">$metadata_xml_filename")) {
2149
2150 my $src_file_re = &util::filename_to_regex($src_file);
2151 # shouldn't the following also be in the above utility routine??
2152 # $src_file_re =~ s/\./\\./g;
2153
2154 print MOUT "<?xml version=\"1.0\"?>\n";
2155 print MOUT "<DirectoryMetadata>\n";
2156 print MOUT " <FileSet>\n";
2157 print MOUT " <FileName>$src_file_re</FileName>\n";
2158 print MOUT " <Description>\n";
2159 print MOUT " </Description>\n";
2160 print MOUT " </FileSet>\n";
2161 print MOUT "</DirectoryMetadata>\n";
2162
2163 close(MOUT);
2164 }
2165 else {
2166 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2167 }
2168 }
2169
2170
2171 my $xml_in = "";
2172 if (!open(MIN,"<$metadata_xml_filename")) {
2173 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2174 }
2175 else {
2176 # Read them in
2177 my $line;
2178 while (defined ($line=<MIN>)) {
2179 $xml_in .= $line;
2180 }
2181 close(MIN);
2182
2183 # Filter with the call-back functions
2184 my $xml_out = "";
2185
2186 my $MOUT;
2187 if (!open($MOUT,">$metadata_xml_filename")) {
2188 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2189 }
2190 else {
2191 binmode($MOUT,":utf8");
2192
2193 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2194 # At the moment, I will just hack it!
2195 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2196 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2197 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2198 #print MOUT $xml_out;
2199
2200 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2201 metapos => $metapos,
2202 metavalue => $metavalue,
2203 metamode => $metamode,
2204 src_file => $src_file,
2205 prevmetavalue => $prevmetavalue,
2206 current_file => undef} );
2207 close($MOUT);
2208 }
2209 }
2210}
2211
2212
2213sub set_import_metadata
2214{
2215 my $self = shift @_;
2216
2217 my $username = $self->{'username'};
2218 my $collect = $self->{'collect'};
2219 my $gsdl_cgi = $self->{'gsdl_cgi'};
2220
2221 if ($baseaction::authentication_enabled) {
2222 # Ensure the user is allowed to edit this collection
2223 $self->authenticate_user($username, $collect);
2224 }
2225
2226 # Make sure the collection isn't locked by someone else
2227 $self->lock_collection($username, $collect);
2228
2229 $self->_set_import_metadata(@_);
2230
2231 # Release the lock once it is done
2232 $self->unlock_collection($username, $collect);
2233
2234}
2235
2236sub set_import_metadata_array
2237{
2238 my $self = shift @_;
2239
2240 my $username = $self->{'username'};
2241 my $collect = $self->{'collect'};
2242 my $gsdl_cgi = $self->{'gsdl_cgi'};
2243# my $gsdlhome = $self->{'gsdlhome'};
2244
2245 if ($baseaction::authentication_enabled) {
2246 # Ensure the user is allowed to edit this collection
2247 &authenticate_user($gsdl_cgi, $username, $collect);
2248 }
2249
2250 my $site = $self->{'site'};
2251 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2252
2253 $gsdl_cgi->checked_chdir($collect_dir);
2254
2255 # Make sure the collection isn't locked by someone else
2256 $self->lock_collection($username, $collect);
2257
2258 $self->_set_import_metadata_array(@_);
2259
2260 # Release the lock once it is done
2261 $self->unlock_collection($username, $collect);
2262
2263}
2264
2265
2266sub _set_import_metadata_array
2267{
2268 my $self = shift @_;
2269
2270 my $collect = $self->{'collect'};
2271 my $gsdl_cgi = $self->{'gsdl_cgi'};
2272
2273 my $site = $self->{'site'};
2274 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2275
2276 # look up additional args
2277
2278 my $infodbtype = $self->{'infodbtype'};
2279
2280 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2281 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2282
2283 my $json_str = $self->{'json'};
2284 my $doc_array = decode_json $json_str;
2285
2286 my $global_status = 0;
2287 my $global_mess = "";
2288
2289 my @all_docids = ();
2290
2291 foreach my $doc_array_rec ( @$doc_array )
2292 {
2293 my $status = -1;
2294 my $docid = $doc_array_rec->{'docid'};
2295
2296 my ($docid_root,$docid_secnum);
2297 if(defined $docid) {
2298 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2299 # as yet no support for setting subsection metadata in metadata.xml
2300 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2301 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2302 next; # skip this docid in for loop
2303 }
2304 }
2305
2306 push(@all_docids,$docid); # docid_root rather
2307
2308 my $metaname = $doc_array_rec->{'metaname'};
2309 if (defined $metaname) {
2310 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2311 my $metavalue = $doc_array_rec->{'metavalue'};
2312 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2313
2314 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2315 # make "accumulate" the default (less destructive, as won't actually
2316 # delete any existing values)
2317 $metamode = "accumulate";
2318 }
2319
2320 # adding metapos and prevmetavalue support to import_metadata subroutines
2321 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2322 my $prevmetavalue = $self->{'prevmetavalue'};
2323
2324 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2325
2326 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2327 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2328
2329 foreach my $metatable_rec ( @$metatable ) {
2330 $metaname = $metatable_rec->{'metaname'};
2331 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2332 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2333 # make "accumulate" the default (less destructive, as won't actually
2334 # delete any existing values)
2335 $metamode = "accumulate";
2336 }
2337
2338 # No support for metapos and prevmetavalue in the JSON metatable substructure
2339 my $metapos = undef;
2340 my $prevmetavalue = undef;
2341 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2342
2343 foreach my $metavalue ( @$metavals ) {
2344 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2345
2346 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2347 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2348 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2349 # metavalue for this metaname will be lost
2350 $metamode = "accumulate";
2351 }
2352 }
2353 }
2354 }
2355 }
2356
2357 # always a success message
2358 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2359 $gsdl_cgi->generate_ok_message($mess);
2360}
2361
2362# always returns true (1)
2363sub set_import_metadata_entry
2364{
2365 my $self = shift @_;
2366 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2367
2368 my $info_mess = <<RAWEND;
2369****************************
2370 set_import_metadata_entry()
2371****************************
2372collect = $collect
2373collect_dir = $collect_dir
2374infodbtype = $infodbtype
2375arcinfo_doc_filename = $arcinfo_doc_filename
2376docid = $docid
2377metaname = $metaname
2378metapos = $metapos
2379metavalue = $metavalue
2380metamode = $metamode
2381prevmetavalue = $prevmetavalue
2382****************************
2383RAWEND
2384
2385 $gsdl_cgi->generate_message($info_mess);
2386
2387 # import works with metadata.xml which can have inherited metadata
2388 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2389 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2390 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2391 # what they're doing if they provide a metapos.
2392 if(defined $metapos) {
2393 print STDERR "@@@@ WARNING: metapos defined.\n";
2394 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2395 }
2396
2397 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2398 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2399 my $metadata_xml_file;
2400 my $import_filename = undef;
2401
2402 if (defined $docid) {
2403 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2404 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2405
2406 # This now stores the full pathname
2407 $import_filename = $doc_rec->{'src-file'}->[0];
2408 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2409 # as the array version of the method doesn't support the -f parameter yet
2410 my $import_file = $self->{'f'};
2411 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2412 }
2413
2414 # figure out correct metadata.xml file [?]
2415 # Assuming the metadata.xml file is next to the source file
2416 # Note: This will not work if it is using the inherited metadata from the parent folder
2417 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2418 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2419
2420 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2421 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2422 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2423 if(!defined $metapos && $metamode eq "override") {
2424## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2425 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2426
2427 }
2428
2429 # Edit the metadata.xml
2430 # Modified by Jeffrey from DL Consulting
2431 # Handle the case where there is one metadata.xml file for multiple FileSets
2432 # The XML filter needs to know whether it is in the right FileSet
2433 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2434 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2435 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2436 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2437 #return 0;
2438 return $metadata_xml_filename;
2439}
2440
2441sub _remove_import_metadata
2442{
2443 my $self = shift @_;
2444
2445 my $collect = $self->{'collect'};
2446 my $gsdl_cgi = $self->{'gsdl_cgi'};
2447# my $gsdlhome = $self->{'gsdlhome'};
2448 my $infodbtype = $self->{'infodbtype'};
2449
2450 # Obtain the collect dir
2451 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2452 my $site = $self->{'site'};
2453 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2454
2455 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2456 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2457
2458 # look up additional args
2459 my $docid = $self->{'d'};
2460 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2461 {
2462 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2463 }
2464
2465 my $metaname = $self->{'metaname'};
2466 my $metapos = $self->{'metapos'};
2467 my $metavalue = $self->{'metavalue'};
2468 if(defined $metavalue) {
2469 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2470 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2471 $metapos = 0;
2472 }
2473 my $metamode = $self->{'metamode'} || undef;
2474
2475 # import works with metadata.xml which can have inherited metadata
2476 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2477 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2478 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2479 # what they're doing if they provide a metapos.
2480 if(defined $metapos) {
2481 print STDERR "@@@@ WARNING: metapos defined.\n";
2482 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2483 }
2484
2485 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2486 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2487 my $metadata_xml_file;
2488 my $import_filename = undef;
2489 if (defined $docid)
2490 {
2491 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2492 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2493
2494 # This now stores the full pathname
2495 $import_filename = $doc_rec->{'src-file'}->[0];
2496 }
2497
2498 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2499 {
2500 $gsdl_cgi->generate_error("There is no metadata\n");
2501 }
2502
2503 # figure out correct metadata.xml file [?]
2504 # Assuming the metadata.xml file is next to the source file
2505 # Note: This will not work if it is using the inherited metadata from the parent folder
2506 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2507 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2508
2509 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2510
2511 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2512 $mess .= " $metaname";
2513 $mess .= " = $metavalue\n";
2514
2515 $gsdl_cgi->generate_ok_message($mess);
2516
2517 #return $status; # in case calling functions have a use for this
2518}
2519
2520sub remove_import_metadata
2521{
2522 my $self = shift @_;
2523
2524 my $username = $self->{'username'};
2525 my $collect = $self->{'collect'};
2526 my $gsdl_cgi = $self->{'gsdl_cgi'};
2527
2528 if ($baseaction::authentication_enabled) {
2529 # Ensure the user is allowed to edit this collection
2530 &authenticate_user($gsdl_cgi, $username, $collect);
2531 }
2532
2533 # Make sure the collection isn't locked by someone else
2534 $self->lock_collection($username, $collect);
2535
2536 $self->_remove_import_metadata(@_);
2537
2538 # Release the lock once it is done
2539 $self->unlock_collection($username, $collect);
2540
2541}
2542
2543sub remove_from_metadata_xml
2544{
2545 my $self = shift @_;
2546 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2547 # metamode generally has no meaning for removing meta, but is used by set_meta
2548 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2549
2550 # Set the call-back functions for the metadata tags
2551 my @rules =
2552 (
2553 _default => 'raw',
2554 'Metadata' => \&rfmxml_metadata,
2555 'FileName' => \&mxml_filename
2556 );
2557
2558 my $parser = XML::Rules->new
2559 (
2560 rules => \@rules,
2561 style => 'filter',
2562 output_encoding => 'utf8',
2563 #normalisespaces => 1,
2564 stripspaces => 2|0|0 # ineffectual
2565 );
2566
2567 my $xml_in = "";
2568 if (!open(MIN,"<$metadata_xml_filename"))
2569 {
2570 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2571 }
2572 else
2573 {
2574 # Read them in
2575 my $line;
2576 while (defined ($line=<MIN>)) {
2577 $xml_in .= $line;
2578 }
2579 close(MIN);
2580
2581 # Filter with the call-back functions
2582 my $xml_out = "";
2583
2584 my $MOUT;
2585 if (!open($MOUT,">$metadata_xml_filename")) {
2586 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2587 }
2588 else {
2589 binmode($MOUT,":utf8");
2590 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2591 close($MOUT);
2592 }
2593 }
2594}
2595
2596sub rfmxml_metadata
2597{
2598 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2599
2600 # metadata.xml does not handle subsections
2601
2602 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2603 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2604 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2605 {
2606 if (!defined $parser->{'parameters'}->{'poscount'})
2607 {
2608 $parser->{'parameters'}->{'poscount'} = 0;
2609 }
2610 else
2611 {
2612 $parser->{'parameters'}->{'poscount'}++;
2613 }
2614
2615 # if overriding but no metapos, then clear all the meta for this metaname
2616 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2617 return [];
2618 }
2619
2620 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2621 {
2622 return [];
2623 }
2624
2625 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2626 {
2627 return [];
2628 }
2629 }
2630
2631 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2632 return [$tagname => $attrHash];
2633}
2634
2635sub _remove_live_metadata
2636{
2637 my $self = shift @_;
2638
2639 my $collect = $self->{'collect'};
2640 my $gsdl_cgi = $self->{'gsdl_cgi'};
2641# my $gsdlhome = $self->{'gsdlhome'};
2642 my $infodbtype = $self->{'infodbtype'};
2643
2644 # Obtain the collect dir
2645 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2646 my $site = $self->{'site'};
2647 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2648
2649
2650 # look up additional args
2651 my $docid = $self->{'d'};
2652 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2653 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2654 }
2655
2656 # Generate the dbkey
2657 my $metaname = $self->{'metaname'};
2658 my $dbkey = "$docid.$metaname";
2659
2660 # To people who know $collect_tail please add some comments
2661 # Obtain the live gdbm_db path
2662 my $collect_tail = $collect;
2663 $collect_tail =~ s/^.*[\/\\]//;
2664 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2665 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2666
2667 # Remove the key
2668 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2669 my $status = system($cmd);
2670 if ($status != 0) {
2671 # Catch error if gdbmdel failed
2672 my $mess = "Failed to set metadata key: $dbkey\n";
2673
2674 $mess .= "PATH: $ENV{'PATH'}\n";
2675 $mess .= "cmd = $cmd\n";
2676 $mess .= "Exit status: $status\n";
2677 $mess .= "System Error Message: $!\n";
2678
2679 $gsdl_cgi->generate_error($mess);
2680 }
2681 else {
2682 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2683 }
2684
2685}
2686
2687sub remove_live_metadata
2688{
2689 my $self = shift @_;
2690
2691 my $username = $self->{'username'};
2692 my $collect = $self->{'collect'};
2693 my $gsdl_cgi = $self->{'gsdl_cgi'};
2694 my $gsdlhome = $self->{'gsdlhome'};
2695
2696 if ($baseaction::authentication_enabled) {
2697 # Ensure the user is allowed to edit this collection
2698 &authenticate_user($gsdl_cgi, $username, $collect);
2699 }
2700
2701 # Make sure the collection isn't locked by someone else
2702 $self->lock_collection($username, $collect);
2703
2704 $self->_remove_live_metadata(@_);
2705
2706 $self->unlock_collection($username, $collect);
2707}
2708
2709sub remove_metadata
2710{
2711 my $self = shift @_;
2712
2713 my $where = $self->{'where'};
2714 if(!$where) {
2715 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2716 return;
2717 }
2718
2719 my $username = $self->{'username'};
2720 my $collect = $self->{'collect'};
2721 my $gsdl_cgi = $self->{'gsdl_cgi'};
2722
2723 if ($baseaction::authentication_enabled) {
2724 # Ensure the user is allowed to edit this collection
2725 &authenticate_user($gsdl_cgi, $username, $collect);
2726 }
2727
2728 # Make sure the collection isn't locked by someone else
2729 $self->lock_collection($username, $collect);
2730
2731 # check which directories need to be processed, specified in $where as
2732 # any combination of import|archives|index|live
2733 if($where =~ m/import/) {
2734 $self->_remove_import_metadata(@_);
2735 }
2736 if($where =~ m/archives/) {
2737 $self->_remove_archives_metadata(@_);
2738 }
2739 if($where =~ m/index/) {
2740 $self->_remove_index_metadata(@_);
2741 }
2742
2743 # Release the lock once it is done
2744 $self->unlock_collection($username, $collect);
2745}
2746
2747# the internal version, without authentication
2748sub _remove_index_metadata
2749{
2750 my $self = shift @_;
2751
2752 my $collect = $self->{'collect'};
2753 my $gsdl_cgi = $self->{'gsdl_cgi'};
2754# my $gsdlhome = $self->{'gsdlhome'};
2755 my $infodbtype = $self->{'infodbtype'};
2756
2757 # Obtain the collect dir
2758 my $site = $self->{'site'};
2759 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2760 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2761
2762 # look up additional args
2763 my $docid = $self->{'d'};
2764 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2765 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2766 }
2767 my $metaname = $self->{'metaname'};
2768 my $metapos = $self->{'metapos'};
2769 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2770
2771 # To people who know $collect_tail please add some comments
2772 # Obtain the path to the database
2773 my $collect_tail = $collect;
2774 $collect_tail =~ s/^.*[\/\\]//;
2775 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2776 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2777
2778 # Read the docid entry
2779 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2780
2781 # Check to make sure the key does exist
2782 if (!defined ($doc_rec->{$metaname})) {
2783 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2784 }
2785
2786 # Obtain the specified metadata pos
2787 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2788 if(!defined $metapos && !defined $metavalue) {
2789 $metapos = 0;
2790 }
2791
2792
2793 # consider check key is defined before deleting?
2794 # Loop through the metadata array and ignore the specified position
2795 my $filtered_metadata = [];
2796 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2797 for (my $i=0; $i<$num_metadata_vals; $i++) {
2798 my $metaval = shift(@{$doc_rec->{$metaname}});
2799
2800 if (!defined $metavalue && $i != $metapos) {
2801 push(@$filtered_metadata,$metaval);
2802 }
2803
2804 if(defined $metavalue && !($metavalue eq $metaval))
2805 {
2806 push(@$filtered_metadata,$metaval);
2807 }
2808 }
2809 $doc_rec->{$metaname} = $filtered_metadata;
2810
2811 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2812 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2813
2814 if ($status != 0) {
2815 my $mess = "Failed to set metadata key: $docid\n";
2816
2817 $mess .= "PATH: $ENV{'PATH'}\n";
2818 $mess .= "Exit status: $status\n";
2819 $mess .= "System Error Message: $!\n";
2820
2821 $gsdl_cgi->generate_error($mess);
2822 }
2823 else {
2824 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2825 $mess .= " $metaname";
2826 $mess .= "->[$metapos]" if (defined $metapos);
2827
2828 $gsdl_cgi->generate_ok_message($mess);
2829 }
2830
2831 #return $status; # in case calling functions have a use for this
2832}
2833
2834sub remove_index_metadata
2835{
2836 my $self = shift @_;
2837
2838 my $username = $self->{'username'};
2839 my $collect = $self->{'collect'};
2840 my $gsdl_cgi = $self->{'gsdl_cgi'};
2841# my $gsdlhome = $self->{'gsdlhome'};
2842
2843 if ($baseaction::authentication_enabled) {
2844 # Ensure the user is allowed to edit this collection
2845 &authenticate_user($gsdl_cgi, $username, $collect);
2846 }
2847
2848 # Obtain the collect dir
2849 my $site = $self->{'site'};
2850 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2851 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2852
2853 # Make sure the collection isn't locked by someone else
2854 $self->lock_collection($username, $collect);
2855
2856 $self->_remove_index_metadata(@_);
2857
2858 # Release the lock once it is done
2859 $self->unlock_collection($username, $collect);
2860}
2861
2862
2863# Was trying to reused the codes, but the functions need to be broken
2864# down more before they can be reused, otherwise there will be too
2865# much overhead and duplicate process...
2866sub insert_metadata
2867{
2868 my $self = shift @_;
2869
2870 my $username = $self->{'username'};
2871 my $collect = $self->{'collect'};
2872 my $gsdl_cgi = $self->{'gsdl_cgi'};
2873 my $gsdlhome = $self->{'gsdlhome'};
2874 my $infodbtype = $self->{'infodbtype'};
2875
2876 # If the import metadata and gdbm database have been updated, we
2877 # need to insert some notification to warn user that the the text
2878 # they see at the moment is not indexed and require a rebuild.
2879 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2880
2881 if ($baseaction::authentication_enabled) {
2882 # Ensure the user is allowed to edit this collection
2883 $self->authenticate_user($username, $collect);
2884 }
2885
2886 # Obtain the collect and archive dir
2887 my $site = $self->{'site'};
2888 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2889 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2890 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2891
2892 # Make sure the collection isn't locked by someone else
2893 $self->lock_collection($username, $collect);
2894
2895 # Check additional args
2896 my $docid = $self->{'d'};
2897 if (!defined($docid)) {
2898 $gsdl_cgi->generate_error("No document id is specified: d=...");
2899 }
2900 my $metaname = $self->{'metaname'};
2901 if (!defined($metaname)) {
2902 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2903 }
2904 my $metavalue = $self->{'metavalue'};
2905 if (!defined($metavalue) || $metavalue eq "") {
2906 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2907 }
2908 # make "accumulate" the default (less destructive, as won't actually
2909 # delete any existing values)
2910 my $metamode = "accumulate";
2911
2912 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2913 my $metapos = undef;
2914 my $prevmetavalue = undef;
2915
2916 #=======================================================================#
2917 # set_import_metadata [START]
2918 #=======================================================================#
2919 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2920 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2921 my $metadata_xml_file;
2922 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2923 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2924
2925 # This now stores the full pathname
2926 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2927
2928 # figure out correct metadata.xml file [?]
2929 # Assuming the metadata.xml file is next to the source file
2930 # Note: This will not work if it is using the inherited metadata from the parent folder
2931 my ($import_tailname, $import_dirname)
2932 = File::Basename::fileparse($import_filename);
2933 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2934
2935 # Shane's escape characters
2936 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2937 $metavalue =~ s/\,/&#44;/g;
2938 $metavalue =~ s/\:/&#58;/g;
2939 $metavalue =~ s/\|/&#124;/g;
2940 $metavalue =~ s/\(/&#40;/g;
2941 $metavalue =~ s/\)/&#41;/g;
2942 $metavalue =~ s/\[/&#91;/g;
2943 $metavalue =~ s/\\/&#92;/g;
2944 $metavalue =~ s/\]/&#93;/g;
2945 $metavalue =~ s/\{/&#123;/g;
2946 $metavalue =~ s/\}/&#125;/g;
2947 $metavalue =~ s/\"/&#34;/g;
2948 $metavalue =~ s/\`/&#96;/g;
2949 $metavalue =~ s/\n/_newline_/g;
2950
2951 # Edit the metadata.xml
2952 # Modified by Jeffrey from DL Consulting
2953 # Handle the case where there is one metadata.xml file for multiple FileSets
2954 # The XML filter needs to know whether it is in the right FileSet
2955 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2956 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2957 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2958 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2959 #=======================================================================#
2960 # set_import_metadata [END]
2961 #=======================================================================#
2962
2963
2964 #=======================================================================#
2965 # set_metadata (accumulate version) [START]
2966 #=======================================================================#
2967 # To people who know $collect_tail please add some comments
2968 # Obtain path to the database
2969 my $collect_tail = $collect;
2970 $collect_tail =~ s/^.*[\/\\]//;
2971 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2972 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2973
2974 # Read the docid entry
2975 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2976
2977 # Protect the quotes
2978 $metavalue =~ s/\"/\\\"/g;
2979
2980 # Adds the pending macro
2981 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2982
2983 # If the metadata doesn't exist, create a new one
2984 if (!defined($doc_rec->{$metaname})){
2985 $doc_rec->{$metaname} = [ $macro_metavalue ];
2986 }
2987 # Else, let's acculumate the values
2988 else {
2989 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2990 }
2991
2992 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2993 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2994
2995 if ($status != 0) {
2996 # Catch error if gdbmget failed
2997 my $mess = "Failed to set metadata key: $docid\n";
2998
2999 $mess .= "PATH: $ENV{'PATH'}\n";
3000 $mess .= "Exit status: $status\n";
3001 $mess .= "System Error Message: $!\n";
3002
3003 $gsdl_cgi->generate_error($mess);
3004 }
3005 else {
3006 my $mess = "insert-metadata successful: Key[$docid]\n";
3007 $mess .= " [In metadata.xml] $metaname";
3008 $mess .= " = $metavalue\n";
3009 $mess .= " [In database] $metaname";
3010 $mess .= " = $macro_metavalue\n";
3011 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3012 $gsdl_cgi->generate_ok_message($mess);
3013 }
3014 #=======================================================================#
3015 # set_metadata (accumulate version) [END]
3016 #=======================================================================#
3017
3018 # Release the lock once it is done
3019 $self->unlock_collection($username, $collect);
3020}
3021
30221;
Note: See TracBrowser for help on using the repository browser.