source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27180

Last change on this file since 27180 was 27180, checked in by ak19, 11 years ago

A couple of the index_meta functions used gdbm methods instead of calling the more generic db operations via dbutil. Fixed to go through dbutil instead and tested.

File size: 100.3 KB
RevLine 
[19293]1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
[27157]10# This program is free software; you can redistr te it and/or modify
[19293]11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[24071]37
[19293]38BEGIN {
[22331]39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
[25097]46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
[27176]50 'optional-args' => [ "metapos" ] },
[19499]51
[25097]52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
[27157]56 "get-index-metadata" => {
[25097]57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
[19499]59
[27157]60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
[25097]64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
[19499]67
[25097]68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
[19499]72
[27157]73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
[25097]78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
[27168]79 'optional-args' => [ "metapos", "metamode" ] },
[19499]80
[25097]81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
[27176]87 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
[25097]88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
[27168]91 'compulsory-args' => [ "where", "json" ],
[25097]92 'optional-args' => [ ] },
[24943]93
[25097]94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
[27168]101
102 "set-index-metadata-array" => {
103 'compulsory-args' => [ "json" ],
104 'optional-args' => [ ] },
105
106 "set-live-metadata-array" => {
107 'compulsory-args' => [ "json" ],
108 'optional-args' => [ ] },
[25097]109
110 #REMOVE METHODS
111 "remove-import-metadata" => {
112 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
[27176]113 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
[25097]114
115 "remove-archives-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
117 'optional-args' => [ "metapos", "metavalue" ] },
[20538]118
[25097]119 "remove-live-metadata" => {
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ ] },
[20538]122
[27157]123 "remove-index-metadata" => {
[25097]124 'compulsory-args' => [ "d", "metaname" ],
125 'optional-args' => [ "metapos", "metavalue" ] },
[21716]126
[27157]127 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
128 'compulsory-args' => [ "d", "metaname" ],
129 'optional-args' => [ "where", "metapos", "metavalue" ] },
130
[25097]131 #INSERT METHODS
132 "insert-metadata" => {
133 'compulsory-args' => [ "d", "metaname", "metavalue" ],
134 'optional-args' => [ ] }
[19293]135};
136
137
138sub new
139{
140 my $class = shift (@_);
141 my ($gsdl_cgi,$iis6_mode) = @_;
142
[23761]143 # Treat metavalue specially. To transmit this through a GET request
144 # the Javascript side has url-encoded it, so here we need to decode
145 # it before proceeding
146
147 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
148 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
149 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
150
151 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
152 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
153
[19293]154 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
155
156 return bless $self, $class;
157}
158
159
160sub get_live_metadata
161{
162 my $self = shift @_;
163
164 my $username = $self->{'username'};
165 my $collect = $self->{'collect'};
166 my $gsdl_cgi = $self->{'gsdl_cgi'};
167 my $gsdlhome = $self->{'gsdlhome'};
[23478]168 my $infodbtype = $self->{'infodbtype'};
[27180]169
[23447]170 # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]171 # so (for now) let's not bother to enforce authentication
[21715]172
173 # Obtain the collect dir
[23766]174 my $site = $self->{'site'};
175 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
176 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]177
178 # Make sure the collection isn't locked by someone else
179 $self->lock_collection($username, $collect);
180
181 # look up additional args
182 my $docid = $self->{'d'};
183 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]184 $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]185 }
186
[21715]187 # Generate the dbkey
[19293]188 my $metaname = $self->{'metaname'};
189 my $dbkey = "$docid.$metaname";
190
[21715]191 # To people who know $collect_tail please add some comments
192 # Obtain path to the database
[19293]193 my $collect_tail = $collect;
194 $collect_tail =~ s/^.*[\/|\\]//;
[21564]195 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]196 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]197
198 # Obtain the content of the key
[21569]199 my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]200 if (open(GIN,"$cmd |") == 0) {
[21715]201 # Catch error if gdbmget failed
[19293]202 my $mess = "Failed to get metadata key: $metaname\n";
203 $mess .= "$!\n";
204
205 $gsdl_cgi->generate_error($mess);
206 }
207 else {
[23761]208 binmode(GIN,":utf8");
[21715]209 # Read everything in and concatenate them into $metavalue
[19293]210 my $metavalue = "";
211 my $line;
212 while (defined ($line=<GIN>)) {
213 $metavalue .= $line;
214 }
215 close(GIN);
[21715]216 chomp($metavalue); # Get rid off the tailing newlines
[19293]217 $gsdl_cgi->generate_ok_message("$metavalue");
218 }
[21715]219
220 # Release the lock once it is done
221 $self->unlock_collection($username, $collect);
[19499]222}
[19293]223
[27157]224# just calls the index version
[19499]225sub get_metadata
226{
227 my $self = shift @_;
[27157]228 $self->get_index_metadata(@_);
229}
[19499]230
[27157]231sub get_index_metadata
232{
233 my $self = shift @_;
234
[19499]235 my $username = $self->{'username'};
236 my $collect = $self->{'collect'};
237 my $gsdl_cgi = $self->{'gsdl_cgi'};
238 my $gsdlhome = $self->{'gsdlhome'};
239
[21715]240 # Authenticate user if it is enabled
[19499]241 if ($baseaction::authentication_enabled) {
242 # Ensure the user is allowed to edit this collection
243 &authenticate_user($gsdl_cgi, $username, $collect);
244 }
245
[21715]246 # Obtain the collect dir
[23766]247 my $site = $self->{'site'};
248 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
249 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]250
251 # Make sure the collection isn't locked by someone else
252 $self->lock_collection($username, $collect);
253
254 # look up additional args
255 my $docid = $self->{'d'};
256 my $metaname = $self->{'metaname'};
257 my $metapos = $self->{'metapos'};
[23400]258 my $infodbtype = $self->{'infodbtype'};
[19499]259
[21715]260 # To people who know $collect_tail please add some comments
261 # Obtain path to the database
[19499]262 my $collect_tail = $collect;
263 $collect_tail =~ s/^.*[\/\\]//;
[21564]264 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]265 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]266
267 # Read the docid entry
[23400]268 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
269
[21715]270 # Basically loop through and unescape_html the values
[19499]271 foreach my $k (keys %$doc_rec) {
272 my @escaped_v = ();
273 foreach my $v (@{$doc_rec->{$k}}) {
274 my $ev = &ghtml::unescape_html($v);
275 push(@escaped_v, $ev);
276 }
277 $doc_rec->{$k} = \@escaped_v;
278 }
279
[21715]280 # Obtain the specified metadata value
[19499]281 $metapos = 0 if (!defined $metapos);
282 my $metavalue = $doc_rec->{$metaname}->[$metapos];
283 $gsdl_cgi->generate_ok_message("$metavalue");
[21715]284
285 # Release the lock once it is done
286 $self->unlock_collection($username, $collect);
[19293]287}
288
289
[25097]290sub get_import_metadata
291{
292 my $self = shift @_;
293
294 my $username = $self->{'username'};
295 my $collect = $self->{'collect'};
296 my $gsdl_cgi = $self->{'gsdl_cgi'};
297 my $gsdlhome = $self->{'gsdlhome'};
298
299 # Authenticate user if it is enabled
300 if ($baseaction::authentication_enabled) {
301 # Ensure the user is allowed to edit this collection
302 &authenticate_user($gsdl_cgi, $username, $collect);
303 }
304
305 # Obtain the collect dir
306 my $site = $self->{'site'};
307 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
308 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
309
310 # Make sure the collection isn't locked by someone else
311 $self->lock_collection($username, $collect);
312
313 # look up additional args
314 my $docid = $self->{'d'};
315 my $metaname = $self->{'metaname'};
[27176]316 my $metapos = $self->{'metapos'};
317 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
318
[25097]319 my $infodbtype = $self->{'infodbtype'};
[27176]320 if (!defined $docid)
[25097]321 {
[27173]322 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
[25097]323 }
324
325 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
326 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
327 my $metadata_xml_file;
328 my $import_filename = undef;
329
330
331 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
332 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
333 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
334
335 # This now stores the full pathname
336 $import_filename = $doc_rec->{'src-file'}->[0];
337
338 # figure out correct metadata.xml file [?]
339 # Assuming the metadata.xml file is next to the source file
340 # Note: This will not work if it is using the inherited metadata from the parent folder
341 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
342 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
343
[27176]344 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
[25097]345
346 # Release the lock once it is done
347 $self->unlock_collection($username, $collect);
348}
349
350sub get_metadata_from_metadata_xml
351{
352 my $self = shift @_;
[27176]353 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
[25097]354
355 my @rules =
356 (
357 _default => 'raw',
358 'Metadata' => \&gfmxml_metadata,
359 'FileName' => \&mxml_filename
360 );
361
362 my $parser = XML::Rules->new
363 (
364 rules => \@rules,
365 output_encoding => 'utf8'
366 );
367
368 my $xml_in = "";
369 if (!open(MIN,"<$metadata_xml_filename"))
370 {
371 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
372 }
373 else
374 {
375 # Read them in
376 my $line;
377 while (defined ($line=<MIN>)) {
378 $xml_in .= $line;
379 }
380 close(MIN);
381
[27176]382 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
[25097]383
384 if(defined $parser->{'pad'}->{'metavalue'})
385 {
386 return $parser->{'pad'}->{'metavalue'};
387 }
388 else
389 {
390 return "";
391 }
392 }
393}
394
395sub gfmxml_metadata
396{
397 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
398
[27176]399 # no subsection support yet in metadata.xml
400
401 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
[25097]402 {
[27176]403 if (!defined $parser->{'parameters'}->{'poscount'})
404 {
405 $parser->{'parameters'}->{'poscount'} = 0;
406 }
407 else
408 {
409 $parser->{'parameters'}->{'poscount'}++;
410 }
411
412 # gets the first value by default, since metapos defaults to 0
413 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
414 {
415 if($parser->{'parameters'}->{'metapos'} > 0) {
416 print STDERR "@@@@ WARNING: non-zero metapos.\n";
417 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
418 }
419 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
420 }
[25097]421 }
422}
423
424sub get_archives_metadata
425{
426 my $self = shift @_;
427
428 my $username = $self->{'username'};
429 my $collect = $self->{'collect'};
430 my $gsdl_cgi = $self->{'gsdl_cgi'};
431 my $gsdlhome = $self->{'gsdlhome'};
432 my $infodbtype = $self->{'infodbtype'};
433
434 # Authenticate user if it is enabled
435 if ($baseaction::authentication_enabled) {
436 # Ensure the user is allowed to edit this collection
437 &authenticate_user($gsdl_cgi, $username, $collect);
438 }
439
440 # Obtain the collect dir
441 my $site = $self->{'site'};
442 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
443
444 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
445
446 # Make sure the collection isn't locked by someone else
447 $self->lock_collection($username, $collect);
448
449 # look up additional args
450 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
451 $docid_secnum = "" if (!defined $docid_secnum);
452
453 my $metaname = $self->{'metaname'};
454 my $metapos = $self->{'metapos'};
455 $metapos = 0 if (!defined $metapos);
456
457 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
458 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
459
460 # This now stores the full pathname
461 my $doc_filename = $doc_rec->{'doc-file'}->[0];
462
463 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
464
465 # Release the lock once it is done
466 $self->unlock_collection($username, $collect);
467}
468
469sub get_metadata_from_archive_xml
470{
471 my $self = shift @_;
472 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
473
474 my @start_rules = ('Section' => \&dxml_start_section);
475
476 my @rules =
477 (
478 _default => 'raw',
479 'Metadata' => \&gfdxml_metadata
480 );
481
482 my $parser = XML::Rules->new
483 (
484 start_rules => \@start_rules,
485 rules => \@rules,
486 output_encoding => 'utf8'
487 );
488
489 my $xml_in = "";
490 if (!open(MIN,"<$doc_xml_filename"))
491 {
492 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
493 }
494 else
495 {
496 # Read them in
497 my $line;
498 while (defined ($line=<MIN>)) {
499 $xml_in .= $line;
500 }
501 close(MIN);
502
503 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
504
505 if(defined $parser->{'pad'}->{'metavalue'})
506 {
507 return $parser->{'pad'}->{'metavalue'};
508 }
509 else
510 {
511 return "";
512 }
513 }
514}
515
516sub gfdxml_metadata
517{
518 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
519
520 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
521 {
522 return;
523 }
524
525 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
526 {
527 if (!defined $parser->{'parameters'}->{'poscount'})
528 {
529 $parser->{'parameters'}->{'poscount'} = 0;
530 }
531 else
532 {
533 $parser->{'parameters'}->{'poscount'}++;
534 }
535 }
536
537 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
538 {
539 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
540 }
541}
542
[27157]543sub _set_live_metadata
[19293]544{
545 my $self = shift @_;
546
547 my $collect = $self->{'collect'};
548 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]549 #my $gsdlhome = $self->{'gsdlhome'};
[23400]550 my $infodbtype = $self->{'infodbtype'};
[19293]551
[21715]552 # Obtain the collect dir
[27157]553 my $site = $self->{'site'};
554 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[23766]555 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]556
557
558 # look up additional args
559 my $docid = $self->{'d'};
[21715]560 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[27168]561 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
[21715]562 }
[19293]563 my $metavalue = $self->{'metavalue'};
564
[21715]565 # Generate the dbkey
566 my $metaname = $self->{'metaname'};
[19293]567 my $dbkey = "$docid.$metaname";
568
[21715]569 # To people who know $collect_tail please add some comments
570 # Obtain path to the database
[19293]571 my $collect_tail = $collect;
572 $collect_tail =~ s/^.*[\/\\]//;
[21564]573 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]574 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19293]575
[21715]576 # Set the new value
[27176]577 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[19293]578 my $status = system($cmd);
579 if ($status != 0) {
[21715]580 # Catch error if gdbmget failed
[19293]581 my $mess = "Failed to set metadata key: $dbkey\n";
[21715]582
[19293]583 $mess .= "PATH: $ENV{'PATH'}\n";
584 $mess .= "cmd = $cmd\n";
585 $mess .= "Exit status: $status\n";
586 $mess .= "System Error Message: $!\n";
587
[19499]588 $gsdl_cgi->generate_error($mess);
[19293]589 }
590 else {
[19499]591 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
[19293]592 }
[27157]593
594 #return $status; # in case calling functions have any further use for this
595}
596
597sub set_live_metadata
598{
599 my $self = shift @_;
600
601 my $username = $self->{'username'};
602 my $collect = $self->{'collect'};
603 my $gsdl_cgi = $self->{'gsdl_cgi'};
604
605 if ($baseaction::authentication_enabled) {
606 # Ensure the user is allowed to edit this collection
607 &authenticate_user($gsdl_cgi, $username, $collect);
608 }
609
610 # Make sure the collection isn't locked by someone else
611 $self->lock_collection($username, $collect);
612
613 $self->_set_live_metadata(@_);
614
[21715]615 # Release the lock once it is done
616 $self->unlock_collection($username, $collect);
[19293]617}
618
[27168]619sub set_index_metadata_entry
[24071]620{
[27168]621 my $self = shift @_;
622 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
623
[24071]624 # To people who know $collect_tail please add some comments
625 # Obtain path to the database
626 my $collect_tail = $collect;
627 $collect_tail =~ s/^.*[\/\\]//;
628 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
629 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
630
631# print STDERR "**** infodb file path = $infodb_file_path\n";
632# print STDERR "***** infodb type = $infodbtype\n";
[27168]633
[24071]634 # Read the docid entry
635 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[27168]636
[24071]637 # Set the metadata value
638 if (defined $metapos) {
[27168]639 # if metamode=accumulate AND metapos, warn user and then use metapos
640 if (defined $metamode && $metamode eq "accumulate") {
641 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
642 print STDERR "**** Proceeding by using metapos\n";
643 }
644 $doc_rec->{$metaname}->[$metapos] = $metavalue;
[24071]645 }
[27176]646 elsif (defined $metamode && $metamode eq "override") {
647 $doc_rec->{$metaname} = [ $metavalue ];
648 }
649 else { # default for index was to override, but because accumulate is less destructive,
650 # and because accumulate is the default for archives and import, that's the new default for index too
[27168]651 if(defined $doc_rec->{$metaname}) {
652 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
653 } else {
654 $doc_rec->{$metaname} = [ $metavalue ];
655 }
[24071]656 }
657
658 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
659
[27168]660 return $status;
[24071]661
662}
[19293]663
[27157]664sub _set_import_metadata
[19499]665{
666 my $self = shift @_;
[19293]667
[19499]668 my $collect = $self->{'collect'};
669 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]670 my $infodbtype = $self->{'infodbtype'};
671# my $gsdlhome = $self->{'gsdlhome'};
672
673 # Obtain the collect and archive dir
674 my $site = $self->{'site'};
675 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
676 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
677 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
678
679 # look up additional args
680 # want either d= or f=
681 my $docid = $self->{'d'};
[27173]682 my ($docid_root,$docid_secnum);
683 if(defined $docid) {
684 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
685 # as yet no support for setting subsection metadata in metadata.xml
686 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
[27176]687 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
[27173]688 return;
689 }
690 }
691
[27157]692 my $import_file = $self->{'f'};
693 if ((!defined $docid) && (!defined $import_file)) {
694 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
695 }
[19293]696
[27157]697 # Get the parameters and set default mode to "accumulate"
698 my $metaname = $self->{'metaname'};
699 my $metavalue = $self->{'metavalue'};
700## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
701 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
702
703 my $metamode = $self->{'metamode'};
704 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
705 # make "accumulate" the default (less destructive, as won't actually
706 # delete any existing values)
707 $metamode = "accumulate";
[19499]708 }
709
[27176]710 # adding metapos and prevmetavalue support to import_metadata subroutines
711 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
712 my $prevmetavalue = $self->{'prevmetavalue'};
[27157]713
[27176]714 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
715 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27157]716
717 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
718 $mess .= " $metaname";
719 $mess .= " = $metavalue";
720 $mess .= " ($metamode)\n";
721
722 $gsdl_cgi->generate_ok_message($mess);
723
724 #return $status; # in case calling functions have any further use for this
725}
726
727# the version of set_index_meta that doesn't do authentication
728sub _set_archives_metadata
729{
730 my $self = shift @_;
731
732 my $collect = $self->{'collect'};
733 my $gsdl_cgi = $self->{'gsdl_cgi'};
734 my $infodbtype = $self->{'infodbtype'};
735
736 # Obtain the collect and archive dir
737 my $site = $self->{'site'};
738 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
739 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
740
741 # look up additional args
742 my $docid = $self->{'d'};
743 my $metaname = $self->{'metaname'};
744 my $metavalue = $self->{'metavalue'};
745 my $prevmetavalue = $self->{'prevmetavalue'};
746
[27176]747 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
748 # Don't append "|| undef", since if metapos=0 it will then be set to undef
[27157]749
750 my $metamode = $self->{'metamode'};
751 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
752 # make "accumulate" the default (less destructive, as won't actually
753 # delete any existing values)
754 $metamode = "accumulate";
[27167]755 }
[27168]756
[27157]757 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
758 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
759
760 if ($status == 0) {
761 my $mess = "set-archives-metadata successful: Key[$docid]\n";
762 $mess .= " $metaname";
763 $mess .= "->[$metapos]" if (defined $metapos);
764 $mess .= " = $metavalue";
765 $mess .= " ($metamode)\n";
766
767 $gsdl_cgi->generate_ok_message($mess);
768 }
769 else {
770 my $mess .= "Failed to set archives metadata key: $docid\n";
771 $mess .= "Exit status: $status\n";
772 if(defined $self->{'error_msg'}) {
773 $mess .= "Error Message: $self->{'error_msg'}\n";
774 } else {
775 $mess .= "System Error Message: $!\n";
776 }
777 $mess .= "-" x 20 . "\n";
778
779 $gsdl_cgi->generate_error($mess);
780 }
[23766]781
[27157]782 #return $status; # in case calling functions have any further use for this
783}
[19499]784
785
[27157]786# the version of set_index_meta that doesn't do authentication
787sub _set_index_metadata
788{
789 my $self = shift @_;
790
791 my $collect = $self->{'collect'};
792 my $gsdl_cgi = $self->{'gsdl_cgi'};
793
794 my $site = $self->{'site'};
795 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
796
[19499]797 # look up additional args
798 my $docid = $self->{'d'};
799 my $metaname = $self->{'metaname'};
[27176]800 my $metapos = $self->{'metapos'}; # undef has meaning
[19499]801 my $metavalue = $self->{'metavalue'};
[23761]802 my $infodbtype = $self->{'infodbtype'};
[27168]803 my $metamode = $self->{'metamode'};
804
805 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
[23400]806
[19499]807 if ($status != 0) {
[23761]808 # Catch error if set infodb entry failed
809 my $mess = "Failed to set metadata key: $docid\n";
[19499]810
[23761]811 $mess .= "PATH: $ENV{'PATH'}\n";
812 $mess .= "Exit status: $status\n";
813 $mess .= "System Error Message: $!\n";
814
815 $gsdl_cgi->generate_error($mess);
[19499]816 }
817 else {
[27157]818 my $mess = "set-index-metadata successful: Key[$docid]\n";
[23761]819 $mess .= " $metaname";
820 $mess .= "->[$metapos]" if (defined $metapos);
[27157]821 $mess .= " = $metavalue\n";
[23761]822
823 $gsdl_cgi->generate_ok_message($mess);
[19499]824 }
[27157]825
826 #return $status; # in case calling functions have any further use for this
827}
828
829sub set_index_metadata
830{
831 my $self = shift @_;
832
833 my $username = $self->{'username'};
834 my $collect = $self->{'collect'};
835 my $gsdl_cgi = $self->{'gsdl_cgi'};
836 #my $gsdlhome = $self->{'gsdlhome'};
837
838 if ($baseaction::authentication_enabled) {
839 # Ensure the user is allowed to edit this collection
840 &authenticate_user($gsdl_cgi, $username, $collect);
841 }
842
843 my $site = $self->{'site'};
844 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
845
846 $gsdl_cgi->checked_chdir($collect_dir);
847
848 # Obtain the collect dir
849 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
850
851 # Make sure the collection isn't locked by someone else
852 $self->lock_collection($username, $collect);
853
854 $self->_set_index_metadata(@_);
[21715]855
856 # Release the lock once it is done
857 $self->unlock_collection($username, $collect);
[19499]858}
859
[27157]860# call this to set the metadata for a combination of dirs archives, import or index, or live
861# if none specified, defaults to index which was the original behaviour of set_metadata.
862sub set_metadata
863{
864 my $self = shift @_;
[19499]865
[27157]866 # Testing that not defining a variable, setting it to "" or to " " all return false
867 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
868
869 my $where = $self->{'where'};
870 if(!$where) {
871 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
872 return;
873 }
874
875 # authenticate and lock collection once, even if processing multiple dirs
876 my $username = $self->{'username'};
877 my $collect = $self->{'collect'};
878 my $gsdl_cgi = $self->{'gsdl_cgi'};
879
880 if ($baseaction::authentication_enabled) {
881 # Ensure the user is allowed to edit this collection
882 #&authenticate_user($gsdl_cgi, $username, $collect);
883 $self->authenticate_user($username, $collect);
884 }
885
886 if($where =~ m/index/) {
887 my $site = $self->{'site'};
888 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
889 $gsdl_cgi->checked_chdir($collect_dir);
890 }
891
892 # Make sure the collection isn't locked by someone else
893 $self->lock_collection($username, $collect);
894
895
896 # now at last can set the metadata. $where can specify multiple
897 # $where is of the form: import|archives|index, or a subset thereof
898
899 #my @whichdirs = split('\|', $where);
900
901 # just check whether $where contains import/archives/index/live in turn, and
902 # for each case, process it accordingly
903 if($where =~ m/import/) {
904 $self->_set_import_metadata(@_);
905 }
906
907 if($where =~ m/archives/) {
908
909 # look up docID arg which is optional to set_metadata because it's optional
910 # to set_import, but which is compulsory to set_archives_metadata
911 my $docid = $self->{'d'};
912 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[27168]913 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
[27157]914 }
[27168]915 # we have a docid, so can set archives meta
916 $self->_set_archives_metadata(@_);
[27157]917 }
918
919 if($where =~ m/index/) {
920
921 # look up docID arg which is optional to set_metadata because it's optional
922 # to set_import, but which is compulsory to set_archives_metadata
923 my $docid = $self->{'d'};
924 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
925 $gsdl_cgi->generate_error("No docid (d=...) specified.");
926 }
[27168]927 # we have a docid, so can set index meta
928 $self->_set_index_metadata(@_);
[27157]929 }
930
931 if($where =~ m/live/) {
932 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
933 }
934
935 # Release the lock once it is done
936 $self->unlock_collection($username, $collect);
937}
938
[24071]939sub set_metadata_array
940{
941 my $self = shift @_;
942
[27168]943 my $where = $self->{'where'};
944 if(!$where) {
945 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
946 return;
947 }
948
[24071]949 my $username = $self->{'username'};
950 my $collect = $self->{'collect'};
951 my $gsdl_cgi = $self->{'gsdl_cgi'};
952
953 if ($baseaction::authentication_enabled) {
954 # Ensure the user is allowed to edit this collection
955 &authenticate_user($gsdl_cgi, $username, $collect);
956 }
957
[27168]958 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
959 # But including the stmt during this code reorganisation to preserve as-is what used to happen
960 my $site = $self->{'site'};
961 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
962 $gsdl_cgi->checked_chdir($collect_dir);
963
964 # Make sure the collection isn't locked by someone else
965 $self->lock_collection($username, $collect);
966
967 if($where =~ m/import/) {
968 $self->_set_import_metadata_array(@_);
969 }
970 if($where =~ m/archives/) {
971 $self->_set_archives_metadata_array(@_);
972 }
973 if($where =~ m/index/) {
974 $self->_set_index_metadata_array(@_);
975 }
976 if($where =~ m/live/) {
977 $self->_set_live_metadata_array(@_);
978 }
979
980 # Release the lock once it is done
981 $self->unlock_collection($username, $collect);
982}
983
984sub _set_index_metadata_array
985{
986 my $self = shift @_;
987
988 my $collect = $self->{'collect'};
989 my $gsdl_cgi = $self->{'gsdl_cgi'};
990 my $site = $self->{'site'};
991 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
992
993
994 # look up additional args
995
996 my $infodbtype = $self->{'infodbtype'};
997
998 my $json_str = $self->{'json'};
999 my $doc_array = decode_json $json_str;
1000
1001
1002 my $global_status = 0;
1003 my $global_mess = "";
1004
1005 my @all_docids = ();
1006
1007 foreach my $doc_array_rec ( @$doc_array ) {
1008
1009 my $status = -1;
1010 my $docid = $doc_array_rec->{'docid'};
1011
1012 push(@all_docids,$docid);
1013
1014 my $metaname = $doc_array_rec->{'metaname'};
1015 if(defined $metaname) {
[27176]1016 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
[27168]1017 my $metavalue = $doc_array_rec->{'metavalue'};
1018 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1019
1020 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1021 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1022 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1023
1024 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1025 $metaname = $metatable_rec->{'metaname'};
1026 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1027 my $metapos = undef;
1028 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1029
1030 foreach my $metavalue ( @$metavals ) { # metavals is an array
1031 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1032 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1033 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1034 # metavalue for this metaname will be lost
1035 $metamode = "accumulate";
1036 }
1037 }
1038 }
1039 }
1040
1041 if ($status != 0) {
1042 # Catch error if set infodb entry failed
1043 $global_status = $status;
1044 $global_mess .= "Failed to set metadata key: $docid\n";
1045 $global_mess .= "Exit status: $status\n";
1046 $global_mess .= "System Error Message: $!\n";
1047 $global_mess .= "-" x 20;
1048 }
1049 }
1050
1051 if ($global_status != 0) {
1052 $global_mess .= "PATH: $ENV{'PATH'}\n";
1053 $gsdl_cgi->generate_error($global_mess);
1054 }
1055 else {
1056 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1057 $gsdl_cgi->generate_ok_message($mess);
1058 }
1059}
1060
1061sub set_index_metadata_array
1062{
1063 my $self = shift @_;
1064
1065 my $username = $self->{'username'};
1066 my $collect = $self->{'collect'};
1067 my $gsdl_cgi = $self->{'gsdl_cgi'};
1068# my $gsdlhome = $self->{'gsdlhome'};
1069
1070 if ($baseaction::authentication_enabled) {
1071 # Ensure the user is allowed to edit this collection
1072 &authenticate_user($gsdl_cgi, $username, $collect);
1073 }
1074
[24071]1075 my $site = $self->{'site'};
1076 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1077
1078 $gsdl_cgi->checked_chdir($collect_dir);
1079
1080 # Obtain the collect dir
1081 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1082
1083 # Make sure the collection isn't locked by someone else
1084 $self->lock_collection($username, $collect);
1085
[27168]1086 $self->_set_index_metadata_array(@_);
1087
1088 # Release the lock once it is done
1089 $self->unlock_collection($username, $collect);
1090}
1091
1092# experimental, newly added in and untested
1093sub _set_live_metadata_array
1094{
1095 my $self = shift @_;
1096
1097 my $collect = $self->{'collect'};
1098 my $gsdl_cgi = $self->{'gsdl_cgi'};
1099
1100 my $site = $self->{'site'};
1101 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1102
1103
[24071]1104 # look up additional args
[27168]1105 my $infodbtype = $self->{'infodbtype'};
1106 # To people who know $collect_tail please add some comments
1107 # Obtain path to the database
1108 my $collect_tail = $collect;
1109 $collect_tail =~ s/^.*[\/\\]//;
1110 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1111 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1112
1113
1114 my $json_str = $self->{'json'};
1115 my $doc_array = decode_json $json_str;
1116
1117
1118 my $global_status = 0;
1119 my $global_mess = "";
1120
1121 my @all_docids = ();
1122
1123
1124 foreach my $doc_array_rec ( @$doc_array ) {
[24071]1125
[27168]1126 my $status = -1;
1127 my $docid = $doc_array_rec->{'docid'};
1128
1129 push(@all_docids,$docid);
1130
1131 my $metaname = $doc_array_rec->{'metaname'};
1132 if(defined $metaname) {
1133 my $dbkey = "$docid.$metaname";
1134 my $metavalue = $doc_array_rec->{'metavalue'};
1135
1136 # Set the new value
[27176]1137 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[27168]1138 $status = system($cmd);
1139
1140 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1141 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1142 foreach my $metatable_rec ( @$metatable ) {
1143 $metaname = $metatable_rec->{'metaname'};
1144 my $dbkey = "$docid.$metaname";
1145
1146 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1147 foreach my $metavalue ( @$metavals ) {
[27176]1148 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[27168]1149 $status = system($cmd);
[24071]1150 }
[27168]1151 }
1152
[24071]1153 }
1154
[27168]1155 if ($status != 0) {
1156 # Catch error if gdbmget failed
1157 $global_status = $status;
1158 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1159 $global_mess .= "Exit status: $status\n";
1160 $global_mess .= "System Error Message: $!\n";
1161 $global_mess .= "-" x 20;
1162 }
[24071]1163 }
[27168]1164
1165 if ($global_status != 0) {
1166 $global_mess .= "PATH: $ENV{'PATH'}\n";
1167 $gsdl_cgi->generate_error($global_mess);
1168 }
[24071]1169 else {
[27168]1170 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1171 $gsdl_cgi->generate_ok_message($mess);
[24071]1172 }
[27168]1173}
1174
1175sub set_live_metadata_array
1176{
1177 my $self = shift @_;
1178
1179 my $username = $self->{'username'};
1180 my $collect = $self->{'collect'};
1181 my $gsdl_cgi = $self->{'gsdl_cgi'};
1182
1183 if ($baseaction::authentication_enabled) {
1184 # Ensure the user is allowed to edit this collection
1185 &authenticate_user($gsdl_cgi, $username, $collect);
1186 }
1187
1188 my $site = $self->{'site'};
1189 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1190
1191 $gsdl_cgi->checked_chdir($collect_dir);
1192
1193 # Make sure the collection isn't locked by someone else
1194 $self->lock_collection($username, $collect);
1195
1196 $self->_set_live_metadata_array(@_);
1197
[24071]1198 # Release the lock once it is done
1199 $self->unlock_collection($username, $collect);
1200}
1201
1202
[20538]1203sub dxml_metadata
1204{
1205 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1206 my $metaname = $parser->{'parameters'}->{'metaname'};
1207 my $metamode = $parser->{'parameters'}->{'metamode'};
[25891]1208
[27007]1209 print STDERR "**** Processing closing </Metadata> tag\n";
[23761]1210
1211 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1212
1213 # Find the right metadata tag and checks if we are going to
1214 # override it
1215 #
1216 # Note: This over writes the first metadata block it
1217 # encountered. If there are multiple Sections in the doc.xml, it
1218 # might not behave as you would expect
[20538]1219
[23761]1220 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1221## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1222## print STDERR "**** metamode = $metamode\n";
1223
[25097]1224 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1225 {
1226 my $name_attr = $attrHash->{'name'};
1227 if (($name_attr eq $metaname) && ($metamode eq "override"))
1228 {
1229 if (!defined $parser->{'parameters'}->{'poscount'})
1230 {
1231 $parser->{'parameters'}->{'poscount'} = 0;
1232 }
1233 else
1234 {
1235 $parser->{'parameters'}->{'poscount'}++;
1236 }
1237
1238 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1239 {
[27111]1240 ##print STDERR "#### got match!!\n";
[25097]1241 # Get the value and override the current value
1242 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1243 $attrHash->{'_content'} = $metavalue;
1244
1245 # Don't want it to wipe out any other pieces of metadata
1246 $parser->{'parameters'}->{'metamode'} = "done";
1247 }
[25891]1248 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
[25097]1249 {
[25102]1250 my $metavalue = $parser->{'parameters'}->{'metavalue'};
[25097]1251 $attrHash->{'_content'} = $metavalue;
1252 $parser->{'parameters'}->{'metamode'} = "done";
1253 }
1254 }
[20538]1255 }
1256
[21716]1257 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1258 return [$tagname => $attrHash];
[20538]1259}
1260
[27111]1261# This method exists purely for catching invalid section numbers that the client
1262# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1263# we've seen all the Sections in the doc.xml, and none of their section nums matched
1264# if the metamode has not been set to 'done' by then.
1265sub dxml_archive
1266{
1267 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1268 my $metamode = $parser->{'parameters'}->{'metamode'};
1269
1270 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1271 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1272
1273# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1274
1275 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
[27112]1276 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
[20538]1277
[27111]1278 my $metaname = $parser->{'parameters'}->{'metaname'};
1279 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1280
[27112]1281 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1282 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1283 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1284 print STDERR "\tin section $opt_doc_secnum.\n";
1285 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
[27111]1286 }
1287
1288 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1289 return [$tagname => $attrHash];
1290}
1291
[20538]1292sub dxml_description
1293{
[25558]1294 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1295 my $metamode = $parser->{'parameters'}->{'metamode'};
[20538]1296
[27111]1297 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
[27173]1298 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
[27111]1299
[27007]1300 print STDERR "**** Processing a closing </Description> tag \n";
[27111]1301# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
[25891]1302
[25558]1303 # Accumulate the metadata
[23761]1304
[27111]1305 # We'll be accumulating metadata at this point if we haven't found and therefore
1306 # haven't processed the metadata yet.
1307 # For subsections, this means that if we're at a matching subsection, but haven't
1308 # found the correct metaname to override in that subsection, we accumulate it as new
1309 # meta in the subsection by adding it to the current description.
1310 # If there's no subsection info for the metadata, it will accumulate at the top level
1311 # section description if we hadn't found a matching metaname to override at this point.
1312
1313 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1314 # at the toplevel section. In the latter case, it means we want to process meta in the
1315 # toplevel section. So the eq check between the values below will work in all cases.
1316
1317 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1318 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1319 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1320
1321 if ($curr_secnum eq $opt_doc_secnum
1322 && ($metamode eq "accumulate" || $metamode eq "override")) {
[27007]1323 if ($metamode eq "override") {
1324 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1325 }
[20538]1326
[27173]1327 # If we get to here and metamode is override, this means there
[27007]1328 # was no existing value to overide => treat as an append operation
1329
1330 # Tack a new metadata tag on to the end of the <Metadata>+ block
1331 my $metaname = $parser->{'parameters'}->{'metaname'};
1332 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1333
1334 my $metadata_attr = {
1335 '_content' => $metavalue,
1336 'name' => $metaname,
1337 'mode' => "accumulate"
1338 };
1339
1340 my $append_metadata = [ "Metadata" => $metadata_attr ];
1341 my $description_content = $attrHash->{'_content'};
1342
1343 print "Appending metadata to doc.xml\n";
1344
1345 if (ref($description_content)) {
1346 # got some existing interesting nested content
1347 push(@$description_content, " ", $append_metadata ,"\n ");
1348 }
1349 else {
1350 #description_content is most likely a string such as "\n"
1351 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1352 }
1353
1354 $parser->{'parameters'}->{'metamode'} = "done";
1355 }
[25891]1356 else {
[27007]1357 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1358## print STDERR "**** NOT ACCUMULATE?!? \n";
[25891]1359 }
[20538]1360
[21716]1361 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1362 return [$tagname => $attrHash];
[20538]1363}
1364
[21715]1365
[23761]1366sub dxml_start_section
1367{
1368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1369
1370 my $new_depth = scalar(@$contextArray);
1371
[25891]1372 print STDERR "**** START SECTION \n";
1373
[23761]1374 if ($new_depth == 1) {
1375 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1376 $parser->{'parameters'}->{'curr_section_num'} = "";
1377 }
1378
1379 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1380 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1381
1382 my $new_secnum;
1383
1384 if ($new_depth > $old_depth) {
1385 # child subsection
1386 $new_secnum = "$old_secnum.1";
1387 }
1388 elsif ($new_depth == $old_depth) {
1389 # sibling section => increase it's value by 1
1390 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1391 $tail_num++;
1392 $new_secnum = $old_secnum;
1393 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1394 }
1395 else {
1396 # back up to parent section => lopp off tail
1397 $new_secnum = $old_secnum;
1398 $new_secnum =~ s/\.\d+$//;
1399 }
1400
1401 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1402 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
[25891]1403
1404 1;
[23761]1405}
1406
[20538]1407sub edit_xml_file
1408{
1409 my $self = shift @_;
[23761]1410 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
[20538]1411
1412 # use XML::Rules to add it in (read in and out again)
[23761]1413 my $parser = XML::Rules->new(start_rules => $start_rules,
1414 rules => $rules,
1415 style => 'filter',
1416 output_encoding => 'utf8' );
[20538]1417
1418 my $xml_in = "";
1419 if (!open(MIN,"<$filename")) {
1420 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1421 }
1422 else {
[21715]1423 # Read all the text in
[20538]1424 my $line;
1425 while (defined ($line=<MIN>)) {
1426 $xml_in .= $line;
1427 }
1428 close(MIN);
1429
[23761]1430 my $MOUT;
1431 if (!open($MOUT,">$filename")) {
[20538]1432 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1433 }
1434 else {
[23761]1435 # Matched lines will get handled by the call backs
1436## my $xml_out = "";
1437
1438 binmode($MOUT,":utf8");
1439 $parser->filter($xml_in,$MOUT, $options);
1440
1441# binmode(MOUT,":utf8");
1442# print MOUT $xml_out;
1443 close($MOUT);
[20538]1444 }
1445 }
[27112]1446
1447 # copy across any custom error information that was stored during parsing
1448 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
[20538]1449}
1450
1451sub edit_doc_xml
1452{
1453 my $self = shift @_;
[25097]1454 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
[20538]1455
[27007]1456 my $info_mess = <<RAWEND;
1457****************************
1458 edit_doc_xml()
1459****************************
1460doc_xml_filename = $doc_xml_filename
1461metaname = $metaname
1462metavalue = $metavalue
1463metapos = $metapos
1464metamode = $metamode
1465opt_secnum = $opt_secnum
1466prevmetavalue = $prevmetavalue
1467****************************
1468RAWEND
1469
1470 $gsdl_cgi->generate_message($info_mess);
[25891]1471
[23761]1472 # To monitor which section/subsection number we are in
1473 my @start_rules =
1474 ( 'Section' => \&dxml_start_section );
1475
[20538]1476 # use XML::Rules to add it in (read in and out again)
[21715]1477 # Set the call back functions
[20538]1478 my @rules =
[21716]1479 ( _default => 'raw',
[23761]1480 'Metadata' => \&dxml_metadata,
[27111]1481 'Description' => \&dxml_description,
1482 'Archive' => \&dxml_archive); # just for catching errors at end
[20538]1483
[21715]1484 # Sets the parameters
[20538]1485 my $options = { 'metaname' => $metaname,
1486 'metapos' => $metapos,
[23400]1487 'metavalue' => $metavalue,
[25097]1488 'metamode' => $metamode,
1489 'prevmetavalue' => $prevmetavalue };
[23400]1490
[23761]1491 if (defined $opt_secnum) {
1492 $options->{'secnum'} = $opt_secnum;
1493 }
1494
1495 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
[20538]1496}
1497
[24071]1498sub set_archives_metadata_entry
1499{
1500 my $self = shift @_;
[25891]1501 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
[24071]1502
[27007]1503 my $info_mess = <<RAWEND;
1504****************************
1505 set_archives_metadata_entry()
1506****************************
1507archive_dir = $archive_dir
1508collect_dir = $collect_dir
1509collect = $collect
1510infodbtype = $infodbtype
1511docid = $docid
1512metaname = $metaname
1513metapos = $metapos
1514metavalue = $metavalue
1515metamode = $metamode
1516prevmetavalue = $prevmetavalue
1517****************************
1518RAWEND
1519
1520 $gsdl_cgi->generate_message($info_mess);
[25891]1521
[24071]1522 # Obtain the doc.xml path for the specified docID
1523 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
[20538]1524
[24071]1525 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1526 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1527 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1528
1529 # The $doc_xml_file is relative to the archives, and now let's get the full path
1530 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1531 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
[27167]1532
1533 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1534 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1535 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1536 if(!defined $metapos && $metamode eq "override") {
1537 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1538 }
[27168]1539
[24071]1540 # Edit the doc.xml file with the specified metadata name, value and position.
1541 # TODO: there is a potential problem here as this edit_doc_xml function
1542 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1543 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1544
[27168]1545 # dxml_metadata method ignores metapos if metamode anything other than override
[24071]1546 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
[25097]1547 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
[27112]1548
1549 # return 0; # return 0 for now to indicate no error
1550 return (defined $self->{'error_msg'}) ? 1 : 0;
[24071]1551}
1552
1553
[20538]1554sub set_archives_metadata
1555{
1556 my $self = shift @_;
1557
1558 my $username = $self->{'username'};
1559 my $collect = $self->{'collect'};
1560 my $gsdl_cgi = $self->{'gsdl_cgi'};
[23400]1561
[20538]1562 if ($baseaction::authentication_enabled) {
[27168]1563 # Ensure the user is allowed to edit this collection
1564 $self->authenticate_user($username, $collect);
[20538]1565 }
1566
1567 # Make sure the collection isn't locked by someone else
1568 $self->lock_collection($username, $collect);
1569
[27157]1570 $self->_set_archives_metadata(@_);
[23400]1571
[24071]1572 # Release the lock once it is done
1573 $self->unlock_collection($username, $collect);
1574}
1575
[27168]1576sub _set_archives_metadata_array
1577{
1578 my $self = shift @_;
1579
1580 my $collect = $self->{'collect'};
1581 my $gsdl_cgi = $self->{'gsdl_cgi'};
1582 my $site = $self->{'site'};
1583 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[24071]1584
[27168]1585 # look up additional args
1586
1587 my $infodbtype = $self->{'infodbtype'};
1588
1589 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1590
1591 my $json_str = $self->{'json'};
1592 my $doc_array = decode_json $json_str;
1593
1594
1595 my $global_status = 0;
1596 my $global_mess = "";
1597
1598 my @all_docids = ();
1599
1600 foreach my $doc_array_rec ( @$doc_array ) {
1601 my $status = -1;
1602 my $docid = $doc_array_rec->{'docid'};
1603
1604 push(@all_docids,$docid);
1605
1606 my $metaname = $doc_array_rec->{'metaname'};
1607 if(defined $metaname) {
1608
[27176]1609 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1610
[27168]1611 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1612 my $metavalue = $doc_array_rec->{'metavalue'};
1613 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1614
1615
1616 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1617 # make "accumulate" the default (less destructive, as it won't actually
1618 # delete any existing values)
1619 $metamode = "accumulate";
1620 }
1621
1622 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1623 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1624 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1625 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1626
1627 foreach my $metatable_rec ( @$metatable ) {
1628 $metaname = $metatable_rec->{'metaname'};
1629 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1630 my $metapos = undef;
1631 my $prevmetavalue = undef;
1632 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1633
1634 foreach my $metavalue ( @$metavals ) {
1635 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1636 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1637
1638 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1639 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1640 # metavalue for this metaname will be lost
1641 $metamode = "accumulate";
1642 }
1643 }
1644 }
1645 }
1646
1647 if ($status != 0) {
1648 # Catch error if set infodb entry failed
1649 $global_status = $status;
1650 $global_mess .= "Failed to set metadata key: $docid\n";
1651 $global_mess .= "Exit status: $status\n";
1652 $global_mess .= "System Error Message: $!\n";
1653 $global_mess .= "-" x 20 . "\n";
1654 }
1655 }
1656
1657 if ($global_status != 0) {
1658 $global_mess .= "PATH: $ENV{'PATH'}\n";
1659 $gsdl_cgi->generate_error($global_mess);
1660 }
1661 else {
1662 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1663 $gsdl_cgi->generate_ok_message($mess);
1664 }
1665}
1666
[24071]1667sub set_archives_metadata_array
1668{
1669 my $self = shift @_;
1670
1671 my $username = $self->{'username'};
1672 my $collect = $self->{'collect'};
1673 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]1674# my $gsdlhome = $self->{'gsdlhome'};
[24071]1675
1676 if ($baseaction::authentication_enabled) {
1677 # Ensure the user is allowed to edit this collection
1678 &authenticate_user($gsdl_cgi, $username, $collect);
[23400]1679 }
[23761]1680
[24071]1681 my $site = $self->{'site'};
1682 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1683
1684 $gsdl_cgi->checked_chdir($collect_dir);
1685
1686 # Obtain the collect dir
1687 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1688
1689 # Make sure the collection isn't locked by someone else
1690 $self->lock_collection($username, $collect);
1691
[27168]1692 $self->_set_archives_metadata_array(@_);
[20538]1693
[23761]1694 # Release the lock once it is done
1695 $self->unlock_collection($username, $collect);
[20538]1696}
1697
[27157]1698sub _remove_archives_metadata
[24943]1699{
1700 my $self = shift @_;
[20538]1701
[24943]1702 my $collect = $self->{'collect'};
1703 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]1704# my $gsdlhome = $self->{'gsdlhome'};
[24943]1705 my $infodbtype = $self->{'infodbtype'};
1706
1707 my $site = $self->{'site'};
1708
1709 # Obtain the collect and archive dir
1710 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1711
1712 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1713
1714 # look up additional args
1715 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1716
1717 my $metaname = $self->{'metaname'};
1718 my $metapos = $self->{'metapos'};
[27168]1719
[27160]1720 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1721
1722 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1723 # default to deleting the first metavalue for the given metaname
1724 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1725 # seem to get deleted in one single remove_archives_meta action invocation.
1726 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1727 # deleted, both get deleted.
1728 if(!defined $metapos && !defined $metavalue) {
1729 $metapos = 0;
1730 }
[27167]1731
[27168]1732 my $metamode = $self->{'metamode'} || undef;
1733
[24943]1734 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1735 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1736
1737 # This now stores the full pathname
[25097]1738 my $doc_filename = $doc_rec->{'doc-file'}->[0];
[24943]1739
[27167]1740 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
[27160]1741# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
[24943]1742
1743 if ($status == 0)
1744 {
1745 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1746 $mess .= " $metaname";
1747 $mess .= "->[$metapos]" if (defined $metapos);
1748
1749 $gsdl_cgi->generate_ok_message($mess);
1750 }
1751 else
1752 {
1753 my $mess .= "Failed to remove archives metadata key: $docid\n";
1754 $mess .= "Exit status: $status\n";
1755 $mess .= "System Error Message: $!\n";
1756 $mess .= "-" x 20 . "\n";
1757
1758 $gsdl_cgi->generate_error($mess);
1759 }
[27157]1760
1761 #return $status; # in case calling functions have a use for this
[24943]1762}
1763
[27157]1764sub remove_archives_metadata
1765{
1766 my $self = shift @_;
1767
1768 my $username = $self->{'username'};
1769 my $collect = $self->{'collect'};
1770 my $gsdl_cgi = $self->{'gsdl_cgi'};
1771
1772 if ($baseaction::authentication_enabled)
1773 {
1774 # Ensure the user is allowed to edit this collection
1775 &authenticate_user($gsdl_cgi, $username, $collect);
1776 }
1777
1778 # Make sure the collection isn't locked by someone else
1779 $self->lock_collection($username, $collect);
1780
1781 $self->_remove_archives_metadata(@_);
1782
1783 # Release the lock once it is done
1784 $self->unlock_collection($username, $collect);
1785}
1786
[24943]1787sub remove_from_doc_xml
1788{
1789 my $self = shift @_;
[27167]1790 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
[24943]1791
1792 my @start_rules = ('Section' => \&dxml_start_section);
1793
1794 # Set the call-back functions for the metadata tags
1795 my @rules =
1796 (
1797 _default => 'raw',
1798 'Metadata' => \&rfdxml_metadata
1799 );
1800
1801 my $parser = XML::Rules->new
1802 (
1803 start_rules => \@start_rules,
1804 rules => \@rules,
1805 style => 'filter',
[27173]1806 output_encoding => 'utf8',
1807# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1808 stripspaces => 2|0|0 # ineffectual
[24943]1809 );
1810
1811 my $status = 0;
1812 my $xml_in = "";
1813 if (!open(MIN,"<$doc_xml_filename"))
1814 {
1815 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1816 $status = 1;
1817 }
1818 else
1819 {
1820 # Read them in
1821 my $line;
1822 while (defined ($line=<MIN>)) {
1823 $xml_in .= $line;
1824 }
1825 close(MIN);
1826
1827 # Filter with the call-back functions
1828 my $xml_out = "";
1829
1830 my $MOUT;
1831 if (!open($MOUT,">$doc_xml_filename")) {
1832 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1833 $status = 1;
1834 }
1835 else {
1836 binmode($MOUT,":utf8");
[27167]1837 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
[24943]1838 close($MOUT);
1839 }
1840 }
1841 return $status;
1842}
1843
1844sub rfdxml_metadata
1845{
1846 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1847
[27173]1848 # For comparisons, toplevel section is indicated by ""
1849 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1850 my $secid = $parser->{'parameters'}->{'secid'} || "";
1851
1852 if (!($secid eq $curr_sec_num))
[24943]1853 {
1854 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1855 return [$tagname => $attrHash];
1856 }
1857
1858 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1859 {
1860 if (!defined $parser->{'parameters'}->{'poscount'})
1861 {
1862 $parser->{'parameters'}->{'poscount'} = 0;
1863 }
1864 else
1865 {
1866 $parser->{'parameters'}->{'poscount'}++;
1867 }
[27168]1868
[27176]1869 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
[27167]1870 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
1871 return [];
1872 }
[24943]1873 }
[27168]1874
[24943]1875 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1876 {
1877 return [];
1878 }
[27168]1879
[24949]1880 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1881 {
1882 return [];
1883 }
1884
[24943]1885 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1886 return [$tagname => $attrHash];
1887}
1888
[19293]1889sub mxml_metadata
1890{
1891 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1892 my $metaname = $parser->{'parameters'}->{'metaname'};
1893 my $metamode = $parser->{'parameters'}->{'metamode'};
1894
[21716]1895 # Report error if we don't see FileName tag before this
[27173]1896 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1897
1898 # Don't do anything if we are not in the right FileSet
1899 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[23761]1900 if ($file_regexp =~ /\.\*/) {
1901 # Only interested in a file_regexp if it specifies precisely one
1902 # file.
1903 # So, skip anything with a .* in it as it is too general
[27176]1904## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
[23761]1905 return [$tagname => $attrHash];
1906 }
1907 my $src_file = $parser->{'parameters'}->{'src_file'};
1908 if (!($src_file =~ /$file_regexp/)) {
1909 return [$tagname => $attrHash];
1910 }
1911## print STDERR "*** mxl metamode = $metamode\n";
1912
[21715]1913 # Find the right metadata tag and checks if we are going to override it
[19293]1914 my $name_attr = $attrHash->{'name'};
1915 if (($name_attr eq $metaname) && ($metamode eq "override")) {
[27176]1916
1917 # now metadata.xml functions need to keep track of metapos
1918 if (!defined $parser->{'parameters'}->{'poscount'})
1919 {
1920 $parser->{'parameters'}->{'poscount'} = 0;
1921 }
1922 else
1923 {
1924 $parser->{'parameters'}->{'poscount'}++;
1925 }
1926
1927 # If either the metapos or prevmetavalue is set,
1928 # get the value and override the current value
[19293]1929 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1930
[27176]1931 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1932 {
1933 $attrHash->{'_content'} = $metavalue;
[23761]1934
[27176]1935 ## print STDERR "**** overriding metadata.xml\n";
1936
1937 # Don't want it to wipe out any other pieces of metadata
1938 $parser->{'parameters'}->{'metamode'} = "done";
1939 }
1940 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1941 {
1942 $attrHash->{'_content'} = $metavalue;
1943 $parser->{'parameters'}->{'metamode'} = "done";
1944 }
1945 }
[19293]1946
[27176]1947 # mxml_description will process the metadata if metadata is accumulate,
1948 # or if we haven't found the metadata to override
1949
[21716]1950 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1951 return [$tagname => $attrHash];
[19293]1952}
1953
1954
1955sub mxml_description
1956{
1957 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]1958 my $metamode = $parser->{'parameters'}->{'metamode'};
[19293]1959
[21716]1960 # Failed... Report error if we don't see FileName tag before this
[25097]1961 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1962
1963 # Don't do anything if we are not in the right FileSet
1964 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[24943]1965 if ($file_regexp =~ m/\.\*/) {
[23761]1966 # Only interested in a file_regexp if it specifies precisely one
1967 # file.
1968 # So, skip anything with a .* in it as it is too general
1969 return [$tagname => $attrHash];
1970 }
1971 my $src_file = $parser->{'parameters'}->{'src_file'};
[24943]1972
1973 if (!($src_file =~ m/$file_regexp/)) {
[23761]1974 return [$tagname => $attrHash];
1975 }
[21716]1976
[21715]1977 # Accumulate the metadata block to the end of the description block
1978 # Note: This adds metadata block to all description blocks, so if there are
1979 # multiple FileSets, it will add to all of them
[23761]1980 if (($metamode eq "accumulate") || ($metamode eq "override")) {
[27176]1981
[23761]1982 # if metamode was "override" but get to here then it failed to
1983 # find an item to override, in which case it should append its
1984 # value to the end, just like the "accumulate" mode
1985
[27176]1986 if ($metamode eq "override") {
1987 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1988 }
1989
[19293]1990 # tack a new metadata tag on to the end of the <Metadata>+ block
1991 my $metaname = $parser->{'parameters'}->{'metaname'};
1992 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1993
1994 my $metadata_attr = { '_content' => $metavalue,
1995 'name' => $metaname,
1996 'mode' => "accumulate" };
1997
1998 my $append_metadata = [ "Metadata" => $metadata_attr ];
1999 my $description_content = $attrHash->{'_content'};
[24943]2000
[23761]2001## print STDERR "*** appending to metadata.xml\n";
2002
2003 # append the new metadata element to the end of the current
2004 # content contained inside this tag
[24943]2005 if (ref($description_content) eq "") {
2006 # => string or numeric literal
2007 # this is caused by a <Description> block has no <Metadata> child elements
2008 # => set up an empty array in '_content'
2009 $attrHash->{'_content'} = [ "\n" ];
2010 $description_content = $attrHash->{'_content'};
2011 }
2012
[19293]2013 push(@$description_content," ", $append_metadata ,"\n ");
[23761]2014 $parser->{'parameters'}->{'metamode'} = "done";
[19293]2015 }
2016
[21716]2017 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2018 return [$tagname => $attrHash];
[19293]2019}
2020
[21715]2021
[21716]2022sub mxml_filename
2023{
2024 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2025
2026 # Store the filename of the Current Fileset
2027 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2028 # FileName tag must come before Description tag
2029 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2030
2031 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2032 return [$tagname => $attrHash];
2033}
2034
2035
2036sub mxml_fileset
2037{
2038 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2039
2040 # Initilise the current_file
2041 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2042 # FileName tag must come before Description tag
2043 $parser->{'parameters'}->{'current_file'} = "";
2044
2045 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2046 return [$tagname => $attrHash];
2047}
2048
[27173]2049sub mxml_directorymetadata
2050{
2051 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]2052
[27173]2053 # if we haven't processed the metadata when we reach the end of metadata.xml
2054 # it's because there's no particular FileSet element whose FileName matched
2055 # In which case, add a new FileSet for this FileName
2056 my $metamode = $parser->{'parameters'}->{'metamode'};
2057 if($metamode ne "done") {
2058
2059 if ($metamode eq "override") {
2060 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2061 }
2062
2063 # If we get to here and metamode is override, this means there
2064 # was no existing value to overide => treat as an append operation
2065
2066 # Create a new FileSet element and append to DirectoryMetadata
2067 # <FileSet>
2068 # <FileName>src_file</FileName>
2069 # <Description>
2070 # <Metadata mode="" name="">metavalue</Metadata>
2071 # </Description>
2072 # </FileSet>
2073 my $src_file = $parser->{'parameters'}->{'src_file'};
2074 my $metaname = $parser->{'parameters'}->{'metaname'};
2075 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2076 my $metadata_attr = {
2077 '_content' => $metavalue,
2078 'name' => $metaname,
2079 'mode' => "accumulate"
2080 };
2081 my $append_metadata = [ "Metadata" => $metadata_attr ];
2082 my $description_attr->{'_content'} = [ "\n\t\t ", $append_metadata, "\n\t\t"];
2083 my $description_element = [ "Description" => $description_attr ];
2084
2085 #_content is not an attribute, it's special and holds the children of this element
2086 # including the textnode value embedded in this element if any.
2087 my $filename_attr = {'_content' => $src_file};
2088 my $filename_element = [ "FileName" => $filename_attr ];
2089
2090 my $fileset_attr = {};
2091 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2092 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2093
2094
2095 # get children of dirmeta, and push the new FileSet element onto it
2096 print "Appending metadata to metadata.xml\n";
2097 my $dirmeta_content = $attrHash->{'_content'};
2098 if (ref($dirmeta_content)) {
2099 # got some existing interesting nested content
2100 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2101 push(@$dirmeta_content, "\t", $fileset ,"\n");
2102 }
2103 else {
2104 #description_content is most likely a string such as "\n"
2105 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2106 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2107 }
2108
2109 $parser->{'parameters'}->{'metamode'} = "done";
2110 }
2111 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2112 return [$tagname => $attrHash];
2113}
2114
2115
[19293]2116sub edit_metadata_xml
2117{
2118 my $self = shift @_;
[27176]2119 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
[19293]2120
[21715]2121 # Set the call-back functions for the metadata tags
[19293]2122 my @rules =
[21716]2123 ( _default => 'raw',
2124 'FileName' => \&mxml_filename,
[19293]2125 'Metadata' => \&mxml_metadata,
[21716]2126 'Description' => \&mxml_description,
[27173]2127 'FileSet' => \&mxml_fileset,
2128 'DirectoryMetadata' => \&mxml_directorymetadata);
[19293]2129
[21715]2130 # use XML::Rules to add it in (read in and out again)
[19293]2131 my $parser = XML::Rules->new(rules => \@rules,
[21716]2132 style => 'filter',
[27173]2133 output_encoding => 'utf8',
2134 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
[19293]2135
[24943]2136 if (!-e $metadata_xml_filename) {
2137
2138 if (open(MOUT,">$metadata_xml_filename")) {
2139
2140 my $src_file_re = &util::filename_to_regex($src_file);
2141 # shouldn't the following also be in the above utility routine??
2142 # $src_file_re =~ s/\./\\./g;
2143
2144 print MOUT "<?xml version=\"1.0\"?>\n";
2145 print MOUT "<DirectoryMetadata>\n";
2146 print MOUT " <FileSet>\n";
2147 print MOUT " <FileName>$src_file_re</FileName>\n";
2148 print MOUT " <Description>\n";
2149 print MOUT " </Description>\n";
2150 print MOUT " </FileSet>\n";
2151 print MOUT "</DirectoryMetadata>\n";
[23761]2152
[24943]2153 close(MOUT);
2154 }
2155 else {
2156 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2157 }
[19293]2158 }
[24943]2159
2160
2161 my $xml_in = "";
2162 if (!open(MIN,"<$metadata_xml_filename")) {
2163 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2164 }
[19293]2165 else {
[24943]2166 # Read them in
2167 my $line;
2168 while (defined ($line=<MIN>)) {
2169 $xml_in .= $line;
2170 }
2171 close(MIN);
[23761]2172
[24943]2173 # Filter with the call-back functions
2174 my $xml_out = "";
[23761]2175
[24943]2176 my $MOUT;
2177 if (!open($MOUT,">$metadata_xml_filename")) {
2178 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2179 }
2180 else {
2181 binmode($MOUT,":utf8");
2182
2183 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2184 # At the moment, I will just hack it!
2185 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2186 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2187 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2188 #print MOUT $xml_out;
2189
2190 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
[27176]2191 metapos => $metapos,
[24943]2192 metavalue => $metavalue,
2193 metamode => $metamode,
2194 src_file => $src_file,
[27176]2195 prevmetavalue => $prevmetavalue,
[24943]2196 current_file => undef} );
2197 close($MOUT);
2198 }
2199 }
[20538]2200}
[19293]2201
2202
2203sub set_import_metadata
2204{
2205 my $self = shift @_;
[21715]2206
[19293]2207 my $username = $self->{'username'};
2208 my $collect = $self->{'collect'};
2209 my $gsdl_cgi = $self->{'gsdl_cgi'};
[23400]2210
[19293]2211 if ($baseaction::authentication_enabled) {
[25097]2212 # Ensure the user is allowed to edit this collection
2213 $self->authenticate_user($username, $collect);
[19293]2214 }
2215
2216 # Make sure the collection isn't locked by someone else
2217 $self->lock_collection($username, $collect);
[27157]2218
2219 $self->_set_import_metadata(@_);
[19293]2220
[21715]2221 # Release the lock once it is done
2222 $self->unlock_collection($username, $collect);
[23761]2223
[19293]2224}
2225
[25097]2226sub set_import_metadata_array
2227{
[27168]2228 my $self = shift @_;
[25097]2229
2230 my $username = $self->{'username'};
2231 my $collect = $self->{'collect'};
2232 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]2233# my $gsdlhome = $self->{'gsdlhome'};
[25097]2234
2235 if ($baseaction::authentication_enabled) {
[27168]2236 # Ensure the user is allowed to edit this collection
2237 &authenticate_user($gsdl_cgi, $username, $collect);
[25097]2238 }
2239
[27168]2240 my $site = $self->{'site'};
2241 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2242
[25097]2243 $gsdl_cgi->checked_chdir($collect_dir);
2244
2245 # Make sure the collection isn't locked by someone else
2246 $self->lock_collection($username, $collect);
2247
[27168]2248 $self->_set_import_metadata_array(@_);
[25097]2249
[27168]2250 # Release the lock once it is done
2251 $self->unlock_collection($username, $collect);
2252
2253}
2254
2255
2256sub _set_import_metadata_array
2257{
2258 my $self = shift @_;
2259
2260 my $collect = $self->{'collect'};
2261 my $gsdl_cgi = $self->{'gsdl_cgi'};
2262
2263 my $site = $self->{'site'};
2264 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[25097]2265
[27168]2266 # look up additional args
[25097]2267
[27168]2268 my $infodbtype = $self->{'infodbtype'};
2269
2270 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2271 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2272
2273 my $json_str = $self->{'json'};
2274 my $doc_array = decode_json $json_str;
2275
2276 my $global_status = 0;
2277 my $global_mess = "";
2278
2279 my @all_docids = ();
2280
2281 foreach my $doc_array_rec ( @$doc_array )
2282 {
2283 my $status = -1;
2284 my $docid = $doc_array_rec->{'docid'};
[25097]2285
[27176]2286 my ($docid_root,$docid_secnum);
2287 if(defined $docid) {
2288 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2289 # as yet no support for setting subsection metadata in metadata.xml
2290 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2291 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2292 next; # skip this docid in for loop
2293 }
2294 }
2295
2296 push(@all_docids,$docid); # docid_root rather
[25097]2297
[27168]2298 my $metaname = $doc_array_rec->{'metaname'};
2299 if (defined $metaname) {
2300 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2301 my $metavalue = $doc_array_rec->{'metavalue'};
[27176]2302 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2303
[27168]2304 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2305 # make "accumulate" the default (less destructive, as won't actually
2306 # delete any existing values)
2307 $metamode = "accumulate";
2308 }
[27176]2309
2310 # adding metapos and prevmetavalue support to import_metadata subroutines
2311 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2312 my $prevmetavalue = $self->{'prevmetavalue'};
2313
2314 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27168]2315
2316 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2317 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2318
2319 foreach my $metatable_rec ( @$metatable ) {
2320 $metaname = $metatable_rec->{'metaname'};
2321 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
[25097]2322 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
[27168]2323 # make "accumulate" the default (less destructive, as won't actually
2324 # delete any existing values)
2325 $metamode = "accumulate";
2326 }
[27176]2327
2328 # No support for metapos and prevmetavalue in the JSON metatable substructure
2329 my $metapos = undef;
2330 my $prevmetavalue = undef;
[27168]2331 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2332
2333 foreach my $metavalue ( @$metavals ) {
[27176]2334 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2335
2336 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27168]2337 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2338 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2339 # metavalue for this metaname will be lost
[25097]2340 $metamode = "accumulate";
[27168]2341 }
2342 }
2343 }
2344 }
2345 }
[25097]2346
[27168]2347 # always a success message
2348 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2349 $gsdl_cgi->generate_ok_message($mess);
2350}
[25097]2351
[27168]2352# always returns true (1)
2353sub set_import_metadata_entry
2354{
2355 my $self = shift @_;
[27176]2356 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
[25097]2357
[27176]2358 my $info_mess = <<RAWEND;
2359****************************
2360 set_import_metadata_entry()
2361****************************
2362collect = $collect
2363collect_dir = $collect_dir
2364infodbtype = $infodbtype
2365arcinfo_doc_filename = $arcinfo_doc_filename
2366docid = $docid
2367metaname = $metaname
2368metapos = $metapos
2369metavalue = $metavalue
2370metamode = $metamode
2371prevmetavalue = $prevmetavalue
2372****************************
2373RAWEND
2374
2375 $gsdl_cgi->generate_message($info_mess);
2376
2377 # import works with metadata.xml which can have inherited metadata
2378 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2379 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2380 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2381 # what they're doing if they provide a metapos.
2382 if(defined $metapos) {
2383 print STDERR "@@@@ WARNING: metapos defined.\n";
2384 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2385 }
2386
[27168]2387 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2388 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2389 my $metadata_xml_file;
2390 my $import_filename = undef;
2391
[27176]2392 if (defined $docid) {
2393 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2394 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[25097]2395
[27176]2396 # This now stores the full pathname
2397 $import_filename = $doc_rec->{'src-file'}->[0];
2398 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2399 # as the array version of the method doesn't support the -f parameter yet
2400 my $import_file = $self->{'f'};
2401 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2402 }
[25097]2403
[27168]2404 # figure out correct metadata.xml file [?]
2405 # Assuming the metadata.xml file is next to the source file
2406 # Note: This will not work if it is using the inherited metadata from the parent folder
2407 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2408 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2409
[27176]2410 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2411 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2412 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2413 if(!defined $metapos && $metamode eq "override") {
2414## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2415 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2416
2417 }
2418
2419 # Edit the metadata.xml
2420 # Modified by Jeffrey from DL Consulting
2421 # Handle the case where there is one metadata.xml file for multiple FileSets
2422 # The XML filter needs to know whether it is in the right FileSet
2423 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2424 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2425 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2426 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2427 #return 0;
2428 return $metadata_xml_filename;
[25097]2429}
2430
[27157]2431sub _remove_import_metadata
[24943]2432{
2433 my $self = shift @_;
[27157]2434
[24943]2435 my $collect = $self->{'collect'};
2436 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]2437# my $gsdlhome = $self->{'gsdlhome'};
[24943]2438 my $infodbtype = $self->{'infodbtype'};
2439
2440 # Obtain the collect dir
2441 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2442 my $site = $self->{'site'};
2443 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2444
2445 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2446 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
[27157]2447
[24943]2448 # look up additional args
2449 my $docid = $self->{'d'};
2450 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2451 {
[27173]2452 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
[24943]2453 }
2454
2455 my $metaname = $self->{'metaname'};
[27176]2456 my $metapos = $self->{'metapos'};
[24943]2457 my $metavalue = $self->{'metavalue'};
[27176]2458 if(defined $metavalue) {
2459 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2460 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2461 $metapos = 0;
2462 }
2463 my $metamode = $self->{'metamode'} || undef;
2464
2465 # import works with metadata.xml which can have inherited metadata
2466 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2467 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2468 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2469 # what they're doing if they provide a metapos.
2470 if(defined $metapos) {
2471 print STDERR "@@@@ WARNING: metapos defined.\n";
2472 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2473 }
[24943]2474
2475 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2476 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2477 my $metadata_xml_file;
2478 my $import_filename = undef;
2479 if (defined $docid)
2480 {
2481 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2482 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2483
2484 # This now stores the full pathname
2485 $import_filename = $doc_rec->{'src-file'}->[0];
2486 }
2487
2488 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2489 {
[27173]2490 $gsdl_cgi->generate_error("There is no metadata\n");
[24943]2491 }
2492
2493 # figure out correct metadata.xml file [?]
2494 # Assuming the metadata.xml file is next to the source file
2495 # Note: This will not work if it is using the inherited metadata from the parent folder
2496 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2497 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2498
[27176]2499 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
[24943]2500
2501 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2502 $mess .= " $metaname";
2503 $mess .= " = $metavalue\n";
2504
2505 $gsdl_cgi->generate_ok_message($mess);
[27157]2506
2507 #return $status; # in case calling functions have a use for this
[24943]2508}
2509
[27157]2510sub remove_import_metadata
2511{
2512 my $self = shift @_;
2513
2514 my $username = $self->{'username'};
2515 my $collect = $self->{'collect'};
2516 my $gsdl_cgi = $self->{'gsdl_cgi'};
2517
2518 if ($baseaction::authentication_enabled) {
2519 # Ensure the user is allowed to edit this collection
2520 &authenticate_user($gsdl_cgi, $username, $collect);
2521 }
2522
2523 # Make sure the collection isn't locked by someone else
2524 $self->lock_collection($username, $collect);
2525
2526 $self->_remove_import_metadata(@_);
2527
2528 # Release the lock once it is done
2529 $self->unlock_collection($username, $collect);
2530
2531}
2532
[24943]2533sub remove_from_metadata_xml
2534{
2535 my $self = shift @_;
[27176]2536 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2537 # metamode generally has no meaning for removing meta, but is used by set_meta
2538 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2539
[24943]2540 # Set the call-back functions for the metadata tags
2541 my @rules =
2542 (
2543 _default => 'raw',
2544 'Metadata' => \&rfmxml_metadata,
2545 'FileName' => \&mxml_filename
2546 );
2547
2548 my $parser = XML::Rules->new
2549 (
2550 rules => \@rules,
2551 style => 'filter',
[27173]2552 output_encoding => 'utf8',
2553 #normalisespaces => 1,
2554 stripspaces => 2|0|0 # ineffectual
[24943]2555 );
2556
2557 my $xml_in = "";
2558 if (!open(MIN,"<$metadata_xml_filename"))
2559 {
2560 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2561 }
2562 else
2563 {
2564 # Read them in
2565 my $line;
2566 while (defined ($line=<MIN>)) {
2567 $xml_in .= $line;
2568 }
2569 close(MIN);
2570
2571 # Filter with the call-back functions
2572 my $xml_out = "";
2573
2574 my $MOUT;
2575 if (!open($MOUT,">$metadata_xml_filename")) {
2576 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2577 }
2578 else {
2579 binmode($MOUT,":utf8");
[27176]2580 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
[24943]2581 close($MOUT);
2582 }
2583 }
2584}
2585
2586sub rfmxml_metadata
2587{
2588 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2589
[27176]2590 # metadata.xml does not handle subsections
2591
2592 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2593 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2594 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
[24943]2595 {
[27176]2596 if (!defined $parser->{'parameters'}->{'poscount'})
2597 {
2598 $parser->{'parameters'}->{'poscount'} = 0;
2599 }
2600 else
2601 {
2602 $parser->{'parameters'}->{'poscount'}++;
2603 }
2604
2605 # if overriding but no metapos, then clear all the meta for this metaname
2606 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2607 return [];
2608 }
2609
2610 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2611 {
2612 return [];
2613 }
2614
2615 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2616 {
2617 return [];
2618 }
[24943]2619 }
2620
2621 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2622 return [$tagname => $attrHash];
2623}
2624
[27157]2625sub _remove_live_metadata
[19499]2626{
2627 my $self = shift @_;
2628
2629 my $collect = $self->{'collect'};
2630 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]2631# my $gsdlhome = $self->{'gsdlhome'};
[23400]2632 my $infodbtype = $self->{'infodbtype'};
[19499]2633
[21715]2634 # Obtain the collect dir
[23766]2635 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[27157]2636 my $site = $self->{'site'};
2637 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[19499]2638
[27157]2639
[19499]2640 # look up additional args
2641 my $docid = $self->{'d'};
[21715]2642 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2643 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2644 }
2645
2646 # Generate the dbkey
[19499]2647 my $metaname = $self->{'metaname'};
2648 my $dbkey = "$docid.$metaname";
2649
[21715]2650 # To people who know $collect_tail please add some comments
2651 # Obtain the live gdbm_db path
[19499]2652 my $collect_tail = $collect;
2653 $collect_tail =~ s/^.*[\/\\]//;
[21564]2654 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2655 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19499]2656
[21715]2657 # Remove the key
[21569]2658 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
[19499]2659 my $status = system($cmd);
2660 if ($status != 0) {
[21715]2661 # Catch error if gdbmdel failed
[19499]2662 my $mess = "Failed to set metadata key: $dbkey\n";
2663
2664 $mess .= "PATH: $ENV{'PATH'}\n";
2665 $mess .= "cmd = $cmd\n";
2666 $mess .= "Exit status: $status\n";
2667 $mess .= "System Error Message: $!\n";
2668
2669 $gsdl_cgi->generate_error($mess);
2670 }
2671 else {
2672 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2673 }
2674
2675}
2676
[27157]2677sub remove_live_metadata
[19499]2678{
2679 my $self = shift @_;
2680
2681 my $username = $self->{'username'};
2682 my $collect = $self->{'collect'};
2683 my $gsdl_cgi = $self->{'gsdl_cgi'};
2684 my $gsdlhome = $self->{'gsdlhome'};
[23400]2685
[19499]2686 if ($baseaction::authentication_enabled) {
2687 # Ensure the user is allowed to edit this collection
2688 &authenticate_user($gsdl_cgi, $username, $collect);
2689 }
2690
[27157]2691 # Make sure the collection isn't locked by someone else
2692 $self->lock_collection($username, $collect);
[19499]2693
[27157]2694 $self->_remove_live_metadata(@_);
2695
2696 $self->unlock_collection($username, $collect);
2697}
2698
2699sub remove_metadata
2700{
2701 my $self = shift @_;
2702
2703 my $where = $self->{'where'};
2704 if(!$where) {
2705 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2706 return;
2707 }
2708
2709 my $username = $self->{'username'};
2710 my $collect = $self->{'collect'};
2711 my $gsdl_cgi = $self->{'gsdl_cgi'};
2712
2713 if ($baseaction::authentication_enabled) {
2714 # Ensure the user is allowed to edit this collection
2715 &authenticate_user($gsdl_cgi, $username, $collect);
2716 }
2717
[19499]2718 # Make sure the collection isn't locked by someone else
2719 $self->lock_collection($username, $collect);
2720
[27157]2721 # check which directories need to be processed, specified in $where as
2722 # any combination of import|archives|index|live
2723 if($where =~ m/import/) {
2724 $self->_remove_import_metadata(@_);
2725 }
2726 if($where =~ m/archives/) {
2727 $self->_remove_archives_metadata(@_);
2728 }
2729 if($where =~ m/index/) {
2730 $self->_remove_index_metadata(@_);
2731 }
2732
2733 # Release the lock once it is done
2734 $self->unlock_collection($username, $collect);
2735}
2736
2737# the internal version, without authentication
2738sub _remove_index_metadata
2739{
2740 my $self = shift @_;
2741
2742 my $collect = $self->{'collect'};
2743 my $gsdl_cgi = $self->{'gsdl_cgi'};
2744# my $gsdlhome = $self->{'gsdlhome'};
2745 my $infodbtype = $self->{'infodbtype'};
2746
2747 # Obtain the collect dir
2748 my $site = $self->{'site'};
2749 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2750 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2751
2752 # look up additional args
[19499]2753 my $docid = $self->{'d'};
[21715]2754 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2755 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2756 }
[19499]2757 my $metaname = $self->{'metaname'};
2758 my $metapos = $self->{'metapos'};
[27162]2759 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
[19499]2760
[21715]2761 # To people who know $collect_tail please add some comments
2762 # Obtain the path to the database
[19499]2763 my $collect_tail = $collect;
2764 $collect_tail =~ s/^.*[\/\\]//;
[21564]2765 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2766 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]2767
2768 # Read the docid entry
[23400]2769 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21715]2770
2771 # Basically loop through and unescape_html the values
[19499]2772 foreach my $k (keys %$doc_rec) {
2773 my @escaped_v = ();
2774 foreach my $v (@{$doc_rec->{$k}}) {
2775 if ($k eq "contains") {
2776 # protect quotes in ".2;".3 etc
2777 $v =~ s/\"/\\\"/g;
2778 push(@escaped_v, $v);
2779 }
2780 else {
2781 my $ev = &ghtml::unescape_html($v);
2782 $ev =~ s/\"/\\\"/g;
2783 push(@escaped_v, $ev);
2784 }
2785 }
2786 $doc_rec->{$k} = \@escaped_v;
2787 }
2788
[21715]2789 # Check to make sure the key does exist
2790 if (!defined ($doc_rec->{$metaname})) {
2791 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2792 }
2793
2794 # Obtain the specified metadata pos
[27176]2795 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2796 if(!defined $metapos && !defined $metavalue) {
[27162]2797 $metapos = 0;
2798 }
[27168]2799
[19499]2800
2801 # consider check key is defined before deleting?
[21715]2802 # Loop through the metadata array and ignore the specified position
[19499]2803 my $filtered_metadata = [];
[21715]2804 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
[19499]2805 for (my $i=0; $i<$num_metadata_vals; $i++) {
[24949]2806 my $metaval = shift(@{$doc_rec->{$metaname}});
[19499]2807
[24949]2808 if (!defined $metavalue && $i != $metapos) {
[27162]2809 push(@$filtered_metadata,$metaval);
[19499]2810 }
[24949]2811
2812 if(defined $metavalue && !($metavalue eq $metaval))
2813 {
[27162]2814 push(@$filtered_metadata,$metaval);
[24949]2815 }
[19499]2816 }
2817 $doc_rec->{$metaname} = $filtered_metadata;
2818
[27180]2819 ## The following code assumes gdbm
[21715]2820 # Turn the record back to string
[27180]2821 #my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2822 # Store it back to the database
2823 #my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2824 #my $status = system($cmd);
[19499]2825
[27180]2826 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2827 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2828
[19499]2829 if ($status != 0) {
2830 my $mess = "Failed to set metadata key: $docid\n";
2831
2832 $mess .= "PATH: $ENV{'PATH'}\n";
[27180]2833 #$mess .= "cmd = $cmd\n";
[19499]2834 $mess .= "Exit status: $status\n";
2835 $mess .= "System Error Message: $!\n";
2836
2837 $gsdl_cgi->generate_error($mess);
2838 }
2839 else {
2840 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2841 $mess .= " $metaname";
2842 $mess .= "->[$metapos]" if (defined $metapos);
2843
2844 $gsdl_cgi->generate_ok_message($mess);
2845 }
[27155]2846
[27157]2847 #return $status; # in case calling functions have a use for this
2848}
2849
2850sub remove_index_metadata
2851{
2852 my $self = shift @_;
2853
2854 my $username = $self->{'username'};
2855 my $collect = $self->{'collect'};
2856 my $gsdl_cgi = $self->{'gsdl_cgi'};
2857# my $gsdlhome = $self->{'gsdlhome'};
2858
2859 if ($baseaction::authentication_enabled) {
2860 # Ensure the user is allowed to edit this collection
2861 &authenticate_user($gsdl_cgi, $username, $collect);
2862 }
2863
2864 # Obtain the collect dir
2865 my $site = $self->{'site'};
2866 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2867 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2868
2869 # Make sure the collection isn't locked by someone else
2870 $self->lock_collection($username, $collect);
2871
2872 $self->_remove_index_metadata(@_);
2873
[27155]2874 # Release the lock once it is done
2875 $self->unlock_collection($username, $collect);
[19499]2876}
2877
2878
[23761]2879# Was trying to reused the codes, but the functions need to be broken
2880# down more before they can be reused, otherwise there will be too
2881# much overhead and duplicate process...
[21716]2882sub insert_metadata
2883{
2884 my $self = shift @_;
2885
2886 my $username = $self->{'username'};
2887 my $collect = $self->{'collect'};
2888 my $gsdl_cgi = $self->{'gsdl_cgi'};
2889 my $gsdlhome = $self->{'gsdlhome'};
[23400]2890 my $infodbtype = $self->{'infodbtype'};
2891
[23761]2892 # If the import metadata and gdbm database have been updated, we
2893 # need to insert some notification to warn user that the the text
2894 # they see at the moment is not indexed and require a rebuild.
[21716]2895 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2896
2897 if ($baseaction::authentication_enabled) {
2898 # Ensure the user is allowed to edit this collection
2899 $self->authenticate_user($username, $collect);
2900 }
2901
[23766]2902 # Obtain the collect and archive dir
2903 my $site = $self->{'site'};
2904 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2905 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[21716]2906 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2907
2908 # Make sure the collection isn't locked by someone else
2909 $self->lock_collection($username, $collect);
2910
2911 # Check additional args
2912 my $docid = $self->{'d'};
2913 if (!defined($docid)) {
2914 $gsdl_cgi->generate_error("No document id is specified: d=...");
2915 }
2916 my $metaname = $self->{'metaname'};
2917 if (!defined($metaname)) {
2918 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2919 }
2920 my $metavalue = $self->{'metavalue'};
2921 if (!defined($metavalue) || $metavalue eq "") {
2922 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2923 }
2924 # make "accumulate" the default (less destructive, as won't actually
2925 # delete any existing values)
2926 my $metamode = "accumulate";
2927
[27176]2928 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
2929 my $metapos = undef;
2930 my $prevmetavalue = undef;
2931
[21716]2932 #=======================================================================#
2933 # set_import_metadata [START]
2934 #=======================================================================#
2935 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2936 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2937 my $metadata_xml_file;
[23400]2938 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2939 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[21716]2940
2941 # This now stores the full pathname
2942 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2943
2944 # figure out correct metadata.xml file [?]
2945 # Assuming the metadata.xml file is next to the source file
2946 # Note: This will not work if it is using the inherited metadata from the parent folder
2947 my ($import_tailname, $import_dirname)
2948 = File::Basename::fileparse($import_filename);
2949 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2950
2951 # Shane's escape characters
2952 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2953 $metavalue =~ s/\,/&#44;/g;
2954 $metavalue =~ s/\:/&#58;/g;
2955 $metavalue =~ s/\|/&#124;/g;
2956 $metavalue =~ s/\(/&#40;/g;
2957 $metavalue =~ s/\)/&#41;/g;
2958 $metavalue =~ s/\[/&#91;/g;
2959 $metavalue =~ s/\\/&#92;/g;
2960 $metavalue =~ s/\]/&#93;/g;
2961 $metavalue =~ s/\{/&#123;/g;
2962 $metavalue =~ s/\}/&#125;/g;
2963 $metavalue =~ s/\"/&#34;/g;
2964 $metavalue =~ s/\`/&#96;/g;
2965 $metavalue =~ s/\n/_newline_/g;
2966
2967 # Edit the metadata.xml
2968 # Modified by Jeffrey from DL Consulting
2969 # Handle the case where there is one metadata.xml file for multiple FileSets
2970 # The XML filter needs to know whether it is in the right FileSet
2971 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2972 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
[27176]2973 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2974 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
[21716]2975 #=======================================================================#
2976 # set_import_metadata [END]
2977 #=======================================================================#
2978
2979
2980 #=======================================================================#
2981 # set_metadata (accumulate version) [START]
2982 #=======================================================================#
2983 # To people who know $collect_tail please add some comments
2984 # Obtain path to the database
2985 my $collect_tail = $collect;
2986 $collect_tail =~ s/^.*[\/\\]//;
2987 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2988 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21716]2989
2990 # Read the docid entry
[23400]2991 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2992
[21716]2993 foreach my $k (keys %$doc_rec) {
2994 my @escaped_v = ();
2995 foreach my $v (@{$doc_rec->{$k}}) {
2996 if ($k eq "contains") {
2997 # protect quotes in ".2;".3 etc
2998 $v =~ s/\"/\\\"/g;
2999 push(@escaped_v, $v);
3000 }
3001 else {
3002 my $ev = &ghtml::unescape_html($v);
3003 $ev =~ s/\"/\\\"/g;
3004 push(@escaped_v, $ev);
3005 }
3006 }
3007 $doc_rec->{$k} = \@escaped_v;
3008 }
3009
3010 # Protect the quotes
3011 $metavalue =~ s/\"/\\\"/g;
3012
3013 # Adds the pending macro
3014 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3015
3016 # If the metadata doesn't exist, create a new one
3017 if (!defined($doc_rec->{$metaname})){
3018 $doc_rec->{$metaname} = [ $macro_metavalue ];
3019 }
3020 # Else, let's acculumate the values
3021 else {
3022 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3023 }
3024
[27180]3025 ## The following code assumes gdbm
[21716]3026 # Generate the record string
[27180]3027 #my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
3028 # Store it into GDBM
3029 #my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
3030 #my $status = system($cmd);
[21716]3031
[27180]3032 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3033 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3034
[21716]3035 if ($status != 0) {
3036 # Catch error if gdbmget failed
3037 my $mess = "Failed to set metadata key: $docid\n";
3038
3039 $mess .= "PATH: $ENV{'PATH'}\n";
[27180]3040 #$mess .= "cmd = $cmd\n";
[21716]3041 $mess .= "Exit status: $status\n";
3042 $mess .= "System Error Message: $!\n";
3043
3044 $gsdl_cgi->generate_error($mess);
3045 }
3046 else {
3047 my $mess = "insert-metadata successful: Key[$docid]\n";
3048 $mess .= " [In metadata.xml] $metaname";
3049 $mess .= " = $metavalue\n";
3050 $mess .= " [In database] $metaname";
3051 $mess .= " = $macro_metavalue\n";
3052 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3053 $gsdl_cgi->generate_ok_message($mess);
3054 }
3055 #=======================================================================#
3056 # set_metadata (accumulate version) [END]
3057 #=======================================================================#
3058
3059 # Release the lock once it is done
3060 $self->unlock_collection($username, $collect);
3061}
3062
[19293]30631;
Note: See TracBrowser for help on using the repository browser.