source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27167

Last change on this file since 27167 was 27167, checked in by ak19, 11 years ago

Another bugfix: when doing a set_archives_meta with metamode=override, it was overwriting only the first metavalue of a metaname but preserving the rest. That behaviour is only supposed to happen if metamode=override combined with metapos=0. But when metamode=override and no metapos is defined, need to remove all metavalues for the given metaname, and then set the archives metadata for that metaname to the newly provided metavalue.

File size: 79.2 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 #SET METHODS
69 "set-live-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ ] },
72
73 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
74 'compulsory-args' => [ "metaname", "metavalue" ],
75 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
76
77 "set-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname", "metavalue" ],
79 'optional-args' => [ "metapos" ] },
80
81 "set-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname", "metavalue" ],
83 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
84
85 "set-import-metadata" => {
86 'compulsory-args' => [ "metaname", "metavalue" ],
87 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
88
89 #SET METHODS (ARRAY)
90 "set-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 "set-archives-metadata-array" => {
95 'compulsory-args' => [ "json" ],
96 'optional-args' => [ ] },
97
98 "set-import-metadata-array" => {
99 'compulsory-args' => [ "json" ],
100 'optional-args' => [ ] },
101
102 #REMOVE METHODS
103 "remove-import-metadata" => {
104 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
105 'optional-args' => [ ] },
106
107 "remove-archives-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
109 'optional-args' => [ "metapos", "metavalue" ] },
110
111 "remove-live-metadata" => {
112 'compulsory-args' => [ "d", "metaname" ],
113 'optional-args' => [ ] },
114
115 "remove-index-metadata" => {
116 'compulsory-args' => [ "d", "metaname" ],
117 'optional-args' => [ "metapos", "metavalue" ] },
118
119 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
120 'compulsory-args' => [ "d", "metaname" ],
121 'optional-args' => [ "where", "metapos", "metavalue" ] },
122
123 #INSERT METHODS
124 "insert-metadata" => {
125 'compulsory-args' => [ "d", "metaname", "metavalue" ],
126 'optional-args' => [ ] }
127};
128
129
130sub new
131{
132 my $class = shift (@_);
133 my ($gsdl_cgi,$iis6_mode) = @_;
134
135 # Treat metavalue specially. To transmit this through a GET request
136 # the Javascript side has url-encoded it, so here we need to decode
137 # it before proceeding
138
139 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
140 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
141 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
142
143 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
144 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
145
146 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
147
148 return bless $self, $class;
149}
150
151
152sub get_live_metadata
153{
154 my $self = shift @_;
155
156 my $username = $self->{'username'};
157 my $collect = $self->{'collect'};
158 my $gsdl_cgi = $self->{'gsdl_cgi'};
159 my $gsdlhome = $self->{'gsdlhome'};
160 my $infodbtype = $self->{'infodbtype'};
161
162 # live metadata gets/saves value scoped (prefixed) by the current usename
163 # so (for now) let's not bother to enforce authentication
164
165 # Obtain the collect dir
166 my $site = $self->{'site'};
167 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
168 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
169
170 # Make sure the collection isn't locked by someone else
171 $self->lock_collection($username, $collect);
172
173 # look up additional args
174 my $docid = $self->{'d'};
175 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
176 $gsdl_cgi->generate_error("No docid (d=...) specified.");
177 }
178
179 # Generate the dbkey
180 my $metaname = $self->{'metaname'};
181 my $dbkey = "$docid.$metaname";
182
183 # To people who know $collect_tail please add some comments
184 # Obtain path to the database
185 my $collect_tail = $collect;
186 $collect_tail =~ s/^.*[\/|\\]//;
187 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
188 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
189
190 # Obtain the content of the key
191 my $cmd = "gdbmget $infodb_file_path $dbkey";
192 if (open(GIN,"$cmd |") == 0) {
193 # Catch error if gdbmget failed
194 my $mess = "Failed to get metadata key: $metaname\n";
195 $mess .= "$!\n";
196
197 $gsdl_cgi->generate_error($mess);
198 }
199 else {
200 binmode(GIN,":utf8");
201 # Read everything in and concatenate them into $metavalue
202 my $metavalue = "";
203 my $line;
204 while (defined ($line=<GIN>)) {
205 $metavalue .= $line;
206 }
207 close(GIN);
208 chomp($metavalue); # Get rid off the tailing newlines
209 $gsdl_cgi->generate_ok_message("$metavalue");
210 }
211
212 # Release the lock once it is done
213 $self->unlock_collection($username, $collect);
214}
215
216# just calls the index version
217sub get_metadata
218{
219 my $self = shift @_;
220 $self->get_index_metadata(@_);
221}
222
223sub get_index_metadata
224{
225 my $self = shift @_;
226
227 my $username = $self->{'username'};
228 my $collect = $self->{'collect'};
229 my $gsdl_cgi = $self->{'gsdl_cgi'};
230 my $gsdlhome = $self->{'gsdlhome'};
231
232 # Authenticate user if it is enabled
233 if ($baseaction::authentication_enabled) {
234 # Ensure the user is allowed to edit this collection
235 &authenticate_user($gsdl_cgi, $username, $collect);
236 }
237
238 # Obtain the collect dir
239 my $site = $self->{'site'};
240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
241 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
242
243 # Make sure the collection isn't locked by someone else
244 $self->lock_collection($username, $collect);
245
246 # look up additional args
247 my $docid = $self->{'d'};
248 my $metaname = $self->{'metaname'};
249 my $metapos = $self->{'metapos'};
250 my $infodbtype = $self->{'infodbtype'};
251
252 # To people who know $collect_tail please add some comments
253 # Obtain path to the database
254 my $collect_tail = $collect;
255 $collect_tail =~ s/^.*[\/\\]//;
256 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
257 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
258
259 # Read the docid entry
260 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
261
262 # Basically loop through and unescape_html the values
263 foreach my $k (keys %$doc_rec) {
264 my @escaped_v = ();
265 foreach my $v (@{$doc_rec->{$k}}) {
266 my $ev = &ghtml::unescape_html($v);
267 push(@escaped_v, $ev);
268 }
269 $doc_rec->{$k} = \@escaped_v;
270 }
271
272 # Obtain the specified metadata value
273 $metapos = 0 if (!defined $metapos);
274 my $metavalue = $doc_rec->{$metaname}->[$metapos];
275 $gsdl_cgi->generate_ok_message("$metavalue");
276
277 # Release the lock once it is done
278 $self->unlock_collection($username, $collect);
279}
280
281
282sub get_import_metadata
283{
284 my $self = shift @_;
285
286 my $username = $self->{'username'};
287 my $collect = $self->{'collect'};
288 my $gsdl_cgi = $self->{'gsdl_cgi'};
289 my $gsdlhome = $self->{'gsdlhome'};
290
291 # Authenticate user if it is enabled
292 if ($baseaction::authentication_enabled) {
293 # Ensure the user is allowed to edit this collection
294 &authenticate_user($gsdl_cgi, $username, $collect);
295 }
296
297 # Obtain the collect dir
298 my $site = $self->{'site'};
299 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
300 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
301
302 # Make sure the collection isn't locked by someone else
303 $self->lock_collection($username, $collect);
304
305 # look up additional args
306 my $docid = $self->{'d'};
307 my $metaname = $self->{'metaname'};
308 my $infodbtype = $self->{'infodbtype'};
309 if (!defined $docid)
310 {
311 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
312 }
313
314 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
315 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
316 my $metadata_xml_file;
317 my $import_filename = undef;
318
319
320 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
321 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
322 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
323
324 # This now stores the full pathname
325 $import_filename = $doc_rec->{'src-file'}->[0];
326
327 # figure out correct metadata.xml file [?]
328 # Assuming the metadata.xml file is next to the source file
329 # Note: This will not work if it is using the inherited metadata from the parent folder
330 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
331 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
332
333 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
334
335 # Release the lock once it is done
336 $self->unlock_collection($username, $collect);
337}
338
339sub get_metadata_from_metadata_xml
340{
341 my $self = shift @_;
342 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
343
344 my @rules =
345 (
346 _default => 'raw',
347 'Metadata' => \&gfmxml_metadata,
348 'FileName' => \&mxml_filename
349 );
350
351 my $parser = XML::Rules->new
352 (
353 rules => \@rules,
354 output_encoding => 'utf8'
355 );
356
357 my $xml_in = "";
358 if (!open(MIN,"<$metadata_xml_filename"))
359 {
360 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
361 }
362 else
363 {
364 # Read them in
365 my $line;
366 while (defined ($line=<MIN>)) {
367 $xml_in .= $line;
368 }
369 close(MIN);
370
371 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
372
373 if(defined $parser->{'pad'}->{'metavalue'})
374 {
375 return $parser->{'pad'}->{'metavalue'};
376 }
377 else
378 {
379 return "";
380 }
381 }
382}
383
384sub gfmxml_metadata
385{
386 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
387
388 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
389 {
390 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
391 }
392}
393
394sub get_archives_metadata
395{
396 my $self = shift @_;
397
398 my $username = $self->{'username'};
399 my $collect = $self->{'collect'};
400 my $gsdl_cgi = $self->{'gsdl_cgi'};
401 my $gsdlhome = $self->{'gsdlhome'};
402 my $infodbtype = $self->{'infodbtype'};
403
404 # Authenticate user if it is enabled
405 if ($baseaction::authentication_enabled) {
406 # Ensure the user is allowed to edit this collection
407 &authenticate_user($gsdl_cgi, $username, $collect);
408 }
409
410 # Obtain the collect dir
411 my $site = $self->{'site'};
412 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
413
414 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
415
416 # Make sure the collection isn't locked by someone else
417 $self->lock_collection($username, $collect);
418
419 # look up additional args
420 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
421 $docid_secnum = "" if (!defined $docid_secnum);
422
423 my $metaname = $self->{'metaname'};
424 my $metapos = $self->{'metapos'};
425 $metapos = 0 if (!defined $metapos);
426
427 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
428 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
429
430 # This now stores the full pathname
431 my $doc_filename = $doc_rec->{'doc-file'}->[0];
432
433 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
434
435 # Release the lock once it is done
436 $self->unlock_collection($username, $collect);
437}
438
439sub get_metadata_from_archive_xml
440{
441 my $self = shift @_;
442 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
443
444 my @start_rules = ('Section' => \&dxml_start_section);
445
446 my @rules =
447 (
448 _default => 'raw',
449 'Metadata' => \&gfdxml_metadata
450 );
451
452 my $parser = XML::Rules->new
453 (
454 start_rules => \@start_rules,
455 rules => \@rules,
456 output_encoding => 'utf8'
457 );
458
459 my $xml_in = "";
460 if (!open(MIN,"<$doc_xml_filename"))
461 {
462 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
463 }
464 else
465 {
466 # Read them in
467 my $line;
468 while (defined ($line=<MIN>)) {
469 $xml_in .= $line;
470 }
471 close(MIN);
472
473 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
474
475 if(defined $parser->{'pad'}->{'metavalue'})
476 {
477 return $parser->{'pad'}->{'metavalue'};
478 }
479 else
480 {
481 return "";
482 }
483 }
484}
485
486sub gfdxml_metadata
487{
488 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
489
490 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
491 {
492 return;
493 }
494
495 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
496 {
497 if (!defined $parser->{'parameters'}->{'poscount'})
498 {
499 $parser->{'parameters'}->{'poscount'} = 0;
500 }
501 else
502 {
503 $parser->{'parameters'}->{'poscount'}++;
504 }
505 }
506
507 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
508 {
509 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
510 }
511}
512
513sub _set_live_metadata
514{
515 my $self = shift @_;
516
517 my $collect = $self->{'collect'};
518 my $gsdl_cgi = $self->{'gsdl_cgi'};
519 #my $gsdlhome = $self->{'gsdlhome'};
520 my $infodbtype = $self->{'infodbtype'};
521
522 # Obtain the collect dir
523 my $site = $self->{'site'};
524 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
525 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
526
527
528 # look up additional args
529 my $docid = $self->{'d'};
530 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
531 $gsdl_cgi->generate_error("No docid (d=...) specified.");
532 }
533 my $metavalue = $self->{'metavalue'};
534
535
536 # Generate the dbkey
537 my $metaname = $self->{'metaname'};
538 my $dbkey = "$docid.$metaname";
539
540 # To people who know $collect_tail please add some comments
541 # Obtain path to the database
542 my $collect_tail = $collect;
543 $collect_tail =~ s/^.*[\/\\]//;
544 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
545 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
546
547 # Set the new value
548 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
549 my $status = system($cmd);
550 if ($status != 0) {
551 # Catch error if gdbmget failed
552 my $mess = "Failed to set metadata key: $dbkey\n";
553
554 $mess .= "PATH: $ENV{'PATH'}\n";
555 $mess .= "cmd = $cmd\n";
556 $mess .= "Exit status: $status\n";
557 $mess .= "System Error Message: $!\n";
558
559 $gsdl_cgi->generate_error($mess);
560 }
561 else {
562 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
563 }
564
565 #return $status; # in case calling functions have any further use for this
566}
567
568sub set_live_metadata
569{
570 my $self = shift @_;
571
572 my $username = $self->{'username'};
573 my $collect = $self->{'collect'};
574 my $gsdl_cgi = $self->{'gsdl_cgi'};
575
576 if ($baseaction::authentication_enabled) {
577 # Ensure the user is allowed to edit this collection
578 &authenticate_user($gsdl_cgi, $username, $collect);
579 }
580
581 # Make sure the collection isn't locked by someone else
582 $self->lock_collection($username, $collect);
583
584 $self->_set_live_metadata(@_);
585
586 # Release the lock once it is done
587 $self->unlock_collection($username, $collect);
588}
589
590sub set_metadata_entry
591{
592 my $self = shift @_;
593 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
594
595 # To people who know $collect_tail please add some comments
596 # Obtain path to the database
597 my $collect_tail = $collect;
598 $collect_tail =~ s/^.*[\/\\]//;
599 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
600 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
601
602# print STDERR "**** infodb file path = $infodb_file_path\n";
603# print STDERR "***** infodb type = $infodbtype\n";
604
605 # Read the docid entry
606 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
607
608 # Set the metadata value
609 if (defined $metapos) {
610 $doc_rec->{$metaname}->[$metapos] = $metavalue;
611 }
612 else {
613 $doc_rec->{$metaname} = [ $metavalue ];
614 }
615
616 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
617
618 return $status;
619
620}
621
622sub _set_import_metadata
623{
624 my $self = shift @_;
625
626 my $collect = $self->{'collect'};
627 my $gsdl_cgi = $self->{'gsdl_cgi'};
628 my $infodbtype = $self->{'infodbtype'};
629# my $gsdlhome = $self->{'gsdlhome'};
630
631 # Obtain the collect and archive dir
632 my $site = $self->{'site'};
633 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
634 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
635 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
636
637 # look up additional args
638 # want either d= or f=
639 my $docid = $self->{'d'};
640 my $import_file = $self->{'f'};
641 if ((!defined $docid) && (!defined $import_file)) {
642 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
643 }
644
645 # Get the parameters and set default mode to "accumulate"
646 my $metaname = $self->{'metaname'};
647 my $metavalue = $self->{'metavalue'};
648## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
649 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
650
651 my $metamode = $self->{'metamode'};
652 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
653 # make "accumulate" the default (less destructive, as won't actually
654 # delete any existing values)
655 $metamode = "accumulate";
656 }
657
658 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
659 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
660 my $metadata_xml_file;
661 my $import_filename = undef;
662 if (defined $docid) {
663 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
664 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
665
666 # This now stores the full pathname
667 $import_filename = $doc_rec->{'src-file'}->[0];
668 }
669 else {
670 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
671 }
672
673 # figure out correct metadata.xml file [?]
674 # Assuming the metadata.xml file is next to the source file
675 # Note: This will not work if it is using the inherited metadata from the parent folder
676 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
677 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
678
679 # Edit the metadata.xml
680 # Modified by Jeffrey from DL Consulting
681 # Handle the case where there is one metadata.xml file for multiple FileSets
682 # The XML filter needs to know whether it is in the right FileSet
683 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
684 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
685 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
686 $metaname, $metavalue, $metamode, $import_tailname);
687
688
689 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
690 $mess .= " $metaname";
691 $mess .= " = $metavalue";
692 $mess .= " ($metamode)\n";
693
694 $gsdl_cgi->generate_ok_message($mess);
695
696 #return $status; # in case calling functions have any further use for this
697}
698
699# the version of set_index_meta that doesn't do authentication
700sub _set_archives_metadata
701{
702 my $self = shift @_;
703
704 my $collect = $self->{'collect'};
705 my $gsdl_cgi = $self->{'gsdl_cgi'};
706 my $infodbtype = $self->{'infodbtype'};
707
708 # Obtain the collect and archive dir
709 my $site = $self->{'site'};
710 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
711 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
712
713 # look up additional args
714 my $docid = $self->{'d'};
715 my $metaname = $self->{'metaname'};
716 my $metavalue = $self->{'metavalue'};
717 my $prevmetavalue = $self->{'prevmetavalue'};
718
719 my $metapos = $self->{'metapos'}; # Don't append "|| undef", since if metapos=0 it will then be set to undef
720
721 # Don't set metapos=0 if it's undefined, as doing so will interfere when overriding all values for a metaname
722 # (all values for a metaname should get overridden if metamode=override combined with no user-defined metapos)
723
724 my $metamode = $self->{'metamode'};
725 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
726 # make "accumulate" the default (less destructive, as won't actually
727 # delete any existing values)
728 $metamode = "accumulate";
729 }
730
731 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
732 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
733
734 if ($status == 0) {
735 my $mess = "set-archives-metadata successful: Key[$docid]\n";
736 $mess .= " $metaname";
737 $mess .= "->[$metapos]" if (defined $metapos);
738 $mess .= " = $metavalue";
739 $mess .= " ($metamode)\n";
740
741 $gsdl_cgi->generate_ok_message($mess);
742 }
743 else {
744 my $mess .= "Failed to set archives metadata key: $docid\n";
745 $mess .= "Exit status: $status\n";
746 if(defined $self->{'error_msg'}) {
747 $mess .= "Error Message: $self->{'error_msg'}\n";
748 } else {
749 $mess .= "System Error Message: $!\n";
750 }
751 $mess .= "-" x 20 . "\n";
752
753 $gsdl_cgi->generate_error($mess);
754 }
755
756 #return $status; # in case calling functions have any further use for this
757}
758
759
760# the version of set_index_meta that doesn't do authentication
761sub _set_index_metadata
762{
763 my $self = shift @_;
764
765 my $collect = $self->{'collect'};
766 my $gsdl_cgi = $self->{'gsdl_cgi'};
767
768 my $site = $self->{'site'};
769 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
770
771 # look up additional args
772 my $docid = $self->{'d'};
773 my $metaname = $self->{'metaname'};
774 my $metapos = $self->{'metapos'};
775 my $metavalue = $self->{'metavalue'};
776 my $infodbtype = $self->{'infodbtype'};
777
778 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
779
780 if ($status != 0) {
781 # Catch error if set infodb entry failed
782 my $mess = "Failed to set metadata key: $docid\n";
783
784 $mess .= "PATH: $ENV{'PATH'}\n";
785 $mess .= "Exit status: $status\n";
786 $mess .= "System Error Message: $!\n";
787
788 $gsdl_cgi->generate_error($mess);
789 }
790 else {
791 my $mess = "set-index-metadata successful: Key[$docid]\n";
792 $mess .= " $metaname";
793 $mess .= "->[$metapos]" if (defined $metapos);
794 $mess .= " = $metavalue\n";
795
796 $gsdl_cgi->generate_ok_message($mess);
797 }
798
799 #return $status; # in case calling functions have any further use for this
800}
801
802sub set_index_metadata
803{
804 my $self = shift @_;
805
806 my $username = $self->{'username'};
807 my $collect = $self->{'collect'};
808 my $gsdl_cgi = $self->{'gsdl_cgi'};
809 #my $gsdlhome = $self->{'gsdlhome'};
810
811 if ($baseaction::authentication_enabled) {
812 # Ensure the user is allowed to edit this collection
813 &authenticate_user($gsdl_cgi, $username, $collect);
814 }
815
816 my $site = $self->{'site'};
817 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
818
819 $gsdl_cgi->checked_chdir($collect_dir);
820
821 # Obtain the collect dir
822 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
823
824 # Make sure the collection isn't locked by someone else
825 $self->lock_collection($username, $collect);
826
827 $self->_set_index_metadata(@_);
828
829 # Release the lock once it is done
830 $self->unlock_collection($username, $collect);
831}
832
833# call this to set the metadata for a combination of dirs archives, import or index, or live
834# if none specified, defaults to index which was the original behaviour of set_metadata.
835sub set_metadata
836{
837 my $self = shift @_;
838
839 # Testing that not defining a variable, setting it to "" or to " " all return false
840 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
841
842 my $where = $self->{'where'};
843 if(!$where) {
844 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
845 return;
846 }
847
848 # authenticate and lock collection once, even if processing multiple dirs
849 my $username = $self->{'username'};
850 my $collect = $self->{'collect'};
851 my $gsdl_cgi = $self->{'gsdl_cgi'};
852
853 if ($baseaction::authentication_enabled) {
854 # Ensure the user is allowed to edit this collection
855 #&authenticate_user($gsdl_cgi, $username, $collect);
856 $self->authenticate_user($username, $collect);
857 }
858
859 if($where =~ m/index/) {
860 my $site = $self->{'site'};
861 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
862 $gsdl_cgi->checked_chdir($collect_dir);
863 }
864
865 # Make sure the collection isn't locked by someone else
866 $self->lock_collection($username, $collect);
867
868
869 # now at last can set the metadata. $where can specify multiple
870 # $where is of the form: import|archives|index, or a subset thereof
871
872 #my @whichdirs = split('\|', $where);
873
874 # just check whether $where contains import/archives/index/live in turn, and
875 # for each case, process it accordingly
876 if($where =~ m/import/) {
877 $self->_set_import_metadata(@_);
878 }
879
880 if($where =~ m/archives/) {
881
882 # look up docID arg which is optional to set_metadata because it's optional
883 # to set_import, but which is compulsory to set_archives_metadata
884 my $docid = $self->{'d'};
885 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
886 $gsdl_cgi->generate_error("No docid (d=...) specified.");
887 }
888 else { # we have a docid, so can set archives meta
889 $self->_set_archives_metadata(@_);
890 }
891 }
892
893 if($where =~ m/index/) {
894
895 # look up docID arg which is optional to set_metadata because it's optional
896 # to set_import, but which is compulsory to set_archives_metadata
897 my $docid = $self->{'d'};
898 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
899 $gsdl_cgi->generate_error("No docid (d=...) specified.");
900 }
901 else { # we have a docid, so can set index meta
902 $self->_set_index_metadata(@_);
903 }
904 }
905
906 if($where =~ m/live/) {
907 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
908 }
909
910 # Release the lock once it is done
911 $self->unlock_collection($username, $collect);
912}
913
914sub set_metadata_array
915{
916 my $self = shift @_;
917
918 my $username = $self->{'username'};
919 my $collect = $self->{'collect'};
920 my $gsdl_cgi = $self->{'gsdl_cgi'};
921 my $gsdlhome = $self->{'gsdlhome'};
922
923 if ($baseaction::authentication_enabled) {
924 # Ensure the user is allowed to edit this collection
925 &authenticate_user($gsdl_cgi, $username, $collect);
926 }
927
928 my $site = $self->{'site'};
929 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
930
931 $gsdl_cgi->checked_chdir($collect_dir);
932
933 # Obtain the collect dir
934 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
935
936 # Make sure the collection isn't locked by someone else
937 $self->lock_collection($username, $collect);
938
939 # look up additional args
940
941 my $infodbtype = $self->{'infodbtype'};
942
943 my $json_str = $self->{'json'};
944 my $doc_array = decode_json $json_str;
945
946
947 my $global_status = 0;
948 my $global_mess = "";
949
950 my @all_docids = ();
951
952 foreach my $doc_array_rec ( @$doc_array ) {
953
954 my $docid = $doc_array_rec->{'docid'};
955 my $metaname = $doc_array_rec->{'metaname'};
956 my $metapos = $doc_array_rec->{'metapos'};
957 my $metavalue = $doc_array_rec->{'metavalue'};
958
959 push(@all_docids,$docid);
960
961 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
962
963 if ($status != 0) {
964 # Catch error if set infodb entry failed
965 $global_status = $status;
966 $global_mess .= "Failed to set metadata key: $docid\n";
967 $global_mess .= "Exit status: $status\n";
968 $global_mess .= "System Error Message: $!\n";
969 $global_mess .= "-" x 20;
970 }
971 }
972
973 if ($global_status != 0) {
974 $global_mess .= "PATH: $ENV{'PATH'}\n";
975 $gsdl_cgi->generate_error($global_mess);
976 }
977 else {
978 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
979 $gsdl_cgi->generate_ok_message($mess);
980 }
981
982 # Release the lock once it is done
983 $self->unlock_collection($username, $collect);
984}
985
986
987sub dxml_metadata
988{
989 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
990 my $metaname = $parser->{'parameters'}->{'metaname'};
991 my $metamode = $parser->{'parameters'}->{'metamode'};
992
993 print STDERR "**** Processing closing </Metadata> tag\n";
994
995 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
996
997 # Find the right metadata tag and checks if we are going to
998 # override it
999 #
1000 # Note: This over writes the first metadata block it
1001 # encountered. If there are multiple Sections in the doc.xml, it
1002 # might not behave as you would expect
1003
1004 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1005## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1006## print STDERR "**** metamode = $metamode\n";
1007
1008 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1009 {
1010 my $name_attr = $attrHash->{'name'};
1011 if (($name_attr eq $metaname) && ($metamode eq "override"))
1012 {
1013 if (!defined $parser->{'parameters'}->{'poscount'})
1014 {
1015 $parser->{'parameters'}->{'poscount'} = 0;
1016 }
1017 else
1018 {
1019 $parser->{'parameters'}->{'poscount'}++;
1020 }
1021
1022 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1023 {
1024 ##print STDERR "#### got match!!\n";
1025 # Get the value and override the current value
1026 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1027 $attrHash->{'_content'} = $metavalue;
1028
1029 # Don't want it to wipe out any other pieces of metadata
1030 $parser->{'parameters'}->{'metamode'} = "done";
1031 }
1032 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1033 {
1034 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1035 $attrHash->{'_content'} = $metavalue;
1036 $parser->{'parameters'}->{'metamode'} = "done";
1037 }
1038 }
1039 }
1040
1041 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1042 return [$tagname => $attrHash];
1043}
1044
1045# This method exists purely for catching invalid section numbers that the client
1046# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1047# we've seen all the Sections in the doc.xml, and none of their section nums matched
1048# if the metamode has not been set to 'done' by then.
1049sub dxml_archive
1050{
1051 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1052 my $metamode = $parser->{'parameters'}->{'metamode'};
1053
1054 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1055 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1056
1057# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1058
1059 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1060 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1061
1062 my $metaname = $parser->{'parameters'}->{'metaname'};
1063 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1064
1065 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1066 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1067 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1068 print STDERR "\tin section $opt_doc_secnum.\n";
1069 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1070 }
1071
1072 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1073 return [$tagname => $attrHash];
1074}
1075
1076sub dxml_description
1077{
1078 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1079 my $metamode = $parser->{'parameters'}->{'metamode'};
1080
1081 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1082 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1083
1084 print STDERR "**** Processing a closing </Description> tag \n";
1085# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1086
1087 # Accumulate the metadata
1088
1089 # We'll be accumulating metadata at this point if we haven't found and therefore
1090 # haven't processed the metadata yet.
1091 # For subsections, this means that if we're at a matching subsection, but haven't
1092 # found the correct metaname to override in that subsection, we accumulate it as new
1093 # meta in the subsection by adding it to the current description.
1094 # If there's no subsection info for the metadata, it will accumulate at the top level
1095 # section description if we hadn't found a matching metaname to override at this point.
1096
1097 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1098 # at the toplevel section. In the latter case, it means we want to process meta in the
1099 # toplevel section. So the eq check between the values below will work in all cases.
1100
1101 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1102 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1103 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1104
1105 if ($curr_secnum eq $opt_doc_secnum
1106 && ($metamode eq "accumulate" || $metamode eq "override")) {
1107 if ($metamode eq "override") {
1108 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1109 }
1110
1111 # If get to here and metamode is override, the this means there
1112 # was no existing value to overide => treat as an append operation
1113
1114 # Tack a new metadata tag on to the end of the <Metadata>+ block
1115 my $metaname = $parser->{'parameters'}->{'metaname'};
1116 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1117
1118 my $metadata_attr = {
1119 '_content' => $metavalue,
1120 'name' => $metaname,
1121 'mode' => "accumulate"
1122 };
1123
1124 my $append_metadata = [ "Metadata" => $metadata_attr ];
1125 my $description_content = $attrHash->{'_content'};
1126
1127 print "Appending metadata to doc.xml\n";
1128
1129 if (ref($description_content)) {
1130 # got some existing interesting nested content
1131 push(@$description_content, " ", $append_metadata ,"\n ");
1132 }
1133 else {
1134 #description_content is most likely a string such as "\n"
1135 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1136 }
1137
1138 $parser->{'parameters'}->{'metamode'} = "done";
1139 }
1140 else {
1141 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1142## print STDERR "**** NOT ACCUMULATE?!? \n";
1143 }
1144
1145 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1146 return [$tagname => $attrHash];
1147}
1148
1149
1150sub dxml_start_section
1151{
1152 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1153
1154 my $new_depth = scalar(@$contextArray);
1155
1156 print STDERR "**** START SECTION \n";
1157
1158 if ($new_depth == 1) {
1159 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1160 $parser->{'parameters'}->{'curr_section_num'} = "";
1161 }
1162
1163 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1164 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1165
1166 my $new_secnum;
1167
1168 if ($new_depth > $old_depth) {
1169 # child subsection
1170 $new_secnum = "$old_secnum.1";
1171 }
1172 elsif ($new_depth == $old_depth) {
1173 # sibling section => increase it's value by 1
1174 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1175 $tail_num++;
1176 $new_secnum = $old_secnum;
1177 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1178 }
1179 else {
1180 # back up to parent section => lopp off tail
1181 $new_secnum = $old_secnum;
1182 $new_secnum =~ s/\.\d+$//;
1183 }
1184
1185 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1186 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1187
1188 1;
1189}
1190
1191sub edit_xml_file
1192{
1193 my $self = shift @_;
1194 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1195
1196 # use XML::Rules to add it in (read in and out again)
1197 my $parser = XML::Rules->new(start_rules => $start_rules,
1198 rules => $rules,
1199 style => 'filter',
1200 output_encoding => 'utf8' );
1201
1202 my $xml_in = "";
1203 if (!open(MIN,"<$filename")) {
1204 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1205 }
1206 else {
1207 # Read all the text in
1208 my $line;
1209 while (defined ($line=<MIN>)) {
1210 $xml_in .= $line;
1211 }
1212 close(MIN);
1213
1214 my $MOUT;
1215 if (!open($MOUT,">$filename")) {
1216 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1217 }
1218 else {
1219 # Matched lines will get handled by the call backs
1220## my $xml_out = "";
1221
1222 binmode($MOUT,":utf8");
1223 $parser->filter($xml_in,$MOUT, $options);
1224
1225# binmode(MOUT,":utf8");
1226# print MOUT $xml_out;
1227 close($MOUT);
1228 }
1229 }
1230
1231 # copy across any custom error information that was stored during parsing
1232 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1233}
1234
1235sub edit_doc_xml
1236{
1237 my $self = shift @_;
1238 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1239
1240 my $info_mess = <<RAWEND;
1241****************************
1242 edit_doc_xml()
1243****************************
1244doc_xml_filename = $doc_xml_filename
1245metaname = $metaname
1246metavalue = $metavalue
1247metapos = $metapos
1248metamode = $metamode
1249opt_secnum = $opt_secnum
1250prevmetavalue = $prevmetavalue
1251****************************
1252RAWEND
1253
1254 $gsdl_cgi->generate_message($info_mess);
1255
1256 # To monitor which section/subsection number we are in
1257 my @start_rules =
1258 ( 'Section' => \&dxml_start_section );
1259
1260 # use XML::Rules to add it in (read in and out again)
1261 # Set the call back functions
1262 my @rules =
1263 ( _default => 'raw',
1264 'Metadata' => \&dxml_metadata,
1265 'Description' => \&dxml_description,
1266 'Archive' => \&dxml_archive); # just for catching errors at end
1267
1268 # Sets the parameters
1269 my $options = { 'metaname' => $metaname,
1270 'metapos' => $metapos,
1271 'metavalue' => $metavalue,
1272 'metamode' => $metamode,
1273 'prevmetavalue' => $prevmetavalue };
1274
1275 if (defined $opt_secnum) {
1276 $options->{'secnum'} = $opt_secnum;
1277 }
1278
1279 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1280}
1281
1282sub set_archives_metadata_entry
1283{
1284 my $self = shift @_;
1285 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1286
1287 my $info_mess = <<RAWEND;
1288****************************
1289 set_archives_metadata_entry()
1290****************************
1291archive_dir = $archive_dir
1292collect_dir = $collect_dir
1293collect = $collect
1294infodbtype = $infodbtype
1295docid = $docid
1296metaname = $metaname
1297metapos = $metapos
1298metavalue = $metavalue
1299metamode = $metamode
1300prevmetavalue = $prevmetavalue
1301****************************
1302RAWEND
1303
1304 $gsdl_cgi->generate_message($info_mess);
1305
1306 # Obtain the doc.xml path for the specified docID
1307 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1308
1309 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1310 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1311 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1312
1313 # The $doc_xml_file is relative to the archives, and now let's get the full path
1314 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1315 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1316
1317 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1318 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1319 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1320 if(!defined $metapos && $metamode eq "override") {
1321 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1322 }
1323
1324 # Edit the doc.xml file with the specified metadata name, value and position.
1325 # TODO: there is a potential problem here as this edit_doc_xml function
1326 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1327 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1328
1329 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1330 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1331
1332 # return 0; # return 0 for now to indicate no error
1333 return (defined $self->{'error_msg'}) ? 1 : 0;
1334}
1335
1336
1337sub set_archives_metadata
1338{
1339 my $self = shift @_;
1340
1341 my $username = $self->{'username'};
1342 my $collect = $self->{'collect'};
1343 my $gsdl_cgi = $self->{'gsdl_cgi'};
1344
1345 if ($baseaction::authentication_enabled) {
1346 # Ensure the user is allowed to edit this collection
1347 $self->authenticate_user($username, $collect);
1348 }
1349
1350 # Make sure the collection isn't locked by someone else
1351 $self->lock_collection($username, $collect);
1352
1353 $self->_set_archives_metadata(@_);
1354
1355 # Release the lock once it is done
1356 $self->unlock_collection($username, $collect);
1357}
1358
1359
1360sub set_archives_metadata_array
1361{
1362 my $self = shift @_;
1363
1364 my $username = $self->{'username'};
1365 my $collect = $self->{'collect'};
1366 my $gsdl_cgi = $self->{'gsdl_cgi'};
1367 my $gsdlhome = $self->{'gsdlhome'};
1368
1369 if ($baseaction::authentication_enabled) {
1370 # Ensure the user is allowed to edit this collection
1371 &authenticate_user($gsdl_cgi, $username, $collect);
1372 }
1373
1374 my $site = $self->{'site'};
1375 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1376
1377 $gsdl_cgi->checked_chdir($collect_dir);
1378
1379 # Obtain the collect dir
1380 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1381
1382 # Make sure the collection isn't locked by someone else
1383 $self->lock_collection($username, $collect);
1384
1385 # look up additional args
1386
1387 my $infodbtype = $self->{'infodbtype'};
1388
1389 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1390
1391 my $json_str = $self->{'json'};
1392 my $doc_array = decode_json $json_str;
1393
1394
1395 my $global_status = 0;
1396 my $global_mess = "";
1397
1398 my @all_docids = ();
1399
1400 foreach my $doc_array_rec ( @$doc_array ) {
1401
1402 my $docid = $doc_array_rec->{'docid'};
1403 my $metaname = $doc_array_rec->{'metaname'};
1404 my $metapos = $doc_array_rec->{'metapos'};
1405 my $metamode = $self->{'metamode'};
1406 my $metavalue = $doc_array_rec->{'metavalue'};
1407
1408 # Some sanity checks
1409 $metapos = 0 if (!defined $metapos);
1410
1411 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1412 # make "accumulate" the default (less destructive, as won't actually
1413 # delete any existing values)
1414 $metamode = "accumulate";
1415 }
1416
1417 push(@all_docids,$docid);
1418
1419 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1420 $metaname,$metapos,$metavalue,$metamode);
1421
1422 if ($status != 0) {
1423 # Catch error if set infodb entry failed
1424 $global_status = $status;
1425 $global_mess .= "Failed to set metadata key: $docid\n";
1426 $global_mess .= "Exit status: $status\n";
1427 $global_mess .= "System Error Message: $!\n";
1428 $global_mess .= "-" x 20 . "\n";
1429 }
1430 }
1431
1432 if ($global_status != 0) {
1433 $global_mess .= "PATH: $ENV{'PATH'}\n";
1434 $gsdl_cgi->generate_error($global_mess);
1435 }
1436 else {
1437 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1438 $gsdl_cgi->generate_ok_message($mess);
1439 }
1440
1441 # Release the lock once it is done
1442 $self->unlock_collection($username, $collect);
1443}
1444
1445sub _remove_archives_metadata
1446{
1447 my $self = shift @_;
1448
1449 my $collect = $self->{'collect'};
1450 my $gsdl_cgi = $self->{'gsdl_cgi'};
1451# my $gsdlhome = $self->{'gsdlhome'};
1452 my $infodbtype = $self->{'infodbtype'};
1453
1454 my $site = $self->{'site'};
1455
1456 # Obtain the collect and archive dir
1457 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1458
1459 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1460
1461 # look up additional args
1462 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1463
1464 my $metaname = $self->{'metaname'};
1465 my $metapos = $self->{'metapos'};
1466 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1467
1468 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1469 # default to deleting the first metavalue for the given metaname
1470 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1471 # seem to get deleted in one single remove_archives_meta action invocation.
1472 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1473 # deleted, both get deleted.
1474 if(!defined $metapos && !defined $metavalue) {
1475 $metapos = 0;
1476 }
1477
1478 my $metamode = $self->{'metamode'} || undef;
1479
1480 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1481 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1482
1483 # This now stores the full pathname
1484 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1485
1486 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1487# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1488
1489 if ($status == 0)
1490 {
1491 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1492 $mess .= " $metaname";
1493 $mess .= "->[$metapos]" if (defined $metapos);
1494
1495 $gsdl_cgi->generate_ok_message($mess);
1496 }
1497 else
1498 {
1499 my $mess .= "Failed to remove archives metadata key: $docid\n";
1500 $mess .= "Exit status: $status\n";
1501 $mess .= "System Error Message: $!\n";
1502 $mess .= "-" x 20 . "\n";
1503
1504 $gsdl_cgi->generate_error($mess);
1505 }
1506
1507 #return $status; # in case calling functions have a use for this
1508}
1509
1510sub remove_archives_metadata
1511{
1512 my $self = shift @_;
1513
1514 my $username = $self->{'username'};
1515 my $collect = $self->{'collect'};
1516 my $gsdl_cgi = $self->{'gsdl_cgi'};
1517
1518 if ($baseaction::authentication_enabled)
1519 {
1520 # Ensure the user is allowed to edit this collection
1521 &authenticate_user($gsdl_cgi, $username, $collect);
1522 }
1523
1524 # Make sure the collection isn't locked by someone else
1525 $self->lock_collection($username, $collect);
1526
1527 $self->_remove_archives_metadata(@_);
1528
1529 # Release the lock once it is done
1530 $self->unlock_collection($username, $collect);
1531}
1532
1533sub remove_from_doc_xml
1534{
1535 my $self = shift @_;
1536 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1537
1538 my @start_rules = ('Section' => \&dxml_start_section);
1539
1540 # Set the call-back functions for the metadata tags
1541 my @rules =
1542 (
1543 _default => 'raw',
1544 'Metadata' => \&rfdxml_metadata
1545 );
1546
1547 my $parser = XML::Rules->new
1548 (
1549 start_rules => \@start_rules,
1550 rules => \@rules,
1551 style => 'filter',
1552 output_encoding => 'utf8'
1553 );
1554
1555 my $status = 0;
1556 my $xml_in = "";
1557 if (!open(MIN,"<$doc_xml_filename"))
1558 {
1559 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1560 $status = 1;
1561 }
1562 else
1563 {
1564 # Read them in
1565 my $line;
1566 while (defined ($line=<MIN>)) {
1567 $xml_in .= $line;
1568 }
1569 close(MIN);
1570
1571 # Filter with the call-back functions
1572 my $xml_out = "";
1573
1574 my $MOUT;
1575 if (!open($MOUT,">$doc_xml_filename")) {
1576 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1577 $status = 1;
1578 }
1579 else {
1580 binmode($MOUT,":utf8");
1581 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1582 close($MOUT);
1583 }
1584 }
1585 return $status;
1586}
1587
1588sub rfdxml_metadata
1589{
1590 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1591
1592 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1593 {
1594 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1595 return [$tagname => $attrHash];
1596 }
1597
1598 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1599 {
1600 if (!defined $parser->{'parameters'}->{'poscount'})
1601 {
1602 $parser->{'parameters'}->{'poscount'} = 0;
1603 }
1604 else
1605 {
1606 $parser->{'parameters'}->{'poscount'}++;
1607 }
1608
1609 # if overriding but no metapos, then clear all the meta for this metaname
1610 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
1611 return [];
1612 }
1613 }
1614
1615 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1616 {
1617 return [];
1618 }
1619
1620 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1621 {
1622 return [];
1623 }
1624
1625 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1626 return [$tagname => $attrHash];
1627}
1628
1629sub mxml_metadata
1630{
1631 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1632 my $metaname = $parser->{'parameters'}->{'metaname'};
1633 my $metamode = $parser->{'parameters'}->{'metamode'};
1634
1635 # Report error if we don't see FileName tag before this
1636 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1637
1638 # Don't do anything if we are not in the right FileSet
1639 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1640 if ($file_regexp =~ /\.\*/) {
1641 # Only interested in a file_regexp if it specifies precisely one
1642 # file.
1643 # So, skip anything with a .* in it as it is too general
1644 return [$tagname => $attrHash];
1645 }
1646 my $src_file = $parser->{'parameters'}->{'src_file'};
1647 if (!($src_file =~ /$file_regexp/)) {
1648 return [$tagname => $attrHash];
1649 }
1650## print STDERR "*** mxl metamode = $metamode\n";
1651
1652 # Find the right metadata tag and checks if we are going to override it
1653 my $name_attr = $attrHash->{'name'};
1654 if (($name_attr eq $metaname) && ($metamode eq "override")) {
1655 # Get the value and override the current value
1656 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1657 $attrHash->{'_content'} = $metavalue;
1658
1659## print STDERR "**** overrideing metadata.xml\n";
1660
1661 # Don't want it to wipe out any other pieces of metadata
1662 $parser->{'parameters'}->{'metamode'} = "done";
1663 }
1664
1665 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1666 return [$tagname => $attrHash];
1667}
1668
1669
1670sub mxml_description
1671{
1672 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1673 my $metamode = $parser->{'parameters'}->{'metamode'};
1674
1675 # Failed... Report error if we don't see FileName tag before this
1676 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1677
1678 # Don't do anything if we are not in the right FileSet
1679 my $file_regexp = $parser->{'parameters'}->{'current_file'};
1680 if ($file_regexp =~ m/\.\*/) {
1681 # Only interested in a file_regexp if it specifies precisely one
1682 # file.
1683 # So, skip anything with a .* in it as it is too general
1684 return [$tagname => $attrHash];
1685 }
1686 my $src_file = $parser->{'parameters'}->{'src_file'};
1687
1688 if (!($src_file =~ m/$file_regexp/)) {
1689 return [$tagname => $attrHash];
1690 }
1691
1692 # Accumulate the metadata block to the end of the description block
1693 # Note: This adds metadata block to all description blocks, so if there are
1694 # multiple FileSets, it will add to all of them
1695 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1696 # if metamode was "override" but get to here then it failed to
1697 # find an item to override, in which case it should append its
1698 # value to the end, just like the "accumulate" mode
1699
1700 # tack a new metadata tag on to the end of the <Metadata>+ block
1701 my $metaname = $parser->{'parameters'}->{'metaname'};
1702 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1703
1704 my $metadata_attr = { '_content' => $metavalue,
1705 'name' => $metaname,
1706 'mode' => "accumulate" };
1707
1708 my $append_metadata = [ "Metadata" => $metadata_attr ];
1709 my $description_content = $attrHash->{'_content'};
1710
1711## print STDERR "*** appending to metadata.xml\n";
1712
1713 # append the new metadata element to the end of the current
1714 # content contained inside this tag
1715 if (ref($description_content) eq "") {
1716 # => string or numeric literal
1717 # this is caused by a <Description> block has no <Metadata> child elements
1718 # => set up an empty array in '_content'
1719 $attrHash->{'_content'} = [ "\n" ];
1720 $description_content = $attrHash->{'_content'};
1721 }
1722
1723 push(@$description_content," ", $append_metadata ,"\n ");
1724 $parser->{'parameters'}->{'metamode'} = "done";
1725 }
1726
1727 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1728 return [$tagname => $attrHash];
1729}
1730
1731
1732sub mxml_filename
1733{
1734 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1735
1736 # Store the filename of the Current Fileset
1737 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1738 # FileName tag must come before Description tag
1739 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1740
1741 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1742 return [$tagname => $attrHash];
1743}
1744
1745
1746sub mxml_fileset
1747{
1748 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1749
1750 # Initilise the current_file
1751 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1752 # FileName tag must come before Description tag
1753 $parser->{'parameters'}->{'current_file'} = "";
1754
1755 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1756 return [$tagname => $attrHash];
1757}
1758
1759
1760sub edit_metadata_xml
1761{
1762 my $self = shift @_;
1763 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
1764
1765 # Set the call-back functions for the metadata tags
1766 my @rules =
1767 ( _default => 'raw',
1768 'FileName' => \&mxml_filename,
1769 'Metadata' => \&mxml_metadata,
1770 'Description' => \&mxml_description,
1771 'FileSet' => \&mxml_fileset);
1772
1773 # use XML::Rules to add it in (read in and out again)
1774 my $parser = XML::Rules->new(rules => \@rules,
1775 style => 'filter',
1776 output_encoding => 'utf8');
1777
1778 if (!-e $metadata_xml_filename) {
1779
1780 if (open(MOUT,">$metadata_xml_filename")) {
1781
1782 my $src_file_re = &util::filename_to_regex($src_file);
1783 # shouldn't the following also be in the above utility routine??
1784 # $src_file_re =~ s/\./\\./g;
1785
1786 print MOUT "<?xml version=\"1.0\"?>\n";
1787 print MOUT "<DirectoryMetadata>\n";
1788 print MOUT " <FileSet>\n";
1789 print MOUT " <FileName>$src_file_re</FileName>\n";
1790 print MOUT " <Description>\n";
1791 print MOUT " </Description>\n";
1792 print MOUT " </FileSet>\n";
1793 print MOUT "</DirectoryMetadata>\n";
1794
1795 close(MOUT);
1796 }
1797 else {
1798 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1799 }
1800 }
1801
1802
1803 my $xml_in = "";
1804 if (!open(MIN,"<$metadata_xml_filename")) {
1805 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1806 }
1807 else {
1808 # Read them in
1809 my $line;
1810 while (defined ($line=<MIN>)) {
1811 $xml_in .= $line;
1812 }
1813 close(MIN);
1814
1815 # Filter with the call-back functions
1816 my $xml_out = "";
1817
1818 my $MOUT;
1819 if (!open($MOUT,">$metadata_xml_filename")) {
1820 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1821 }
1822 else {
1823 binmode($MOUT,":utf8");
1824
1825 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1826 # At the moment, I will just hack it!
1827 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1828 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1829 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1830 #print MOUT $xml_out;
1831
1832 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1833 metavalue => $metavalue,
1834 metamode => $metamode,
1835 src_file => $src_file,
1836 current_file => undef} );
1837 close($MOUT);
1838 }
1839 }
1840}
1841
1842
1843sub set_import_metadata
1844{
1845 my $self = shift @_;
1846
1847 my $username = $self->{'username'};
1848 my $collect = $self->{'collect'};
1849 my $gsdl_cgi = $self->{'gsdl_cgi'};
1850
1851 if ($baseaction::authentication_enabled) {
1852 # Ensure the user is allowed to edit this collection
1853 $self->authenticate_user($username, $collect);
1854 }
1855
1856 # Make sure the collection isn't locked by someone else
1857 $self->lock_collection($username, $collect);
1858
1859 $self->_set_import_metadata(@_);
1860
1861 # Release the lock once it is done
1862 $self->unlock_collection($username, $collect);
1863
1864}
1865
1866sub set_import_metadata_array
1867{
1868 my $self = shift @_;
1869
1870 my $username = $self->{'username'};
1871 my $collect = $self->{'collect'};
1872 my $gsdl_cgi = $self->{'gsdl_cgi'};
1873 my $gsdlhome = $self->{'gsdlhome'};
1874
1875 if ($baseaction::authentication_enabled) {
1876 # Ensure the user is allowed to edit this collection
1877 &authenticate_user($gsdl_cgi, $username, $collect);
1878 }
1879
1880 my $site = $self->{'site'};
1881 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1882
1883 $gsdl_cgi->checked_chdir($collect_dir);
1884
1885 # Make sure the collection isn't locked by someone else
1886 $self->lock_collection($username, $collect);
1887
1888 # look up additional args
1889
1890 my $infodbtype = $self->{'infodbtype'};
1891
1892 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1893
1894 my $json_str = $self->{'json'};
1895 my $doc_array = decode_json $json_str;
1896
1897 my $global_status = 0;
1898 my $global_mess = "";
1899
1900 my @all_docids = ();
1901
1902 foreach my $doc_array_rec ( @$doc_array )
1903 {
1904 my $docid = $doc_array_rec->{'docid'};
1905 my $metaname = $doc_array_rec->{'metaname'};
1906 my $metamode = $self->{'metamode'};
1907 my $metavalue = $doc_array_rec->{'metavalue'};
1908
1909 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1910 # make "accumulate" the default (less destructive, as won't actually
1911 # delete any existing values)
1912 $metamode = "accumulate";
1913 }
1914
1915 push(@all_docids,$docid);
1916
1917 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1918 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1919 my $metadata_xml_file;
1920 my $import_filename = undef;
1921
1922 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1923 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1924
1925 # This now stores the full pathname
1926 $import_filename = $doc_rec->{'src-file'}->[0];
1927
1928 # figure out correct metadata.xml file [?]
1929 # Assuming the metadata.xml file is next to the source file
1930 # Note: This will not work if it is using the inherited metadata from the parent folder
1931 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1932 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1933
1934 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1935 }
1936
1937 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1938 $gsdl_cgi->generate_ok_message($mess);
1939
1940 # Release the lock once it is done
1941 $self->unlock_collection($username, $collect);
1942}
1943
1944sub _remove_import_metadata
1945{
1946 my $self = shift @_;
1947
1948 my $collect = $self->{'collect'};
1949 my $gsdl_cgi = $self->{'gsdl_cgi'};
1950 my $gsdlhome = $self->{'gsdlhome'};
1951 my $infodbtype = $self->{'infodbtype'};
1952
1953 # Obtain the collect dir
1954 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1955 my $site = $self->{'site'};
1956 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1957
1958 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1959 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1960
1961 # look up additional args
1962 my $docid = $self->{'d'};
1963 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1964 {
1965 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
1966 }
1967
1968 my $metaname = $self->{'metaname'};
1969 my $metavalue = $self->{'metavalue'};
1970 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1971
1972 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1973 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1974 my $metadata_xml_file;
1975 my $import_filename = undef;
1976 if (defined $docid)
1977 {
1978 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1979 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1980
1981 # This now stores the full pathname
1982 $import_filename = $doc_rec->{'src-file'}->[0];
1983 }
1984
1985 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1986 {
1987 $gsdl_cgi->generate_error_message("There is no metadata\n");
1988 }
1989
1990 # figure out correct metadata.xml file [?]
1991 # Assuming the metadata.xml file is next to the source file
1992 # Note: This will not work if it is using the inherited metadata from the parent folder
1993 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1994 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1995
1996 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1997
1998 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1999 $mess .= " $metaname";
2000 $mess .= " = $metavalue\n";
2001
2002 $gsdl_cgi->generate_ok_message($mess);
2003
2004 #return $status; # in case calling functions have a use for this
2005}
2006
2007sub remove_import_metadata
2008{
2009 my $self = shift @_;
2010
2011 my $username = $self->{'username'};
2012 my $collect = $self->{'collect'};
2013 my $gsdl_cgi = $self->{'gsdl_cgi'};
2014
2015 if ($baseaction::authentication_enabled) {
2016 # Ensure the user is allowed to edit this collection
2017 &authenticate_user($gsdl_cgi, $username, $collect);
2018 }
2019
2020 # Make sure the collection isn't locked by someone else
2021 $self->lock_collection($username, $collect);
2022
2023 $self->_remove_import_metadata(@_);
2024
2025 # Release the lock once it is done
2026 $self->unlock_collection($username, $collect);
2027
2028}
2029
2030sub remove_from_metadata_xml
2031{
2032 my $self = shift @_;
2033 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
2034
2035 # Set the call-back functions for the metadata tags
2036 my @rules =
2037 (
2038 _default => 'raw',
2039 'Metadata' => \&rfmxml_metadata,
2040 'FileName' => \&mxml_filename
2041 );
2042
2043 my $parser = XML::Rules->new
2044 (
2045 rules => \@rules,
2046 style => 'filter',
2047 output_encoding => 'utf8'
2048 );
2049
2050 my $xml_in = "";
2051 if (!open(MIN,"<$metadata_xml_filename"))
2052 {
2053 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2054 }
2055 else
2056 {
2057 # Read them in
2058 my $line;
2059 while (defined ($line=<MIN>)) {
2060 $xml_in .= $line;
2061 }
2062 close(MIN);
2063
2064 # Filter with the call-back functions
2065 my $xml_out = "";
2066
2067 my $MOUT;
2068 if (!open($MOUT,">$metadata_xml_filename")) {
2069 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2070 }
2071 else {
2072 binmode($MOUT,":utf8");
2073 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
2074 close($MOUT);
2075 }
2076 }
2077}
2078
2079sub rfmxml_metadata
2080{
2081 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2082
2083 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2084 {
2085 return [];
2086 }
2087
2088 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2089 return [$tagname => $attrHash];
2090}
2091
2092sub _remove_live_metadata
2093{
2094 my $self = shift @_;
2095
2096 my $collect = $self->{'collect'};
2097 my $gsdl_cgi = $self->{'gsdl_cgi'};
2098# my $gsdlhome = $self->{'gsdlhome'};
2099 my $infodbtype = $self->{'infodbtype'};
2100
2101 # Obtain the collect dir
2102 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2103 my $site = $self->{'site'};
2104 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2105
2106
2107 # look up additional args
2108 my $docid = $self->{'d'};
2109 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2110 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2111 }
2112
2113 # Generate the dbkey
2114 my $metaname = $self->{'metaname'};
2115 my $dbkey = "$docid.$metaname";
2116
2117 # To people who know $collect_tail please add some comments
2118 # Obtain the live gdbm_db path
2119 my $collect_tail = $collect;
2120 $collect_tail =~ s/^.*[\/\\]//;
2121 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2122 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2123
2124 # Remove the key
2125 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2126 my $status = system($cmd);
2127 if ($status != 0) {
2128 # Catch error if gdbmdel failed
2129 my $mess = "Failed to set metadata key: $dbkey\n";
2130
2131 $mess .= "PATH: $ENV{'PATH'}\n";
2132 $mess .= "cmd = $cmd\n";
2133 $mess .= "Exit status: $status\n";
2134 $mess .= "System Error Message: $!\n";
2135
2136 $gsdl_cgi->generate_error($mess);
2137 }
2138 else {
2139 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2140 }
2141
2142}
2143
2144sub remove_live_metadata
2145{
2146 my $self = shift @_;
2147
2148 my $username = $self->{'username'};
2149 my $collect = $self->{'collect'};
2150 my $gsdl_cgi = $self->{'gsdl_cgi'};
2151 my $gsdlhome = $self->{'gsdlhome'};
2152
2153 if ($baseaction::authentication_enabled) {
2154 # Ensure the user is allowed to edit this collection
2155 &authenticate_user($gsdl_cgi, $username, $collect);
2156 }
2157
2158 # Make sure the collection isn't locked by someone else
2159 $self->lock_collection($username, $collect);
2160
2161 $self->_remove_live_metadata(@_);
2162
2163 $self->unlock_collection($username, $collect);
2164}
2165
2166sub remove_metadata
2167{
2168 my $self = shift @_;
2169
2170 my $where = $self->{'where'};
2171 if(!$where) {
2172 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2173 return;
2174 }
2175
2176 my $username = $self->{'username'};
2177 my $collect = $self->{'collect'};
2178 my $gsdl_cgi = $self->{'gsdl_cgi'};
2179
2180 if ($baseaction::authentication_enabled) {
2181 # Ensure the user is allowed to edit this collection
2182 &authenticate_user($gsdl_cgi, $username, $collect);
2183 }
2184
2185 # Make sure the collection isn't locked by someone else
2186 $self->lock_collection($username, $collect);
2187
2188 # check which directories need to be processed, specified in $where as
2189 # any combination of import|archives|index|live
2190 if($where =~ m/import/) {
2191 $self->_remove_import_metadata(@_);
2192 }
2193 if($where =~ m/archives/) {
2194 $self->_remove_archives_metadata(@_);
2195 }
2196 if($where =~ m/index/) {
2197 $self->_remove_index_metadata(@_);
2198 }
2199
2200 # Release the lock once it is done
2201 $self->unlock_collection($username, $collect);
2202}
2203
2204# the internal version, without authentication
2205sub _remove_index_metadata
2206{
2207 my $self = shift @_;
2208
2209 my $collect = $self->{'collect'};
2210 my $gsdl_cgi = $self->{'gsdl_cgi'};
2211# my $gsdlhome = $self->{'gsdlhome'};
2212 my $infodbtype = $self->{'infodbtype'};
2213
2214 # Obtain the collect dir
2215 my $site = $self->{'site'};
2216 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2217 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2218
2219 # look up additional args
2220 my $docid = $self->{'d'};
2221 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2222 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2223 }
2224 my $metaname = $self->{'metaname'};
2225 my $metapos = $self->{'metapos'};
2226 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2227
2228 # To people who know $collect_tail please add some comments
2229 # Obtain the path to the database
2230 my $collect_tail = $collect;
2231 $collect_tail =~ s/^.*[\/\\]//;
2232 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2233 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2234
2235 # Read the docid entry
2236 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2237
2238 # Basically loop through and unescape_html the values
2239 foreach my $k (keys %$doc_rec) {
2240 my @escaped_v = ();
2241 foreach my $v (@{$doc_rec->{$k}}) {
2242 if ($k eq "contains") {
2243 # protect quotes in ".2;".3 etc
2244 $v =~ s/\"/\\\"/g;
2245 push(@escaped_v, $v);
2246 }
2247 else {
2248 my $ev = &ghtml::unescape_html($v);
2249 $ev =~ s/\"/\\\"/g;
2250 push(@escaped_v, $ev);
2251 }
2252 }
2253 $doc_rec->{$k} = \@escaped_v;
2254 }
2255
2256 # Check to make sure the key does exist
2257 if (!defined ($doc_rec->{$metaname})) {
2258 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2259 }
2260
2261 # Obtain the specified metadata pos
2262 if(!defined $metapos && !defined $metavalue) { #$metapos = 0 if (!defined $metapos);
2263 $metapos = 0;
2264 }
2265
2266 # consider check key is defined before deleting?
2267 # Loop through the metadata array and ignore the specified position
2268 my $filtered_metadata = [];
2269 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2270 for (my $i=0; $i<$num_metadata_vals; $i++) {
2271 my $metaval = shift(@{$doc_rec->{$metaname}});
2272
2273 if (!defined $metavalue && $i != $metapos) {
2274 push(@$filtered_metadata,$metaval);
2275 }
2276
2277 if(defined $metavalue && !($metavalue eq $metaval))
2278 {
2279 push(@$filtered_metadata,$metaval);
2280 }
2281 }
2282 $doc_rec->{$metaname} = $filtered_metadata;
2283
2284 # Turn the record back to string
2285 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2286
2287 # Store it back to the database
2288 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2289 my $status = system($cmd);
2290 if ($status != 0) {
2291 my $mess = "Failed to set metadata key: $docid\n";
2292
2293 $mess .= "PATH: $ENV{'PATH'}\n";
2294 $mess .= "cmd = $cmd\n";
2295 $mess .= "Exit status: $status\n";
2296 $mess .= "System Error Message: $!\n";
2297
2298 $gsdl_cgi->generate_error($mess);
2299 }
2300 else {
2301 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2302 $mess .= " $metaname";
2303 $mess .= "->[$metapos]" if (defined $metapos);
2304
2305 $gsdl_cgi->generate_ok_message($mess);
2306 }
2307
2308 #return $status; # in case calling functions have a use for this
2309}
2310
2311sub remove_index_metadata
2312{
2313 my $self = shift @_;
2314
2315 my $username = $self->{'username'};
2316 my $collect = $self->{'collect'};
2317 my $gsdl_cgi = $self->{'gsdl_cgi'};
2318# my $gsdlhome = $self->{'gsdlhome'};
2319
2320 if ($baseaction::authentication_enabled) {
2321 # Ensure the user is allowed to edit this collection
2322 &authenticate_user($gsdl_cgi, $username, $collect);
2323 }
2324
2325 # Obtain the collect dir
2326 my $site = $self->{'site'};
2327 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2328 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2329
2330 # Make sure the collection isn't locked by someone else
2331 $self->lock_collection($username, $collect);
2332
2333 $self->_remove_index_metadata(@_);
2334
2335 # Release the lock once it is done
2336 $self->unlock_collection($username, $collect);
2337}
2338
2339
2340# Was trying to reused the codes, but the functions need to be broken
2341# down more before they can be reused, otherwise there will be too
2342# much overhead and duplicate process...
2343sub insert_metadata
2344{
2345 my $self = shift @_;
2346
2347 my $username = $self->{'username'};
2348 my $collect = $self->{'collect'};
2349 my $gsdl_cgi = $self->{'gsdl_cgi'};
2350 my $gsdlhome = $self->{'gsdlhome'};
2351 my $infodbtype = $self->{'infodbtype'};
2352
2353 # If the import metadata and gdbm database have been updated, we
2354 # need to insert some notification to warn user that the the text
2355 # they see at the moment is not indexed and require a rebuild.
2356 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2357
2358 if ($baseaction::authentication_enabled) {
2359 # Ensure the user is allowed to edit this collection
2360 $self->authenticate_user($username, $collect);
2361 }
2362
2363 # Obtain the collect and archive dir
2364 my $site = $self->{'site'};
2365 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2366 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2367 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2368
2369 # Make sure the collection isn't locked by someone else
2370 $self->lock_collection($username, $collect);
2371
2372 # Check additional args
2373 my $docid = $self->{'d'};
2374 if (!defined($docid)) {
2375 $gsdl_cgi->generate_error("No document id is specified: d=...");
2376 }
2377 my $metaname = $self->{'metaname'};
2378 if (!defined($metaname)) {
2379 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2380 }
2381 my $metavalue = $self->{'metavalue'};
2382 if (!defined($metavalue) || $metavalue eq "") {
2383 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2384 }
2385 # make "accumulate" the default (less destructive, as won't actually
2386 # delete any existing values)
2387 my $metamode = "accumulate";
2388
2389 #=======================================================================#
2390 # set_import_metadata [START]
2391 #=======================================================================#
2392 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2393 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2394 my $metadata_xml_file;
2395 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2396 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2397
2398 # This now stores the full pathname
2399 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2400
2401 # figure out correct metadata.xml file [?]
2402 # Assuming the metadata.xml file is next to the source file
2403 # Note: This will not work if it is using the inherited metadata from the parent folder
2404 my ($import_tailname, $import_dirname)
2405 = File::Basename::fileparse($import_filename);
2406 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2407
2408 # Shane's escape characters
2409 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2410 $metavalue =~ s/\,/&#44;/g;
2411 $metavalue =~ s/\:/&#58;/g;
2412 $metavalue =~ s/\|/&#124;/g;
2413 $metavalue =~ s/\(/&#40;/g;
2414 $metavalue =~ s/\)/&#41;/g;
2415 $metavalue =~ s/\[/&#91;/g;
2416 $metavalue =~ s/\\/&#92;/g;
2417 $metavalue =~ s/\]/&#93;/g;
2418 $metavalue =~ s/\{/&#123;/g;
2419 $metavalue =~ s/\}/&#125;/g;
2420 $metavalue =~ s/\"/&#34;/g;
2421 $metavalue =~ s/\`/&#96;/g;
2422 $metavalue =~ s/\n/_newline_/g;
2423
2424 # Edit the metadata.xml
2425 # Modified by Jeffrey from DL Consulting
2426 # Handle the case where there is one metadata.xml file for multiple FileSets
2427 # The XML filter needs to know whether it is in the right FileSet
2428 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2429 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2430 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2431 $metaname, $metavalue, $metamode, $import_tailname);
2432 #=======================================================================#
2433 # set_import_metadata [END]
2434 #=======================================================================#
2435
2436
2437 #=======================================================================#
2438 # set_metadata (accumulate version) [START]
2439 #=======================================================================#
2440 # To people who know $collect_tail please add some comments
2441 # Obtain path to the database
2442 my $collect_tail = $collect;
2443 $collect_tail =~ s/^.*[\/\\]//;
2444 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2445 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2446
2447 # Read the docid entry
2448 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2449
2450 foreach my $k (keys %$doc_rec) {
2451 my @escaped_v = ();
2452 foreach my $v (@{$doc_rec->{$k}}) {
2453 if ($k eq "contains") {
2454 # protect quotes in ".2;".3 etc
2455 $v =~ s/\"/\\\"/g;
2456 push(@escaped_v, $v);
2457 }
2458 else {
2459 my $ev = &ghtml::unescape_html($v);
2460 $ev =~ s/\"/\\\"/g;
2461 push(@escaped_v, $ev);
2462 }
2463 }
2464 $doc_rec->{$k} = \@escaped_v;
2465 }
2466
2467 # Protect the quotes
2468 $metavalue =~ s/\"/\\\"/g;
2469
2470 # Adds the pending macro
2471 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2472
2473 # If the metadata doesn't exist, create a new one
2474 if (!defined($doc_rec->{$metaname})){
2475 $doc_rec->{$metaname} = [ $macro_metavalue ];
2476 }
2477 # Else, let's acculumate the values
2478 else {
2479 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2480 }
2481
2482 # Generate the record string
2483 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2484
2485 # Store it into GDBM
2486 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2487 my $status = system($cmd);
2488 if ($status != 0) {
2489 # Catch error if gdbmget failed
2490 my $mess = "Failed to set metadata key: $docid\n";
2491
2492 $mess .= "PATH: $ENV{'PATH'}\n";
2493 $mess .= "cmd = $cmd\n";
2494 $mess .= "Exit status: $status\n";
2495 $mess .= "System Error Message: $!\n";
2496
2497 $gsdl_cgi->generate_error($mess);
2498 }
2499 else {
2500 my $mess = "insert-metadata successful: Key[$docid]\n";
2501 $mess .= " [In metadata.xml] $metaname";
2502 $mess .= " = $metavalue\n";
2503 $mess .= " [In database] $metaname";
2504 $mess .= " = $macro_metavalue\n";
2505 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2506 $gsdl_cgi->generate_ok_message($mess);
2507 }
2508 #=======================================================================#
2509 # set_metadata (accumulate version) [END]
2510 #=======================================================================#
2511
2512 # Release the lock once it is done
2513 $self->unlock_collection($username, $collect);
2514}
2515
25161;
Note: See TracBrowser for help on using the repository browser.