source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 24949

Last change on this file since 24949 was 24949, checked in by sjm84, 12 years ago

Adding the ability to specify metadata values to use to remove archive and index metadata. The code is untested at this point.

File size: 56.2 KB
RevLine 
[19293]1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[24071]37
[19293]38BEGIN {
[22331]39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]40 require XML::Rules;
41}
42
43
44@metadataaction::ISA = ('baseaction');
45
46
47my $action_table =
48{
[20538]49 "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [] },
[19499]51
[20538]52 "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
53 'optional-args' => [ "metapos" ] },
[19499]54
[20538]55 "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
56 'optional-args' => [ ] },
[19499]57
[20538]58 "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
59 'optional-args' => [ "metapos" ] },
[19499]60
[24071]61 "set-metadata-array" => { 'compulsory-args' => [ "json" ],
62 'optional-args' => [ ] },
63
[20538]64 "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
[23400]65 'optional-args' => [ "metapos", "metamode" ]
66 # metamode can be "accumulate", "override",
[19499]67 },
68
[24071]69 "set-archives-metadata-array" => { 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "metamode" ]
71 },
72
[20538]73 "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
[24943]74 'optional-args' => [ "d", "f", "metamode" ] # Need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
[20538]75 # metamode can be "accumulate", "override", or "unique-id"
76 },
77
[24943]78 "remove-import-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
79 'optional-args' => [ ] },
80
81 "remove-archives-metadata" => { 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
[24949]82 'optional-args' => [ "metapos", "metavalue" ] },
[20538]83
84 "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
85 'optional-args' => [ ] },
86
87 "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
[24949]88 'optional-args' => [ "metapos", "metavalue" ] },
[21716]89
90 "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
91 'optional-args' => [ ]
92 }
[19293]93};
94
95
96sub new
97{
98 my $class = shift (@_);
99 my ($gsdl_cgi,$iis6_mode) = @_;
100
[23761]101 # Treat metavalue specially. To transmit this through a GET request
102 # the Javascript side has url-encoded it, so here we need to decode
103 # it before proceeding
104
105 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
106 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
107
108 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
109
110 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
111
112 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
113
[19293]114 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
115
116 return bless $self, $class;
117}
118
119
120sub get_live_metadata
121{
122 my $self = shift @_;
123
124 my $username = $self->{'username'};
125 my $collect = $self->{'collect'};
126 my $gsdl_cgi = $self->{'gsdl_cgi'};
127 my $gsdlhome = $self->{'gsdlhome'};
[23478]128 my $infodbtype = $self->{'infodbtype'};
[23400]129
[23447]130 # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]131 # so (for now) let's not bother to enforce authentication
[21715]132
133 # Obtain the collect dir
[23766]134 my $site = $self->{'site'};
135 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
136 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]137
138 # Make sure the collection isn't locked by someone else
139 $self->lock_collection($username, $collect);
140
141 # look up additional args
142 my $docid = $self->{'d'};
143 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]144 $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]145 }
146
[21715]147 # Generate the dbkey
[19293]148 my $metaname = $self->{'metaname'};
149 my $dbkey = "$docid.$metaname";
150
[21715]151 # To people who know $collect_tail please add some comments
152 # Obtain path to the database
[19293]153 my $collect_tail = $collect;
154 $collect_tail =~ s/^.*[\/|\\]//;
[21564]155 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]156 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]157
158 # Obtain the content of the key
[21569]159 my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]160 if (open(GIN,"$cmd |") == 0) {
[21715]161 # Catch error if gdbmget failed
[19293]162 my $mess = "Failed to get metadata key: $metaname\n";
163 $mess .= "$!\n";
164
165 $gsdl_cgi->generate_error($mess);
166 }
167 else {
[23761]168 binmode(GIN,":utf8");
[21715]169 # Read everything in and concatenate them into $metavalue
[19293]170 my $metavalue = "";
171 my $line;
172 while (defined ($line=<GIN>)) {
173 $metavalue .= $line;
174 }
175 close(GIN);
[21715]176 chomp($metavalue); # Get rid off the tailing newlines
[19293]177 $gsdl_cgi->generate_ok_message("$metavalue");
178 }
[21715]179
180 # Release the lock once it is done
181 $self->unlock_collection($username, $collect);
[19499]182}
[19293]183
184
[19499]185sub get_metadata
186{
187 my $self = shift @_;
188
189 my $username = $self->{'username'};
190 my $collect = $self->{'collect'};
191 my $gsdl_cgi = $self->{'gsdl_cgi'};
192 my $gsdlhome = $self->{'gsdlhome'};
193
[21715]194 # Authenticate user if it is enabled
[19499]195 if ($baseaction::authentication_enabled) {
196 # Ensure the user is allowed to edit this collection
197 &authenticate_user($gsdl_cgi, $username, $collect);
198 }
199
[21715]200 # Obtain the collect dir
[23766]201 my $site = $self->{'site'};
202 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
203 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]204
205 # Make sure the collection isn't locked by someone else
206 $self->lock_collection($username, $collect);
207
208 # look up additional args
209 my $docid = $self->{'d'};
210 my $metaname = $self->{'metaname'};
211 my $metapos = $self->{'metapos'};
[23400]212 my $infodbtype = $self->{'infodbtype'};
[19499]213
[21715]214 # To people who know $collect_tail please add some comments
215 # Obtain path to the database
[19499]216 my $collect_tail = $collect;
217 $collect_tail =~ s/^.*[\/\\]//;
[21564]218 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]219 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]220
221 # Read the docid entry
[23400]222 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
223
[21715]224 # Basically loop through and unescape_html the values
[19499]225 foreach my $k (keys %$doc_rec) {
226 my @escaped_v = ();
227 foreach my $v (@{$doc_rec->{$k}}) {
228 my $ev = &ghtml::unescape_html($v);
229 push(@escaped_v, $ev);
230 }
231 $doc_rec->{$k} = \@escaped_v;
232 }
233
[21715]234 # Obtain the specified metadata value
[19499]235 $metapos = 0 if (!defined $metapos);
236 my $metavalue = $doc_rec->{$metaname}->[$metapos];
237 $gsdl_cgi->generate_ok_message("$metavalue");
[21715]238
239 # Release the lock once it is done
240 $self->unlock_collection($username, $collect);
[19293]241}
242
243
244sub set_live_metadata
245{
246 my $self = shift @_;
247
248 my $username = $self->{'username'};
249 my $collect = $self->{'collect'};
250 my $gsdl_cgi = $self->{'gsdl_cgi'};
251 my $gsdlhome = $self->{'gsdlhome'};
[23400]252 my $infodbtype = $self->{'infodbtype'};
253
[19293]254 if ($baseaction::authentication_enabled) {
255 # Ensure the user is allowed to edit this collection
256 &authenticate_user($gsdl_cgi, $username, $collect);
257 }
258
[21715]259 # Obtain the collect dir
[23766]260 my $site = $self->{'site'};
261 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
262 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]263
264 # Make sure the collection isn't locked by someone else
265 $self->lock_collection($username, $collect);
266
267 # look up additional args
268 my $docid = $self->{'d'};
[21715]269 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
270 $gsdl_cgi->generate_error("No docid (d=...) specified.");
271 }
[19293]272 my $metavalue = $self->{'metavalue'};
[23400]273
[19293]274
[21715]275 # Generate the dbkey
276 my $metaname = $self->{'metaname'};
[19293]277 my $dbkey = "$docid.$metaname";
278
[21715]279 # To people who know $collect_tail please add some comments
280 # Obtain path to the database
[19293]281 my $collect_tail = $collect;
282 $collect_tail =~ s/^.*[\/\\]//;
[21564]283 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]284 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19293]285
[21715]286 # Set the new value
[21569]287 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[19293]288 my $status = system($cmd);
289 if ($status != 0) {
[21715]290 # Catch error if gdbmget failed
[19293]291 my $mess = "Failed to set metadata key: $dbkey\n";
[21715]292
[19293]293 $mess .= "PATH: $ENV{'PATH'}\n";
294 $mess .= "cmd = $cmd\n";
295 $mess .= "Exit status: $status\n";
296 $mess .= "System Error Message: $!\n";
297
[19499]298 $gsdl_cgi->generate_error($mess);
[19293]299 }
300 else {
[19499]301 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
[19293]302 }
[21715]303
304 # Release the lock once it is done
305 $self->unlock_collection($username, $collect);
[19293]306}
307
[24071]308sub set_metadata_entry
309{
310 my $self = shift @_;
311 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
312
313 # To people who know $collect_tail please add some comments
314 # Obtain path to the database
315 my $collect_tail = $collect;
316 $collect_tail =~ s/^.*[\/\\]//;
317 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
318 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
319
320# print STDERR "**** infodb file path = $infodb_file_path\n";
321# print STDERR "***** infodb type = $infodbtype\n";
322
323 # Read the docid entry
324 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
325
326 # Set the metadata value
327 if (defined $metapos) {
328 $doc_rec->{$metaname}->[$metapos] = $metavalue;
329 }
330 else {
331 $doc_rec->{$metaname} = [ $metavalue ];
332 }
333
334 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
335
336 return $status;
337
338}
[19293]339
[19499]340sub set_metadata
341{
342 my $self = shift @_;
[19293]343
[19499]344 my $username = $self->{'username'};
345 my $collect = $self->{'collect'};
346 my $gsdl_cgi = $self->{'gsdl_cgi'};
347 my $gsdlhome = $self->{'gsdlhome'};
[19293]348
[19499]349 if ($baseaction::authentication_enabled) {
350 # Ensure the user is allowed to edit this collection
351 &authenticate_user($gsdl_cgi, $username, $collect);
352 }
353
[23766]354 my $site = $self->{'site'};
355 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
356
357 $gsdl_cgi->checked_chdir($collect_dir);
358
[21715]359 # Obtain the collect dir
[23766]360 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]361
362 # Make sure the collection isn't locked by someone else
363 $self->lock_collection($username, $collect);
364
365 # look up additional args
366 my $docid = $self->{'d'};
367 my $metaname = $self->{'metaname'};
368 my $metapos = $self->{'metapos'};
369 my $metavalue = $self->{'metavalue'};
[23761]370 my $infodbtype = $self->{'infodbtype'};
[23400]371
[24071]372 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
[23400]373
[19499]374 if ($status != 0) {
[23761]375 # Catch error if set infodb entry failed
376 my $mess = "Failed to set metadata key: $docid\n";
[19499]377
[23761]378 $mess .= "PATH: $ENV{'PATH'}\n";
379 $mess .= "Exit status: $status\n";
380 $mess .= "System Error Message: $!\n";
381
382 $gsdl_cgi->generate_error($mess);
[19499]383 }
384 else {
[24071]385 my $mess = "set-metadata successful: Key[$docid]\n";
[23761]386 $mess .= " $metaname";
387 $mess .= "->[$metapos]" if (defined $metapos);
388 $mess .= " = $metavalue";
389
390 $gsdl_cgi->generate_ok_message($mess);
[19499]391 }
[21715]392
393 # Release the lock once it is done
394 $self->unlock_collection($username, $collect);
[19499]395}
396
397
[24071]398
399
400sub set_metadata_array
401{
402 my $self = shift @_;
403
404 my $username = $self->{'username'};
405 my $collect = $self->{'collect'};
406 my $gsdl_cgi = $self->{'gsdl_cgi'};
407 my $gsdlhome = $self->{'gsdlhome'};
408
409 if ($baseaction::authentication_enabled) {
410 # Ensure the user is allowed to edit this collection
411 &authenticate_user($gsdl_cgi, $username, $collect);
412 }
413
414 my $site = $self->{'site'};
415 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
416
417 $gsdl_cgi->checked_chdir($collect_dir);
418
419 # Obtain the collect dir
420 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
421
422 # Make sure the collection isn't locked by someone else
423 $self->lock_collection($username, $collect);
424
425 # look up additional args
426
427 my $infodbtype = $self->{'infodbtype'};
428
429 my $json_str = $self->{'json'};
430 my $doc_array = decode_json $json_str;
431
432
433 my $global_status = 0;
434 my $global_mess = "";
435
436 my @all_docids = ();
437
438 foreach my $doc_array_rec ( @$doc_array ) {
439
440 my $docid = $doc_array_rec->{'docid'};
441 my $metaname = $doc_array_rec->{'metaname'};
442 my $metapos = $doc_array_rec->{'metapos'};
443 my $metavalue = $doc_array_rec->{'metavalue'};
444
445 push(@all_docids,$docid);
446
447 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
448
449 if ($status != 0) {
450 # Catch error if set infodb entry failed
451 $global_status = $status;
452 $global_mess .= "Failed to set metadata key: $docid\n";
453 $global_mess .= "Exit status: $status\n";
454 $global_mess .= "System Error Message: $!\n";
455 $global_mess .= "-" x 20;
456 }
457 }
458
459 if ($global_status != 0) {
460 $global_mess .= "PATH: $ENV{'PATH'}\n";
461 $gsdl_cgi->generate_error($global_mess);
462 }
463 else {
464 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
465 $gsdl_cgi->generate_ok_message($mess);
466 }
467
468 # Release the lock once it is done
469 $self->unlock_collection($username, $collect);
470}
471
472
473
[20538]474sub dxml_metadata
475{
476 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
477 my $metaname = $parser->{'parameters'}->{'metaname'};
478 my $metamode = $parser->{'parameters'}->{'metamode'};
[23761]479
480 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
481
482 # Find the right metadata tag and checks if we are going to
483 # override it
484 #
485 # Note: This over writes the first metadata block it
486 # encountered. If there are multiple Sections in the doc.xml, it
487 # might not behave as you would expect
[20538]488
[23761]489 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
490## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
491## print STDERR "**** metamode = $metamode\n";
492
493 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum)) {
494 my $name_attr = $attrHash->{'name'};
495 if (($name_attr eq $metaname) && ($metamode eq "override")) {
496## print STDERR "**** got match!!\n";
497 # Get the value and override the current value
498 my $metavalue = $parser->{'parameters'}->{'metavalue'};
499 $attrHash->{'_content'} = $metavalue;
500
501 # Don't want it to wipe out any other pieces of metadata
502 $parser->{'parameters'}->{'metamode'} = "done";
503 }
[20538]504 }
505
[21716]506 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
507 return [$tagname => $attrHash];
[20538]508}
509
510
511sub dxml_description
512{
513 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
514 my $metamode = $parser->{'parameters'}->{'metamode'};
515
[21715]516 # Accumulate the metadata
517 # NOTE: This appends new metadata element to all description fields.
518 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
[23761]519 if (($metamode eq "accumulate") || ($metamode eq "override")) {
520 # If get to here and metamode is override, the this means there
521 # was no existing value to overide => treat as an append operation
522
523 # Tack a new metadata tag on to the end of the <Metadata>+ block
[20538]524 my $metaname = $parser->{'parameters'}->{'metaname'};
525 my $metavalue = $parser->{'parameters'}->{'metavalue'};
526
527 my $metadata_attr = { '_content' => $metavalue,
528 'name' => $metaname,
529 'mode' => "accumulate" };
530
531 my $append_metadata = [ "Metadata" => $metadata_attr ];
532 my $description_content = $attrHash->{'_content'};
533
[23761]534## print STDERR "**** appending to doc.xml\n";
535
[20538]536 push(@$description_content," ", $append_metadata ,"\n ");
[23761]537 $parser->{'parameters'}->{'metamode'} = "done";
[20538]538 }
539
[21716]540
541 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
542 return [$tagname => $attrHash];
[20538]543}
544
[21715]545
[23761]546
547sub dxml_start_section
548{
549 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
550
551 my $new_depth = scalar(@$contextArray);
552
553 if ($new_depth == 1) {
554 $parser->{'parameters'}->{'curr_section_depth'} = 1;
555 $parser->{'parameters'}->{'curr_section_num'} = "";
556 }
557
558 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
559 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
560
561 my $new_secnum;
562
563 if ($new_depth > $old_depth) {
564 # child subsection
565 $new_secnum = "$old_secnum.1";
566 }
567 elsif ($new_depth == $old_depth) {
568 # sibling section => increase it's value by 1
569 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
570 $tail_num++;
571 $new_secnum = $old_secnum;
572 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
573 }
574 else {
575 # back up to parent section => lopp off tail
576 $new_secnum = $old_secnum;
577 $new_secnum =~ s/\.\d+$//;
578 }
579
580 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
581 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
582
583 print STDERR "*** In Section: $new_secnum\n";
584}
585
[20538]586sub edit_xml_file
587{
588 my $self = shift @_;
[23761]589 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
[20538]590
591 # use XML::Rules to add it in (read in and out again)
[23761]592 my $parser = XML::Rules->new(start_rules => $start_rules,
593 rules => $rules,
594 style => 'filter',
595 output_encoding => 'utf8' );
[20538]596
597 my $xml_in = "";
598 if (!open(MIN,"<$filename")) {
599 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
600 }
601 else {
[21715]602 # Read all the text in
[20538]603 my $line;
604 while (defined ($line=<MIN>)) {
605 $xml_in .= $line;
606 }
607 close(MIN);
608
[23761]609 my $MOUT;
610 if (!open($MOUT,">$filename")) {
[20538]611 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
612 }
613 else {
[23761]614 # Matched lines will get handled by the call backs
615## my $xml_out = "";
616
617 binmode($MOUT,":utf8");
618 $parser->filter($xml_in,$MOUT, $options);
619
620# binmode(MOUT,":utf8");
621# print MOUT $xml_out;
622 close($MOUT);
[20538]623 }
624 }
625}
626
627
628sub edit_doc_xml
629{
630 my $self = shift @_;
[23761]631 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum) = @_;
[20538]632
[23761]633 # To monitor which section/subsection number we are in
634 my @start_rules =
635 ( 'Section' => \&dxml_start_section );
636
[20538]637 # use XML::Rules to add it in (read in and out again)
[21715]638 # Set the call back functions
[20538]639 my @rules =
[21716]640 ( _default => 'raw',
[23761]641 'Metadata' => \&dxml_metadata,
642 'Description' => \&dxml_description);
[20538]643
[21715]644 # Sets the parameters
[20538]645 my $options = { 'metaname' => $metaname,
646 'metapos' => $metapos,
[23400]647 'metavalue' => $metavalue,
[23761]648 'metamode' => $metamode };
[23400]649
[23761]650 if (defined $opt_secnum) {
651 $options->{'secnum'} = $opt_secnum;
652 }
653
654 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
[20538]655}
656
[24071]657sub set_archives_metadata_entry
658{
659 my $self = shift @_;
660 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
661
662 # Obtain the doc.xml path for the specified docID
663 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
[20538]664
[24071]665 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
666 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
667 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
668
669 # The $doc_xml_file is relative to the archives, and now let's get the full path
670 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
671 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
672
673 # Edit the doc.xml file with the specified metadata name, value and position.
674 # TODO: there is a potential problem here as this edit_doc_xml function
675 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
676 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
677
678 print STDERR "** away to call edit_doc_xml\n";
679
680 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
681 $metaname,$metavalue,$metapos,$metamode,$docid_secnum);
682
683 print STDERR "*** finished edit_doc_xml\n";
684
685 return 0; # return 0 for now to indicate no error
686
687}
688
689
[20538]690sub set_archives_metadata
691{
692 my $self = shift @_;
693
694 my $username = $self->{'username'};
695 my $collect = $self->{'collect'};
696 my $gsdl_cgi = $self->{'gsdl_cgi'};
697 my $gsdlhome = $self->{'gsdlhome'};
[23400]698 my $infodbtype = $self->{'infodbtype'};
699
[20538]700 if ($baseaction::authentication_enabled) {
[24071]701 # Ensure the user is allowed to edit this collection
702 $self->authenticate_user($username, $collect);
[20538]703 }
704
[24071]705 my $site = $self->{'site'};
706
[23766]707 # Obtain the collect and archive dir
708 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
709
[20538]710 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
711
712 # Make sure the collection isn't locked by someone else
713 $self->lock_collection($username, $collect);
714
715 # look up additional args
716 my $docid = $self->{'d'};
717 my $metaname = $self->{'metaname'};
718 my $metavalue = $self->{'metavalue'};
[23400]719
[20538]720 my $metapos = $self->{'metapos'};
721 $metapos = 0 if (!defined $metapos);
[23400]722
[23761]723 my $metamode = $self->{'metamode'};
[23400]724 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
725 # make "accumulate" the default (less destructive, as won't actually
726 # delete any existing values)
727 $metamode = "accumulate";
[24071]728 }
729
730 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
731 $metaname,$metapos,$metavalue,$metamode);
732
733 # Release the lock once it is done
734 $self->unlock_collection($username, $collect);
735
736 if ($status == 0) {
737 my $mess = "set-archives-metadata successful: Key[$docid]\n";
738 $mess .= " $metaname";
739 $mess .= "->[$metapos]" if (defined $metapos);
740 $mess .= " = $metavalue";
741 $mess .= " ($metamode)\n";
742
743 $gsdl_cgi->generate_ok_message($mess);
744 }
745 else {
746 my $mess .= "Failed to set archives metadata key: $docid\n";
747 $mess .= "Exit status: $status\n";
748 $mess .= "System Error Message: $!\n";
749 $mess .= "-" x 20 . "\n";
750
751 $gsdl_cgi->generate_error($mess);
752 }
753}
754
755
756sub set_archives_metadata_array
757{
758 my $self = shift @_;
759
760 my $username = $self->{'username'};
761 my $collect = $self->{'collect'};
762 my $gsdl_cgi = $self->{'gsdl_cgi'};
763 my $gsdlhome = $self->{'gsdlhome'};
764
765 if ($baseaction::authentication_enabled) {
766 # Ensure the user is allowed to edit this collection
767 &authenticate_user($gsdl_cgi, $username, $collect);
[23400]768 }
[23761]769
[24071]770 my $site = $self->{'site'};
771 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
772
773 $gsdl_cgi->checked_chdir($collect_dir);
774
775 # Obtain the collect dir
776 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
777
778 # Make sure the collection isn't locked by someone else
779 $self->lock_collection($username, $collect);
780
781 # look up additional args
782
783 my $infodbtype = $self->{'infodbtype'};
784
785 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
786
787 my $json_str = $self->{'json'};
788 my $doc_array = decode_json $json_str;
789
790
791 my $global_status = 0;
792 my $global_mess = "";
793
794 my @all_docids = ();
795
796 foreach my $doc_array_rec ( @$doc_array ) {
797
798 my $docid = $doc_array_rec->{'docid'};
799 my $metaname = $doc_array_rec->{'metaname'};
800 my $metapos = $doc_array_rec->{'metapos'};
801 my $metamode = $self->{'metamode'};
802 my $metavalue = $doc_array_rec->{'metavalue'};
803
804 # Some sanity checks
805 $metapos = 0 if (!defined $metapos);
806
807 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
808 # make "accumulate" the default (less destructive, as won't actually
809 # delete any existing values)
810 $metamode = "accumulate";
811 }
812
813 push(@all_docids,$docid);
814
815 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
816 $metaname,$metapos,$metavalue,$metamode);
817
818 if ($status != 0) {
819 # Catch error if set infodb entry failed
820 $global_status = $status;
821 $global_mess .= "Failed to set metadata key: $docid\n";
822 $global_mess .= "Exit status: $status\n";
823 $global_mess .= "System Error Message: $!\n";
824 $global_mess .= "-" x 20 . "\n";
825 }
826 }
827
828 if ($global_status != 0) {
829 $global_mess .= "PATH: $ENV{'PATH'}\n";
830 $gsdl_cgi->generate_error($global_mess);
831 }
832 else {
833 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
834 $gsdl_cgi->generate_ok_message($mess);
835 }
[20538]836
[23761]837 # Release the lock once it is done
838 $self->unlock_collection($username, $collect);
[20538]839}
840
[24943]841sub remove_archives_metadata
842{
843 my $self = shift @_;
[20538]844
[24943]845 my $username = $self->{'username'};
846 my $collect = $self->{'collect'};
847 my $gsdl_cgi = $self->{'gsdl_cgi'};
848 my $gsdlhome = $self->{'gsdlhome'};
849 my $infodbtype = $self->{'infodbtype'};
850
851 if ($baseaction::authentication_enabled)
852 {
853 # Ensure the user is allowed to edit this collection
854 &authenticate_user($gsdl_cgi, $username, $collect);
855 }
856
857 my $site = $self->{'site'};
858
859 # Obtain the collect and archive dir
860 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
861
862 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
863
864 # Make sure the collection isn't locked by someone else
865 $self->lock_collection($username, $collect);
866
867 # look up additional args
868 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
869
870 my $metaname = $self->{'metaname'};
871 my $metapos = $self->{'metapos'};
872 $metapos = 0 if (!defined $metapos);
873
874 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
875 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
876
877 # This now stores the full pathname
878 my $import_filename = $doc_rec->{'doc-file'}->[0];
879
[24949]880 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $import_filename), $metaname, $metapos, null, $docid_secnum);
[24943]881
882 # Release the lock once it is done
883 $self->unlock_collection($username, $collect);
884
885 if ($status == 0)
886 {
887 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
888 $mess .= " $metaname";
889 $mess .= "->[$metapos]" if (defined $metapos);
890
891 $gsdl_cgi->generate_ok_message($mess);
892 }
893 else
894 {
895 my $mess .= "Failed to remove archives metadata key: $docid\n";
896 $mess .= "Exit status: $status\n";
897 $mess .= "System Error Message: $!\n";
898 $mess .= "-" x 20 . "\n";
899
900 $gsdl_cgi->generate_error($mess);
901 }
902}
903
904sub remove_from_doc_xml
905{
906 my $self = shift @_;
[24949]907 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
[24943]908
909 my @start_rules = ('Section' => \&dxml_start_section);
910
911 # Set the call-back functions for the metadata tags
912 my @rules =
913 (
914 _default => 'raw',
915 'Metadata' => \&rfdxml_metadata
916 );
917
918 my $parser = XML::Rules->new
919 (
920 start_rules => \@start_rules,
921 rules => \@rules,
922 style => 'filter',
923 output_encoding => 'utf8'
924 );
925
926 my $status = 0;
927 my $xml_in = "";
928 if (!open(MIN,"<$doc_xml_filename"))
929 {
930 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
931 $status = 1;
932 }
933 else
934 {
935 # Read them in
936 my $line;
937 while (defined ($line=<MIN>)) {
938 $xml_in .= $line;
939 }
940 close(MIN);
941
942 # Filter with the call-back functions
943 my $xml_out = "";
944
945 my $MOUT;
946 if (!open($MOUT,">$doc_xml_filename")) {
947 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
948 $status = 1;
949 }
950 else {
951 binmode($MOUT,":utf8");
[24949]952 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
[24943]953 close($MOUT);
954 }
955 }
956 return $status;
957}
958
959sub rfdxml_metadata
960{
961 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
962
963 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
964 {
965 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
966 return [$tagname => $attrHash];
967 }
968
969 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
970 {
971 if (!defined $parser->{'parameters'}->{'poscount'})
972 {
973 $parser->{'parameters'}->{'poscount'} = 0;
974 }
975 else
976 {
977 $parser->{'parameters'}->{'poscount'}++;
978 }
979 }
980
981 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
982 {
983 return [];
984 }
985
[24949]986 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
987 {
988 return [];
989 }
990
[24943]991 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
992 return [$tagname => $attrHash];
993}
994
[19293]995sub mxml_metadata
996{
997 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
998 my $metaname = $parser->{'parameters'}->{'metaname'};
999 my $metamode = $parser->{'parameters'}->{'metamode'};
1000
[21716]1001 # Report error if we don't see FileName tag before this
1002 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1003
1004 # Don't do anything if we are not in the right FileSet
1005 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[23761]1006 if ($file_regexp =~ /\.\*/) {
1007 # Only interested in a file_regexp if it specifies precisely one
1008 # file.
1009 # So, skip anything with a .* in it as it is too general
1010 return [$tagname => $attrHash];
1011 }
1012 my $src_file = $parser->{'parameters'}->{'src_file'};
1013 if (!($src_file =~ /$file_regexp/)) {
1014 return [$tagname => $attrHash];
1015 }
1016## print STDERR "*** mxl metamode = $metamode\n";
1017
[21715]1018 # Find the right metadata tag and checks if we are going to override it
[19293]1019 my $name_attr = $attrHash->{'name'};
1020 if (($name_attr eq $metaname) && ($metamode eq "override")) {
[21715]1021 # Get the value and override the current value
[19293]1022 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1023 $attrHash->{'_content'} = $metavalue;
1024
[23761]1025## print STDERR "**** overrideing metadata.xml\n";
1026
[19293]1027 # Don't want it to wipe out any other pieces of metadata
1028 $parser->{'parameters'}->{'metamode'} = "done";
1029 }
1030
[21716]1031 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1032 return [$tagname => $attrHash];
[19293]1033}
1034
1035
1036sub mxml_description
1037{
1038 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]1039 my $metamode = $parser->{'parameters'}->{'metamode'};
[19293]1040
[21716]1041 # Failed... Report error if we don't see FileName tag before this
1042 die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
1043
1044 # Don't do anything if we are not in the right FileSet
1045 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[24943]1046 if ($file_regexp =~ m/\.\*/) {
[23761]1047 # Only interested in a file_regexp if it specifies precisely one
1048 # file.
1049 # So, skip anything with a .* in it as it is too general
1050 return [$tagname => $attrHash];
1051 }
1052 my $src_file = $parser->{'parameters'}->{'src_file'};
[24943]1053
1054 if (!($src_file =~ m/$file_regexp/)) {
[23761]1055 return [$tagname => $attrHash];
1056 }
[21716]1057
[21715]1058 # Accumulate the metadata block to the end of the description block
1059 # Note: This adds metadata block to all description blocks, so if there are
1060 # multiple FileSets, it will add to all of them
[23761]1061 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1062 # if metamode was "override" but get to here then it failed to
1063 # find an item to override, in which case it should append its
1064 # value to the end, just like the "accumulate" mode
1065
[19293]1066 # tack a new metadata tag on to the end of the <Metadata>+ block
1067 my $metaname = $parser->{'parameters'}->{'metaname'};
1068 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1069
1070 my $metadata_attr = { '_content' => $metavalue,
1071 'name' => $metaname,
1072 'mode' => "accumulate" };
1073
1074 my $append_metadata = [ "Metadata" => $metadata_attr ];
1075 my $description_content = $attrHash->{'_content'};
[24943]1076
[23761]1077## print STDERR "*** appending to metadata.xml\n";
1078
1079 # append the new metadata element to the end of the current
1080 # content contained inside this tag
[24943]1081 if (ref($description_content) eq "") {
1082 # => string or numeric literal
1083 # this is caused by a <Description> block has no <Metadata> child elements
1084 # => set up an empty array in '_content'
1085 $attrHash->{'_content'} = [ "\n" ];
1086 $description_content = $attrHash->{'_content'};
1087 }
1088
[19293]1089 push(@$description_content," ", $append_metadata ,"\n ");
[23761]1090 $parser->{'parameters'}->{'metamode'} = "done";
[19293]1091 }
1092
[21716]1093 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1094 return [$tagname => $attrHash];
[19293]1095}
1096
[21715]1097
[21716]1098sub mxml_filename
1099{
1100 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1101
1102 # Store the filename of the Current Fileset
1103 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1104 # FileName tag must come before Description tag
1105 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1106
1107 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1108 return [$tagname => $attrHash];
1109}
1110
1111
1112sub mxml_fileset
1113{
1114 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1115
1116 # Initilise the current_file
1117 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1118 # FileName tag must come before Description tag
1119 $parser->{'parameters'}->{'current_file'} = "";
1120
1121 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1122 return [$tagname => $attrHash];
1123}
1124
1125
[19293]1126sub edit_metadata_xml
1127{
1128 my $self = shift @_;
[21716]1129 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
[19293]1130
[21715]1131 # Set the call-back functions for the metadata tags
[19293]1132 my @rules =
[21716]1133 ( _default => 'raw',
1134 'FileName' => \&mxml_filename,
[19293]1135 'Metadata' => \&mxml_metadata,
[21716]1136 'Description' => \&mxml_description,
1137 'FileSet' => \&mxml_fileset);
[19293]1138
[21715]1139 # use XML::Rules to add it in (read in and out again)
[19293]1140 my $parser = XML::Rules->new(rules => \@rules,
[21716]1141 style => 'filter',
1142 output_encoding => 'utf8');
[19293]1143
[24943]1144 if (!-e $metadata_xml_filename) {
1145
1146 if (open(MOUT,">$metadata_xml_filename")) {
1147
1148 my $src_file_re = &util::filename_to_regex($src_file);
1149 # shouldn't the following also be in the above utility routine??
1150 # $src_file_re =~ s/\./\\./g;
1151
1152 print MOUT "<?xml version=\"1.0\"?>\n";
1153 print MOUT "<DirectoryMetadata>\n";
1154 print MOUT " <FileSet>\n";
1155 print MOUT " <FileName>$src_file_re</FileName>\n";
1156 print MOUT " <Description>\n";
1157 print MOUT " </Description>\n";
1158 print MOUT " </FileSet>\n";
1159 print MOUT "</DirectoryMetadata>\n";
[23761]1160
[24943]1161 close(MOUT);
1162 }
1163 else {
1164 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1165 }
[19293]1166 }
[24943]1167
1168
1169 my $xml_in = "";
1170 if (!open(MIN,"<$metadata_xml_filename")) {
1171 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1172 }
[19293]1173 else {
[24943]1174 # Read them in
1175 my $line;
1176 while (defined ($line=<MIN>)) {
1177 $xml_in .= $line;
1178 }
1179 close(MIN);
[23761]1180
[24943]1181 # Filter with the call-back functions
1182 my $xml_out = "";
[23761]1183
[24943]1184 my $MOUT;
1185 if (!open($MOUT,">$metadata_xml_filename")) {
1186 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1187 }
1188 else {
1189 binmode($MOUT,":utf8");
1190
1191 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1192 # At the moment, I will just hack it!
1193 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1194 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1195 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1196 #print MOUT $xml_out;
1197
1198 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1199 metavalue => $metavalue,
1200 metamode => $metamode,
1201 src_file => $src_file,
1202 current_file => undef} );
1203 close($MOUT);
1204 }
1205 }
[20538]1206}
[19293]1207
1208
1209sub set_import_metadata
1210{
1211 my $self = shift @_;
[21715]1212
[19293]1213 my $username = $self->{'username'};
1214 my $collect = $self->{'collect'};
1215 my $gsdl_cgi = $self->{'gsdl_cgi'};
1216 my $gsdlhome = $self->{'gsdlhome'};
[23400]1217 my $infodbtype = $self->{'infodbtype'};
1218
[19293]1219 if ($baseaction::authentication_enabled) {
1220 # Ensure the user is allowed to edit this collection
1221 $self->authenticate_user($username, $collect);
1222 }
1223
[23761]1224
[21715]1225 # Obtain the collect and archive dir
[23766]1226 my $site = $self->{'site'};
1227 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1228
1229 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]1230 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1231
1232 # Make sure the collection isn't locked by someone else
1233 $self->lock_collection($username, $collect);
[21716]1234
[19293]1235 # look up additional args
1236 # want either d= or f=
1237 my $docid = $self->{'d'};
1238 my $import_file = $self->{'f'};
1239 if ((!defined $docid) && (!defined $import_file)) {
1240 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1241 }
1242
[21715]1243 # Get the parameters and set default mode to "accumulate"
[19293]1244 my $metaname = $self->{'metaname'};
1245 my $metavalue = $self->{'metavalue'};
[23761]1246## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1247 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1248 print STDERR "*** set import meta: val = $metavalue\n";
1249
[19293]1250 my $metamode = $self->{'metamode'};
1251 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1252 # make "accumulate" the default (less destructive, as won't actually
1253 # delete any existing values)
1254 $metamode = "accumulate";
1255 }
1256
[21715]1257 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1258 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
[19293]1259 my $metadata_xml_file;
[20935]1260 my $import_filename = undef;
[19293]1261 if (defined $docid) {
[23400]1262 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1263 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[19293]1264
[20935]1265 # This now stores the full pathname
[23761]1266 $import_filename = $doc_rec->{'src-file'}->[0];
[19293]1267 }
[20935]1268 else {
[21715]1269 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
[20935]1270 }
[23761]1271
[21715]1272 # figure out correct metadata.xml file [?]
1273 # Assuming the metadata.xml file is next to the source file
1274 # Note: This will not work if it is using the inherited metadata from the parent folder
[24943]1275 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
[19293]1276 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1277
[21715]1278 # Edit the metadata.xml
[21716]1279 # Modified by Jeffrey from DL Consulting
1280 # Handle the case where there is one metadata.xml file for multiple FileSets
1281 # The XML filter needs to know whether it is in the right FileSet
1282 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1283 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1284 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1285 $metaname, $metavalue, $metamode, $import_tailname);
[19293]1286
[21715]1287 # Release the lock once it is done
1288 $self->unlock_collection($username, $collect);
[23761]1289
1290 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1291 $mess .= " $metaname";
1292 $mess .= " = $metavalue";
1293 $mess .= " ($metamode)\n";
1294
1295 $gsdl_cgi->generate_ok_message($mess);
1296
[19293]1297}
1298
[24943]1299sub remove_import_metadata
1300{
1301 my $self = shift @_;
1302
1303 my $username = $self->{'username'};
1304 my $collect = $self->{'collect'};
1305 my $gsdl_cgi = $self->{'gsdl_cgi'};
1306
1307 if ($baseaction::authentication_enabled) {
1308 # Ensure the user is allowed to edit this collection
1309 &authenticate_user($gsdl_cgi, $username, $collect);
1310 }
[19293]1311
[24943]1312 my $gsdlhome = $self->{'gsdlhome'};
1313 my $infodbtype = $self->{'infodbtype'};
1314
1315 # Obtain the collect dir
1316 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1317 my $site = $self->{'site'};
1318 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1319
1320 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1321 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1322
1323 # Make sure the collection isn't locked by someone else
1324 $self->lock_collection($username, $collect);
1325
1326 # look up additional args
1327 my $docid = $self->{'d'};
1328 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1329 {
1330 die "No docid (d=...) specified.\n";
1331 }
1332
1333 my $metaname = $self->{'metaname'};
1334 my $metavalue = $self->{'metavalue'};
1335 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1336
1337 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1338 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1339 my $metadata_xml_file;
1340 my $import_filename = undef;
1341 if (defined $docid)
1342 {
1343 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1344 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1345
1346 # This now stores the full pathname
1347 $import_filename = $doc_rec->{'src-file'}->[0];
1348 }
1349
1350 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1351 {
1352 die "There is no metadata\n";
1353 }
1354
1355 # figure out correct metadata.xml file [?]
1356 # Assuming the metadata.xml file is next to the source file
1357 # Note: This will not work if it is using the inherited metadata from the parent folder
1358 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1359 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1360
1361 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1362
1363 # Release the lock once it is done
1364 $self->unlock_collection($username, $collect);
1365
1366 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1367 $mess .= " $metaname";
1368 $mess .= " = $metavalue\n";
1369
1370 $gsdl_cgi->generate_ok_message($mess);
1371}
1372
1373sub remove_from_metadata_xml
1374{
1375 my $self = shift @_;
1376 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1377
1378 # Set the call-back functions for the metadata tags
1379 my @rules =
1380 (
1381 _default => 'raw',
1382 'Metadata' => \&rfmxml_metadata,
1383 'FileName' => \&mxml_filename
1384 );
1385
1386 my $parser = XML::Rules->new
1387 (
1388 rules => \@rules,
1389 style => 'filter',
1390 output_encoding => 'utf8'
1391 );
1392
1393 my $xml_in = "";
1394 if (!open(MIN,"<$metadata_xml_filename"))
1395 {
1396 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1397 }
1398 else
1399 {
1400 # Read them in
1401 my $line;
1402 while (defined ($line=<MIN>)) {
1403 $xml_in .= $line;
1404 }
1405 close(MIN);
1406
1407 # Filter with the call-back functions
1408 my $xml_out = "";
1409
1410 my $MOUT;
1411 if (!open($MOUT,">$metadata_xml_filename")) {
1412 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1413 }
1414 else {
1415 binmode($MOUT,":utf8");
1416 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1417 close($MOUT);
1418 }
1419 }
1420}
1421
1422sub rfmxml_metadata
1423{
1424 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1425
1426 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1427 {
1428 return [];
1429 }
1430
1431 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1432 return [$tagname => $attrHash];
1433}
1434
[19499]1435sub remove_live_metadata
1436{
1437 my $self = shift @_;
1438
1439 my $username = $self->{'username'};
1440 my $collect = $self->{'collect'};
1441 my $gsdl_cgi = $self->{'gsdl_cgi'};
1442 my $gsdlhome = $self->{'gsdlhome'};
[23400]1443 my $infodbtype = $self->{'infodbtype'};
1444
[19499]1445 if ($baseaction::authentication_enabled) {
1446 # Ensure the user is allowed to edit this collection
1447 &authenticate_user($gsdl_cgi, $username, $collect);
1448 }
1449
[21715]1450 # Obtain the collect dir
[23766]1451 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1452 my $site = $self->{'site'};
1453 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[19499]1454
1455 # Make sure the collection isn't locked by someone else
1456 $self->lock_collection($username, $collect);
1457
1458 # look up additional args
1459 my $docid = $self->{'d'};
[21715]1460 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1461 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1462 }
1463
1464 # Generate the dbkey
[19499]1465 my $metaname = $self->{'metaname'};
1466 my $dbkey = "$docid.$metaname";
1467
[21715]1468 # To people who know $collect_tail please add some comments
1469 # Obtain the live gdbm_db path
[19499]1470 my $collect_tail = $collect;
1471 $collect_tail =~ s/^.*[\/\\]//;
[21564]1472 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1473 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19499]1474
[21715]1475 # Remove the key
[21569]1476 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
[19499]1477 my $status = system($cmd);
1478 if ($status != 0) {
[21715]1479 # Catch error if gdbmdel failed
[19499]1480 my $mess = "Failed to set metadata key: $dbkey\n";
1481
1482 $mess .= "PATH: $ENV{'PATH'}\n";
1483 $mess .= "cmd = $cmd\n";
1484 $mess .= "Exit status: $status\n";
1485 $mess .= "System Error Message: $!\n";
1486
1487 $gsdl_cgi->generate_error($mess);
1488 }
1489 else {
1490 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1491 }
1492
1493}
1494
1495
1496sub remove_metadata
1497{
1498 my $self = shift @_;
1499
1500 my $username = $self->{'username'};
1501 my $collect = $self->{'collect'};
1502 my $gsdl_cgi = $self->{'gsdl_cgi'};
1503 my $gsdlhome = $self->{'gsdlhome'};
[23400]1504 my $infodbtype = $self->{'infodbtype'};
1505
[19499]1506 if ($baseaction::authentication_enabled) {
1507 # Ensure the user is allowed to edit this collection
1508 &authenticate_user($gsdl_cgi, $username, $collect);
1509 }
1510
[21715]1511 # Obtain the collect dir
[23766]1512 my $site = $self->{'site'};
1513 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1514 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]1515
1516 # Make sure the collection isn't locked by someone else
1517 $self->lock_collection($username, $collect);
1518
1519 # look up additional args
1520 my $docid = $self->{'d'};
[21715]1521 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1522 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1523 }
[19499]1524 my $metaname = $self->{'metaname'};
1525 my $metapos = $self->{'metapos'};
[24949]1526 my $metavalue = $self->{'metavalue'};
[19499]1527
[21715]1528 # To people who know $collect_tail please add some comments
1529 # Obtain the path to the database
[19499]1530 my $collect_tail = $collect;
1531 $collect_tail =~ s/^.*[\/\\]//;
[21564]1532 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1533 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]1534
1535 # Read the docid entry
[23400]1536 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21715]1537
1538 # Basically loop through and unescape_html the values
[19499]1539 foreach my $k (keys %$doc_rec) {
1540 my @escaped_v = ();
1541 foreach my $v (@{$doc_rec->{$k}}) {
1542 if ($k eq "contains") {
1543 # protect quotes in ".2;".3 etc
1544 $v =~ s/\"/\\\"/g;
1545 push(@escaped_v, $v);
1546 }
1547 else {
1548 my $ev = &ghtml::unescape_html($v);
1549 $ev =~ s/\"/\\\"/g;
1550 push(@escaped_v, $ev);
1551 }
1552 }
1553 $doc_rec->{$k} = \@escaped_v;
1554 }
1555
[21715]1556 # Check to make sure the key does exist
1557 if (!defined ($doc_rec->{$metaname})) {
1558 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1559 }
1560
1561 # Obtain the specified metadata pos
[19499]1562 $metapos = 0 if (!defined $metapos);
1563
1564 # consider check key is defined before deleting?
[21715]1565 # Loop through the metadata array and ignore the specified position
[19499]1566 my $filtered_metadata = [];
[21715]1567 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
[19499]1568 for (my $i=0; $i<$num_metadata_vals; $i++) {
[24949]1569 my $metaval = shift(@{$doc_rec->{$metaname}});
[19499]1570
[24949]1571 if (!defined $metavalue && $i != $metapos) {
1572 push(@$filtered_metadata,$metaval)
[19499]1573 }
[24949]1574
1575 if(defined $metavalue && !($metavalue eq $metaval))
1576 {
1577 push(@$filtered_metadata,$metavalue)
1578 }
[19499]1579 }
1580 $doc_rec->{$metaname} = $filtered_metadata;
1581
[21715]1582 # Turn the record back to string
[21551]1583 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
[19499]1584
[21715]1585 # Store it back to the database
[21569]1586 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
[19499]1587 my $status = system($cmd);
1588 if ($status != 0) {
1589 my $mess = "Failed to set metadata key: $docid\n";
1590
1591 $mess .= "PATH: $ENV{'PATH'}\n";
1592 $mess .= "cmd = $cmd\n";
1593 $mess .= "Exit status: $status\n";
1594 $mess .= "System Error Message: $!\n";
1595
1596 $gsdl_cgi->generate_error($mess);
1597 }
1598 else {
1599 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1600 $mess .= " $metaname";
1601 $mess .= "->[$metapos]" if (defined $metapos);
1602
1603 $gsdl_cgi->generate_ok_message($mess);
1604 }
1605}
1606
1607
[23761]1608# Was trying to reused the codes, but the functions need to be broken
1609# down more before they can be reused, otherwise there will be too
1610# much overhead and duplicate process...
[21716]1611sub insert_metadata
1612{
1613 my $self = shift @_;
1614
1615 my $username = $self->{'username'};
1616 my $collect = $self->{'collect'};
1617 my $gsdl_cgi = $self->{'gsdl_cgi'};
1618 my $gsdlhome = $self->{'gsdlhome'};
[23400]1619 my $infodbtype = $self->{'infodbtype'};
1620
[23761]1621 # If the import metadata and gdbm database have been updated, we
1622 # need to insert some notification to warn user that the the text
1623 # they see at the moment is not indexed and require a rebuild.
[21716]1624 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1625
1626 if ($baseaction::authentication_enabled) {
1627 # Ensure the user is allowed to edit this collection
1628 $self->authenticate_user($username, $collect);
1629 }
1630
[23766]1631 # Obtain the collect and archive dir
1632 my $site = $self->{'site'};
1633 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1634 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[21716]1635 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1636
1637 # Make sure the collection isn't locked by someone else
1638 $self->lock_collection($username, $collect);
1639
1640 # Check additional args
1641 my $docid = $self->{'d'};
1642 if (!defined($docid)) {
1643 $gsdl_cgi->generate_error("No document id is specified: d=...");
1644 }
1645 my $metaname = $self->{'metaname'};
1646 if (!defined($metaname)) {
1647 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1648 }
1649 my $metavalue = $self->{'metavalue'};
1650 if (!defined($metavalue) || $metavalue eq "") {
1651 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1652 }
1653 # make "accumulate" the default (less destructive, as won't actually
1654 # delete any existing values)
1655 my $metamode = "accumulate";
1656
1657 #=======================================================================#
1658 # set_import_metadata [START]
1659 #=======================================================================#
1660 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1661 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1662 my $metadata_xml_file;
[23400]1663 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1664 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[21716]1665
1666 # This now stores the full pathname
1667 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
1668
1669 # figure out correct metadata.xml file [?]
1670 # Assuming the metadata.xml file is next to the source file
1671 # Note: This will not work if it is using the inherited metadata from the parent folder
1672 my ($import_tailname, $import_dirname)
1673 = File::Basename::fileparse($import_filename);
1674 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1675
1676 # Shane's escape characters
1677 $metavalue = pack "U0C*", unpack "C*", $metavalue;
1678 $metavalue =~ s/\,/&#44;/g;
1679 $metavalue =~ s/\:/&#58;/g;
1680 $metavalue =~ s/\|/&#124;/g;
1681 $metavalue =~ s/\(/&#40;/g;
1682 $metavalue =~ s/\)/&#41;/g;
1683 $metavalue =~ s/\[/&#91;/g;
1684 $metavalue =~ s/\\/&#92;/g;
1685 $metavalue =~ s/\]/&#93;/g;
1686 $metavalue =~ s/\{/&#123;/g;
1687 $metavalue =~ s/\}/&#125;/g;
1688 $metavalue =~ s/\"/&#34;/g;
1689 $metavalue =~ s/\`/&#96;/g;
1690 $metavalue =~ s/\n/_newline_/g;
1691
1692 # Edit the metadata.xml
1693 # Modified by Jeffrey from DL Consulting
1694 # Handle the case where there is one metadata.xml file for multiple FileSets
1695 # The XML filter needs to know whether it is in the right FileSet
1696 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1697 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1698 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1699 $metaname, $metavalue, $metamode, $import_tailname);
1700 #=======================================================================#
1701 # set_import_metadata [END]
1702 #=======================================================================#
1703
1704
1705 #=======================================================================#
1706 # set_metadata (accumulate version) [START]
1707 #=======================================================================#
1708 # To people who know $collect_tail please add some comments
1709 # Obtain path to the database
1710 my $collect_tail = $collect;
1711 $collect_tail =~ s/^.*[\/\\]//;
1712 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1713 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21716]1714
1715 # Read the docid entry
[23400]1716 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
1717
[21716]1718 foreach my $k (keys %$doc_rec) {
1719 my @escaped_v = ();
1720 foreach my $v (@{$doc_rec->{$k}}) {
1721 if ($k eq "contains") {
1722 # protect quotes in ".2;".3 etc
1723 $v =~ s/\"/\\\"/g;
1724 push(@escaped_v, $v);
1725 }
1726 else {
1727 my $ev = &ghtml::unescape_html($v);
1728 $ev =~ s/\"/\\\"/g;
1729 push(@escaped_v, $ev);
1730 }
1731 }
1732 $doc_rec->{$k} = \@escaped_v;
1733 }
1734
1735 # Protect the quotes
1736 $metavalue =~ s/\"/\\\"/g;
1737
1738 # Adds the pending macro
1739 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
1740
1741 # If the metadata doesn't exist, create a new one
1742 if (!defined($doc_rec->{$metaname})){
1743 $doc_rec->{$metaname} = [ $macro_metavalue ];
1744 }
1745 # Else, let's acculumate the values
1746 else {
1747 push(@{$doc_rec->{$metaname}},$macro_metavalue);
1748 }
1749
1750 # Generate the record string
1751 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
1752
1753 # Store it into GDBM
1754 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
1755 my $status = system($cmd);
1756 if ($status != 0) {
1757 # Catch error if gdbmget failed
1758 my $mess = "Failed to set metadata key: $docid\n";
1759
1760 $mess .= "PATH: $ENV{'PATH'}\n";
1761 $mess .= "cmd = $cmd\n";
1762 $mess .= "Exit status: $status\n";
1763 $mess .= "System Error Message: $!\n";
1764
1765 $gsdl_cgi->generate_error($mess);
1766 }
1767 else {
1768 my $mess = "insert-metadata successful: Key[$docid]\n";
1769 $mess .= " [In metadata.xml] $metaname";
1770 $mess .= " = $metavalue\n";
1771 $mess .= " [In database] $metaname";
1772 $mess .= " = $macro_metavalue\n";
1773 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
1774 $gsdl_cgi->generate_ok_message($mess);
1775 }
1776 #=======================================================================#
1777 # set_metadata (accumulate version) [END]
1778 #=======================================================================#
1779
1780 # Release the lock once it is done
1781 $self->unlock_collection($username, $collect);
1782}
1783
[19293]17841;
Note: See TracBrowser for help on using the repository browser.