source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27156

Last change on this file since 27156 was 27156, checked in by ak19, 11 years ago

Possible bugfix. Subroutine remove_live_metadata used to lock the collection but not unlock it again, whereas remove_archives_metadata and remove_import_metadata did do a symmetrical lock and unlock.

File size: 70.3 KB
RevLine 
[19293]1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
[27156]10# This program is free software; you can redistribute it and/or modify
[19293]11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[24071]37
[19293]38BEGIN {
[22331]39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
[25097]46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
[19499]51
[25097]52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
[19499]59
[25097]60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
[19499]63
[25097]64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
[19499]68
[25097]69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
[19499]72
[25097]73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
[24943]85
[25097]86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
[20538]102
[25097]103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
[20538]106
[25097]107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
[21716]110
[25097]111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
[19293]115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
[23761]123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
130
131 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
132 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
133
[19293]134 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
135
136 return bless $self, $class;
137}
138
139
140sub get_live_metadata
141{
142 my $self = shift @_;
143
144 my $username = $self->{'username'};
145 my $collect = $self->{'collect'};
146 my $gsdl_cgi = $self->{'gsdl_cgi'};
147 my $gsdlhome = $self->{'gsdlhome'};
[23478]148 my $infodbtype = $self->{'infodbtype'};
[23400]149
[23447]150 # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]151 # so (for now) let's not bother to enforce authentication
[21715]152
153 # Obtain the collect dir
[23766]154 my $site = $self->{'site'};
155 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]157
158 # Make sure the collection isn't locked by someone else
159 $self->lock_collection($username, $collect);
160
161 # look up additional args
162 my $docid = $self->{'d'};
163 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]164 $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]165 }
166
[21715]167 # Generate the dbkey
[19293]168 my $metaname = $self->{'metaname'};
169 my $dbkey = "$docid.$metaname";
170
[21715]171 # To people who know $collect_tail please add some comments
172 # Obtain path to the database
[19293]173 my $collect_tail = $collect;
174 $collect_tail =~ s/^.*[\/|\\]//;
[21564]175 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]176 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]177
178 # Obtain the content of the key
[21569]179 my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]180 if (open(GIN,"$cmd |") == 0) {
[21715]181 # Catch error if gdbmget failed
[19293]182 my $mess = "Failed to get metadata key: $metaname\n";
183 $mess .= "$!\n";
184
185 $gsdl_cgi->generate_error($mess);
186 }
187 else {
[23761]188 binmode(GIN,":utf8");
[21715]189 # Read everything in and concatenate them into $metavalue
[19293]190 my $metavalue = "";
191 my $line;
192 while (defined ($line=<GIN>)) {
193 $metavalue .= $line;
194 }
195 close(GIN);
[21715]196 chomp($metavalue); # Get rid off the tailing newlines
[19293]197 $gsdl_cgi->generate_ok_message("$metavalue");
198 }
[21715]199
200 # Release the lock once it is done
201 $self->unlock_collection($username, $collect);
[19499]202}
[19293]203
204
[19499]205sub get_metadata
206{
207 my $self = shift @_;
208
209 my $username = $self->{'username'};
210 my $collect = $self->{'collect'};
211 my $gsdl_cgi = $self->{'gsdl_cgi'};
212 my $gsdlhome = $self->{'gsdlhome'};
213
[21715]214 # Authenticate user if it is enabled
[19499]215 if ($baseaction::authentication_enabled) {
216 # Ensure the user is allowed to edit this collection
217 &authenticate_user($gsdl_cgi, $username, $collect);
218 }
219
[21715]220 # Obtain the collect dir
[23766]221 my $site = $self->{'site'};
222 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
223 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]224
225 # Make sure the collection isn't locked by someone else
226 $self->lock_collection($username, $collect);
227
228 # look up additional args
229 my $docid = $self->{'d'};
230 my $metaname = $self->{'metaname'};
231 my $metapos = $self->{'metapos'};
[23400]232 my $infodbtype = $self->{'infodbtype'};
[19499]233
[21715]234 # To people who know $collect_tail please add some comments
235 # Obtain path to the database
[19499]236 my $collect_tail = $collect;
237 $collect_tail =~ s/^.*[\/\\]//;
[21564]238 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]239 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]240
241 # Read the docid entry
[23400]242 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
243
[21715]244 # Basically loop through and unescape_html the values
[19499]245 foreach my $k (keys %$doc_rec) {
246 my @escaped_v = ();
247 foreach my $v (@{$doc_rec->{$k}}) {
248 my $ev = &ghtml::unescape_html($v);
249 push(@escaped_v, $ev);
250 }
251 $doc_rec->{$k} = \@escaped_v;
252 }
253
[21715]254 # Obtain the specified metadata value
[19499]255 $metapos = 0 if (!defined $metapos);
256 my $metavalue = $doc_rec->{$metaname}->[$metapos];
257 $gsdl_cgi->generate_ok_message("$metavalue");
[21715]258
259 # Release the lock once it is done
260 $self->unlock_collection($username, $collect);
[19293]261}
262
263
[25097]264sub get_import_metadata
265{
266 my $self = shift @_;
267
268 my $username = $self->{'username'};
269 my $collect = $self->{'collect'};
270 my $gsdl_cgi = $self->{'gsdl_cgi'};
271 my $gsdlhome = $self->{'gsdlhome'};
272
273 # Authenticate user if it is enabled
274 if ($baseaction::authentication_enabled) {
275 # Ensure the user is allowed to edit this collection
276 &authenticate_user($gsdl_cgi, $username, $collect);
277 }
278
279 # Obtain the collect dir
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
283
284 # Make sure the collection isn't locked by someone else
285 $self->lock_collection($username, $collect);
286
287 # look up additional args
288 my $docid = $self->{'d'};
289 my $metaname = $self->{'metaname'};
290 my $infodbtype = $self->{'infodbtype'};
291 if (!defined $docid)
292 {
293 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
294 }
295
296 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
297 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
298 my $metadata_xml_file;
299 my $import_filename = undef;
300
301
302 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
303 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
304 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
305
306 # This now stores the full pathname
307 $import_filename = $doc_rec->{'src-file'}->[0];
308
309 # figure out correct metadata.xml file [?]
310 # Assuming the metadata.xml file is next to the source file
311 # Note: This will not work if it is using the inherited metadata from the parent folder
312 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
313 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
314
315 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
316
317 # Release the lock once it is done
318 $self->unlock_collection($username, $collect);
319}
320
321sub get_metadata_from_metadata_xml
322{
323 my $self = shift @_;
324 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
325
326 my @rules =
327 (
328 _default => 'raw',
329 'Metadata' => \&gfmxml_metadata,
330 'FileName' => \&mxml_filename
331 );
332
333 my $parser = XML::Rules->new
334 (
335 rules => \@rules,
336 output_encoding => 'utf8'
337 );
338
339 my $xml_in = "";
340 if (!open(MIN,"<$metadata_xml_filename"))
341 {
342 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
343 }
344 else
345 {
346 # Read them in
347 my $line;
348 while (defined ($line=<MIN>)) {
349 $xml_in .= $line;
350 }
351 close(MIN);
352
353 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
354
355 if(defined $parser->{'pad'}->{'metavalue'})
356 {
357 return $parser->{'pad'}->{'metavalue'};
358 }
359 else
360 {
361 return "";
362 }
363 }
364}
365
366sub gfmxml_metadata
367{
368 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
369
370 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
371 {
372 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
373 }
374}
375
376sub get_archives_metadata
377{
378 my $self = shift @_;
379
380 my $username = $self->{'username'};
381 my $collect = $self->{'collect'};
382 my $gsdl_cgi = $self->{'gsdl_cgi'};
383 my $gsdlhome = $self->{'gsdlhome'};
384 my $infodbtype = $self->{'infodbtype'};
385
386 # Authenticate user if it is enabled
387 if ($baseaction::authentication_enabled) {
388 # Ensure the user is allowed to edit this collection
389 &authenticate_user($gsdl_cgi, $username, $collect);
390 }
391
392 # Obtain the collect dir
393 my $site = $self->{'site'};
394 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
395
396 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
397
398 # Make sure the collection isn't locked by someone else
399 $self->lock_collection($username, $collect);
400
401 # look up additional args
402 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
403 $docid_secnum = "" if (!defined $docid_secnum);
404
405 my $metaname = $self->{'metaname'};
406 my $metapos = $self->{'metapos'};
407 $metapos = 0 if (!defined $metapos);
408
409 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
410 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
411
412 # This now stores the full pathname
413 my $doc_filename = $doc_rec->{'doc-file'}->[0];
414
415 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
416
417 # Release the lock once it is done
418 $self->unlock_collection($username, $collect);
419}
420
421sub get_metadata_from_archive_xml
422{
423 my $self = shift @_;
424 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
425
426 my @start_rules = ('Section' => \&dxml_start_section);
427
428 my @rules =
429 (
430 _default => 'raw',
431 'Metadata' => \&gfdxml_metadata
432 );
433
434 my $parser = XML::Rules->new
435 (
436 start_rules => \@start_rules,
437 rules => \@rules,
438 output_encoding => 'utf8'
439 );
440
441 my $xml_in = "";
442 if (!open(MIN,"<$doc_xml_filename"))
443 {
444 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
445 }
446 else
447 {
448 # Read them in
449 my $line;
450 while (defined ($line=<MIN>)) {
451 $xml_in .= $line;
452 }
453 close(MIN);
454
455 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
456
457 if(defined $parser->{'pad'}->{'metavalue'})
458 {
459 return $parser->{'pad'}->{'metavalue'};
460 }
461 else
462 {
463 return "";
464 }
465 }
466}
467
468sub gfdxml_metadata
469{
470 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
471
472 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
473 {
474 return;
475 }
476
477 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
478 {
479 if (!defined $parser->{'parameters'}->{'poscount'})
480 {
481 $parser->{'parameters'}->{'poscount'} = 0;
482 }
483 else
484 {
485 $parser->{'parameters'}->{'poscount'}++;
486 }
487 }
488
489 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
490 {
491 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
492 }
493}
494
[19293]495sub set_live_metadata
496{
497 my $self = shift @_;
498
499 my $username = $self->{'username'};
500 my $collect = $self->{'collect'};
501 my $gsdl_cgi = $self->{'gsdl_cgi'};
502 my $gsdlhome = $self->{'gsdlhome'};
[23400]503 my $infodbtype = $self->{'infodbtype'};
504
[19293]505 if ($baseaction::authentication_enabled) {
506 # Ensure the user is allowed to edit this collection
507 &authenticate_user($gsdl_cgi, $username, $collect);
508 }
509
[21715]510 # Obtain the collect dir
[23766]511 my $site = $self->{'site'};
512 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
513 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]514
515 # Make sure the collection isn't locked by someone else
516 $self->lock_collection($username, $collect);
517
518 # look up additional args
519 my $docid = $self->{'d'};
[21715]520 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
521 $gsdl_cgi->generate_error("No docid (d=...) specified.");
522 }
[19293]523 my $metavalue = $self->{'metavalue'};
[23400]524
[19293]525
[21715]526 # Generate the dbkey
527 my $metaname = $self->{'metaname'};
[19293]528 my $dbkey = "$docid.$metaname";
529
[21715]530 # To people who know $collect_tail please add some comments
531 # Obtain path to the database
[19293]532 my $collect_tail = $collect;
533 $collect_tail =~ s/^.*[\/\\]//;
[21564]534 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]535 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19293]536
[21715]537 # Set the new value
[21569]538 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[19293]539 my $status = system($cmd);
540 if ($status != 0) {
[21715]541 # Catch error if gdbmget failed
[19293]542 my $mess = "Failed to set metadata key: $dbkey\n";
[21715]543
[19293]544 $mess .= "PATH: $ENV{'PATH'}\n";
545 $mess .= "cmd = $cmd\n";
546 $mess .= "Exit status: $status\n";
547 $mess .= "System Error Message: $!\n";
548
[19499]549 $gsdl_cgi->generate_error($mess);
[19293]550 }
551 else {
[19499]552 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
[19293]553 }
[21715]554
555 # Release the lock once it is done
556 $self->unlock_collection($username, $collect);
[19293]557}
558
[24071]559sub set_metadata_entry
560{
561 my $self = shift @_;
562 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
563
564 # To people who know $collect_tail please add some comments
565 # Obtain path to the database
566 my $collect_tail = $collect;
567 $collect_tail =~ s/^.*[\/\\]//;
568 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
569 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
570
571# print STDERR "**** infodb file path = $infodb_file_path\n";
572# print STDERR "***** infodb type = $infodbtype\n";
573
574 # Read the docid entry
575 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
576
577 # Set the metadata value
578 if (defined $metapos) {
579 $doc_rec->{$metaname}->[$metapos] = $metavalue;
580 }
581 else {
582 $doc_rec->{$metaname} = [ $metavalue ];
583 }
584
585 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
586
587 return $status;
588
589}
[19293]590
[19499]591sub set_metadata
592{
593 my $self = shift @_;
[19293]594
[19499]595 my $username = $self->{'username'};
596 my $collect = $self->{'collect'};
597 my $gsdl_cgi = $self->{'gsdl_cgi'};
598 my $gsdlhome = $self->{'gsdlhome'};
[19293]599
[19499]600 if ($baseaction::authentication_enabled) {
601 # Ensure the user is allowed to edit this collection
602 &authenticate_user($gsdl_cgi, $username, $collect);
603 }
604
[23766]605 my $site = $self->{'site'};
606 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
607
608 $gsdl_cgi->checked_chdir($collect_dir);
609
[21715]610 # Obtain the collect dir
[23766]611 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]612
613 # Make sure the collection isn't locked by someone else
614 $self->lock_collection($username, $collect);
615
616 # look up additional args
617 my $docid = $self->{'d'};
618 my $metaname = $self->{'metaname'};
619 my $metapos = $self->{'metapos'};
620 my $metavalue = $self->{'metavalue'};
[23761]621 my $infodbtype = $self->{'infodbtype'};
[23400]622
[24071]623 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
[23400]624
[19499]625 if ($status != 0) {
[23761]626 # Catch error if set infodb entry failed
627 my $mess = "Failed to set metadata key: $docid\n";
[19499]628
[23761]629 $mess .= "PATH: $ENV{'PATH'}\n";
630 $mess .= "Exit status: $status\n";
631 $mess .= "System Error Message: $!\n";
632
633 $gsdl_cgi->generate_error($mess);
[19499]634 }
635 else {
[24071]636 my $mess = "set-metadata successful: Key[$docid]\n";
[23761]637 $mess .= " $metaname";
638 $mess .= "->[$metapos]" if (defined $metapos);
639 $mess .= " = $metavalue";
640
641 $gsdl_cgi->generate_ok_message($mess);
[19499]642 }
[21715]643
644 # Release the lock once it is done
645 $self->unlock_collection($username, $collect);
[19499]646}
647
648
[24071]649sub set_metadata_array
650{
651 my $self = shift @_;
652
653 my $username = $self->{'username'};
654 my $collect = $self->{'collect'};
655 my $gsdl_cgi = $self->{'gsdl_cgi'};
656 my $gsdlhome = $self->{'gsdlhome'};
657
658 if ($baseaction::authentication_enabled) {
659 # Ensure the user is allowed to edit this collection
660 &authenticate_user($gsdl_cgi, $username, $collect);
661 }
662
663 my $site = $self->{'site'};
664 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
665
666 $gsdl_cgi->checked_chdir($collect_dir);
667
668 # Obtain the collect dir
669 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
670
671 # Make sure the collection isn't locked by someone else
672 $self->lock_collection($username, $collect);
673
674 # look up additional args
675
676 my $infodbtype = $self->{'infodbtype'};
677
678 my $json_str = $self->{'json'};
679 my $doc_array = decode_json $json_str;
680
681
682 my $global_status = 0;
683 my $global_mess = "";
684
685 my @all_docids = ();
686
687 foreach my $doc_array_rec ( @$doc_array ) {
688
689 my $docid = $doc_array_rec->{'docid'};
690 my $metaname = $doc_array_rec->{'metaname'};
691 my $metapos = $doc_array_rec->{'metapos'};
692 my $metavalue = $doc_array_rec->{'metavalue'};
693
694 push(@all_docids,$docid);
695
696 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
697
698 if ($status != 0) {
699 # Catch error if set infodb entry failed
700 $global_status = $status;
701 $global_mess .= "Failed to set metadata key: $docid\n";
702 $global_mess .= "Exit status: $status\n";
703 $global_mess .= "System Error Message: $!\n";
704 $global_mess .= "-" x 20;
705 }
706 }
707
708 if ($global_status != 0) {
709 $global_mess .= "PATH: $ENV{'PATH'}\n";
710 $gsdl_cgi->generate_error($global_mess);
711 }
712 else {
713 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
714 $gsdl_cgi->generate_ok_message($mess);
715 }
716
717 # Release the lock once it is done
718 $self->unlock_collection($username, $collect);
719}
720
721
[20538]722sub dxml_metadata
723{
724 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
725 my $metaname = $parser->{'parameters'}->{'metaname'};
726 my $metamode = $parser->{'parameters'}->{'metamode'};
[25891]727
[27007]728 print STDERR "**** Processing closing </Metadata> tag\n";
[23761]729
730 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731
732 # Find the right metadata tag and checks if we are going to
733 # override it
734 #
735 # Note: This over writes the first metadata block it
736 # encountered. If there are multiple Sections in the doc.xml, it
737 # might not behave as you would expect
[20538]738
[23761]739 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741## print STDERR "**** metamode = $metamode\n";
742
[25097]743 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744 {
745 my $name_attr = $attrHash->{'name'};
746 if (($name_attr eq $metaname) && ($metamode eq "override"))
747 {
748 if (!defined $parser->{'parameters'}->{'poscount'})
749 {
750 $parser->{'parameters'}->{'poscount'} = 0;
751 }
752 else
753 {
754 $parser->{'parameters'}->{'poscount'}++;
755 }
756
757 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758 {
[27111]759 ##print STDERR "#### got match!!\n";
[25097]760 # Get the value and override the current value
761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
762 $attrHash->{'_content'} = $metavalue;
763
764 # Don't want it to wipe out any other pieces of metadata
765 $parser->{'parameters'}->{'metamode'} = "done";
766 }
[25891]767 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
[25097]768 {
[25102]769 my $metavalue = $parser->{'parameters'}->{'metavalue'};
[25097]770 $attrHash->{'_content'} = $metavalue;
771 $parser->{'parameters'}->{'metamode'} = "done";
772 }
773 }
[20538]774 }
775
[21716]776 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
777 return [$tagname => $attrHash];
[20538]778}
779
[27111]780# This method exists purely for catching invalid section numbers that the client
781# requested to edit. Once the parser has reached the end (the final </Archive> tag),
782# we've seen all the Sections in the doc.xml, and none of their section nums matched
783# if the metamode has not been set to 'done' by then.
784sub dxml_archive
785{
786 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
787 my $metamode = $parser->{'parameters'}->{'metamode'};
788
789 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
790 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
791
792# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
793
794 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
[27112]795 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
[20538]796
[27111]797 my $metaname = $parser->{'parameters'}->{'metaname'};
798 my $metavalue = $parser->{'parameters'}->{'metavalue'};
799
[27112]800 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
801 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
802 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
803 print STDERR "\tin section $opt_doc_secnum.\n";
804 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
[27111]805 }
806
807 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
808 return [$tagname => $attrHash];
809}
810
[20538]811sub dxml_description
812{
[25558]813 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
814 my $metamode = $parser->{'parameters'}->{'metamode'};
[20538]815
[27111]816 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
817 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
818
[27007]819 print STDERR "**** Processing a closing </Description> tag \n";
[27111]820# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
[25891]821
[25558]822 # Accumulate the metadata
[23761]823
[27111]824 # We'll be accumulating metadata at this point if we haven't found and therefore
825 # haven't processed the metadata yet.
826 # For subsections, this means that if we're at a matching subsection, but haven't
827 # found the correct metaname to override in that subsection, we accumulate it as new
828 # meta in the subsection by adding it to the current description.
829 # If there's no subsection info for the metadata, it will accumulate at the top level
830 # section description if we hadn't found a matching metaname to override at this point.
831
832 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
833 # at the toplevel section. In the latter case, it means we want to process meta in the
834 # toplevel section. So the eq check between the values below will work in all cases.
835
836 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
837 # this document has been provided. In that case, the metadata for that opt_doc_secnum
838 # won't get attached/accumulated to any part of the doc, not even its top-level section.
839
840 if ($curr_secnum eq $opt_doc_secnum
841 && ($metamode eq "accumulate" || $metamode eq "override")) {
[27007]842 if ($metamode eq "override") {
843 print "No metadata value to override. Switching 'metamode' to accumulate\n";
844 }
[20538]845
[27007]846 # If get to here and metamode is override, the this means there
847 # was no existing value to overide => treat as an append operation
848
849 # Tack a new metadata tag on to the end of the <Metadata>+ block
850 my $metaname = $parser->{'parameters'}->{'metaname'};
851 my $metavalue = $parser->{'parameters'}->{'metavalue'};
852
853 my $metadata_attr = {
854 '_content' => $metavalue,
855 'name' => $metaname,
856 'mode' => "accumulate"
857 };
858
859 my $append_metadata = [ "Metadata" => $metadata_attr ];
860 my $description_content = $attrHash->{'_content'};
861
862 print "Appending metadata to doc.xml\n";
863
864 if (ref($description_content)) {
865 # got some existing interesting nested content
866 push(@$description_content, " ", $append_metadata ,"\n ");
867 }
868 else {
869 #description_content is most likely a string such as "\n"
870 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
871 }
872
873 $parser->{'parameters'}->{'metamode'} = "done";
874 }
[25891]875 else {
[27007]876 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
877## print STDERR "**** NOT ACCUMULATE?!? \n";
[25891]878 }
[20538]879
[21716]880 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
881 return [$tagname => $attrHash];
[20538]882}
883
[21715]884
[23761]885sub dxml_start_section
886{
887 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
888
889 my $new_depth = scalar(@$contextArray);
890
[25891]891 print STDERR "**** START SECTION \n";
892
[23761]893 if ($new_depth == 1) {
894 $parser->{'parameters'}->{'curr_section_depth'} = 1;
895 $parser->{'parameters'}->{'curr_section_num'} = "";
896 }
897
898 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
899 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
900
901 my $new_secnum;
902
903 if ($new_depth > $old_depth) {
904 # child subsection
905 $new_secnum = "$old_secnum.1";
906 }
907 elsif ($new_depth == $old_depth) {
908 # sibling section => increase it's value by 1
909 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
910 $tail_num++;
911 $new_secnum = $old_secnum;
912 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
913 }
914 else {
915 # back up to parent section => lopp off tail
916 $new_secnum = $old_secnum;
917 $new_secnum =~ s/\.\d+$//;
918 }
919
920 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
921 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
[25891]922
923 1;
[23761]924}
925
[20538]926sub edit_xml_file
927{
928 my $self = shift @_;
[23761]929 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
[20538]930
931 # use XML::Rules to add it in (read in and out again)
[23761]932 my $parser = XML::Rules->new(start_rules => $start_rules,
933 rules => $rules,
934 style => 'filter',
935 output_encoding => 'utf8' );
[20538]936
937 my $xml_in = "";
938 if (!open(MIN,"<$filename")) {
939 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
940 }
941 else {
[21715]942 # Read all the text in
[20538]943 my $line;
944 while (defined ($line=<MIN>)) {
945 $xml_in .= $line;
946 }
947 close(MIN);
948
[23761]949 my $MOUT;
950 if (!open($MOUT,">$filename")) {
[20538]951 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
952 }
953 else {
[23761]954 # Matched lines will get handled by the call backs
955## my $xml_out = "";
956
957 binmode($MOUT,":utf8");
958 $parser->filter($xml_in,$MOUT, $options);
959
960# binmode(MOUT,":utf8");
961# print MOUT $xml_out;
962 close($MOUT);
[20538]963 }
964 }
[27112]965
966 # copy across any custom error information that was stored during parsing
967 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
[20538]968}
969
970sub edit_doc_xml
971{
972 my $self = shift @_;
[25097]973 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
[20538]974
[27007]975 my $info_mess = <<RAWEND;
976****************************
977 edit_doc_xml()
978****************************
979doc_xml_filename = $doc_xml_filename
980metaname = $metaname
981metavalue = $metavalue
982metapos = $metapos
983metamode = $metamode
984opt_secnum = $opt_secnum
985prevmetavalue = $prevmetavalue
986****************************
987RAWEND
988
989 $gsdl_cgi->generate_message($info_mess);
[25891]990
[23761]991 # To monitor which section/subsection number we are in
992 my @start_rules =
993 ( 'Section' => \&dxml_start_section );
994
[20538]995 # use XML::Rules to add it in (read in and out again)
[21715]996 # Set the call back functions
[20538]997 my @rules =
[21716]998 ( _default => 'raw',
[23761]999 'Metadata' => \&dxml_metadata,
[27111]1000 'Description' => \&dxml_description,
1001 'Archive' => \&dxml_archive); # just for catching errors at end
[20538]1002
[21715]1003 # Sets the parameters
[20538]1004 my $options = { 'metaname' => $metaname,
1005 'metapos' => $metapos,
[23400]1006 'metavalue' => $metavalue,
[25097]1007 'metamode' => $metamode,
1008 'prevmetavalue' => $prevmetavalue };
[23400]1009
[23761]1010 if (defined $opt_secnum) {
1011 $options->{'secnum'} = $opt_secnum;
1012 }
1013
1014 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
[20538]1015}
1016
[24071]1017sub set_archives_metadata_entry
1018{
1019 my $self = shift @_;
[25891]1020 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
[24071]1021
[27007]1022 my $info_mess = <<RAWEND;
1023****************************
1024 set_archives_metadata_entry()
1025****************************
1026archive_dir = $archive_dir
1027collect_dir = $collect_dir
1028collect = $collect
1029infodbtype = $infodbtype
1030docid = $docid
1031metaname = $metaname
1032metapos = $metapos
1033metavalue = $metavalue
1034metamode = $metamode
1035prevmetavalue = $prevmetavalue
1036****************************
1037RAWEND
1038
1039 $gsdl_cgi->generate_message($info_mess);
[25891]1040
[24071]1041 # Obtain the doc.xml path for the specified docID
1042 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
[20538]1043
[24071]1044 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1045 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1046 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1047
1048 # The $doc_xml_file is relative to the archives, and now let's get the full path
1049 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1050 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1051
1052 # Edit the doc.xml file with the specified metadata name, value and position.
1053 # TODO: there is a potential problem here as this edit_doc_xml function
1054 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1055 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1056
1057 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
[25097]1058 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
[27112]1059
1060 # return 0; # return 0 for now to indicate no error
1061 return (defined $self->{'error_msg'}) ? 1 : 0;
[24071]1062}
1063
1064
[20538]1065sub set_archives_metadata
1066{
1067 my $self = shift @_;
1068
1069 my $username = $self->{'username'};
1070 my $collect = $self->{'collect'};
1071 my $gsdl_cgi = $self->{'gsdl_cgi'};
1072 my $gsdlhome = $self->{'gsdlhome'};
[23400]1073 my $infodbtype = $self->{'infodbtype'};
1074
[20538]1075 if ($baseaction::authentication_enabled) {
[24071]1076 # Ensure the user is allowed to edit this collection
1077 $self->authenticate_user($username, $collect);
[20538]1078 }
1079
[24071]1080 my $site = $self->{'site'};
1081
[23766]1082 # Obtain the collect and archive dir
1083 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1084
[20538]1085 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1086
1087 # Make sure the collection isn't locked by someone else
1088 $self->lock_collection($username, $collect);
1089
1090 # look up additional args
1091 my $docid = $self->{'d'};
1092 my $metaname = $self->{'metaname'};
1093 my $metavalue = $self->{'metavalue'};
[25102]1094 my $prevmetavalue = $self->{'prevmetavalue'};
[23400]1095
[20538]1096 my $metapos = $self->{'metapos'};
1097 $metapos = 0 if (!defined $metapos);
[23400]1098
[23761]1099 my $metamode = $self->{'metamode'};
[23400]1100 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1101 # make "accumulate" the default (less destructive, as won't actually
1102 # delete any existing values)
1103 $metamode = "accumulate";
[24071]1104 }
1105
1106 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
[25097]1107 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
[24071]1108
1109 # Release the lock once it is done
1110 $self->unlock_collection($username, $collect);
1111
1112 if ($status == 0) {
1113 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1114 $mess .= " $metaname";
1115 $mess .= "->[$metapos]" if (defined $metapos);
1116 $mess .= " = $metavalue";
1117 $mess .= " ($metamode)\n";
1118
1119 $gsdl_cgi->generate_ok_message($mess);
1120 }
1121 else {
1122 my $mess .= "Failed to set archives metadata key: $docid\n";
1123 $mess .= "Exit status: $status\n";
[27112]1124 if(defined $self->{'error_msg'}) {
1125 $mess .= "Error Message: $self->{'error_msg'}\n";
1126 } else {
1127 $mess .= "System Error Message: $!\n";
1128 }
[24071]1129 $mess .= "-" x 20 . "\n";
1130
1131 $gsdl_cgi->generate_error($mess);
1132 }
1133}
1134
1135
1136sub set_archives_metadata_array
1137{
1138 my $self = shift @_;
1139
1140 my $username = $self->{'username'};
1141 my $collect = $self->{'collect'};
1142 my $gsdl_cgi = $self->{'gsdl_cgi'};
1143 my $gsdlhome = $self->{'gsdlhome'};
1144
1145 if ($baseaction::authentication_enabled) {
1146 # Ensure the user is allowed to edit this collection
1147 &authenticate_user($gsdl_cgi, $username, $collect);
[23400]1148 }
[23761]1149
[24071]1150 my $site = $self->{'site'};
1151 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1152
1153 $gsdl_cgi->checked_chdir($collect_dir);
1154
1155 # Obtain the collect dir
1156 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1157
1158 # Make sure the collection isn't locked by someone else
1159 $self->lock_collection($username, $collect);
1160
1161 # look up additional args
1162
1163 my $infodbtype = $self->{'infodbtype'};
1164
1165 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1166
1167 my $json_str = $self->{'json'};
1168 my $doc_array = decode_json $json_str;
1169
1170
1171 my $global_status = 0;
1172 my $global_mess = "";
1173
1174 my @all_docids = ();
1175
1176 foreach my $doc_array_rec ( @$doc_array ) {
1177
1178 my $docid = $doc_array_rec->{'docid'};
1179 my $metaname = $doc_array_rec->{'metaname'};
1180 my $metapos = $doc_array_rec->{'metapos'};
1181 my $metamode = $self->{'metamode'};
1182 my $metavalue = $doc_array_rec->{'metavalue'};
1183
1184 # Some sanity checks
1185 $metapos = 0 if (!defined $metapos);
1186
1187 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1188 # make "accumulate" the default (less destructive, as won't actually
1189 # delete any existing values)
1190 $metamode = "accumulate";
1191 }
1192
1193 push(@all_docids,$docid);
1194
1195 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1196 $metaname,$metapos,$metavalue,$metamode);
1197
1198 if ($status != 0) {
1199 # Catch error if set infodb entry failed
1200 $global_status = $status;
1201 $global_mess .= "Failed to set metadata key: $docid\n";
1202 $global_mess .= "Exit status: $status\n";
1203 $global_mess .= "System Error Message: $!\n";
1204 $global_mess .= "-" x 20 . "\n";
1205 }
1206 }
1207
1208 if ($global_status != 0) {
1209 $global_mess .= "PATH: $ENV{'PATH'}\n";
1210 $gsdl_cgi->generate_error($global_mess);
1211 }
1212 else {
1213 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1214 $gsdl_cgi->generate_ok_message($mess);
1215 }
[20538]1216
[23761]1217 # Release the lock once it is done
1218 $self->unlock_collection($username, $collect);
[20538]1219}
1220
[24943]1221sub remove_archives_metadata
1222{
1223 my $self = shift @_;
[20538]1224
[24943]1225 my $username = $self->{'username'};
1226 my $collect = $self->{'collect'};
1227 my $gsdl_cgi = $self->{'gsdl_cgi'};
1228 my $gsdlhome = $self->{'gsdlhome'};
1229 my $infodbtype = $self->{'infodbtype'};
1230
1231 if ($baseaction::authentication_enabled)
1232 {
1233 # Ensure the user is allowed to edit this collection
1234 &authenticate_user($gsdl_cgi, $username, $collect);
1235 }
1236
1237 my $site = $self->{'site'};
1238
1239 # Obtain the collect and archive dir
1240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1241
1242 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1243
1244 # Make sure the collection isn't locked by someone else
1245 $self->lock_collection($username, $collect);
1246
1247 # look up additional args
1248 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1249
1250 my $metaname = $self->{'metaname'};
1251 my $metapos = $self->{'metapos'};
1252 $metapos = 0 if (!defined $metapos);
1253
1254 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1255 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1256
1257 # This now stores the full pathname
[25097]1258 my $doc_filename = $doc_rec->{'doc-file'}->[0];
[24943]1259
[25097]1260 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
[24943]1261
1262 # Release the lock once it is done
1263 $self->unlock_collection($username, $collect);
1264
1265 if ($status == 0)
1266 {
1267 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1268 $mess .= " $metaname";
1269 $mess .= "->[$metapos]" if (defined $metapos);
1270
1271 $gsdl_cgi->generate_ok_message($mess);
1272 }
1273 else
1274 {
1275 my $mess .= "Failed to remove archives metadata key: $docid\n";
1276 $mess .= "Exit status: $status\n";
1277 $mess .= "System Error Message: $!\n";
1278 $mess .= "-" x 20 . "\n";
1279
1280 $gsdl_cgi->generate_error($mess);
1281 }
1282}
1283
1284sub remove_from_doc_xml
1285{
1286 my $self = shift @_;
[24949]1287 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
[24943]1288
1289 my @start_rules = ('Section' => \&dxml_start_section);
1290
1291 # Set the call-back functions for the metadata tags
1292 my @rules =
1293 (
1294 _default => 'raw',
1295 'Metadata' => \&rfdxml_metadata
1296 );
1297
1298 my $parser = XML::Rules->new
1299 (
1300 start_rules => \@start_rules,
1301 rules => \@rules,
1302 style => 'filter',
1303 output_encoding => 'utf8'
1304 );
1305
1306 my $status = 0;
1307 my $xml_in = "";
1308 if (!open(MIN,"<$doc_xml_filename"))
1309 {
1310 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1311 $status = 1;
1312 }
1313 else
1314 {
1315 # Read them in
1316 my $line;
1317 while (defined ($line=<MIN>)) {
1318 $xml_in .= $line;
1319 }
1320 close(MIN);
1321
1322 # Filter with the call-back functions
1323 my $xml_out = "";
1324
1325 my $MOUT;
1326 if (!open($MOUT,">$doc_xml_filename")) {
1327 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1328 $status = 1;
1329 }
1330 else {
1331 binmode($MOUT,":utf8");
[24949]1332 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
[24943]1333 close($MOUT);
1334 }
1335 }
1336 return $status;
1337}
1338
1339sub rfdxml_metadata
1340{
1341 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1342
1343 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1344 {
1345 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1346 return [$tagname => $attrHash];
1347 }
1348
1349 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1350 {
1351 if (!defined $parser->{'parameters'}->{'poscount'})
1352 {
1353 $parser->{'parameters'}->{'poscount'} = 0;
1354 }
1355 else
1356 {
1357 $parser->{'parameters'}->{'poscount'}++;
1358 }
1359 }
1360
1361 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1362 {
1363 return [];
1364 }
1365
[24949]1366 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1367 {
1368 return [];
1369 }
1370
[24943]1371 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1372 return [$tagname => $attrHash];
1373}
1374
[19293]1375sub mxml_metadata
1376{
1377 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1378 my $metaname = $parser->{'parameters'}->{'metaname'};
1379 my $metamode = $parser->{'parameters'}->{'metamode'};
1380
[21716]1381 # Report error if we don't see FileName tag before this
[25097]1382 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1383
1384 # Don't do anything if we are not in the right FileSet
1385 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[23761]1386 if ($file_regexp =~ /\.\*/) {
1387 # Only interested in a file_regexp if it specifies precisely one
1388 # file.
1389 # So, skip anything with a .* in it as it is too general
1390 return [$tagname => $attrHash];
1391 }
1392 my $src_file = $parser->{'parameters'}->{'src_file'};
1393 if (!($src_file =~ /$file_regexp/)) {
1394 return [$tagname => $attrHash];
1395 }
1396## print STDERR "*** mxl metamode = $metamode\n";
1397
[21715]1398 # Find the right metadata tag and checks if we are going to override it
[19293]1399 my $name_attr = $attrHash->{'name'};
1400 if (($name_attr eq $metaname) && ($metamode eq "override")) {
[21715]1401 # Get the value and override the current value
[19293]1402 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1403 $attrHash->{'_content'} = $metavalue;
1404
[23761]1405## print STDERR "**** overrideing metadata.xml\n";
1406
[19293]1407 # Don't want it to wipe out any other pieces of metadata
1408 $parser->{'parameters'}->{'metamode'} = "done";
1409 }
1410
[21716]1411 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1412 return [$tagname => $attrHash];
[19293]1413}
1414
1415
1416sub mxml_description
1417{
1418 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]1419 my $metamode = $parser->{'parameters'}->{'metamode'};
[19293]1420
[21716]1421 # Failed... Report error if we don't see FileName tag before this
[25097]1422 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1423
1424 # Don't do anything if we are not in the right FileSet
1425 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[24943]1426 if ($file_regexp =~ m/\.\*/) {
[23761]1427 # Only interested in a file_regexp if it specifies precisely one
1428 # file.
1429 # So, skip anything with a .* in it as it is too general
1430 return [$tagname => $attrHash];
1431 }
1432 my $src_file = $parser->{'parameters'}->{'src_file'};
[24943]1433
1434 if (!($src_file =~ m/$file_regexp/)) {
[23761]1435 return [$tagname => $attrHash];
1436 }
[21716]1437
[21715]1438 # Accumulate the metadata block to the end of the description block
1439 # Note: This adds metadata block to all description blocks, so if there are
1440 # multiple FileSets, it will add to all of them
[23761]1441 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1442 # if metamode was "override" but get to here then it failed to
1443 # find an item to override, in which case it should append its
1444 # value to the end, just like the "accumulate" mode
1445
[19293]1446 # tack a new metadata tag on to the end of the <Metadata>+ block
1447 my $metaname = $parser->{'parameters'}->{'metaname'};
1448 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1449
1450 my $metadata_attr = { '_content' => $metavalue,
1451 'name' => $metaname,
1452 'mode' => "accumulate" };
1453
1454 my $append_metadata = [ "Metadata" => $metadata_attr ];
1455 my $description_content = $attrHash->{'_content'};
[24943]1456
[23761]1457## print STDERR "*** appending to metadata.xml\n";
1458
1459 # append the new metadata element to the end of the current
1460 # content contained inside this tag
[24943]1461 if (ref($description_content) eq "") {
1462 # => string or numeric literal
1463 # this is caused by a <Description> block has no <Metadata> child elements
1464 # => set up an empty array in '_content'
1465 $attrHash->{'_content'} = [ "\n" ];
1466 $description_content = $attrHash->{'_content'};
1467 }
1468
[19293]1469 push(@$description_content," ", $append_metadata ,"\n ");
[23761]1470 $parser->{'parameters'}->{'metamode'} = "done";
[19293]1471 }
1472
[21716]1473 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1474 return [$tagname => $attrHash];
[19293]1475}
1476
[21715]1477
[21716]1478sub mxml_filename
1479{
1480 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1481
1482 # Store the filename of the Current Fileset
1483 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1484 # FileName tag must come before Description tag
1485 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1486
1487 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1488 return [$tagname => $attrHash];
1489}
1490
1491
1492sub mxml_fileset
1493{
1494 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1495
1496 # Initilise the current_file
1497 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1498 # FileName tag must come before Description tag
1499 $parser->{'parameters'}->{'current_file'} = "";
1500
1501 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1502 return [$tagname => $attrHash];
1503}
1504
1505
[19293]1506sub edit_metadata_xml
1507{
1508 my $self = shift @_;
[21716]1509 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
[19293]1510
[21715]1511 # Set the call-back functions for the metadata tags
[19293]1512 my @rules =
[21716]1513 ( _default => 'raw',
1514 'FileName' => \&mxml_filename,
[19293]1515 'Metadata' => \&mxml_metadata,
[21716]1516 'Description' => \&mxml_description,
1517 'FileSet' => \&mxml_fileset);
[19293]1518
[21715]1519 # use XML::Rules to add it in (read in and out again)
[19293]1520 my $parser = XML::Rules->new(rules => \@rules,
[21716]1521 style => 'filter',
1522 output_encoding => 'utf8');
[19293]1523
[24943]1524 if (!-e $metadata_xml_filename) {
1525
1526 if (open(MOUT,">$metadata_xml_filename")) {
1527
1528 my $src_file_re = &util::filename_to_regex($src_file);
1529 # shouldn't the following also be in the above utility routine??
1530 # $src_file_re =~ s/\./\\./g;
1531
1532 print MOUT "<?xml version=\"1.0\"?>\n";
1533 print MOUT "<DirectoryMetadata>\n";
1534 print MOUT " <FileSet>\n";
1535 print MOUT " <FileName>$src_file_re</FileName>\n";
1536 print MOUT " <Description>\n";
1537 print MOUT " </Description>\n";
1538 print MOUT " </FileSet>\n";
1539 print MOUT "</DirectoryMetadata>\n";
[23761]1540
[24943]1541 close(MOUT);
1542 }
1543 else {
1544 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1545 }
[19293]1546 }
[24943]1547
1548
1549 my $xml_in = "";
1550 if (!open(MIN,"<$metadata_xml_filename")) {
1551 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1552 }
[19293]1553 else {
[24943]1554 # Read them in
1555 my $line;
1556 while (defined ($line=<MIN>)) {
1557 $xml_in .= $line;
1558 }
1559 close(MIN);
[23761]1560
[24943]1561 # Filter with the call-back functions
1562 my $xml_out = "";
[23761]1563
[24943]1564 my $MOUT;
1565 if (!open($MOUT,">$metadata_xml_filename")) {
1566 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1567 }
1568 else {
1569 binmode($MOUT,":utf8");
1570
1571 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1572 # At the moment, I will just hack it!
1573 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1574 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1575 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1576 #print MOUT $xml_out;
1577
1578 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1579 metavalue => $metavalue,
1580 metamode => $metamode,
1581 src_file => $src_file,
1582 current_file => undef} );
1583 close($MOUT);
1584 }
1585 }
[20538]1586}
[19293]1587
1588
1589sub set_import_metadata
1590{
1591 my $self = shift @_;
[21715]1592
[19293]1593 my $username = $self->{'username'};
1594 my $collect = $self->{'collect'};
1595 my $gsdl_cgi = $self->{'gsdl_cgi'};
1596 my $gsdlhome = $self->{'gsdlhome'};
[23400]1597 my $infodbtype = $self->{'infodbtype'};
1598
[19293]1599 if ($baseaction::authentication_enabled) {
[25097]1600 # Ensure the user is allowed to edit this collection
1601 $self->authenticate_user($username, $collect);
[19293]1602 }
1603
[23761]1604
[21715]1605 # Obtain the collect and archive dir
[23766]1606 my $site = $self->{'site'};
1607 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1608
1609 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]1610 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1611
1612 # Make sure the collection isn't locked by someone else
1613 $self->lock_collection($username, $collect);
[21716]1614
[19293]1615 # look up additional args
1616 # want either d= or f=
1617 my $docid = $self->{'d'};
1618 my $import_file = $self->{'f'};
1619 if ((!defined $docid) && (!defined $import_file)) {
1620 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1621 }
1622
[21715]1623 # Get the parameters and set default mode to "accumulate"
[19293]1624 my $metaname = $self->{'metaname'};
1625 my $metavalue = $self->{'metavalue'};
[23761]1626## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1627 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1628
[19293]1629 my $metamode = $self->{'metamode'};
1630 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1631 # make "accumulate" the default (less destructive, as won't actually
1632 # delete any existing values)
1633 $metamode = "accumulate";
1634 }
1635
[21715]1636 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1637 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
[19293]1638 my $metadata_xml_file;
[20935]1639 my $import_filename = undef;
[19293]1640 if (defined $docid) {
[23400]1641 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1642 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[19293]1643
[20935]1644 # This now stores the full pathname
[23761]1645 $import_filename = $doc_rec->{'src-file'}->[0];
[19293]1646 }
[20935]1647 else {
[21715]1648 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
[20935]1649 }
[23761]1650
[21715]1651 # figure out correct metadata.xml file [?]
1652 # Assuming the metadata.xml file is next to the source file
1653 # Note: This will not work if it is using the inherited metadata from the parent folder
[24943]1654 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
[19293]1655 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1656
[21715]1657 # Edit the metadata.xml
[21716]1658 # Modified by Jeffrey from DL Consulting
1659 # Handle the case where there is one metadata.xml file for multiple FileSets
1660 # The XML filter needs to know whether it is in the right FileSet
1661 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1662 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1663 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1664 $metaname, $metavalue, $metamode, $import_tailname);
[19293]1665
[21715]1666 # Release the lock once it is done
1667 $self->unlock_collection($username, $collect);
[23761]1668
1669 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1670 $mess .= " $metaname";
1671 $mess .= " = $metavalue";
1672 $mess .= " ($metamode)\n";
1673
1674 $gsdl_cgi->generate_ok_message($mess);
1675
[19293]1676}
1677
[25097]1678sub set_import_metadata_array
1679{
1680 my $self = shift @_;
1681
1682 my $username = $self->{'username'};
1683 my $collect = $self->{'collect'};
1684 my $gsdl_cgi = $self->{'gsdl_cgi'};
1685 my $gsdlhome = $self->{'gsdlhome'};
1686
1687 if ($baseaction::authentication_enabled) {
1688 # Ensure the user is allowed to edit this collection
1689 &authenticate_user($gsdl_cgi, $username, $collect);
1690 }
1691
1692 my $site = $self->{'site'};
1693 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1694
1695 $gsdl_cgi->checked_chdir($collect_dir);
1696
1697 # Make sure the collection isn't locked by someone else
1698 $self->lock_collection($username, $collect);
1699
1700 # look up additional args
1701
1702 my $infodbtype = $self->{'infodbtype'};
1703
1704 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1705
1706 my $json_str = $self->{'json'};
1707 my $doc_array = decode_json $json_str;
1708
1709 my $global_status = 0;
1710 my $global_mess = "";
1711
1712 my @all_docids = ();
1713
1714 foreach my $doc_array_rec ( @$doc_array )
1715 {
1716 my $docid = $doc_array_rec->{'docid'};
1717 my $metaname = $doc_array_rec->{'metaname'};
1718 my $metamode = $self->{'metamode'};
1719 my $metavalue = $doc_array_rec->{'metavalue'};
1720
1721 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1722 # make "accumulate" the default (less destructive, as won't actually
1723 # delete any existing values)
1724 $metamode = "accumulate";
1725 }
1726
1727 push(@all_docids,$docid);
1728
1729 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1730 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1731 my $metadata_xml_file;
1732 my $import_filename = undef;
1733
1734 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1735 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1736
1737 # This now stores the full pathname
1738 $import_filename = $doc_rec->{'src-file'}->[0];
1739
1740 # figure out correct metadata.xml file [?]
1741 # Assuming the metadata.xml file is next to the source file
1742 # Note: This will not work if it is using the inherited metadata from the parent folder
1743 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1744 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1745
1746 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1747 }
1748
1749 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1750 $gsdl_cgi->generate_ok_message($mess);
1751
1752 # Release the lock once it is done
1753 $self->unlock_collection($username, $collect);
1754}
1755
[24943]1756sub remove_import_metadata
1757{
1758 my $self = shift @_;
1759
1760 my $username = $self->{'username'};
1761 my $collect = $self->{'collect'};
1762 my $gsdl_cgi = $self->{'gsdl_cgi'};
1763
1764 if ($baseaction::authentication_enabled) {
1765 # Ensure the user is allowed to edit this collection
1766 &authenticate_user($gsdl_cgi, $username, $collect);
1767 }
[19293]1768
[24943]1769 my $gsdlhome = $self->{'gsdlhome'};
1770 my $infodbtype = $self->{'infodbtype'};
1771
1772 # Obtain the collect dir
1773 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1774 my $site = $self->{'site'};
1775 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1776
1777 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1778 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1779
1780 # Make sure the collection isn't locked by someone else
1781 $self->lock_collection($username, $collect);
1782
1783 # look up additional args
1784 my $docid = $self->{'d'};
1785 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1786 {
[25097]1787 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
[24943]1788 }
1789
1790 my $metaname = $self->{'metaname'};
1791 my $metavalue = $self->{'metavalue'};
1792 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1793
1794 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1795 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1796 my $metadata_xml_file;
1797 my $import_filename = undef;
1798 if (defined $docid)
1799 {
1800 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1801 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1802
1803 # This now stores the full pathname
1804 $import_filename = $doc_rec->{'src-file'}->[0];
1805 }
1806
1807 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1808 {
[25097]1809 $gsdl_cgi->generate_error_message("There is no metadata\n");
[24943]1810 }
1811
1812 # figure out correct metadata.xml file [?]
1813 # Assuming the metadata.xml file is next to the source file
1814 # Note: This will not work if it is using the inherited metadata from the parent folder
1815 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1816 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1817
1818 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1819
1820 # Release the lock once it is done
1821 $self->unlock_collection($username, $collect);
1822
1823 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1824 $mess .= " $metaname";
1825 $mess .= " = $metavalue\n";
1826
1827 $gsdl_cgi->generate_ok_message($mess);
1828}
1829
1830sub remove_from_metadata_xml
1831{
1832 my $self = shift @_;
1833 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1834
1835 # Set the call-back functions for the metadata tags
1836 my @rules =
1837 (
1838 _default => 'raw',
1839 'Metadata' => \&rfmxml_metadata,
1840 'FileName' => \&mxml_filename
1841 );
1842
1843 my $parser = XML::Rules->new
1844 (
1845 rules => \@rules,
1846 style => 'filter',
1847 output_encoding => 'utf8'
1848 );
1849
1850 my $xml_in = "";
1851 if (!open(MIN,"<$metadata_xml_filename"))
1852 {
1853 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1854 }
1855 else
1856 {
1857 # Read them in
1858 my $line;
1859 while (defined ($line=<MIN>)) {
1860 $xml_in .= $line;
1861 }
1862 close(MIN);
1863
1864 # Filter with the call-back functions
1865 my $xml_out = "";
1866
1867 my $MOUT;
1868 if (!open($MOUT,">$metadata_xml_filename")) {
1869 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1870 }
1871 else {
1872 binmode($MOUT,":utf8");
1873 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1874 close($MOUT);
1875 }
1876 }
1877}
1878
1879sub rfmxml_metadata
1880{
1881 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1882
1883 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1884 {
1885 return [];
1886 }
1887
1888 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1889 return [$tagname => $attrHash];
1890}
1891
[19499]1892sub remove_live_metadata
1893{
1894 my $self = shift @_;
1895
1896 my $username = $self->{'username'};
1897 my $collect = $self->{'collect'};
1898 my $gsdl_cgi = $self->{'gsdl_cgi'};
1899 my $gsdlhome = $self->{'gsdlhome'};
[23400]1900 my $infodbtype = $self->{'infodbtype'};
1901
[19499]1902 if ($baseaction::authentication_enabled) {
1903 # Ensure the user is allowed to edit this collection
1904 &authenticate_user($gsdl_cgi, $username, $collect);
1905 }
1906
[21715]1907 # Obtain the collect dir
[23766]1908 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1909 my $site = $self->{'site'};
1910 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[19499]1911
1912 # Make sure the collection isn't locked by someone else
1913 $self->lock_collection($username, $collect);
1914
1915 # look up additional args
1916 my $docid = $self->{'d'};
[21715]1917 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1918 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1919 }
1920
1921 # Generate the dbkey
[19499]1922 my $metaname = $self->{'metaname'};
1923 my $dbkey = "$docid.$metaname";
1924
[21715]1925 # To people who know $collect_tail please add some comments
1926 # Obtain the live gdbm_db path
[19499]1927 my $collect_tail = $collect;
1928 $collect_tail =~ s/^.*[\/\\]//;
[21564]1929 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1930 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19499]1931
[21715]1932 # Remove the key
[21569]1933 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
[19499]1934 my $status = system($cmd);
1935 if ($status != 0) {
[21715]1936 # Catch error if gdbmdel failed
[19499]1937 my $mess = "Failed to set metadata key: $dbkey\n";
1938
1939 $mess .= "PATH: $ENV{'PATH'}\n";
1940 $mess .= "cmd = $cmd\n";
1941 $mess .= "Exit status: $status\n";
1942 $mess .= "System Error Message: $!\n";
1943
1944 $gsdl_cgi->generate_error($mess);
1945 }
1946 else {
1947 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1948 }
1949
[27156]1950 # Release the lock once it is done
1951 $self->unlock_collection($username, $collect);
[19499]1952}
1953
1954
1955sub remove_metadata
1956{
1957 my $self = shift @_;
1958
1959 my $username = $self->{'username'};
1960 my $collect = $self->{'collect'};
1961 my $gsdl_cgi = $self->{'gsdl_cgi'};
1962 my $gsdlhome = $self->{'gsdlhome'};
[23400]1963 my $infodbtype = $self->{'infodbtype'};
1964
[19499]1965 if ($baseaction::authentication_enabled) {
1966 # Ensure the user is allowed to edit this collection
1967 &authenticate_user($gsdl_cgi, $username, $collect);
1968 }
1969
[21715]1970 # Obtain the collect dir
[23766]1971 my $site = $self->{'site'};
1972 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1973 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]1974
1975 # Make sure the collection isn't locked by someone else
1976 $self->lock_collection($username, $collect);
1977
1978 # look up additional args
1979 my $docid = $self->{'d'};
[21715]1980 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1981 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1982 }
[19499]1983 my $metaname = $self->{'metaname'};
1984 my $metapos = $self->{'metapos'};
[24949]1985 my $metavalue = $self->{'metavalue'};
[19499]1986
[21715]1987 # To people who know $collect_tail please add some comments
1988 # Obtain the path to the database
[19499]1989 my $collect_tail = $collect;
1990 $collect_tail =~ s/^.*[\/\\]//;
[21564]1991 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1992 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]1993
1994 # Read the docid entry
[23400]1995 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21715]1996
1997 # Basically loop through and unescape_html the values
[19499]1998 foreach my $k (keys %$doc_rec) {
1999 my @escaped_v = ();
2000 foreach my $v (@{$doc_rec->{$k}}) {
2001 if ($k eq "contains") {
2002 # protect quotes in ".2;".3 etc
2003 $v =~ s/\"/\\\"/g;
2004 push(@escaped_v, $v);
2005 }
2006 else {
2007 my $ev = &ghtml::unescape_html($v);
2008 $ev =~ s/\"/\\\"/g;
2009 push(@escaped_v, $ev);
2010 }
2011 }
2012 $doc_rec->{$k} = \@escaped_v;
2013 }
2014
[21715]2015 # Check to make sure the key does exist
2016 if (!defined ($doc_rec->{$metaname})) {
2017 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2018 }
2019
2020 # Obtain the specified metadata pos
[19499]2021 $metapos = 0 if (!defined $metapos);
2022
2023 # consider check key is defined before deleting?
[21715]2024 # Loop through the metadata array and ignore the specified position
[19499]2025 my $filtered_metadata = [];
[21715]2026 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
[19499]2027 for (my $i=0; $i<$num_metadata_vals; $i++) {
[24949]2028 my $metaval = shift(@{$doc_rec->{$metaname}});
[19499]2029
[24949]2030 if (!defined $metavalue && $i != $metapos) {
2031 push(@$filtered_metadata,$metaval)
[19499]2032 }
[24949]2033
2034 if(defined $metavalue && !($metavalue eq $metaval))
2035 {
2036 push(@$filtered_metadata,$metavalue)
2037 }
[19499]2038 }
2039 $doc_rec->{$metaname} = $filtered_metadata;
2040
[21715]2041 # Turn the record back to string
[21551]2042 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
[19499]2043
[21715]2044 # Store it back to the database
[21569]2045 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
[19499]2046 my $status = system($cmd);
2047 if ($status != 0) {
2048 my $mess = "Failed to set metadata key: $docid\n";
2049
2050 $mess .= "PATH: $ENV{'PATH'}\n";
2051 $mess .= "cmd = $cmd\n";
2052 $mess .= "Exit status: $status\n";
2053 $mess .= "System Error Message: $!\n";
2054
2055 $gsdl_cgi->generate_error($mess);
2056 }
2057 else {
2058 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2059 $mess .= " $metaname";
2060 $mess .= "->[$metapos]" if (defined $metapos);
2061
2062 $gsdl_cgi->generate_ok_message($mess);
2063 }
[27155]2064
2065 # Release the lock once it is done
2066 $self->unlock_collection($username, $collect);
[19499]2067}
2068
2069
[23761]2070# Was trying to reused the codes, but the functions need to be broken
2071# down more before they can be reused, otherwise there will be too
2072# much overhead and duplicate process...
[21716]2073sub insert_metadata
2074{
2075 my $self = shift @_;
2076
2077 my $username = $self->{'username'};
2078 my $collect = $self->{'collect'};
2079 my $gsdl_cgi = $self->{'gsdl_cgi'};
2080 my $gsdlhome = $self->{'gsdlhome'};
[23400]2081 my $infodbtype = $self->{'infodbtype'};
2082
[23761]2083 # If the import metadata and gdbm database have been updated, we
2084 # need to insert some notification to warn user that the the text
2085 # they see at the moment is not indexed and require a rebuild.
[21716]2086 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2087
2088 if ($baseaction::authentication_enabled) {
2089 # Ensure the user is allowed to edit this collection
2090 $self->authenticate_user($username, $collect);
2091 }
2092
[23766]2093 # Obtain the collect and archive dir
2094 my $site = $self->{'site'};
2095 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2096 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[21716]2097 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2098
2099 # Make sure the collection isn't locked by someone else
2100 $self->lock_collection($username, $collect);
2101
2102 # Check additional args
2103 my $docid = $self->{'d'};
2104 if (!defined($docid)) {
2105 $gsdl_cgi->generate_error("No document id is specified: d=...");
2106 }
2107 my $metaname = $self->{'metaname'};
2108 if (!defined($metaname)) {
2109 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
2110 }
2111 my $metavalue = $self->{'metavalue'};
2112 if (!defined($metavalue) || $metavalue eq "") {
2113 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
2114 }
2115 # make "accumulate" the default (less destructive, as won't actually
2116 # delete any existing values)
2117 my $metamode = "accumulate";
2118
2119 #=======================================================================#
2120 # set_import_metadata [START]
2121 #=======================================================================#
2122 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2123 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2124 my $metadata_xml_file;
[23400]2125 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2126 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[21716]2127
2128 # This now stores the full pathname
2129 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2130
2131 # figure out correct metadata.xml file [?]
2132 # Assuming the metadata.xml file is next to the source file
2133 # Note: This will not work if it is using the inherited metadata from the parent folder
2134 my ($import_tailname, $import_dirname)
2135 = File::Basename::fileparse($import_filename);
2136 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2137
2138 # Shane's escape characters
2139 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2140 $metavalue =~ s/\,/&#44;/g;
2141 $metavalue =~ s/\:/&#58;/g;
2142 $metavalue =~ s/\|/&#124;/g;
2143 $metavalue =~ s/\(/&#40;/g;
2144 $metavalue =~ s/\)/&#41;/g;
2145 $metavalue =~ s/\[/&#91;/g;
2146 $metavalue =~ s/\\/&#92;/g;
2147 $metavalue =~ s/\]/&#93;/g;
2148 $metavalue =~ s/\{/&#123;/g;
2149 $metavalue =~ s/\}/&#125;/g;
2150 $metavalue =~ s/\"/&#34;/g;
2151 $metavalue =~ s/\`/&#96;/g;
2152 $metavalue =~ s/\n/_newline_/g;
2153
2154 # Edit the metadata.xml
2155 # Modified by Jeffrey from DL Consulting
2156 # Handle the case where there is one metadata.xml file for multiple FileSets
2157 # The XML filter needs to know whether it is in the right FileSet
2158 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2159 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2160 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2161 $metaname, $metavalue, $metamode, $import_tailname);
2162 #=======================================================================#
2163 # set_import_metadata [END]
2164 #=======================================================================#
2165
2166
2167 #=======================================================================#
2168 # set_metadata (accumulate version) [START]
2169 #=======================================================================#
2170 # To people who know $collect_tail please add some comments
2171 # Obtain path to the database
2172 my $collect_tail = $collect;
2173 $collect_tail =~ s/^.*[\/\\]//;
2174 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2175 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21716]2176
2177 # Read the docid entry
[23400]2178 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2179
[21716]2180 foreach my $k (keys %$doc_rec) {
2181 my @escaped_v = ();
2182 foreach my $v (@{$doc_rec->{$k}}) {
2183 if ($k eq "contains") {
2184 # protect quotes in ".2;".3 etc
2185 $v =~ s/\"/\\\"/g;
2186 push(@escaped_v, $v);
2187 }
2188 else {
2189 my $ev = &ghtml::unescape_html($v);
2190 $ev =~ s/\"/\\\"/g;
2191 push(@escaped_v, $ev);
2192 }
2193 }
2194 $doc_rec->{$k} = \@escaped_v;
2195 }
2196
2197 # Protect the quotes
2198 $metavalue =~ s/\"/\\\"/g;
2199
2200 # Adds the pending macro
2201 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2202
2203 # If the metadata doesn't exist, create a new one
2204 if (!defined($doc_rec->{$metaname})){
2205 $doc_rec->{$metaname} = [ $macro_metavalue ];
2206 }
2207 # Else, let's acculumate the values
2208 else {
2209 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2210 }
2211
2212 # Generate the record string
2213 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2214
2215 # Store it into GDBM
2216 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2217 my $status = system($cmd);
2218 if ($status != 0) {
2219 # Catch error if gdbmget failed
2220 my $mess = "Failed to set metadata key: $docid\n";
2221
2222 $mess .= "PATH: $ENV{'PATH'}\n";
2223 $mess .= "cmd = $cmd\n";
2224 $mess .= "Exit status: $status\n";
2225 $mess .= "System Error Message: $!\n";
2226
2227 $gsdl_cgi->generate_error($mess);
2228 }
2229 else {
2230 my $mess = "insert-metadata successful: Key[$docid]\n";
2231 $mess .= " [In metadata.xml] $metaname";
2232 $mess .= " = $metavalue\n";
2233 $mess .= " [In database] $metaname";
2234 $mess .= " = $macro_metavalue\n";
2235 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2236 $gsdl_cgi->generate_ok_message($mess);
2237 }
2238 #=======================================================================#
2239 # set_metadata (accumulate version) [END]
2240 #=======================================================================#
2241
2242 # Release the lock once it is done
2243 $self->unlock_collection($username, $collect);
2244}
2245
[19293]22461;
Note: See TracBrowser for help on using the repository browser.