source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 25103

Last change on this file since 25103 was 25103, checked in by sjm84, 12 years ago

Removed some debug statements

File size: 65.8 KB
RevLine 
[19293]1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[24071]37
[19293]38BEGIN {
[22331]39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
[25097]46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ ] },
[19499]51
[25097]52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
[19499]59
[25097]60 "get-live-metadata" => {
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ ] },
[19499]63
[25097]64 #SET METHODS
65 "set-live-metadata" => {
66 'compulsory-args' => [ "d", "metaname", "metavalue" ],
67 'optional-args' => [ ] },
[19499]68
[25097]69 "set-metadata" => {
70 'compulsory-args' => [ "d", "metaname", "metavalue" ],
71 'optional-args' => [ "metapos" ] },
[19499]72
[25097]73 "set-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname", "metavalue" ],
75 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
76
77 "set-import-metadata" => {
78 'compulsory-args' => [ "metaname", "metavalue" ],
79 'optional-args' => [ "d", "f", "metamode" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos)
80
81 #SET METHODS (ARRAY)
82 "set-metadata-array" => {
83 'compulsory-args' => [ "json" ],
84 'optional-args' => [ ] },
[24943]85
[25097]86 "set-archives-metadata-array" => {
87 'compulsory-args' => [ "json" ],
88 'optional-args' => [ ] },
89
90 "set-import-metadata-array" => {
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ ] },
93
94 #REMOVE METHODS
95 "remove-import-metadata" => {
96 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
97 'optional-args' => [ ] },
98
99 "remove-archives-metadata" => {
100 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
101 'optional-args' => [ "metapos", "metavalue" ] },
[20538]102
[25097]103 "remove-live-metadata" => {
104 'compulsory-args' => [ "d", "metaname" ],
105 'optional-args' => [ ] },
[20538]106
[25097]107 "remove-metadata" => {
108 'compulsory-args' => [ "d", "metaname" ],
109 'optional-args' => [ "metapos", "metavalue" ] },
[21716]110
[25097]111 #INSERT METHODS
112 "insert-metadata" => {
113 'compulsory-args' => [ "d", "metaname", "metavalue" ],
114 'optional-args' => [ ] }
[19293]115};
116
117
118sub new
119{
120 my $class = shift (@_);
121 my ($gsdl_cgi,$iis6_mode) = @_;
122
[23761]123 # Treat metavalue specially. To transmit this through a GET request
124 # the Javascript side has url-encoded it, so here we need to decode
125 # it before proceeding
126
127 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
128 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
129
130 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
131
132 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
133
134 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
135
[19293]136 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
137
138 return bless $self, $class;
139}
140
141
142sub get_live_metadata
143{
144 my $self = shift @_;
145
146 my $username = $self->{'username'};
147 my $collect = $self->{'collect'};
148 my $gsdl_cgi = $self->{'gsdl_cgi'};
149 my $gsdlhome = $self->{'gsdlhome'};
[23478]150 my $infodbtype = $self->{'infodbtype'};
[23400]151
[23447]152 # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]153 # so (for now) let's not bother to enforce authentication
[21715]154
155 # Obtain the collect dir
[23766]156 my $site = $self->{'site'};
157 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
158 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]159
160 # Make sure the collection isn't locked by someone else
161 $self->lock_collection($username, $collect);
162
163 # look up additional args
164 my $docid = $self->{'d'};
165 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]166 $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]167 }
168
[21715]169 # Generate the dbkey
[19293]170 my $metaname = $self->{'metaname'};
171 my $dbkey = "$docid.$metaname";
172
[21715]173 # To people who know $collect_tail please add some comments
174 # Obtain path to the database
[19293]175 my $collect_tail = $collect;
176 $collect_tail =~ s/^.*[\/|\\]//;
[21564]177 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]178 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]179
180 # Obtain the content of the key
[21569]181 my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]182 if (open(GIN,"$cmd |") == 0) {
[21715]183 # Catch error if gdbmget failed
[19293]184 my $mess = "Failed to get metadata key: $metaname\n";
185 $mess .= "$!\n";
186
187 $gsdl_cgi->generate_error($mess);
188 }
189 else {
[23761]190 binmode(GIN,":utf8");
[21715]191 # Read everything in and concatenate them into $metavalue
[19293]192 my $metavalue = "";
193 my $line;
194 while (defined ($line=<GIN>)) {
195 $metavalue .= $line;
196 }
197 close(GIN);
[21715]198 chomp($metavalue); # Get rid off the tailing newlines
[19293]199 $gsdl_cgi->generate_ok_message("$metavalue");
200 }
[21715]201
202 # Release the lock once it is done
203 $self->unlock_collection($username, $collect);
[19499]204}
[19293]205
206
[19499]207sub get_metadata
208{
209 my $self = shift @_;
210
211 my $username = $self->{'username'};
212 my $collect = $self->{'collect'};
213 my $gsdl_cgi = $self->{'gsdl_cgi'};
214 my $gsdlhome = $self->{'gsdlhome'};
215
[21715]216 # Authenticate user if it is enabled
[19499]217 if ($baseaction::authentication_enabled) {
218 # Ensure the user is allowed to edit this collection
219 &authenticate_user($gsdl_cgi, $username, $collect);
220 }
221
[21715]222 # Obtain the collect dir
[23766]223 my $site = $self->{'site'};
224 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
225 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]226
227 # Make sure the collection isn't locked by someone else
228 $self->lock_collection($username, $collect);
229
230 # look up additional args
231 my $docid = $self->{'d'};
232 my $metaname = $self->{'metaname'};
233 my $metapos = $self->{'metapos'};
[23400]234 my $infodbtype = $self->{'infodbtype'};
[19499]235
[21715]236 # To people who know $collect_tail please add some comments
237 # Obtain path to the database
[19499]238 my $collect_tail = $collect;
239 $collect_tail =~ s/^.*[\/\\]//;
[21564]240 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]241 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]242
243 # Read the docid entry
[23400]244 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
245
[21715]246 # Basically loop through and unescape_html the values
[19499]247 foreach my $k (keys %$doc_rec) {
248 my @escaped_v = ();
249 foreach my $v (@{$doc_rec->{$k}}) {
250 my $ev = &ghtml::unescape_html($v);
251 push(@escaped_v, $ev);
252 }
253 $doc_rec->{$k} = \@escaped_v;
254 }
255
[21715]256 # Obtain the specified metadata value
[19499]257 $metapos = 0 if (!defined $metapos);
258 my $metavalue = $doc_rec->{$metaname}->[$metapos];
259 $gsdl_cgi->generate_ok_message("$metavalue");
[21715]260
261 # Release the lock once it is done
262 $self->unlock_collection($username, $collect);
[19293]263}
264
265
[25097]266sub get_import_metadata
267{
268 my $self = shift @_;
269
270 my $username = $self->{'username'};
271 my $collect = $self->{'collect'};
272 my $gsdl_cgi = $self->{'gsdl_cgi'};
273 my $gsdlhome = $self->{'gsdlhome'};
274
275 # Authenticate user if it is enabled
276 if ($baseaction::authentication_enabled) {
277 # Ensure the user is allowed to edit this collection
278 &authenticate_user($gsdl_cgi, $username, $collect);
279 }
280
281 # Obtain the collect dir
282 my $site = $self->{'site'};
283 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
284 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
285
286 # Make sure the collection isn't locked by someone else
287 $self->lock_collection($username, $collect);
288
289 # look up additional args
290 my $docid = $self->{'d'};
291 my $metaname = $self->{'metaname'};
292 my $infodbtype = $self->{'infodbtype'};
293 if (!defined $docid)
294 {
295 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
296 }
297
298 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
299 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
300 my $metadata_xml_file;
301 my $import_filename = undef;
302
303
304 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
305 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
306 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
307
308 # This now stores the full pathname
309 $import_filename = $doc_rec->{'src-file'}->[0];
310
311 # figure out correct metadata.xml file [?]
312 # Assuming the metadata.xml file is next to the source file
313 # Note: This will not work if it is using the inherited metadata from the parent folder
314 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
315 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
316
317 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $import_tailname));
318
319 # Release the lock once it is done
320 $self->unlock_collection($username, $collect);
321}
322
323sub get_metadata_from_metadata_xml
324{
325 my $self = shift @_;
326 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $src_file) = @_;
327
328 my @rules =
329 (
330 _default => 'raw',
331 'Metadata' => \&gfmxml_metadata,
332 'FileName' => \&mxml_filename
333 );
334
335 my $parser = XML::Rules->new
336 (
337 rules => \@rules,
338 output_encoding => 'utf8'
339 );
340
341 my $xml_in = "";
342 if (!open(MIN,"<$metadata_xml_filename"))
343 {
344 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
345 }
346 else
347 {
348 # Read them in
349 my $line;
350 while (defined ($line=<MIN>)) {
351 $xml_in .= $line;
352 }
353 close(MIN);
354
355 $parser->parse($xml_in, {metaname => $metaname, src_file => $src_file});
356
357 if(defined $parser->{'pad'}->{'metavalue'})
358 {
359 return $parser->{'pad'}->{'metavalue'};
360 }
361 else
362 {
363 return "";
364 }
365 }
366}
367
368sub gfmxml_metadata
369{
370 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
371
372 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}))
373 {
374 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
375 }
376}
377
378sub get_archives_metadata
379{
380 my $self = shift @_;
381
382 my $username = $self->{'username'};
383 my $collect = $self->{'collect'};
384 my $gsdl_cgi = $self->{'gsdl_cgi'};
385 my $gsdlhome = $self->{'gsdlhome'};
386 my $infodbtype = $self->{'infodbtype'};
387
388 # Authenticate user if it is enabled
389 if ($baseaction::authentication_enabled) {
390 # Ensure the user is allowed to edit this collection
391 &authenticate_user($gsdl_cgi, $username, $collect);
392 }
393
394 # Obtain the collect dir
395 my $site = $self->{'site'};
396 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
397
398 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
399
400 # Make sure the collection isn't locked by someone else
401 $self->lock_collection($username, $collect);
402
403 # look up additional args
404 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
405 $docid_secnum = "" if (!defined $docid_secnum);
406
407 my $metaname = $self->{'metaname'};
408 my $metapos = $self->{'metapos'};
409 $metapos = 0 if (!defined $metapos);
410
411 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
412 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
413
414 # This now stores the full pathname
415 my $doc_filename = $doc_rec->{'doc-file'}->[0];
416
417 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
418
419 # Release the lock once it is done
420 $self->unlock_collection($username, $collect);
421}
422
423sub get_metadata_from_archive_xml
424{
425 my $self = shift @_;
426 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
427
428 my @start_rules = ('Section' => \&dxml_start_section);
429
430 my @rules =
431 (
432 _default => 'raw',
433 'Metadata' => \&gfdxml_metadata
434 );
435
436 my $parser = XML::Rules->new
437 (
438 start_rules => \@start_rules,
439 rules => \@rules,
440 output_encoding => 'utf8'
441 );
442
443 my $xml_in = "";
444 if (!open(MIN,"<$doc_xml_filename"))
445 {
446 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
447 }
448 else
449 {
450 # Read them in
451 my $line;
452 while (defined ($line=<MIN>)) {
453 $xml_in .= $line;
454 }
455 close(MIN);
456
457 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
458
459 if(defined $parser->{'pad'}->{'metavalue'})
460 {
461 return $parser->{'pad'}->{'metavalue'};
462 }
463 else
464 {
465 return "";
466 }
467 }
468}
469
470sub gfdxml_metadata
471{
472 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
473
474 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
475 {
476 return;
477 }
478
479 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
480 {
481 if (!defined $parser->{'parameters'}->{'poscount'})
482 {
483 $parser->{'parameters'}->{'poscount'} = 0;
484 }
485 else
486 {
487 $parser->{'parameters'}->{'poscount'}++;
488 }
489 }
490
491 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
492 {
493 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
494 }
495}
496
[19293]497sub set_live_metadata
498{
499 my $self = shift @_;
500
501 my $username = $self->{'username'};
502 my $collect = $self->{'collect'};
503 my $gsdl_cgi = $self->{'gsdl_cgi'};
504 my $gsdlhome = $self->{'gsdlhome'};
[23400]505 my $infodbtype = $self->{'infodbtype'};
506
[19293]507 if ($baseaction::authentication_enabled) {
508 # Ensure the user is allowed to edit this collection
509 &authenticate_user($gsdl_cgi, $username, $collect);
510 }
511
[21715]512 # Obtain the collect dir
[23766]513 my $site = $self->{'site'};
514 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
515 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]516
517 # Make sure the collection isn't locked by someone else
518 $self->lock_collection($username, $collect);
519
520 # look up additional args
521 my $docid = $self->{'d'};
[21715]522 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
523 $gsdl_cgi->generate_error("No docid (d=...) specified.");
524 }
[19293]525 my $metavalue = $self->{'metavalue'};
[23400]526
[19293]527
[21715]528 # Generate the dbkey
529 my $metaname = $self->{'metaname'};
[19293]530 my $dbkey = "$docid.$metaname";
531
[21715]532 # To people who know $collect_tail please add some comments
533 # Obtain path to the database
[19293]534 my $collect_tail = $collect;
535 $collect_tail =~ s/^.*[\/\\]//;
[21564]536 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]537 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19293]538
[21715]539 # Set the new value
[21569]540 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[19293]541 my $status = system($cmd);
542 if ($status != 0) {
[21715]543 # Catch error if gdbmget failed
[19293]544 my $mess = "Failed to set metadata key: $dbkey\n";
[21715]545
[19293]546 $mess .= "PATH: $ENV{'PATH'}\n";
547 $mess .= "cmd = $cmd\n";
548 $mess .= "Exit status: $status\n";
549 $mess .= "System Error Message: $!\n";
550
[19499]551 $gsdl_cgi->generate_error($mess);
[19293]552 }
553 else {
[19499]554 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
[19293]555 }
[21715]556
557 # Release the lock once it is done
558 $self->unlock_collection($username, $collect);
[19293]559}
560
[24071]561sub set_metadata_entry
562{
563 my $self = shift @_;
564 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue) = @_;
565
566 # To people who know $collect_tail please add some comments
567 # Obtain path to the database
568 my $collect_tail = $collect;
569 $collect_tail =~ s/^.*[\/\\]//;
570 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
571 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
572
573# print STDERR "**** infodb file path = $infodb_file_path\n";
574# print STDERR "***** infodb type = $infodbtype\n";
575
576 # Read the docid entry
577 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
578
579 # Set the metadata value
580 if (defined $metapos) {
581 $doc_rec->{$metaname}->[$metapos] = $metavalue;
582 }
583 else {
584 $doc_rec->{$metaname} = [ $metavalue ];
585 }
586
587 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
588
589 return $status;
590
591}
[19293]592
[19499]593sub set_metadata
594{
595 my $self = shift @_;
[19293]596
[19499]597 my $username = $self->{'username'};
598 my $collect = $self->{'collect'};
599 my $gsdl_cgi = $self->{'gsdl_cgi'};
600 my $gsdlhome = $self->{'gsdlhome'};
[19293]601
[19499]602 if ($baseaction::authentication_enabled) {
603 # Ensure the user is allowed to edit this collection
604 &authenticate_user($gsdl_cgi, $username, $collect);
605 }
606
[23766]607 my $site = $self->{'site'};
608 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610 $gsdl_cgi->checked_chdir($collect_dir);
611
[21715]612 # Obtain the collect dir
[23766]613 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]614
615 # Make sure the collection isn't locked by someone else
616 $self->lock_collection($username, $collect);
617
618 # look up additional args
619 my $docid = $self->{'d'};
620 my $metaname = $self->{'metaname'};
621 my $metapos = $self->{'metapos'};
622 my $metavalue = $self->{'metavalue'};
[23761]623 my $infodbtype = $self->{'infodbtype'};
[23400]624
[24071]625 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
[23400]626
[19499]627 if ($status != 0) {
[23761]628 # Catch error if set infodb entry failed
629 my $mess = "Failed to set metadata key: $docid\n";
[19499]630
[23761]631 $mess .= "PATH: $ENV{'PATH'}\n";
632 $mess .= "Exit status: $status\n";
633 $mess .= "System Error Message: $!\n";
634
635 $gsdl_cgi->generate_error($mess);
[19499]636 }
637 else {
[24071]638 my $mess = "set-metadata successful: Key[$docid]\n";
[23761]639 $mess .= " $metaname";
640 $mess .= "->[$metapos]" if (defined $metapos);
641 $mess .= " = $metavalue";
642
643 $gsdl_cgi->generate_ok_message($mess);
[19499]644 }
[21715]645
646 # Release the lock once it is done
647 $self->unlock_collection($username, $collect);
[19499]648}
649
650
[24071]651sub set_metadata_array
652{
653 my $self = shift @_;
654
655 my $username = $self->{'username'};
656 my $collect = $self->{'collect'};
657 my $gsdl_cgi = $self->{'gsdl_cgi'};
658 my $gsdlhome = $self->{'gsdlhome'};
659
660 if ($baseaction::authentication_enabled) {
661 # Ensure the user is allowed to edit this collection
662 &authenticate_user($gsdl_cgi, $username, $collect);
663 }
664
665 my $site = $self->{'site'};
666 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
667
668 $gsdl_cgi->checked_chdir($collect_dir);
669
670 # Obtain the collect dir
671 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
672
673 # Make sure the collection isn't locked by someone else
674 $self->lock_collection($username, $collect);
675
676 # look up additional args
677
678 my $infodbtype = $self->{'infodbtype'};
679
680 my $json_str = $self->{'json'};
681 my $doc_array = decode_json $json_str;
682
683
684 my $global_status = 0;
685 my $global_mess = "";
686
687 my @all_docids = ();
688
689 foreach my $doc_array_rec ( @$doc_array ) {
690
691 my $docid = $doc_array_rec->{'docid'};
692 my $metaname = $doc_array_rec->{'metaname'};
693 my $metapos = $doc_array_rec->{'metapos'};
694 my $metavalue = $doc_array_rec->{'metavalue'};
695
696 push(@all_docids,$docid);
697
698 my $status = $self->set_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue);
699
700 if ($status != 0) {
701 # Catch error if set infodb entry failed
702 $global_status = $status;
703 $global_mess .= "Failed to set metadata key: $docid\n";
704 $global_mess .= "Exit status: $status\n";
705 $global_mess .= "System Error Message: $!\n";
706 $global_mess .= "-" x 20;
707 }
708 }
709
710 if ($global_status != 0) {
711 $global_mess .= "PATH: $ENV{'PATH'}\n";
712 $gsdl_cgi->generate_error($global_mess);
713 }
714 else {
715 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
716 $gsdl_cgi->generate_ok_message($mess);
717 }
718
719 # Release the lock once it is done
720 $self->unlock_collection($username, $collect);
721}
722
723
[20538]724sub dxml_metadata
725{
726 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
727 my $metaname = $parser->{'parameters'}->{'metaname'};
728 my $metamode = $parser->{'parameters'}->{'metamode'};
[23761]729
730 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
731
732 # Find the right metadata tag and checks if we are going to
733 # override it
734 #
735 # Note: This over writes the first metadata block it
736 # encountered. If there are multiple Sections in the doc.xml, it
737 # might not behave as you would expect
[20538]738
[23761]739 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
740## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
741## print STDERR "**** metamode = $metamode\n";
742
[25097]743 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
744 {
745 my $name_attr = $attrHash->{'name'};
746 if (($name_attr eq $metaname) && ($metamode eq "override"))
747 {
748 if (!defined $parser->{'parameters'}->{'poscount'})
749 {
750 $parser->{'parameters'}->{'poscount'} = 0;
751 }
752 else
753 {
754 $parser->{'parameters'}->{'poscount'}++;
755 }
756
757 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
758 {
759 ##print STDERR "**** got match!!\n";
760 # Get the value and override the current value
761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
762 $attrHash->{'_content'} = $metavalue;
763
764 # Don't want it to wipe out any other pieces of metadata
765 $parser->{'parameters'}->{'metamode'} = "done";
766 }
[25102]767 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} == $attrHash->{'_content'})
[25097]768 {
[25102]769 my $metavalue = $parser->{'parameters'}->{'metavalue'};
[25097]770 $attrHash->{'_content'} = $metavalue;
771 $parser->{'parameters'}->{'metamode'} = "done";
772 }
773 }
[20538]774 }
775
[21716]776 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
777 return [$tagname => $attrHash];
[20538]778}
779
780
781sub dxml_description
782{
783 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
784 my $metamode = $parser->{'parameters'}->{'metamode'};
785
[21715]786 # Accumulate the metadata
787 # NOTE: This appends new metadata element to all description fields.
788 # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
[23761]789 if (($metamode eq "accumulate") || ($metamode eq "override")) {
790 # If get to here and metamode is override, the this means there
791 # was no existing value to overide => treat as an append operation
792
793 # Tack a new metadata tag on to the end of the <Metadata>+ block
[20538]794 my $metaname = $parser->{'parameters'}->{'metaname'};
795 my $metavalue = $parser->{'parameters'}->{'metavalue'};
796
797 my $metadata_attr = { '_content' => $metavalue,
798 'name' => $metaname,
799 'mode' => "accumulate" };
800
801 my $append_metadata = [ "Metadata" => $metadata_attr ];
802 my $description_content = $attrHash->{'_content'};
803
[23761]804## print STDERR "**** appending to doc.xml\n";
805
[20538]806 push(@$description_content," ", $append_metadata ,"\n ");
[23761]807 $parser->{'parameters'}->{'metamode'} = "done";
[20538]808 }
809
[21716]810
811 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
812 return [$tagname => $attrHash];
[20538]813}
814
[21715]815
[23761]816sub dxml_start_section
817{
818 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
819
820 my $new_depth = scalar(@$contextArray);
821
822 if ($new_depth == 1) {
823 $parser->{'parameters'}->{'curr_section_depth'} = 1;
824 $parser->{'parameters'}->{'curr_section_num'} = "";
825 }
826
827 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
828 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
829
830 my $new_secnum;
831
832 if ($new_depth > $old_depth) {
833 # child subsection
834 $new_secnum = "$old_secnum.1";
835 }
836 elsif ($new_depth == $old_depth) {
837 # sibling section => increase it's value by 1
838 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
839 $tail_num++;
840 $new_secnum = $old_secnum;
841 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
842 }
843 else {
844 # back up to parent section => lopp off tail
845 $new_secnum = $old_secnum;
846 $new_secnum =~ s/\.\d+$//;
847 }
848
849 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
850 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
851
[25102]852 print STDERR "*** In Section: $new_secnum\n";
[23761]853}
854
[20538]855sub edit_xml_file
856{
857 my $self = shift @_;
[23761]858 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
[20538]859
860 # use XML::Rules to add it in (read in and out again)
[23761]861 my $parser = XML::Rules->new(start_rules => $start_rules,
862 rules => $rules,
863 style => 'filter',
864 output_encoding => 'utf8' );
[20538]865
866 my $xml_in = "";
867 if (!open(MIN,"<$filename")) {
868 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
869 }
870 else {
[21715]871 # Read all the text in
[20538]872 my $line;
873 while (defined ($line=<MIN>)) {
874 $xml_in .= $line;
875 }
876 close(MIN);
877
[23761]878 my $MOUT;
879 if (!open($MOUT,">$filename")) {
[20538]880 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
881 }
882 else {
[23761]883 # Matched lines will get handled by the call backs
884## my $xml_out = "";
885
886 binmode($MOUT,":utf8");
887 $parser->filter($xml_in,$MOUT, $options);
888
889# binmode(MOUT,":utf8");
890# print MOUT $xml_out;
891 close($MOUT);
[20538]892 }
893 }
894}
895
896sub edit_doc_xml
897{
898 my $self = shift @_;
[25097]899 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
[20538]900
[23761]901 # To monitor which section/subsection number we are in
902 my @start_rules =
903 ( 'Section' => \&dxml_start_section );
904
[20538]905 # use XML::Rules to add it in (read in and out again)
[21715]906 # Set the call back functions
[20538]907 my @rules =
[21716]908 ( _default => 'raw',
[23761]909 'Metadata' => \&dxml_metadata,
910 'Description' => \&dxml_description);
[20538]911
[21715]912 # Sets the parameters
[20538]913 my $options = { 'metaname' => $metaname,
914 'metapos' => $metapos,
[23400]915 'metavalue' => $metavalue,
[25097]916 'metamode' => $metamode,
917 'prevmetavalue' => $prevmetavalue };
[23400]918
[23761]919 if (defined $opt_secnum) {
920 $options->{'secnum'} = $opt_secnum;
921 }
922
923 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
[20538]924}
925
[24071]926sub set_archives_metadata_entry
927{
928 my $self = shift @_;
[25097]929 my ($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
[24071]930
931 # Obtain the doc.xml path for the specified docID
932 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
[20538]933
[24071]934 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
935 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
936 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
937
938 # The $doc_xml_file is relative to the archives, and now let's get the full path
939 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
940 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
941
942 # Edit the doc.xml file with the specified metadata name, value and position.
943 # TODO: there is a potential problem here as this edit_doc_xml function
944 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
945 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
946
947 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
[25097]948 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
[24071]949
950 return 0; # return 0 for now to indicate no error
951
952}
953
954
[20538]955sub set_archives_metadata
956{
957 my $self = shift @_;
958
959 my $username = $self->{'username'};
960 my $collect = $self->{'collect'};
961 my $gsdl_cgi = $self->{'gsdl_cgi'};
962 my $gsdlhome = $self->{'gsdlhome'};
[23400]963 my $infodbtype = $self->{'infodbtype'};
964
[20538]965 if ($baseaction::authentication_enabled) {
[24071]966 # Ensure the user is allowed to edit this collection
967 $self->authenticate_user($username, $collect);
[20538]968 }
969
[24071]970 my $site = $self->{'site'};
971
[23766]972 # Obtain the collect and archive dir
973 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
974
[20538]975 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
976
977 # Make sure the collection isn't locked by someone else
978 $self->lock_collection($username, $collect);
979
980 # look up additional args
981 my $docid = $self->{'d'};
982 my $metaname = $self->{'metaname'};
983 my $metavalue = $self->{'metavalue'};
[25102]984 my $prevmetavalue = $self->{'prevmetavalue'};
[23400]985
[20538]986 my $metapos = $self->{'metapos'};
987 $metapos = 0 if (!defined $metapos);
[23400]988
[23761]989 my $metamode = $self->{'metamode'};
[23400]990 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
991 # make "accumulate" the default (less destructive, as won't actually
992 # delete any existing values)
993 $metamode = "accumulate";
[24071]994 }
995
996 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
[25097]997 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
[24071]998
999 # Release the lock once it is done
1000 $self->unlock_collection($username, $collect);
1001
1002 if ($status == 0) {
1003 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1004 $mess .= " $metaname";
1005 $mess .= "->[$metapos]" if (defined $metapos);
1006 $mess .= " = $metavalue";
1007 $mess .= " ($metamode)\n";
1008
1009 $gsdl_cgi->generate_ok_message($mess);
1010 }
1011 else {
1012 my $mess .= "Failed to set archives metadata key: $docid\n";
1013 $mess .= "Exit status: $status\n";
1014 $mess .= "System Error Message: $!\n";
1015 $mess .= "-" x 20 . "\n";
1016
1017 $gsdl_cgi->generate_error($mess);
1018 }
1019}
1020
1021
1022sub set_archives_metadata_array
1023{
1024 my $self = shift @_;
1025
1026 my $username = $self->{'username'};
1027 my $collect = $self->{'collect'};
1028 my $gsdl_cgi = $self->{'gsdl_cgi'};
1029 my $gsdlhome = $self->{'gsdlhome'};
1030
1031 if ($baseaction::authentication_enabled) {
1032 # Ensure the user is allowed to edit this collection
1033 &authenticate_user($gsdl_cgi, $username, $collect);
[23400]1034 }
[23761]1035
[24071]1036 my $site = $self->{'site'};
1037 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1038
1039 $gsdl_cgi->checked_chdir($collect_dir);
1040
1041 # Obtain the collect dir
1042 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1043
1044 # Make sure the collection isn't locked by someone else
1045 $self->lock_collection($username, $collect);
1046
1047 # look up additional args
1048
1049 my $infodbtype = $self->{'infodbtype'};
1050
1051 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1052
1053 my $json_str = $self->{'json'};
1054 my $doc_array = decode_json $json_str;
1055
1056
1057 my $global_status = 0;
1058 my $global_mess = "";
1059
1060 my @all_docids = ();
1061
1062 foreach my $doc_array_rec ( @$doc_array ) {
1063
1064 my $docid = $doc_array_rec->{'docid'};
1065 my $metaname = $doc_array_rec->{'metaname'};
1066 my $metapos = $doc_array_rec->{'metapos'};
1067 my $metamode = $self->{'metamode'};
1068 my $metavalue = $doc_array_rec->{'metavalue'};
1069
1070 # Some sanity checks
1071 $metapos = 0 if (!defined $metapos);
1072
1073 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1074 # make "accumulate" the default (less destructive, as won't actually
1075 # delete any existing values)
1076 $metamode = "accumulate";
1077 }
1078
1079 push(@all_docids,$docid);
1080
1081 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1082 $metaname,$metapos,$metavalue,$metamode);
1083
1084 if ($status != 0) {
1085 # Catch error if set infodb entry failed
1086 $global_status = $status;
1087 $global_mess .= "Failed to set metadata key: $docid\n";
1088 $global_mess .= "Exit status: $status\n";
1089 $global_mess .= "System Error Message: $!\n";
1090 $global_mess .= "-" x 20 . "\n";
1091 }
1092 }
1093
1094 if ($global_status != 0) {
1095 $global_mess .= "PATH: $ENV{'PATH'}\n";
1096 $gsdl_cgi->generate_error($global_mess);
1097 }
1098 else {
1099 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1100 $gsdl_cgi->generate_ok_message($mess);
1101 }
[20538]1102
[23761]1103 # Release the lock once it is done
1104 $self->unlock_collection($username, $collect);
[20538]1105}
1106
[24943]1107sub remove_archives_metadata
1108{
1109 my $self = shift @_;
[20538]1110
[24943]1111 my $username = $self->{'username'};
1112 my $collect = $self->{'collect'};
1113 my $gsdl_cgi = $self->{'gsdl_cgi'};
1114 my $gsdlhome = $self->{'gsdlhome'};
1115 my $infodbtype = $self->{'infodbtype'};
1116
1117 if ($baseaction::authentication_enabled)
1118 {
1119 # Ensure the user is allowed to edit this collection
1120 &authenticate_user($gsdl_cgi, $username, $collect);
1121 }
1122
1123 my $site = $self->{'site'};
1124
1125 # Obtain the collect and archive dir
1126 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1127
1128 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1129
1130 # Make sure the collection isn't locked by someone else
1131 $self->lock_collection($username, $collect);
1132
1133 # look up additional args
1134 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1135
1136 my $metaname = $self->{'metaname'};
1137 my $metapos = $self->{'metapos'};
1138 $metapos = 0 if (!defined $metapos);
1139
1140 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1141 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1142
1143 # This now stores the full pathname
[25097]1144 my $doc_filename = $doc_rec->{'doc-file'}->[0];
[24943]1145
[25097]1146 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
[24943]1147
1148 # Release the lock once it is done
1149 $self->unlock_collection($username, $collect);
1150
1151 if ($status == 0)
1152 {
1153 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1154 $mess .= " $metaname";
1155 $mess .= "->[$metapos]" if (defined $metapos);
1156
1157 $gsdl_cgi->generate_ok_message($mess);
1158 }
1159 else
1160 {
1161 my $mess .= "Failed to remove archives metadata key: $docid\n";
1162 $mess .= "Exit status: $status\n";
1163 $mess .= "System Error Message: $!\n";
1164 $mess .= "-" x 20 . "\n";
1165
1166 $gsdl_cgi->generate_error($mess);
1167 }
1168}
1169
1170sub remove_from_doc_xml
1171{
1172 my $self = shift @_;
[24949]1173 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid) = @_;
[24943]1174
1175 my @start_rules = ('Section' => \&dxml_start_section);
1176
1177 # Set the call-back functions for the metadata tags
1178 my @rules =
1179 (
1180 _default => 'raw',
1181 'Metadata' => \&rfdxml_metadata
1182 );
1183
1184 my $parser = XML::Rules->new
1185 (
1186 start_rules => \@start_rules,
1187 rules => \@rules,
1188 style => 'filter',
1189 output_encoding => 'utf8'
1190 );
1191
1192 my $status = 0;
1193 my $xml_in = "";
1194 if (!open(MIN,"<$doc_xml_filename"))
1195 {
1196 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1197 $status = 1;
1198 }
1199 else
1200 {
1201 # Read them in
1202 my $line;
1203 while (defined ($line=<MIN>)) {
1204 $xml_in .= $line;
1205 }
1206 close(MIN);
1207
1208 # Filter with the call-back functions
1209 my $xml_out = "";
1210
1211 my $MOUT;
1212 if (!open($MOUT,">$doc_xml_filename")) {
1213 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1214 $status = 1;
1215 }
1216 else {
1217 binmode($MOUT,":utf8");
[24949]1218 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid});
[24943]1219 close($MOUT);
1220 }
1221 }
1222 return $status;
1223}
1224
1225sub rfdxml_metadata
1226{
1227 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1228
1229 if (!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1230 {
1231 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1232 return [$tagname => $attrHash];
1233 }
1234
1235 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1236 {
1237 if (!defined $parser->{'parameters'}->{'poscount'})
1238 {
1239 $parser->{'parameters'}->{'poscount'} = 0;
1240 }
1241 else
1242 {
1243 $parser->{'parameters'}->{'poscount'}++;
1244 }
1245 }
1246
1247 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1248 {
1249 return [];
1250 }
1251
[24949]1252 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1253 {
1254 return [];
1255 }
1256
[24943]1257 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1258 return [$tagname => $attrHash];
1259}
1260
[19293]1261sub mxml_metadata
1262{
1263 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1264 my $metaname = $parser->{'parameters'}->{'metaname'};
1265 my $metamode = $parser->{'parameters'}->{'metamode'};
1266
[21716]1267 # Report error if we don't see FileName tag before this
[25097]1268 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1269
1270 # Don't do anything if we are not in the right FileSet
1271 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[23761]1272 if ($file_regexp =~ /\.\*/) {
1273 # Only interested in a file_regexp if it specifies precisely one
1274 # file.
1275 # So, skip anything with a .* in it as it is too general
1276 return [$tagname => $attrHash];
1277 }
1278 my $src_file = $parser->{'parameters'}->{'src_file'};
1279 if (!($src_file =~ /$file_regexp/)) {
1280 return [$tagname => $attrHash];
1281 }
1282## print STDERR "*** mxl metamode = $metamode\n";
1283
[21715]1284 # Find the right metadata tag and checks if we are going to override it
[19293]1285 my $name_attr = $attrHash->{'name'};
1286 if (($name_attr eq $metaname) && ($metamode eq "override")) {
[21715]1287 # Get the value and override the current value
[19293]1288 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1289 $attrHash->{'_content'} = $metavalue;
1290
[23761]1291## print STDERR "**** overrideing metadata.xml\n";
1292
[19293]1293 # Don't want it to wipe out any other pieces of metadata
1294 $parser->{'parameters'}->{'metamode'} = "done";
1295 }
1296
[21716]1297 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1298 return [$tagname => $attrHash];
[19293]1299}
1300
1301
1302sub mxml_description
1303{
1304 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]1305 my $metamode = $parser->{'parameters'}->{'metamode'};
[19293]1306
[21716]1307 # Failed... Report error if we don't see FileName tag before this
[25097]1308 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]1309
1310 # Don't do anything if we are not in the right FileSet
1311 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[24943]1312 if ($file_regexp =~ m/\.\*/) {
[23761]1313 # Only interested in a file_regexp if it specifies precisely one
1314 # file.
1315 # So, skip anything with a .* in it as it is too general
1316 return [$tagname => $attrHash];
1317 }
1318 my $src_file = $parser->{'parameters'}->{'src_file'};
[24943]1319
1320 if (!($src_file =~ m/$file_regexp/)) {
[23761]1321 return [$tagname => $attrHash];
1322 }
[21716]1323
[21715]1324 # Accumulate the metadata block to the end of the description block
1325 # Note: This adds metadata block to all description blocks, so if there are
1326 # multiple FileSets, it will add to all of them
[23761]1327 if (($metamode eq "accumulate") || ($metamode eq "override")) {
1328 # if metamode was "override" but get to here then it failed to
1329 # find an item to override, in which case it should append its
1330 # value to the end, just like the "accumulate" mode
1331
[19293]1332 # tack a new metadata tag on to the end of the <Metadata>+ block
1333 my $metaname = $parser->{'parameters'}->{'metaname'};
1334 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1335
1336 my $metadata_attr = { '_content' => $metavalue,
1337 'name' => $metaname,
1338 'mode' => "accumulate" };
1339
1340 my $append_metadata = [ "Metadata" => $metadata_attr ];
1341 my $description_content = $attrHash->{'_content'};
[24943]1342
[23761]1343## print STDERR "*** appending to metadata.xml\n";
1344
1345 # append the new metadata element to the end of the current
1346 # content contained inside this tag
[24943]1347 if (ref($description_content) eq "") {
1348 # => string or numeric literal
1349 # this is caused by a <Description> block has no <Metadata> child elements
1350 # => set up an empty array in '_content'
1351 $attrHash->{'_content'} = [ "\n" ];
1352 $description_content = $attrHash->{'_content'};
1353 }
1354
[19293]1355 push(@$description_content," ", $append_metadata ,"\n ");
[23761]1356 $parser->{'parameters'}->{'metamode'} = "done";
[19293]1357 }
1358
[21716]1359 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1360 return [$tagname => $attrHash];
[19293]1361}
1362
[21715]1363
[21716]1364sub mxml_filename
1365{
1366 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1367
1368 # Store the filename of the Current Fileset
1369 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1370 # FileName tag must come before Description tag
1371 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
1372
1373 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1374 return [$tagname => $attrHash];
1375}
1376
1377
1378sub mxml_fileset
1379{
1380 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1381
1382 # Initilise the current_file
1383 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
1384 # FileName tag must come before Description tag
1385 $parser->{'parameters'}->{'current_file'} = "";
1386
1387 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1388 return [$tagname => $attrHash];
1389}
1390
1391
[19293]1392sub edit_metadata_xml
1393{
1394 my $self = shift @_;
[21716]1395 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
[19293]1396
[21715]1397 # Set the call-back functions for the metadata tags
[19293]1398 my @rules =
[21716]1399 ( _default => 'raw',
1400 'FileName' => \&mxml_filename,
[19293]1401 'Metadata' => \&mxml_metadata,
[21716]1402 'Description' => \&mxml_description,
1403 'FileSet' => \&mxml_fileset);
[19293]1404
[21715]1405 # use XML::Rules to add it in (read in and out again)
[19293]1406 my $parser = XML::Rules->new(rules => \@rules,
[21716]1407 style => 'filter',
1408 output_encoding => 'utf8');
[19293]1409
[24943]1410 if (!-e $metadata_xml_filename) {
1411
1412 if (open(MOUT,">$metadata_xml_filename")) {
1413
1414 my $src_file_re = &util::filename_to_regex($src_file);
1415 # shouldn't the following also be in the above utility routine??
1416 # $src_file_re =~ s/\./\\./g;
1417
1418 print MOUT "<?xml version=\"1.0\"?>\n";
1419 print MOUT "<DirectoryMetadata>\n";
1420 print MOUT " <FileSet>\n";
1421 print MOUT " <FileName>$src_file_re</FileName>\n";
1422 print MOUT " <Description>\n";
1423 print MOUT " </Description>\n";
1424 print MOUT " </FileSet>\n";
1425 print MOUT "</DirectoryMetadata>\n";
[23761]1426
[24943]1427 close(MOUT);
1428 }
1429 else {
1430 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
1431 }
[19293]1432 }
[24943]1433
1434
1435 my $xml_in = "";
1436 if (!open(MIN,"<$metadata_xml_filename")) {
1437 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1438 }
[19293]1439 else {
[24943]1440 # Read them in
1441 my $line;
1442 while (defined ($line=<MIN>)) {
1443 $xml_in .= $line;
1444 }
1445 close(MIN);
[23761]1446
[24943]1447 # Filter with the call-back functions
1448 my $xml_out = "";
[23761]1449
[24943]1450 my $MOUT;
1451 if (!open($MOUT,">$metadata_xml_filename")) {
1452 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1453 }
1454 else {
1455 binmode($MOUT,":utf8");
1456
1457 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
1458 # At the moment, I will just hack it!
1459 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
1460 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
1461 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
1462 #print MOUT $xml_out;
1463
1464 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
1465 metavalue => $metavalue,
1466 metamode => $metamode,
1467 src_file => $src_file,
1468 current_file => undef} );
1469 close($MOUT);
1470 }
1471 }
[20538]1472}
[19293]1473
1474
1475sub set_import_metadata
1476{
1477 my $self = shift @_;
[21715]1478
[19293]1479 my $username = $self->{'username'};
1480 my $collect = $self->{'collect'};
1481 my $gsdl_cgi = $self->{'gsdl_cgi'};
1482 my $gsdlhome = $self->{'gsdlhome'};
[23400]1483 my $infodbtype = $self->{'infodbtype'};
1484
[19293]1485 if ($baseaction::authentication_enabled) {
[25097]1486 # Ensure the user is allowed to edit this collection
1487 $self->authenticate_user($username, $collect);
[19293]1488 }
1489
[23761]1490
[21715]1491 # Obtain the collect and archive dir
[23766]1492 my $site = $self->{'site'};
1493 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1494
1495 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]1496 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1497
1498 # Make sure the collection isn't locked by someone else
1499 $self->lock_collection($username, $collect);
[21716]1500
[19293]1501 # look up additional args
1502 # want either d= or f=
1503 my $docid = $self->{'d'};
1504 my $import_file = $self->{'f'};
1505 if ((!defined $docid) && (!defined $import_file)) {
1506 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1507 }
1508
[21715]1509 # Get the parameters and set default mode to "accumulate"
[19293]1510 my $metaname = $self->{'metaname'};
1511 my $metavalue = $self->{'metavalue'};
[23761]1512## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1513 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1514 print STDERR "*** set import meta: val = $metavalue\n";
1515
[19293]1516 my $metamode = $self->{'metamode'};
1517 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1518 # make "accumulate" the default (less destructive, as won't actually
1519 # delete any existing values)
1520 $metamode = "accumulate";
1521 }
1522
[21715]1523 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1524 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
[19293]1525 my $metadata_xml_file;
[20935]1526 my $import_filename = undef;
[19293]1527 if (defined $docid) {
[23400]1528 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1529 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[19293]1530
[20935]1531 # This now stores the full pathname
[23761]1532 $import_filename = $doc_rec->{'src-file'}->[0];
[19293]1533 }
[20935]1534 else {
[21715]1535 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
[20935]1536 }
[23761]1537
[21715]1538 # figure out correct metadata.xml file [?]
1539 # Assuming the metadata.xml file is next to the source file
1540 # Note: This will not work if it is using the inherited metadata from the parent folder
[24943]1541 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
[19293]1542 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1543
[21715]1544 # Edit the metadata.xml
[21716]1545 # Modified by Jeffrey from DL Consulting
1546 # Handle the case where there is one metadata.xml file for multiple FileSets
1547 # The XML filter needs to know whether it is in the right FileSet
1548 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
1549 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
1550 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
1551 $metaname, $metavalue, $metamode, $import_tailname);
[19293]1552
[21715]1553 # Release the lock once it is done
1554 $self->unlock_collection($username, $collect);
[23761]1555
1556 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1557 $mess .= " $metaname";
1558 $mess .= " = $metavalue";
1559 $mess .= " ($metamode)\n";
1560
1561 $gsdl_cgi->generate_ok_message($mess);
1562
[19293]1563}
1564
[25097]1565sub set_import_metadata_array
1566{
1567 my $self = shift @_;
1568
1569 my $username = $self->{'username'};
1570 my $collect = $self->{'collect'};
1571 my $gsdl_cgi = $self->{'gsdl_cgi'};
1572 my $gsdlhome = $self->{'gsdlhome'};
1573
1574 if ($baseaction::authentication_enabled) {
1575 # Ensure the user is allowed to edit this collection
1576 &authenticate_user($gsdl_cgi, $username, $collect);
1577 }
1578
1579 my $site = $self->{'site'};
1580 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1581
1582 $gsdl_cgi->checked_chdir($collect_dir);
1583
1584 # Make sure the collection isn't locked by someone else
1585 $self->lock_collection($username, $collect);
1586
1587 # look up additional args
1588
1589 my $infodbtype = $self->{'infodbtype'};
1590
1591 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1592
1593 my $json_str = $self->{'json'};
1594 my $doc_array = decode_json $json_str;
1595
1596 my $global_status = 0;
1597 my $global_mess = "";
1598
1599 my @all_docids = ();
1600
1601 foreach my $doc_array_rec ( @$doc_array )
1602 {
1603 my $docid = $doc_array_rec->{'docid'};
1604 my $metaname = $doc_array_rec->{'metaname'};
1605 my $metamode = $self->{'metamode'};
1606 my $metavalue = $doc_array_rec->{'metavalue'};
1607
1608 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1609 # make "accumulate" the default (less destructive, as won't actually
1610 # delete any existing values)
1611 $metamode = "accumulate";
1612 }
1613
1614 push(@all_docids,$docid);
1615
1616 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1617 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1618 my $metadata_xml_file;
1619 my $import_filename = undef;
1620
1621 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1622 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1623
1624 # This now stores the full pathname
1625 $import_filename = $doc_rec->{'src-file'}->[0];
1626
1627 # figure out correct metadata.xml file [?]
1628 # Assuming the metadata.xml file is next to the source file
1629 # Note: This will not work if it is using the inherited metadata from the parent folder
1630 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1631 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1632
1633 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $import_tailname);
1634 }
1635
1636 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1637 $gsdl_cgi->generate_ok_message($mess);
1638
1639 # Release the lock once it is done
1640 $self->unlock_collection($username, $collect);
1641}
1642
[24943]1643sub remove_import_metadata
1644{
1645 my $self = shift @_;
1646
1647 my $username = $self->{'username'};
1648 my $collect = $self->{'collect'};
1649 my $gsdl_cgi = $self->{'gsdl_cgi'};
1650
1651 if ($baseaction::authentication_enabled) {
1652 # Ensure the user is allowed to edit this collection
1653 &authenticate_user($gsdl_cgi, $username, $collect);
1654 }
[19293]1655
[24943]1656 my $gsdlhome = $self->{'gsdlhome'};
1657 my $infodbtype = $self->{'infodbtype'};
1658
1659 # Obtain the collect dir
1660 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1661 my $site = $self->{'site'};
1662 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1663
1664 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1665 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1666
1667 # Make sure the collection isn't locked by someone else
1668 $self->lock_collection($username, $collect);
1669
1670 # look up additional args
1671 my $docid = $self->{'d'};
1672 if ((!defined $docid) || ($docid =~ m/^\s*$/))
1673 {
[25097]1674 $gsdl_cgi->generate_error_message("No docid (d=...) specified.\n");
[24943]1675 }
1676
1677 my $metaname = $self->{'metaname'};
1678 my $metavalue = $self->{'metavalue'};
1679 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1680
1681 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
1682 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
1683 my $metadata_xml_file;
1684 my $import_filename = undef;
1685 if (defined $docid)
1686 {
1687 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1688 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1689
1690 # This now stores the full pathname
1691 $import_filename = $doc_rec->{'src-file'}->[0];
1692 }
1693
1694 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
1695 {
[25097]1696 $gsdl_cgi->generate_error_message("There is no metadata\n");
[24943]1697 }
1698
1699 # figure out correct metadata.xml file [?]
1700 # Assuming the metadata.xml file is next to the source file
1701 # Note: This will not work if it is using the inherited metadata from the parent folder
1702 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
1703 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
1704
1705 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $import_tailname);
1706
1707 # Release the lock once it is done
1708 $self->unlock_collection($username, $collect);
1709
1710 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1711 $mess .= " $metaname";
1712 $mess .= " = $metavalue\n";
1713
1714 $gsdl_cgi->generate_ok_message($mess);
1715}
1716
1717sub remove_from_metadata_xml
1718{
1719 my $self = shift @_;
1720 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $src_file) = @_;
1721
1722 # Set the call-back functions for the metadata tags
1723 my @rules =
1724 (
1725 _default => 'raw',
1726 'Metadata' => \&rfmxml_metadata,
1727 'FileName' => \&mxml_filename
1728 );
1729
1730 my $parser = XML::Rules->new
1731 (
1732 rules => \@rules,
1733 style => 'filter',
1734 output_encoding => 'utf8'
1735 );
1736
1737 my $xml_in = "";
1738 if (!open(MIN,"<$metadata_xml_filename"))
1739 {
1740 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
1741 }
1742 else
1743 {
1744 # Read them in
1745 my $line;
1746 while (defined ($line=<MIN>)) {
1747 $xml_in .= $line;
1748 }
1749 close(MIN);
1750
1751 # Filter with the call-back functions
1752 my $xml_out = "";
1753
1754 my $MOUT;
1755 if (!open($MOUT,">$metadata_xml_filename")) {
1756 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
1757 }
1758 else {
1759 binmode($MOUT,":utf8");
1760 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metavalue => $metavalue, src_file => $src_file, current_file => undef});
1761 close($MOUT);
1762 }
1763 }
1764}
1765
1766sub rfmxml_metadata
1767{
1768 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1769
1770 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && ($attrHash->{'name'} eq $parser->{'parameters'}->{'metaname'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
1771 {
1772 return [];
1773 }
1774
1775 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1776 return [$tagname => $attrHash];
1777}
1778
[19499]1779sub remove_live_metadata
1780{
1781 my $self = shift @_;
1782
1783 my $username = $self->{'username'};
1784 my $collect = $self->{'collect'};
1785 my $gsdl_cgi = $self->{'gsdl_cgi'};
1786 my $gsdlhome = $self->{'gsdlhome'};
[23400]1787 my $infodbtype = $self->{'infodbtype'};
1788
[19499]1789 if ($baseaction::authentication_enabled) {
1790 # Ensure the user is allowed to edit this collection
1791 &authenticate_user($gsdl_cgi, $username, $collect);
1792 }
1793
[21715]1794 # Obtain the collect dir
[23766]1795 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1796 my $site = $self->{'site'};
1797 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[19499]1798
1799 # Make sure the collection isn't locked by someone else
1800 $self->lock_collection($username, $collect);
1801
1802 # look up additional args
1803 my $docid = $self->{'d'};
[21715]1804 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1805 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1806 }
1807
1808 # Generate the dbkey
[19499]1809 my $metaname = $self->{'metaname'};
1810 my $dbkey = "$docid.$metaname";
1811
[21715]1812 # To people who know $collect_tail please add some comments
1813 # Obtain the live gdbm_db path
[19499]1814 my $collect_tail = $collect;
1815 $collect_tail =~ s/^.*[\/\\]//;
[21564]1816 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1817 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19499]1818
[21715]1819 # Remove the key
[21569]1820 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
[19499]1821 my $status = system($cmd);
1822 if ($status != 0) {
[21715]1823 # Catch error if gdbmdel failed
[19499]1824 my $mess = "Failed to set metadata key: $dbkey\n";
1825
1826 $mess .= "PATH: $ENV{'PATH'}\n";
1827 $mess .= "cmd = $cmd\n";
1828 $mess .= "Exit status: $status\n";
1829 $mess .= "System Error Message: $!\n";
1830
1831 $gsdl_cgi->generate_error($mess);
1832 }
1833 else {
1834 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
1835 }
1836
1837}
1838
1839
1840sub remove_metadata
1841{
1842 my $self = shift @_;
1843
1844 my $username = $self->{'username'};
1845 my $collect = $self->{'collect'};
1846 my $gsdl_cgi = $self->{'gsdl_cgi'};
1847 my $gsdlhome = $self->{'gsdlhome'};
[23400]1848 my $infodbtype = $self->{'infodbtype'};
1849
[19499]1850 if ($baseaction::authentication_enabled) {
1851 # Ensure the user is allowed to edit this collection
1852 &authenticate_user($gsdl_cgi, $username, $collect);
1853 }
1854
[21715]1855 # Obtain the collect dir
[23766]1856 my $site = $self->{'site'};
1857 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1858 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]1859
1860 # Make sure the collection isn't locked by someone else
1861 $self->lock_collection($username, $collect);
1862
1863 # look up additional args
1864 my $docid = $self->{'d'};
[21715]1865 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1866 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1867 }
[19499]1868 my $metaname = $self->{'metaname'};
1869 my $metapos = $self->{'metapos'};
[24949]1870 my $metavalue = $self->{'metavalue'};
[19499]1871
[21715]1872 # To people who know $collect_tail please add some comments
1873 # Obtain the path to the database
[19499]1874 my $collect_tail = $collect;
1875 $collect_tail =~ s/^.*[\/\\]//;
[21564]1876 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1877 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]1878
1879 # Read the docid entry
[23400]1880 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21715]1881
1882 # Basically loop through and unescape_html the values
[19499]1883 foreach my $k (keys %$doc_rec) {
1884 my @escaped_v = ();
1885 foreach my $v (@{$doc_rec->{$k}}) {
1886 if ($k eq "contains") {
1887 # protect quotes in ".2;".3 etc
1888 $v =~ s/\"/\\\"/g;
1889 push(@escaped_v, $v);
1890 }
1891 else {
1892 my $ev = &ghtml::unescape_html($v);
1893 $ev =~ s/\"/\\\"/g;
1894 push(@escaped_v, $ev);
1895 }
1896 }
1897 $doc_rec->{$k} = \@escaped_v;
1898 }
1899
[21715]1900 # Check to make sure the key does exist
1901 if (!defined ($doc_rec->{$metaname})) {
1902 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
1903 }
1904
1905 # Obtain the specified metadata pos
[19499]1906 $metapos = 0 if (!defined $metapos);
1907
1908 # consider check key is defined before deleting?
[21715]1909 # Loop through the metadata array and ignore the specified position
[19499]1910 my $filtered_metadata = [];
[21715]1911 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
[19499]1912 for (my $i=0; $i<$num_metadata_vals; $i++) {
[24949]1913 my $metaval = shift(@{$doc_rec->{$metaname}});
[19499]1914
[24949]1915 if (!defined $metavalue && $i != $metapos) {
1916 push(@$filtered_metadata,$metaval)
[19499]1917 }
[24949]1918
1919 if(defined $metavalue && !($metavalue eq $metaval))
1920 {
1921 push(@$filtered_metadata,$metavalue)
1922 }
[19499]1923 }
1924 $doc_rec->{$metaname} = $filtered_metadata;
1925
[21715]1926 # Turn the record back to string
[21551]1927 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
[19499]1928
[21715]1929 # Store it back to the database
[21569]1930 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
[19499]1931 my $status = system($cmd);
1932 if ($status != 0) {
1933 my $mess = "Failed to set metadata key: $docid\n";
1934
1935 $mess .= "PATH: $ENV{'PATH'}\n";
1936 $mess .= "cmd = $cmd\n";
1937 $mess .= "Exit status: $status\n";
1938 $mess .= "System Error Message: $!\n";
1939
1940 $gsdl_cgi->generate_error($mess);
1941 }
1942 else {
1943 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
1944 $mess .= " $metaname";
1945 $mess .= "->[$metapos]" if (defined $metapos);
1946
1947 $gsdl_cgi->generate_ok_message($mess);
1948 }
1949}
1950
1951
[23761]1952# Was trying to reused the codes, but the functions need to be broken
1953# down more before they can be reused, otherwise there will be too
1954# much overhead and duplicate process...
[21716]1955sub insert_metadata
1956{
1957 my $self = shift @_;
1958
1959 my $username = $self->{'username'};
1960 my $collect = $self->{'collect'};
1961 my $gsdl_cgi = $self->{'gsdl_cgi'};
1962 my $gsdlhome = $self->{'gsdlhome'};
[23400]1963 my $infodbtype = $self->{'infodbtype'};
1964
[23761]1965 # If the import metadata and gdbm database have been updated, we
1966 # need to insert some notification to warn user that the the text
1967 # they see at the moment is not indexed and require a rebuild.
[21716]1968 my $rebuild_pending_macro = "_rebuildpendingmessage_";
1969
1970 if ($baseaction::authentication_enabled) {
1971 # Ensure the user is allowed to edit this collection
1972 $self->authenticate_user($username, $collect);
1973 }
1974
[23766]1975 # Obtain the collect and archive dir
1976 my $site = $self->{'site'};
1977 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1978 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[21716]1979 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1980
1981 # Make sure the collection isn't locked by someone else
1982 $self->lock_collection($username, $collect);
1983
1984 # Check additional args
1985 my $docid = $self->{'d'};
1986 if (!defined($docid)) {
1987 $gsdl_cgi->generate_error("No document id is specified: d=...");
1988 }
1989 my $metaname = $self->{'metaname'};
1990 if (!defined($metaname)) {
1991 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
1992 }
1993 my $metavalue = $self->{'metavalue'};
1994 if (!defined($metavalue) || $metavalue eq "") {
1995 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
1996 }
1997 # make "accumulate" the default (less destructive, as won't actually
1998 # delete any existing values)
1999 my $metamode = "accumulate";
2000
2001 #=======================================================================#
2002 # set_import_metadata [START]
2003 #=======================================================================#
2004 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2005 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2006 my $metadata_xml_file;
[23400]2007 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2008 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[21716]2009
2010 # This now stores the full pathname
2011 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
2012
2013 # figure out correct metadata.xml file [?]
2014 # Assuming the metadata.xml file is next to the source file
2015 # Note: This will not work if it is using the inherited metadata from the parent folder
2016 my ($import_tailname, $import_dirname)
2017 = File::Basename::fileparse($import_filename);
2018 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2019
2020 # Shane's escape characters
2021 $metavalue = pack "U0C*", unpack "C*", $metavalue;
2022 $metavalue =~ s/\,/&#44;/g;
2023 $metavalue =~ s/\:/&#58;/g;
2024 $metavalue =~ s/\|/&#124;/g;
2025 $metavalue =~ s/\(/&#40;/g;
2026 $metavalue =~ s/\)/&#41;/g;
2027 $metavalue =~ s/\[/&#91;/g;
2028 $metavalue =~ s/\\/&#92;/g;
2029 $metavalue =~ s/\]/&#93;/g;
2030 $metavalue =~ s/\{/&#123;/g;
2031 $metavalue =~ s/\}/&#125;/g;
2032 $metavalue =~ s/\"/&#34;/g;
2033 $metavalue =~ s/\`/&#96;/g;
2034 $metavalue =~ s/\n/_newline_/g;
2035
2036 # Edit the metadata.xml
2037 # Modified by Jeffrey from DL Consulting
2038 # Handle the case where there is one metadata.xml file for multiple FileSets
2039 # The XML filter needs to know whether it is in the right FileSet
2040 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2041 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2042 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
2043 $metaname, $metavalue, $metamode, $import_tailname);
2044 #=======================================================================#
2045 # set_import_metadata [END]
2046 #=======================================================================#
2047
2048
2049 #=======================================================================#
2050 # set_metadata (accumulate version) [START]
2051 #=======================================================================#
2052 # To people who know $collect_tail please add some comments
2053 # Obtain path to the database
2054 my $collect_tail = $collect;
2055 $collect_tail =~ s/^.*[\/\\]//;
2056 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2057 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21716]2058
2059 # Read the docid entry
[23400]2060 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2061
[21716]2062 foreach my $k (keys %$doc_rec) {
2063 my @escaped_v = ();
2064 foreach my $v (@{$doc_rec->{$k}}) {
2065 if ($k eq "contains") {
2066 # protect quotes in ".2;".3 etc
2067 $v =~ s/\"/\\\"/g;
2068 push(@escaped_v, $v);
2069 }
2070 else {
2071 my $ev = &ghtml::unescape_html($v);
2072 $ev =~ s/\"/\\\"/g;
2073 push(@escaped_v, $ev);
2074 }
2075 }
2076 $doc_rec->{$k} = \@escaped_v;
2077 }
2078
2079 # Protect the quotes
2080 $metavalue =~ s/\"/\\\"/g;
2081
2082 # Adds the pending macro
2083 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
2084
2085 # If the metadata doesn't exist, create a new one
2086 if (!defined($doc_rec->{$metaname})){
2087 $doc_rec->{$metaname} = [ $macro_metavalue ];
2088 }
2089 # Else, let's acculumate the values
2090 else {
2091 push(@{$doc_rec->{$metaname}},$macro_metavalue);
2092 }
2093
2094 # Generate the record string
2095 my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
2096
2097 # Store it into GDBM
2098 my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
2099 my $status = system($cmd);
2100 if ($status != 0) {
2101 # Catch error if gdbmget failed
2102 my $mess = "Failed to set metadata key: $docid\n";
2103
2104 $mess .= "PATH: $ENV{'PATH'}\n";
2105 $mess .= "cmd = $cmd\n";
2106 $mess .= "Exit status: $status\n";
2107 $mess .= "System Error Message: $!\n";
2108
2109 $gsdl_cgi->generate_error($mess);
2110 }
2111 else {
2112 my $mess = "insert-metadata successful: Key[$docid]\n";
2113 $mess .= " [In metadata.xml] $metaname";
2114 $mess .= " = $metavalue\n";
2115 $mess .= " [In database] $metaname";
2116 $mess .= " = $macro_metavalue\n";
2117 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
2118 $gsdl_cgi->generate_ok_message($mess);
2119 }
2120 #=======================================================================#
2121 # set_metadata (accumulate version) [END]
2122 #=======================================================================#
2123
2124 # Release the lock once it is done
2125 $self->unlock_collection($username, $collect);
2126}
2127
[19293]21281;
Note: See TracBrowser for help on using the repository browser.