source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27313

Last change on this file since 27313 was 27313, checked in by ak19, 11 years ago

Now the existing user comments to be displayed are all retrieved in one go using the new get_metadata_array subroutine in metadataaction.pm via the new gsajaxapi method getMetadataArray which take a JSON string and return one. This loads user comments much faster, and doesn't get that much slower if the number of comments stored in the index database gets larger. 2 bugfixes to metadataaction.pm's recently added get_metadata_array subroutine: if no metapos supplied it defaults to 0 like the other get_meta functions instead of defaulting to the keyword 'all'. The fieldnames in the JSON string returned also needed to be inside double quotes in order to be successfully parsed back into a JSON object on the Javascript side. Replaced the old loadUserComments() javascript function in style.dm, which now calls the new gsajaxapi.getMetadataArray() post method.

File size: 106.0 KB
Line 
1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
50 'optional-args' => [ "metapos" ] },
51
52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
56 "get-index-metadata" => {
57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
59
60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
67
68 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "where" ],
71 'help-string' => [
72 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73 ]},
74
75 #SET METHODS
76 "set-live-metadata" => {
77 'compulsory-args' => [ "d", "metaname", "metavalue" ],
78 'optional-args' => [ ] },
79
80 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81 'compulsory-args' => [ "metaname", "metavalue" ],
82 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84 "set-index-metadata" => {
85 'compulsory-args' => [ "d", "metaname", "metavalue" ],
86 'optional-args' => [ "metapos", "metamode" ] },
87
88 "set-archives-metadata" => {
89 'compulsory-args' => [ "d", "metaname", "metavalue" ],
90 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91
92 "set-import-metadata" => {
93 'compulsory-args' => [ "metaname", "metavalue" ],
94 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
95
96 #SET METHODS (ARRAY)
97 "set-metadata-array" => {
98 'compulsory-args' => [ "where", "json" ],
99 'optional-args' => [ ],
100 'help-string' => [
101 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102
103 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
110
111 "set-archives-metadata-array" => {
112 'compulsory-args' => [ "json" ],
113 'optional-args' => [ ] },
114
115 "set-import-metadata-array" => {
116 'compulsory-args' => [ "json" ],
117 'optional-args' => [ ] },
118
119 "set-index-metadata-array" => {
120 'compulsory-args' => [ "json" ],
121 'optional-args' => [ ] },
122
123 "set-live-metadata-array" => {
124 'compulsory-args' => [ "json" ],
125 'optional-args' => [ ] },
126
127 #REMOVE METHODS
128 "remove-import-metadata" => {
129 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
130 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
131
132 "remove-archives-metadata" => {
133 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134 'optional-args' => [ "metapos", "metavalue" ] },
135
136 "remove-live-metadata" => {
137 'compulsory-args' => [ "d", "metaname" ],
138 'optional-args' => [ ] },
139
140 "remove-index-metadata" => {
141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "metapos", "metavalue" ] },
143
144 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145 'compulsory-args' => [ "d", "metaname" ],
146 'optional-args' => [ "where", "metapos", "metavalue" ] },
147
148 #INSERT METHODS
149 "insert-metadata" => {
150 'compulsory-args' => [ "d", "metaname", "metavalue" ],
151 'optional-args' => [ ] }
152};
153
154
155sub new
156{
157 my $class = shift (@_);
158 my ($gsdl_cgi,$iis6_mode) = @_;
159
160 # Treat metavalue specially. To transmit this through a GET request
161 # the Javascript side has url-encoded it, so here we need to decode
162 # it before proceeding
163
164 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
171 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173 return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179 my $self = shift @_;
180
181 my $username = $self->{'username'};
182 my $collect = $self->{'collect'};
183 my $gsdl_cgi = $self->{'gsdl_cgi'};
184 my $gsdlhome = $self->{'gsdlhome'};
185 my $infodbtype = $self->{'infodbtype'};
186
187 # live metadata gets/saves value scoped (prefixed) by the current usename
188 # so (for now) let's not bother to enforce authentication
189
190 # Obtain the collect dir
191 my $site = $self->{'site'};
192 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
194
195 # Make sure the collection isn't locked by someone else
196 $self->lock_collection($username, $collect);
197
198 # look up additional args
199 my $docid = $self->{'d'};
200 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
201 $gsdl_cgi->generate_error("No docid (d=...) specified.");
202 }
203
204 # Generate the dbkey
205 my $metaname = $self->{'metaname'};
206 my $dbkey = "$docid.$metaname";
207
208 # To people who know $collect_tail please add some comments
209 # Obtain path to the database
210 my $collect_tail = $collect;
211 $collect_tail =~ s/^.*[\/|\\]//;
212 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
213 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
214
215 # Obtain the content of the key
216 my $cmd = "gdbmget $infodb_file_path $dbkey";
217 if (open(GIN,"$cmd |") == 0) {
218 # Catch error if gdbmget failed
219 my $mess = "Failed to get metadata key: $metaname\n";
220 $mess .= "$!\n";
221
222 $gsdl_cgi->generate_error($mess);
223 }
224 else {
225 binmode(GIN,":utf8");
226 # Read everything in and concatenate them into $metavalue
227 my $metavalue = "";
228 my $line;
229 while (defined ($line=<GIN>)) {
230 $metavalue .= $line;
231 }
232 close(GIN);
233 chomp($metavalue); # Get rid off the tailing newlines
234 $gsdl_cgi->generate_ok_message("$metavalue");
235 }
236
237 # Release the lock once it is done
238 $self->unlock_collection($username, $collect);
239}
240
241# just calls the index version
242sub get_metadata
243{
244 my $self = shift @_;
245 $self->get_index_metadata(@_);
246}
247
248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254 my $self = shift @_;
255
256 my $where = $self->{'where'};
257 if (!$where) {
258 $where = "index"; # default behaviour is to get the values from index
259 }
260
261 # Only when setting metadata do we perform authentication and do we lock the collection
262 # not when getting metadata
263
264 # for get_meta_array, the where param can only be ONE of import, archives, index, live
265 if($where =~ m/index/) {
266 $self->_get_index_metadata_array(@_);
267 }
268 # still to be implemented for archives, import and live
269}
270
271# takes a JSON string and returns a JSON string
272# resulting string is of the form:
273# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
274sub _get_index_metadata_array
275{
276 my $self = shift @_;
277
278 my $collect = $self->{'collect'};
279 my $gsdl_cgi = $self->{'gsdl_cgi'};
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282
283 # look up additional args
284 my $infodbtype = $self->{'infodbtype'};
285
286 # To people who know $collect_tail please add some comments
287 # Obtain the path to the database
288 my $collect_tail = $collect;
289 $collect_tail =~ s/^.*[\/\\]//;
290 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
291 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
292
293 my $json_str = $self->{'json'};
294 my $doc_array = decode_json $json_str;
295
296 my $json_result_str = "[";
297 my $first_doc_rec = 1;
298
299 foreach my $doc_array_rec ( @$doc_array ) {
300
301 my $docid = $doc_array_rec->{'docid'};
302
303 if($first_doc_rec) {
304 $first_doc_rec = 0;
305 } else {
306 $json_result_str .= ",";
307 }
308 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
309
310 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
311 $json_result_str = $json_result_str . ",\"metatable\":[";
312
313 my $first_rec = 1;
314 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
315 if($first_rec) {
316 $first_rec = 0;
317 } else {
318 $json_result_str .= ",";
319 }
320
321 my $metaname = $metatable_rec->{'metaname'};
322 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
323
324 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
325 if(!defined $metapos) {
326 $metapos = 0;
327 }
328
329 # Read the docid entry
330 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
331
332 # Basically loop through and unescape_html the values
333 foreach my $k (keys %$doc_rec) {
334 my @escaped_v = ();
335 foreach my $v (@{$doc_rec->{$k}}) {
336 my $ev = &ghtml::unescape_html($v);
337 push(@escaped_v, $ev);
338 }
339 $doc_rec->{$k} = \@escaped_v;
340 }
341
342 # Obtain the specified metadata value(s)
343 my $metavalue;
344
345 if($metapos ne "all") { # get the value at a single metapos
346
347 $metavalue = $doc_rec->{$metaname}->[$metapos];
348 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
349
350 } else {
351 my $first_metaval = 1;
352 $metapos = 0;
353 $metavalue = $doc_rec->{$metaname}->[$metapos];
354
355 while (defined $metavalue) {
356 if($first_metaval) {
357 $first_metaval = 0;
358 } else {
359 $json_result_str .= ",";
360 }
361
362 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
363
364 $metapos++;
365 $metavalue = $doc_rec->{$metaname}->[$metapos];
366 }
367 }
368
369 $json_result_str .= "]}"; # close metavals array and metatable record
370 }
371
372 $json_result_str .= "]}"; # close metatable array and docid record
373 }
374
375 $json_result_str .= "]"; # close array of docids
376
377 $gsdl_cgi->generate_ok_message($json_result_str);
378}
379
380
381sub get_index_metadata
382{
383 my $self = shift @_;
384
385 my $username = $self->{'username'};
386 my $collect = $self->{'collect'};
387 my $gsdl_cgi = $self->{'gsdl_cgi'};
388 my $gsdlhome = $self->{'gsdlhome'};
389
390 # Authenticate user if it is enabled
391 if ($baseaction::authentication_enabled) {
392 # Ensure the user is allowed to edit this collection
393 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
394 }
395
396 # Obtain the collect dir
397 my $site = $self->{'site'};
398 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
399 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
400
401 # Make sure the collection isn't locked by someone else
402 $self->lock_collection($username, $collect);
403
404 # look up additional args
405 my $docid = $self->{'d'};
406 my $metaname = $self->{'metaname'};
407 my $metapos = $self->{'metapos'};
408 my $infodbtype = $self->{'infodbtype'};
409
410 # To people who know $collect_tail please add some comments
411 # Obtain path to the database
412 my $collect_tail = $collect;
413 $collect_tail =~ s/^.*[\/\\]//;
414 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
415 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
416
417 # Read the docid entry
418 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
419
420 # Basically loop through and unescape_html the values
421 foreach my $k (keys %$doc_rec) {
422 my @escaped_v = ();
423 foreach my $v (@{$doc_rec->{$k}}) {
424 my $ev = &ghtml::unescape_html($v);
425 push(@escaped_v, $ev);
426 }
427 $doc_rec->{$k} = \@escaped_v;
428 }
429
430 # Obtain the specified metadata value
431 $metapos = 0 if (!defined $metapos);
432 my $metavalue = $doc_rec->{$metaname}->[$metapos];
433 $gsdl_cgi->generate_ok_message("$metavalue");
434
435 # Release the lock once it is done
436 $self->unlock_collection($username, $collect);
437}
438
439
440sub get_import_metadata
441{
442 my $self = shift @_;
443
444 my $username = $self->{'username'};
445 my $collect = $self->{'collect'};
446 my $gsdl_cgi = $self->{'gsdl_cgi'};
447 my $gsdlhome = $self->{'gsdlhome'};
448
449 # Authenticate user if it is enabled
450 if ($baseaction::authentication_enabled) {
451 # Ensure the user is allowed to edit this collection
452 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
453 }
454
455 # Obtain the collect dir
456 my $site = $self->{'site'};
457 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
458 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
459
460 # Make sure the collection isn't locked by someone else
461 $self->lock_collection($username, $collect);
462
463 # look up additional args
464 my $docid = $self->{'d'};
465 my $metaname = $self->{'metaname'};
466 my $metapos = $self->{'metapos'};
467 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
468
469 my $infodbtype = $self->{'infodbtype'};
470 if (!defined $docid)
471 {
472 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
473 }
474
475 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
476 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
477 my $metadata_xml_file;
478 my $import_filename = undef;
479
480
481 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
482 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
483 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
484
485 # This now stores the full pathname
486 $import_filename = $doc_rec->{'src-file'}->[0];
487
488 # figure out correct metadata.xml file [?]
489 # Assuming the metadata.xml file is next to the source file
490 # Note: This will not work if it is using the inherited metadata from the parent folder
491 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
492 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
493
494 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
495
496 # Release the lock once it is done
497 $self->unlock_collection($username, $collect);
498}
499
500sub get_metadata_from_metadata_xml
501{
502 my $self = shift @_;
503 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
504
505 my @rules =
506 (
507 _default => 'raw',
508 'Metadata' => \&gfmxml_metadata,
509 'FileName' => \&mxml_filename
510 );
511
512 my $parser = XML::Rules->new
513 (
514 rules => \@rules,
515 output_encoding => 'utf8'
516 );
517
518 my $xml_in = "";
519 if (!open(MIN,"<$metadata_xml_filename"))
520 {
521 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
522 }
523 else
524 {
525 # Read them in
526 my $line;
527 while (defined ($line=<MIN>)) {
528 $xml_in .= $line;
529 }
530 close(MIN);
531
532 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
533
534 if(defined $parser->{'pad'}->{'metavalue'})
535 {
536 return $parser->{'pad'}->{'metavalue'};
537 }
538 else
539 {
540 return "";
541 }
542 }
543}
544
545sub gfmxml_metadata
546{
547 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
548
549 # no subsection support yet in metadata.xml
550
551 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
552 {
553 if (!defined $parser->{'parameters'}->{'poscount'})
554 {
555 $parser->{'parameters'}->{'poscount'} = 0;
556 }
557 else
558 {
559 $parser->{'parameters'}->{'poscount'}++;
560 }
561
562 # gets the first value by default, since metapos defaults to 0
563 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
564 {
565 if($parser->{'parameters'}->{'metapos'} > 0) {
566 print STDERR "@@@@ WARNING: non-zero metapos.\n";
567 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
568 }
569 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
570 }
571 }
572}
573
574sub get_archives_metadata
575{
576 my $self = shift @_;
577
578 my $username = $self->{'username'};
579 my $collect = $self->{'collect'};
580 my $gsdl_cgi = $self->{'gsdl_cgi'};
581 my $gsdlhome = $self->{'gsdlhome'};
582 my $infodbtype = $self->{'infodbtype'};
583
584 # Authenticate user if it is enabled
585 if ($baseaction::authentication_enabled) {
586 # Ensure the user is allowed to edit this collection
587 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
588 }
589
590 # Obtain the collect dir
591 my $site = $self->{'site'};
592 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
593
594 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
595
596 # Make sure the collection isn't locked by someone else
597 $self->lock_collection($username, $collect);
598
599 # look up additional args
600 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
601 $docid_secnum = "" if (!defined $docid_secnum);
602
603 my $metaname = $self->{'metaname'};
604 my $metapos = $self->{'metapos'};
605 $metapos = 0 if (!defined $metapos);
606
607 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
608 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
609
610 # This now stores the full pathname
611 my $doc_filename = $doc_rec->{'doc-file'}->[0];
612
613 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
614
615 # Release the lock once it is done
616 $self->unlock_collection($username, $collect);
617}
618
619sub get_metadata_from_archive_xml
620{
621 my $self = shift @_;
622 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
623
624 my @start_rules = ('Section' => \&dxml_start_section);
625
626 my @rules =
627 (
628 _default => 'raw',
629 'Metadata' => \&gfdxml_metadata
630 );
631
632 my $parser = XML::Rules->new
633 (
634 start_rules => \@start_rules,
635 rules => \@rules,
636 output_encoding => 'utf8'
637 );
638
639 my $xml_in = "";
640 if (!open(MIN,"<$doc_xml_filename"))
641 {
642 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
643 }
644 else
645 {
646 # Read them in
647 my $line;
648 while (defined ($line=<MIN>)) {
649 $xml_in .= $line;
650 }
651 close(MIN);
652
653 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
654
655 if(defined $parser->{'pad'}->{'metavalue'})
656 {
657 return $parser->{'pad'}->{'metavalue'};
658 }
659 else
660 {
661 return "";
662 }
663 }
664}
665
666sub gfdxml_metadata
667{
668 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
669
670 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
671 {
672 return;
673 }
674
675 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
676 {
677 if (!defined $parser->{'parameters'}->{'poscount'})
678 {
679 $parser->{'parameters'}->{'poscount'} = 0;
680 }
681 else
682 {
683 $parser->{'parameters'}->{'poscount'}++;
684 }
685 }
686
687 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
688 {
689 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
690 }
691}
692
693sub _set_live_metadata
694{
695 my $self = shift @_;
696
697 my $collect = $self->{'collect'};
698 my $gsdl_cgi = $self->{'gsdl_cgi'};
699 #my $gsdlhome = $self->{'gsdlhome'};
700 my $infodbtype = $self->{'infodbtype'};
701
702 # Obtain the collect dir
703 my $site = $self->{'site'};
704 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
705 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
706
707
708 # look up additional args
709 my $docid = $self->{'d'};
710 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
711 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
712 }
713 my $metavalue = $self->{'metavalue'};
714
715 # Generate the dbkey
716 my $metaname = $self->{'metaname'};
717 my $dbkey = "$docid.$metaname";
718
719 # To people who know $collect_tail please add some comments
720 # Obtain path to the database
721 my $collect_tail = $collect;
722 $collect_tail =~ s/^.*[\/\\]//;
723 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
724 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
725
726 # Set the new value
727 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
728 my $status = system($cmd);
729 if ($status != 0) {
730 # Catch error if gdbmget failed
731 my $mess = "Failed to set metadata key: $dbkey\n";
732
733 $mess .= "PATH: $ENV{'PATH'}\n";
734 $mess .= "cmd = $cmd\n";
735 $mess .= "Exit status: $status\n";
736 $mess .= "System Error Message: $!\n";
737
738 $gsdl_cgi->generate_error($mess);
739 }
740 else {
741 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
742 }
743
744 #return $status; # in case calling functions have any further use for this
745}
746
747sub set_live_metadata
748{
749 my $self = shift @_;
750
751 my $username = $self->{'username'};
752 my $collect = $self->{'collect'};
753 my $gsdl_cgi = $self->{'gsdl_cgi'};
754
755 if ($baseaction::authentication_enabled) {
756 # Ensure the user is allowed to edit this collection
757 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
758 }
759
760 # Make sure the collection isn't locked by someone else
761 $self->lock_collection($username, $collect);
762
763 $self->_set_live_metadata(@_);
764
765 # Release the lock once it is done
766 $self->unlock_collection($username, $collect);
767}
768
769sub set_index_metadata_entry
770{
771 my $self = shift @_;
772 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
773
774 # To people who know $collect_tail please add some comments
775 # Obtain path to the database
776 my $collect_tail = $collect;
777 $collect_tail =~ s/^.*[\/\\]//;
778 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
779 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
780
781# print STDERR "**** infodb file path = $infodb_file_path\n";
782# print STDERR "***** infodb type = $infodbtype\n";
783
784 # Read the docid entry
785 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
786
787 # Set the metadata value
788 if (defined $metapos) {
789 # if metamode=accumulate AND metapos, warn user and then use metapos
790 if (defined $metamode && $metamode eq "accumulate") {
791 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
792 print STDERR "**** Proceeding by using metapos\n";
793 }
794 $doc_rec->{$metaname}->[$metapos] = $metavalue;
795 }
796 elsif (defined $metamode && $metamode eq "override") {
797 $doc_rec->{$metaname} = [ $metavalue ];
798 }
799 else { # default for index was to override, but because accumulate is less destructive,
800 # and because accumulate is the default for archives and import, that's the new default for index too
801 if(defined $doc_rec->{$metaname}) {
802 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
803 } else {
804 $doc_rec->{$metaname} = [ $metavalue ];
805 }
806 }
807
808 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
809
810 return $status;
811
812}
813
814sub _set_import_metadata
815{
816 my $self = shift @_;
817
818 my $collect = $self->{'collect'};
819 my $gsdl_cgi = $self->{'gsdl_cgi'};
820 my $infodbtype = $self->{'infodbtype'};
821# my $gsdlhome = $self->{'gsdlhome'};
822
823 # Obtain the collect and archive dir
824 my $site = $self->{'site'};
825 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
826 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
827 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
828
829 # look up additional args
830 # want either d= or f=
831 my $docid = $self->{'d'};
832 my ($docid_root,$docid_secnum);
833 if(defined $docid) {
834 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
835 # as yet no support for setting subsection metadata in metadata.xml
836 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
837 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
838 return;
839 }
840 }
841
842 my $import_file = $self->{'f'};
843 if ((!defined $docid) && (!defined $import_file)) {
844 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
845 }
846
847 # Get the parameters and set default mode to "accumulate"
848 my $metaname = $self->{'metaname'};
849 my $metavalue = $self->{'metavalue'};
850## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
851 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
852
853 my $metamode = $self->{'metamode'};
854 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
855 # make "accumulate" the default (less destructive, as won't actually
856 # delete any existing values)
857 $metamode = "accumulate";
858 }
859
860 # adding metapos and prevmetavalue support to import_metadata subroutines
861 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
862 my $prevmetavalue = $self->{'prevmetavalue'};
863
864 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
865 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
866
867 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
868 $mess .= " $metaname";
869 $mess .= " = $metavalue";
870 $mess .= " ($metamode)\n";
871
872 $gsdl_cgi->generate_ok_message($mess);
873
874 #return $status; # in case calling functions have any further use for this
875}
876
877# the version of set_index_meta that doesn't do authentication
878sub _set_archives_metadata
879{
880 my $self = shift @_;
881
882 my $collect = $self->{'collect'};
883 my $gsdl_cgi = $self->{'gsdl_cgi'};
884 my $infodbtype = $self->{'infodbtype'};
885
886 # Obtain the collect and archive dir
887 my $site = $self->{'site'};
888 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
889 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
890
891 # look up additional args
892 my $docid = $self->{'d'};
893 my $metaname = $self->{'metaname'};
894 my $metavalue = $self->{'metavalue'};
895 my $prevmetavalue = $self->{'prevmetavalue'};
896
897 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
898 # Don't append "|| undef", since if metapos=0 it will then be set to undef
899
900 my $metamode = $self->{'metamode'};
901 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
902 # make "accumulate" the default (less destructive, as won't actually
903 # delete any existing values)
904 $metamode = "accumulate";
905 }
906
907 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
908 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
909
910 if ($status == 0) {
911 my $mess = "set-archives-metadata successful: Key[$docid]\n";
912 $mess .= " $metaname";
913 $mess .= "->[$metapos]" if (defined $metapos);
914 $mess .= " = $metavalue";
915 $mess .= " ($metamode)\n";
916
917 $gsdl_cgi->generate_ok_message($mess);
918 }
919 else {
920 my $mess .= "Failed to set archives metadata key: $docid\n";
921 $mess .= "Exit status: $status\n";
922 if(defined $self->{'error_msg'}) {
923 $mess .= "Error Message: $self->{'error_msg'}\n";
924 } else {
925 $mess .= "System Error Message: $!\n";
926 }
927 $mess .= "-" x 20 . "\n";
928
929 $gsdl_cgi->generate_error($mess);
930 }
931
932 #return $status; # in case calling functions have any further use for this
933}
934
935
936# the version of set_index_meta that doesn't do authentication
937sub _set_index_metadata
938{
939 my $self = shift @_;
940
941 my $collect = $self->{'collect'};
942 my $gsdl_cgi = $self->{'gsdl_cgi'};
943
944 my $site = $self->{'site'};
945 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
946
947 # look up additional args
948 my $docid = $self->{'d'};
949 my $metaname = $self->{'metaname'};
950 my $metapos = $self->{'metapos'}; # undef has meaning
951 my $metavalue = $self->{'metavalue'};
952 my $infodbtype = $self->{'infodbtype'};
953 my $metamode = $self->{'metamode'};
954
955 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
956
957 if ($status != 0) {
958 # Catch error if set infodb entry failed
959 my $mess = "Failed to set metadata key: $docid\n";
960
961 $mess .= "PATH: $ENV{'PATH'}\n";
962 $mess .= "Exit status: $status\n";
963 $mess .= "System Error Message: $!\n";
964
965 $gsdl_cgi->generate_error($mess);
966 }
967 else {
968 my $mess = "set-index-metadata successful: Key[$docid]\n";
969 $mess .= " $metaname";
970 $mess .= "->[$metapos]" if (defined $metapos);
971 $mess .= " = $metavalue\n";
972
973 $gsdl_cgi->generate_ok_message($mess);
974 }
975
976 #return $status; # in case calling functions have any further use for this
977}
978
979sub set_index_metadata
980{
981 my $self = shift @_;
982
983 my $username = $self->{'username'};
984 my $collect = $self->{'collect'};
985 my $gsdl_cgi = $self->{'gsdl_cgi'};
986 #my $gsdlhome = $self->{'gsdlhome'};
987
988 if ($baseaction::authentication_enabled) {
989 # Ensure the user is allowed to edit this collection
990 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
991 }
992
993 my $site = $self->{'site'};
994 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
995
996 $gsdl_cgi->checked_chdir($collect_dir);
997
998 # Obtain the collect dir
999 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1000
1001 # Make sure the collection isn't locked by someone else
1002 $self->lock_collection($username, $collect);
1003
1004 $self->_set_index_metadata(@_);
1005
1006 # Release the lock once it is done
1007 $self->unlock_collection($username, $collect);
1008}
1009
1010# call this to set the metadata for a combination of dirs archives, import or index, or live
1011# if none specified, defaults to index which was the original behaviour of set_metadata.
1012sub set_metadata
1013{
1014 my $self = shift @_;
1015
1016 # Testing that not defining a variable, setting it to "" or to " " all return false
1017 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1018
1019 my $where = $self->{'where'};
1020 if(!$where) {
1021 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1022 return;
1023 }
1024
1025 # authenticate and lock collection once, even if processing multiple dirs
1026 my $username = $self->{'username'};
1027 my $collect = $self->{'collect'};
1028 my $gsdl_cgi = $self->{'gsdl_cgi'};
1029
1030 if ($baseaction::authentication_enabled) {
1031 # Ensure the user is allowed to edit this collection
1032 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1033 }
1034
1035 if($where =~ m/index/) {
1036 my $site = $self->{'site'};
1037 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1038 $gsdl_cgi->checked_chdir($collect_dir);
1039 }
1040
1041 # Make sure the collection isn't locked by someone else
1042 $self->lock_collection($username, $collect);
1043
1044
1045 # now at last can set the metadata. $where can specify multiple
1046 # $where is of the form: import|archives|index, or a subset thereof
1047
1048 #my @whichdirs = split('\|', $where);
1049
1050 # just check whether $where contains import/archives/index/live in turn, and
1051 # for each case, process it accordingly
1052 if($where =~ m/import/) {
1053 $self->_set_import_metadata(@_);
1054 }
1055
1056 if($where =~ m/archives/) {
1057
1058 # look up docID arg which is optional to set_metadata because it's optional
1059 # to set_import, but which is compulsory to set_archives_metadata
1060 my $docid = $self->{'d'};
1061 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1062 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
1063 }
1064 # we have a docid, so can set archives meta
1065 $self->_set_archives_metadata(@_);
1066 }
1067
1068 if($where =~ m/index/) {
1069
1070 # look up docID arg which is optional to set_metadata because it's optional
1071 # to set_import, but which is compulsory to set_archives_metadata
1072 my $docid = $self->{'d'};
1073 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1074 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1075 }
1076 # we have a docid, so can set index meta
1077 $self->_set_index_metadata(@_);
1078 }
1079
1080 if($where =~ m/live/) {
1081 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1082 }
1083
1084 # Release the lock once it is done
1085 $self->unlock_collection($username, $collect);
1086}
1087
1088sub set_metadata_array
1089{
1090 my $self = shift @_;
1091
1092 my $where = $self->{'where'};
1093 if(!$where) {
1094 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1095 return;
1096 }
1097
1098 my $username = $self->{'username'};
1099 my $collect = $self->{'collect'};
1100 my $gsdl_cgi = $self->{'gsdl_cgi'};
1101
1102 if ($baseaction::authentication_enabled) {
1103 # Ensure the user is allowed to edit this collection
1104 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1105 }
1106
1107 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1108 # But including the stmt during this code reorganisation to preserve as-is what used to happen
1109 my $site = $self->{'site'};
1110 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1111 $gsdl_cgi->checked_chdir($collect_dir);
1112
1113 # Make sure the collection isn't locked by someone else
1114 $self->lock_collection($username, $collect);
1115
1116 if($where =~ m/import/) {
1117 $self->_set_import_metadata_array(@_);
1118 }
1119 if($where =~ m/archives/) {
1120 $self->_set_archives_metadata_array(@_);
1121 }
1122 if($where =~ m/index/) {
1123 $self->_set_index_metadata_array(@_);
1124 }
1125 if($where =~ m/live/) {
1126 $self->_set_live_metadata_array(@_);
1127 }
1128
1129 # Release the lock once it is done
1130 $self->unlock_collection($username, $collect);
1131}
1132
1133sub _set_index_metadata_array
1134{
1135 my $self = shift @_;
1136
1137 my $collect = $self->{'collect'};
1138 my $gsdl_cgi = $self->{'gsdl_cgi'};
1139 my $site = $self->{'site'};
1140 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1141
1142
1143 # look up additional args
1144
1145 my $infodbtype = $self->{'infodbtype'};
1146
1147 my $json_str = $self->{'json'};
1148 my $doc_array = decode_json $json_str;
1149
1150
1151 my $global_status = 0;
1152 my $global_mess = "";
1153
1154 my @all_docids = ();
1155
1156 foreach my $doc_array_rec ( @$doc_array ) {
1157
1158 my $status = -1;
1159 my $docid = $doc_array_rec->{'docid'};
1160
1161 push(@all_docids,$docid);
1162
1163 my $metaname = $doc_array_rec->{'metaname'};
1164 if(defined $metaname) {
1165 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
1166 my $metavalue = $doc_array_rec->{'metavalue'};
1167 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1168
1169 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1170 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1171 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1172
1173 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1174 $metaname = $metatable_rec->{'metaname'};
1175 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1176 my $metapos = undef;
1177 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1178
1179 foreach my $metavalue ( @$metavals ) { # metavals is an array
1180 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1181 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1182 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1183 # metavalue for this metaname will be lost
1184 $metamode = "accumulate";
1185 }
1186 }
1187 }
1188 }
1189
1190 if ($status != 0) {
1191 # Catch error if set infodb entry failed
1192 $global_status = $status;
1193 $global_mess .= "Failed to set metadata key: $docid\n";
1194 $global_mess .= "Exit status: $status\n";
1195 $global_mess .= "System Error Message: $!\n";
1196 $global_mess .= "-" x 20;
1197 }
1198 }
1199
1200 if ($global_status != 0) {
1201 $global_mess .= "PATH: $ENV{'PATH'}\n";
1202 $gsdl_cgi->generate_error($global_mess);
1203 }
1204 else {
1205 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1206 $gsdl_cgi->generate_ok_message($mess);
1207 }
1208}
1209
1210sub set_index_metadata_array
1211{
1212 my $self = shift @_;
1213
1214 my $username = $self->{'username'};
1215 my $collect = $self->{'collect'};
1216 my $gsdl_cgi = $self->{'gsdl_cgi'};
1217# my $gsdlhome = $self->{'gsdlhome'};
1218
1219 if ($baseaction::authentication_enabled) {
1220 # Ensure the user is allowed to edit this collection
1221 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1222 }
1223
1224 my $site = $self->{'site'};
1225 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1226
1227 $gsdl_cgi->checked_chdir($collect_dir);
1228
1229 # Obtain the collect dir
1230 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1231
1232 # Make sure the collection isn't locked by someone else
1233 $self->lock_collection($username, $collect);
1234
1235 $self->_set_index_metadata_array(@_);
1236
1237 # Release the lock once it is done
1238 $self->unlock_collection($username, $collect);
1239}
1240
1241# experimental, newly added in and untested
1242sub _set_live_metadata_array
1243{
1244 my $self = shift @_;
1245
1246 my $collect = $self->{'collect'};
1247 my $gsdl_cgi = $self->{'gsdl_cgi'};
1248
1249 my $site = $self->{'site'};
1250 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1251
1252
1253 # look up additional args
1254 my $infodbtype = $self->{'infodbtype'};
1255 # To people who know $collect_tail please add some comments
1256 # Obtain path to the database
1257 my $collect_tail = $collect;
1258 $collect_tail =~ s/^.*[\/\\]//;
1259 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1260 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1261
1262
1263 my $json_str = $self->{'json'};
1264 my $doc_array = decode_json $json_str;
1265
1266
1267 my $global_status = 0;
1268 my $global_mess = "";
1269
1270 my @all_docids = ();
1271
1272
1273 foreach my $doc_array_rec ( @$doc_array ) {
1274
1275 my $status = -1;
1276 my $docid = $doc_array_rec->{'docid'};
1277
1278 push(@all_docids,$docid);
1279
1280 my $metaname = $doc_array_rec->{'metaname'};
1281 if(defined $metaname) {
1282 my $dbkey = "$docid.$metaname";
1283 my $metavalue = $doc_array_rec->{'metavalue'};
1284
1285 # Set the new value
1286 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1287 $status = system($cmd);
1288
1289 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1290 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1291 foreach my $metatable_rec ( @$metatable ) {
1292 $metaname = $metatable_rec->{'metaname'};
1293 my $dbkey = "$docid.$metaname";
1294
1295 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1296 foreach my $metavalue ( @$metavals ) {
1297 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
1298 $status = system($cmd);
1299 }
1300 }
1301
1302 }
1303
1304 if ($status != 0) {
1305 # Catch error if gdbmget failed
1306 $global_status = $status;
1307 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1308 $global_mess .= "Exit status: $status\n";
1309 $global_mess .= "System Error Message: $!\n";
1310 $global_mess .= "-" x 20;
1311 }
1312 }
1313
1314 if ($global_status != 0) {
1315 $global_mess .= "PATH: $ENV{'PATH'}\n";
1316 $gsdl_cgi->generate_error($global_mess);
1317 }
1318 else {
1319 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1320 $gsdl_cgi->generate_ok_message($mess);
1321 }
1322}
1323
1324sub set_live_metadata_array
1325{
1326 my $self = shift @_;
1327
1328 my $username = $self->{'username'};
1329 my $collect = $self->{'collect'};
1330 my $gsdl_cgi = $self->{'gsdl_cgi'};
1331
1332 if ($baseaction::authentication_enabled) {
1333 # Ensure the user is allowed to edit this collection
1334 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1335 }
1336
1337 my $site = $self->{'site'};
1338 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1339
1340 $gsdl_cgi->checked_chdir($collect_dir);
1341
1342 # Make sure the collection isn't locked by someone else
1343 $self->lock_collection($username, $collect);
1344
1345 $self->_set_live_metadata_array(@_);
1346
1347 # Release the lock once it is done
1348 $self->unlock_collection($username, $collect);
1349}
1350
1351
1352sub dxml_metadata
1353{
1354 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1355 my $metaname = $parser->{'parameters'}->{'metaname'};
1356 my $metamode = $parser->{'parameters'}->{'metamode'};
1357
1358 print STDERR "**** Processing closing </Metadata> tag\n";
1359
1360 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1361
1362 # Find the right metadata tag and checks if we are going to
1363 # override it
1364 #
1365 # Note: This over writes the first metadata block it
1366 # encountered. If there are multiple Sections in the doc.xml, it
1367 # might not behave as you would expect
1368
1369 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1370## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1371## print STDERR "**** metamode = $metamode\n";
1372
1373 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1374 {
1375 my $name_attr = $attrHash->{'name'};
1376 if (($name_attr eq $metaname) && ($metamode eq "override"))
1377 {
1378 if (!defined $parser->{'parameters'}->{'poscount'})
1379 {
1380 $parser->{'parameters'}->{'poscount'} = 0;
1381 }
1382 else
1383 {
1384 $parser->{'parameters'}->{'poscount'}++;
1385 }
1386
1387 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1388 {
1389 ##print STDERR "#### got match!!\n";
1390 # Get the value and override the current value
1391 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1392 $attrHash->{'_content'} = $metavalue;
1393
1394 # Don't want it to wipe out any other pieces of metadata
1395 $parser->{'parameters'}->{'metamode'} = "done";
1396 }
1397 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
1398 {
1399 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1400 $attrHash->{'_content'} = $metavalue;
1401 $parser->{'parameters'}->{'metamode'} = "done";
1402 }
1403 }
1404 }
1405
1406 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1407 return [$tagname => $attrHash];
1408}
1409
1410# This method exists purely for catching invalid section numbers that the client
1411# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1412# we've seen all the Sections in the doc.xml, and none of their section nums matched
1413# if the metamode has not been set to 'done' by then.
1414sub dxml_archive
1415{
1416 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1417 my $metamode = $parser->{'parameters'}->{'metamode'};
1418
1419 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1420 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1421
1422# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1423
1424 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
1425 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
1426
1427 my $metaname = $parser->{'parameters'}->{'metaname'};
1428 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1429
1430 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1431 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1432 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1433 print STDERR "\tin section $opt_doc_secnum.\n";
1434 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
1435 }
1436
1437 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1438 return [$tagname => $attrHash];
1439}
1440
1441sub dxml_description
1442{
1443 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1444 my $metamode = $parser->{'parameters'}->{'metamode'};
1445
1446 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1447 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
1448
1449 print STDERR "**** Processing a closing </Description> tag \n";
1450# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
1451
1452 # Accumulate the metadata
1453
1454 # We'll be accumulating metadata at this point if we haven't found and therefore
1455 # haven't processed the metadata yet.
1456 # For subsections, this means that if we're at a matching subsection, but haven't
1457 # found the correct metaname to override in that subsection, we accumulate it as new
1458 # meta in the subsection by adding it to the current description.
1459 # If there's no subsection info for the metadata, it will accumulate at the top level
1460 # section description if we hadn't found a matching metaname to override at this point.
1461
1462 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1463 # at the toplevel section. In the latter case, it means we want to process meta in the
1464 # toplevel section. So the eq check between the values below will work in all cases.
1465
1466 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1467 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1468 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1469
1470 if ($curr_secnum eq $opt_doc_secnum
1471 && ($metamode eq "accumulate" || $metamode eq "override")) {
1472 if ($metamode eq "override") {
1473 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1474 }
1475
1476 # If we get to here and metamode is override, this means there
1477 # was no existing value to overide => treat as an append operation
1478
1479 # Tack a new metadata tag on to the end of the <Metadata>+ block
1480 my $metaname = $parser->{'parameters'}->{'metaname'};
1481 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1482
1483 my $metadata_attr = {
1484 '_content' => $metavalue,
1485 'name' => $metaname,
1486 'mode' => "accumulate"
1487 };
1488
1489 my $append_metadata = [ "Metadata" => $metadata_attr ];
1490 my $description_content = $attrHash->{'_content'};
1491
1492 print "Appending metadata to doc.xml\n";
1493
1494 if (ref($description_content)) {
1495 # got some existing interesting nested content
1496 push(@$description_content, " ", $append_metadata ,"\n ");
1497 }
1498 else {
1499 #description_content is most likely a string such as "\n"
1500 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1501 }
1502
1503 $parser->{'parameters'}->{'metamode'} = "done";
1504 }
1505 else {
1506 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1507## print STDERR "**** NOT ACCUMULATE?!? \n";
1508 }
1509
1510 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1511 return [$tagname => $attrHash];
1512}
1513
1514
1515sub dxml_start_section
1516{
1517 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1518
1519 my $new_depth = scalar(@$contextArray);
1520
1521 print STDERR "**** START SECTION \n";
1522
1523 if ($new_depth == 1) {
1524 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1525 $parser->{'parameters'}->{'curr_section_num'} = "";
1526 }
1527
1528 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1529 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1530
1531 my $new_secnum;
1532
1533 if ($new_depth > $old_depth) {
1534 # child subsection
1535 $new_secnum = "$old_secnum.1";
1536 }
1537 elsif ($new_depth == $old_depth) {
1538 # sibling section => increase it's value by 1
1539 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1540 $tail_num++;
1541 $new_secnum = $old_secnum;
1542 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1543 }
1544 else {
1545 # back up to parent section => lopp off tail
1546 $new_secnum = $old_secnum;
1547 $new_secnum =~ s/\.\d+$//;
1548 }
1549
1550 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1551 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1552
1553 1;
1554}
1555
1556sub edit_xml_file
1557{
1558 my $self = shift @_;
1559 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
1560
1561 # use XML::Rules to add it in (read in and out again)
1562 my $parser = XML::Rules->new(start_rules => $start_rules,
1563 rules => $rules,
1564 style => 'filter',
1565 output_encoding => 'utf8' );
1566
1567 my $xml_in = "";
1568 if (!open(MIN,"<$filename")) {
1569 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1570 }
1571 else {
1572 # Read all the text in
1573 my $line;
1574 while (defined ($line=<MIN>)) {
1575 $xml_in .= $line;
1576 }
1577 close(MIN);
1578
1579 my $MOUT;
1580 if (!open($MOUT,">$filename")) {
1581 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1582 }
1583 else {
1584 # Matched lines will get handled by the call backs
1585## my $xml_out = "";
1586
1587 binmode($MOUT,":utf8");
1588 $parser->filter($xml_in,$MOUT, $options);
1589
1590# binmode(MOUT,":utf8");
1591# print MOUT $xml_out;
1592 close($MOUT);
1593 }
1594 }
1595
1596 # copy across any custom error information that was stored during parsing
1597 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
1598}
1599
1600sub edit_doc_xml
1601{
1602 my $self = shift @_;
1603 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
1604
1605 my $info_mess = <<RAWEND;
1606****************************
1607 edit_doc_xml()
1608****************************
1609doc_xml_filename = $doc_xml_filename
1610metaname = $metaname
1611metavalue = $metavalue
1612metapos = $metapos
1613metamode = $metamode
1614opt_secnum = $opt_secnum
1615prevmetavalue = $prevmetavalue
1616****************************
1617RAWEND
1618
1619 $gsdl_cgi->generate_message($info_mess);
1620
1621 # To monitor which section/subsection number we are in
1622 my @start_rules =
1623 ( 'Section' => \&dxml_start_section );
1624
1625 # use XML::Rules to add it in (read in and out again)
1626 # Set the call back functions
1627 my @rules =
1628 ( _default => 'raw',
1629 'Metadata' => \&dxml_metadata,
1630 'Description' => \&dxml_description,
1631 'Archive' => \&dxml_archive); # just for catching errors at end
1632
1633 # Sets the parameters
1634 my $options = { 'metaname' => $metaname,
1635 'metapos' => $metapos,
1636 'metavalue' => $metavalue,
1637 'metamode' => $metamode,
1638 'prevmetavalue' => $prevmetavalue };
1639
1640 if (defined $opt_secnum) {
1641 $options->{'secnum'} = $opt_secnum;
1642 }
1643
1644 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
1645}
1646
1647sub set_archives_metadata_entry
1648{
1649 my $self = shift @_;
1650 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1651
1652 my $info_mess = <<RAWEND;
1653****************************
1654 set_archives_metadata_entry()
1655****************************
1656archive_dir = $archive_dir
1657collect_dir = $collect_dir
1658collect = $collect
1659infodbtype = $infodbtype
1660docid = $docid
1661metaname = $metaname
1662metapos = $metapos
1663metavalue = $metavalue
1664metamode = $metamode
1665prevmetavalue = $prevmetavalue
1666****************************
1667RAWEND
1668
1669 $gsdl_cgi->generate_message($info_mess);
1670
1671 # Obtain the doc.xml path for the specified docID
1672 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1673
1674 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1675 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1676 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1677
1678 # The $doc_xml_file is relative to the archives, and now let's get the full path
1679 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1680 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
1681
1682 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1683 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1684 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1685 if(!defined $metapos && $metamode eq "override") {
1686 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1687 }
1688
1689 # Edit the doc.xml file with the specified metadata name, value and position.
1690 # TODO: there is a potential problem here as this edit_doc_xml function
1691 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1692 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1693
1694 # dxml_metadata method ignores metapos if metamode anything other than override
1695 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
1696 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
1697
1698 # return 0; # return 0 for now to indicate no error
1699 return (defined $self->{'error_msg'}) ? 1 : 0;
1700}
1701
1702
1703sub set_archives_metadata
1704{
1705 my $self = shift @_;
1706
1707 my $username = $self->{'username'};
1708 my $collect = $self->{'collect'};
1709 my $gsdl_cgi = $self->{'gsdl_cgi'};
1710
1711 if ($baseaction::authentication_enabled) {
1712 # Ensure the user is allowed to edit this collection
1713 $self->authenticate_user($username, $collect);
1714 }
1715
1716 # Make sure the collection isn't locked by someone else
1717 $self->lock_collection($username, $collect);
1718
1719 $self->_set_archives_metadata(@_);
1720
1721 # Release the lock once it is done
1722 $self->unlock_collection($username, $collect);
1723}
1724
1725sub _set_archives_metadata_array
1726{
1727 my $self = shift @_;
1728
1729 my $collect = $self->{'collect'};
1730 my $gsdl_cgi = $self->{'gsdl_cgi'};
1731 my $site = $self->{'site'};
1732 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1733
1734 # look up additional args
1735
1736 my $infodbtype = $self->{'infodbtype'};
1737
1738 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1739
1740 my $json_str = $self->{'json'};
1741 my $doc_array = decode_json $json_str;
1742
1743
1744 my $global_status = 0;
1745 my $global_mess = "";
1746
1747 my @all_docids = ();
1748
1749 foreach my $doc_array_rec ( @$doc_array ) {
1750 my $status = -1;
1751 my $docid = $doc_array_rec->{'docid'};
1752
1753 push(@all_docids,$docid);
1754
1755 my $metaname = $doc_array_rec->{'metaname'};
1756 if(defined $metaname) {
1757
1758 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1759
1760 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1761 my $metavalue = $doc_array_rec->{'metavalue'};
1762 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1763
1764
1765 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1766 # make "accumulate" the default (less destructive, as it won't actually
1767 # delete any existing values)
1768 $metamode = "accumulate";
1769 }
1770
1771 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1772 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1773 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1774 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1775
1776 foreach my $metatable_rec ( @$metatable ) {
1777 $metaname = $metatable_rec->{'metaname'};
1778 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1779 my $metapos = undef;
1780 my $prevmetavalue = undef;
1781 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1782
1783 foreach my $metavalue ( @$metavals ) {
1784 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1785 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1786
1787 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1788 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1789 # metavalue for this metaname will be lost
1790 $metamode = "accumulate";
1791 }
1792 }
1793 }
1794 }
1795
1796 if ($status != 0) {
1797 # Catch error if set infodb entry failed
1798 $global_status = $status;
1799 $global_mess .= "Failed to set metadata key: $docid\n";
1800 $global_mess .= "Exit status: $status\n";
1801 $global_mess .= "System Error Message: $!\n";
1802 $global_mess .= "-" x 20 . "\n";
1803 }
1804 }
1805
1806 if ($global_status != 0) {
1807 $global_mess .= "PATH: $ENV{'PATH'}\n";
1808 $gsdl_cgi->generate_error($global_mess);
1809 }
1810 else {
1811 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1812 $gsdl_cgi->generate_ok_message($mess);
1813 }
1814}
1815
1816sub set_archives_metadata_array
1817{
1818 my $self = shift @_;
1819
1820 my $username = $self->{'username'};
1821 my $collect = $self->{'collect'};
1822 my $gsdl_cgi = $self->{'gsdl_cgi'};
1823# my $gsdlhome = $self->{'gsdlhome'};
1824
1825 if ($baseaction::authentication_enabled) {
1826 # Ensure the user is allowed to edit this collection
1827 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1828 }
1829
1830 my $site = $self->{'site'};
1831 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1832
1833 $gsdl_cgi->checked_chdir($collect_dir);
1834
1835 # Obtain the collect dir
1836 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1837
1838 # Make sure the collection isn't locked by someone else
1839 $self->lock_collection($username, $collect);
1840
1841 $self->_set_archives_metadata_array(@_);
1842
1843 # Release the lock once it is done
1844 $self->unlock_collection($username, $collect);
1845}
1846
1847sub _remove_archives_metadata
1848{
1849 my $self = shift @_;
1850
1851 my $collect = $self->{'collect'};
1852 my $gsdl_cgi = $self->{'gsdl_cgi'};
1853# my $gsdlhome = $self->{'gsdlhome'};
1854 my $infodbtype = $self->{'infodbtype'};
1855
1856 my $site = $self->{'site'};
1857
1858 # Obtain the collect and archive dir
1859 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1860
1861 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1862
1863 # look up additional args
1864 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1865
1866 my $metaname = $self->{'metaname'};
1867 my $metapos = $self->{'metapos'};
1868
1869 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1870
1871 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1872 # default to deleting the first metavalue for the given metaname
1873 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1874 # seem to get deleted in one single remove_archives_meta action invocation.
1875 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1876 # deleted, both get deleted.
1877 if(!defined $metapos && !defined $metavalue) {
1878 $metapos = 0;
1879 }
1880
1881 my $metamode = $self->{'metamode'} || undef;
1882
1883 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1884 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1885
1886 # This now stores the full pathname
1887 my $doc_filename = $doc_rec->{'doc-file'}->[0];
1888
1889 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
1890# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
1891
1892 if ($status == 0)
1893 {
1894 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1895 $mess .= " $metaname";
1896 $mess .= "->[$metapos]" if (defined $metapos);
1897
1898 $gsdl_cgi->generate_ok_message($mess);
1899 }
1900 else
1901 {
1902 my $mess .= "Failed to remove archives metadata key: $docid\n";
1903 $mess .= "Exit status: $status\n";
1904 $mess .= "System Error Message: $!\n";
1905 $mess .= "-" x 20 . "\n";
1906
1907 $gsdl_cgi->generate_error($mess);
1908 }
1909
1910 #return $status; # in case calling functions have a use for this
1911}
1912
1913sub remove_archives_metadata
1914{
1915 my $self = shift @_;
1916
1917 my $username = $self->{'username'};
1918 my $collect = $self->{'collect'};
1919 my $gsdl_cgi = $self->{'gsdl_cgi'};
1920
1921 if ($baseaction::authentication_enabled)
1922 {
1923 # Ensure the user is allowed to edit this collection
1924 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
1925 }
1926
1927 # Make sure the collection isn't locked by someone else
1928 $self->lock_collection($username, $collect);
1929
1930 $self->_remove_archives_metadata(@_);
1931
1932 # Release the lock once it is done
1933 $self->unlock_collection($username, $collect);
1934}
1935
1936sub remove_from_doc_xml
1937{
1938 my $self = shift @_;
1939 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
1940
1941 my @start_rules = ('Section' => \&dxml_start_section);
1942
1943 # Set the call-back functions for the metadata tags
1944 my @rules =
1945 (
1946 _default => 'raw',
1947 'Metadata' => \&rfdxml_metadata
1948 );
1949
1950 my $parser = XML::Rules->new
1951 (
1952 start_rules => \@start_rules,
1953 rules => \@rules,
1954 style => 'filter',
1955 output_encoding => 'utf8',
1956# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1957 stripspaces => 2|0|0 # ineffectual
1958 );
1959
1960 my $status = 0;
1961 my $xml_in = "";
1962 if (!open(MIN,"<$doc_xml_filename"))
1963 {
1964 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1965 $status = 1;
1966 }
1967 else
1968 {
1969 # Read them in
1970 my $line;
1971 while (defined ($line=<MIN>)) {
1972 $xml_in .= $line;
1973 }
1974 close(MIN);
1975
1976 # Filter with the call-back functions
1977 my $xml_out = "";
1978
1979 my $MOUT;
1980 if (!open($MOUT,">$doc_xml_filename")) {
1981 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1982 $status = 1;
1983 }
1984 else {
1985 binmode($MOUT,":utf8");
1986 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
1987 close($MOUT);
1988 }
1989 }
1990 return $status;
1991}
1992
1993sub rfdxml_metadata
1994{
1995 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1996
1997 # For comparisons, toplevel section is indicated by ""
1998 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1999 my $secid = $parser->{'parameters'}->{'secid'} || "";
2000
2001 if (!($secid eq $curr_sec_num))
2002 {
2003 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2004 return [$tagname => $attrHash];
2005 }
2006
2007 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2008 {
2009 if (!defined $parser->{'parameters'}->{'poscount'})
2010 {
2011 $parser->{'parameters'}->{'poscount'} = 0;
2012 }
2013 else
2014 {
2015 $parser->{'parameters'}->{'poscount'}++;
2016 }
2017
2018 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
2019 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2020 return [];
2021 }
2022 }
2023
2024 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2025 {
2026 return [];
2027 }
2028
2029 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2030 {
2031 return [];
2032 }
2033
2034 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2035 return [$tagname => $attrHash];
2036}
2037
2038sub mxml_metadata
2039{
2040 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2041 my $metaname = $parser->{'parameters'}->{'metaname'};
2042 my $metamode = $parser->{'parameters'}->{'metamode'};
2043
2044 # Report error if we don't see FileName tag before this
2045 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2046
2047 # Don't do anything if we are not in the right FileSet
2048 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2049 if ($file_regexp =~ /\.\*/) {
2050 # Only interested in a file_regexp if it specifies precisely one
2051 # file.
2052 # So, skip anything with a .* in it as it is too general
2053## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
2054 return [$tagname => $attrHash];
2055 }
2056 my $src_file = $parser->{'parameters'}->{'src_file'};
2057 if (!($src_file =~ /$file_regexp/)) {
2058 return [$tagname => $attrHash];
2059 }
2060## print STDERR "*** mxl metamode = $metamode\n";
2061
2062 # Find the right metadata tag and checks if we are going to override it
2063 my $name_attr = $attrHash->{'name'};
2064 if (($name_attr eq $metaname) && ($metamode eq "override")) {
2065
2066 # now metadata.xml functions need to keep track of metapos
2067 if (!defined $parser->{'parameters'}->{'poscount'})
2068 {
2069 $parser->{'parameters'}->{'poscount'} = 0;
2070 }
2071 else
2072 {
2073 $parser->{'parameters'}->{'poscount'}++;
2074 }
2075
2076 # If either the metapos or prevmetavalue is set,
2077 # get the value and override the current value
2078 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2079
2080 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2081 {
2082 $attrHash->{'_content'} = $metavalue;
2083
2084 ## print STDERR "**** overriding metadata.xml\n";
2085
2086 # Don't want it to wipe out any other pieces of metadata
2087 $parser->{'parameters'}->{'metamode'} = "done";
2088 }
2089 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2090 {
2091 $attrHash->{'_content'} = $metavalue;
2092 $parser->{'parameters'}->{'metamode'} = "done";
2093 }
2094 }
2095
2096 # mxml_description will process the metadata if metadata is accumulate,
2097 # or if we haven't found the metadata to override
2098
2099 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2100 return [$tagname => $attrHash];
2101}
2102
2103
2104sub mxml_description
2105{
2106 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2107 my $metamode = $parser->{'parameters'}->{'metamode'};
2108
2109 # Failed... Report error if we don't see FileName tag before this
2110 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
2111
2112 # Don't do anything if we are not in the right FileSet
2113 my $file_regexp = $parser->{'parameters'}->{'current_file'};
2114 if ($file_regexp =~ m/\.\*/) {
2115 # Only interested in a file_regexp if it specifies precisely one
2116 # file.
2117 # So, skip anything with a .* in it as it is too general
2118 return [$tagname => $attrHash];
2119 }
2120 my $src_file = $parser->{'parameters'}->{'src_file'};
2121
2122 if (!($src_file =~ m/$file_regexp/)) {
2123 return [$tagname => $attrHash];
2124 }
2125
2126 # Accumulate the metadata block to the end of the description block
2127 # Note: This adds metadata block to all description blocks, so if there are
2128 # multiple FileSets, it will add to all of them
2129 if (($metamode eq "accumulate") || ($metamode eq "override")) {
2130
2131 # if metamode was "override" but get to here then it failed to
2132 # find an item to override, in which case it should append its
2133 # value to the end, just like the "accumulate" mode
2134
2135 if ($metamode eq "override") {
2136 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2137 }
2138
2139 # tack a new metadata tag on to the end of the <Metadata>+ block
2140 my $metaname = $parser->{'parameters'}->{'metaname'};
2141 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2142
2143 my $metadata_attr = { '_content' => $metavalue,
2144 'name' => $metaname,
2145 'mode' => "accumulate" };
2146
2147 my $append_metadata = [ "Metadata" => $metadata_attr ];
2148 my $description_content = $attrHash->{'_content'};
2149
2150## print STDERR "*** appending to metadata.xml\n";
2151
2152 # append the new metadata element to the end of the current
2153 # content contained inside this tag
2154 if (ref($description_content) eq "") {
2155 # => string or numeric literal
2156 # this is caused by a <Description> block has no <Metadata> child elements
2157 # => set up an empty array in '_content'
2158 $attrHash->{'_content'} = [ "\n" ];
2159 $description_content = $attrHash->{'_content'};
2160 }
2161
2162 push(@$description_content," ", $append_metadata ,"\n ");
2163 $parser->{'parameters'}->{'metamode'} = "done";
2164 }
2165
2166 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2167 return [$tagname => $attrHash];
2168}
2169
2170
2171sub mxml_filename
2172{
2173 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2174
2175 # Store the filename of the Current Fileset
2176 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2177 # FileName tag must come before Description tag
2178 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2179
2180 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2181 return [$tagname => $attrHash];
2182}
2183
2184
2185sub mxml_fileset
2186{
2187 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2188
2189 # Initilise the current_file
2190 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2191 # FileName tag must come before Description tag
2192 $parser->{'parameters'}->{'current_file'} = "";
2193
2194 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2195 return [$tagname => $attrHash];
2196}
2197
2198sub mxml_directorymetadata
2199{
2200 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2201
2202 # if we haven't processed the metadata when we reach the end of metadata.xml
2203 # it's because there's no particular FileSet element whose FileName matched
2204 # In which case, add a new FileSet for this FileName
2205 my $metamode = $parser->{'parameters'}->{'metamode'};
2206 if($metamode ne "done") {
2207
2208 if ($metamode eq "override") {
2209 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2210 }
2211
2212 # If we get to here and metamode is override, this means there
2213 # was no existing value to overide => treat as an append operation
2214
2215 # Create a new FileSet element and append to DirectoryMetadata
2216 # <FileSet>
2217 # <FileName>src_file</FileName>
2218 # <Description>
2219 # <Metadata mode="" name="">metavalue</Metadata>
2220 # </Description>
2221 # </FileSet>
2222 my $src_file = $parser->{'parameters'}->{'src_file'};
2223 my $metaname = $parser->{'parameters'}->{'metaname'};
2224 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2225 my $metadata_attr = {
2226 '_content' => $metavalue,
2227 'name' => $metaname,
2228 'mode' => "accumulate"
2229 };
2230 my $append_metadata = [ "Metadata" => $metadata_attr ];
2231 my $description_attr->{'_content'} = [ "\n\t\t ", $append_metadata, "\n\t\t"];
2232 my $description_element = [ "Description" => $description_attr ];
2233
2234 #_content is not an attribute, it's special and holds the children of this element
2235 # including the textnode value embedded in this element if any.
2236 my $filename_attr = {'_content' => $src_file};
2237 my $filename_element = [ "FileName" => $filename_attr ];
2238
2239 my $fileset_attr = {};
2240 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2241 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2242
2243
2244 # get children of dirmeta, and push the new FileSet element onto it
2245 print "Appending metadata to metadata.xml\n";
2246 my $dirmeta_content = $attrHash->{'_content'};
2247 if (ref($dirmeta_content)) {
2248 # got some existing interesting nested content
2249 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2250 push(@$dirmeta_content, "\t", $fileset ,"\n");
2251 }
2252 else {
2253 #description_content is most likely a string such as "\n"
2254 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2255 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2256 }
2257
2258 $parser->{'parameters'}->{'metamode'} = "done";
2259 }
2260 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2261 return [$tagname => $attrHash];
2262}
2263
2264
2265sub edit_metadata_xml
2266{
2267 my $self = shift @_;
2268 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
2269
2270 # Set the call-back functions for the metadata tags
2271 my @rules =
2272 ( _default => 'raw',
2273 'FileName' => \&mxml_filename,
2274 'Metadata' => \&mxml_metadata,
2275 'Description' => \&mxml_description,
2276 'FileSet' => \&mxml_fileset,
2277 'DirectoryMetadata' => \&mxml_directorymetadata);
2278
2279 # use XML::Rules to add it in (read in and out again)
2280 my $parser = XML::Rules->new(rules => \@rules,
2281 style => 'filter',
2282 output_encoding => 'utf8',
2283 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2284
2285 if (!-e $metadata_xml_filename) {
2286
2287 if (open(MOUT,">$metadata_xml_filename")) {
2288
2289 my $src_file_re = &util::filename_to_regex($src_file);
2290 # shouldn't the following also be in the above utility routine??
2291 # $src_file_re =~ s/\./\\./g;
2292
2293 print MOUT "<?xml version=\"1.0\"?>\n";
2294 print MOUT "<DirectoryMetadata>\n";
2295 print MOUT " <FileSet>\n";
2296 print MOUT " <FileName>$src_file_re</FileName>\n";
2297 print MOUT " <Description>\n";
2298 print MOUT " </Description>\n";
2299 print MOUT " </FileSet>\n";
2300 print MOUT "</DirectoryMetadata>\n";
2301
2302 close(MOUT);
2303 }
2304 else {
2305 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2306 }
2307 }
2308
2309
2310 my $xml_in = "";
2311 if (!open(MIN,"<$metadata_xml_filename")) {
2312 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2313 }
2314 else {
2315 # Read them in
2316 my $line;
2317 while (defined ($line=<MIN>)) {
2318 $xml_in .= $line;
2319 }
2320 close(MIN);
2321
2322 # Filter with the call-back functions
2323 my $xml_out = "";
2324
2325 my $MOUT;
2326 if (!open($MOUT,">$metadata_xml_filename")) {
2327 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2328 }
2329 else {
2330 binmode($MOUT,":utf8");
2331
2332 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2333 # At the moment, I will just hack it!
2334 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2335 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2336 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2337 #print MOUT $xml_out;
2338
2339 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
2340 metapos => $metapos,
2341 metavalue => $metavalue,
2342 metamode => $metamode,
2343 src_file => $src_file,
2344 prevmetavalue => $prevmetavalue,
2345 current_file => undef} );
2346 close($MOUT);
2347 }
2348 }
2349}
2350
2351
2352sub set_import_metadata
2353{
2354 my $self = shift @_;
2355
2356 my $username = $self->{'username'};
2357 my $collect = $self->{'collect'};
2358 my $gsdl_cgi = $self->{'gsdl_cgi'};
2359
2360 if ($baseaction::authentication_enabled) {
2361 # Ensure the user is allowed to edit this collection
2362 $self->authenticate_user($username, $collect);
2363 }
2364
2365 # Make sure the collection isn't locked by someone else
2366 $self->lock_collection($username, $collect);
2367
2368 $self->_set_import_metadata(@_);
2369
2370 # Release the lock once it is done
2371 $self->unlock_collection($username, $collect);
2372
2373}
2374
2375sub set_import_metadata_array
2376{
2377 my $self = shift @_;
2378
2379 my $username = $self->{'username'};
2380 my $collect = $self->{'collect'};
2381 my $gsdl_cgi = $self->{'gsdl_cgi'};
2382# my $gsdlhome = $self->{'gsdlhome'};
2383
2384 if ($baseaction::authentication_enabled) {
2385 # Ensure the user is allowed to edit this collection
2386 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2387 }
2388
2389 my $site = $self->{'site'};
2390 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2391
2392 $gsdl_cgi->checked_chdir($collect_dir);
2393
2394 # Make sure the collection isn't locked by someone else
2395 $self->lock_collection($username, $collect);
2396
2397 $self->_set_import_metadata_array(@_);
2398
2399 # Release the lock once it is done
2400 $self->unlock_collection($username, $collect);
2401
2402}
2403
2404
2405sub _set_import_metadata_array
2406{
2407 my $self = shift @_;
2408
2409 my $collect = $self->{'collect'};
2410 my $gsdl_cgi = $self->{'gsdl_cgi'};
2411
2412 my $site = $self->{'site'};
2413 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2414
2415 # look up additional args
2416
2417 my $infodbtype = $self->{'infodbtype'};
2418
2419 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2420 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2421
2422 my $json_str = $self->{'json'};
2423 my $doc_array = decode_json $json_str;
2424
2425 my $global_status = 0;
2426 my $global_mess = "";
2427
2428 my @all_docids = ();
2429
2430 foreach my $doc_array_rec ( @$doc_array )
2431 {
2432 my $status = -1;
2433 my $docid = $doc_array_rec->{'docid'};
2434
2435 my ($docid_root,$docid_secnum);
2436 if(defined $docid) {
2437 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2438 # as yet no support for setting subsection metadata in metadata.xml
2439 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2440 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2441 next; # skip this docid in for loop
2442 }
2443 }
2444
2445 push(@all_docids,$docid); # docid_root rather
2446
2447 my $metaname = $doc_array_rec->{'metaname'};
2448 if (defined $metaname) {
2449 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2450 my $metavalue = $doc_array_rec->{'metavalue'};
2451 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2452
2453 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2454 # make "accumulate" the default (less destructive, as won't actually
2455 # delete any existing values)
2456 $metamode = "accumulate";
2457 }
2458
2459 # adding metapos and prevmetavalue support to import_metadata subroutines
2460 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2461 my $prevmetavalue = $self->{'prevmetavalue'};
2462
2463 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2464
2465 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2466 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2467
2468 foreach my $metatable_rec ( @$metatable ) {
2469 $metaname = $metatable_rec->{'metaname'};
2470 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2471 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2472 # make "accumulate" the default (less destructive, as won't actually
2473 # delete any existing values)
2474 $metamode = "accumulate";
2475 }
2476
2477 # No support for metapos and prevmetavalue in the JSON metatable substructure
2478 my $metapos = undef;
2479 my $prevmetavalue = undef;
2480 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2481
2482 foreach my $metavalue ( @$metavals ) {
2483 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2484
2485 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
2486 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2487 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2488 # metavalue for this metaname will be lost
2489 $metamode = "accumulate";
2490 }
2491 }
2492 }
2493 }
2494 }
2495
2496 # always a success message
2497 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2498 $gsdl_cgi->generate_ok_message($mess);
2499}
2500
2501# always returns true (1)
2502sub set_import_metadata_entry
2503{
2504 my $self = shift @_;
2505 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
2506
2507 my $info_mess = <<RAWEND;
2508****************************
2509 set_import_metadata_entry()
2510****************************
2511collect = $collect
2512collect_dir = $collect_dir
2513infodbtype = $infodbtype
2514arcinfo_doc_filename = $arcinfo_doc_filename
2515docid = $docid
2516metaname = $metaname
2517metapos = $metapos
2518metavalue = $metavalue
2519metamode = $metamode
2520prevmetavalue = $prevmetavalue
2521****************************
2522RAWEND
2523
2524 $gsdl_cgi->generate_message($info_mess);
2525
2526 # import works with metadata.xml which can have inherited metadata
2527 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2528 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2529 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2530 # what they're doing if they provide a metapos.
2531 if(defined $metapos) {
2532 print STDERR "@@@@ WARNING: metapos defined.\n";
2533 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2534 }
2535
2536 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2537 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2538 my $metadata_xml_file;
2539 my $import_filename = undef;
2540
2541 if (defined $docid) {
2542 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2543 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2544
2545 # This now stores the full pathname
2546 $import_filename = $doc_rec->{'src-file'}->[0];
2547 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2548 # as the array version of the method doesn't support the -f parameter yet
2549 my $import_file = $self->{'f'};
2550 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2551 }
2552
2553 # figure out correct metadata.xml file [?]
2554 # Assuming the metadata.xml file is next to the source file
2555 # Note: This will not work if it is using the inherited metadata from the parent folder
2556 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2557 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2558
2559 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2560 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2561 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2562 if(!defined $metapos && $metamode eq "override") {
2563## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2564 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2565
2566 }
2567
2568 # Edit the metadata.xml
2569 # Modified by Jeffrey from DL Consulting
2570 # Handle the case where there is one metadata.xml file for multiple FileSets
2571 # The XML filter needs to know whether it is in the right FileSet
2572 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2573 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2574 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2575 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2576 #return 0;
2577 return $metadata_xml_filename;
2578}
2579
2580sub _remove_import_metadata
2581{
2582 my $self = shift @_;
2583
2584 my $collect = $self->{'collect'};
2585 my $gsdl_cgi = $self->{'gsdl_cgi'};
2586# my $gsdlhome = $self->{'gsdlhome'};
2587 my $infodbtype = $self->{'infodbtype'};
2588
2589 # Obtain the collect dir
2590 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2591 my $site = $self->{'site'};
2592 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2593
2594 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2595 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2596
2597 # look up additional args
2598 my $docid = $self->{'d'};
2599 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2600 {
2601 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
2602 }
2603
2604 my $metaname = $self->{'metaname'};
2605 my $metapos = $self->{'metapos'};
2606 my $metavalue = $self->{'metavalue'};
2607 if(defined $metavalue) {
2608 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2609 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2610 $metapos = 0;
2611 }
2612 my $metamode = $self->{'metamode'} || undef;
2613
2614 # import works with metadata.xml which can have inherited metadata
2615 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2616 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2617 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2618 # what they're doing if they provide a metapos.
2619 if(defined $metapos) {
2620 print STDERR "@@@@ WARNING: metapos defined.\n";
2621 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2622 }
2623
2624 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2625 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2626 my $metadata_xml_file;
2627 my $import_filename = undef;
2628 if (defined $docid)
2629 {
2630 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2631 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2632
2633 # This now stores the full pathname
2634 $import_filename = $doc_rec->{'src-file'}->[0];
2635 }
2636
2637 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2638 {
2639 $gsdl_cgi->generate_error("There is no metadata\n");
2640 }
2641
2642 # figure out correct metadata.xml file [?]
2643 # Assuming the metadata.xml file is next to the source file
2644 # Note: This will not work if it is using the inherited metadata from the parent folder
2645 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2646 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2647
2648 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
2649
2650 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2651 $mess .= " $metaname";
2652 $mess .= " = $metavalue\n";
2653
2654 $gsdl_cgi->generate_ok_message($mess);
2655
2656 #return $status; # in case calling functions have a use for this
2657}
2658
2659sub remove_import_metadata
2660{
2661 my $self = shift @_;
2662
2663 my $username = $self->{'username'};
2664 my $collect = $self->{'collect'};
2665 my $gsdl_cgi = $self->{'gsdl_cgi'};
2666
2667 if ($baseaction::authentication_enabled) {
2668 # Ensure the user is allowed to edit this collection
2669 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2670 }
2671
2672 # Make sure the collection isn't locked by someone else
2673 $self->lock_collection($username, $collect);
2674
2675 $self->_remove_import_metadata(@_);
2676
2677 # Release the lock once it is done
2678 $self->unlock_collection($username, $collect);
2679
2680}
2681
2682sub remove_from_metadata_xml
2683{
2684 my $self = shift @_;
2685 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2686 # metamode generally has no meaning for removing meta, but is used by set_meta
2687 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2688
2689 # Set the call-back functions for the metadata tags
2690 my @rules =
2691 (
2692 _default => 'raw',
2693 'Metadata' => \&rfmxml_metadata,
2694 'FileName' => \&mxml_filename
2695 );
2696
2697 my $parser = XML::Rules->new
2698 (
2699 rules => \@rules,
2700 style => 'filter',
2701 output_encoding => 'utf8',
2702 #normalisespaces => 1,
2703 stripspaces => 2|0|0 # ineffectual
2704 );
2705
2706 my $xml_in = "";
2707 if (!open(MIN,"<$metadata_xml_filename"))
2708 {
2709 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2710 }
2711 else
2712 {
2713 # Read them in
2714 my $line;
2715 while (defined ($line=<MIN>)) {
2716 $xml_in .= $line;
2717 }
2718 close(MIN);
2719
2720 # Filter with the call-back functions
2721 my $xml_out = "";
2722
2723 my $MOUT;
2724 if (!open($MOUT,">$metadata_xml_filename")) {
2725 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2726 }
2727 else {
2728 binmode($MOUT,":utf8");
2729 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
2730 close($MOUT);
2731 }
2732 }
2733}
2734
2735sub rfmxml_metadata
2736{
2737 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2738
2739 # metadata.xml does not handle subsections
2740
2741 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2742 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2743 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2744 {
2745 if (!defined $parser->{'parameters'}->{'poscount'})
2746 {
2747 $parser->{'parameters'}->{'poscount'} = 0;
2748 }
2749 else
2750 {
2751 $parser->{'parameters'}->{'poscount'}++;
2752 }
2753
2754 # if overriding but no metapos, then clear all the meta for this metaname
2755 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2756 return [];
2757 }
2758
2759 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2760 {
2761 return [];
2762 }
2763
2764 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2765 {
2766 return [];
2767 }
2768 }
2769
2770 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2771 return [$tagname => $attrHash];
2772}
2773
2774sub _remove_live_metadata
2775{
2776 my $self = shift @_;
2777
2778 my $collect = $self->{'collect'};
2779 my $gsdl_cgi = $self->{'gsdl_cgi'};
2780# my $gsdlhome = $self->{'gsdlhome'};
2781 my $infodbtype = $self->{'infodbtype'};
2782
2783 # Obtain the collect dir
2784 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2785 my $site = $self->{'site'};
2786 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2787
2788
2789 # look up additional args
2790 my $docid = $self->{'d'};
2791 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2792 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2793 }
2794
2795 # Generate the dbkey
2796 my $metaname = $self->{'metaname'};
2797 my $dbkey = "$docid.$metaname";
2798
2799 # To people who know $collect_tail please add some comments
2800 # Obtain the live gdbm_db path
2801 my $collect_tail = $collect;
2802 $collect_tail =~ s/^.*[\/\\]//;
2803 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2804 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
2805
2806 # Remove the key
2807 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
2808 my $status = system($cmd);
2809 if ($status != 0) {
2810 # Catch error if gdbmdel failed
2811 my $mess = "Failed to set metadata key: $dbkey\n";
2812
2813 $mess .= "PATH: $ENV{'PATH'}\n";
2814 $mess .= "cmd = $cmd\n";
2815 $mess .= "Exit status: $status\n";
2816 $mess .= "System Error Message: $!\n";
2817
2818 $gsdl_cgi->generate_error($mess);
2819 }
2820 else {
2821 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2822 }
2823
2824}
2825
2826sub remove_live_metadata
2827{
2828 my $self = shift @_;
2829
2830 my $username = $self->{'username'};
2831 my $collect = $self->{'collect'};
2832 my $gsdl_cgi = $self->{'gsdl_cgi'};
2833 my $gsdlhome = $self->{'gsdlhome'};
2834
2835 if ($baseaction::authentication_enabled) {
2836 # Ensure the user is allowed to edit this collection
2837 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2838 }
2839
2840 # Make sure the collection isn't locked by someone else
2841 $self->lock_collection($username, $collect);
2842
2843 $self->_remove_live_metadata(@_);
2844
2845 $self->unlock_collection($username, $collect);
2846}
2847
2848sub remove_metadata
2849{
2850 my $self = shift @_;
2851
2852 my $where = $self->{'where'};
2853 if(!$where) {
2854 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2855 return;
2856 }
2857
2858 my $username = $self->{'username'};
2859 my $collect = $self->{'collect'};
2860 my $gsdl_cgi = $self->{'gsdl_cgi'};
2861
2862 if ($baseaction::authentication_enabled) {
2863 # Ensure the user is allowed to edit this collection
2864 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2865 }
2866
2867 # Make sure the collection isn't locked by someone else
2868 $self->lock_collection($username, $collect);
2869
2870 # check which directories need to be processed, specified in $where as
2871 # any combination of import|archives|index|live
2872 if($where =~ m/import/) {
2873 $self->_remove_import_metadata(@_);
2874 }
2875 if($where =~ m/archives/) {
2876 $self->_remove_archives_metadata(@_);
2877 }
2878 if($where =~ m/index/) {
2879 $self->_remove_index_metadata(@_);
2880 }
2881
2882 # Release the lock once it is done
2883 $self->unlock_collection($username, $collect);
2884}
2885
2886# the internal version, without authentication
2887sub _remove_index_metadata
2888{
2889 my $self = shift @_;
2890
2891 my $collect = $self->{'collect'};
2892 my $gsdl_cgi = $self->{'gsdl_cgi'};
2893# my $gsdlhome = $self->{'gsdlhome'};
2894 my $infodbtype = $self->{'infodbtype'};
2895
2896 # Obtain the collect dir
2897 my $site = $self->{'site'};
2898 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2899 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2900
2901 # look up additional args
2902 my $docid = $self->{'d'};
2903 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2904 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2905 }
2906 my $metaname = $self->{'metaname'};
2907 my $metapos = $self->{'metapos'};
2908 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2909
2910 # To people who know $collect_tail please add some comments
2911 # Obtain the path to the database
2912 my $collect_tail = $collect;
2913 $collect_tail =~ s/^.*[\/\\]//;
2914 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
2915 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
2916
2917 # Read the docid entry
2918 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
2919
2920 # Check to make sure the key does exist
2921 if (!defined ($doc_rec->{$metaname})) {
2922 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2923 }
2924
2925 # Obtain the specified metadata pos
2926 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2927 if(!defined $metapos && !defined $metavalue) {
2928 $metapos = 0;
2929 }
2930
2931
2932 # consider check key is defined before deleting?
2933 # Loop through the metadata array and ignore the specified position
2934 my $filtered_metadata = [];
2935 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
2936 for (my $i=0; $i<$num_metadata_vals; $i++) {
2937 my $metaval = shift(@{$doc_rec->{$metaname}});
2938
2939 if (!defined $metavalue && $i != $metapos) {
2940 push(@$filtered_metadata,$metaval);
2941 }
2942
2943 if(defined $metavalue && !($metavalue eq $metaval))
2944 {
2945 push(@$filtered_metadata,$metaval);
2946 }
2947 }
2948 $doc_rec->{$metaname} = $filtered_metadata;
2949
2950 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2951 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2952
2953 if ($status != 0) {
2954 my $mess = "Failed to set metadata key: $docid\n";
2955
2956 $mess .= "PATH: $ENV{'PATH'}\n";
2957 $mess .= "Exit status: $status\n";
2958 $mess .= "System Error Message: $!\n";
2959
2960 $gsdl_cgi->generate_error($mess);
2961 }
2962 else {
2963 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2964 $mess .= " $metaname";
2965 $mess .= "->[$metapos]" if (defined $metapos);
2966
2967 $gsdl_cgi->generate_ok_message($mess);
2968 }
2969
2970 #return $status; # in case calling functions have a use for this
2971}
2972
2973sub remove_index_metadata
2974{
2975 my $self = shift @_;
2976
2977 my $username = $self->{'username'};
2978 my $collect = $self->{'collect'};
2979 my $gsdl_cgi = $self->{'gsdl_cgi'};
2980# my $gsdlhome = $self->{'gsdlhome'};
2981
2982 if ($baseaction::authentication_enabled) {
2983 # Ensure the user is allowed to edit this collection
2984 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
2985 }
2986
2987 # Obtain the collect dir
2988 my $site = $self->{'site'};
2989 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2990 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2991
2992 # Make sure the collection isn't locked by someone else
2993 $self->lock_collection($username, $collect);
2994
2995 $self->_remove_index_metadata(@_);
2996
2997 # Release the lock once it is done
2998 $self->unlock_collection($username, $collect);
2999}
3000
3001
3002# Was trying to reused the codes, but the functions need to be broken
3003# down more before they can be reused, otherwise there will be too
3004# much overhead and duplicate process...
3005sub insert_metadata
3006{
3007 my $self = shift @_;
3008
3009 my $username = $self->{'username'};
3010 my $collect = $self->{'collect'};
3011 my $gsdl_cgi = $self->{'gsdl_cgi'};
3012 my $gsdlhome = $self->{'gsdlhome'};
3013 my $infodbtype = $self->{'infodbtype'};
3014
3015 # If the import metadata and gdbm database have been updated, we
3016 # need to insert some notification to warn user that the the text
3017 # they see at the moment is not indexed and require a rebuild.
3018 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3019
3020 if ($baseaction::authentication_enabled) {
3021 # Ensure the user is allowed to edit this collection
3022 $self->authenticate_user($username, $collect);
3023 }
3024
3025 # Obtain the collect and archive dir
3026 my $site = $self->{'site'};
3027 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3028 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3029 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3030
3031 # Make sure the collection isn't locked by someone else
3032 $self->lock_collection($username, $collect);
3033
3034 # Check additional args
3035 my $docid = $self->{'d'};
3036 if (!defined($docid)) {
3037 $gsdl_cgi->generate_error("No document id is specified: d=...");
3038 }
3039 my $metaname = $self->{'metaname'};
3040 if (!defined($metaname)) {
3041 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3042 }
3043 my $metavalue = $self->{'metavalue'};
3044 if (!defined($metavalue) || $metavalue eq "") {
3045 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3046 }
3047 # make "accumulate" the default (less destructive, as won't actually
3048 # delete any existing values)
3049 my $metamode = "accumulate";
3050
3051 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3052 my $metapos = undef;
3053 my $prevmetavalue = undef;
3054
3055 #=======================================================================#
3056 # set_import_metadata [START]
3057 #=======================================================================#
3058 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3059 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3060 my $metadata_xml_file;
3061 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3062 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
3063
3064 # This now stores the full pathname
3065 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3066
3067 # figure out correct metadata.xml file [?]
3068 # Assuming the metadata.xml file is next to the source file
3069 # Note: This will not work if it is using the inherited metadata from the parent folder
3070 my ($import_tailname, $import_dirname)
3071 = File::Basename::fileparse($import_filename);
3072 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3073
3074 # Shane's escape characters
3075 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3076 $metavalue =~ s/\,/&#44;/g;
3077 $metavalue =~ s/\:/&#58;/g;
3078 $metavalue =~ s/\|/&#124;/g;
3079 $metavalue =~ s/\(/&#40;/g;
3080 $metavalue =~ s/\)/&#41;/g;
3081 $metavalue =~ s/\[/&#91;/g;
3082 $metavalue =~ s/\\/&#92;/g;
3083 $metavalue =~ s/\]/&#93;/g;
3084 $metavalue =~ s/\{/&#123;/g;
3085 $metavalue =~ s/\}/&#125;/g;
3086 $metavalue =~ s/\"/&#34;/g;
3087 $metavalue =~ s/\`/&#96;/g;
3088 $metavalue =~ s/\n/_newline_/g;
3089
3090 # Edit the metadata.xml
3091 # Modified by Jeffrey from DL Consulting
3092 # Handle the case where there is one metadata.xml file for multiple FileSets
3093 # The XML filter needs to know whether it is in the right FileSet
3094 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3095 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
3096 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3097 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
3098 #=======================================================================#
3099 # set_import_metadata [END]
3100 #=======================================================================#
3101
3102
3103 #=======================================================================#
3104 # set_metadata (accumulate version) [START]
3105 #=======================================================================#
3106 # To people who know $collect_tail please add some comments
3107 # Obtain path to the database
3108 my $collect_tail = $collect;
3109 $collect_tail =~ s/^.*[\/\\]//;
3110 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
3111 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
3112
3113 # Read the docid entry
3114 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
3115
3116 # Protect the quotes
3117 $metavalue =~ s/\"/\\\"/g;
3118
3119 # Adds the pending macro
3120 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3121
3122 # If the metadata doesn't exist, create a new one
3123 if (!defined($doc_rec->{$metaname})){
3124 $doc_rec->{$metaname} = [ $macro_metavalue ];
3125 }
3126 # Else, let's acculumate the values
3127 else {
3128 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3129 }
3130
3131 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3132 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3133
3134 if ($status != 0) {
3135 # Catch error if gdbmget failed
3136 my $mess = "Failed to set metadata key: $docid\n";
3137
3138 $mess .= "PATH: $ENV{'PATH'}\n";
3139 $mess .= "Exit status: $status\n";
3140 $mess .= "System Error Message: $!\n";
3141
3142 $gsdl_cgi->generate_error($mess);
3143 }
3144 else {
3145 my $mess = "insert-metadata successful: Key[$docid]\n";
3146 $mess .= " [In metadata.xml] $metaname";
3147 $mess .= " = $metavalue\n";
3148 $mess .= " [In database] $metaname";
3149 $mess .= " = $macro_metavalue\n";
3150 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3151 $gsdl_cgi->generate_ok_message($mess);
3152 }
3153 #=======================================================================#
3154 # set_metadata (accumulate version) [END]
3155 #=======================================================================#
3156
3157 # Release the lock once it is done
3158 $self->unlock_collection($username, $collect);
3159}
3160
31611;
Note: See TracBrowser for help on using the repository browser.