source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 27314

Last change on this file since 27314 was 27314, checked in by ak19, 11 years ago

No authentication and collection-locking when getting metadata. (Only for setting metadata.)

File size: 104.7 KB
RevLine 
[19293]1###########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
[27157]10# This program is free software; you can redistr te it and/or modify
[19293]11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[24071]37
[19293]38BEGIN {
[22331]39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
[25097]46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
[27176]50 'optional-args' => [ "metapos" ] },
[19499]51
[25097]52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
[27157]56 "get-index-metadata" => {
[25097]57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
[19499]59
[27157]60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
[25097]64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
[19499]67
[27312]68 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "where" ],
71 'help-string' => [
72 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73 ]},
74
[25097]75 #SET METHODS
76 "set-live-metadata" => {
77 'compulsory-args' => [ "d", "metaname", "metavalue" ],
78 'optional-args' => [ ] },
[19499]79
[27157]80 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81 'compulsory-args' => [ "metaname", "metavalue" ],
82 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84 "set-index-metadata" => {
[25097]85 'compulsory-args' => [ "d", "metaname", "metavalue" ],
[27168]86 'optional-args' => [ "metapos", "metamode" ] },
[19499]87
[25097]88 "set-archives-metadata" => {
89 'compulsory-args' => [ "d", "metaname", "metavalue" ],
90 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91
92 "set-import-metadata" => {
93 'compulsory-args' => [ "metaname", "metavalue" ],
[27176]94 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
[25097]95
96 #SET METHODS (ARRAY)
97 "set-metadata-array" => {
[27168]98 'compulsory-args' => [ "where", "json" ],
[27261]99 'optional-args' => [ ],
100 'help-string' => [
101 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102
103 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
[24943]110
[25097]111 "set-archives-metadata-array" => {
112 'compulsory-args' => [ "json" ],
113 'optional-args' => [ ] },
114
115 "set-import-metadata-array" => {
116 'compulsory-args' => [ "json" ],
117 'optional-args' => [ ] },
[27168]118
119 "set-index-metadata-array" => {
120 'compulsory-args' => [ "json" ],
121 'optional-args' => [ ] },
122
123 "set-live-metadata-array" => {
124 'compulsory-args' => [ "json" ],
125 'optional-args' => [ ] },
[25097]126
127 #REMOVE METHODS
128 "remove-import-metadata" => {
129 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
[27176]130 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
[25097]131
132 "remove-archives-metadata" => {
133 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134 'optional-args' => [ "metapos", "metavalue" ] },
[20538]135
[25097]136 "remove-live-metadata" => {
137 'compulsory-args' => [ "d", "metaname" ],
138 'optional-args' => [ ] },
[20538]139
[27157]140 "remove-index-metadata" => {
[25097]141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "metapos", "metavalue" ] },
[21716]143
[27157]144 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145 'compulsory-args' => [ "d", "metaname" ],
146 'optional-args' => [ "where", "metapos", "metavalue" ] },
147
[25097]148 #INSERT METHODS
149 "insert-metadata" => {
150 'compulsory-args' => [ "d", "metaname", "metavalue" ],
151 'optional-args' => [ ] }
[19293]152};
153
154
155sub new
156{
157 my $class = shift (@_);
158 my ($gsdl_cgi,$iis6_mode) = @_;
159
[23761]160 # Treat metavalue specially. To transmit this through a GET request
161 # the Javascript side has url-encoded it, so here we need to decode
162 # it before proceeding
163
164 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
[19293]171 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
172
173 return bless $self, $class;
174}
175
176
177sub get_live_metadata
178{
179 my $self = shift @_;
180
181 my $username = $self->{'username'};
182 my $collect = $self->{'collect'};
183 my $gsdl_cgi = $self->{'gsdl_cgi'};
184 my $gsdlhome = $self->{'gsdlhome'};
[23478]185 my $infodbtype = $self->{'infodbtype'};
[27180]186
[23447]187 # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]188 # so (for now) let's not bother to enforce authentication
[21715]189
190 # Obtain the collect dir
[23766]191 my $site = $self->{'site'};
192 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
193 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]194
[27314]195 # No locking collection when getting metadata, only when setting it
196# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
[19293]197
198 # look up additional args
199 my $docid = $self->{'d'};
200 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]201 $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]202 }
203
[21715]204 # Generate the dbkey
[19293]205 my $metaname = $self->{'metaname'};
206 my $dbkey = "$docid.$metaname";
207
[21715]208 # To people who know $collect_tail please add some comments
209 # Obtain path to the database
[19293]210 my $collect_tail = $collect;
211 $collect_tail =~ s/^.*[\/|\\]//;
[21564]212 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]213 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]214
215 # Obtain the content of the key
[21569]216 my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]217 if (open(GIN,"$cmd |") == 0) {
[21715]218 # Catch error if gdbmget failed
[19293]219 my $mess = "Failed to get metadata key: $metaname\n";
220 $mess .= "$!\n";
221
222 $gsdl_cgi->generate_error($mess);
223 }
224 else {
[23761]225 binmode(GIN,":utf8");
[21715]226 # Read everything in and concatenate them into $metavalue
[19293]227 my $metavalue = "";
228 my $line;
229 while (defined ($line=<GIN>)) {
230 $metavalue .= $line;
231 }
232 close(GIN);
[21715]233 chomp($metavalue); # Get rid off the tailing newlines
[19293]234 $gsdl_cgi->generate_ok_message("$metavalue");
235 }
[21715]236
237 # Release the lock once it is done
[27314]238# $self->unlock_collection($username, $collect);
[19499]239}
[19293]240
[27157]241# just calls the index version
[19499]242sub get_metadata
243{
244 my $self = shift @_;
[27157]245 $self->get_index_metadata(@_);
246}
[19499]247
[27312]248# JSON version that will get the requested metadata values
249# from the requested source (index, import, archives or live)
250# One of the params is a JSON string and the return value is JSON too
251# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
252sub get_metadata_array
253{
254 my $self = shift @_;
255
256 my $where = $self->{'where'};
257 if (!$where) {
258 $where = "index"; # default behaviour is to get the values from index
259 }
260
261 # Only when setting metadata do we perform authentication and do we lock the collection
262 # not when getting metadata
263
264 # for get_meta_array, the where param can only be ONE of import, archives, index, live
265 if($where =~ m/index/) {
266 $self->_get_index_metadata_array(@_);
267 }
268 # still to be implemented for archives, import and live
269}
270
271# takes a JSON string and returns a JSON string
[27313]272# resulting string is of the form:
273# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
[27312]274sub _get_index_metadata_array
275{
276 my $self = shift @_;
277
278 my $collect = $self->{'collect'};
279 my $gsdl_cgi = $self->{'gsdl_cgi'};
280 my $site = $self->{'site'};
281 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
282
283 # look up additional args
284 my $infodbtype = $self->{'infodbtype'};
285
286 # To people who know $collect_tail please add some comments
287 # Obtain the path to the database
288 my $collect_tail = $collect;
289 $collect_tail =~ s/^.*[\/\\]//;
290 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
291 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
292
293 my $json_str = $self->{'json'};
294 my $doc_array = decode_json $json_str;
295
296 my $json_result_str = "[";
297 my $first_doc_rec = 1;
298
299 foreach my $doc_array_rec ( @$doc_array ) {
300
301 my $docid = $doc_array_rec->{'docid'};
302
303 if($first_doc_rec) {
304 $first_doc_rec = 0;
305 } else {
306 $json_result_str .= ",";
307 }
[27313]308 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
[27312]309
310 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
[27313]311 $json_result_str = $json_result_str . ",\"metatable\":[";
[27312]312
313 my $first_rec = 1;
314 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
315 if($first_rec) {
316 $first_rec = 0;
317 } else {
318 $json_result_str .= ",";
319 }
320
321 my $metaname = $metatable_rec->{'metaname'};
[27313]322 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
[27312]323
324 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
325 if(!defined $metapos) {
[27313]326 $metapos = 0;
[27312]327 }
328
329 # Read the docid entry
330 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
331
332 # Basically loop through and unescape_html the values
333 foreach my $k (keys %$doc_rec) {
334 my @escaped_v = ();
335 foreach my $v (@{$doc_rec->{$k}}) {
336 my $ev = &ghtml::unescape_html($v);
337 push(@escaped_v, $ev);
338 }
339 $doc_rec->{$k} = \@escaped_v;
340 }
341
342 # Obtain the specified metadata value(s)
343 my $metavalue;
344
345 if($metapos ne "all") { # get the value at a single metapos
346
347 $metavalue = $doc_rec->{$metaname}->[$metapos];
[27313]348 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
[27312]349
350 } else {
351 my $first_metaval = 1;
352 $metapos = 0;
353 $metavalue = $doc_rec->{$metaname}->[$metapos];
354
355 while (defined $metavalue) {
356 if($first_metaval) {
357 $first_metaval = 0;
358 } else {
359 $json_result_str .= ",";
360 }
361
[27313]362 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
[27312]363
364 $metapos++;
365 $metavalue = $doc_rec->{$metaname}->[$metapos];
366 }
367 }
368
369 $json_result_str .= "]}"; # close metavals array and metatable record
370 }
371
372 $json_result_str .= "]}"; # close metatable array and docid record
373 }
374
375 $json_result_str .= "]"; # close array of docids
376
377 $gsdl_cgi->generate_ok_message($json_result_str);
378}
379
380
[27157]381sub get_index_metadata
382{
383 my $self = shift @_;
384
[19499]385 my $username = $self->{'username'};
386 my $collect = $self->{'collect'};
387 my $gsdl_cgi = $self->{'gsdl_cgi'};
388 my $gsdlhome = $self->{'gsdlhome'};
389
[21715]390 # Obtain the collect dir
[23766]391 my $site = $self->{'site'};
392 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
393 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]394
395 # look up additional args
396 my $docid = $self->{'d'};
397 my $metaname = $self->{'metaname'};
398 my $metapos = $self->{'metapos'};
[23400]399 my $infodbtype = $self->{'infodbtype'};
[19499]400
[21715]401 # To people who know $collect_tail please add some comments
402 # Obtain path to the database
[19499]403 my $collect_tail = $collect;
404 $collect_tail =~ s/^.*[\/\\]//;
[21564]405 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]406 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]407
408 # Read the docid entry
[23400]409 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
410
[21715]411 # Basically loop through and unescape_html the values
[19499]412 foreach my $k (keys %$doc_rec) {
413 my @escaped_v = ();
414 foreach my $v (@{$doc_rec->{$k}}) {
415 my $ev = &ghtml::unescape_html($v);
416 push(@escaped_v, $ev);
417 }
418 $doc_rec->{$k} = \@escaped_v;
419 }
420
[21715]421 # Obtain the specified metadata value
[19499]422 $metapos = 0 if (!defined $metapos);
423 my $metavalue = $doc_rec->{$metaname}->[$metapos];
424 $gsdl_cgi->generate_ok_message("$metavalue");
[21715]425
[19293]426}
427
428
[25097]429sub get_import_metadata
430{
431 my $self = shift @_;
432
433 my $username = $self->{'username'};
434 my $collect = $self->{'collect'};
435 my $gsdl_cgi = $self->{'gsdl_cgi'};
436 my $gsdlhome = $self->{'gsdlhome'};
437
438 # Obtain the collect dir
439 my $site = $self->{'site'};
440 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
441 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
442
443 # look up additional args
444 my $docid = $self->{'d'};
445 my $metaname = $self->{'metaname'};
[27176]446 my $metapos = $self->{'metapos'};
447 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
448
[25097]449 my $infodbtype = $self->{'infodbtype'};
[27176]450 if (!defined $docid)
[25097]451 {
[27173]452 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
[25097]453 }
454
455 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
456 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
457 my $metadata_xml_file;
458 my $import_filename = undef;
459
460
461 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
462 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
463 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
464
465 # This now stores the full pathname
466 $import_filename = $doc_rec->{'src-file'}->[0];
467
468 # figure out correct metadata.xml file [?]
469 # Assuming the metadata.xml file is next to the source file
470 # Note: This will not work if it is using the inherited metadata from the parent folder
471 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
472 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
473
[27176]474 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
[25097]475
476}
477
478sub get_metadata_from_metadata_xml
479{
480 my $self = shift @_;
[27176]481 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
[25097]482
483 my @rules =
484 (
485 _default => 'raw',
486 'Metadata' => \&gfmxml_metadata,
487 'FileName' => \&mxml_filename
488 );
489
490 my $parser = XML::Rules->new
491 (
492 rules => \@rules,
493 output_encoding => 'utf8'
494 );
495
496 my $xml_in = "";
497 if (!open(MIN,"<$metadata_xml_filename"))
498 {
499 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
500 }
501 else
502 {
503 # Read them in
504 my $line;
505 while (defined ($line=<MIN>)) {
506 $xml_in .= $line;
507 }
508 close(MIN);
509
[27176]510 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
[25097]511
512 if(defined $parser->{'pad'}->{'metavalue'})
513 {
514 return $parser->{'pad'}->{'metavalue'};
515 }
516 else
517 {
518 return "";
519 }
520 }
521}
522
523sub gfmxml_metadata
524{
525 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
526
[27176]527 # no subsection support yet in metadata.xml
528
529 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
[25097]530 {
[27176]531 if (!defined $parser->{'parameters'}->{'poscount'})
532 {
533 $parser->{'parameters'}->{'poscount'} = 0;
534 }
535 else
536 {
537 $parser->{'parameters'}->{'poscount'}++;
538 }
539
540 # gets the first value by default, since metapos defaults to 0
541 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
542 {
543 if($parser->{'parameters'}->{'metapos'} > 0) {
544 print STDERR "@@@@ WARNING: non-zero metapos.\n";
545 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at".$parser->{'parameters'}->{'metapos'}.".\n";
546 }
547 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
548 }
[25097]549 }
550}
551
552sub get_archives_metadata
553{
554 my $self = shift @_;
555
556 my $username = $self->{'username'};
557 my $collect = $self->{'collect'};
558 my $gsdl_cgi = $self->{'gsdl_cgi'};
559 my $gsdlhome = $self->{'gsdlhome'};
560 my $infodbtype = $self->{'infodbtype'};
561
562 # Obtain the collect dir
563 my $site = $self->{'site'};
564 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
565
566 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
567
568 # look up additional args
569 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
570 $docid_secnum = "" if (!defined $docid_secnum);
571
572 my $metaname = $self->{'metaname'};
573 my $metapos = $self->{'metapos'};
574 $metapos = 0 if (!defined $metapos);
575
576 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
577 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
578
579 # This now stores the full pathname
580 my $doc_filename = $doc_rec->{'doc-file'}->[0];
581
582 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
583
584}
585
586sub get_metadata_from_archive_xml
587{
588 my $self = shift @_;
589 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
590
591 my @start_rules = ('Section' => \&dxml_start_section);
592
593 my @rules =
594 (
595 _default => 'raw',
596 'Metadata' => \&gfdxml_metadata
597 );
598
599 my $parser = XML::Rules->new
600 (
601 start_rules => \@start_rules,
602 rules => \@rules,
603 output_encoding => 'utf8'
604 );
605
606 my $xml_in = "";
607 if (!open(MIN,"<$doc_xml_filename"))
608 {
609 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
610 }
611 else
612 {
613 # Read them in
614 my $line;
615 while (defined ($line=<MIN>)) {
616 $xml_in .= $line;
617 }
618 close(MIN);
619
620 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
621
622 if(defined $parser->{'pad'}->{'metavalue'})
623 {
624 return $parser->{'pad'}->{'metavalue'};
625 }
626 else
627 {
628 return "";
629 }
630 }
631}
632
633sub gfdxml_metadata
634{
635 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
636
637 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
638 {
639 return;
640 }
641
642 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
643 {
644 if (!defined $parser->{'parameters'}->{'poscount'})
645 {
646 $parser->{'parameters'}->{'poscount'} = 0;
647 }
648 else
649 {
650 $parser->{'parameters'}->{'poscount'}++;
651 }
652 }
653
654 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
655 {
656 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
657 }
658}
659
[27157]660sub _set_live_metadata
[19293]661{
662 my $self = shift @_;
663
664 my $collect = $self->{'collect'};
665 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]666 #my $gsdlhome = $self->{'gsdlhome'};
[23400]667 my $infodbtype = $self->{'infodbtype'};
[19293]668
[21715]669 # Obtain the collect dir
[27157]670 my $site = $self->{'site'};
671 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[23766]672 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]673
674
675 # look up additional args
676 my $docid = $self->{'d'};
[21715]677 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[27168]678 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
[21715]679 }
[19293]680 my $metavalue = $self->{'metavalue'};
681
[21715]682 # Generate the dbkey
683 my $metaname = $self->{'metaname'};
[19293]684 my $dbkey = "$docid.$metaname";
685
[21715]686 # To people who know $collect_tail please add some comments
687 # Obtain path to the database
[19293]688 my $collect_tail = $collect;
689 $collect_tail =~ s/^.*[\/\\]//;
[21564]690 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]691 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19293]692
[21715]693 # Set the new value
[27176]694 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[19293]695 my $status = system($cmd);
696 if ($status != 0) {
[21715]697 # Catch error if gdbmget failed
[19293]698 my $mess = "Failed to set metadata key: $dbkey\n";
[21715]699
[19293]700 $mess .= "PATH: $ENV{'PATH'}\n";
701 $mess .= "cmd = $cmd\n";
702 $mess .= "Exit status: $status\n";
703 $mess .= "System Error Message: $!\n";
704
[19499]705 $gsdl_cgi->generate_error($mess);
[19293]706 }
707 else {
[19499]708 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
[19293]709 }
[27157]710
711 #return $status; # in case calling functions have any further use for this
712}
713
714sub set_live_metadata
715{
716 my $self = shift @_;
717
718 my $username = $self->{'username'};
719 my $collect = $self->{'collect'};
720 my $gsdl_cgi = $self->{'gsdl_cgi'};
721
722 if ($baseaction::authentication_enabled) {
[27295]723 # Ensure the user is allowed to edit this collection
724 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]725 }
726
727 # Make sure the collection isn't locked by someone else
728 $self->lock_collection($username, $collect);
729
730 $self->_set_live_metadata(@_);
731
[21715]732 # Release the lock once it is done
733 $self->unlock_collection($username, $collect);
[19293]734}
735
[27168]736sub set_index_metadata_entry
[24071]737{
[27168]738 my $self = shift @_;
739 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode) = @_;
740
[24071]741 # To people who know $collect_tail please add some comments
742 # Obtain path to the database
743 my $collect_tail = $collect;
744 $collect_tail =~ s/^.*[\/\\]//;
745 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
746 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
747
748# print STDERR "**** infodb file path = $infodb_file_path\n";
749# print STDERR "***** infodb type = $infodbtype\n";
[27168]750
[24071]751 # Read the docid entry
752 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[27168]753
[24071]754 # Set the metadata value
755 if (defined $metapos) {
[27168]756 # if metamode=accumulate AND metapos, warn user and then use metapos
757 if (defined $metamode && $metamode eq "accumulate") {
758 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
759 print STDERR "**** Proceeding by using metapos\n";
760 }
761 $doc_rec->{$metaname}->[$metapos] = $metavalue;
[24071]762 }
[27176]763 elsif (defined $metamode && $metamode eq "override") {
764 $doc_rec->{$metaname} = [ $metavalue ];
765 }
766 else { # default for index was to override, but because accumulate is less destructive,
767 # and because accumulate is the default for archives and import, that's the new default for index too
[27168]768 if(defined $doc_rec->{$metaname}) {
769 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
770 } else {
771 $doc_rec->{$metaname} = [ $metavalue ];
772 }
[24071]773 }
774
775 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
776
[27168]777 return $status;
[24071]778
779}
[19293]780
[27157]781sub _set_import_metadata
[19499]782{
783 my $self = shift @_;
[19293]784
[19499]785 my $collect = $self->{'collect'};
786 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]787 my $infodbtype = $self->{'infodbtype'};
788# my $gsdlhome = $self->{'gsdlhome'};
789
790 # Obtain the collect and archive dir
791 my $site = $self->{'site'};
792 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
793 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
794 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
795
796 # look up additional args
797 # want either d= or f=
798 my $docid = $self->{'d'};
[27173]799 my ($docid_root,$docid_secnum);
800 if(defined $docid) {
801 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
802 # as yet no support for setting subsection metadata in metadata.xml
803 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
[27176]804 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
[27173]805 return;
806 }
807 }
808
[27157]809 my $import_file = $self->{'f'};
810 if ((!defined $docid) && (!defined $import_file)) {
811 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
812 }
[19293]813
[27157]814 # Get the parameters and set default mode to "accumulate"
815 my $metaname = $self->{'metaname'};
816 my $metavalue = $self->{'metavalue'};
817## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
818 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
819
820 my $metamode = $self->{'metamode'};
821 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
822 # make "accumulate" the default (less destructive, as won't actually
823 # delete any existing values)
824 $metamode = "accumulate";
[19499]825 }
826
[27176]827 # adding metapos and prevmetavalue support to import_metadata subroutines
828 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
829 my $prevmetavalue = $self->{'prevmetavalue'};
[27157]830
[27176]831 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
832 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27157]833
834 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
835 $mess .= " $metaname";
836 $mess .= " = $metavalue";
837 $mess .= " ($metamode)\n";
838
839 $gsdl_cgi->generate_ok_message($mess);
840
841 #return $status; # in case calling functions have any further use for this
842}
843
844# the version of set_index_meta that doesn't do authentication
845sub _set_archives_metadata
846{
847 my $self = shift @_;
848
849 my $collect = $self->{'collect'};
850 my $gsdl_cgi = $self->{'gsdl_cgi'};
851 my $infodbtype = $self->{'infodbtype'};
852
853 # Obtain the collect and archive dir
854 my $site = $self->{'site'};
855 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
856 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
857
858 # look up additional args
859 my $docid = $self->{'d'};
860 my $metaname = $self->{'metaname'};
861 my $metavalue = $self->{'metavalue'};
862 my $prevmetavalue = $self->{'prevmetavalue'};
863
[27176]864 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
865 # Don't append "|| undef", since if metapos=0 it will then be set to undef
[27157]866
867 my $metamode = $self->{'metamode'};
868 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
869 # make "accumulate" the default (less destructive, as won't actually
870 # delete any existing values)
871 $metamode = "accumulate";
[27167]872 }
[27168]873
[27157]874 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
875 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
876
877 if ($status == 0) {
878 my $mess = "set-archives-metadata successful: Key[$docid]\n";
879 $mess .= " $metaname";
880 $mess .= "->[$metapos]" if (defined $metapos);
881 $mess .= " = $metavalue";
882 $mess .= " ($metamode)\n";
883
884 $gsdl_cgi->generate_ok_message($mess);
885 }
886 else {
887 my $mess .= "Failed to set archives metadata key: $docid\n";
888 $mess .= "Exit status: $status\n";
889 if(defined $self->{'error_msg'}) {
890 $mess .= "Error Message: $self->{'error_msg'}\n";
891 } else {
892 $mess .= "System Error Message: $!\n";
893 }
894 $mess .= "-" x 20 . "\n";
895
896 $gsdl_cgi->generate_error($mess);
897 }
[23766]898
[27157]899 #return $status; # in case calling functions have any further use for this
900}
[19499]901
902
[27157]903# the version of set_index_meta that doesn't do authentication
904sub _set_index_metadata
905{
906 my $self = shift @_;
907
908 my $collect = $self->{'collect'};
909 my $gsdl_cgi = $self->{'gsdl_cgi'};
910
911 my $site = $self->{'site'};
912 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
913
[19499]914 # look up additional args
915 my $docid = $self->{'d'};
916 my $metaname = $self->{'metaname'};
[27176]917 my $metapos = $self->{'metapos'}; # undef has meaning
[19499]918 my $metavalue = $self->{'metavalue'};
[23761]919 my $infodbtype = $self->{'infodbtype'};
[27168]920 my $metamode = $self->{'metamode'};
921
922 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
[23400]923
[19499]924 if ($status != 0) {
[23761]925 # Catch error if set infodb entry failed
926 my $mess = "Failed to set metadata key: $docid\n";
[19499]927
[23761]928 $mess .= "PATH: $ENV{'PATH'}\n";
929 $mess .= "Exit status: $status\n";
930 $mess .= "System Error Message: $!\n";
931
932 $gsdl_cgi->generate_error($mess);
[19499]933 }
934 else {
[27157]935 my $mess = "set-index-metadata successful: Key[$docid]\n";
[23761]936 $mess .= " $metaname";
937 $mess .= "->[$metapos]" if (defined $metapos);
[27157]938 $mess .= " = $metavalue\n";
[23761]939
940 $gsdl_cgi->generate_ok_message($mess);
[19499]941 }
[27157]942
943 #return $status; # in case calling functions have any further use for this
944}
945
946sub set_index_metadata
947{
948 my $self = shift @_;
949
950 my $username = $self->{'username'};
951 my $collect = $self->{'collect'};
952 my $gsdl_cgi = $self->{'gsdl_cgi'};
953 #my $gsdlhome = $self->{'gsdlhome'};
954
955 if ($baseaction::authentication_enabled) {
[27295]956 # Ensure the user is allowed to edit this collection
957 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]958 }
959
960 my $site = $self->{'site'};
961 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
962
963 $gsdl_cgi->checked_chdir($collect_dir);
964
965 # Obtain the collect dir
966 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
967
968 # Make sure the collection isn't locked by someone else
969 $self->lock_collection($username, $collect);
970
971 $self->_set_index_metadata(@_);
[21715]972
973 # Release the lock once it is done
974 $self->unlock_collection($username, $collect);
[19499]975}
976
[27157]977# call this to set the metadata for a combination of dirs archives, import or index, or live
978# if none specified, defaults to index which was the original behaviour of set_metadata.
979sub set_metadata
980{
981 my $self = shift @_;
[19499]982
[27157]983 # Testing that not defining a variable, setting it to "" or to " " all return false
984 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
985
986 my $where = $self->{'where'};
987 if(!$where) {
988 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
989 return;
990 }
991
992 # authenticate and lock collection once, even if processing multiple dirs
993 my $username = $self->{'username'};
994 my $collect = $self->{'collect'};
995 my $gsdl_cgi = $self->{'gsdl_cgi'};
996
997 if ($baseaction::authentication_enabled) {
[27295]998 # Ensure the user is allowed to edit this collection
999 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]1000 }
1001
1002 if($where =~ m/index/) {
1003 my $site = $self->{'site'};
1004 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1005 $gsdl_cgi->checked_chdir($collect_dir);
1006 }
1007
1008 # Make sure the collection isn't locked by someone else
1009 $self->lock_collection($username, $collect);
1010
1011
1012 # now at last can set the metadata. $where can specify multiple
1013 # $where is of the form: import|archives|index, or a subset thereof
1014
1015 #my @whichdirs = split('\|', $where);
1016
1017 # just check whether $where contains import/archives/index/live in turn, and
1018 # for each case, process it accordingly
1019 if($where =~ m/import/) {
1020 $self->_set_import_metadata(@_);
1021 }
1022
1023 if($where =~ m/archives/) {
1024
1025 # look up docID arg which is optional to set_metadata because it's optional
1026 # to set_import, but which is compulsory to set_archives_metadata
1027 my $docid = $self->{'d'};
1028 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[27168]1029 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
[27157]1030 }
[27168]1031 # we have a docid, so can set archives meta
1032 $self->_set_archives_metadata(@_);
[27157]1033 }
1034
1035 if($where =~ m/index/) {
1036
1037 # look up docID arg which is optional to set_metadata because it's optional
1038 # to set_import, but which is compulsory to set_archives_metadata
1039 my $docid = $self->{'d'};
1040 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1041 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1042 }
[27168]1043 # we have a docid, so can set index meta
1044 $self->_set_index_metadata(@_);
[27157]1045 }
1046
1047 if($where =~ m/live/) {
1048 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1049 }
1050
1051 # Release the lock once it is done
1052 $self->unlock_collection($username, $collect);
1053}
1054
[24071]1055sub set_metadata_array
1056{
1057 my $self = shift @_;
1058
[27168]1059 my $where = $self->{'where'};
1060 if(!$where) {
1061 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1062 return;
1063 }
1064
[24071]1065 my $username = $self->{'username'};
1066 my $collect = $self->{'collect'};
1067 my $gsdl_cgi = $self->{'gsdl_cgi'};
1068
1069 if ($baseaction::authentication_enabled) {
1070 # Ensure the user is allowed to edit this collection
[27295]1071 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[24071]1072 }
1073
[27168]1074 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1075 # But including the stmt during this code reorganisation to preserve as-is what used to happen
1076 my $site = $self->{'site'};
1077 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1078 $gsdl_cgi->checked_chdir($collect_dir);
1079
1080 # Make sure the collection isn't locked by someone else
1081 $self->lock_collection($username, $collect);
1082
1083 if($where =~ m/import/) {
1084 $self->_set_import_metadata_array(@_);
1085 }
1086 if($where =~ m/archives/) {
1087 $self->_set_archives_metadata_array(@_);
1088 }
1089 if($where =~ m/index/) {
1090 $self->_set_index_metadata_array(@_);
1091 }
1092 if($where =~ m/live/) {
1093 $self->_set_live_metadata_array(@_);
1094 }
1095
1096 # Release the lock once it is done
1097 $self->unlock_collection($username, $collect);
1098}
1099
1100sub _set_index_metadata_array
1101{
1102 my $self = shift @_;
1103
1104 my $collect = $self->{'collect'};
1105 my $gsdl_cgi = $self->{'gsdl_cgi'};
1106 my $site = $self->{'site'};
1107 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1108
1109
1110 # look up additional args
1111
1112 my $infodbtype = $self->{'infodbtype'};
1113
1114 my $json_str = $self->{'json'};
1115 my $doc_array = decode_json $json_str;
1116
1117
1118 my $global_status = 0;
1119 my $global_mess = "";
1120
1121 my @all_docids = ();
1122
1123 foreach my $doc_array_rec ( @$doc_array ) {
1124
1125 my $status = -1;
1126 my $docid = $doc_array_rec->{'docid'};
1127
1128 push(@all_docids,$docid);
1129
1130 my $metaname = $doc_array_rec->{'metaname'};
1131 if(defined $metaname) {
[27176]1132 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
[27168]1133 my $metavalue = $doc_array_rec->{'metavalue'};
1134 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1135
1136 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1137 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1138 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1139
1140 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1141 $metaname = $metatable_rec->{'metaname'};
1142 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1143 my $metapos = undef;
1144 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1145
1146 foreach my $metavalue ( @$metavals ) { # metavals is an array
1147 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1148 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1149 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1150 # metavalue for this metaname will be lost
1151 $metamode = "accumulate";
1152 }
1153 }
1154 }
1155 }
1156
1157 if ($status != 0) {
1158 # Catch error if set infodb entry failed
1159 $global_status = $status;
1160 $global_mess .= "Failed to set metadata key: $docid\n";
1161 $global_mess .= "Exit status: $status\n";
1162 $global_mess .= "System Error Message: $!\n";
1163 $global_mess .= "-" x 20;
1164 }
1165 }
1166
1167 if ($global_status != 0) {
1168 $global_mess .= "PATH: $ENV{'PATH'}\n";
1169 $gsdl_cgi->generate_error($global_mess);
1170 }
1171 else {
1172 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1173 $gsdl_cgi->generate_ok_message($mess);
1174 }
1175}
1176
1177sub set_index_metadata_array
1178{
1179 my $self = shift @_;
1180
1181 my $username = $self->{'username'};
1182 my $collect = $self->{'collect'};
1183 my $gsdl_cgi = $self->{'gsdl_cgi'};
1184# my $gsdlhome = $self->{'gsdlhome'};
1185
1186 if ($baseaction::authentication_enabled) {
[27295]1187 # Ensure the user is allowed to edit this collection
1188 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27168]1189 }
1190
[24071]1191 my $site = $self->{'site'};
1192 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1193
1194 $gsdl_cgi->checked_chdir($collect_dir);
1195
1196 # Obtain the collect dir
1197 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1198
1199 # Make sure the collection isn't locked by someone else
1200 $self->lock_collection($username, $collect);
1201
[27168]1202 $self->_set_index_metadata_array(@_);
1203
1204 # Release the lock once it is done
1205 $self->unlock_collection($username, $collect);
1206}
1207
1208# experimental, newly added in and untested
1209sub _set_live_metadata_array
1210{
1211 my $self = shift @_;
1212
1213 my $collect = $self->{'collect'};
1214 my $gsdl_cgi = $self->{'gsdl_cgi'};
1215
1216 my $site = $self->{'site'};
1217 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1218
1219
[24071]1220 # look up additional args
[27168]1221 my $infodbtype = $self->{'infodbtype'};
1222 # To people who know $collect_tail please add some comments
1223 # Obtain path to the database
1224 my $collect_tail = $collect;
1225 $collect_tail =~ s/^.*[\/\\]//;
1226 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1227 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1228
1229
1230 my $json_str = $self->{'json'};
1231 my $doc_array = decode_json $json_str;
1232
1233
1234 my $global_status = 0;
1235 my $global_mess = "";
1236
1237 my @all_docids = ();
1238
1239
1240 foreach my $doc_array_rec ( @$doc_array ) {
[24071]1241
[27168]1242 my $status = -1;
1243 my $docid = $doc_array_rec->{'docid'};
1244
1245 push(@all_docids,$docid);
1246
1247 my $metaname = $doc_array_rec->{'metaname'};
1248 if(defined $metaname) {
1249 my $dbkey = "$docid.$metaname";
1250 my $metavalue = $doc_array_rec->{'metavalue'};
1251
1252 # Set the new value
[27176]1253 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[27168]1254 $status = system($cmd);
1255
1256 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1257 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1258 foreach my $metatable_rec ( @$metatable ) {
1259 $metaname = $metatable_rec->{'metaname'};
1260 my $dbkey = "$docid.$metaname";
1261
1262 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1263 foreach my $metavalue ( @$metavals ) {
[27176]1264 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[27168]1265 $status = system($cmd);
[24071]1266 }
[27168]1267 }
1268
[24071]1269 }
1270
[27168]1271 if ($status != 0) {
1272 # Catch error if gdbmget failed
1273 $global_status = $status;
1274 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1275 $global_mess .= "Exit status: $status\n";
1276 $global_mess .= "System Error Message: $!\n";
1277 $global_mess .= "-" x 20;
1278 }
[24071]1279 }
[27168]1280
1281 if ($global_status != 0) {
1282 $global_mess .= "PATH: $ENV{'PATH'}\n";
1283 $gsdl_cgi->generate_error($global_mess);
1284 }
[24071]1285 else {
[27168]1286 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1287 $gsdl_cgi->generate_ok_message($mess);
[24071]1288 }
[27168]1289}
1290
1291sub set_live_metadata_array
1292{
1293 my $self = shift @_;
1294
1295 my $username = $self->{'username'};
1296 my $collect = $self->{'collect'};
1297 my $gsdl_cgi = $self->{'gsdl_cgi'};
1298
1299 if ($baseaction::authentication_enabled) {
[27295]1300 # Ensure the user is allowed to edit this collection
1301 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27168]1302 }
1303
1304 my $site = $self->{'site'};
1305 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1306
1307 $gsdl_cgi->checked_chdir($collect_dir);
1308
1309 # Make sure the collection isn't locked by someone else
1310 $self->lock_collection($username, $collect);
1311
1312 $self->_set_live_metadata_array(@_);
1313
[24071]1314 # Release the lock once it is done
1315 $self->unlock_collection($username, $collect);
1316}
1317
1318
[20538]1319sub dxml_metadata
1320{
1321 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1322 my $metaname = $parser->{'parameters'}->{'metaname'};
1323 my $metamode = $parser->{'parameters'}->{'metamode'};
[25891]1324
[27007]1325 print STDERR "**** Processing closing </Metadata> tag\n";
[23761]1326
1327 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1328
1329 # Find the right metadata tag and checks if we are going to
1330 # override it
1331 #
1332 # Note: This over writes the first metadata block it
1333 # encountered. If there are multiple Sections in the doc.xml, it
1334 # might not behave as you would expect
[20538]1335
[23761]1336 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1337## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1338## print STDERR "**** metamode = $metamode\n";
1339
[25097]1340 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1341 {
1342 my $name_attr = $attrHash->{'name'};
1343 if (($name_attr eq $metaname) && ($metamode eq "override"))
1344 {
1345 if (!defined $parser->{'parameters'}->{'poscount'})
1346 {
1347 $parser->{'parameters'}->{'poscount'} = 0;
1348 }
1349 else
1350 {
1351 $parser->{'parameters'}->{'poscount'}++;
1352 }
1353
1354 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1355 {
[27111]1356 ##print STDERR "#### got match!!\n";
[25097]1357 # Get the value and override the current value
1358 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1359 $attrHash->{'_content'} = $metavalue;
1360
1361 # Don't want it to wipe out any other pieces of metadata
1362 $parser->{'parameters'}->{'metamode'} = "done";
1363 }
[25891]1364 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
[25097]1365 {
[25102]1366 my $metavalue = $parser->{'parameters'}->{'metavalue'};
[25097]1367 $attrHash->{'_content'} = $metavalue;
1368 $parser->{'parameters'}->{'metamode'} = "done";
1369 }
1370 }
[20538]1371 }
1372
[21716]1373 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1374 return [$tagname => $attrHash];
[20538]1375}
1376
[27111]1377# This method exists purely for catching invalid section numbers that the client
1378# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1379# we've seen all the Sections in the doc.xml, and none of their section nums matched
1380# if the metamode has not been set to 'done' by then.
1381sub dxml_archive
1382{
1383 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1384 my $metamode = $parser->{'parameters'}->{'metamode'};
1385
1386 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1387 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1388
1389# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1390
1391 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
[27112]1392 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
[20538]1393
[27111]1394 my $metaname = $parser->{'parameters'}->{'metaname'};
1395 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1396
[27112]1397 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1398 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1399 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1400 print STDERR "\tin section $opt_doc_secnum.\n";
1401 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
[27111]1402 }
1403
1404 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1405 return [$tagname => $attrHash];
1406}
1407
[20538]1408sub dxml_description
1409{
[25558]1410 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1411 my $metamode = $parser->{'parameters'}->{'metamode'};
[20538]1412
[27111]1413 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
[27173]1414 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
[27111]1415
[27007]1416 print STDERR "**** Processing a closing </Description> tag \n";
[27111]1417# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
[25891]1418
[25558]1419 # Accumulate the metadata
[23761]1420
[27111]1421 # We'll be accumulating metadata at this point if we haven't found and therefore
1422 # haven't processed the metadata yet.
1423 # For subsections, this means that if we're at a matching subsection, but haven't
1424 # found the correct metaname to override in that subsection, we accumulate it as new
1425 # meta in the subsection by adding it to the current description.
1426 # If there's no subsection info for the metadata, it will accumulate at the top level
1427 # section description if we hadn't found a matching metaname to override at this point.
1428
1429 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1430 # at the toplevel section. In the latter case, it means we want to process meta in the
1431 # toplevel section. So the eq check between the values below will work in all cases.
1432
1433 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1434 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1435 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1436
1437 if ($curr_secnum eq $opt_doc_secnum
1438 && ($metamode eq "accumulate" || $metamode eq "override")) {
[27007]1439 if ($metamode eq "override") {
1440 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1441 }
[20538]1442
[27173]1443 # If we get to here and metamode is override, this means there
[27007]1444 # was no existing value to overide => treat as an append operation
1445
1446 # Tack a new metadata tag on to the end of the <Metadata>+ block
1447 my $metaname = $parser->{'parameters'}->{'metaname'};
1448 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1449
1450 my $metadata_attr = {
1451 '_content' => $metavalue,
1452 'name' => $metaname,
1453 'mode' => "accumulate"
1454 };
1455
1456 my $append_metadata = [ "Metadata" => $metadata_attr ];
1457 my $description_content = $attrHash->{'_content'};
1458
1459 print "Appending metadata to doc.xml\n";
1460
1461 if (ref($description_content)) {
1462 # got some existing interesting nested content
1463 push(@$description_content, " ", $append_metadata ,"\n ");
1464 }
1465 else {
1466 #description_content is most likely a string such as "\n"
1467 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1468 }
1469
1470 $parser->{'parameters'}->{'metamode'} = "done";
1471 }
[25891]1472 else {
[27007]1473 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1474## print STDERR "**** NOT ACCUMULATE?!? \n";
[25891]1475 }
[20538]1476
[21716]1477 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1478 return [$tagname => $attrHash];
[20538]1479}
1480
[21715]1481
[23761]1482sub dxml_start_section
1483{
1484 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1485
1486 my $new_depth = scalar(@$contextArray);
1487
[25891]1488 print STDERR "**** START SECTION \n";
1489
[23761]1490 if ($new_depth == 1) {
1491 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1492 $parser->{'parameters'}->{'curr_section_num'} = "";
1493 }
1494
1495 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1496 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1497
1498 my $new_secnum;
1499
1500 if ($new_depth > $old_depth) {
1501 # child subsection
1502 $new_secnum = "$old_secnum.1";
1503 }
1504 elsif ($new_depth == $old_depth) {
1505 # sibling section => increase it's value by 1
1506 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1507 $tail_num++;
1508 $new_secnum = $old_secnum;
1509 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1510 }
1511 else {
1512 # back up to parent section => lopp off tail
1513 $new_secnum = $old_secnum;
1514 $new_secnum =~ s/\.\d+$//;
1515 }
1516
1517 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1518 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
[25891]1519
1520 1;
[23761]1521}
1522
[20538]1523sub edit_xml_file
1524{
1525 my $self = shift @_;
[23761]1526 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
[20538]1527
1528 # use XML::Rules to add it in (read in and out again)
[23761]1529 my $parser = XML::Rules->new(start_rules => $start_rules,
1530 rules => $rules,
1531 style => 'filter',
1532 output_encoding => 'utf8' );
[20538]1533
1534 my $xml_in = "";
1535 if (!open(MIN,"<$filename")) {
1536 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1537 }
1538 else {
[21715]1539 # Read all the text in
[20538]1540 my $line;
1541 while (defined ($line=<MIN>)) {
1542 $xml_in .= $line;
1543 }
1544 close(MIN);
1545
[23761]1546 my $MOUT;
1547 if (!open($MOUT,">$filename")) {
[20538]1548 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1549 }
1550 else {
[23761]1551 # Matched lines will get handled by the call backs
1552## my $xml_out = "";
1553
1554 binmode($MOUT,":utf8");
1555 $parser->filter($xml_in,$MOUT, $options);
1556
1557# binmode(MOUT,":utf8");
1558# print MOUT $xml_out;
1559 close($MOUT);
[20538]1560 }
1561 }
[27112]1562
1563 # copy across any custom error information that was stored during parsing
1564 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
[20538]1565}
1566
1567sub edit_doc_xml
1568{
1569 my $self = shift @_;
[25097]1570 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
[20538]1571
[27007]1572 my $info_mess = <<RAWEND;
1573****************************
1574 edit_doc_xml()
1575****************************
1576doc_xml_filename = $doc_xml_filename
1577metaname = $metaname
1578metavalue = $metavalue
1579metapos = $metapos
1580metamode = $metamode
1581opt_secnum = $opt_secnum
1582prevmetavalue = $prevmetavalue
1583****************************
1584RAWEND
1585
1586 $gsdl_cgi->generate_message($info_mess);
[25891]1587
[23761]1588 # To monitor which section/subsection number we are in
1589 my @start_rules =
1590 ( 'Section' => \&dxml_start_section );
1591
[20538]1592 # use XML::Rules to add it in (read in and out again)
[21715]1593 # Set the call back functions
[20538]1594 my @rules =
[21716]1595 ( _default => 'raw',
[23761]1596 'Metadata' => \&dxml_metadata,
[27111]1597 'Description' => \&dxml_description,
1598 'Archive' => \&dxml_archive); # just for catching errors at end
[20538]1599
[21715]1600 # Sets the parameters
[20538]1601 my $options = { 'metaname' => $metaname,
1602 'metapos' => $metapos,
[23400]1603 'metavalue' => $metavalue,
[25097]1604 'metamode' => $metamode,
1605 'prevmetavalue' => $prevmetavalue };
[23400]1606
[23761]1607 if (defined $opt_secnum) {
1608 $options->{'secnum'} = $opt_secnum;
1609 }
1610
1611 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
[20538]1612}
1613
[24071]1614sub set_archives_metadata_entry
1615{
1616 my $self = shift @_;
[25891]1617 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
[24071]1618
[27007]1619 my $info_mess = <<RAWEND;
1620****************************
1621 set_archives_metadata_entry()
1622****************************
1623archive_dir = $archive_dir
1624collect_dir = $collect_dir
1625collect = $collect
1626infodbtype = $infodbtype
1627docid = $docid
1628metaname = $metaname
1629metapos = $metapos
1630metavalue = $metavalue
1631metamode = $metamode
1632prevmetavalue = $prevmetavalue
1633****************************
1634RAWEND
1635
1636 $gsdl_cgi->generate_message($info_mess);
[25891]1637
[24071]1638 # Obtain the doc.xml path for the specified docID
1639 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
[20538]1640
[24071]1641 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1642 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
1643 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
1644
1645 # The $doc_xml_file is relative to the archives, and now let's get the full path
1646 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
1647 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
[27167]1648
1649 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
1650 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
1651 # Thereafter, we will finally be setting the overriding metavalue for this metaname
1652 if(!defined $metapos && $metamode eq "override") {
1653 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, $metapos, undef, $docid_secnum, $metamode);
1654 }
[27168]1655
[24071]1656 # Edit the doc.xml file with the specified metadata name, value and position.
1657 # TODO: there is a potential problem here as this edit_doc_xml function
1658 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
1659 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
1660
[27168]1661 # dxml_metadata method ignores metapos if metamode anything other than override
[24071]1662 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
[25097]1663 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
[27112]1664
1665 # return 0; # return 0 for now to indicate no error
1666 return (defined $self->{'error_msg'}) ? 1 : 0;
[24071]1667}
1668
1669
[20538]1670sub set_archives_metadata
1671{
1672 my $self = shift @_;
1673
1674 my $username = $self->{'username'};
1675 my $collect = $self->{'collect'};
1676 my $gsdl_cgi = $self->{'gsdl_cgi'};
[23400]1677
[20538]1678 if ($baseaction::authentication_enabled) {
[27168]1679 # Ensure the user is allowed to edit this collection
1680 $self->authenticate_user($username, $collect);
[20538]1681 }
1682
1683 # Make sure the collection isn't locked by someone else
1684 $self->lock_collection($username, $collect);
1685
[27157]1686 $self->_set_archives_metadata(@_);
[23400]1687
[24071]1688 # Release the lock once it is done
1689 $self->unlock_collection($username, $collect);
1690}
1691
[27168]1692sub _set_archives_metadata_array
1693{
1694 my $self = shift @_;
1695
1696 my $collect = $self->{'collect'};
1697 my $gsdl_cgi = $self->{'gsdl_cgi'};
1698 my $site = $self->{'site'};
1699 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[24071]1700
[27168]1701 # look up additional args
1702
1703 my $infodbtype = $self->{'infodbtype'};
1704
1705 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1706
1707 my $json_str = $self->{'json'};
1708 my $doc_array = decode_json $json_str;
1709
1710
1711 my $global_status = 0;
1712 my $global_mess = "";
1713
1714 my @all_docids = ();
1715
1716 foreach my $doc_array_rec ( @$doc_array ) {
1717 my $status = -1;
1718 my $docid = $doc_array_rec->{'docid'};
1719
1720 push(@all_docids,$docid);
1721
1722 my $metaname = $doc_array_rec->{'metaname'};
1723 if(defined $metaname) {
1724
[27176]1725 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1726
[27168]1727 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1728 my $metavalue = $doc_array_rec->{'metavalue'};
1729 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
1730
1731
1732 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1733 # make "accumulate" the default (less destructive, as it won't actually
1734 # delete any existing values)
1735 $metamode = "accumulate";
1736 }
1737
1738 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1739 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1740 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1741 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1742
1743 foreach my $metatable_rec ( @$metatable ) {
1744 $metaname = $metatable_rec->{'metaname'};
1745 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1746 my $metapos = undef;
1747 my $prevmetavalue = undef;
1748 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1749
1750 foreach my $metavalue ( @$metavals ) {
1751 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
1752 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1753
1754 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1755 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1756 # metavalue for this metaname will be lost
1757 $metamode = "accumulate";
1758 }
1759 }
1760 }
1761 }
1762
1763 if ($status != 0) {
1764 # Catch error if set infodb entry failed
1765 $global_status = $status;
1766 $global_mess .= "Failed to set metadata key: $docid\n";
1767 $global_mess .= "Exit status: $status\n";
1768 $global_mess .= "System Error Message: $!\n";
1769 $global_mess .= "-" x 20 . "\n";
1770 }
1771 }
1772
1773 if ($global_status != 0) {
1774 $global_mess .= "PATH: $ENV{'PATH'}\n";
1775 $gsdl_cgi->generate_error($global_mess);
1776 }
1777 else {
1778 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1779 $gsdl_cgi->generate_ok_message($mess);
1780 }
1781}
1782
[24071]1783sub set_archives_metadata_array
1784{
1785 my $self = shift @_;
1786
1787 my $username = $self->{'username'};
1788 my $collect = $self->{'collect'};
1789 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]1790# my $gsdlhome = $self->{'gsdlhome'};
[24071]1791
1792 if ($baseaction::authentication_enabled) {
[27295]1793 # Ensure the user is allowed to edit this collection
1794 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[23400]1795 }
[23761]1796
[24071]1797 my $site = $self->{'site'};
1798 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1799
1800 $gsdl_cgi->checked_chdir($collect_dir);
1801
1802 # Obtain the collect dir
1803 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1804
1805 # Make sure the collection isn't locked by someone else
1806 $self->lock_collection($username, $collect);
1807
[27168]1808 $self->_set_archives_metadata_array(@_);
[20538]1809
[23761]1810 # Release the lock once it is done
1811 $self->unlock_collection($username, $collect);
[20538]1812}
1813
[27157]1814sub _remove_archives_metadata
[24943]1815{
1816 my $self = shift @_;
[20538]1817
[24943]1818 my $collect = $self->{'collect'};
1819 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]1820# my $gsdlhome = $self->{'gsdlhome'};
[24943]1821 my $infodbtype = $self->{'infodbtype'};
1822
1823 my $site = $self->{'site'};
1824
1825 # Obtain the collect and archive dir
1826 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1827
1828 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1829
1830 # look up additional args
1831 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
1832
1833 my $metaname = $self->{'metaname'};
1834 my $metapos = $self->{'metapos'};
[27168]1835
[27160]1836 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
1837
1838 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
1839 # default to deleting the first metavalue for the given metaname
1840 # Beware that if both metapos AND metavalue are defined, both matches (if any)
1841 # seem to get deleted in one single remove_archives_meta action invocation.
1842 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
1843 # deleted, both get deleted.
1844 if(!defined $metapos && !defined $metavalue) {
1845 $metapos = 0;
1846 }
[27167]1847
[27168]1848 my $metamode = $self->{'metamode'} || undef;
1849
[24943]1850 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1851 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
1852
1853 # This now stores the full pathname
[25097]1854 my $doc_filename = $doc_rec->{'doc-file'}->[0];
[24943]1855
[27167]1856 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
[27160]1857# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
[24943]1858
1859 if ($status == 0)
1860 {
1861 my $mess = "remove-archives-metadata successful: Key[$docid]\n";
1862 $mess .= " $metaname";
1863 $mess .= "->[$metapos]" if (defined $metapos);
1864
1865 $gsdl_cgi->generate_ok_message($mess);
1866 }
1867 else
1868 {
1869 my $mess .= "Failed to remove archives metadata key: $docid\n";
1870 $mess .= "Exit status: $status\n";
1871 $mess .= "System Error Message: $!\n";
1872 $mess .= "-" x 20 . "\n";
1873
1874 $gsdl_cgi->generate_error($mess);
1875 }
[27157]1876
1877 #return $status; # in case calling functions have a use for this
[24943]1878}
1879
[27157]1880sub remove_archives_metadata
1881{
1882 my $self = shift @_;
1883
1884 my $username = $self->{'username'};
1885 my $collect = $self->{'collect'};
1886 my $gsdl_cgi = $self->{'gsdl_cgi'};
1887
1888 if ($baseaction::authentication_enabled)
1889 {
[27295]1890 # Ensure the user is allowed to edit this collection
1891 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]1892 }
1893
1894 # Make sure the collection isn't locked by someone else
1895 $self->lock_collection($username, $collect);
1896
1897 $self->_remove_archives_metadata(@_);
1898
1899 # Release the lock once it is done
1900 $self->unlock_collection($username, $collect);
1901}
1902
[24943]1903sub remove_from_doc_xml
1904{
1905 my $self = shift @_;
[27167]1906 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
[24943]1907
1908 my @start_rules = ('Section' => \&dxml_start_section);
1909
1910 # Set the call-back functions for the metadata tags
1911 my @rules =
1912 (
1913 _default => 'raw',
1914 'Metadata' => \&rfdxml_metadata
1915 );
1916
1917 my $parser = XML::Rules->new
1918 (
1919 start_rules => \@start_rules,
1920 rules => \@rules,
1921 style => 'filter',
[27173]1922 output_encoding => 'utf8',
1923# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
1924 stripspaces => 2|0|0 # ineffectual
[24943]1925 );
1926
1927 my $status = 0;
1928 my $xml_in = "";
1929 if (!open(MIN,"<$doc_xml_filename"))
1930 {
1931 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1932 $status = 1;
1933 }
1934 else
1935 {
1936 # Read them in
1937 my $line;
1938 while (defined ($line=<MIN>)) {
1939 $xml_in .= $line;
1940 }
1941 close(MIN);
1942
1943 # Filter with the call-back functions
1944 my $xml_out = "";
1945
1946 my $MOUT;
1947 if (!open($MOUT,">$doc_xml_filename")) {
1948 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
1949 $status = 1;
1950 }
1951 else {
1952 binmode($MOUT,":utf8");
[27167]1953 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
[24943]1954 close($MOUT);
1955 }
1956 }
1957 return $status;
1958}
1959
1960sub rfdxml_metadata
1961{
1962 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1963
[27173]1964 # For comparisons, toplevel section is indicated by ""
1965 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
1966 my $secid = $parser->{'parameters'}->{'secid'} || "";
1967
1968 if (!($secid eq $curr_sec_num))
[24943]1969 {
1970 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1971 return [$tagname => $attrHash];
1972 }
1973
1974 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1975 {
1976 if (!defined $parser->{'parameters'}->{'poscount'})
1977 {
1978 $parser->{'parameters'}->{'poscount'} = 0;
1979 }
1980 else
1981 {
1982 $parser->{'parameters'}->{'poscount'}++;
1983 }
[27168]1984
[27176]1985 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
[27167]1986 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
1987 return [];
1988 }
[24943]1989 }
[27168]1990
[24943]1991 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1992 {
1993 return [];
1994 }
[27168]1995
[24949]1996 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
1997 {
1998 return [];
1999 }
2000
[24943]2001 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2002 return [$tagname => $attrHash];
2003}
2004
[19293]2005sub mxml_metadata
2006{
2007 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2008 my $metaname = $parser->{'parameters'}->{'metaname'};
2009 my $metamode = $parser->{'parameters'}->{'metamode'};
2010
[21716]2011 # Report error if we don't see FileName tag before this
[27173]2012 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]2013
2014 # Don't do anything if we are not in the right FileSet
2015 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[23761]2016 if ($file_regexp =~ /\.\*/) {
2017 # Only interested in a file_regexp if it specifies precisely one
2018 # file.
2019 # So, skip anything with a .* in it as it is too general
[27176]2020## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
[23761]2021 return [$tagname => $attrHash];
2022 }
2023 my $src_file = $parser->{'parameters'}->{'src_file'};
2024 if (!($src_file =~ /$file_regexp/)) {
2025 return [$tagname => $attrHash];
2026 }
2027## print STDERR "*** mxl metamode = $metamode\n";
2028
[21715]2029 # Find the right metadata tag and checks if we are going to override it
[19293]2030 my $name_attr = $attrHash->{'name'};
2031 if (($name_attr eq $metaname) && ($metamode eq "override")) {
[27176]2032
2033 # now metadata.xml functions need to keep track of metapos
2034 if (!defined $parser->{'parameters'}->{'poscount'})
2035 {
2036 $parser->{'parameters'}->{'poscount'} = 0;
2037 }
2038 else
2039 {
2040 $parser->{'parameters'}->{'poscount'}++;
2041 }
2042
2043 # If either the metapos or prevmetavalue is set,
2044 # get the value and override the current value
[19293]2045 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2046
[27176]2047 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2048 {
2049 $attrHash->{'_content'} = $metavalue;
[23761]2050
[27176]2051 ## print STDERR "**** overriding metadata.xml\n";
2052
2053 # Don't want it to wipe out any other pieces of metadata
2054 $parser->{'parameters'}->{'metamode'} = "done";
2055 }
2056 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2057 {
2058 $attrHash->{'_content'} = $metavalue;
2059 $parser->{'parameters'}->{'metamode'} = "done";
2060 }
2061 }
[19293]2062
[27176]2063 # mxml_description will process the metadata if metadata is accumulate,
2064 # or if we haven't found the metadata to override
2065
[21716]2066 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2067 return [$tagname => $attrHash];
[19293]2068}
2069
2070
2071sub mxml_description
2072{
2073 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]2074 my $metamode = $parser->{'parameters'}->{'metamode'};
[19293]2075
[21716]2076 # Failed... Report error if we don't see FileName tag before this
[25097]2077 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]2078
2079 # Don't do anything if we are not in the right FileSet
2080 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[24943]2081 if ($file_regexp =~ m/\.\*/) {
[23761]2082 # Only interested in a file_regexp if it specifies precisely one
2083 # file.
2084 # So, skip anything with a .* in it as it is too general
2085 return [$tagname => $attrHash];
2086 }
2087 my $src_file = $parser->{'parameters'}->{'src_file'};
[24943]2088
2089 if (!($src_file =~ m/$file_regexp/)) {
[23761]2090 return [$tagname => $attrHash];
2091 }
[21716]2092
[21715]2093 # Accumulate the metadata block to the end of the description block
2094 # Note: This adds metadata block to all description blocks, so if there are
2095 # multiple FileSets, it will add to all of them
[23761]2096 if (($metamode eq "accumulate") || ($metamode eq "override")) {
[27176]2097
[23761]2098 # if metamode was "override" but get to here then it failed to
2099 # find an item to override, in which case it should append its
2100 # value to the end, just like the "accumulate" mode
2101
[27176]2102 if ($metamode eq "override") {
2103 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2104 }
2105
[19293]2106 # tack a new metadata tag on to the end of the <Metadata>+ block
2107 my $metaname = $parser->{'parameters'}->{'metaname'};
2108 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2109
2110 my $metadata_attr = { '_content' => $metavalue,
2111 'name' => $metaname,
2112 'mode' => "accumulate" };
2113
2114 my $append_metadata = [ "Metadata" => $metadata_attr ];
2115 my $description_content = $attrHash->{'_content'};
[24943]2116
[23761]2117## print STDERR "*** appending to metadata.xml\n";
2118
2119 # append the new metadata element to the end of the current
2120 # content contained inside this tag
[24943]2121 if (ref($description_content) eq "") {
2122 # => string or numeric literal
2123 # this is caused by a <Description> block has no <Metadata> child elements
2124 # => set up an empty array in '_content'
2125 $attrHash->{'_content'} = [ "\n" ];
2126 $description_content = $attrHash->{'_content'};
2127 }
2128
[19293]2129 push(@$description_content," ", $append_metadata ,"\n ");
[23761]2130 $parser->{'parameters'}->{'metamode'} = "done";
[19293]2131 }
2132
[21716]2133 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2134 return [$tagname => $attrHash];
[19293]2135}
2136
[21715]2137
[21716]2138sub mxml_filename
2139{
2140 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2141
2142 # Store the filename of the Current Fileset
2143 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2144 # FileName tag must come before Description tag
2145 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2146
2147 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2148 return [$tagname => $attrHash];
2149}
2150
2151
2152sub mxml_fileset
2153{
2154 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2155
2156 # Initilise the current_file
2157 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2158 # FileName tag must come before Description tag
2159 $parser->{'parameters'}->{'current_file'} = "";
2160
2161 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2162 return [$tagname => $attrHash];
2163}
2164
[27173]2165sub mxml_directorymetadata
2166{
2167 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]2168
[27173]2169 # if we haven't processed the metadata when we reach the end of metadata.xml
2170 # it's because there's no particular FileSet element whose FileName matched
2171 # In which case, add a new FileSet for this FileName
2172 my $metamode = $parser->{'parameters'}->{'metamode'};
2173 if($metamode ne "done") {
2174
2175 if ($metamode eq "override") {
2176 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2177 }
2178
2179 # If we get to here and metamode is override, this means there
2180 # was no existing value to overide => treat as an append operation
2181
2182 # Create a new FileSet element and append to DirectoryMetadata
2183 # <FileSet>
2184 # <FileName>src_file</FileName>
2185 # <Description>
2186 # <Metadata mode="" name="">metavalue</Metadata>
2187 # </Description>
2188 # </FileSet>
2189 my $src_file = $parser->{'parameters'}->{'src_file'};
2190 my $metaname = $parser->{'parameters'}->{'metaname'};
2191 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2192 my $metadata_attr = {
2193 '_content' => $metavalue,
2194 'name' => $metaname,
2195 'mode' => "accumulate"
2196 };
2197 my $append_metadata = [ "Metadata" => $metadata_attr ];
2198 my $description_attr->{'_content'} = [ "\n\t\t ", $append_metadata, "\n\t\t"];
2199 my $description_element = [ "Description" => $description_attr ];
2200
2201 #_content is not an attribute, it's special and holds the children of this element
2202 # including the textnode value embedded in this element if any.
2203 my $filename_attr = {'_content' => $src_file};
2204 my $filename_element = [ "FileName" => $filename_attr ];
2205
2206 my $fileset_attr = {};
2207 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2208 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2209
2210
2211 # get children of dirmeta, and push the new FileSet element onto it
2212 print "Appending metadata to metadata.xml\n";
2213 my $dirmeta_content = $attrHash->{'_content'};
2214 if (ref($dirmeta_content)) {
2215 # got some existing interesting nested content
2216 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2217 push(@$dirmeta_content, "\t", $fileset ,"\n");
2218 }
2219 else {
2220 #description_content is most likely a string such as "\n"
2221 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2222 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2223 }
2224
2225 $parser->{'parameters'}->{'metamode'} = "done";
2226 }
2227 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2228 return [$tagname => $attrHash];
2229}
2230
2231
[19293]2232sub edit_metadata_xml
2233{
2234 my $self = shift @_;
[27176]2235 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
[19293]2236
[21715]2237 # Set the call-back functions for the metadata tags
[19293]2238 my @rules =
[21716]2239 ( _default => 'raw',
2240 'FileName' => \&mxml_filename,
[19293]2241 'Metadata' => \&mxml_metadata,
[21716]2242 'Description' => \&mxml_description,
[27173]2243 'FileSet' => \&mxml_fileset,
2244 'DirectoryMetadata' => \&mxml_directorymetadata);
[19293]2245
[21715]2246 # use XML::Rules to add it in (read in and out again)
[19293]2247 my $parser = XML::Rules->new(rules => \@rules,
[21716]2248 style => 'filter',
[27173]2249 output_encoding => 'utf8',
2250 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
[19293]2251
[24943]2252 if (!-e $metadata_xml_filename) {
2253
2254 if (open(MOUT,">$metadata_xml_filename")) {
2255
2256 my $src_file_re = &util::filename_to_regex($src_file);
2257 # shouldn't the following also be in the above utility routine??
2258 # $src_file_re =~ s/\./\\./g;
2259
2260 print MOUT "<?xml version=\"1.0\"?>\n";
2261 print MOUT "<DirectoryMetadata>\n";
2262 print MOUT " <FileSet>\n";
2263 print MOUT " <FileName>$src_file_re</FileName>\n";
2264 print MOUT " <Description>\n";
2265 print MOUT " </Description>\n";
2266 print MOUT " </FileSet>\n";
2267 print MOUT "</DirectoryMetadata>\n";
[23761]2268
[24943]2269 close(MOUT);
2270 }
2271 else {
2272 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2273 }
[19293]2274 }
[24943]2275
2276
2277 my $xml_in = "";
2278 if (!open(MIN,"<$metadata_xml_filename")) {
2279 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2280 }
[19293]2281 else {
[24943]2282 # Read them in
2283 my $line;
2284 while (defined ($line=<MIN>)) {
2285 $xml_in .= $line;
2286 }
2287 close(MIN);
[23761]2288
[24943]2289 # Filter with the call-back functions
2290 my $xml_out = "";
[23761]2291
[24943]2292 my $MOUT;
2293 if (!open($MOUT,">$metadata_xml_filename")) {
2294 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2295 }
2296 else {
2297 binmode($MOUT,":utf8");
2298
2299 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2300 # At the moment, I will just hack it!
2301 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2302 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2303 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2304 #print MOUT $xml_out;
2305
2306 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
[27176]2307 metapos => $metapos,
[24943]2308 metavalue => $metavalue,
2309 metamode => $metamode,
2310 src_file => $src_file,
[27176]2311 prevmetavalue => $prevmetavalue,
[24943]2312 current_file => undef} );
2313 close($MOUT);
2314 }
2315 }
[20538]2316}
[19293]2317
2318
2319sub set_import_metadata
2320{
2321 my $self = shift @_;
[21715]2322
[19293]2323 my $username = $self->{'username'};
2324 my $collect = $self->{'collect'};
2325 my $gsdl_cgi = $self->{'gsdl_cgi'};
[23400]2326
[19293]2327 if ($baseaction::authentication_enabled) {
[27295]2328 # Ensure the user is allowed to edit this collection
2329 $self->authenticate_user($username, $collect);
[19293]2330 }
2331
2332 # Make sure the collection isn't locked by someone else
2333 $self->lock_collection($username, $collect);
[27157]2334
2335 $self->_set_import_metadata(@_);
[19293]2336
[21715]2337 # Release the lock once it is done
2338 $self->unlock_collection($username, $collect);
[23761]2339
[19293]2340}
2341
[25097]2342sub set_import_metadata_array
2343{
[27168]2344 my $self = shift @_;
[25097]2345
2346 my $username = $self->{'username'};
2347 my $collect = $self->{'collect'};
2348 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]2349# my $gsdlhome = $self->{'gsdlhome'};
[25097]2350
2351 if ($baseaction::authentication_enabled) {
[27295]2352 # Ensure the user is allowed to edit this collection
2353 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[25097]2354 }
2355
[27168]2356 my $site = $self->{'site'};
2357 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2358
[25097]2359 $gsdl_cgi->checked_chdir($collect_dir);
2360
2361 # Make sure the collection isn't locked by someone else
2362 $self->lock_collection($username, $collect);
2363
[27168]2364 $self->_set_import_metadata_array(@_);
[25097]2365
[27168]2366 # Release the lock once it is done
2367 $self->unlock_collection($username, $collect);
2368
2369}
2370
2371
2372sub _set_import_metadata_array
2373{
2374 my $self = shift @_;
2375
2376 my $collect = $self->{'collect'};
2377 my $gsdl_cgi = $self->{'gsdl_cgi'};
2378
2379 my $site = $self->{'site'};
2380 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[25097]2381
[27168]2382 # look up additional args
[25097]2383
[27168]2384 my $infodbtype = $self->{'infodbtype'};
2385
2386 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2387 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2388
2389 my $json_str = $self->{'json'};
2390 my $doc_array = decode_json $json_str;
2391
2392 my $global_status = 0;
2393 my $global_mess = "";
2394
2395 my @all_docids = ();
2396
2397 foreach my $doc_array_rec ( @$doc_array )
2398 {
2399 my $status = -1;
2400 my $docid = $doc_array_rec->{'docid'};
[25097]2401
[27176]2402 my ($docid_root,$docid_secnum);
2403 if(defined $docid) {
2404 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2405 # as yet no support for setting subsection metadata in metadata.xml
2406 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2407 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2408 next; # skip this docid in for loop
2409 }
2410 }
2411
2412 push(@all_docids,$docid); # docid_root rather
[25097]2413
[27168]2414 my $metaname = $doc_array_rec->{'metaname'};
2415 if (defined $metaname) {
2416 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2417 my $metavalue = $doc_array_rec->{'metavalue'};
[27176]2418 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2419
[27168]2420 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2421 # make "accumulate" the default (less destructive, as won't actually
2422 # delete any existing values)
2423 $metamode = "accumulate";
2424 }
[27176]2425
2426 # adding metapos and prevmetavalue support to import_metadata subroutines
2427 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2428 my $prevmetavalue = $self->{'prevmetavalue'};
2429
2430 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27168]2431
2432 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2433 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2434
2435 foreach my $metatable_rec ( @$metatable ) {
2436 $metaname = $metatable_rec->{'metaname'};
2437 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
[25097]2438 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
[27168]2439 # make "accumulate" the default (less destructive, as won't actually
2440 # delete any existing values)
2441 $metamode = "accumulate";
2442 }
[27176]2443
2444 # No support for metapos and prevmetavalue in the JSON metatable substructure
2445 my $metapos = undef;
2446 my $prevmetavalue = undef;
[27168]2447 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2448
2449 foreach my $metavalue ( @$metavals ) {
[27176]2450 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2451
2452 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27168]2453 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2454 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2455 # metavalue for this metaname will be lost
[25097]2456 $metamode = "accumulate";
[27168]2457 }
2458 }
2459 }
2460 }
2461 }
[25097]2462
[27168]2463 # always a success message
2464 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2465 $gsdl_cgi->generate_ok_message($mess);
2466}
[25097]2467
[27168]2468# always returns true (1)
2469sub set_import_metadata_entry
2470{
2471 my $self = shift @_;
[27176]2472 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
[25097]2473
[27176]2474 my $info_mess = <<RAWEND;
2475****************************
2476 set_import_metadata_entry()
2477****************************
2478collect = $collect
2479collect_dir = $collect_dir
2480infodbtype = $infodbtype
2481arcinfo_doc_filename = $arcinfo_doc_filename
2482docid = $docid
2483metaname = $metaname
2484metapos = $metapos
2485metavalue = $metavalue
2486metamode = $metamode
2487prevmetavalue = $prevmetavalue
2488****************************
2489RAWEND
2490
2491 $gsdl_cgi->generate_message($info_mess);
2492
2493 # import works with metadata.xml which can have inherited metadata
2494 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2495 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2496 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2497 # what they're doing if they provide a metapos.
2498 if(defined $metapos) {
2499 print STDERR "@@@@ WARNING: metapos defined.\n";
2500 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2501 }
2502
[27168]2503 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2504 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2505 my $metadata_xml_file;
2506 my $import_filename = undef;
2507
[27176]2508 if (defined $docid) {
2509 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2510 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[25097]2511
[27176]2512 # This now stores the full pathname
2513 $import_filename = $doc_rec->{'src-file'}->[0];
2514 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2515 # as the array version of the method doesn't support the -f parameter yet
2516 my $import_file = $self->{'f'};
2517 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2518 }
[25097]2519
[27168]2520 # figure out correct metadata.xml file [?]
2521 # Assuming the metadata.xml file is next to the source file
2522 # Note: This will not work if it is using the inherited metadata from the parent folder
2523 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2524 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2525
[27176]2526 # If we're overriding everything, then $metamode=override combined with $metapos=undefined
2527 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2528 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
2529 if(!defined $metapos && $metamode eq "override") {
2530## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2531 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2532
2533 }
2534
2535 # Edit the metadata.xml
2536 # Modified by Jeffrey from DL Consulting
2537 # Handle the case where there is one metadata.xml file for multiple FileSets
2538 # The XML filter needs to know whether it is in the right FileSet
2539 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2540 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2541 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2542 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2543 #return 0;
2544 return $metadata_xml_filename;
[25097]2545}
2546
[27157]2547sub _remove_import_metadata
[24943]2548{
2549 my $self = shift @_;
[27157]2550
[24943]2551 my $collect = $self->{'collect'};
2552 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]2553# my $gsdlhome = $self->{'gsdlhome'};
[24943]2554 my $infodbtype = $self->{'infodbtype'};
2555
2556 # Obtain the collect dir
2557 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2558 my $site = $self->{'site'};
2559 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2560
2561 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2562 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
[27157]2563
[24943]2564 # look up additional args
2565 my $docid = $self->{'d'};
2566 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2567 {
[27173]2568 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
[24943]2569 }
2570
2571 my $metaname = $self->{'metaname'};
[27176]2572 my $metapos = $self->{'metapos'};
[24943]2573 my $metavalue = $self->{'metavalue'};
[27176]2574 if(defined $metavalue) {
2575 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2576 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2577 $metapos = 0;
2578 }
2579 my $metamode = $self->{'metamode'} || undef;
2580
2581 # import works with metadata.xml which can have inherited metadata
2582 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2583 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2584 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2585 # what they're doing if they provide a metapos.
2586 if(defined $metapos) {
2587 print STDERR "@@@@ WARNING: metapos defined.\n";
2588 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2589 }
[24943]2590
2591 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2592 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2593 my $metadata_xml_file;
2594 my $import_filename = undef;
2595 if (defined $docid)
2596 {
2597 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2598 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2599
2600 # This now stores the full pathname
2601 $import_filename = $doc_rec->{'src-file'}->[0];
2602 }
2603
2604 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2605 {
[27173]2606 $gsdl_cgi->generate_error("There is no metadata\n");
[24943]2607 }
2608
2609 # figure out correct metadata.xml file [?]
2610 # Assuming the metadata.xml file is next to the source file
2611 # Note: This will not work if it is using the inherited metadata from the parent folder
2612 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2613 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2614
[27176]2615 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
[24943]2616
2617 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2618 $mess .= " $metaname";
2619 $mess .= " = $metavalue\n";
2620
2621 $gsdl_cgi->generate_ok_message($mess);
[27157]2622
2623 #return $status; # in case calling functions have a use for this
[24943]2624}
2625
[27157]2626sub remove_import_metadata
2627{
2628 my $self = shift @_;
2629
2630 my $username = $self->{'username'};
2631 my $collect = $self->{'collect'};
2632 my $gsdl_cgi = $self->{'gsdl_cgi'};
2633
2634 if ($baseaction::authentication_enabled) {
[27295]2635 # Ensure the user is allowed to edit this collection
2636 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]2637 }
2638
2639 # Make sure the collection isn't locked by someone else
2640 $self->lock_collection($username, $collect);
2641
2642 $self->_remove_import_metadata(@_);
2643
2644 # Release the lock once it is done
2645 $self->unlock_collection($username, $collect);
2646
2647}
2648
[24943]2649sub remove_from_metadata_xml
2650{
2651 my $self = shift @_;
[27176]2652 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
2653 # metamode generally has no meaning for removing meta, but is used by set_meta
2654 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
2655
[24943]2656 # Set the call-back functions for the metadata tags
2657 my @rules =
2658 (
2659 _default => 'raw',
2660 'Metadata' => \&rfmxml_metadata,
2661 'FileName' => \&mxml_filename
2662 );
2663
2664 my $parser = XML::Rules->new
2665 (
2666 rules => \@rules,
2667 style => 'filter',
[27173]2668 output_encoding => 'utf8',
2669 #normalisespaces => 1,
2670 stripspaces => 2|0|0 # ineffectual
[24943]2671 );
2672
2673 my $xml_in = "";
2674 if (!open(MIN,"<$metadata_xml_filename"))
2675 {
2676 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2677 }
2678 else
2679 {
2680 # Read them in
2681 my $line;
2682 while (defined ($line=<MIN>)) {
2683 $xml_in .= $line;
2684 }
2685 close(MIN);
2686
2687 # Filter with the call-back functions
2688 my $xml_out = "";
2689
2690 my $MOUT;
2691 if (!open($MOUT,">$metadata_xml_filename")) {
2692 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2693 }
2694 else {
2695 binmode($MOUT,":utf8");
[27176]2696 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
[24943]2697 close($MOUT);
2698 }
2699 }
2700}
2701
2702sub rfmxml_metadata
2703{
2704 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2705
[27176]2706 # metadata.xml does not handle subsections
2707
2708 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
2709 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
2710 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
[24943]2711 {
[27176]2712 if (!defined $parser->{'parameters'}->{'poscount'})
2713 {
2714 $parser->{'parameters'}->{'poscount'} = 0;
2715 }
2716 else
2717 {
2718 $parser->{'parameters'}->{'poscount'}++;
2719 }
2720
2721 # if overriding but no metapos, then clear all the meta for this metaname
2722 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'})) {
2723 return [];
2724 }
2725
2726 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2727 {
2728 return [];
2729 }
2730
2731 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
2732 {
2733 return [];
2734 }
[24943]2735 }
2736
2737 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2738 return [$tagname => $attrHash];
2739}
2740
[27157]2741sub _remove_live_metadata
[19499]2742{
2743 my $self = shift @_;
2744
2745 my $collect = $self->{'collect'};
2746 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]2747# my $gsdlhome = $self->{'gsdlhome'};
[23400]2748 my $infodbtype = $self->{'infodbtype'};
[19499]2749
[21715]2750 # Obtain the collect dir
[23766]2751 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[27157]2752 my $site = $self->{'site'};
2753 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[19499]2754
[27157]2755
[19499]2756 # look up additional args
2757 my $docid = $self->{'d'};
[21715]2758 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2759 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2760 }
2761
2762 # Generate the dbkey
[19499]2763 my $metaname = $self->{'metaname'};
2764 my $dbkey = "$docid.$metaname";
2765
[21715]2766 # To people who know $collect_tail please add some comments
2767 # Obtain the live gdbm_db path
[19499]2768 my $collect_tail = $collect;
2769 $collect_tail =~ s/^.*[\/\\]//;
[21564]2770 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2771 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19499]2772
[21715]2773 # Remove the key
[21569]2774 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
[19499]2775 my $status = system($cmd);
2776 if ($status != 0) {
[21715]2777 # Catch error if gdbmdel failed
[19499]2778 my $mess = "Failed to set metadata key: $dbkey\n";
2779
2780 $mess .= "PATH: $ENV{'PATH'}\n";
2781 $mess .= "cmd = $cmd\n";
2782 $mess .= "Exit status: $status\n";
2783 $mess .= "System Error Message: $!\n";
2784
2785 $gsdl_cgi->generate_error($mess);
2786 }
2787 else {
2788 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
2789 }
2790
2791}
2792
[27157]2793sub remove_live_metadata
[19499]2794{
2795 my $self = shift @_;
2796
2797 my $username = $self->{'username'};
2798 my $collect = $self->{'collect'};
2799 my $gsdl_cgi = $self->{'gsdl_cgi'};
2800 my $gsdlhome = $self->{'gsdlhome'};
[23400]2801
[19499]2802 if ($baseaction::authentication_enabled) {
[27295]2803 # Ensure the user is allowed to edit this collection
2804 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[19499]2805 }
2806
[27157]2807 # Make sure the collection isn't locked by someone else
2808 $self->lock_collection($username, $collect);
[19499]2809
[27157]2810 $self->_remove_live_metadata(@_);
2811
2812 $self->unlock_collection($username, $collect);
2813}
2814
2815sub remove_metadata
2816{
2817 my $self = shift @_;
2818
2819 my $where = $self->{'where'};
2820 if(!$where) {
2821 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
2822 return;
2823 }
2824
2825 my $username = $self->{'username'};
2826 my $collect = $self->{'collect'};
2827 my $gsdl_cgi = $self->{'gsdl_cgi'};
2828
2829 if ($baseaction::authentication_enabled) {
[27295]2830 # Ensure the user is allowed to edit this collection
2831 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]2832 }
2833
[19499]2834 # Make sure the collection isn't locked by someone else
2835 $self->lock_collection($username, $collect);
2836
[27157]2837 # check which directories need to be processed, specified in $where as
2838 # any combination of import|archives|index|live
2839 if($where =~ m/import/) {
2840 $self->_remove_import_metadata(@_);
2841 }
2842 if($where =~ m/archives/) {
2843 $self->_remove_archives_metadata(@_);
2844 }
2845 if($where =~ m/index/) {
2846 $self->_remove_index_metadata(@_);
2847 }
2848
2849 # Release the lock once it is done
2850 $self->unlock_collection($username, $collect);
2851}
2852
2853# the internal version, without authentication
2854sub _remove_index_metadata
2855{
2856 my $self = shift @_;
2857
2858 my $collect = $self->{'collect'};
2859 my $gsdl_cgi = $self->{'gsdl_cgi'};
2860# my $gsdlhome = $self->{'gsdlhome'};
2861 my $infodbtype = $self->{'infodbtype'};
2862
2863 # Obtain the collect dir
2864 my $site = $self->{'site'};
2865 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2866 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2867
2868 # look up additional args
[19499]2869 my $docid = $self->{'d'};
[21715]2870 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
2871 $gsdl_cgi->generate_error("No docid (d=...) specified.");
2872 }
[19499]2873 my $metaname = $self->{'metaname'};
2874 my $metapos = $self->{'metapos'};
[27162]2875 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
[19499]2876
[21715]2877 # To people who know $collect_tail please add some comments
2878 # Obtain the path to the database
[19499]2879 my $collect_tail = $collect;
2880 $collect_tail =~ s/^.*[\/\\]//;
[21564]2881 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]2882 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]2883
2884 # Read the docid entry
[23400]2885 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21715]2886
2887 # Check to make sure the key does exist
2888 if (!defined ($doc_rec->{$metaname})) {
2889 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
2890 }
2891
2892 # Obtain the specified metadata pos
[27176]2893 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
2894 if(!defined $metapos && !defined $metavalue) {
[27162]2895 $metapos = 0;
2896 }
[27168]2897
[19499]2898
2899 # consider check key is defined before deleting?
[21715]2900 # Loop through the metadata array and ignore the specified position
[19499]2901 my $filtered_metadata = [];
[21715]2902 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
[19499]2903 for (my $i=0; $i<$num_metadata_vals; $i++) {
[24949]2904 my $metaval = shift(@{$doc_rec->{$metaname}});
[19499]2905
[24949]2906 if (!defined $metavalue && $i != $metapos) {
[27162]2907 push(@$filtered_metadata,$metaval);
[19499]2908 }
[24949]2909
2910 if(defined $metavalue && !($metavalue eq $metaval))
2911 {
[27162]2912 push(@$filtered_metadata,$metaval);
[24949]2913 }
[19499]2914 }
2915 $doc_rec->{$metaname} = $filtered_metadata;
2916
[27180]2917 ## Use the dbutil set_entry method instead of assuming the database is gdbm
2918 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
2919
[19499]2920 if ($status != 0) {
2921 my $mess = "Failed to set metadata key: $docid\n";
2922
2923 $mess .= "PATH: $ENV{'PATH'}\n";
2924 $mess .= "Exit status: $status\n";
2925 $mess .= "System Error Message: $!\n";
2926
2927 $gsdl_cgi->generate_error($mess);
2928 }
2929 else {
2930 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
2931 $mess .= " $metaname";
2932 $mess .= "->[$metapos]" if (defined $metapos);
2933
2934 $gsdl_cgi->generate_ok_message($mess);
2935 }
[27155]2936
[27157]2937 #return $status; # in case calling functions have a use for this
2938}
2939
2940sub remove_index_metadata
2941{
2942 my $self = shift @_;
2943
2944 my $username = $self->{'username'};
2945 my $collect = $self->{'collect'};
2946 my $gsdl_cgi = $self->{'gsdl_cgi'};
2947# my $gsdlhome = $self->{'gsdlhome'};
2948
2949 if ($baseaction::authentication_enabled) {
[27295]2950 # Ensure the user is allowed to edit this collection
2951 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]2952 }
2953
2954 # Obtain the collect dir
2955 my $site = $self->{'site'};
2956 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2957 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2958
2959 # Make sure the collection isn't locked by someone else
2960 $self->lock_collection($username, $collect);
2961
2962 $self->_remove_index_metadata(@_);
2963
[27155]2964 # Release the lock once it is done
2965 $self->unlock_collection($username, $collect);
[19499]2966}
2967
2968
[23761]2969# Was trying to reused the codes, but the functions need to be broken
2970# down more before they can be reused, otherwise there will be too
2971# much overhead and duplicate process...
[21716]2972sub insert_metadata
2973{
2974 my $self = shift @_;
2975
2976 my $username = $self->{'username'};
2977 my $collect = $self->{'collect'};
2978 my $gsdl_cgi = $self->{'gsdl_cgi'};
2979 my $gsdlhome = $self->{'gsdlhome'};
[23400]2980 my $infodbtype = $self->{'infodbtype'};
2981
[23761]2982 # If the import metadata and gdbm database have been updated, we
2983 # need to insert some notification to warn user that the the text
2984 # they see at the moment is not indexed and require a rebuild.
[21716]2985 my $rebuild_pending_macro = "_rebuildpendingmessage_";
2986
2987 if ($baseaction::authentication_enabled) {
2988 # Ensure the user is allowed to edit this collection
2989 $self->authenticate_user($username, $collect);
2990 }
2991
[23766]2992 # Obtain the collect and archive dir
2993 my $site = $self->{'site'};
2994 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2995 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[21716]2996 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2997
2998 # Make sure the collection isn't locked by someone else
2999 $self->lock_collection($username, $collect);
3000
3001 # Check additional args
3002 my $docid = $self->{'d'};
3003 if (!defined($docid)) {
3004 $gsdl_cgi->generate_error("No document id is specified: d=...");
3005 }
3006 my $metaname = $self->{'metaname'};
3007 if (!defined($metaname)) {
3008 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3009 }
3010 my $metavalue = $self->{'metavalue'};
3011 if (!defined($metavalue) || $metavalue eq "") {
3012 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3013 }
3014 # make "accumulate" the default (less destructive, as won't actually
3015 # delete any existing values)
3016 my $metamode = "accumulate";
3017
[27176]3018 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3019 my $metapos = undef;
3020 my $prevmetavalue = undef;
3021
[21716]3022 #=======================================================================#
3023 # set_import_metadata [START]
3024 #=======================================================================#
3025 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3026 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3027 my $metadata_xml_file;
[23400]3028 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3029 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[21716]3030
3031 # This now stores the full pathname
3032 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
3033
3034 # figure out correct metadata.xml file [?]
3035 # Assuming the metadata.xml file is next to the source file
3036 # Note: This will not work if it is using the inherited metadata from the parent folder
3037 my ($import_tailname, $import_dirname)
3038 = File::Basename::fileparse($import_filename);
3039 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3040
3041 # Shane's escape characters
3042 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3043 $metavalue =~ s/\,/&#44;/g;
3044 $metavalue =~ s/\:/&#58;/g;
3045 $metavalue =~ s/\|/&#124;/g;
3046 $metavalue =~ s/\(/&#40;/g;
3047 $metavalue =~ s/\)/&#41;/g;
3048 $metavalue =~ s/\[/&#91;/g;
3049 $metavalue =~ s/\\/&#92;/g;
3050 $metavalue =~ s/\]/&#93;/g;
3051 $metavalue =~ s/\{/&#123;/g;
3052 $metavalue =~ s/\}/&#125;/g;
3053 $metavalue =~ s/\"/&#34;/g;
3054 $metavalue =~ s/\`/&#96;/g;
3055 $metavalue =~ s/\n/_newline_/g;
3056
3057 # Edit the metadata.xml
3058 # Modified by Jeffrey from DL Consulting
3059 # Handle the case where there is one metadata.xml file for multiple FileSets
3060 # The XML filter needs to know whether it is in the right FileSet
3061 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3062 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
[27176]3063 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3064 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
[21716]3065 #=======================================================================#
3066 # set_import_metadata [END]
3067 #=======================================================================#
3068
3069
3070 #=======================================================================#
3071 # set_metadata (accumulate version) [START]
3072 #=======================================================================#
3073 # To people who know $collect_tail please add some comments
3074 # Obtain path to the database
3075 my $collect_tail = $collect;
3076 $collect_tail =~ s/^.*[\/\\]//;
3077 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]3078 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21716]3079
3080 # Read the docid entry
[23400]3081 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21716]3082
3083 # Protect the quotes
3084 $metavalue =~ s/\"/\\\"/g;
3085
3086 # Adds the pending macro
3087 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3088
3089 # If the metadata doesn't exist, create a new one
3090 if (!defined($doc_rec->{$metaname})){
3091 $doc_rec->{$metaname} = [ $macro_metavalue ];
3092 }
3093 # Else, let's acculumate the values
3094 else {
3095 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3096 }
3097
[27180]3098 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3099 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3100
[21716]3101 if ($status != 0) {
3102 # Catch error if gdbmget failed
3103 my $mess = "Failed to set metadata key: $docid\n";
3104
3105 $mess .= "PATH: $ENV{'PATH'}\n";
3106 $mess .= "Exit status: $status\n";
3107 $mess .= "System Error Message: $!\n";
3108
3109 $gsdl_cgi->generate_error($mess);
3110 }
3111 else {
3112 my $mess = "insert-metadata successful: Key[$docid]\n";
3113 $mess .= " [In metadata.xml] $metaname";
3114 $mess .= " = $metavalue\n";
3115 $mess .= " [In database] $metaname";
3116 $mess .= " = $macro_metavalue\n";
3117 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3118 $gsdl_cgi->generate_ok_message($mess);
3119 }
3120 #=======================================================================#
3121 # set_metadata (accumulate version) [END]
3122 #=======================================================================#
3123
3124 # Release the lock once it is done
3125 $self->unlock_collection($username, $collect);
3126}
3127
[19293]31281;
Note: See TracBrowser for help on using the repository browser.