source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 29098

Last change on this file since 29098 was 29098, checked in by ak19, 10 years ago

Removing import metadata needs to take into account prevmetavalue and and not delete all metavalues in override mode if metapos is undefined. In the case of prevmetavalue being defined, it should only handle prevmetavalue. Kathy had already fixed the archives_metadata case, and the index_metadata case does not deal with xml files but the database.

File size: 119.3 KB
RevLine 
[28159]1##########################################################################
[19293]2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
[27157]10# This program is free software; you can redistr te it and/or modify
[19293]11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[24071]37
[19293]38BEGIN {
[22331]39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]40 require XML::Rules;
41}
42
43@metadataaction::ISA = ('baseaction');
44
45my $action_table =
[25097]46{
47 #GET METHODS
48 "get-import-metadata" => {
49 'compulsory-args' => [ "d", "metaname" ],
[27176]50 'optional-args' => [ "metapos" ] },
[19499]51
[25097]52 "get-archives-metadata" => {
53 'compulsory-args' => [ "d", "metaname" ],
54 'optional-args' => [ "metapos" ] },
55
[27157]56 "get-index-metadata" => {
[25097]57 'compulsory-args' => [ "d", "metaname" ],
58 'optional-args' => [ "metapos" ] },
[19499]59
[27157]60 "get-metadata" => { # alias for get-index-metadata
61 'compulsory-args' => [ "d", "metaname" ],
62 'optional-args' => [ "metapos" ] },
63
[25097]64 "get-live-metadata" => {
65 'compulsory-args' => [ "d", "metaname" ],
66 'optional-args' => [ ] },
[19499]67
[27312]68 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
69 'compulsory-args' => [ "json" ],
70 'optional-args' => [ "where" ],
71 'help-string' => [
72 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
73 ]},
74
[25097]75 #SET METHODS
76 "set-live-metadata" => {
77 'compulsory-args' => [ "d", "metaname", "metavalue" ],
78 'optional-args' => [ ] },
[19499]79
[27157]80 "set-metadata" => { # generic set-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
81 'compulsory-args' => [ "metaname", "metavalue" ],
82 'optional-args' => [ "where", "metapos", "metamode", "prevmetavalue", "d", "f" ] },
83
84 "set-index-metadata" => {
[25097]85 'compulsory-args' => [ "d", "metaname", "metavalue" ],
[27168]86 'optional-args' => [ "metapos", "metamode" ] },
[19499]87
[25097]88 "set-archives-metadata" => {
89 'compulsory-args' => [ "d", "metaname", "metavalue" ],
90 'optional-args' => [ "metapos", "metamode", "prevmetavalue" ] }, # metamode can be "accumulate", "override",
91
92 "set-import-metadata" => {
93 'compulsory-args' => [ "metaname", "metavalue" ],
[27176]94 'optional-args' => [ "d", "f", "metamode", "metapos", "prevmetavalue" ] }, # metamode can be "accumulate", "override", or "unique-id". Also need to add the ability to specify a previous metadata value to overwrite (because we can't use metapos). Metapos now supported, but assumes you are working with a Simple (instead of Complex) collection
[25097]95
96 #SET METHODS (ARRAY)
97 "set-metadata-array" => {
[27168]98 'compulsory-args' => [ "where", "json" ],
[27261]99 'optional-args' => [ ],
100 'help-string' => [
101 'A simple example: metadata-server.pl?a=set-metadata-array&where=archives|index|import&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metaname":"Title","metavalue":"Tralalala","metamode":"accumulate"},{"docid":"HASHbe483fa4df4e096335d1c8","metaname":"Title","metavalue":"Lala was here","metapos":0, "metamode":"override"}]',
102
103 'A more complex example: metadata-server.pl?a=set-metadata-array&where=archives|index&c=demo&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9.1","metatable":[{"metaname":"Title","metavals":["Transformers","Robots in disguise","Autobots"]}],"metamode":"override"},{"docid":"HASHbe483fa4df4e096335d1c8.2","metaname":"Title","metavalue":"Pinky was here","metamode":"accumulate"}]' ] },
104
105# The same examples rewritten for when running the metadata-server.pl script from the commandline:
106
107# the simple example: metadata-server.pl a="set-metadata-array" where="archives|index|import" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9\",\"metaname\":\"Title\",\"metavalue\":\"Tralalala\",\"metamode\":\"accumulate\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8\",\"metaname\":\"Title\",\"metavalue\":\"Lala was here\",\"metapos\":0, \"metamode\":\"override\"}]",
108
109# the more complex example: metadata-server.pl a="set-metadata-array" where="archives|index" c="demo" json="[{\"docid\":\"HASHc5bce2d6d3e5b04e470ec9.1\",\"metatable\":[{\"metaname\":\"Title\",\"metavals\":[\"Transformers\",\"Robots in disguise\",\"Autobots\"]}],\"metamode\":\"override\"},{\"docid\":\"HASHbe483fa4df4e096335d1c8.2\",\"metaname\":\"Title\",\"metavalue\":\"Pinky was here\",\"metamode\":\"accumulate\"}]"
[24943]110
[25097]111 "set-archives-metadata-array" => {
112 'compulsory-args' => [ "json" ],
113 'optional-args' => [ ] },
114
115 "set-import-metadata-array" => {
116 'compulsory-args' => [ "json" ],
117 'optional-args' => [ ] },
[27168]118
119 "set-index-metadata-array" => {
120 'compulsory-args' => [ "json" ],
121 'optional-args' => [ ] },
122
123 "set-live-metadata-array" => {
124 'compulsory-args' => [ "json" ],
125 'optional-args' => [ ] },
[25097]126
127 #REMOVE METHODS
128 "remove-import-metadata" => {
129 'compulsory-args' => [ "d", "metaname", "metavalue" ], #TODO: add f argument
[27176]130 'optional-args' => [ "metapos" ] }, # only provide metapos arg for SIMPLE collections
[25097]131
132 "remove-archives-metadata" => {
133 'compulsory-args' => [ "d", "metaname" ], #TODO: add f argument
134 'optional-args' => [ "metapos", "metavalue" ] },
[20538]135
[25097]136 "remove-live-metadata" => {
137 'compulsory-args' => [ "d", "metaname" ],
138 'optional-args' => [ ] },
[20538]139
[27157]140 "remove-index-metadata" => {
[25097]141 'compulsory-args' => [ "d", "metaname" ],
142 'optional-args' => [ "metapos", "metavalue" ] },
[21716]143
[27157]144 "remove-metadata" => { # generic remove-meta function. The 'where' param can be set to any combination of index|archives|import|live. docid d is still compulsory for setting index, archives and live meta
145 'compulsory-args' => [ "d", "metaname" ],
146 'optional-args' => [ "where", "metapos", "metavalue" ] },
147
[25097]148 #INSERT METHODS
149 "insert-metadata" => {
150 'compulsory-args' => [ "d", "metaname", "metavalue" ],
151 'optional-args' => [ ] }
[19293]152};
153
154
155sub new
156{
157 my $class = shift (@_);
158 my ($gsdl_cgi,$iis6_mode) = @_;
159
[23761]160 # Treat metavalue specially. To transmit this through a GET request
161 # the Javascript side has url-encoded it, so here we need to decode
162 # it before proceeding
163
164 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
165 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
166 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
167
168 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
169 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
170
[29086]171 # need to do the same with prevmetavalue
172 my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
173 my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
174 my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
175
176 $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
177 $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
178
[19293]179 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
180
181 return bless $self, $class;
182}
183
184
185sub get_live_metadata
186{
187 my $self = shift @_;
188
189 my $username = $self->{'username'};
190 my $collect = $self->{'collect'};
191 my $gsdl_cgi = $self->{'gsdl_cgi'};
192 my $gsdlhome = $self->{'gsdlhome'};
[23478]193 my $infodbtype = $self->{'infodbtype'};
[27180]194
[23447]195 # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]196 # so (for now) let's not bother to enforce authentication
[21715]197
198 # Obtain the collect dir
[23766]199 my $site = $self->{'site'};
200 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
201 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]202
[27314]203 # No locking collection when getting metadata, only when setting it
204# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
[19293]205
206 # look up additional args
207 my $docid = $self->{'d'};
208 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]209 $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]210 }
211
[21715]212 # Generate the dbkey
[19293]213 my $metaname = $self->{'metaname'};
214 my $dbkey = "$docid.$metaname";
215
[21715]216 # To people who know $collect_tail please add some comments
217 # Obtain path to the database
[19293]218 my $collect_tail = $collect;
219 $collect_tail =~ s/^.*[\/|\\]//;
[21564]220 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]221 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]222
223 # Obtain the content of the key
[21569]224 my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]225 if (open(GIN,"$cmd |") == 0) {
[21715]226 # Catch error if gdbmget failed
[19293]227 my $mess = "Failed to get metadata key: $metaname\n";
228 $mess .= "$!\n";
229
230 $gsdl_cgi->generate_error($mess);
231 }
232 else {
[23761]233 binmode(GIN,":utf8");
[21715]234 # Read everything in and concatenate them into $metavalue
[19293]235 my $metavalue = "";
236 my $line;
237 while (defined ($line=<GIN>)) {
238 $metavalue .= $line;
239 }
240 close(GIN);
[21715]241 chomp($metavalue); # Get rid off the tailing newlines
[19293]242 $gsdl_cgi->generate_ok_message("$metavalue");
243 }
[21715]244
245 # Release the lock once it is done
[27314]246# $self->unlock_collection($username, $collect);
[19499]247}
[19293]248
[27157]249# just calls the index version
[19499]250sub get_metadata
251{
252 my $self = shift @_;
[27157]253 $self->get_index_metadata(@_);
254}
[19499]255
[27312]256# JSON version that will get the requested metadata values
257# from the requested source (index, import, archives or live)
258# One of the params is a JSON string and the return value is JSON too
259# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
260sub get_metadata_array
261{
262 my $self = shift @_;
263
264 my $where = $self->{'where'};
265 if (!$where) {
266 $where = "index"; # default behaviour is to get the values from index
267 }
268
[27336]269 # Only when setting metadata do we perform authentication and do we lock the collection,
[27312]270 # not when getting metadata
271
272 # for get_meta_array, the where param can only be ONE of import, archives, index, live
273 if($where =~ m/index/) {
274 $self->_get_index_metadata_array(@_);
275 }
[27315]276 elsif($where =~ m/archives/) {
[27324]277 $self->_get_archives_metadata_array(@_);
[27315]278 }
279 elsif($where =~ m/import/) {
[27333]280 $self->_get_import_metadata_array(@_);
[27315]281 }
282 elsif($where =~ m/live/) {
[27336]283 $self->_get_live_metadata_array(@_);
[27315]284 }
[27312]285}
286
[27336]287# Unused at present. Added for completion. Tested.
[27333]288sub _get_import_metadata_array {
289
290 my $self = shift @_;
[27324]291
[27333]292 my $collect = $self->{'collect'};
293 my $gsdl_cgi = $self->{'gsdl_cgi'};
294 my $site = $self->{'site'};
295 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
296
297 # look up additional args
298 my $infodbtype = $self->{'infodbtype'};
299
300 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
301 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
302 my $json_str = $self->{'json'};
303 my $doc_array = decode_json $json_str;
304
305 my $json_result_str = "[";
306 my $first_doc_rec = 1;
307 foreach my $doc_array_rec ( @$doc_array ) {
308
309 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
310
311 if($first_doc_rec) {
312 $first_doc_rec = 0;
313 } else {
314 $json_result_str .= ",";
315 }
316 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
317
318 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
319 $json_result_str = $json_result_str . ",\"metatable\":[";
320
321 my $first_rec = 1;
322 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
323
324 # Read the docid entry
325 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
326 # This now stores the full pathname
327 my $import_filename = $doc_rec->{'src-file'}->[0];
[28211]328 $import_filename = &util::placeholders_to_abspath($import_filename);
[27333]329
330 # figure out correct metadata.xml file [?]
331 # Assuming the metadata.xml file is next to the source file
332 # Note: This will not work if it is using the inherited metadata from the parent folder
333 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
334 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
335
336
337 if($first_rec) {
338 $first_rec = 0;
339 } else {
340 $json_result_str .= ",";
341 }
342
343 my $metaname = $metatable_rec->{'metaname'};
344 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
345
346 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
347 if(!defined $metapos) {
348 $metapos = 0;
349 }
350
351 # Obtain the specified metadata value(s)
352 my $metavalue;
353
354 if($metapos ne "all") { # get the value at a single metapos
355 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
356
357 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
358
359 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
360
361 } else {
362 my $first_metaval = 1;
363 $metapos = 0;
364 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
365
366 while (defined $metavalue && $metavalue ne "") {
367 if($first_metaval) {
368 $first_metaval = 0;
369 } else {
370 $json_result_str .= ",";
371 }
372
373 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
374
375 $metapos++;
376 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
377 }
378 }
379
380 $json_result_str .= "]}"; # close metavals array and metatable record
381 }
382
383 $json_result_str .= "]}"; # close metatable array and docid record
384 }
385
386 $json_result_str .= "]"; # close array of docids
387 $gsdl_cgi->generate_ok_message($json_result_str."\n");
388}
389
[27324]390# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
391# For more information on the format of the output, see _get_index_metadata_array, which is in use.
392sub _get_archives_metadata_array {
393
394 my $self = shift @_;
395
396 my $collect = $self->{'collect'};
397 my $gsdl_cgi = $self->{'gsdl_cgi'};
398 my $site = $self->{'site'};
399 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
400
401 # look up additional args
402 my $infodbtype = $self->{'infodbtype'};
403
404 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
405 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
406
407 my $json_str = $self->{'json'};
408 my $doc_array = decode_json $json_str;
409
410 my $json_result_str = "[";
411 my $first_doc_rec = 1;
412 foreach my $doc_array_rec ( @$doc_array ) {
413
414 my $docid = $doc_array_rec->{'docid'};
415
416 if($first_doc_rec) {
417 $first_doc_rec = 0;
418 } else {
419 $json_result_str .= ",";
420 }
421 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
422
423 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
424 $json_result_str = $json_result_str . ",\"metatable\":[";
425
426 my $first_rec = 1;
427 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
428
429 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
430 $docid_secnum = "" if (!defined $docid_secnum);
431
432 # Read the docid entry
433 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
434 # This now stores the full pathname
[28211]435 my $doc_filename = $doc_rec->{'doc-file'}->[0];
[27324]436 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
437
438 if($first_rec) {
439 $first_rec = 0;
440 } else {
441 $json_result_str .= ",";
442 }
443
444 my $metaname = $metatable_rec->{'metaname'};
445 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
446
447 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
448 if(!defined $metapos) {
449 $metapos = 0;
450 }
451
452
453 # Obtain the specified metadata value(s)
454 my $metavalue;
455
456 if($metapos ne "all") { # get the value at a single metapos
457
458 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
459 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
460
461 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
462
463 } else {
464 my $first_metaval = 1;
465 $metapos = 0;
466 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
467
468 while (defined $metavalue && $metavalue ne "") {
469 if($first_metaval) {
470 $first_metaval = 0;
471 } else {
472 $json_result_str .= ",";
473 }
474
475 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
476
477 $metapos++;
478 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
479 }
480 }
481
482 $json_result_str .= "]}"; # close metavals array and metatable record
483 }
484
485 $json_result_str .= "]}"; # close metatable array and docid record
486 }
487
488 $json_result_str .= "]"; # close array of docids
489 $gsdl_cgi->generate_ok_message($json_result_str."\n");
490}
491
[27336]492
493# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
494sub _get_live_metadata_array
495{
496 my $self = shift @_;
497
498 my $collect = $self->{'collect'};
499 my $gsdl_cgi = $self->{'gsdl_cgi'};
500 my $site = $self->{'site'};
501 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
502
503 # look up additional args
504 my $infodbtype = $self->{'infodbtype'};
505
506 # To people who know $collect_tail please add some comments
507 # Obtain the path to the database
508 my $collect_tail = $collect;
509 $collect_tail =~ s/^.*[\/|\\]//;
510 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
511 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
512
513 my $json_str = $self->{'json'};
514 my $doc_array = decode_json $json_str;
515
516 my $json_result_str = "[";
517 my $first_doc_rec = 1;
518
519 foreach my $doc_array_rec ( @$doc_array ) {
520
521 my $docid = $doc_array_rec->{'docid'};
522
523 if($first_doc_rec) {
524 $first_doc_rec = 0;
525 } else {
526 $json_result_str .= ",";
527 }
528 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
529
530 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
531 $json_result_str = $json_result_str . ",\"metatable\":[";
532
533 my $first_rec = 1;
534 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
535 if($first_rec) {
536 $first_rec = 0;
537 } else {
538 $json_result_str .= ",";
539 }
540
541 my $metaname = $metatable_rec->{'metaname'};
542 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
543
544 # Generate the dbkey
545 my $dbkey = "$docid.$metaname";
546
547 # metapos for get_live_metadata is always assumed to be "all".
548 # It's always going to get all the lines of metavalues associated with a metaname
549 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
550 #my $metapos = "all";
551 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
552 my $metavalue = "";
553
554 # Obtain the content of the key
555 my $cmd = "gdbmget $infodb_file_path $dbkey";
556 if (open(GIN,"$cmd |") != 0) { # Success.
557
558 binmode(GIN,":utf8");
559 # Read everything in and concatenate them into $metavalue
560 my $line;
561 my $first_metaval = 1;
562 my $pos = 0;
563 while (defined ($line=<GIN>)) {
564 chomp($line); # Get rid off the tailing newlines
565
566 if($metapos eq "all") {
567 if($first_metaval) {
568 $first_metaval = 0;
569 } else {
570 $json_result_str .= ",";
571 }
572 $metavalue = $line;
573 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
574 } elsif($metapos == $pos) {
575 $metavalue = $line;
576 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
577 last;
578 } # else, the current $pos is not the required $metapos
579 $pos += 1;
580 }
581 close(GIN);
582 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
583
584 $json_result_str .= "]}"; # close metavals array and metatable record
585 }
586
587 $json_result_str .= "]}"; # close metatable array and docid record
588 }
589
590 $json_result_str .= "]"; # close array of docids
591
592 $gsdl_cgi->generate_ok_message($json_result_str."\n");
593}
594
595
596# Takes a JSON string and returns a JSON string
597# Request string is of the form:
598# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
599# Resulting string is of the form:
[27313]600# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
[27312]601sub _get_index_metadata_array
602{
603 my $self = shift @_;
604
605 my $collect = $self->{'collect'};
606 my $gsdl_cgi = $self->{'gsdl_cgi'};
607 my $site = $self->{'site'};
608 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
609
610 # look up additional args
611 my $infodbtype = $self->{'infodbtype'};
612
613 # To people who know $collect_tail please add some comments
614 # Obtain the path to the database
615 my $collect_tail = $collect;
[27336]616 $collect_tail =~ s/^.*[\/|\\]//;
[27312]617 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
618 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
619
620 my $json_str = $self->{'json'};
621 my $doc_array = decode_json $json_str;
622
623 my $json_result_str = "[";
624 my $first_doc_rec = 1;
625
626 foreach my $doc_array_rec ( @$doc_array ) {
627
628 my $docid = $doc_array_rec->{'docid'};
629
630 if($first_doc_rec) {
631 $first_doc_rec = 0;
632 } else {
633 $json_result_str .= ",";
634 }
[27313]635 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
[27312]636
637 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
[27313]638 $json_result_str = $json_result_str . ",\"metatable\":[";
[27312]639
640 my $first_rec = 1;
641 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
642 if($first_rec) {
643 $first_rec = 0;
644 } else {
645 $json_result_str .= ",";
646 }
647
648 my $metaname = $metatable_rec->{'metaname'};
[27313]649 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
[27312]650
651 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
652 if(!defined $metapos) {
[27313]653 $metapos = 0;
[27312]654 }
655
656 # Read the docid entry
657 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
658
659 # Basically loop through and unescape_html the values
660 foreach my $k (keys %$doc_rec) {
661 my @escaped_v = ();
662 foreach my $v (@{$doc_rec->{$k}}) {
663 my $ev = &ghtml::unescape_html($v);
664 push(@escaped_v, $ev);
665 }
666 $doc_rec->{$k} = \@escaped_v;
667 }
668
669 # Obtain the specified metadata value(s)
670 my $metavalue;
671
672 if($metapos ne "all") { # get the value at a single metapos
673
[27366]674 $metavalue = $doc_rec->{$metaname}->[$metapos];
675
676 # protect any double quotes and colons in the metavalue before putting it into JSON
677 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
678 $metavalue =~ s/\:/&58;/g if defined $metavalue;
679
[27313]680 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
[27312]681
682 } else {
683 my $first_metaval = 1;
684 $metapos = 0;
685 $metavalue = $doc_rec->{$metaname}->[$metapos];
686
687 while (defined $metavalue) {
[27366]688
689 # protect any double quotes and colons in the metavalue before putting it into JSON
690 $metavalue =~ s/\"/&quot;/g;
691 $metavalue =~ s/\:/&58;/g;
692
[27312]693 if($first_metaval) {
694 $first_metaval = 0;
695 } else {
696 $json_result_str .= ",";
697 }
698
[27313]699 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
[27312]700
701 $metapos++;
[27366]702 $metavalue = $doc_rec->{$metaname}->[$metapos];
[27312]703 }
704 }
705
706 $json_result_str .= "]}"; # close metavals array and metatable record
707 }
708
709 $json_result_str .= "]}"; # close metatable array and docid record
710 }
711
712 $json_result_str .= "]"; # close array of docids
713
[27324]714 $gsdl_cgi->generate_ok_message($json_result_str."\n");
[27312]715}
716
717
[27157]718sub get_index_metadata
719{
720 my $self = shift @_;
721
[19499]722 my $username = $self->{'username'};
723 my $collect = $self->{'collect'};
724 my $gsdl_cgi = $self->{'gsdl_cgi'};
725 my $gsdlhome = $self->{'gsdlhome'};
726
[21715]727 # Obtain the collect dir
[23766]728 my $site = $self->{'site'};
729 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
730 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]731
732 # look up additional args
733 my $docid = $self->{'d'};
734 my $metaname = $self->{'metaname'};
735 my $metapos = $self->{'metapos'};
[23400]736 my $infodbtype = $self->{'infodbtype'};
[19499]737
[21715]738 # To people who know $collect_tail please add some comments
739 # Obtain path to the database
[19499]740 my $collect_tail = $collect;
[27336]741 $collect_tail =~ s/^.*[\/|\\]//;
[21564]742 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]743 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]744
745 # Read the docid entry
[23400]746 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
747
[21715]748 # Basically loop through and unescape_html the values
[19499]749 foreach my $k (keys %$doc_rec) {
750 my @escaped_v = ();
751 foreach my $v (@{$doc_rec->{$k}}) {
752 my $ev = &ghtml::unescape_html($v);
753 push(@escaped_v, $ev);
754 }
755 $doc_rec->{$k} = \@escaped_v;
756 }
757
[21715]758 # Obtain the specified metadata value
[19499]759 $metapos = 0 if (!defined $metapos);
760 my $metavalue = $doc_rec->{$metaname}->[$metapos];
761 $gsdl_cgi->generate_ok_message("$metavalue");
[21715]762
[19293]763}
764
765
[25097]766sub get_import_metadata
767{
768 my $self = shift @_;
769
770 my $username = $self->{'username'};
771 my $collect = $self->{'collect'};
772 my $gsdl_cgi = $self->{'gsdl_cgi'};
773 my $gsdlhome = $self->{'gsdlhome'};
774
775 # Obtain the collect dir
776 my $site = $self->{'site'};
777 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
778 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
779
780 # look up additional args
781 my $docid = $self->{'d'};
782 my $metaname = $self->{'metaname'};
[27176]783 my $metapos = $self->{'metapos'};
784 $metapos = 0 if (!defined $metapos); # gets the first value by default since metapos defaults to 0
785
[25097]786 my $infodbtype = $self->{'infodbtype'};
[27176]787 if (!defined $docid)
[25097]788 {
[27173]789 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
[25097]790 }
791
792 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
793 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
794 my $metadata_xml_file;
795 my $import_filename = undef;
796
797
798 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
799 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
800 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
801
802 # This now stores the full pathname
803 $import_filename = $doc_rec->{'src-file'}->[0];
[28211]804 $import_filename = &util::placeholders_to_abspath($import_filename);
[25097]805
806 # figure out correct metadata.xml file [?]
807 # Assuming the metadata.xml file is next to the source file
808 # Note: This will not work if it is using the inherited metadata from the parent folder
809 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
810 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
811
[27176]812 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
[25097]813
814}
815
816sub get_metadata_from_metadata_xml
817{
818 my $self = shift @_;
[27176]819 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
[25097]820
821 my @rules =
822 (
823 _default => 'raw',
824 'Metadata' => \&gfmxml_metadata,
825 'FileName' => \&mxml_filename
826 );
827
828 my $parser = XML::Rules->new
829 (
830 rules => \@rules,
831 output_encoding => 'utf8'
832 );
833
834 my $xml_in = "";
835 if (!open(MIN,"<$metadata_xml_filename"))
836 {
837 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
838 }
839 else
840 {
841 # Read them in
842 my $line;
843 while (defined ($line=<MIN>)) {
844 $xml_in .= $line;
845 }
846 close(MIN);
847
[27176]848 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
[25097]849
850 if(defined $parser->{'pad'}->{'metavalue'})
851 {
852 return $parser->{'pad'}->{'metavalue'};
853 }
854 else
855 {
856 return "";
857 }
858 }
859}
860
861sub gfmxml_metadata
862{
863 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
864
[27176]865 # no subsection support yet in metadata.xml
866
867 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
[25097]868 {
[27176]869 if (!defined $parser->{'parameters'}->{'poscount'})
870 {
871 $parser->{'parameters'}->{'poscount'} = 0;
872 }
873 else
874 {
875 $parser->{'parameters'}->{'poscount'}++;
876 }
877
878 # gets the first value by default, since metapos defaults to 0
879 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
880 {
881 if($parser->{'parameters'}->{'metapos'} > 0) {
882 print STDERR "@@@@ WARNING: non-zero metapos.\n";
[27333]883 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
[27176]884 }
885 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
886 }
[25097]887 }
888}
889
890sub get_archives_metadata
891{
892 my $self = shift @_;
893
894 my $username = $self->{'username'};
895 my $collect = $self->{'collect'};
896 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27324]897# my $gsdlhome = $self->{'gsdlhome'};
[25097]898 my $infodbtype = $self->{'infodbtype'};
899
900 # Obtain the collect dir
901 my $site = $self->{'site'};
902 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
903
904 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
905
906 # look up additional args
907 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
908 $docid_secnum = "" if (!defined $docid_secnum);
909
910 my $metaname = $self->{'metaname'};
911 my $metapos = $self->{'metapos'};
912 $metapos = 0 if (!defined $metapos);
913
914 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
915 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
916
917 # This now stores the full pathname
918 my $doc_filename = $doc_rec->{'doc-file'}->[0];
919
920 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
921
922}
923
924sub get_metadata_from_archive_xml
925{
926 my $self = shift @_;
927 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
928
929 my @start_rules = ('Section' => \&dxml_start_section);
930
931 my @rules =
932 (
933 _default => 'raw',
934 'Metadata' => \&gfdxml_metadata
935 );
936
937 my $parser = XML::Rules->new
938 (
939 start_rules => \@start_rules,
940 rules => \@rules,
941 output_encoding => 'utf8'
942 );
943
944 my $xml_in = "";
945 if (!open(MIN,"<$doc_xml_filename"))
946 {
947 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
948 }
949 else
950 {
951 # Read them in
952 my $line;
953 while (defined ($line=<MIN>)) {
954 $xml_in .= $line;
955 }
956 close(MIN);
957
958 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
959
960 if(defined $parser->{'pad'}->{'metavalue'})
961 {
962 return $parser->{'pad'}->{'metavalue'};
963 }
964 else
965 {
966 return "";
967 }
968 }
969}
970
971sub gfdxml_metadata
972{
973 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
974
975 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
976 {
977 return;
978 }
979
980 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
981 {
982 if (!defined $parser->{'parameters'}->{'poscount'})
983 {
984 $parser->{'parameters'}->{'poscount'} = 0;
985 }
986 else
987 {
988 $parser->{'parameters'}->{'poscount'}++;
989 }
990 }
991
992 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
993 {
994 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
995 }
996}
997
[27157]998sub _set_live_metadata
[19293]999{
1000 my $self = shift @_;
1001
1002 my $collect = $self->{'collect'};
1003 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]1004 #my $gsdlhome = $self->{'gsdlhome'};
[23400]1005 my $infodbtype = $self->{'infodbtype'};
[19293]1006
[21715]1007 # Obtain the collect dir
[27157]1008 my $site = $self->{'site'};
1009 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[23766]1010 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]1011
1012
1013 # look up additional args
1014 my $docid = $self->{'d'};
[21715]1015 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[27168]1016 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
[21715]1017 }
[19293]1018 my $metavalue = $self->{'metavalue'};
1019
[21715]1020 # Generate the dbkey
1021 my $metaname = $self->{'metaname'};
[19293]1022 my $dbkey = "$docid.$metaname";
1023
[21715]1024 # To people who know $collect_tail please add some comments
1025 # Obtain path to the database
[19293]1026 my $collect_tail = $collect;
[27336]1027 $collect_tail =~ s/^.*[\/|\\]//;
[21564]1028 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]1029 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19293]1030
[21715]1031 # Set the new value
[27176]1032 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[19293]1033 my $status = system($cmd);
1034 if ($status != 0) {
[21715]1035 # Catch error if gdbmget failed
[19293]1036 my $mess = "Failed to set metadata key: $dbkey\n";
[21715]1037
[19293]1038 $mess .= "PATH: $ENV{'PATH'}\n";
1039 $mess .= "cmd = $cmd\n";
1040 $mess .= "Exit status: $status\n";
1041 $mess .= "System Error Message: $!\n";
1042
[19499]1043 $gsdl_cgi->generate_error($mess);
[19293]1044 }
1045 else {
[19499]1046 $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
[19293]1047 }
[27157]1048
1049 #return $status; # in case calling functions have any further use for this
1050}
1051
1052sub set_live_metadata
1053{
1054 my $self = shift @_;
1055
1056 my $username = $self->{'username'};
1057 my $collect = $self->{'collect'};
1058 my $gsdl_cgi = $self->{'gsdl_cgi'};
1059
1060 if ($baseaction::authentication_enabled) {
[27295]1061 # Ensure the user is allowed to edit this collection
1062 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]1063 }
1064
1065 # Make sure the collection isn't locked by someone else
1066 $self->lock_collection($username, $collect);
1067
1068 $self->_set_live_metadata(@_);
1069
[21715]1070 # Release the lock once it is done
1071 $self->unlock_collection($username, $collect);
[19293]1072}
1073
[27168]1074sub set_index_metadata_entry
[24071]1075{
[28159]1076 print STDERR "1\n";
[27168]1077 my $self = shift @_;
[28064]1078 my ($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue) = @_;
[27168]1079
[24071]1080 # To people who know $collect_tail please add some comments
1081 # Obtain path to the database
1082 my $collect_tail = $collect;
[27336]1083 $collect_tail =~ s/^.*[\/|\\]//;
[24071]1084 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1085 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
1086
[28159]1087 print STDERR "2\n";
[24071]1088# print STDERR "**** infodb file path = $infodb_file_path\n";
1089# print STDERR "***** infodb type = $infodbtype\n";
[27168]1090
[24071]1091 # Read the docid entry
1092 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[27168]1093
[24071]1094 # Set the metadata value
1095 if (defined $metapos) {
[28159]1096 print STDERR "3\n";
[27168]1097 # if metamode=accumulate AND metapos, warn user and then use metapos
1098 if (defined $metamode && $metamode eq "accumulate") {
1099 print STDERR "**** Warning: metamode is set to accumulate yet metapos is also provided for $docid\n";
1100 print STDERR "**** Proceeding by using metapos\n";
1101 }
1102 $doc_rec->{$metaname}->[$metapos] = $metavalue;
[24071]1103 }
[28064]1104 elsif (defined $prevmetavalue) {
[28159]1105 print STDERR "4\n";
[28064]1106 my $array = $doc_rec->{$metaname};
1107 my $length = @$array;
1108
1109 my $found = 0;
[28252]1110 for (my $i = 0; $i < $length; $i++){
[28064]1111 if(defined $doc_rec->{$metaname}->[$i] && $doc_rec->{$metaname}->[$i] eq $prevmetavalue){
1112 $doc_rec->{$metaname}->[$i] = $metavalue;
1113 $found = 1;
1114 last;
1115 }
1116 }
1117
1118 if($found == 0){
1119 $doc_rec->{$metaname} = [ $metavalue ];
1120 }
1121 }
[27176]1122 elsif (defined $metamode && $metamode eq "override") {
[28159]1123 print STDERR "5\n";
[27176]1124 $doc_rec->{$metaname} = [ $metavalue ];
1125 }
1126 else { # default for index was to override, but because accumulate is less destructive,
1127 # and because accumulate is the default for archives and import, that's the new default for index too
[28159]1128 print STDERR "6\n";
[27168]1129 if(defined $doc_rec->{$metaname}) {
1130 push(@{$doc_rec->{$metaname}}, $metavalue); # accumulate the value for that metaname
1131 } else {
1132 $doc_rec->{$metaname} = [ $metavalue ];
1133 }
[24071]1134 }
[28159]1135 print STDERR "6\n";
[24071]1136
1137 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path,$docid,$doc_rec);
1138
[27168]1139 return $status;
[24071]1140
1141}
[19293]1142
[27157]1143sub _set_import_metadata
[19499]1144{
1145 my $self = shift @_;
[19293]1146
[19499]1147 my $collect = $self->{'collect'};
1148 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]1149 my $infodbtype = $self->{'infodbtype'};
1150# my $gsdlhome = $self->{'gsdlhome'};
1151
1152 # Obtain the collect and archive dir
1153 my $site = $self->{'site'};
1154 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1155 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1156 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1157
1158 # look up additional args
1159 # want either d= or f=
1160 my $docid = $self->{'d'};
[27173]1161 my ($docid_root,$docid_secnum);
1162 if(defined $docid) {
1163 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
1164 # as yet no support for setting subsection metadata in metadata.xml
1165 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
[27176]1166 $gsdl_cgi->generate_message("*** No support yet for setting import metadata at subsections level.\n");
[27173]1167 return;
1168 }
1169 }
1170
[27157]1171 my $import_file = $self->{'f'};
1172 if ((!defined $docid) && (!defined $import_file)) {
1173 $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
1174 }
[19293]1175
[27157]1176 # Get the parameters and set default mode to "accumulate"
1177 my $metaname = $self->{'metaname'};
1178 my $metavalue = $self->{'metavalue'};
1179## $metavalue =~ s/&amp;lt;(.*?)&amp;gt;/<$1>/g;
1180 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
1181
1182 my $metamode = $self->{'metamode'};
1183 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1184 # make "accumulate" the default (less destructive, as won't actually
1185 # delete any existing values)
1186 $metamode = "accumulate";
[19499]1187 }
1188
[27176]1189 # adding metapos and prevmetavalue support to import_metadata subroutines
1190 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1191 my $prevmetavalue = $self->{'prevmetavalue'};
[27157]1192
[27176]1193 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
1194 my $metadata_xml_filename = $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos,$metavalue, $metamode,$prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27157]1195
1196 my $mess = "set-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
1197 $mess .= " $metaname";
1198 $mess .= " = $metavalue";
1199 $mess .= " ($metamode)\n";
1200
1201 $gsdl_cgi->generate_ok_message($mess);
1202
1203 #return $status; # in case calling functions have any further use for this
1204}
1205
1206# the version of set_index_meta that doesn't do authentication
1207sub _set_archives_metadata
1208{
1209 my $self = shift @_;
1210
1211 my $collect = $self->{'collect'};
1212 my $gsdl_cgi = $self->{'gsdl_cgi'};
1213 my $infodbtype = $self->{'infodbtype'};
1214
1215 # Obtain the collect and archive dir
1216 my $site = $self->{'site'};
1217 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1218 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
1219
1220 # look up additional args
1221 my $docid = $self->{'d'};
1222 my $metaname = $self->{'metaname'};
1223 my $metavalue = $self->{'metavalue'};
1224 my $prevmetavalue = $self->{'prevmetavalue'};
1225
[27176]1226 my $metapos = $self->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
1227 # Don't append "|| undef", since if metapos=0 it will then be set to undef
[27157]1228
1229 my $metamode = $self->{'metamode'};
1230 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
1231 # make "accumulate" the default (less destructive, as won't actually
1232 # delete any existing values)
1233 $metamode = "accumulate";
[27167]1234 }
[27168]1235
[27157]1236 my $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
1237 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
1238
1239 if ($status == 0) {
1240 my $mess = "set-archives-metadata successful: Key[$docid]\n";
1241 $mess .= " $metaname";
1242 $mess .= "->[$metapos]" if (defined $metapos);
1243 $mess .= " = $metavalue";
1244 $mess .= " ($metamode)\n";
1245
1246 $gsdl_cgi->generate_ok_message($mess);
1247 }
1248 else {
1249 my $mess .= "Failed to set archives metadata key: $docid\n";
1250 $mess .= "Exit status: $status\n";
1251 if(defined $self->{'error_msg'}) {
1252 $mess .= "Error Message: $self->{'error_msg'}\n";
1253 } else {
1254 $mess .= "System Error Message: $!\n";
1255 }
1256 $mess .= "-" x 20 . "\n";
1257
1258 $gsdl_cgi->generate_error($mess);
1259 }
[23766]1260
[27157]1261 #return $status; # in case calling functions have any further use for this
1262}
[19499]1263
1264
[27157]1265# the version of set_index_meta that doesn't do authentication
1266sub _set_index_metadata
1267{
[28159]1268 print STDERR "START SET INDEX METADATA\n";
[27157]1269 my $self = shift @_;
1270
1271 my $collect = $self->{'collect'};
1272 my $gsdl_cgi = $self->{'gsdl_cgi'};
1273
1274 my $site = $self->{'site'};
1275 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1276
[19499]1277 # look up additional args
1278 my $docid = $self->{'d'};
1279 my $metaname = $self->{'metaname'};
[27176]1280 my $metapos = $self->{'metapos'}; # undef has meaning
[19499]1281 my $metavalue = $self->{'metavalue'};
[23761]1282 my $infodbtype = $self->{'infodbtype'};
[28064]1283 my $metamode = $self->{'metamode'};
1284 my $prevmetavalue = $self->{'prevmetavalue'};
[27168]1285
[28159]1286 print STDERR "SETTING INDEX METADATA ENTRY\n";
[28064]1287 my $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
[28159]1288 print STDERR "DONE SETTING INDEX METADATA ENTRY\n";
[19499]1289 if ($status != 0) {
[23761]1290 # Catch error if set infodb entry failed
1291 my $mess = "Failed to set metadata key: $docid\n";
[19499]1292
[23761]1293 $mess .= "PATH: $ENV{'PATH'}\n";
1294 $mess .= "Exit status: $status\n";
1295 $mess .= "System Error Message: $!\n";
1296
1297 $gsdl_cgi->generate_error($mess);
[19499]1298 }
1299 else {
[27157]1300 my $mess = "set-index-metadata successful: Key[$docid]\n";
[23761]1301 $mess .= " $metaname";
1302 $mess .= "->[$metapos]" if (defined $metapos);
[27157]1303 $mess .= " = $metavalue\n";
[23761]1304
1305 $gsdl_cgi->generate_ok_message($mess);
[19499]1306 }
[27157]1307
[28159]1308 print STDERR "END SET INDEX METADATA\n";
[27157]1309 #return $status; # in case calling functions have any further use for this
1310}
1311
1312sub set_index_metadata
1313{
1314 my $self = shift @_;
1315
1316 my $username = $self->{'username'};
1317 my $collect = $self->{'collect'};
1318 my $gsdl_cgi = $self->{'gsdl_cgi'};
1319 #my $gsdlhome = $self->{'gsdlhome'};
1320
1321 if ($baseaction::authentication_enabled) {
[27295]1322 # Ensure the user is allowed to edit this collection
1323 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]1324 }
1325
1326 my $site = $self->{'site'};
1327 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1328
1329 $gsdl_cgi->checked_chdir($collect_dir);
1330
1331 # Obtain the collect dir
1332 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1333
1334 # Make sure the collection isn't locked by someone else
1335 $self->lock_collection($username, $collect);
1336
1337 $self->_set_index_metadata(@_);
[21715]1338
1339 # Release the lock once it is done
1340 $self->unlock_collection($username, $collect);
[19499]1341}
1342
[27157]1343# call this to set the metadata for a combination of dirs archives, import or index, or live
1344# if none specified, defaults to index which was the original behaviour of set_metadata.
1345sub set_metadata
1346{
1347 my $self = shift @_;
[19499]1348
[27157]1349 # Testing that not defining a variable, setting it to "" or to " " all return false
1350 # >perl -e 'my $whichdirs=""; if($whichdirs) {print "$whichdirs\n"};'
1351
1352 my $where = $self->{'where'};
1353 if(!$where) {
1354 $self->set_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
1355 return;
1356 }
1357
1358 # authenticate and lock collection once, even if processing multiple dirs
1359 my $username = $self->{'username'};
1360 my $collect = $self->{'collect'};
1361 my $gsdl_cgi = $self->{'gsdl_cgi'};
1362
1363 if ($baseaction::authentication_enabled) {
[27295]1364 # Ensure the user is allowed to edit this collection
1365 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]1366 }
1367
1368 if($where =~ m/index/) {
1369 my $site = $self->{'site'};
1370 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1371 $gsdl_cgi->checked_chdir($collect_dir);
1372 }
1373
1374 # Make sure the collection isn't locked by someone else
1375 $self->lock_collection($username, $collect);
1376
1377
1378 # now at last can set the metadata. $where can specify multiple
1379 # $where is of the form: import|archives|index, or a subset thereof
1380
1381 #my @whichdirs = split('\|', $where);
1382
1383 # just check whether $where contains import/archives/index/live in turn, and
1384 # for each case, process it accordingly
1385 if($where =~ m/import/) {
1386 $self->_set_import_metadata(@_);
1387 }
1388
1389 if($where =~ m/archives/) {
1390
1391 # look up docID arg which is optional to set_metadata because it's optional
1392 # to set_import, but which is compulsory to set_archives_metadata
1393 my $docid = $self->{'d'};
1394 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[27168]1395 $gsdl_cgi->generate_error("No docid (d=...) specified."); # generates error and dies
[27157]1396 }
[27168]1397 # we have a docid, so can set archives meta
1398 $self->_set_archives_metadata(@_);
[27157]1399 }
1400
1401 if($where =~ m/index/) {
1402
1403 # look up docID arg which is optional to set_metadata because it's optional
1404 # to set_import, but which is compulsory to set_archives_metadata
1405 my $docid = $self->{'d'};
1406 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
1407 $gsdl_cgi->generate_error("No docid (d=...) specified.");
1408 }
[27168]1409 # we have a docid, so can set index meta
1410 $self->_set_index_metadata(@_);
[27157]1411 }
1412
1413 if($where =~ m/live/) {
1414 $self->_set_live_metadata(@_); # docid param, d, is compulsory, but is checked for in subroutine
1415 }
1416
1417 # Release the lock once it is done
1418 $self->unlock_collection($username, $collect);
1419}
1420
[24071]1421sub set_metadata_array
1422{
1423 my $self = shift @_;
1424
[27168]1425 my $where = $self->{'where'};
1426 if(!$where) {
1427 $self->set_index_metadata_array(@_); # default behaviour is the full version of set_index_meta_array
1428 return;
1429 }
1430
[24071]1431 my $username = $self->{'username'};
1432 my $collect = $self->{'collect'};
1433 my $gsdl_cgi = $self->{'gsdl_cgi'};
1434
1435 if ($baseaction::authentication_enabled) {
1436 # Ensure the user is allowed to edit this collection
[27295]1437 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[24071]1438 }
1439
[27168]1440 # Not sure if the checked_chdir is necessary, since lock_collection also does a chdir
1441 # But including the stmt during this code reorganisation to preserve as-is what used to happen
1442 my $site = $self->{'site'};
1443 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1444 $gsdl_cgi->checked_chdir($collect_dir);
1445
1446 # Make sure the collection isn't locked by someone else
1447 $self->lock_collection($username, $collect);
1448
1449 if($where =~ m/import/) {
1450 $self->_set_import_metadata_array(@_);
1451 }
1452 if($where =~ m/archives/) {
1453 $self->_set_archives_metadata_array(@_);
1454 }
1455 if($where =~ m/index/) {
1456 $self->_set_index_metadata_array(@_);
1457 }
1458 if($where =~ m/live/) {
1459 $self->_set_live_metadata_array(@_);
1460 }
1461
1462 # Release the lock once it is done
1463 $self->unlock_collection($username, $collect);
1464}
1465
1466sub _set_index_metadata_array
1467{
1468 my $self = shift @_;
1469
1470 my $collect = $self->{'collect'};
1471 my $gsdl_cgi = $self->{'gsdl_cgi'};
1472 my $site = $self->{'site'};
1473 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1474
1475
1476 # look up additional args
1477
1478 my $infodbtype = $self->{'infodbtype'};
1479
1480 my $json_str = $self->{'json'};
1481 my $doc_array = decode_json $json_str;
1482
1483
1484 my $global_status = 0;
1485 my $global_mess = "";
1486
1487 my @all_docids = ();
1488
1489 foreach my $doc_array_rec ( @$doc_array ) {
1490
1491 my $status = -1;
1492 my $docid = $doc_array_rec->{'docid'};
1493
1494 push(@all_docids,$docid);
1495
1496 my $metaname = $doc_array_rec->{'metaname'};
1497 if(defined $metaname) {
[27176]1498 my $metapos = $doc_array_rec->{'metapos'}; # can legitimately be undef
[27168]1499 my $metavalue = $doc_array_rec->{'metavalue'};
1500 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
1501
1502 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode);
1503 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1504 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1505
1506 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
1507 $metaname = $metatable_rec->{'metaname'};
1508 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
1509 my $metapos = undef;
1510 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1511
1512 foreach my $metavalue ( @$metavals ) { # metavals is an array
1513 $status = $self->set_index_metadata_entry($collect_dir,$collect,$infodbtype,$docid,$metaname,$metapos,$metavalue,$metamode); # how do we use metamode in set_meta_entry?
1514 if($metamode eq "override") { # now, having overridden the metavalue for the first,
1515 # need to accumulate subsequent metavals for this metaname, else the just-assigned
1516 # metavalue for this metaname will be lost
1517 $metamode = "accumulate";
1518 }
1519 }
1520 }
1521 }
1522
1523 if ($status != 0) {
1524 # Catch error if set infodb entry failed
1525 $global_status = $status;
1526 $global_mess .= "Failed to set metadata key: $docid\n";
1527 $global_mess .= "Exit status: $status\n";
1528 $global_mess .= "System Error Message: $!\n";
1529 $global_mess .= "-" x 20;
1530 }
1531 }
1532
1533 if ($global_status != 0) {
1534 $global_mess .= "PATH: $ENV{'PATH'}\n";
1535 $gsdl_cgi->generate_error($global_mess);
1536 }
1537 else {
1538 my $mess = "set-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1539 $gsdl_cgi->generate_ok_message($mess);
1540 }
1541}
1542
1543sub set_index_metadata_array
1544{
1545 my $self = shift @_;
1546
1547 my $username = $self->{'username'};
1548 my $collect = $self->{'collect'};
1549 my $gsdl_cgi = $self->{'gsdl_cgi'};
1550# my $gsdlhome = $self->{'gsdlhome'};
1551
1552 if ($baseaction::authentication_enabled) {
[27295]1553 # Ensure the user is allowed to edit this collection
1554 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27168]1555 }
1556
[24071]1557 my $site = $self->{'site'};
1558 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1559
1560 $gsdl_cgi->checked_chdir($collect_dir);
1561
1562 # Obtain the collect dir
1563 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
1564
1565 # Make sure the collection isn't locked by someone else
1566 $self->lock_collection($username, $collect);
1567
[27168]1568 $self->_set_index_metadata_array(@_);
1569
1570 # Release the lock once it is done
1571 $self->unlock_collection($username, $collect);
1572}
1573
1574# experimental, newly added in and untested
1575sub _set_live_metadata_array
1576{
1577 my $self = shift @_;
1578
1579 my $collect = $self->{'collect'};
1580 my $gsdl_cgi = $self->{'gsdl_cgi'};
1581
1582 my $site = $self->{'site'};
1583 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1584
1585
[24071]1586 # look up additional args
[27168]1587 my $infodbtype = $self->{'infodbtype'};
1588 # To people who know $collect_tail please add some comments
1589 # Obtain path to the database
1590 my $collect_tail = $collect;
[27336]1591 $collect_tail =~ s/^.*[\/|\\]//;
[27168]1592 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
1593 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
1594
1595
1596 my $json_str = $self->{'json'};
1597 my $doc_array = decode_json $json_str;
1598
1599
1600 my $global_status = 0;
1601 my $global_mess = "";
1602
1603 my @all_docids = ();
1604
1605
1606 foreach my $doc_array_rec ( @$doc_array ) {
[24071]1607
[27168]1608 my $status = -1;
1609 my $docid = $doc_array_rec->{'docid'};
1610
1611 push(@all_docids,$docid);
1612
1613 my $metaname = $doc_array_rec->{'metaname'};
1614 if(defined $metaname) {
1615 my $dbkey = "$docid.$metaname";
1616 my $metavalue = $doc_array_rec->{'metavalue'};
1617
1618 # Set the new value
[27176]1619 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[27168]1620 $status = system($cmd);
1621
1622 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
1623 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
1624 foreach my $metatable_rec ( @$metatable ) {
1625 $metaname = $metatable_rec->{'metaname'};
1626 my $dbkey = "$docid.$metaname";
1627
1628 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
1629 foreach my $metavalue ( @$metavals ) {
[27176]1630 my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
[27168]1631 $status = system($cmd);
[24071]1632 }
[27168]1633 }
1634
[24071]1635 }
1636
[27168]1637 if ($status != 0) {
1638 # Catch error if gdbmget failed
1639 $global_status = $status;
1640 $global_mess .= "Failed to set metadata key: $docid\n"; # $dbkey
1641 $global_mess .= "Exit status: $status\n";
1642 $global_mess .= "System Error Message: $!\n";
1643 $global_mess .= "-" x 20;
1644 }
[24071]1645 }
[27168]1646
1647 if ($global_status != 0) {
1648 $global_mess .= "PATH: $ENV{'PATH'}\n";
1649 $gsdl_cgi->generate_error($global_mess);
1650 }
[24071]1651 else {
[27168]1652 my $mess = "set-live-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
1653 $gsdl_cgi->generate_ok_message($mess);
[24071]1654 }
[27168]1655}
1656
1657sub set_live_metadata_array
1658{
1659 my $self = shift @_;
1660
1661 my $username = $self->{'username'};
1662 my $collect = $self->{'collect'};
1663 my $gsdl_cgi = $self->{'gsdl_cgi'};
1664
1665 if ($baseaction::authentication_enabled) {
[27295]1666 # Ensure the user is allowed to edit this collection
1667 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27168]1668 }
1669
1670 my $site = $self->{'site'};
1671 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
1672
1673 $gsdl_cgi->checked_chdir($collect_dir);
1674
1675 # Make sure the collection isn't locked by someone else
1676 $self->lock_collection($username, $collect);
1677
1678 $self->_set_live_metadata_array(@_);
1679
[24071]1680 # Release the lock once it is done
1681 $self->unlock_collection($username, $collect);
1682}
1683
1684
[20538]1685sub dxml_metadata
1686{
1687 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1688 my $metaname = $parser->{'parameters'}->{'metaname'};
1689 my $metamode = $parser->{'parameters'}->{'metamode'};
[25891]1690
[28160]1691###!!! print STDERR "**** Processing closing </Metadata> tag\n";
[23761]1692
1693 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1694
1695 # Find the right metadata tag and checks if we are going to
1696 # override it
1697 #
1698 # Note: This over writes the first metadata block it
1699 # encountered. If there are multiple Sections in the doc.xml, it
1700 # might not behave as you would expect
[20538]1701
[23761]1702 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1703## print STDERR "**** checking $opt_doc_secnum <=> $curr_secnum\n";
1704## print STDERR "**** metamode = $metamode\n";
1705
[25097]1706 if ((!defined $opt_doc_secnum) || ($opt_doc_secnum eq $curr_secnum))
1707 {
1708 my $name_attr = $attrHash->{'name'};
1709 if (($name_attr eq $metaname) && ($metamode eq "override"))
1710 {
1711 if (!defined $parser->{'parameters'}->{'poscount'})
1712 {
1713 $parser->{'parameters'}->{'poscount'} = 0;
1714 }
1715 else
1716 {
1717 $parser->{'parameters'}->{'poscount'}++;
1718 }
1719
1720 if(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
1721 {
[27111]1722 ##print STDERR "#### got match!!\n";
[25097]1723 # Get the value and override the current value
1724 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1725 $attrHash->{'_content'} = $metavalue;
1726
1727 # Don't want it to wipe out any other pieces of metadata
1728 $parser->{'parameters'}->{'metamode'} = "done";
1729 }
[25891]1730 elsif(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
[25097]1731 {
[25102]1732 my $metavalue = $parser->{'parameters'}->{'metavalue'};
[25097]1733 $attrHash->{'_content'} = $metavalue;
1734 $parser->{'parameters'}->{'metamode'} = "done";
1735 }
1736 }
[20538]1737 }
1738
[21716]1739 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1740 return [$tagname => $attrHash];
[20538]1741}
1742
[27111]1743# This method exists purely for catching invalid section numbers that the client
1744# requested to edit. Once the parser has reached the end (the final </Archive> tag),
1745# we've seen all the Sections in the doc.xml, and none of their section nums matched
1746# if the metamode has not been set to 'done' by then.
1747sub dxml_archive
1748{
1749 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1750 my $metamode = $parser->{'parameters'}->{'metamode'};
1751
1752 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
1753 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'};
1754
1755# print STDERR "@@@ $tagname Processing a closing </Archive> tag [$curr_secnum|$opt_doc_secnum]\n";
1756
1757 if ($metamode ne "done" && $curr_secnum ne $opt_doc_secnum) {
[27112]1758 print STDERR "@@@ $tagname Finished processing FINAL Section.\n";
[20538]1759
[27111]1760 my $metaname = $parser->{'parameters'}->{'metaname'};
1761 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1762
[27112]1763 print STDERR "@@@ Requested section number $opt_doc_secnum not found.\n";
1764 print STDERR "\t(last seen section number in document was $curr_secnum)\n";
1765 print STDERR "\tDiscarded metadata value '$metavalue' for meta '$metaname'\n";
1766 print STDERR "\tin section $opt_doc_secnum.\n";
1767 $parser->{'custom_err_msg'} = "Requested section number $opt_doc_secnum not found.";
[27111]1768 }
1769
1770 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1771 return [$tagname => $attrHash];
1772}
1773
[20538]1774sub dxml_description
1775{
[25558]1776 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1777 my $metamode = $parser->{'parameters'}->{'metamode'};
[20538]1778
[27111]1779 my $curr_secnum = $parser->{'parameters'}->{'curr_section_num'};
[27173]1780 my $opt_doc_secnum = $parser->{'parameters'}->{'secnum'} || "";
[27111]1781
[28160]1782###!!! print STDERR "**** Processing a closing </Description> tag \n";
[27111]1783# print STDERR "@@@ $tagname Processing a closing </Description> tag [$curr_secnum|$opt_doc_secnum]\n";
[25891]1784
[25558]1785 # Accumulate the metadata
[23761]1786
[27111]1787 # We'll be accumulating metadata at this point if we haven't found and therefore
1788 # haven't processed the metadata yet.
1789 # For subsections, this means that if we're at a matching subsection, but haven't
1790 # found the correct metaname to override in that subsection, we accumulate it as new
1791 # meta in the subsection by adding it to the current description.
1792 # If there's no subsection info for the metadata, it will accumulate at the top level
1793 # section description if we hadn't found a matching metaname to override at this point.
1794
1795 # Both curr_secnum and opt_doc_secnum can be "". In the former case, it means we're now
1796 # at the toplevel section. In the latter case, it means we want to process meta in the
1797 # toplevel section. So the eq check between the values below will work in all cases.
1798
1799 # The only time this won't work is if an opt_doc_secnum beyond the section numbers of
1800 # this document has been provided. In that case, the metadata for that opt_doc_secnum
1801 # won't get attached/accumulated to any part of the doc, not even its top-level section.
1802
1803 if ($curr_secnum eq $opt_doc_secnum
1804 && ($metamode eq "accumulate" || $metamode eq "override")) {
[27007]1805 if ($metamode eq "override") {
1806 print "No metadata value to override. Switching 'metamode' to accumulate\n";
1807 }
[20538]1808
[27173]1809 # If we get to here and metamode is override, this means there
[27007]1810 # was no existing value to overide => treat as an append operation
1811
1812 # Tack a new metadata tag on to the end of the <Metadata>+ block
1813 my $metaname = $parser->{'parameters'}->{'metaname'};
1814 my $metavalue = $parser->{'parameters'}->{'metavalue'};
1815
1816 my $metadata_attr = {
1817 '_content' => $metavalue,
1818 'name' => $metaname,
1819 'mode' => "accumulate"
1820 };
1821
1822 my $append_metadata = [ "Metadata" => $metadata_attr ];
1823 my $description_content = $attrHash->{'_content'};
1824
1825 print "Appending metadata to doc.xml\n";
1826
1827 if (ref($description_content)) {
1828 # got some existing interesting nested content
1829 push(@$description_content, " ", $append_metadata ,"\n ");
1830 }
1831 else {
1832 #description_content is most likely a string such as "\n"
1833 $attrHash->{'_content'} = [$description_content, " ", $append_metadata ,"\n" ];
1834 }
1835
1836 $parser->{'parameters'}->{'metamode'} = "done";
1837 }
[25891]1838 else {
[27007]1839 # metamode most likely "done" signifying that it has already found a position to add the metadata to.
1840## print STDERR "**** NOT ACCUMULATE?!? \n";
[25891]1841 }
[20538]1842
[21716]1843 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
1844 return [$tagname => $attrHash];
[20538]1845}
1846
[21715]1847
[23761]1848sub dxml_start_section
1849{
1850 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1851
1852 my $new_depth = scalar(@$contextArray);
1853
[27324]1854# print STDERR "**** START SECTION \n";
[25891]1855
[23761]1856 if ($new_depth == 1) {
1857 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1858 $parser->{'parameters'}->{'curr_section_num'} = "";
1859 }
1860
1861 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1862 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1863
1864 my $new_secnum;
1865
1866 if ($new_depth > $old_depth) {
1867 # child subsection
1868 $new_secnum = "$old_secnum.1";
1869 }
1870 elsif ($new_depth == $old_depth) {
1871 # sibling section => increase it's value by 1
1872 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1873 $tail_num++;
1874 $new_secnum = $old_secnum;
1875 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1876 }
1877 else {
1878 # back up to parent section => lopp off tail
1879 $new_secnum = $old_secnum;
1880 $new_secnum =~ s/\.\d+$//;
1881 }
1882
1883 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1884 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
[25891]1885
1886 1;
[23761]1887}
1888
[20538]1889sub edit_xml_file
1890{
1891 my $self = shift @_;
[23761]1892 my ($gsdl_cgi, $filename, $start_rules, $rules, $options) = @_;
[20538]1893
1894 # use XML::Rules to add it in (read in and out again)
[23761]1895 my $parser = XML::Rules->new(start_rules => $start_rules,
1896 rules => $rules,
1897 style => 'filter',
1898 output_encoding => 'utf8' );
[20538]1899
1900 my $xml_in = "";
1901 if (!open(MIN,"<$filename")) {
1902 $gsdl_cgi->generate_error("Unable to read in $filename: $!");
1903 }
1904 else {
[21715]1905 # Read all the text in
[20538]1906 my $line;
1907 while (defined ($line=<MIN>)) {
1908 $xml_in .= $line;
1909 }
1910 close(MIN);
1911
[23761]1912 my $MOUT;
1913 if (!open($MOUT,">$filename")) {
[20538]1914 $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
1915 }
1916 else {
[23761]1917 # Matched lines will get handled by the call backs
1918## my $xml_out = "";
1919
1920 binmode($MOUT,":utf8");
1921 $parser->filter($xml_in,$MOUT, $options);
1922
1923# binmode(MOUT,":utf8");
1924# print MOUT $xml_out;
1925 close($MOUT);
[20538]1926 }
1927 }
[27112]1928
1929 # copy across any custom error information that was stored during parsing
1930 $self->{'error_msg'} = $parser->{'custom_err_msg'} if(defined $parser->{'custom_err_msg'});
[20538]1931}
1932
1933sub edit_doc_xml
1934{
1935 my $self = shift @_;
[25097]1936 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos, $metamode, $opt_secnum, $prevmetavalue) = @_;
[20538]1937
[27007]1938 my $info_mess = <<RAWEND;
1939****************************
1940 edit_doc_xml()
1941****************************
1942RAWEND
1943
[28159]1944 $info_mess .= " doc_xml_filename = $doc_xml_filename\n" if defined($doc_xml_filename);
1945 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1946 $info_mess .= " metapos = $metapos\n" if defined($metapos);
1947 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
1948 $info_mess .= " metamode = $metamode\n" if defined($metamode);
1949 $info_mess .= " opt_secnum = $opt_secnum\n" if defined($opt_secnum);
1950 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
1951
1952 $info_mess .= "****************************\n";
1953
[27007]1954 $gsdl_cgi->generate_message($info_mess);
[25891]1955
[23761]1956 # To monitor which section/subsection number we are in
1957 my @start_rules =
1958 ( 'Section' => \&dxml_start_section );
1959
[20538]1960 # use XML::Rules to add it in (read in and out again)
[21715]1961 # Set the call back functions
[20538]1962 my @rules =
[21716]1963 ( _default => 'raw',
[23761]1964 'Metadata' => \&dxml_metadata,
[27111]1965 'Description' => \&dxml_description,
1966 'Archive' => \&dxml_archive); # just for catching errors at end
[20538]1967
[21715]1968 # Sets the parameters
[20538]1969 my $options = { 'metaname' => $metaname,
1970 'metapos' => $metapos,
[23400]1971 'metavalue' => $metavalue,
[25097]1972 'metamode' => $metamode,
1973 'prevmetavalue' => $prevmetavalue };
[23400]1974
[23761]1975 if (defined $opt_secnum) {
1976 $options->{'secnum'} = $opt_secnum;
1977 }
1978
1979 $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@start_rules,\@rules,$options);
[20538]1980}
1981
[24071]1982sub set_archives_metadata_entry
1983{
[28159]1984 my $self = shift @_;
1985 my ($gsdl_cgi, $archive_dir, $collect_dir, $collect, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue) = @_;
1986
1987 my $info_mess = <<RAWEND;
[27007]1988****************************
1989 set_archives_metadata_entry()
1990****************************
1991RAWEND
1992
[28159]1993 $info_mess .= " archive_dir = $archive_dir\n" if defined($archive_dir);
1994 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
1995 $info_mess .= " collect = $collect\n" if defined($collect);
1996 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
1997 $info_mess .= " docid = $docid\n" if defined($docid);
1998 $info_mess .= " metaname = $metaname\n" if defined($metaname);
1999 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2000 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2001 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2002 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2003
2004 $info_mess .= "****************************\n";
2005
[27007]2006 $gsdl_cgi->generate_message($info_mess);
[25891]2007
[24071]2008 # Obtain the doc.xml path for the specified docID
2009 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
[20538]2010
[24071]2011 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2012 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid_root);
2013 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
2014
2015 # The $doc_xml_file is relative to the archives, and now let's get the full path
2016 my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
2017 my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
[27167]2018
[29084]2019 # If we're overriding everything, then $metamode=override combined with $metapos=undefined and $prevmetavalue=undefined
[27167]2020 # in which case, we need to remove all metavalues for the metaname at the given (sub)section
2021 # Thereafter, we will finally be setting the overriding metavalue for this metaname
[29084]2022 if (!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
2023 # remove all values of $metaname metadata
2024 $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_xml_file), $metaname, undef, undef, $docid_secnum, $metamode);
[27167]2025 }
[24071]2026 # Edit the doc.xml file with the specified metadata name, value and position.
2027 # TODO: there is a potential problem here as this edit_doc_xml function
2028 # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
2029 # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
2030
[27168]2031 # dxml_metadata method ignores metapos if metamode anything other than override
[24071]2032 $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
[25097]2033 $metaname,$metavalue,$metapos,$metamode,$docid_secnum,$prevmetavalue);
[27112]2034
2035 # return 0; # return 0 for now to indicate no error
2036 return (defined $self->{'error_msg'}) ? 1 : 0;
[24071]2037}
2038
2039
[20538]2040sub set_archives_metadata
2041{
2042 my $self = shift @_;
2043
2044 my $username = $self->{'username'};
2045 my $collect = $self->{'collect'};
2046 my $gsdl_cgi = $self->{'gsdl_cgi'};
[23400]2047
[20538]2048 if ($baseaction::authentication_enabled) {
[27168]2049 # Ensure the user is allowed to edit this collection
2050 $self->authenticate_user($username, $collect);
[20538]2051 }
2052
2053 # Make sure the collection isn't locked by someone else
2054 $self->lock_collection($username, $collect);
2055
[27157]2056 $self->_set_archives_metadata(@_);
[23400]2057
[24071]2058 # Release the lock once it is done
2059 $self->unlock_collection($username, $collect);
2060}
2061
[27168]2062sub _set_archives_metadata_array
2063{
2064 my $self = shift @_;
2065
2066 my $collect = $self->{'collect'};
2067 my $gsdl_cgi = $self->{'gsdl_cgi'};
2068 my $site = $self->{'site'};
2069 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[24071]2070
[27168]2071 # look up additional args
2072
2073 my $infodbtype = $self->{'infodbtype'};
2074
2075 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2076
2077 my $json_str = $self->{'json'};
2078 my $doc_array = decode_json $json_str;
2079
2080
2081 my $global_status = 0;
2082 my $global_mess = "";
2083
2084 my @all_docids = ();
2085
2086 foreach my $doc_array_rec ( @$doc_array ) {
2087 my $status = -1;
2088 my $docid = $doc_array_rec->{'docid'};
2089
2090 push(@all_docids,$docid);
2091
2092 my $metaname = $doc_array_rec->{'metaname'};
2093 if(defined $metaname) {
2094
[27176]2095 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2096
[27168]2097 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2098 my $metavalue = $doc_array_rec->{'metavalue'};
2099 my $prevmetavalue = $self->{'prevmetavalue'}; # to make this sub behave as _set_archives_metadata
2100
2101
2102 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2103 # make "accumulate" the default (less destructive, as it won't actually
2104 # delete any existing values)
2105 $metamode = "accumulate";
2106 }
2107
2108 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect, $infodbtype,$docid,
2109 $metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2110 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2111 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2112
2113 foreach my $metatable_rec ( @$metatable ) {
2114 $metaname = $metatable_rec->{'metaname'};
2115 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
2116 my $metapos = undef;
2117 my $prevmetavalue = undef;
2118 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2119
2120 foreach my $metavalue ( @$metavals ) {
2121 $status = $self->set_archives_metadata_entry($gsdl_cgi,$archive_dir, $collect_dir,$collect,$infodbtype,
2122 $docid,$metaname,$metapos,$metavalue,$metamode,$prevmetavalue);
2123
2124 if($metamode eq "override") { # now, having overridden the metavalue for the first,
2125 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2126 # metavalue for this metaname will be lost
2127 $metamode = "accumulate";
2128 }
2129 }
2130 }
2131 }
2132
2133 if ($status != 0) {
2134 # Catch error if set infodb entry failed
2135 $global_status = $status;
2136 $global_mess .= "Failed to set metadata key: $docid\n";
2137 $global_mess .= "Exit status: $status\n";
2138 $global_mess .= "System Error Message: $!\n";
2139 $global_mess .= "-" x 20 . "\n";
2140 }
2141 }
2142
2143 if ($global_status != 0) {
2144 $global_mess .= "PATH: $ENV{'PATH'}\n";
2145 $gsdl_cgi->generate_error($global_mess);
2146 }
2147 else {
2148 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2149 $gsdl_cgi->generate_ok_message($mess);
2150 }
2151}
2152
[24071]2153sub set_archives_metadata_array
2154{
2155 my $self = shift @_;
2156
2157 my $username = $self->{'username'};
2158 my $collect = $self->{'collect'};
2159 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]2160# my $gsdlhome = $self->{'gsdlhome'};
[24071]2161
2162 if ($baseaction::authentication_enabled) {
[27295]2163 # Ensure the user is allowed to edit this collection
2164 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[23400]2165 }
[23761]2166
[24071]2167 my $site = $self->{'site'};
2168 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2169
2170 $gsdl_cgi->checked_chdir($collect_dir);
2171
2172 # Obtain the collect dir
2173 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2174
2175 # Make sure the collection isn't locked by someone else
2176 $self->lock_collection($username, $collect);
2177
[27168]2178 $self->_set_archives_metadata_array(@_);
[20538]2179
[23761]2180 # Release the lock once it is done
2181 $self->unlock_collection($username, $collect);
[20538]2182}
2183
[27157]2184sub _remove_archives_metadata
[24943]2185{
2186 my $self = shift @_;
[20538]2187
[24943]2188 my $collect = $self->{'collect'};
2189 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]2190# my $gsdlhome = $self->{'gsdlhome'};
[24943]2191 my $infodbtype = $self->{'infodbtype'};
2192
2193 my $site = $self->{'site'};
2194
2195 # Obtain the collect and archive dir
2196 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2197
2198 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2199
2200 # look up additional args
2201 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
2202
2203 my $metaname = $self->{'metaname'};
2204 my $metapos = $self->{'metapos'};
[27168]2205
[27160]2206 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
2207
2208 # if the user hasn't told us what to delete, not having given a metavalue or metapos,
2209 # default to deleting the first metavalue for the given metaname
2210 # Beware that if both metapos AND metavalue are defined, both matches (if any)
2211 # seem to get deleted in one single remove_archives_meta action invocation.
2212 # Similarly, if 2 identical metavalues for a metaname exist and that metavalue is being
2213 # deleted, both get deleted.
2214 if(!defined $metapos && !defined $metavalue) {
2215 $metapos = 0;
2216 }
[27167]2217
[27168]2218 my $metamode = $self->{'metamode'} || undef;
2219
[24943]2220 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2221 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2222
2223 # This now stores the full pathname
[25097]2224 my $doc_filename = $doc_rec->{'doc-file'}->[0];
[24943]2225
[27167]2226 my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $metavalue, $docid_secnum, $metamode);
[27160]2227# my $status = $self->remove_from_doc_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, undef, $docid_secnum);
[24943]2228
2229 if ($status == 0)
2230 {
[29086]2231 my $mess = "\nremove-archives-metadata successful: \nKey[$docid]\n";
[24943]2232 $mess .= " $metaname";
2233 $mess .= "->[$metapos]" if (defined $metapos);
[29086]2234 $mess .= " ($metavalue)" if (defined $metavalue);
[24943]2235 $gsdl_cgi->generate_ok_message($mess);
2236 }
2237 else
2238 {
2239 my $mess .= "Failed to remove archives metadata key: $docid\n";
2240 $mess .= "Exit status: $status\n";
2241 $mess .= "System Error Message: $!\n";
2242 $mess .= "-" x 20 . "\n";
2243
2244 $gsdl_cgi->generate_error($mess);
2245 }
[27157]2246
2247 #return $status; # in case calling functions have a use for this
[24943]2248}
2249
[27157]2250sub remove_archives_metadata
2251{
2252 my $self = shift @_;
2253
2254 my $username = $self->{'username'};
2255 my $collect = $self->{'collect'};
2256 my $gsdl_cgi = $self->{'gsdl_cgi'};
2257
2258 if ($baseaction::authentication_enabled)
2259 {
[27295]2260 # Ensure the user is allowed to edit this collection
2261 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]2262 }
2263
2264 # Make sure the collection isn't locked by someone else
2265 $self->lock_collection($username, $collect);
2266
2267 $self->_remove_archives_metadata(@_);
2268
2269 # Release the lock once it is done
2270 $self->unlock_collection($username, $collect);
2271}
2272
[24943]2273sub remove_from_doc_xml
2274{
2275 my $self = shift @_;
[27167]2276 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $metavalue, $secid, $metamode) = @_;
[24943]2277
2278 my @start_rules = ('Section' => \&dxml_start_section);
2279
2280 # Set the call-back functions for the metadata tags
2281 my @rules =
2282 (
2283 _default => 'raw',
2284 'Metadata' => \&rfdxml_metadata
2285 );
2286
2287 my $parser = XML::Rules->new
2288 (
2289 start_rules => \@start_rules,
2290 rules => \@rules,
2291 style => 'filter',
[27173]2292 output_encoding => 'utf8',
2293# normalisespaces => 1, # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
2294 stripspaces => 2|0|0 # ineffectual
[24943]2295 );
2296
2297 my $status = 0;
2298 my $xml_in = "";
2299 if (!open(MIN,"<$doc_xml_filename"))
2300 {
2301 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
2302 $status = 1;
2303 }
2304 else
2305 {
2306 # Read them in
2307 my $line;
2308 while (defined ($line=<MIN>)) {
2309 $xml_in .= $line;
2310 }
2311 close(MIN);
2312
2313 # Filter with the call-back functions
2314 my $xml_out = "";
2315
2316 my $MOUT;
2317 if (!open($MOUT,">$doc_xml_filename")) {
2318 $gsdl_cgi->generate_error("Unable to write out to $doc_xml_filename: $!");
2319 $status = 1;
2320 }
2321 else {
2322 binmode($MOUT,":utf8");
[27167]2323 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, secid => $secid, metamode => $metamode});
[24943]2324 close($MOUT);
2325 }
2326 }
2327 return $status;
2328}
2329
2330sub rfdxml_metadata
2331{
2332 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2333
[27173]2334 # For comparisons, toplevel section is indicated by ""
2335 my $curr_sec_num = $parser->{'parameters'}->{'curr_section_num'} || "";
2336 my $secid = $parser->{'parameters'}->{'secid'} || "";
2337
2338 if (!($secid eq $curr_sec_num))
[24943]2339 {
2340 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2341 return [$tagname => $attrHash];
2342 }
2343
2344 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
2345 {
2346 if (!defined $parser->{'parameters'}->{'poscount'})
2347 {
2348 $parser->{'parameters'}->{'poscount'} = 0;
2349 }
2350 else
2351 {
2352 $parser->{'parameters'}->{'poscount'}++;
2353 }
[27168]2354
[27176]2355 # if overriding (for set-meta) but no metapos, then clear all the meta for this metaname
[29086]2356 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) &&(!defined $parser->{'parameters'}->{'metavalue'})) {
[27167]2357 return [];
2358 }
[27168]2359
[29098]2360 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
2361 {
2362 return [];
2363 }
2364
2365 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($parser->{'parameters'}->{'metavalue'} eq $attrHash->{'_content'}))
2366 {
2367 return [];
2368 }
[24943]2369 }
[24949]2370
[24943]2371 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2372 return [$tagname => $attrHash];
2373}
2374
[19293]2375sub mxml_metadata
2376{
2377 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2378 my $metaname = $parser->{'parameters'}->{'metaname'};
2379 my $metamode = $parser->{'parameters'}->{'metamode'};
2380
[21716]2381 # Report error if we don't see FileName tag before this
[27173]2382 die "Fatal Error: Unexpected metadata.xml structure. Undefined current_file, possibly encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]2383
2384 # Don't do anything if we are not in the right FileSet
2385 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[23761]2386 if ($file_regexp =~ /\.\*/) {
2387 # Only interested in a file_regexp if it specifies precisely one
2388 # file.
2389 # So, skip anything with a .* in it as it is too general
[27176]2390## print STDERR "@@@@ Skipping entry in metadata.xml where FileName=.* as it is too general\n";
[23761]2391 return [$tagname => $attrHash];
2392 }
2393 my $src_file = $parser->{'parameters'}->{'src_file'};
2394 if (!($src_file =~ /$file_regexp/)) {
2395 return [$tagname => $attrHash];
2396 }
2397## print STDERR "*** mxl metamode = $metamode\n";
2398
[21715]2399 # Find the right metadata tag and checks if we are going to override it
[19293]2400 my $name_attr = $attrHash->{'name'};
2401 if (($name_attr eq $metaname) && ($metamode eq "override")) {
[27176]2402
2403 # now metadata.xml functions need to keep track of metapos
2404 if (!defined $parser->{'parameters'}->{'poscount'})
2405 {
2406 $parser->{'parameters'}->{'poscount'} = 0;
2407 }
2408 else
2409 {
2410 $parser->{'parameters'}->{'poscount'}++;
2411 }
2412
2413 # If either the metapos or prevmetavalue is set,
2414 # get the value and override the current value
[19293]2415 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2416
[27176]2417 if(defined $parser->{'parameters'}->{'prevmetavalue'} && $parser->{'parameters'}->{'prevmetavalue'} eq $attrHash->{'_content'})
2418 {
2419 $attrHash->{'_content'} = $metavalue;
[23761]2420
[27176]2421 ## print STDERR "**** overriding metadata.xml\n";
2422
2423 # Don't want it to wipe out any other pieces of metadata
2424 $parser->{'parameters'}->{'metamode'} = "done";
2425 }
2426 elsif(defined $parser->{'parameters'}->{'metapos'} && $parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'})
2427 {
2428 $attrHash->{'_content'} = $metavalue;
2429 $parser->{'parameters'}->{'metamode'} = "done";
2430 }
2431 }
[19293]2432
[27176]2433 # mxml_description will process the metadata if metadata is accumulate,
2434 # or if we haven't found the metadata to override
2435
[21716]2436 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2437 return [$tagname => $attrHash];
[19293]2438}
2439
2440
2441sub mxml_description
2442{
2443 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]2444 my $metamode = $parser->{'parameters'}->{'metamode'};
[19293]2445
[21716]2446 # Failed... Report error if we don't see FileName tag before this
[25097]2447 die "Fatal Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
[21716]2448
2449 # Don't do anything if we are not in the right FileSet
2450 my $file_regexp = $parser->{'parameters'}->{'current_file'};
[24943]2451 if ($file_regexp =~ m/\.\*/) {
[23761]2452 # Only interested in a file_regexp if it specifies precisely one
2453 # file.
2454 # So, skip anything with a .* in it as it is too general
2455 return [$tagname => $attrHash];
2456 }
2457 my $src_file = $parser->{'parameters'}->{'src_file'};
[24943]2458
2459 if (!($src_file =~ m/$file_regexp/)) {
[23761]2460 return [$tagname => $attrHash];
2461 }
[21716]2462
[21715]2463 # Accumulate the metadata block to the end of the description block
2464 # Note: This adds metadata block to all description blocks, so if there are
2465 # multiple FileSets, it will add to all of them
[23761]2466 if (($metamode eq "accumulate") || ($metamode eq "override")) {
[27176]2467
[23761]2468 # if metamode was "override" but get to here then it failed to
2469 # find an item to override, in which case it should append its
2470 # value to the end, just like the "accumulate" mode
2471
[27176]2472 if ($metamode eq "override") {
2473 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2474 }
2475
[19293]2476 # tack a new metadata tag on to the end of the <Metadata>+ block
2477 my $metaname = $parser->{'parameters'}->{'metaname'};
2478 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2479
2480 my $metadata_attr = { '_content' => $metavalue,
2481 'name' => $metaname,
2482 'mode' => "accumulate" };
2483
2484 my $append_metadata = [ "Metadata" => $metadata_attr ];
2485 my $description_content = $attrHash->{'_content'};
[24943]2486
[23761]2487## print STDERR "*** appending to metadata.xml\n";
2488
2489 # append the new metadata element to the end of the current
2490 # content contained inside this tag
[24943]2491 if (ref($description_content) eq "") {
2492 # => string or numeric literal
2493 # this is caused by a <Description> block has no <Metadata> child elements
2494 # => set up an empty array in '_content'
2495 $attrHash->{'_content'} = [ "\n" ];
2496 $description_content = $attrHash->{'_content'};
2497 }
2498
[19293]2499 push(@$description_content," ", $append_metadata ,"\n ");
[23761]2500 $parser->{'parameters'}->{'metamode'} = "done";
[19293]2501 }
2502
[21716]2503 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2504 return [$tagname => $attrHash];
[19293]2505}
2506
[21715]2507
[21716]2508sub mxml_filename
2509{
2510 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2511
2512 # Store the filename of the Current Fileset
2513 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2514 # FileName tag must come before Description tag
2515 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
2516
2517 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2518 return [$tagname => $attrHash];
2519}
2520
2521
2522sub mxml_fileset
2523{
2524 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
2525
2526 # Initilise the current_file
2527 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
2528 # FileName tag must come before Description tag
2529 $parser->{'parameters'}->{'current_file'} = "";
2530
2531 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2532 return [$tagname => $attrHash];
2533}
2534
[27173]2535sub mxml_directorymetadata
2536{
2537 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
[21716]2538
[27173]2539 # if we haven't processed the metadata when we reach the end of metadata.xml
2540 # it's because there's no particular FileSet element whose FileName matched
2541 # In which case, add a new FileSet for this FileName
2542 my $metamode = $parser->{'parameters'}->{'metamode'};
2543 if($metamode ne "done") {
2544
2545 if ($metamode eq "override") {
2546 print "No metadata value to override. Switching 'metamode' to accumulate\n";
2547 }
2548
2549 # If we get to here and metamode is override, this means there
2550 # was no existing value to overide => treat as an append operation
2551
2552 # Create a new FileSet element and append to DirectoryMetadata
2553 # <FileSet>
2554 # <FileName>src_file</FileName>
2555 # <Description>
2556 # <Metadata mode="" name="">metavalue</Metadata>
2557 # </Description>
2558 # </FileSet>
2559 my $src_file = $parser->{'parameters'}->{'src_file'};
2560 my $metaname = $parser->{'parameters'}->{'metaname'};
2561 my $metavalue = $parser->{'parameters'}->{'metavalue'};
2562 my $metadata_attr = {
2563 '_content' => $metavalue,
2564 'name' => $metaname,
2565 'mode' => "accumulate"
2566 };
2567 my $append_metadata = [ "Metadata" => $metadata_attr ];
[28159]2568 my $description_attr = { '_content' => [ "\n\t\t ", $append_metadata, "\n\t\t"] };
[27173]2569 my $description_element = [ "Description" => $description_attr ];
2570
2571 #_content is not an attribute, it's special and holds the children of this element
2572 # including the textnode value embedded in this element if any.
2573 my $filename_attr = {'_content' => $src_file};
2574 my $filename_element = [ "FileName" => $filename_attr ];
2575
2576 my $fileset_attr = {};
2577 $fileset_attr->{'_content'} = [ "\n\t\t", $filename_element,"\n\t\t",$description_element ,"\n\t" ];
2578 my $fileset = [ "FileSet" => $fileset_attr ]; #my $fileset = [ "FileSet" => {} ];
2579
2580
2581 # get children of dirmeta, and push the new FileSet element onto it
2582 print "Appending metadata to metadata.xml\n";
2583 my $dirmeta_content = $attrHash->{'_content'};
2584 if (ref($dirmeta_content)) {
2585 # got some existing interesting nested content
2586 #push(@$dirmeta_content, " ", $fileset ,"\n ");
2587 push(@$dirmeta_content, "\t", $fileset ,"\n");
2588 }
2589 else {
2590 #description_content is most likely a string such as "\n"
2591 #$attrHash->{'_content'} = [$dirmeta_content, " ", $fileset ,"\n" ];
2592 $attrHash->{'_content'} = [$dirmeta_content, "\t", $fileset ,"\n" ];
2593 }
2594
2595 $parser->{'parameters'}->{'metamode'} = "done";
2596 }
2597 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
2598 return [$tagname => $attrHash];
2599}
2600
2601
[19293]2602sub edit_metadata_xml
2603{
2604 my $self = shift @_;
[27176]2605 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $metamode, $src_file, $prevmetavalue) = @_;
[19293]2606
[21715]2607 # Set the call-back functions for the metadata tags
[19293]2608 my @rules =
[21716]2609 ( _default => 'raw',
2610 'FileName' => \&mxml_filename,
[19293]2611 'Metadata' => \&mxml_metadata,
[21716]2612 'Description' => \&mxml_description,
[27173]2613 'FileSet' => \&mxml_fileset,
2614 'DirectoryMetadata' => \&mxml_directorymetadata);
[19293]2615
[21715]2616 # use XML::Rules to add it in (read in and out again)
[19293]2617 my $parser = XML::Rules->new(rules => \@rules,
[21716]2618 style => 'filter',
[27173]2619 output_encoding => 'utf8',
2620 stripspaces => 2|0|0); # http://search.cpan.org/~jenda/XML-Rules-1.16/lib/XML/Rules.pm
[19293]2621
[24943]2622 if (!-e $metadata_xml_filename) {
2623
2624 if (open(MOUT,">$metadata_xml_filename")) {
2625
2626 my $src_file_re = &util::filename_to_regex($src_file);
2627 # shouldn't the following also be in the above utility routine??
2628 # $src_file_re =~ s/\./\\./g;
2629
2630 print MOUT "<?xml version=\"1.0\"?>\n";
2631 print MOUT "<DirectoryMetadata>\n";
2632 print MOUT " <FileSet>\n";
2633 print MOUT " <FileName>$src_file_re</FileName>\n";
2634 print MOUT " <Description>\n";
2635 print MOUT " </Description>\n";
2636 print MOUT " </FileSet>\n";
2637 print MOUT "</DirectoryMetadata>\n";
[23761]2638
[24943]2639 close(MOUT);
2640 }
2641 else {
2642 $gsdl_cgi->generate_error("Unable to create $metadata_xml_filename: $!");
2643 }
[19293]2644 }
[24943]2645
2646
2647 my $xml_in = "";
2648 if (!open(MIN,"<$metadata_xml_filename")) {
2649 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
2650 }
[19293]2651 else {
[24943]2652 # Read them in
2653 my $line;
2654 while (defined ($line=<MIN>)) {
2655 $xml_in .= $line;
2656 }
2657 close(MIN);
[23761]2658
[24943]2659 # Filter with the call-back functions
2660 my $xml_out = "";
[23761]2661
[24943]2662 my $MOUT;
2663 if (!open($MOUT,">$metadata_xml_filename")) {
2664 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
2665 }
2666 else {
2667 binmode($MOUT,":utf8");
2668
2669 # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
2670 # At the moment, I will just hack it!
2671 #my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
2672 #$header_with_utf8_dtd .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
2673 #$xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
2674 #print MOUT $xml_out;
2675
2676 $parser->filter($xml_in, $MOUT, { metaname => $metaname,
[27176]2677 metapos => $metapos,
[24943]2678 metavalue => $metavalue,
2679 metamode => $metamode,
2680 src_file => $src_file,
[27176]2681 prevmetavalue => $prevmetavalue,
[24943]2682 current_file => undef} );
2683 close($MOUT);
2684 }
2685 }
[20538]2686}
[19293]2687
2688
2689sub set_import_metadata
2690{
2691 my $self = shift @_;
[21715]2692
[19293]2693 my $username = $self->{'username'};
2694 my $collect = $self->{'collect'};
2695 my $gsdl_cgi = $self->{'gsdl_cgi'};
[23400]2696
[19293]2697 if ($baseaction::authentication_enabled) {
[27295]2698 # Ensure the user is allowed to edit this collection
2699 $self->authenticate_user($username, $collect);
[19293]2700 }
2701
2702 # Make sure the collection isn't locked by someone else
2703 $self->lock_collection($username, $collect);
[27157]2704
2705 $self->_set_import_metadata(@_);
[19293]2706
[21715]2707 # Release the lock once it is done
2708 $self->unlock_collection($username, $collect);
[23761]2709
[19293]2710}
2711
[25097]2712sub set_import_metadata_array
2713{
[27168]2714 my $self = shift @_;
[25097]2715
2716 my $username = $self->{'username'};
2717 my $collect = $self->{'collect'};
2718 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]2719# my $gsdlhome = $self->{'gsdlhome'};
[25097]2720
2721 if ($baseaction::authentication_enabled) {
[27295]2722 # Ensure the user is allowed to edit this collection
2723 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[25097]2724 }
2725
[27168]2726 my $site = $self->{'site'};
2727 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2728
[25097]2729 $gsdl_cgi->checked_chdir($collect_dir);
2730
2731 # Make sure the collection isn't locked by someone else
2732 $self->lock_collection($username, $collect);
2733
[27168]2734 $self->_set_import_metadata_array(@_);
[25097]2735
[27168]2736 # Release the lock once it is done
2737 $self->unlock_collection($username, $collect);
2738
2739}
2740
2741
2742sub _set_import_metadata_array
2743{
2744 my $self = shift @_;
2745
2746 my $collect = $self->{'collect'};
2747 my $gsdl_cgi = $self->{'gsdl_cgi'};
2748
2749 my $site = $self->{'site'};
2750 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[25097]2751
[27168]2752 # look up additional args
[25097]2753
[27168]2754 my $infodbtype = $self->{'infodbtype'};
2755
2756 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
2757 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2758
2759 my $json_str = $self->{'json'};
2760 my $doc_array = decode_json $json_str;
2761
2762 my $global_status = 0;
2763 my $global_mess = "";
2764
2765 my @all_docids = ();
2766
2767 foreach my $doc_array_rec ( @$doc_array )
2768 {
2769 my $status = -1;
2770 my $docid = $doc_array_rec->{'docid'};
[25097]2771
[27176]2772 my ($docid_root,$docid_secnum);
2773 if(defined $docid) {
2774 ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
2775 # as yet no support for setting subsection metadata in metadata.xml
2776 if ((defined $docid_secnum) && ($docid_secnum !~ m/^\s*$/)) {
2777 $gsdl_cgi->generate_message("*** docid: $docid. No support yet for setting import metadata at subsections level.\n");
2778 next; # skip this docid in for loop
2779 }
2780 }
2781
2782 push(@all_docids,$docid); # docid_root rather
[25097]2783
[27168]2784 my $metaname = $doc_array_rec->{'metaname'};
2785 if (defined $metaname) {
2786 my $metamode = $doc_array_rec->{'metamode'} || $self->{'metamode'};
2787 my $metavalue = $doc_array_rec->{'metavalue'};
[27176]2788 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2789
[27168]2790 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
2791 # make "accumulate" the default (less destructive, as won't actually
2792 # delete any existing values)
2793 $metamode = "accumulate";
2794 }
[27176]2795
2796 # adding metapos and prevmetavalue support to import_metadata subroutines
2797 my $metapos = $doc_array_rec->{'metapos'}; # don't force undef to 0. Undef has meaning when metamode=override
2798 my $prevmetavalue = $self->{'prevmetavalue'};
2799
2800 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27168]2801
2802 } elsif (defined $doc_array_rec->{'metatable'}) { # if no metaname, we expect a metatable
2803 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
2804
2805 foreach my $metatable_rec ( @$metatable ) {
2806 $metaname = $metatable_rec->{'metaname'};
2807 my $metamode = $metatable_rec->{'metamode'} || $doc_array_rec->{'metamode'} || $self->{'metamode'};
[25097]2808 if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
[27168]2809 # make "accumulate" the default (less destructive, as won't actually
2810 # delete any existing values)
2811 $metamode = "accumulate";
2812 }
[27176]2813
2814 # No support for metapos and prevmetavalue in the JSON metatable substructure
2815 my $metapos = undef;
2816 my $prevmetavalue = undef;
[27168]2817 my $metavals = $metatable_rec->{'metavals'}; # a sub-subarray
2818
2819 foreach my $metavalue ( @$metavals ) {
[27176]2820 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2821
2822 $self->set_import_metadata_entry($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid_root, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir); # at this point, docid_root = docid
[27168]2823 if($metamode eq "override") { # now, having overridden the first metavalue of the metaname,
2824 # need to accumulate subsequent metavals for this metaname, else the just-assigned
2825 # metavalue for this metaname will be lost
[25097]2826 $metamode = "accumulate";
[27168]2827 }
2828 }
2829 }
2830 }
2831 }
[25097]2832
[27168]2833 # always a success message
2834 my $mess = "set-archives-metadata-array successful: Keys[ ".join(", ",@all_docids)."]\n";
2835 $gsdl_cgi->generate_ok_message($mess);
2836}
[25097]2837
[27168]2838# always returns true (1)
2839sub set_import_metadata_entry
2840{
2841 my $self = shift @_;
[27176]2842 my ($gsdl_cgi, $arcinfo_doc_filename, $infodbtype, $docid, $metaname, $metapos, $metavalue, $metamode, $prevmetavalue, $collect, $collect_dir) = @_;
[25097]2843
[27176]2844 my $info_mess = <<RAWEND;
2845****************************
2846 set_import_metadata_entry()
2847****************************
2848RAWEND
2849
[28159]2850 $info_mess .= " collect_dir = $collect_dir\n" if defined($collect_dir);
2851 $info_mess .= " collect = $collect\n" if defined($collect);
2852 $info_mess .= " infodbtype = $infodbtype\n" if defined($infodbtype);
2853 $info_mess .= " arcinfo_doc_filename = $arcinfo_doc_filename\n" if defined($arcinfo_doc_filename);
2854 $info_mess .= " docid = $docid\n" if defined($docid);
2855 $info_mess .= " metaname = $metaname\n" if defined($metaname);
2856 $info_mess .= " metapos = $metapos\n" if defined($metapos);
2857 $info_mess .= " metavalue = $metavalue\n" if defined($metavalue);
2858 $info_mess .= " metamode = $metamode\n" if defined($metamode);
2859 $info_mess .= " prevmetaval = $prevmetavalue\n" if defined($prevmetavalue);
2860
2861 $info_mess .= "****************************\n";
2862
[27176]2863 $gsdl_cgi->generate_message($info_mess);
2864
2865 # import works with metadata.xml which can have inherited metadata
2866 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2867 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2868 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2869 # what they're doing if they provide a metapos.
2870 if(defined $metapos) {
2871 print STDERR "@@@@ WARNING: metapos defined.\n";
2872 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2873 }
2874
[27168]2875 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2876 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2877 my $metadata_xml_file;
2878 my $import_filename = undef;
2879
[27176]2880 if (defined $docid) {
2881 # my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2882 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[25097]2883
[27176]2884 # This now stores the full pathname
2885 $import_filename = $doc_rec->{'src-file'}->[0];
[28211]2886 $import_filename = &util::placeholders_to_abspath($import_filename);
2887
[27176]2888 } else { # only for set_import_meta, not the case when calling method is set_import_metadata_array
2889 # as the array version of the method doesn't support the -f parameter yet
2890 my $import_file = $self->{'f'};
2891 $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
2892 }
[25097]2893
[27168]2894 # figure out correct metadata.xml file [?]
2895 # Assuming the metadata.xml file is next to the source file
2896 # Note: This will not work if it is using the inherited metadata from the parent folder
2897 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2898 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2899
[29098]2900 # If we're overriding everything, then $prevmetavalue=undefined and
2901 # $metamode=override combined with $metapos=undefined
2902 # in which case we need to remove all metavalues for the metaname at the given (sub)section
[27176]2903 # Thereafter, we will finally be able to set the overriding metavalue for this metaname
[29098]2904 if(!defined $prevmetavalue && !defined $metapos && $metamode eq "override") {
[27176]2905## print STDERR "@@@ REMOVING all import metadata for $metaname\n";
2906 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, undef, $import_tailname, $metamode); # we're removing all values, so metavalue=undef
2907
2908 }
2909
2910 # Edit the metadata.xml
2911 # Modified by Jeffrey from DL Consulting
2912 # Handle the case where there is one metadata.xml file for multiple FileSets
2913 # The XML filter needs to know whether it is in the right FileSet
2914 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
2915 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
2916 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
2917 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
2918 #return 0;
2919 return $metadata_xml_filename;
[25097]2920}
2921
[27157]2922sub _remove_import_metadata
[24943]2923{
2924 my $self = shift @_;
[27157]2925
[24943]2926 my $collect = $self->{'collect'};
2927 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27168]2928# my $gsdlhome = $self->{'gsdlhome'};
[24943]2929 my $infodbtype = $self->{'infodbtype'};
2930
2931 # Obtain the collect dir
2932 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2933 my $site = $self->{'site'};
2934 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
2935
2936 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
2937 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
[27157]2938
[24943]2939 # look up additional args
2940 my $docid = $self->{'d'};
2941 if ((!defined $docid) || ($docid =~ m/^\s*$/))
2942 {
[27173]2943 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
[24943]2944 }
2945
2946 my $metaname = $self->{'metaname'};
[27176]2947 my $metapos = $self->{'metapos'};
[24943]2948 my $metavalue = $self->{'metavalue'};
[27176]2949 if(defined $metavalue) {
2950 $metavalue =~ s/&lt;(.*?)&gt;/<$1>/g;
2951 } elsif (!defined $metapos) { # if given no metavalue or metapos to delete, default to deleting the 1st
2952 $metapos = 0;
2953 }
2954 my $metamode = $self->{'metamode'} || undef;
2955
2956 # import works with metadata.xml which can have inherited metadata
2957 # so setting or removing at a metapos can have unintended effects for a COMPLEX collection
2958 # (a collection that has or can have inherited metadata). Metapos has expected behaviour for
2959 # a SIMPLE collection, which is one that doesn't have inherited metadata. Assume caller knows
2960 # what they're doing if they provide a metapos.
2961 if(defined $metapos) {
2962 print STDERR "@@@@ WARNING: metapos defined.\n";
2963 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to modify the import meta at $metapos.\n";
2964 }
[24943]2965
2966 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
2967 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
2968 my $metadata_xml_file;
2969 my $import_filename = undef;
2970 if (defined $docid)
2971 {
2972 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
2973 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
2974
2975 # This now stores the full pathname
2976 $import_filename = $doc_rec->{'src-file'}->[0];
[28211]2977 $import_filename = &util::placeholders_to_abspath($import_filename);
[24943]2978 }
2979
2980 if((!defined $import_filename) || ($import_filename =~ m/^\s*$/))
2981 {
[27173]2982 $gsdl_cgi->generate_error("There is no metadata\n");
[24943]2983 }
2984
2985 # figure out correct metadata.xml file [?]
2986 # Assuming the metadata.xml file is next to the source file
2987 # Note: This will not work if it is using the inherited metadata from the parent folder
2988 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
2989 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
2990
[27176]2991 $self->remove_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $import_tailname, $metamode); # metamode has no meaning for removing meta, but is used by set_meta when overriding All
[24943]2992
2993 my $mess = "remove-import-metadata successful: Key[$docid] -> $metadata_xml_filename\n";
2994 $mess .= " $metaname";
2995 $mess .= " = $metavalue\n";
2996
2997 $gsdl_cgi->generate_ok_message($mess);
[27157]2998
2999 #return $status; # in case calling functions have a use for this
[24943]3000}
3001
[27157]3002sub remove_import_metadata
3003{
3004 my $self = shift @_;
3005
3006 my $username = $self->{'username'};
3007 my $collect = $self->{'collect'};
3008 my $gsdl_cgi = $self->{'gsdl_cgi'};
3009
3010 if ($baseaction::authentication_enabled) {
[27295]3011 # Ensure the user is allowed to edit this collection
3012 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]3013 }
3014
3015 # Make sure the collection isn't locked by someone else
3016 $self->lock_collection($username, $collect);
3017
3018 $self->_remove_import_metadata(@_);
3019
3020 # Release the lock once it is done
3021 $self->unlock_collection($username, $collect);
3022
3023}
3024
[24943]3025sub remove_from_metadata_xml
3026{
3027 my $self = shift @_;
[27176]3028 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $metavalue, $src_file, $metamode) = @_;
3029 # metamode generally has no meaning for removing meta, but is used by set_meta
3030 # when overriding all metavals for a metaname, in which case remove_meta is called with metamode
3031
[24943]3032 # Set the call-back functions for the metadata tags
3033 my @rules =
3034 (
3035 _default => 'raw',
3036 'Metadata' => \&rfmxml_metadata,
3037 'FileName' => \&mxml_filename
3038 );
3039
3040 my $parser = XML::Rules->new
3041 (
3042 rules => \@rules,
3043 style => 'filter',
[27173]3044 output_encoding => 'utf8',
3045 #normalisespaces => 1,
3046 stripspaces => 2|0|0 # ineffectual
[24943]3047 );
3048
3049 my $xml_in = "";
3050 if (!open(MIN,"<$metadata_xml_filename"))
3051 {
3052 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
3053 }
3054 else
3055 {
3056 # Read them in
3057 my $line;
3058 while (defined ($line=<MIN>)) {
3059 $xml_in .= $line;
3060 }
3061 close(MIN);
3062
3063 # Filter with the call-back functions
3064 my $xml_out = "";
3065
3066 my $MOUT;
3067 if (!open($MOUT,">$metadata_xml_filename")) {
3068 $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
3069 }
3070 else {
3071 binmode($MOUT,":utf8");
[27176]3072 $parser->filter($xml_in, $MOUT, {metaname => $metaname, metapos => $metapos, metavalue => $metavalue, src_file => $src_file, metamode => $metamode, current_file => undef});
[24943]3073 close($MOUT);
3074 }
3075 }
3076}
3077
3078sub rfmxml_metadata
3079{
3080 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
3081
[27176]3082 # metadata.xml does not handle subsections
3083
3084 # since metadata.xml now has to deal with metapos, we keep track of the metadata position
3085 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'})
3086 && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
[24943]3087 {
[27176]3088 if (!defined $parser->{'parameters'}->{'poscount'})
3089 {
3090 $parser->{'parameters'}->{'poscount'} = 0;
3091 }
3092 else
3093 {
3094 $parser->{'parameters'}->{'poscount'}++;
3095 }
3096
3097 # if overriding but no metapos, then clear all the meta for this metaname
[29098]3098 if ((defined $parser->{'parameters'}->{'metamode'}) && ($parser->{'parameters'}->{'metamode'} eq "override") && (!defined $parser->{'parameters'}->{'metapos'}) && (!defined $parser->{'parameters'}->{'metavalue'})) {
[27176]3099 return [];
3100 }
3101
3102 if ((defined $parser->{'parameters'}->{'metapos'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
3103 {
3104 return [];
3105 }
3106 if ((defined $parser->{'parameters'}->{'metavalue'}) && ($attrHash->{'_content'} eq $parser->{'parameters'}->{'metavalue'}))
3107 {
3108 return [];
3109 }
[24943]3110 }
3111
3112 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
3113 return [$tagname => $attrHash];
3114}
3115
[27157]3116sub _remove_live_metadata
[19499]3117{
3118 my $self = shift @_;
3119
3120 my $collect = $self->{'collect'};
3121 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27157]3122# my $gsdlhome = $self->{'gsdlhome'};
[23400]3123 my $infodbtype = $self->{'infodbtype'};
[19499]3124
[21715]3125 # Obtain the collect dir
[23766]3126 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[27157]3127 my $site = $self->{'site'};
3128 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
[19499]3129
[27157]3130
[19499]3131 # look up additional args
3132 my $docid = $self->{'d'};
[21715]3133 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3134 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3135 }
3136
3137 # Generate the dbkey
[19499]3138 my $metaname = $self->{'metaname'};
3139 my $dbkey = "$docid.$metaname";
3140
[21715]3141 # To people who know $collect_tail please add some comments
3142 # Obtain the live gdbm_db path
[19499]3143 my $collect_tail = $collect;
[27336]3144 $collect_tail =~ s/^.*[\/|\\]//;
[21564]3145 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]3146 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[19499]3147
[21715]3148 # Remove the key
[21569]3149 my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
[19499]3150 my $status = system($cmd);
3151 if ($status != 0) {
[21715]3152 # Catch error if gdbmdel failed
[19499]3153 my $mess = "Failed to set metadata key: $dbkey\n";
3154
3155 $mess .= "PATH: $ENV{'PATH'}\n";
3156 $mess .= "cmd = $cmd\n";
3157 $mess .= "Exit status: $status\n";
3158 $mess .= "System Error Message: $!\n";
3159
3160 $gsdl_cgi->generate_error($mess);
3161 }
3162 else {
3163 $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
3164 }
3165
3166}
3167
[27157]3168sub remove_live_metadata
[19499]3169{
3170 my $self = shift @_;
3171
3172 my $username = $self->{'username'};
3173 my $collect = $self->{'collect'};
3174 my $gsdl_cgi = $self->{'gsdl_cgi'};
3175 my $gsdlhome = $self->{'gsdlhome'};
[23400]3176
[19499]3177 if ($baseaction::authentication_enabled) {
[27295]3178 # Ensure the user is allowed to edit this collection
3179 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[19499]3180 }
3181
[27157]3182 # Make sure the collection isn't locked by someone else
3183 $self->lock_collection($username, $collect);
[19499]3184
[27157]3185 $self->_remove_live_metadata(@_);
3186
3187 $self->unlock_collection($username, $collect);
3188}
3189
3190sub remove_metadata
3191{
3192 my $self = shift @_;
3193
3194 my $where = $self->{'where'};
3195 if(!$where) {
3196 $self->remove_index_metadata(@_); # call the full version of set_index_meta for the default behaviour
3197 return;
3198 }
3199
3200 my $username = $self->{'username'};
3201 my $collect = $self->{'collect'};
3202 my $gsdl_cgi = $self->{'gsdl_cgi'};
3203
3204 if ($baseaction::authentication_enabled) {
[27295]3205 # Ensure the user is allowed to edit this collection
3206 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]3207 }
3208
[19499]3209 # Make sure the collection isn't locked by someone else
3210 $self->lock_collection($username, $collect);
3211
[27157]3212 # check which directories need to be processed, specified in $where as
3213 # any combination of import|archives|index|live
3214 if($where =~ m/import/) {
3215 $self->_remove_import_metadata(@_);
3216 }
3217 if($where =~ m/archives/) {
3218 $self->_remove_archives_metadata(@_);
3219 }
3220 if($where =~ m/index/) {
3221 $self->_remove_index_metadata(@_);
3222 }
3223
3224 # Release the lock once it is done
3225 $self->unlock_collection($username, $collect);
3226}
3227
3228# the internal version, without authentication
3229sub _remove_index_metadata
3230{
3231 my $self = shift @_;
3232
3233 my $collect = $self->{'collect'};
3234 my $gsdl_cgi = $self->{'gsdl_cgi'};
3235# my $gsdlhome = $self->{'gsdlhome'};
3236 my $infodbtype = $self->{'infodbtype'};
3237
3238 # Obtain the collect dir
3239 my $site = $self->{'site'};
3240 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3241 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3242
3243 # look up additional args
[19499]3244 my $docid = $self->{'d'};
[21715]3245 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
3246 $gsdl_cgi->generate_error("No docid (d=...) specified.");
3247 }
[19499]3248 my $metaname = $self->{'metaname'};
3249 my $metapos = $self->{'metapos'};
[27162]3250 my $metavalue = $self->{'metavalue'} || undef; # necessary to force fallback to undef here
[19499]3251
[21715]3252 # To people who know $collect_tail please add some comments
3253 # Obtain the path to the database
[19499]3254 my $collect_tail = $collect;
[27336]3255 $collect_tail =~ s/^.*[\/|\\]//;
[21564]3256 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]3257 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]3258
3259 # Read the docid entry
[23400]3260 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21715]3261
3262 # Check to make sure the key does exist
3263 if (!defined ($doc_rec->{$metaname})) {
3264 $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
3265 }
3266
3267 # Obtain the specified metadata pos
[27176]3268 # if no metavalue or metapos to delete, default to deleting the 1st value for the metaname
3269 if(!defined $metapos && !defined $metavalue) {
[27162]3270 $metapos = 0;
3271 }
[27168]3272
[19499]3273
3274 # consider check key is defined before deleting?
[21715]3275 # Loop through the metadata array and ignore the specified position
[19499]3276 my $filtered_metadata = [];
[21715]3277 my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
[19499]3278 for (my $i=0; $i<$num_metadata_vals; $i++) {
[24949]3279 my $metaval = shift(@{$doc_rec->{$metaname}});
[19499]3280
[24949]3281 if (!defined $metavalue && $i != $metapos) {
[27162]3282 push(@$filtered_metadata,$metaval);
[19499]3283 }
[24949]3284
3285 if(defined $metavalue && !($metavalue eq $metaval))
3286 {
[27162]3287 push(@$filtered_metadata,$metaval);
[24949]3288 }
[19499]3289 }
3290 $doc_rec->{$metaname} = $filtered_metadata;
3291
[27180]3292 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3293 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3294
[19499]3295 if ($status != 0) {
3296 my $mess = "Failed to set metadata key: $docid\n";
3297
3298 $mess .= "PATH: $ENV{'PATH'}\n";
3299 $mess .= "Exit status: $status\n";
3300 $mess .= "System Error Message: $!\n";
3301
3302 $gsdl_cgi->generate_error($mess);
3303 }
3304 else {
3305 my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
3306 $mess .= " $metaname";
3307 $mess .= "->[$metapos]" if (defined $metapos);
[29098]3308 $mess .= " ($metavalue)" if (defined $metavalue);
[19499]3309
3310 $gsdl_cgi->generate_ok_message($mess);
3311 }
[27155]3312
[27157]3313 #return $status; # in case calling functions have a use for this
3314}
3315
3316sub remove_index_metadata
3317{
3318 my $self = shift @_;
3319
3320 my $username = $self->{'username'};
3321 my $collect = $self->{'collect'};
3322 my $gsdl_cgi = $self->{'gsdl_cgi'};
3323# my $gsdlhome = $self->{'gsdlhome'};
3324
3325 if ($baseaction::authentication_enabled) {
[27295]3326 # Ensure the user is allowed to edit this collection
3327 $self->authenticate_user($username, $collect); #&authenticate_user($gsdl_cgi, $username, $collect);
[27157]3328 }
3329
3330 # Obtain the collect dir
3331 my $site = $self->{'site'};
3332 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3333 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
3334
3335 # Make sure the collection isn't locked by someone else
3336 $self->lock_collection($username, $collect);
3337
3338 $self->_remove_index_metadata(@_);
3339
[27155]3340 # Release the lock once it is done
3341 $self->unlock_collection($username, $collect);
[19499]3342}
3343
3344
[23761]3345# Was trying to reused the codes, but the functions need to be broken
3346# down more before they can be reused, otherwise there will be too
3347# much overhead and duplicate process...
[21716]3348sub insert_metadata
3349{
3350 my $self = shift @_;
3351
3352 my $username = $self->{'username'};
3353 my $collect = $self->{'collect'};
3354 my $gsdl_cgi = $self->{'gsdl_cgi'};
3355 my $gsdlhome = $self->{'gsdlhome'};
[23400]3356 my $infodbtype = $self->{'infodbtype'};
3357
[23761]3358 # If the import metadata and gdbm database have been updated, we
3359 # need to insert some notification to warn user that the the text
3360 # they see at the moment is not indexed and require a rebuild.
[21716]3361 my $rebuild_pending_macro = "_rebuildpendingmessage_";
3362
3363 if ($baseaction::authentication_enabled) {
3364 # Ensure the user is allowed to edit this collection
3365 $self->authenticate_user($username, $collect);
3366 }
3367
[23766]3368 # Obtain the collect and archive dir
3369 my $site = $self->{'site'};
3370 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
3371 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[21716]3372 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
3373
3374 # Make sure the collection isn't locked by someone else
3375 $self->lock_collection($username, $collect);
3376
3377 # Check additional args
3378 my $docid = $self->{'d'};
3379 if (!defined($docid)) {
3380 $gsdl_cgi->generate_error("No document id is specified: d=...");
3381 }
3382 my $metaname = $self->{'metaname'};
3383 if (!defined($metaname)) {
3384 $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
3385 }
3386 my $metavalue = $self->{'metavalue'};
3387 if (!defined($metavalue) || $metavalue eq "") {
3388 $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
3389 }
3390 # make "accumulate" the default (less destructive, as won't actually
3391 # delete any existing values)
3392 my $metamode = "accumulate";
3393
[27176]3394 # metapos/prevmetavalue were never before used in this subroutine, so set them to undefined
3395 my $metapos = undef;
3396 my $prevmetavalue = undef;
3397
[21716]3398 #=======================================================================#
3399 # set_import_metadata [START]
3400 #=======================================================================#
3401 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
3402 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
3403 my $metadata_xml_file;
[23400]3404 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
3405 my $archive_doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
[21716]3406
3407 # This now stores the full pathname
3408 my $import_filename = $archive_doc_rec->{'src-file'}->[0];
[28211]3409 $import_filename = &util::placeholders_to_abspath($import_filename);
[21716]3410
3411 # figure out correct metadata.xml file [?]
3412 # Assuming the metadata.xml file is next to the source file
3413 # Note: This will not work if it is using the inherited metadata from the parent folder
3414 my ($import_tailname, $import_dirname)
3415 = File::Basename::fileparse($import_filename);
3416 my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
3417
3418 # Shane's escape characters
3419 $metavalue = pack "U0C*", unpack "C*", $metavalue;
3420 $metavalue =~ s/\,/&#44;/g;
3421 $metavalue =~ s/\:/&#58;/g;
3422 $metavalue =~ s/\|/&#124;/g;
3423 $metavalue =~ s/\(/&#40;/g;
3424 $metavalue =~ s/\)/&#41;/g;
3425 $metavalue =~ s/\[/&#91;/g;
3426 $metavalue =~ s/\\/&#92;/g;
3427 $metavalue =~ s/\]/&#93;/g;
3428 $metavalue =~ s/\{/&#123;/g;
3429 $metavalue =~ s/\}/&#125;/g;
3430 $metavalue =~ s/\"/&#34;/g;
3431 $metavalue =~ s/\`/&#96;/g;
3432 $metavalue =~ s/\n/_newline_/g;
3433
3434 # Edit the metadata.xml
3435 # Modified by Jeffrey from DL Consulting
3436 # Handle the case where there is one metadata.xml file for multiple FileSets
3437 # The XML filter needs to know whether it is in the right FileSet
3438 # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
3439 # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
[27176]3440 $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname,
3441 $metapos, $metavalue, $metamode, $import_tailname, $prevmetavalue);
[21716]3442 #=======================================================================#
3443 # set_import_metadata [END]
3444 #=======================================================================#
3445
3446
3447 #=======================================================================#
3448 # set_metadata (accumulate version) [START]
3449 #=======================================================================#
3450 # To people who know $collect_tail please add some comments
3451 # Obtain path to the database
3452 my $collect_tail = $collect;
[27336]3453 $collect_tail =~ s/^.*[\/|\\]//;
[21716]3454 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]3455 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21716]3456
3457 # Read the docid entry
[23400]3458 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
[21716]3459
3460 # Protect the quotes
3461 $metavalue =~ s/\"/\\\"/g;
3462
3463 # Adds the pending macro
3464 my $macro_metavalue = $rebuild_pending_macro . $metavalue;
3465
3466 # If the metadata doesn't exist, create a new one
3467 if (!defined($doc_rec->{$metaname})){
3468 $doc_rec->{$metaname} = [ $macro_metavalue ];
3469 }
3470 # Else, let's acculumate the values
3471 else {
3472 push(@{$doc_rec->{$metaname}},$macro_metavalue);
3473 }
3474
[27180]3475 ## Use the dbutil set_entry method instead of assuming the database is gdbm
3476 my $status = &dbutil::set_infodb_entry($infodbtype, $infodb_file_path, $docid, $doc_rec);
3477
[21716]3478 if ($status != 0) {
3479 # Catch error if gdbmget failed
3480 my $mess = "Failed to set metadata key: $docid\n";
3481
3482 $mess .= "PATH: $ENV{'PATH'}\n";
3483 $mess .= "Exit status: $status\n";
3484 $mess .= "System Error Message: $!\n";
3485
3486 $gsdl_cgi->generate_error($mess);
3487 }
3488 else {
3489 my $mess = "insert-metadata successful: Key[$docid]\n";
3490 $mess .= " [In metadata.xml] $metaname";
3491 $mess .= " = $metavalue\n";
3492 $mess .= " [In database] $metaname";
3493 $mess .= " = $macro_metavalue\n";
3494 $mess .= " The new text has not been indexed, rebuilding collection is required\n";
3495 $gsdl_cgi->generate_ok_message($mess);
3496 }
3497 #=======================================================================#
3498 # set_metadata (accumulate version) [END]
3499 #=======================================================================#
3500
3501 # Release the lock once it is done
3502 $self->unlock_collection($username, $collect);
3503}
3504
[19293]35051;
Note: See TracBrowser for help on using the repository browser.