source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 31602

Last change on this file since 31602 was 31602, checked in by ak19, 4 years ago

Follows Dr Bainbridge's suggestion to prevent URL based calls to set-metadata and remove-meta metadataserver.pl operations. Split metadataaction.pm into modmetadataaction.pm and metadataaction.pm, shifting the methods that modify metadata (set and remove subroutines) into the first. Now GS3 sets an env var that will control whether the meta-modifying subroutines will be available when called. If the env var is set, then metadataaction.pm will include the modmetadataaction.pm file in the begin block. For GS2, it works as before, always including the meta modifying subroutines. Tested on Linux with the GS3 web doc editor vs calling metadataserver.pl to set metadata directly from a URL.

File size: 30.4 KB
Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
38# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files
39# http://www.perlmonks.org/?node_id=524456 - merging hashes
40
41our $modmeta_action_table; # don't init to empty hash here, else it will overwrite whatever BEGIN sets this to
42 # see http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
43
44BEGIN {
45# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
46 require XML::Rules;
47
48 # if we're GS3, then GS3_AUTHENTICATED must be defined and set to true
49 # in order to have access to subroutines that modify metadata (the set-
50 # and remove- metadata subroutines).
51 # TODO: if we're GS2, then we continue to behave as before?
52
53 if(!defined $ENV{'GSDL3HOME'} || (defined $ENV{'GS3_AUTHENTICATED'} && $ENV{'GS3_AUTHENTICATED'} eq "true")) {
54 print STDERR "\nIncluding modmeta...\n";
55 require modmetadataaction;
56 }
57 else {
58 $modmeta_action_table = {};
59 }
60 print STDERR "End of metadata::BEGIN\n\n";
61}
62
63@metadataaction::ISA = ('baseaction');
64
65
66my $getmeta_action_table =
67{
68 #GET METHODS
69 "get-import-metadata" => {
70 'compulsory-args' => [ "d", "metaname" ],
71 'optional-args' => [ "metapos" ] },
72
73 "get-archives-metadata" => {
74 'compulsory-args' => [ "d", "metaname" ],
75 'optional-args' => [ "metapos" ] },
76
77 "get-index-metadata" => {
78 'compulsory-args' => [ "d", "metaname" ],
79 'optional-args' => [ "metapos" ] },
80
81 "get-metadata" => { # alias for get-index-metadata
82 'compulsory-args' => [ "d", "metaname" ],
83 'optional-args' => [ "metapos" ] },
84
85 "get-live-metadata" => {
86 'compulsory-args' => [ "d", "metaname" ],
87 'optional-args' => [ ] },
88
89 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
90 'compulsory-args' => [ "json" ],
91 'optional-args' => [ "where" ],
92 'help-string' => [
93 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
94 ]}
95};
96
97print STDERR "\nMODMETA_TABLE 2: " .join(",", keys $metadataaction::modmeta_action_table) ."\n";
98
99
100# merge the hashes - http://www.perlmonks.org/?node_id=524456
101my $action_table = { %$getmeta_action_table, %$modmeta_action_table };
102
103
104sub new
105{
106 my $class = shift (@_);
107 my ($gsdl_cgi,$iis6_mode) = @_;
108
109 # Treat metavalue specially. To transmit this through a GET request
110 # the Javascript side has url-encoded it, so here we need to decode
111 # it before proceeding
112
113 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
114 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
115 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
116
117 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
118 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
119
120 # need to do the same with prevmetavalue
121 my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
122 my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
123 my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
124
125 $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
126 $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
127
128 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
129
130 return bless $self, $class;
131}
132
133
134sub get_live_metadata
135{
136 my $self = shift @_;
137
138 my $username = $self->{'username'};
139 my $collect = $self->{'collect'};
140 my $gsdl_cgi = $self->{'gsdl_cgi'};
141 my $gsdlhome = $self->{'gsdlhome'};
142 my $infodbtype = $self->{'infodbtype'};
143
144 # live metadata gets/saves value scoped (prefixed) by the current usename
145 # so (for now) let's not bother to enforce authentication
146
147 # Obtain the collect dir
148 my $site = $self->{'site'};
149 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
150 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
151
152 # No locking collection when getting metadata, only when setting it
153# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
154
155 # look up additional args
156 my $docid = $self->{'d'};
157 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
158 $gsdl_cgi->generate_error("No docid (d=...) specified.");
159 }
160
161 # Generate the dbkey
162 my $metaname = $self->{'metaname'};
163 my $dbkey = "$docid.$metaname";
164
165 # To people who know $collect_tail please add some comments
166 # Obtain path to the database
167 my $collect_tail = $collect;
168 $collect_tail =~ s/^.*[\/|\\]//;
169 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
170 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
171
172 # Obtain the content of the key
173 my $cmd = "gdbmget $infodb_file_path $dbkey";
174 if (open(GIN,"$cmd |") == 0) {
175 # Catch error if gdbmget failed
176 my $mess = "Failed to get metadata key: $metaname\n";
177 $mess .= "$!\n";
178
179 $gsdl_cgi->generate_error($mess);
180 }
181 else {
182 binmode(GIN,":utf8");
183 # Read everything in and concatenate them into $metavalue
184 my $metavalue = "";
185 my $line;
186 while (defined ($line=<GIN>)) {
187 $metavalue .= $line;
188 }
189 close(GIN);
190 chomp($metavalue); # Get rid off the tailing newlines
191 $gsdl_cgi->generate_ok_message("$metavalue");
192 }
193
194 # Release the lock once it is done
195# $self->unlock_collection($username, $collect);
196}
197
198# just calls the index version
199sub get_metadata
200{
201 my $self = shift @_;
202 $self->get_index_metadata(@_);
203}
204
205# JSON version that will get the requested metadata values
206# from the requested source (index, import, archives or live)
207# One of the params is a JSON string and the return value is JSON too
208# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
209sub get_metadata_array
210{
211 my $self = shift @_;
212
213 my $where = $self->{'where'};
214 if (!$where || ($where =~ m/^\s*$/)) { # 0, "0", "" and undef are all false. All else is true.
215 # What is truth in perl: http://www.berkeleyinternet.com/perl/node11.html
216 # and http://www.perlmonks.org/?node_id=33638
217
218 $where = "index"; # default behaviour is to get the values from index
219 }
220
221 # Only when setting metadata do we perform authentication and do we lock the collection,
222 # not when getting metadata
223
224 # for get_meta_array, the where param can only be ONE of import, archives, index, live
225 if($where =~ m/index/) {
226 $self->_get_index_metadata_array(@_);
227 }
228 elsif($where =~ m/archives/) {
229 $self->_get_archives_metadata_array(@_);
230 }
231 elsif($where =~ m/import/) {
232 $self->_get_import_metadata_array(@_);
233 }
234 elsif($where =~ m/live/) {
235 $self->_get_live_metadata_array(@_);
236 }
237}
238
239# Unused at present. Added for completion. Tested.
240sub _get_import_metadata_array {
241
242 my $self = shift @_;
243
244 my $collect = $self->{'collect'};
245 my $gsdl_cgi = $self->{'gsdl_cgi'};
246 my $site = $self->{'site'};
247 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
248
249 # look up additional args
250 my $infodbtype = $self->{'infodbtype'};
251
252 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
253 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
254 my $json_str = $self->{'json'};
255 my $doc_array = decode_json $json_str;
256
257 my $json_result_str = "[";
258 my $first_doc_rec = 1;
259 foreach my $doc_array_rec ( @$doc_array ) {
260
261 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
262
263 if($first_doc_rec) {
264 $first_doc_rec = 0;
265 } else {
266 $json_result_str .= ",";
267 }
268 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
269
270 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
271 $json_result_str = $json_result_str . ",\"metatable\":[";
272
273 my $first_rec = 1;
274 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
275
276 # Read the docid entry
277 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
278 # This now stores the full pathname
279 my $import_filename = $doc_rec->{'src-file'}->[0];
280 $import_filename = &util::placeholders_to_abspath($import_filename);
281
282 # figure out correct metadata.xml file [?]
283 # Assuming the metadata.xml file is next to the source file
284 # Note: This will not work if it is using the inherited metadata from the parent folder
285 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
286 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
287
288
289 if($first_rec) {
290 $first_rec = 0;
291 } else {
292 $json_result_str .= ",";
293 }
294
295 my $metaname = $metatable_rec->{'metaname'};
296 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
297
298 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
299 if(!defined $metapos) {
300 $metapos = 0;
301 }
302
303 # Obtain the specified metadata value(s)
304 my $metavalue;
305
306 if($metapos ne "all") { # get the value at a single metapos
307 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
308
309 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
310
311 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
312
313 } else {
314 my $first_metaval = 1;
315 $metapos = 0;
316 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
317
318 while (defined $metavalue && $metavalue ne "") {
319 if($first_metaval) {
320 $first_metaval = 0;
321 } else {
322 $json_result_str .= ",";
323 }
324
325 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
326
327 $metapos++;
328 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
329 }
330 }
331
332 $json_result_str .= "]}"; # close metavals array and metatable record
333 }
334
335 $json_result_str .= "]}"; # close metatable array and docid record
336 }
337
338 $json_result_str .= "]"; # close array of docids
339 $gsdl_cgi->generate_ok_message($json_result_str."\n");
340}
341
342# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
343# For more information on the format of the output, see _get_index_metadata_array, which is in use.
344sub _get_archives_metadata_array {
345
346 my $self = shift @_;
347
348 my $collect = $self->{'collect'};
349 my $gsdl_cgi = $self->{'gsdl_cgi'};
350 my $site = $self->{'site'};
351 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
352
353 # look up additional args
354 my $infodbtype = $self->{'infodbtype'};
355
356 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
357 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
358
359 my $json_str = $self->{'json'};
360 my $doc_array = decode_json $json_str;
361
362 my $json_result_str = "[";
363 my $first_doc_rec = 1;
364 foreach my $doc_array_rec ( @$doc_array ) {
365
366 my $docid = $doc_array_rec->{'docid'};
367
368 if($first_doc_rec) {
369 $first_doc_rec = 0;
370 } else {
371 $json_result_str .= ",";
372 }
373 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
374
375 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
376 $json_result_str = $json_result_str . ",\"metatable\":[";
377
378 my $first_rec = 1;
379 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
380
381 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
382 $docid_secnum = "" if (!defined $docid_secnum);
383
384 # Read the docid entry
385 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
386 # This now stores the full pathname
387 my $doc_filename = $doc_rec->{'doc-file'}->[0];
388 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
389
390 if($first_rec) {
391 $first_rec = 0;
392 } else {
393 $json_result_str .= ",";
394 }
395
396 my $metaname = $metatable_rec->{'metaname'};
397 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
398
399 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
400 if(!defined $metapos) {
401 $metapos = 0;
402 }
403
404
405 # Obtain the specified metadata value(s)
406 my $metavalue;
407
408 if($metapos ne "all") { # get the value at a single metapos
409
410 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
411 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
412
413 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
414
415 } else {
416 my $first_metaval = 1;
417 $metapos = 0;
418 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
419
420 while (defined $metavalue && $metavalue ne "") {
421 if($first_metaval) {
422 $first_metaval = 0;
423 } else {
424 $json_result_str .= ",";
425 }
426
427 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
428
429 $metapos++;
430 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
431 }
432 }
433
434 $json_result_str .= "]}"; # close metavals array and metatable record
435 }
436
437 $json_result_str .= "]}"; # close metatable array and docid record
438 }
439
440 $json_result_str .= "]"; # close array of docids
441 $gsdl_cgi->generate_ok_message($json_result_str."\n");
442}
443
444
445# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
446sub _get_live_metadata_array
447{
448 my $self = shift @_;
449
450 my $collect = $self->{'collect'};
451 my $gsdl_cgi = $self->{'gsdl_cgi'};
452 my $site = $self->{'site'};
453 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
454
455 # look up additional args
456 my $infodbtype = $self->{'infodbtype'};
457
458 # To people who know $collect_tail please add some comments
459 # Obtain the path to the database
460 my $collect_tail = $collect;
461 $collect_tail =~ s/^.*[\/|\\]//;
462 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
463 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
464
465 my $json_str = $self->{'json'};
466 my $doc_array = decode_json $json_str;
467
468 my $json_result_str = "[";
469 my $first_doc_rec = 1;
470
471 foreach my $doc_array_rec ( @$doc_array ) {
472
473 my $docid = $doc_array_rec->{'docid'};
474
475 if($first_doc_rec) {
476 $first_doc_rec = 0;
477 } else {
478 $json_result_str .= ",";
479 }
480 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
481
482 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
483 $json_result_str = $json_result_str . ",\"metatable\":[";
484
485 my $first_rec = 1;
486 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
487 if($first_rec) {
488 $first_rec = 0;
489 } else {
490 $json_result_str .= ",";
491 }
492
493 my $metaname = $metatable_rec->{'metaname'};
494 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
495
496 # Generate the dbkey
497 my $dbkey = "$docid.$metaname";
498
499 # metapos for get_live_metadata is always assumed to be "all".
500 # It's always going to get all the lines of metavalues associated with a metaname
501 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
502 #my $metapos = "all";
503 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
504 my $metavalue = "";
505
506 # Obtain the content of the key
507 my $cmd = "gdbmget $infodb_file_path $dbkey";
508 if (open(GIN,"$cmd |") != 0) { # Success.
509
510 binmode(GIN,":utf8");
511 # Read everything in and concatenate them into $metavalue
512 my $line;
513 my $first_metaval = 1;
514 my $pos = 0;
515 while (defined ($line=<GIN>)) {
516 chomp($line); # Get rid off the tailing newlines
517
518 if($metapos eq "all") {
519 if($first_metaval) {
520 $first_metaval = 0;
521 } else {
522 $json_result_str .= ",";
523 }
524 $metavalue = $line;
525 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
526 } elsif($metapos == $pos) {
527 $metavalue = $line;
528 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
529 last;
530 } # else, the current $pos is not the required $metapos
531 $pos += 1;
532 }
533 close(GIN);
534 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
535
536 $json_result_str .= "]}"; # close metavals array and metatable record
537 }
538
539 $json_result_str .= "]}"; # close metatable array and docid record
540 }
541
542 $json_result_str .= "]"; # close array of docids
543
544 $gsdl_cgi->generate_ok_message($json_result_str."\n");
545}
546
547
548# Takes a JSON string and returns a JSON string
549# Request string is of the form:
550# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
551# Resulting string is of the form:
552# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
553sub _get_index_metadata_array
554{
555 my $self = shift @_;
556
557 my $collect = $self->{'collect'};
558 my $gsdl_cgi = $self->{'gsdl_cgi'};
559 my $site = $self->{'site'};
560 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
561
562 # look up additional args
563 my $infodbtype = $self->{'infodbtype'};
564
565 # To people who know $collect_tail please add some comments
566 # Obtain the path to the database
567 my $collect_tail = $collect;
568 $collect_tail =~ s/^.*[\/|\\]//;
569 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
570 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
571
572 my $json_str = $self->{'json'};
573 my $doc_array = decode_json $json_str;
574
575 my $json_result_str = "[";
576 my $first_doc_rec = 1;
577
578 foreach my $doc_array_rec ( @$doc_array ) {
579
580 my $docid = $doc_array_rec->{'docid'};
581
582 if($first_doc_rec) {
583 $first_doc_rec = 0;
584 } else {
585 $json_result_str .= ",";
586 }
587 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
588
589 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
590 $json_result_str = $json_result_str . ",\"metatable\":[";
591
592 my $first_rec = 1;
593 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
594 if($first_rec) {
595 $first_rec = 0;
596 } else {
597 $json_result_str .= ",";
598 }
599
600 my $metaname = $metatable_rec->{'metaname'};
601 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
602
603 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
604 if(!defined $metapos) {
605 $metapos = 0;
606 }
607
608 # Read the docid entry
609 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
610
611 # Basically loop through and unescape_html the values
612 foreach my $k (keys %$doc_rec) {
613 my @escaped_v = ();
614 foreach my $v (@{$doc_rec->{$k}}) {
615 my $ev = &ghtml::unescape_html($v);
616 push(@escaped_v, $ev);
617 }
618 $doc_rec->{$k} = \@escaped_v;
619 }
620
621 # Obtain the specified metadata value(s)
622 my $metavalue;
623
624 if($metapos ne "all") { # get the value at a single metapos
625
626 $metavalue = $doc_rec->{$metaname}->[$metapos];
627
628 # protect any double quotes and colons in the metavalue before putting it into JSON
629 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
630 $metavalue =~ s/\:/&58;/g if defined $metavalue;
631
632 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
633
634 } else {
635 my $first_metaval = 1;
636 $metapos = 0;
637 $metavalue = $doc_rec->{$metaname}->[$metapos];
638
639 while (defined $metavalue) {
640
641 # protect any double quotes and colons in the metavalue before putting it into JSON
642 $metavalue =~ s/\"/&quot;/g;
643 $metavalue =~ s/\:/&58;/g;
644
645 if($first_metaval) {
646 $first_metaval = 0;
647 } else {
648 $json_result_str .= ",";
649 }
650
651 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
652
653 $metapos++;
654 $metavalue = $doc_rec->{$metaname}->[$metapos];
655 }
656 }
657
658 $json_result_str .= "]}"; # close metavals array and metatable record
659 }
660
661 $json_result_str .= "]}"; # close metatable array and docid record
662 }
663
664 $json_result_str .= "]"; # close array of docids
665
666 $gsdl_cgi->generate_ok_message($json_result_str."\n");
667}
668
669
670sub get_index_metadata
671{
672 my $self = shift @_;
673
674 my $username = $self->{'username'};
675 my $collect = $self->{'collect'};
676 my $gsdl_cgi = $self->{'gsdl_cgi'};
677 my $gsdlhome = $self->{'gsdlhome'};
678
679 # Obtain the collect dir
680 my $site = $self->{'site'};
681 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
682 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
683
684 # look up additional args
685 my $docid = $self->{'d'};
686 my $metaname = $self->{'metaname'};
687 my $metapos = $self->{'metapos'};
688 my $infodbtype = $self->{'infodbtype'};
689
690 # To people who know $collect_tail please add some comments
691 # Obtain path to the database
692 my $collect_tail = $collect;
693 $collect_tail =~ s/^.*[\/|\\]//;
694 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
695 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
696
697 # Read the docid entry
698 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
699
700 # Basically loop through and unescape_html the values
701 foreach my $k (keys %$doc_rec) {
702 my @escaped_v = ();
703 foreach my $v (@{$doc_rec->{$k}}) {
704 my $ev = &ghtml::unescape_html($v);
705 push(@escaped_v, $ev);
706 }
707 $doc_rec->{$k} = \@escaped_v;
708 }
709
710 # Obtain the specified metadata value
711 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
712 my $metavalue = $doc_rec->{$metaname}->[$metapos];
713 $gsdl_cgi->generate_ok_message("$metavalue");
714
715}
716
717
718sub get_import_metadata
719{
720 my $self = shift @_;
721
722 my $username = $self->{'username'};
723 my $collect = $self->{'collect'};
724 my $gsdl_cgi = $self->{'gsdl_cgi'};
725 my $gsdlhome = $self->{'gsdlhome'};
726
727 # Obtain the collect dir
728 my $site = $self->{'site'};
729 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
730 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
731
732 # look up additional args
733 my $docid = $self->{'d'};
734 my $metaname = $self->{'metaname'};
735 my $metapos = $self->{'metapos'};
736 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/)); # gets the first value by default since metapos defaults to 0
737
738 my $infodbtype = $self->{'infodbtype'};
739 if (!defined $docid)
740 {
741 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
742 }
743
744 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
745 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
746 my $metadata_xml_file;
747 my $import_filename = undef;
748
749
750 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
751 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
752 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
753
754 # This now stores the full pathname
755 $import_filename = $doc_rec->{'src-file'}->[0];
756 $import_filename = &util::placeholders_to_abspath($import_filename);
757
758 # figure out correct metadata.xml file [?]
759 # Assuming the metadata.xml file is next to the source file
760 # Note: This will not work if it is using the inherited metadata from the parent folder
761 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
762 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
763
764 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
765
766}
767
768sub get_metadata_from_metadata_xml
769{
770 my $self = shift @_;
771 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
772
773 my @rules =
774 (
775 _default => 'raw',
776 'Metadata' => \&gfmxml_metadata,
777 'FileName' => \&mxml_filename
778 );
779
780 my $parser = XML::Rules->new
781 (
782 rules => \@rules,
783 output_encoding => 'utf8'
784 );
785
786 my $xml_in = "";
787 if (!open(MIN,"<$metadata_xml_filename"))
788 {
789 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
790 }
791 else
792 {
793 # Read them in
794 my $line;
795 while (defined ($line=<MIN>)) {
796 $xml_in .= $line;
797 }
798 close(MIN);
799
800 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
801
802 if(defined $parser->{'pad'}->{'metavalue'})
803 {
804 return $parser->{'pad'}->{'metavalue'};
805 }
806 else
807 {
808 return "";
809 }
810 }
811}
812
813sub gfmxml_metadata
814{
815 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
816
817 # no subsection support yet in metadata.xml
818
819 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
820 {
821 if (!defined $parser->{'parameters'}->{'poscount'})
822 {
823 $parser->{'parameters'}->{'poscount'} = 0;
824 }
825 else
826 {
827 $parser->{'parameters'}->{'poscount'}++;
828 }
829
830 # gets the first value by default, since metapos defaults to 0
831 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
832 {
833 if($parser->{'parameters'}->{'metapos'} > 0) {
834 print STDERR "@@@@ WARNING: non-zero metapos.\n";
835 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
836 }
837 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
838 }
839 }
840}
841
842sub get_archives_metadata
843{
844 my $self = shift @_;
845
846 my $username = $self->{'username'};
847 my $collect = $self->{'collect'};
848 my $gsdl_cgi = $self->{'gsdl_cgi'};
849# my $gsdlhome = $self->{'gsdlhome'};
850 my $infodbtype = $self->{'infodbtype'};
851
852 # Obtain the collect dir
853 my $site = $self->{'site'};
854 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
855
856 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
857
858 # look up additional args
859 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
860 $docid_secnum = "" if (!defined $docid_secnum);
861
862 my $metaname = $self->{'metaname'};
863 my $metapos = $self->{'metapos'};
864 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
865
866 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
867 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
868
869 # This now stores the full pathname
870 my $doc_filename = $doc_rec->{'doc-file'}->[0];
871
872 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
873
874}
875
876sub get_metadata_from_archive_xml
877{
878 my $self = shift @_;
879 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
880
881 my @start_rules = ('Section' => \&dxml_start_section);
882
883 my @rules =
884 (
885 _default => 'raw',
886 'Metadata' => \&gfdxml_metadata
887 );
888
889 my $parser = XML::Rules->new
890 (
891 start_rules => \@start_rules,
892 rules => \@rules,
893 output_encoding => 'utf8'
894 );
895
896 my $xml_in = "";
897 if (!open(MIN,"<$doc_xml_filename"))
898 {
899 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
900 }
901 else
902 {
903 # Read them in
904 my $line;
905 while (defined ($line=<MIN>)) {
906 $xml_in .= $line;
907 }
908 close(MIN);
909
910 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
911
912 if(defined $parser->{'pad'}->{'metavalue'})
913 {
914 return $parser->{'pad'}->{'metavalue'};
915 }
916 else
917 {
918 return "";
919 }
920 }
921}
922
923sub gfdxml_metadata
924{
925 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
926
927 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
928 {
929 return;
930 }
931
932 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
933 {
934 if (!defined $parser->{'parameters'}->{'poscount'})
935 {
936 $parser->{'parameters'}->{'poscount'} = 0;
937 }
938 else
939 {
940 $parser->{'parameters'}->{'poscount'}++;
941 }
942 }
943
944 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
945 {
946 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
947 }
948}
949
9501;
Note: See TracBrowser for help on using the repository browser.