source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 31617

Last change on this file since 31617 was 31617, checked in by ak19, 7 years ago

Removing the debugging.

File size: 30.6 KB
Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37# This class is conditionally expanded with set-metadata, remove-metadata and insert-metadata subroutines
38# defined in modmetadataaction.pm. The BEGIN code block determines whether the condition holds.
39# See
40# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
41# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files
42# http://www.perlmonks.org/?node_id=524456 - merging hashes
43
44our $modmeta_action_table; # don't init to empty hash here, else it will overwrite whatever BEGIN sets this to
45 # see http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
46
47BEGIN {
48# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
49 require XML::Rules;
50
51 # if we're GS3, then GS3_AUTHENTICATED must be defined and set to true
52 # in order to have access to subroutines that modify metadata (the set-
53 # and remove- metadata subroutines).
54 # TODO: if we're GS2, then we continue to behave as before?
55
56 if(!defined $ENV{'GSDL3HOME'} || (defined $ENV{'GS3_AUTHENTICATED'} && $ENV{'GS3_AUTHENTICATED'} eq "true")) {
57 require modmetadataaction;
58 }
59 else {
60 $modmeta_action_table = {};
61 }
62}
63
64@metadataaction::ISA = ('baseaction');
65
66
67my $getmeta_action_table =
68{
69 #GET METHODS
70 "get-import-metadata" => {
71 'compulsory-args' => [ "d", "metaname" ],
72 'optional-args' => [ "metapos" ] },
73
74 "get-archives-metadata" => {
75 'compulsory-args' => [ "d", "metaname" ],
76 'optional-args' => [ "metapos" ] },
77
78 "get-index-metadata" => {
79 'compulsory-args' => [ "d", "metaname" ],
80 'optional-args' => [ "metapos" ] },
81
82 "get-metadata" => { # alias for get-index-metadata
83 'compulsory-args' => [ "d", "metaname" ],
84 'optional-args' => [ "metapos" ] },
85
86 "get-live-metadata" => {
87 'compulsory-args' => [ "d", "metaname" ],
88 'optional-args' => [ ] },
89
90 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
91 'compulsory-args' => [ "json" ],
92 'optional-args' => [ "where" ],
93 'help-string' => [
94 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
95 ]}
96};
97
98# To get the final action_table of all available subroutines in this class,
99# merge the get- and mod-metadata hashes. See http://www.perlmonks.org/?node_id=524456
100# Note that modmeta_action_table will be empty of subroutines if the user does not have permissions
101# to modify metadata.
102my $action_table = { %$getmeta_action_table, %$modmeta_action_table };
103
104
105sub new
106{
107 my $class = shift (@_);
108 my ($gsdl_cgi,$iis6_mode) = @_;
109
110 # Treat metavalue specially. To transmit this through a GET request
111 # the Javascript side has url-encoded it, so here we need to decode
112 # it before proceeding
113
114 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
115 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
116 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
117
118 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
119 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
120
121 # need to do the same with prevmetavalue
122 my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
123 my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
124 my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
125
126 $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
127 $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
128
129 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
130
131 return bless $self, $class;
132}
133
134
135sub get_live_metadata
136{
137 my $self = shift @_;
138
139 my $username = $self->{'username'};
140 my $collect = $self->{'collect'};
141 my $gsdl_cgi = $self->{'gsdl_cgi'};
142 my $gsdlhome = $self->{'gsdlhome'};
143 my $infodbtype = $self->{'infodbtype'};
144
145 # live metadata gets/saves value scoped (prefixed) by the current usename
146 # so (for now) let's not bother to enforce authentication
147
148 # Obtain the collect dir
149 my $site = $self->{'site'};
150 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
151 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
152
153 # No locking collection when getting metadata, only when setting it
154# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
155
156 # look up additional args
157 my $docid = $self->{'d'};
158 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
159 $gsdl_cgi->generate_error("No docid (d=...) specified.");
160 }
161
162 # Generate the dbkey
163 my $metaname = $self->{'metaname'};
164 my $dbkey = "$docid.$metaname";
165
166 # To people who know $collect_tail please add some comments
167 # Obtain path to the database
168 my $collect_tail = $collect;
169 $collect_tail =~ s/^.*[\/|\\]//;
170 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
171 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
172
173 # Obtain the content of the key
174 my $cmd = "gdbmget $infodb_file_path $dbkey";
175 if (open(GIN,"$cmd |") == 0) {
176 # Catch error if gdbmget failed
177 my $mess = "Failed to get metadata key: $metaname\n";
178 $mess .= "$!\n";
179
180 $gsdl_cgi->generate_error($mess);
181 }
182 else {
183 binmode(GIN,":utf8");
184 # Read everything in and concatenate them into $metavalue
185 my $metavalue = "";
186 my $line;
187 while (defined ($line=<GIN>)) {
188 $metavalue .= $line;
189 }
190 close(GIN);
191 chomp($metavalue); # Get rid off the tailing newlines
192 $gsdl_cgi->generate_ok_message("$metavalue");
193 }
194
195 # Release the lock once it is done
196# $self->unlock_collection($username, $collect);
197}
198
199# just calls the index version
200sub get_metadata
201{
202 my $self = shift @_;
203 $self->get_index_metadata(@_);
204}
205
206# JSON version that will get the requested metadata values
207# from the requested source (index, import, archives or live)
208# One of the params is a JSON string and the return value is JSON too
209# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
210sub get_metadata_array
211{
212 my $self = shift @_;
213
214 my $where = $self->{'where'};
215 if (!$where || ($where =~ m/^\s*$/)) { # 0, "0", "" and undef are all false. All else is true.
216 # What is truth in perl: http://www.berkeleyinternet.com/perl/node11.html
217 # and http://www.perlmonks.org/?node_id=33638
218
219 $where = "index"; # default behaviour is to get the values from index
220 }
221
222 # Only when setting metadata do we perform authentication and do we lock the collection,
223 # not when getting metadata
224
225 # for get_meta_array, the where param can only be ONE of import, archives, index, live
226 if($where =~ m/index/) {
227 $self->_get_index_metadata_array(@_);
228 }
229 elsif($where =~ m/archives/) {
230 $self->_get_archives_metadata_array(@_);
231 }
232 elsif($where =~ m/import/) {
233 $self->_get_import_metadata_array(@_);
234 }
235 elsif($where =~ m/live/) {
236 $self->_get_live_metadata_array(@_);
237 }
238}
239
240# Unused at present. Added for completion. Tested.
241sub _get_import_metadata_array {
242
243 my $self = shift @_;
244
245 my $collect = $self->{'collect'};
246 my $gsdl_cgi = $self->{'gsdl_cgi'};
247 my $site = $self->{'site'};
248 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
249
250 # look up additional args
251 my $infodbtype = $self->{'infodbtype'};
252
253 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
254 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
255 my $json_str = $self->{'json'};
256 my $doc_array = decode_json $json_str;
257
258 my $json_result_str = "[";
259 my $first_doc_rec = 1;
260 foreach my $doc_array_rec ( @$doc_array ) {
261
262 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
263
264 if($first_doc_rec) {
265 $first_doc_rec = 0;
266 } else {
267 $json_result_str .= ",";
268 }
269 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
270
271 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
272 $json_result_str = $json_result_str . ",\"metatable\":[";
273
274 my $first_rec = 1;
275 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
276
277 # Read the docid entry
278 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
279 # This now stores the full pathname
280 my $import_filename = $doc_rec->{'src-file'}->[0];
281 $import_filename = &util::placeholders_to_abspath($import_filename);
282
283 # figure out correct metadata.xml file [?]
284 # Assuming the metadata.xml file is next to the source file
285 # Note: This will not work if it is using the inherited metadata from the parent folder
286 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
287 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
288
289
290 if($first_rec) {
291 $first_rec = 0;
292 } else {
293 $json_result_str .= ",";
294 }
295
296 my $metaname = $metatable_rec->{'metaname'};
297 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
298
299 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
300 if(!defined $metapos) {
301 $metapos = 0;
302 }
303
304 # Obtain the specified metadata value(s)
305 my $metavalue;
306
307 if($metapos ne "all") { # get the value at a single metapos
308 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
309
310 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
311
312 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
313
314 } else {
315 my $first_metaval = 1;
316 $metapos = 0;
317 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
318
319 while (defined $metavalue && $metavalue ne "") {
320 if($first_metaval) {
321 $first_metaval = 0;
322 } else {
323 $json_result_str .= ",";
324 }
325
326 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
327
328 $metapos++;
329 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
330 }
331 }
332
333 $json_result_str .= "]}"; # close metavals array and metatable record
334 }
335
336 $json_result_str .= "]}"; # close metatable array and docid record
337 }
338
339 $json_result_str .= "]"; # close array of docids
340 $gsdl_cgi->generate_ok_message($json_result_str."\n");
341}
342
343# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
344# For more information on the format of the output, see _get_index_metadata_array, which is in use.
345sub _get_archives_metadata_array {
346
347 my $self = shift @_;
348
349 my $collect = $self->{'collect'};
350 my $gsdl_cgi = $self->{'gsdl_cgi'};
351 my $site = $self->{'site'};
352 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
353
354 # look up additional args
355 my $infodbtype = $self->{'infodbtype'};
356
357 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
358 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
359
360 my $json_str = $self->{'json'};
361 my $doc_array = decode_json $json_str;
362
363 my $json_result_str = "[";
364 my $first_doc_rec = 1;
365 foreach my $doc_array_rec ( @$doc_array ) {
366
367 my $docid = $doc_array_rec->{'docid'};
368
369 if($first_doc_rec) {
370 $first_doc_rec = 0;
371 } else {
372 $json_result_str .= ",";
373 }
374 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
375
376 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
377 $json_result_str = $json_result_str . ",\"metatable\":[";
378
379 my $first_rec = 1;
380 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
381
382 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
383 $docid_secnum = "" if (!defined $docid_secnum);
384
385 # Read the docid entry
386 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
387 # This now stores the full pathname
388 my $doc_filename = $doc_rec->{'doc-file'}->[0];
389 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
390
391 if($first_rec) {
392 $first_rec = 0;
393 } else {
394 $json_result_str .= ",";
395 }
396
397 my $metaname = $metatable_rec->{'metaname'};
398 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
399
400 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
401 if(!defined $metapos) {
402 $metapos = 0;
403 }
404
405
406 # Obtain the specified metadata value(s)
407 my $metavalue;
408
409 if($metapos ne "all") { # get the value at a single metapos
410
411 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
412 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
413
414 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
415
416 } else {
417 my $first_metaval = 1;
418 $metapos = 0;
419 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
420
421 while (defined $metavalue && $metavalue ne "") {
422 if($first_metaval) {
423 $first_metaval = 0;
424 } else {
425 $json_result_str .= ",";
426 }
427
428 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
429
430 $metapos++;
431 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
432 }
433 }
434
435 $json_result_str .= "]}"; # close metavals array and metatable record
436 }
437
438 $json_result_str .= "]}"; # close metatable array and docid record
439 }
440
441 $json_result_str .= "]"; # close array of docids
442 $gsdl_cgi->generate_ok_message($json_result_str."\n");
443}
444
445
446# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
447sub _get_live_metadata_array
448{
449 my $self = shift @_;
450
451 my $collect = $self->{'collect'};
452 my $gsdl_cgi = $self->{'gsdl_cgi'};
453 my $site = $self->{'site'};
454 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
455
456 # look up additional args
457 my $infodbtype = $self->{'infodbtype'};
458
459 # To people who know $collect_tail please add some comments
460 # Obtain the path to the database
461 my $collect_tail = $collect;
462 $collect_tail =~ s/^.*[\/|\\]//;
463 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
464 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
465
466 my $json_str = $self->{'json'};
467 my $doc_array = decode_json $json_str;
468
469 my $json_result_str = "[";
470 my $first_doc_rec = 1;
471
472 foreach my $doc_array_rec ( @$doc_array ) {
473
474 my $docid = $doc_array_rec->{'docid'};
475
476 if($first_doc_rec) {
477 $first_doc_rec = 0;
478 } else {
479 $json_result_str .= ",";
480 }
481 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
482
483 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
484 $json_result_str = $json_result_str . ",\"metatable\":[";
485
486 my $first_rec = 1;
487 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
488 if($first_rec) {
489 $first_rec = 0;
490 } else {
491 $json_result_str .= ",";
492 }
493
494 my $metaname = $metatable_rec->{'metaname'};
495 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
496
497 # Generate the dbkey
498 my $dbkey = "$docid.$metaname";
499
500 # metapos for get_live_metadata is always assumed to be "all".
501 # It's always going to get all the lines of metavalues associated with a metaname
502 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
503 #my $metapos = "all";
504 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
505 my $metavalue = "";
506
507 # Obtain the content of the key
508 my $cmd = "gdbmget $infodb_file_path $dbkey";
509 if (open(GIN,"$cmd |") != 0) { # Success.
510
511 binmode(GIN,":utf8");
512 # Read everything in and concatenate them into $metavalue
513 my $line;
514 my $first_metaval = 1;
515 my $pos = 0;
516 while (defined ($line=<GIN>)) {
517 chomp($line); # Get rid off the tailing newlines
518
519 if($metapos eq "all") {
520 if($first_metaval) {
521 $first_metaval = 0;
522 } else {
523 $json_result_str .= ",";
524 }
525 $metavalue = $line;
526 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
527 } elsif($metapos == $pos) {
528 $metavalue = $line;
529 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
530 last;
531 } # else, the current $pos is not the required $metapos
532 $pos += 1;
533 }
534 close(GIN);
535 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
536
537 $json_result_str .= "]}"; # close metavals array and metatable record
538 }
539
540 $json_result_str .= "]}"; # close metatable array and docid record
541 }
542
543 $json_result_str .= "]"; # close array of docids
544
545 $gsdl_cgi->generate_ok_message($json_result_str."\n");
546}
547
548
549# Takes a JSON string and returns a JSON string
550# Request string is of the form:
551# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
552# Resulting string is of the form:
553# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
554sub _get_index_metadata_array
555{
556 my $self = shift @_;
557
558 my $collect = $self->{'collect'};
559 my $gsdl_cgi = $self->{'gsdl_cgi'};
560 my $site = $self->{'site'};
561 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
562
563 # look up additional args
564 my $infodbtype = $self->{'infodbtype'};
565
566 # To people who know $collect_tail please add some comments
567 # Obtain the path to the database
568 my $collect_tail = $collect;
569 $collect_tail =~ s/^.*[\/|\\]//;
570 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
571 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
572
573 my $json_str = $self->{'json'};
574 my $doc_array = decode_json $json_str;
575
576 my $json_result_str = "[";
577 my $first_doc_rec = 1;
578
579 foreach my $doc_array_rec ( @$doc_array ) {
580
581 my $docid = $doc_array_rec->{'docid'};
582
583 if($first_doc_rec) {
584 $first_doc_rec = 0;
585 } else {
586 $json_result_str .= ",";
587 }
588 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
589
590 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
591 $json_result_str = $json_result_str . ",\"metatable\":[";
592
593 my $first_rec = 1;
594 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
595 if($first_rec) {
596 $first_rec = 0;
597 } else {
598 $json_result_str .= ",";
599 }
600
601 my $metaname = $metatable_rec->{'metaname'};
602 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
603
604 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
605 if(!defined $metapos) {
606 $metapos = 0;
607 }
608
609 # Read the docid entry
610 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
611
612 # Basically loop through and unescape_html the values
613 foreach my $k (keys %$doc_rec) {
614 my @escaped_v = ();
615 foreach my $v (@{$doc_rec->{$k}}) {
616 my $ev = &ghtml::unescape_html($v);
617 push(@escaped_v, $ev);
618 }
619 $doc_rec->{$k} = \@escaped_v;
620 }
621
622 # Obtain the specified metadata value(s)
623 my $metavalue;
624
625 if($metapos ne "all") { # get the value at a single metapos
626
627 $metavalue = $doc_rec->{$metaname}->[$metapos];
628
629 # protect any double quotes and colons in the metavalue before putting it into JSON
630 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
631 $metavalue =~ s/\:/&58;/g if defined $metavalue;
632
633 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
634
635 } else {
636 my $first_metaval = 1;
637 $metapos = 0;
638 $metavalue = $doc_rec->{$metaname}->[$metapos];
639
640 while (defined $metavalue) {
641
642 # protect any double quotes and colons in the metavalue before putting it into JSON
643 $metavalue =~ s/\"/&quot;/g;
644 $metavalue =~ s/\:/&58;/g;
645
646 if($first_metaval) {
647 $first_metaval = 0;
648 } else {
649 $json_result_str .= ",";
650 }
651
652 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
653
654 $metapos++;
655 $metavalue = $doc_rec->{$metaname}->[$metapos];
656 }
657 }
658
659 $json_result_str .= "]}"; # close metavals array and metatable record
660 }
661
662 $json_result_str .= "]}"; # close metatable array and docid record
663 }
664
665 $json_result_str .= "]"; # close array of docids
666
667 $gsdl_cgi->generate_ok_message($json_result_str."\n");
668}
669
670
671sub get_index_metadata
672{
673 my $self = shift @_;
674
675 my $username = $self->{'username'};
676 my $collect = $self->{'collect'};
677 my $gsdl_cgi = $self->{'gsdl_cgi'};
678 my $gsdlhome = $self->{'gsdlhome'};
679
680 # Obtain the collect dir
681 my $site = $self->{'site'};
682 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
683 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
684
685 # look up additional args
686 my $docid = $self->{'d'};
687 my $metaname = $self->{'metaname'};
688 my $metapos = $self->{'metapos'};
689 my $infodbtype = $self->{'infodbtype'};
690
691 # To people who know $collect_tail please add some comments
692 # Obtain path to the database
693 my $collect_tail = $collect;
694 $collect_tail =~ s/^.*[\/|\\]//;
695 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
696 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
697
698 # Read the docid entry
699 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
700
701 # Basically loop through and unescape_html the values
702 foreach my $k (keys %$doc_rec) {
703 my @escaped_v = ();
704 foreach my $v (@{$doc_rec->{$k}}) {
705 my $ev = &ghtml::unescape_html($v);
706 push(@escaped_v, $ev);
707 }
708 $doc_rec->{$k} = \@escaped_v;
709 }
710
711 # Obtain the specified metadata value
712 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
713 my $metavalue = $doc_rec->{$metaname}->[$metapos];
714 $gsdl_cgi->generate_ok_message("$metavalue");
715
716}
717
718
719sub get_import_metadata
720{
721 my $self = shift @_;
722
723 my $username = $self->{'username'};
724 my $collect = $self->{'collect'};
725 my $gsdl_cgi = $self->{'gsdl_cgi'};
726 my $gsdlhome = $self->{'gsdlhome'};
727
728 # Obtain the collect dir
729 my $site = $self->{'site'};
730 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
731 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
732
733 # look up additional args
734 my $docid = $self->{'d'};
735 my $metaname = $self->{'metaname'};
736 my $metapos = $self->{'metapos'};
737 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/)); # gets the first value by default since metapos defaults to 0
738
739 my $infodbtype = $self->{'infodbtype'};
740 if (!defined $docid)
741 {
742 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
743 }
744
745 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
746 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
747 my $metadata_xml_file;
748 my $import_filename = undef;
749
750
751 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
752 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
753 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
754
755 # This now stores the full pathname
756 $import_filename = $doc_rec->{'src-file'}->[0];
757 $import_filename = &util::placeholders_to_abspath($import_filename);
758
759 # figure out correct metadata.xml file [?]
760 # Assuming the metadata.xml file is next to the source file
761 # Note: This will not work if it is using the inherited metadata from the parent folder
762 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
763 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
764
765 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
766
767}
768
769sub get_metadata_from_metadata_xml
770{
771 my $self = shift @_;
772 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
773
774 my @rules =
775 (
776 _default => 'raw',
777 'Metadata' => \&gfmxml_metadata,
778 'FileName' => \&mxml_filename
779 );
780
781 my $parser = XML::Rules->new
782 (
783 rules => \@rules,
784 output_encoding => 'utf8'
785 );
786
787 my $xml_in = "";
788 if (!open(MIN,"<$metadata_xml_filename"))
789 {
790 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
791 }
792 else
793 {
794 # Read them in
795 my $line;
796 while (defined ($line=<MIN>)) {
797 $xml_in .= $line;
798 }
799 close(MIN);
800
801 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
802
803 if(defined $parser->{'pad'}->{'metavalue'})
804 {
805 return $parser->{'pad'}->{'metavalue'};
806 }
807 else
808 {
809 return "";
810 }
811 }
812}
813
814sub gfmxml_metadata
815{
816 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
817
818 # no subsection support yet in metadata.xml
819
820 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
821 {
822 if (!defined $parser->{'parameters'}->{'poscount'})
823 {
824 $parser->{'parameters'}->{'poscount'} = 0;
825 }
826 else
827 {
828 $parser->{'parameters'}->{'poscount'}++;
829 }
830
831 # gets the first value by default, since metapos defaults to 0
832 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
833 {
834 if($parser->{'parameters'}->{'metapos'} > 0) {
835 print STDERR "@@@@ WARNING: non-zero metapos.\n";
836 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
837 }
838 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
839 }
840 }
841}
842
843sub get_archives_metadata
844{
845 my $self = shift @_;
846
847 my $username = $self->{'username'};
848 my $collect = $self->{'collect'};
849 my $gsdl_cgi = $self->{'gsdl_cgi'};
850# my $gsdlhome = $self->{'gsdlhome'};
851 my $infodbtype = $self->{'infodbtype'};
852
853 # Obtain the collect dir
854 my $site = $self->{'site'};
855 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
856
857 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
858
859 # look up additional args
860 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
861 $docid_secnum = "" if (!defined $docid_secnum);
862
863 my $metaname = $self->{'metaname'};
864 my $metapos = $self->{'metapos'};
865 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
866
867 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
868 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
869
870 # This now stores the full pathname
871 my $doc_filename = $doc_rec->{'doc-file'}->[0];
872
873 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
874
875}
876
877sub get_metadata_from_archive_xml
878{
879 my $self = shift @_;
880 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
881
882 my @start_rules = ('Section' => \&dxml_start_section);
883
884 my @rules =
885 (
886 _default => 'raw',
887 'Metadata' => \&gfdxml_metadata
888 );
889
890 my $parser = XML::Rules->new
891 (
892 start_rules => \@start_rules,
893 rules => \@rules,
894 output_encoding => 'utf8'
895 );
896
897 my $xml_in = "";
898 if (!open(MIN,"<$doc_xml_filename"))
899 {
900 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
901 }
902 else
903 {
904 # Read them in
905 my $line;
906 while (defined ($line=<MIN>)) {
907 $xml_in .= $line;
908 }
909 close(MIN);
910
911 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
912
913 if(defined $parser->{'pad'}->{'metavalue'})
914 {
915 return $parser->{'pad'}->{'metavalue'};
916 }
917 else
918 {
919 return "";
920 }
921 }
922}
923
924sub gfdxml_metadata
925{
926 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
927
928 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
929 {
930 return;
931 }
932
933 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
934 {
935 if (!defined $parser->{'parameters'}->{'poscount'})
936 {
937 $parser->{'parameters'}->{'poscount'} = 0;
938 }
939 else
940 {
941 $parser->{'parameters'}->{'poscount'}++;
942 }
943 }
944
945 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
946 {
947 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
948 }
949}
950
9511;
Note: See TracBrowser for help on using the repository browser.