source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 32062

Last change on this file since 32062 was 32062, checked in by ak19, 6 years ago

Committing unused and untested new subroutines, in case they may prove useful in future.

File size: 34.7 KB
Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37# This class is conditionally expanded with set-metadata, remove-metadata and insert-metadata subroutines
38# defined in modmetadataaction.pm. The BEGIN code block determines whether the condition holds.
39# See
40# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
41# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files
42# http://www.perlmonks.org/?node_id=524456 - merging hashes
43
44our $modmeta_action_table; # don't init to empty hash here, else it will overwrite whatever BEGIN sets this to
45 # see http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
46
47BEGIN {
48# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
49 require XML::Rules;
50
51 # if we're GS3, then GS3_AUTHENTICATED must be defined and set to true
52 # in order to have access to subroutines that modify metadata (the set-
53 # and remove- metadata subroutines).
54 # TODO: if we're GS2, then we continue to behave as before?
55
56 if(!defined $ENV{'GSDL3HOME'} || (defined $ENV{'GS3_AUTHENTICATED'} && $ENV{'GS3_AUTHENTICATED'} eq "true")) {
57 require modmetadataaction;
58 }
59 else {
60 $modmeta_action_table = {};
61 }
62}
63
64@metadataaction::ISA = ('baseaction');
65
66
67my $getmeta_action_table =
68{
69 # unused and untested
70 # when DocEdit=1, need to retrieve a doc's full text (or doc section's full text) from archives
71 "get-archives-text" => {
72 'compulsory-args' => [ "d" ] },
73 #'compulsory-args' => [ "d" ],
74 #'optional-args' => [ "section" ] },
75
76 #GET METHODS
77 "get-import-metadata" => {
78 'compulsory-args' => [ "d", "metaname" ],
79 'optional-args' => [ "metapos" ] },
80
81 "get-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname" ],
83 'optional-args' => [ "metapos" ] },
84
85 "get-index-metadata" => {
86 'compulsory-args' => [ "d", "metaname" ],
87 'optional-args' => [ "metapos" ] },
88
89 "get-metadata" => { # alias for get-index-metadata
90 'compulsory-args' => [ "d", "metaname" ],
91 'optional-args' => [ "metapos" ] },
92
93 "get-live-metadata" => {
94 'compulsory-args' => [ "d", "metaname" ],
95 'optional-args' => [ ] },
96
97 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
98 'compulsory-args' => [ "json" ],
99 'optional-args' => [ "where" ],
100 'help-string' => [
101 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
102 ]}
103};
104
105# To get the final action_table of all available subroutines in this class,
106# merge the get- and mod-metadata hashes. See http://www.perlmonks.org/?node_id=524456
107# Note that modmeta_action_table will be empty of subroutines if the user does not have permissions
108# to modify metadata.
109my $action_table = { %$getmeta_action_table, %$modmeta_action_table };
110
111
112sub new
113{
114 my $class = shift (@_);
115 my ($gsdl_cgi,$iis6_mode) = @_;
116
117 # Treat metavalue specially. To transmit this through a GET request
118 # the Javascript side has url-encoded it, so here we need to decode
119 # it before proceeding
120
121 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
122 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
123 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
124
125 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
126 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
127
128 # need to do the same with prevmetavalue
129 my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
130 my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
131 my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
132
133 $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
134 $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
135
136 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
137
138 return bless $self, $class;
139}
140
141
142sub get_live_metadata
143{
144 my $self = shift @_;
145
146 my $username = $self->{'username'};
147 my $collect = $self->{'collect'};
148 my $gsdl_cgi = $self->{'gsdl_cgi'};
149 my $gsdlhome = $self->{'gsdlhome'};
150 my $infodbtype = $self->{'infodbtype'};
151
152 # live metadata gets/saves value scoped (prefixed) by the current usename
153 # so (for now) let's not bother to enforce authentication
154
155 # Obtain the collect dir
156 my $site = $self->{'site'};
157 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
158 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
159
160 # No locking collection when getting metadata, only when setting it
161# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
162
163 # look up additional args
164 my $docid = $self->{'d'};
165 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
166 $gsdl_cgi->generate_error("No docid (d=...) specified.");
167 }
168
169 # Generate the dbkey
170 my $metaname = $self->{'metaname'};
171 my $dbkey = "$docid.$metaname";
172
173 # To people who know $collect_tail please add some comments
174 # Obtain path to the database
175 my $collect_tail = $collect;
176 $collect_tail =~ s/^.*[\/|\\]//;
177 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
178 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
179
180 # Obtain the content of the key
181 my $cmd = "gdbmget $infodb_file_path $dbkey";
182 if (open(GIN,"$cmd |") == 0) {
183 # Catch error if gdbmget failed
184 my $mess = "Failed to get metadata key: $metaname\n";
185 $mess .= "$!\n";
186
187 $gsdl_cgi->generate_error($mess);
188 }
189 else {
190 binmode(GIN,":utf8");
191 # Read everything in and concatenate them into $metavalue
192 my $metavalue = "";
193 my $line;
194 while (defined ($line=<GIN>)) {
195 $metavalue .= $line;
196 }
197 close(GIN);
198 chomp($metavalue); # Get rid off the tailing newlines
199 $gsdl_cgi->generate_ok_message("$metavalue");
200 }
201
202 # Release the lock once it is done
203# $self->unlock_collection($username, $collect);
204}
205
206# just calls the index version
207sub get_metadata
208{
209 my $self = shift @_;
210 $self->get_index_metadata(@_);
211}
212
213# JSON version that will get the requested metadata values
214# from the requested source (index, import, archives or live)
215# One of the params is a JSON string and the return value is JSON too
216# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
217sub get_metadata_array
218{
219 my $self = shift @_;
220
221 my $where = $self->{'where'};
222 if (!$where || ($where =~ m/^\s*$/)) { # 0, "0", "" and undef are all false. All else is true.
223 # What is truth in perl: http://www.berkeleyinternet.com/perl/node11.html
224 # and http://www.perlmonks.org/?node_id=33638
225
226 $where = "index"; # default behaviour is to get the values from index
227 }
228
229 # Only when setting metadata do we perform authentication and do we lock the collection,
230 # not when getting metadata
231
232 # for get_meta_array, the where param can only be ONE of import, archives, index, live
233 if($where =~ m/index/) {
234 $self->_get_index_metadata_array(@_);
235 }
236 elsif($where =~ m/archives/) {
237 $self->_get_archives_metadata_array(@_);
238 }
239 elsif($where =~ m/import/) {
240 $self->_get_import_metadata_array(@_);
241 }
242 elsif($where =~ m/live/) {
243 $self->_get_live_metadata_array(@_);
244 }
245}
246
247# Unused at present. Added for completion. Tested.
248sub _get_import_metadata_array {
249
250 my $self = shift @_;
251
252 my $collect = $self->{'collect'};
253 my $gsdl_cgi = $self->{'gsdl_cgi'};
254 my $site = $self->{'site'};
255 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
256
257 # look up additional args
258 my $infodbtype = $self->{'infodbtype'};
259
260 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
261 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
262 my $json_str = $self->{'json'};
263 my $doc_array = decode_json $json_str;
264
265 my $json_result_str = "[";
266 my $first_doc_rec = 1;
267 foreach my $doc_array_rec ( @$doc_array ) {
268
269 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
270
271 if($first_doc_rec) {
272 $first_doc_rec = 0;
273 } else {
274 $json_result_str .= ",";
275 }
276 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
277
278 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
279 $json_result_str = $json_result_str . ",\"metatable\":[";
280
281 my $first_rec = 1;
282 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
283
284 # Read the docid entry
285 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
286 # This now stores the full pathname
287 my $import_filename = $doc_rec->{'src-file'}->[0];
288 $import_filename = &util::placeholders_to_abspath($import_filename);
289
290 # figure out correct metadata.xml file [?]
291 # Assuming the metadata.xml file is next to the source file
292 # Note: This will not work if it is using the inherited metadata from the parent folder
293 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
294 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
295
296
297 if($first_rec) {
298 $first_rec = 0;
299 } else {
300 $json_result_str .= ",";
301 }
302
303 my $metaname = $metatable_rec->{'metaname'};
304 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
305
306 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
307 if(!defined $metapos) {
308 $metapos = 0;
309 }
310
311 # Obtain the specified metadata value(s)
312 my $metavalue;
313
314 if($metapos ne "all") { # get the value at a single metapos
315 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
316
317 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
318
319 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
320
321 } else {
322 my $first_metaval = 1;
323 $metapos = 0;
324 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
325
326 while (defined $metavalue && $metavalue ne "") {
327 if($first_metaval) {
328 $first_metaval = 0;
329 } else {
330 $json_result_str .= ",";
331 }
332
333 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
334
335 $metapos++;
336 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
337 }
338 }
339
340 $json_result_str .= "]}"; # close metavals array and metatable record
341 }
342
343 $json_result_str .= "]}"; # close metatable array and docid record
344 }
345
346 $json_result_str .= "]"; # close array of docids
347 $gsdl_cgi->generate_ok_message($json_result_str."\n");
348}
349
350# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
351# For more information on the format of the output, see _get_index_metadata_array, which is in use.
352sub _get_archives_metadata_array {
353
354 my $self = shift @_;
355
356 my $collect = $self->{'collect'};
357 my $gsdl_cgi = $self->{'gsdl_cgi'};
358 my $site = $self->{'site'};
359 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
360
361 # look up additional args
362 my $infodbtype = $self->{'infodbtype'};
363
364 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
365 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
366
367 my $json_str = $self->{'json'};
368 my $doc_array = decode_json $json_str;
369
370 my $json_result_str = "[";
371 my $first_doc_rec = 1;
372 foreach my $doc_array_rec ( @$doc_array ) {
373
374 my $docid = $doc_array_rec->{'docid'};
375
376 if($first_doc_rec) {
377 $first_doc_rec = 0;
378 } else {
379 $json_result_str .= ",";
380 }
381 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
382
383 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
384 $json_result_str = $json_result_str . ",\"metatable\":[";
385
386 my $first_rec = 1;
387 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
388
389 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
390 $docid_secnum = "" if (!defined $docid_secnum);
391
392 # Read the docid entry
393 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
394 # This now stores the full pathname
395 my $doc_filename = $doc_rec->{'doc-file'}->[0];
396 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
397
398 if($first_rec) {
399 $first_rec = 0;
400 } else {
401 $json_result_str .= ",";
402 }
403
404 my $metaname = $metatable_rec->{'metaname'};
405 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
406
407 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
408 if(!defined $metapos) {
409 $metapos = 0;
410 }
411
412
413 # Obtain the specified metadata value(s)
414 my $metavalue;
415
416 if($metapos ne "all") { # get the value at a single metapos
417
418 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
419 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
420
421 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
422
423 } else {
424 my $first_metaval = 1;
425 $metapos = 0;
426 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
427
428 while (defined $metavalue && $metavalue ne "") {
429 if($first_metaval) {
430 $first_metaval = 0;
431 } else {
432 $json_result_str .= ",";
433 }
434
435 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
436
437 $metapos++;
438 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
439 }
440 }
441
442 $json_result_str .= "]}"; # close metavals array and metatable record
443 }
444
445 $json_result_str .= "]}"; # close metatable array and docid record
446 }
447
448 $json_result_str .= "]"; # close array of docids
449 $gsdl_cgi->generate_ok_message($json_result_str."\n");
450}
451
452
453# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
454sub _get_live_metadata_array
455{
456 my $self = shift @_;
457
458 my $collect = $self->{'collect'};
459 my $gsdl_cgi = $self->{'gsdl_cgi'};
460 my $site = $self->{'site'};
461 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
462
463 # look up additional args
464 my $infodbtype = $self->{'infodbtype'};
465
466 # To people who know $collect_tail please add some comments
467 # Obtain the path to the database
468 my $collect_tail = $collect;
469 $collect_tail =~ s/^.*[\/|\\]//;
470 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
471 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
472
473 my $json_str = $self->{'json'};
474 my $doc_array = decode_json $json_str;
475
476 my $json_result_str = "[";
477 my $first_doc_rec = 1;
478
479 foreach my $doc_array_rec ( @$doc_array ) {
480
481 my $docid = $doc_array_rec->{'docid'};
482
483 if($first_doc_rec) {
484 $first_doc_rec = 0;
485 } else {
486 $json_result_str .= ",";
487 }
488 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
489
490 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
491 $json_result_str = $json_result_str . ",\"metatable\":[";
492
493 my $first_rec = 1;
494 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
495 if($first_rec) {
496 $first_rec = 0;
497 } else {
498 $json_result_str .= ",";
499 }
500
501 my $metaname = $metatable_rec->{'metaname'};
502 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
503
504 # Generate the dbkey
505 my $dbkey = "$docid.$metaname";
506
507 # metapos for get_live_metadata is always assumed to be "all".
508 # It's always going to get all the lines of metavalues associated with a metaname
509 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
510 #my $metapos = "all";
511 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
512 my $metavalue = "";
513
514 # Obtain the content of the key
515 my $cmd = "gdbmget $infodb_file_path $dbkey";
516 if (open(GIN,"$cmd |") != 0) { # Success.
517
518 binmode(GIN,":utf8");
519 # Read everything in and concatenate them into $metavalue
520 my $line;
521 my $first_metaval = 1;
522 my $pos = 0;
523 while (defined ($line=<GIN>)) {
524 chomp($line); # Get rid off the tailing newlines
525
526 if($metapos eq "all") {
527 if($first_metaval) {
528 $first_metaval = 0;
529 } else {
530 $json_result_str .= ",";
531 }
532 $metavalue = $line;
533 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
534 } elsif($metapos == $pos) {
535 $metavalue = $line;
536 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
537 last;
538 } # else, the current $pos is not the required $metapos
539 $pos += 1;
540 }
541 close(GIN);
542 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
543
544 $json_result_str .= "]}"; # close metavals array and metatable record
545 }
546
547 $json_result_str .= "]}"; # close metatable array and docid record
548 }
549
550 $json_result_str .= "]"; # close array of docids
551
552 $gsdl_cgi->generate_ok_message($json_result_str."\n");
553}
554
555
556# Takes a JSON string and returns a JSON string
557# Request string is of the form:
558# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
559# Resulting string is of the form:
560# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
561sub _get_index_metadata_array
562{
563 my $self = shift @_;
564
565 my $collect = $self->{'collect'};
566 my $gsdl_cgi = $self->{'gsdl_cgi'};
567 my $site = $self->{'site'};
568 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
569
570 # look up additional args
571 my $infodbtype = $self->{'infodbtype'};
572
573 # To people who know $collect_tail please add some comments
574 # Obtain the path to the database
575 my $collect_tail = $collect;
576 $collect_tail =~ s/^.*[\/|\\]//;
577 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
578 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
579
580 my $json_str = $self->{'json'};
581 my $doc_array = decode_json $json_str;
582
583 my $json_result_str = "[";
584 my $first_doc_rec = 1;
585
586 foreach my $doc_array_rec ( @$doc_array ) {
587
588 my $docid = $doc_array_rec->{'docid'};
589
590 if($first_doc_rec) {
591 $first_doc_rec = 0;
592 } else {
593 $json_result_str .= ",";
594 }
595 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
596
597 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
598 $json_result_str = $json_result_str . ",\"metatable\":[";
599
600 my $first_rec = 1;
601 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
602 if($first_rec) {
603 $first_rec = 0;
604 } else {
605 $json_result_str .= ",";
606 }
607
608 my $metaname = $metatable_rec->{'metaname'};
609 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
610
611 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
612 if(!defined $metapos) {
613 $metapos = 0;
614 }
615
616 # Read the docid entry
617 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
618
619 # Basically loop through and unescape_html the values
620 foreach my $k (keys %$doc_rec) {
621 my @escaped_v = ();
622 foreach my $v (@{$doc_rec->{$k}}) {
623 my $ev = &ghtml::unescape_html($v);
624 push(@escaped_v, $ev);
625 }
626 $doc_rec->{$k} = \@escaped_v;
627 }
628
629 # Obtain the specified metadata value(s)
630 my $metavalue;
631
632 if($metapos ne "all") { # get the value at a single metapos
633
634 $metavalue = $doc_rec->{$metaname}->[$metapos];
635
636 # protect any double quotes and colons in the metavalue before putting it into JSON
637 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
638 $metavalue =~ s/\:/&58;/g if defined $metavalue;
639
640 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
641
642 } else {
643 my $first_metaval = 1;
644 $metapos = 0;
645 $metavalue = $doc_rec->{$metaname}->[$metapos];
646
647 while (defined $metavalue) {
648
649 # protect any double quotes and colons in the metavalue before putting it into JSON
650 $metavalue =~ s/\"/&quot;/g;
651 $metavalue =~ s/\:/&58;/g;
652
653 if($first_metaval) {
654 $first_metaval = 0;
655 } else {
656 $json_result_str .= ",";
657 }
658
659 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
660
661 $metapos++;
662 $metavalue = $doc_rec->{$metaname}->[$metapos];
663 }
664 }
665
666 $json_result_str .= "]}"; # close metavals array and metatable record
667 }
668
669 $json_result_str .= "]}"; # close metatable array and docid record
670 }
671
672 $json_result_str .= "]"; # close array of docids
673
674 $gsdl_cgi->generate_ok_message($json_result_str."\n");
675}
676
677
678sub get_index_metadata
679{
680 my $self = shift @_;
681
682 my $username = $self->{'username'};
683 my $collect = $self->{'collect'};
684 my $gsdl_cgi = $self->{'gsdl_cgi'};
685 my $gsdlhome = $self->{'gsdlhome'};
686
687 # Obtain the collect dir
688 my $site = $self->{'site'};
689 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
690 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
691
692 # look up additional args
693 my $docid = $self->{'d'};
694 my $metaname = $self->{'metaname'};
695 my $metapos = $self->{'metapos'};
696 my $infodbtype = $self->{'infodbtype'};
697
698 # To people who know $collect_tail please add some comments
699 # Obtain path to the database
700 my $collect_tail = $collect;
701 $collect_tail =~ s/^.*[\/|\\]//;
702 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
703 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
704
705 # Read the docid entry
706 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
707
708 # Basically loop through and unescape_html the values
709 foreach my $k (keys %$doc_rec) {
710 my @escaped_v = ();
711 foreach my $v (@{$doc_rec->{$k}}) {
712 my $ev = &ghtml::unescape_html($v);
713 push(@escaped_v, $ev);
714 }
715 $doc_rec->{$k} = \@escaped_v;
716 }
717
718 # Obtain the specified metadata value
719 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
720 my $metavalue = $doc_rec->{$metaname}->[$metapos];
721 $gsdl_cgi->generate_ok_message("$metavalue");
722
723}
724
725
726sub get_import_metadata
727{
728 my $self = shift @_;
729
730 my $username = $self->{'username'};
731 my $collect = $self->{'collect'};
732 my $gsdl_cgi = $self->{'gsdl_cgi'};
733 my $gsdlhome = $self->{'gsdlhome'};
734
735 # Obtain the collect dir
736 my $site = $self->{'site'};
737 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
738 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
739
740 # look up additional args
741 my $docid = $self->{'d'};
742 my $metaname = $self->{'metaname'};
743 my $metapos = $self->{'metapos'};
744 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/)); # gets the first value by default since metapos defaults to 0
745
746 my $infodbtype = $self->{'infodbtype'};
747 if (!defined $docid)
748 {
749 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
750 }
751
752 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
753 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
754 my $metadata_xml_file;
755 my $import_filename = undef;
756
757
758 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
759 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
760 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
761
762 # This now stores the full pathname
763 $import_filename = $doc_rec->{'src-file'}->[0];
764 $import_filename = &util::placeholders_to_abspath($import_filename);
765
766 # figure out correct metadata.xml file [?]
767 # Assuming the metadata.xml file is next to the source file
768 # Note: This will not work if it is using the inherited metadata from the parent folder
769 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
770 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
771
772 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
773
774}
775
776sub get_metadata_from_metadata_xml
777{
778 my $self = shift @_;
779 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
780
781 my @rules =
782 (
783 _default => 'raw',
784 'Metadata' => \&gfmxml_metadata,
785 'FileName' => \&mxml_filename
786 );
787
788 my $parser = XML::Rules->new
789 (
790 rules => \@rules,
791 output_encoding => 'utf8'
792 );
793
794 my $xml_in = "";
795 if (!open(MIN,"<$metadata_xml_filename"))
796 {
797 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
798 }
799 else
800 {
801 # Read them in
802 my $line;
803 while (defined ($line=<MIN>)) {
804 $xml_in .= $line;
805 }
806 close(MIN);
807
808 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
809
810 if(defined $parser->{'pad'}->{'metavalue'})
811 {
812 return $parser->{'pad'}->{'metavalue'};
813 }
814 else
815 {
816 return "";
817 }
818 }
819}
820
821sub gfmxml_metadata
822{
823 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
824
825 # no subsection support yet in metadata.xml
826
827 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
828 {
829 if (!defined $parser->{'parameters'}->{'poscount'})
830 {
831 $parser->{'parameters'}->{'poscount'} = 0;
832 }
833 else
834 {
835 $parser->{'parameters'}->{'poscount'}++;
836 }
837
838 # gets the first value by default, since metapos defaults to 0
839 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
840 {
841 if($parser->{'parameters'}->{'metapos'} > 0) {
842 print STDERR "@@@@ WARNING: non-zero metapos.\n";
843 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
844 }
845 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
846 }
847 }
848}
849
850sub mxml_filename
851{
852 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
853
854 # Store the filename of the Current Fileset
855 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
856 # FileName tag must come before Description tag
857 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
858
859 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
860 return [$tagname => $attrHash];
861}
862
863sub get_archives_metadata
864{
865 my $self = shift @_;
866
867 my $username = $self->{'username'};
868 my $collect = $self->{'collect'};
869 my $gsdl_cgi = $self->{'gsdl_cgi'};
870# my $gsdlhome = $self->{'gsdlhome'};
871 my $infodbtype = $self->{'infodbtype'};
872
873 # Obtain the collect dir
874 my $site = $self->{'site'};
875 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
876
877 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
878
879 # look up additional args
880 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
881 $docid_secnum = "" if (!defined $docid_secnum);
882
883 my $metaname = $self->{'metaname'};
884 my $metapos = $self->{'metapos'};
885 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
886
887 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
888 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
889
890 # This now stores the full pathname
891 my $doc_filename = $doc_rec->{'doc-file'}->[0];
892
893 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
894
895}
896
897# unused and untested
898sub get_archives_text
899{
900 my $self = shift @_;
901
902 my $username = $self->{'username'};
903 my $collect = $self->{'collect'};
904 my $gsdl_cgi = $self->{'gsdl_cgi'};
905# my $gsdlhome = $self->{'gsdlhome'};
906 my $infodbtype = $self->{'infodbtype'};
907
908 # Obtain the collect dir
909 my $site = $self->{'site'};
910 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
911
912 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
913
914 # look up additional args
915 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
916 $docid_secnum = "" if (!defined $docid_secnum);
917
918 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
919 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
920
921 # This now stores the full pathname
922 my $doc_filename = $doc_rec->{'doc-file'}->[0];
923
924 my $metaname = undef;
925 my $metapos = -1;
926
927 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
928
929}
930
931sub get_metadata_from_archive_xml
932{
933 my $self = shift @_;
934 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
935
936 my @start_rules = ('Section' => \&dxml_start_section);
937
938 my @rules =
939 (
940 _default => 'raw',
941 'Metadata' => \&gfdxml_metadata
942 );
943
944 my $parser = XML::Rules->new
945 (
946 start_rules => \@start_rules,
947 rules => \@rules,
948 output_encoding => 'utf8'
949 );
950
951 my $xml_in = "";
952 if (!open(MIN,"<$doc_xml_filename"))
953 {
954 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
955 }
956 else
957 {
958 # Read them in
959 my $line;
960 while (defined ($line=<MIN>)) {
961 $xml_in .= $line;
962 }
963 close(MIN);
964
965 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
966
967 if(defined $parser->{'pad'}->{'metavalue'})
968 {
969 return $parser->{'pad'}->{'metavalue'};
970 }
971 else
972 {
973 return "";
974 }
975 }
976}
977
978# unused and untested
979sub get_text_from_archive_xml
980{
981 my $self = shift @_;
982 my ($gsdl_cgi, $doc_xml_filename, $secid) = @_;
983
984 # To monitor which section/subsection number we are in
985 my @start_rules = ('Section' => \&dxml_start_section);
986
987 # set the callback functions for the elements in doc.xml we're interested in, <Content>
988 my @rules =
989 (
990 _default => 'raw',
991 'Content' => \&gfdxml_text # gfdxml = get from doc xml?
992 );
993
994 my $parser = XML::Rules->new
995 (
996 start_rules => \@start_rules,
997 rules => \@rules,
998 output_encoding => 'utf8'
999 );
1000
1001 my $xml_in = "";
1002 if (!open(MIN,"<$doc_xml_filename"))
1003 {
1004 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1005 }
1006 else
1007 {
1008 # Read them in
1009 my $line;
1010 while (defined ($line=<MIN>)) {
1011 $xml_in .= $line;
1012 }
1013 close(MIN);
1014
1015 $parser->parse($xml_in, {secid => $secid});
1016
1017 if(defined $parser->{'pad'}->{'textcontent'})
1018 {
1019 return $parser->{'pad'}->{'textcontent'};
1020 }
1021 else
1022 {
1023 return "";
1024 }
1025 }
1026}
1027
1028sub gfdxml_metadata
1029{
1030 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1031
1032 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1033 {
1034 return;
1035 }
1036
1037 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1038 {
1039 if (!defined $parser->{'parameters'}->{'poscount'})
1040 {
1041 $parser->{'parameters'}->{'poscount'} = 0;
1042 }
1043 else
1044 {
1045 $parser->{'parameters'}->{'poscount'}++;
1046 }
1047 }
1048
1049 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1050 {
1051 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
1052 }
1053}
1054
1055# unused and untested - for get_archives_text
1056sub gfdxml_text
1057{
1058 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1059
1060 if($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'})
1061 {
1062 $parser->{'pad'}->{'textcontent'} = $attrHash->{'_content'}; # the textnode content
1063
1064 }
1065 else {
1066 return;
1067 }
1068}
1069
1070sub dxml_start_section
1071{
1072 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1073
1074 my $new_depth = scalar(@$contextArray);
1075
1076# print STDERR "**** START SECTION \n";
1077
1078 if ($new_depth == 1) {
1079 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1080 $parser->{'parameters'}->{'curr_section_num'} = "";
1081 }
1082
1083 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1084 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1085
1086 my $new_secnum;
1087
1088 if ($new_depth > $old_depth) {
1089 # child subsection
1090 $new_secnum = "$old_secnum.1";
1091 }
1092 elsif ($new_depth == $old_depth) {
1093 # sibling section => increase it's value by 1
1094 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1095 $tail_num++;
1096 $new_secnum = $old_secnum;
1097 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1098 }
1099 else {
1100 # back up to parent section => lop off tail
1101 $new_secnum = $old_secnum;
1102 $new_secnum =~ s/\.\d+$//;
1103 }
1104
1105 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1106 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1107
1108 1;
1109}
1110
11111;
Note: See TracBrowser for help on using the repository browser.