source: main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm@ 33183

Last change on this file since 33183 was 33183, checked in by ak19, 5 years ago
  1. Added the untested erase_archives/index/live/import_metadata() subroutines to modmetadataaction, which will remove all metadata with matching metadata name from the specified docid. They generate no perl interpreter errors when running metadata-server.pl in authenticated mode, but otherwise they're untested. 2. Fixed an issue in baseaction.pm with running metadata-server.pl. 3. Commented out debug messages from metadataaction.pm. Second commit phase will need to introduce matching javascript methods to erase metadata.
File size: 35.0 KB
RevLine 
[28159]1##########################################################################
[19293]2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
[29099]10# This program is free software; you can redistribute it and/or modify
[19293]11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
[21551]32use dbutil;
[19499]33use ghtml;
[19293]34
[24071]35use JSON;
[21563]36
[31617]37# This class is conditionally expanded with set-metadata, remove-metadata and insert-metadata subroutines
38# defined in modmetadataaction.pm. The BEGIN code block determines whether the condition holds.
39# See
[31602]40# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
41# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files
42# http://www.perlmonks.org/?node_id=524456 - merging hashes
[24071]43
[31602]44our $modmeta_action_table; # don't init to empty hash here, else it will overwrite whatever BEGIN sets this to
45 # see http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
46
[19293]47BEGIN {
[22331]48# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
[19293]49 require XML::Rules;
[31602]50
51 # if we're GS3, then GS3_AUTHENTICATED must be defined and set to true
52 # in order to have access to subroutines that modify metadata (the set-
53 # and remove- metadata subroutines).
54 # TODO: if we're GS2, then we continue to behave as before?
55
56 if(!defined $ENV{'GSDL3HOME'} || (defined $ENV{'GS3_AUTHENTICATED'} && $ENV{'GS3_AUTHENTICATED'} eq "true")) {
57 require modmetadataaction;
58 }
59 else {
60 $modmeta_action_table = {};
61 }
[19293]62}
63
64@metadataaction::ISA = ('baseaction');
65
[31602]66
67my $getmeta_action_table =
[25097]68{
[32062]69 # unused and untested
70 # when DocEdit=1, need to retrieve a doc's full text (or doc section's full text) from archives
71 "get-archives-text" => {
72 'compulsory-args' => [ "d" ] },
73 #'compulsory-args' => [ "d" ],
74 #'optional-args' => [ "section" ] },
75
[25097]76 #GET METHODS
77 "get-import-metadata" => {
78 'compulsory-args' => [ "d", "metaname" ],
[27176]79 'optional-args' => [ "metapos" ] },
[19499]80
[25097]81 "get-archives-metadata" => {
82 'compulsory-args' => [ "d", "metaname" ],
83 'optional-args' => [ "metapos" ] },
84
[27157]85 "get-index-metadata" => {
[25097]86 'compulsory-args' => [ "d", "metaname" ],
87 'optional-args' => [ "metapos" ] },
[19499]88
[27157]89 "get-metadata" => { # alias for get-index-metadata
90 'compulsory-args' => [ "d", "metaname" ],
91 'optional-args' => [ "metapos" ] },
92
[25097]93 "get-live-metadata" => {
94 'compulsory-args' => [ "d", "metaname" ],
95 'optional-args' => [ ] },
[19499]96
[27312]97 "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
98 'compulsory-args' => [ "json" ],
99 'optional-args' => [ "where" ],
100 'help-string' => [
101 'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
[31602]102 ]}
103};
[27312]104
[31617]105# To get the final action_table of all available subroutines in this class,
106# merge the get- and mod-metadata hashes. See http://www.perlmonks.org/?node_id=524456
107# Note that modmeta_action_table will be empty of subroutines if the user does not have permissions
108# to modify metadata.
[31602]109my $action_table = { %$getmeta_action_table, %$modmeta_action_table };
[19499]110
[27261]111
[19293]112sub new
113{
114 my $class = shift (@_);
115 my ($gsdl_cgi,$iis6_mode) = @_;
116
[23761]117 # Treat metavalue specially. To transmit this through a GET request
118 # the Javascript side has url-encoded it, so here we need to decode
119 # it before proceeding
120
121 my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
122 my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
[33183]123#$gsdl_cgi->generate_message("@@@ metaaction new - DEBUG before utf82unicode: " . &unicode::debug_unicode_string($url_decoded_metavalue));
124
[23761]125 my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
126
127 $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
[33183]128
129#$gsdl_cgi->generate_message("@@@ metaaction new - DEBUG after utf82unicode: " . &unicode::debug_unicode_string($url_decoded_metavalue));
130
[23761]131 $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
132
[29086]133 # need to do the same with prevmetavalue
134 my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
135 my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
136 my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
137
138 $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
139 $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
140
[19293]141 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
142
143 return bless $self, $class;
144}
145
146
147sub get_live_metadata
148{
149 my $self = shift @_;
150
151 my $username = $self->{'username'};
152 my $collect = $self->{'collect'};
153 my $gsdl_cgi = $self->{'gsdl_cgi'};
154 my $gsdlhome = $self->{'gsdlhome'};
[23478]155 my $infodbtype = $self->{'infodbtype'};
[27180]156
[23447]157 # live metadata gets/saves value scoped (prefixed) by the current usename
[23761]158 # so (for now) let's not bother to enforce authentication
[21715]159
160 # Obtain the collect dir
[23766]161 my $site = $self->{'site'};
162 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
163 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19293]164
[27314]165 # No locking collection when getting metadata, only when setting it
166# $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
[19293]167
168 # look up additional args
169 my $docid = $self->{'d'};
170 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
[21715]171 $gsdl_cgi->generate_error("No docid (d=...) specified.");
[19293]172 }
173
[21715]174 # Generate the dbkey
[19293]175 my $metaname = $self->{'metaname'};
176 my $dbkey = "$docid.$metaname";
177
[21715]178 # To people who know $collect_tail please add some comments
179 # Obtain path to the database
[19293]180 my $collect_tail = $collect;
181 $collect_tail =~ s/^.*[\/|\\]//;
[21564]182 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]183 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
[21715]184
185 # Obtain the content of the key
[21569]186 my $cmd = "gdbmget $infodb_file_path $dbkey";
[19293]187 if (open(GIN,"$cmd |") == 0) {
[21715]188 # Catch error if gdbmget failed
[19293]189 my $mess = "Failed to get metadata key: $metaname\n";
190 $mess .= "$!\n";
191
192 $gsdl_cgi->generate_error($mess);
193 }
194 else {
[23761]195 binmode(GIN,":utf8");
[21715]196 # Read everything in and concatenate them into $metavalue
[19293]197 my $metavalue = "";
198 my $line;
199 while (defined ($line=<GIN>)) {
200 $metavalue .= $line;
201 }
202 close(GIN);
[21715]203 chomp($metavalue); # Get rid off the tailing newlines
[19293]204 $gsdl_cgi->generate_ok_message("$metavalue");
205 }
[21715]206
207 # Release the lock once it is done
[27314]208# $self->unlock_collection($username, $collect);
[19499]209}
[19293]210
[27157]211# just calls the index version
[19499]212sub get_metadata
213{
214 my $self = shift @_;
[27157]215 $self->get_index_metadata(@_);
216}
[19499]217
[27312]218# JSON version that will get the requested metadata values
219# from the requested source (index, import, archives or live)
220# One of the params is a JSON string and the return value is JSON too
221# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
222sub get_metadata_array
223{
224 my $self = shift @_;
225
226 my $where = $self->{'where'};
[31589]227 if (!$where || ($where =~ m/^\s*$/)) { # 0, "0", "" and undef are all false. All else is true.
228 # What is truth in perl: http://www.berkeleyinternet.com/perl/node11.html
229 # and http://www.perlmonks.org/?node_id=33638
230
[27312]231 $where = "index"; # default behaviour is to get the values from index
232 }
233
[27336]234 # Only when setting metadata do we perform authentication and do we lock the collection,
[27312]235 # not when getting metadata
236
237 # for get_meta_array, the where param can only be ONE of import, archives, index, live
238 if($where =~ m/index/) {
239 $self->_get_index_metadata_array(@_);
240 }
[27315]241 elsif($where =~ m/archives/) {
[27324]242 $self->_get_archives_metadata_array(@_);
[27315]243 }
244 elsif($where =~ m/import/) {
[27333]245 $self->_get_import_metadata_array(@_);
[27315]246 }
247 elsif($where =~ m/live/) {
[27336]248 $self->_get_live_metadata_array(@_);
[27315]249 }
[27312]250}
251
[27336]252# Unused at present. Added for completion. Tested.
[27333]253sub _get_import_metadata_array {
254
255 my $self = shift @_;
[27324]256
[27333]257 my $collect = $self->{'collect'};
258 my $gsdl_cgi = $self->{'gsdl_cgi'};
259 my $site = $self->{'site'};
260 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
261
262 # look up additional args
263 my $infodbtype = $self->{'infodbtype'};
264
265 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
266 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
267 my $json_str = $self->{'json'};
268 my $doc_array = decode_json $json_str;
269
270 my $json_result_str = "[";
271 my $first_doc_rec = 1;
272 foreach my $doc_array_rec ( @$doc_array ) {
273
274 my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
275
276 if($first_doc_rec) {
277 $first_doc_rec = 0;
278 } else {
279 $json_result_str .= ",";
280 }
281 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
282
283 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
284 $json_result_str = $json_result_str . ",\"metatable\":[";
285
286 my $first_rec = 1;
287 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
288
289 # Read the docid entry
290 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
291 # This now stores the full pathname
292 my $import_filename = $doc_rec->{'src-file'}->[0];
[28211]293 $import_filename = &util::placeholders_to_abspath($import_filename);
[27333]294
295 # figure out correct metadata.xml file [?]
296 # Assuming the metadata.xml file is next to the source file
297 # Note: This will not work if it is using the inherited metadata from the parent folder
298 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
299 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
300
301
302 if($first_rec) {
303 $first_rec = 0;
304 } else {
305 $json_result_str .= ",";
306 }
307
308 my $metaname = $metatable_rec->{'metaname'};
309 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
310
311 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
312 if(!defined $metapos) {
313 $metapos = 0;
314 }
315
316 # Obtain the specified metadata value(s)
317 my $metavalue;
318
319 if($metapos ne "all") { # get the value at a single metapos
320 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
321
322 #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
323
324 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
325
326 } else {
327 my $first_metaval = 1;
328 $metapos = 0;
329 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
330
331 while (defined $metavalue && $metavalue ne "") {
332 if($first_metaval) {
333 $first_metaval = 0;
334 } else {
335 $json_result_str .= ",";
336 }
337
338 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
339
340 $metapos++;
341 $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
342 }
343 }
344
345 $json_result_str .= "]}"; # close metavals array and metatable record
346 }
347
348 $json_result_str .= "]}"; # close metatable array and docid record
349 }
350
351 $json_result_str .= "]"; # close array of docids
352 $gsdl_cgi->generate_ok_message($json_result_str."\n");
353}
354
[27324]355# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
356# For more information on the format of the output, see _get_index_metadata_array, which is in use.
357sub _get_archives_metadata_array {
358
359 my $self = shift @_;
360
361 my $collect = $self->{'collect'};
362 my $gsdl_cgi = $self->{'gsdl_cgi'};
363 my $site = $self->{'site'};
364 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
365
366 # look up additional args
367 my $infodbtype = $self->{'infodbtype'};
368
369 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
370 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
371
372 my $json_str = $self->{'json'};
373 my $doc_array = decode_json $json_str;
374
375 my $json_result_str = "[";
376 my $first_doc_rec = 1;
377 foreach my $doc_array_rec ( @$doc_array ) {
378
379 my $docid = $doc_array_rec->{'docid'};
380
381 if($first_doc_rec) {
382 $first_doc_rec = 0;
383 } else {
384 $json_result_str .= ",";
385 }
386 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
387
388 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
389 $json_result_str = $json_result_str . ",\"metatable\":[";
390
391 my $first_rec = 1;
392 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
393
394 my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
395 $docid_secnum = "" if (!defined $docid_secnum);
396
397 # Read the docid entry
398 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
399 # This now stores the full pathname
[28211]400 my $doc_filename = $doc_rec->{'doc-file'}->[0];
[27324]401 $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
402
403 if($first_rec) {
404 $first_rec = 0;
405 } else {
406 $json_result_str .= ",";
407 }
408
409 my $metaname = $metatable_rec->{'metaname'};
410 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
411
412 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
413 if(!defined $metapos) {
414 $metapos = 0;
415 }
416
417
418 # Obtain the specified metadata value(s)
419 my $metavalue;
420
421 if($metapos ne "all") { # get the value at a single metapos
422
423 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
424 #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
425
426 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
427
428 } else {
429 my $first_metaval = 1;
430 $metapos = 0;
431 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
432
433 while (defined $metavalue && $metavalue ne "") {
434 if($first_metaval) {
435 $first_metaval = 0;
436 } else {
437 $json_result_str .= ",";
438 }
439
440 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
441
442 $metapos++;
443 $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
444 }
445 }
446
447 $json_result_str .= "]}"; # close metavals array and metatable record
448 }
449
450 $json_result_str .= "]}"; # close metatable array and docid record
451 }
452
453 $json_result_str .= "]"; # close array of docids
454 $gsdl_cgi->generate_ok_message($json_result_str."\n");
455}
456
[27336]457
458# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
459sub _get_live_metadata_array
460{
461 my $self = shift @_;
462
463 my $collect = $self->{'collect'};
464 my $gsdl_cgi = $self->{'gsdl_cgi'};
465 my $site = $self->{'site'};
466 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
467
468 # look up additional args
469 my $infodbtype = $self->{'infodbtype'};
470
471 # To people who know $collect_tail please add some comments
472 # Obtain the path to the database
473 my $collect_tail = $collect;
474 $collect_tail =~ s/^.*[\/|\\]//;
475 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
476 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
477
478 my $json_str = $self->{'json'};
479 my $doc_array = decode_json $json_str;
480
481 my $json_result_str = "[";
482 my $first_doc_rec = 1;
483
484 foreach my $doc_array_rec ( @$doc_array ) {
485
486 my $docid = $doc_array_rec->{'docid'};
487
488 if($first_doc_rec) {
489 $first_doc_rec = 0;
490 } else {
491 $json_result_str .= ",";
492 }
493 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
494
495 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
496 $json_result_str = $json_result_str . ",\"metatable\":[";
497
498 my $first_rec = 1;
499 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
500 if($first_rec) {
501 $first_rec = 0;
502 } else {
503 $json_result_str .= ",";
504 }
505
506 my $metaname = $metatable_rec->{'metaname'};
507 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
508
509 # Generate the dbkey
510 my $dbkey = "$docid.$metaname";
511
512 # metapos for get_live_metadata is always assumed to be "all".
513 # It's always going to get all the lines of metavalues associated with a metaname
514 # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
515 #my $metapos = "all";
516 my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
517 my $metavalue = "";
518
519 # Obtain the content of the key
520 my $cmd = "gdbmget $infodb_file_path $dbkey";
521 if (open(GIN,"$cmd |") != 0) { # Success.
522
523 binmode(GIN,":utf8");
524 # Read everything in and concatenate them into $metavalue
525 my $line;
526 my $first_metaval = 1;
527 my $pos = 0;
528 while (defined ($line=<GIN>)) {
529 chomp($line); # Get rid off the tailing newlines
530
531 if($metapos eq "all") {
532 if($first_metaval) {
533 $first_metaval = 0;
534 } else {
535 $json_result_str .= ",";
536 }
537 $metavalue = $line;
538 $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
539 } elsif($metapos == $pos) {
540 $metavalue = $line;
541 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
542 last;
543 } # else, the current $pos is not the required $metapos
544 $pos += 1;
545 }
546 close(GIN);
547 } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
548
549 $json_result_str .= "]}"; # close metavals array and metatable record
550 }
551
552 $json_result_str .= "]}"; # close metatable array and docid record
553 }
554
555 $json_result_str .= "]"; # close array of docids
556
557 $gsdl_cgi->generate_ok_message($json_result_str."\n");
558}
559
560
561# Takes a JSON string and returns a JSON string
562# Request string is of the form:
563# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
564# Resulting string is of the form:
[27313]565# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
[27312]566sub _get_index_metadata_array
567{
568 my $self = shift @_;
569
570 my $collect = $self->{'collect'};
571 my $gsdl_cgi = $self->{'gsdl_cgi'};
572 my $site = $self->{'site'};
573 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
574
575 # look up additional args
576 my $infodbtype = $self->{'infodbtype'};
577
578 # To people who know $collect_tail please add some comments
579 # Obtain the path to the database
580 my $collect_tail = $collect;
[27336]581 $collect_tail =~ s/^.*[\/|\\]//;
[27312]582 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
583 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
584
585 my $json_str = $self->{'json'};
586 my $doc_array = decode_json $json_str;
587
588 my $json_result_str = "[";
589 my $first_doc_rec = 1;
590
591 foreach my $doc_array_rec ( @$doc_array ) {
592
593 my $docid = $doc_array_rec->{'docid'};
594
595 if($first_doc_rec) {
596 $first_doc_rec = 0;
597 } else {
598 $json_result_str .= ",";
599 }
[27313]600 $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\"";
[27312]601
602 my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
[27313]603 $json_result_str = $json_result_str . ",\"metatable\":[";
[27312]604
605 my $first_rec = 1;
606 foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps
607 if($first_rec) {
608 $first_rec = 0;
609 } else {
610 $json_result_str .= ",";
611 }
612
613 my $metaname = $metatable_rec->{'metaname'};
[27313]614 $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
[27312]615
616 my $metapos = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
617 if(!defined $metapos) {
[27313]618 $metapos = 0;
[27312]619 }
620
621 # Read the docid entry
622 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
623
624 # Basically loop through and unescape_html the values
625 foreach my $k (keys %$doc_rec) {
626 my @escaped_v = ();
627 foreach my $v (@{$doc_rec->{$k}}) {
628 my $ev = &ghtml::unescape_html($v);
629 push(@escaped_v, $ev);
630 }
631 $doc_rec->{$k} = \@escaped_v;
632 }
633
634 # Obtain the specified metadata value(s)
635 my $metavalue;
636
637 if($metapos ne "all") { # get the value at a single metapos
638
[27366]639 $metavalue = $doc_rec->{$metaname}->[$metapos];
640
641 # protect any double quotes and colons in the metavalue before putting it into JSON
642 $metavalue =~ s/\"/&quot;/g if defined $metavalue;
643 $metavalue =~ s/\:/&58;/g if defined $metavalue;
644
[27313]645 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
[27312]646
647 } else {
648 my $first_metaval = 1;
649 $metapos = 0;
650 $metavalue = $doc_rec->{$metaname}->[$metapos];
651
652 while (defined $metavalue) {
[27366]653
654 # protect any double quotes and colons in the metavalue before putting it into JSON
655 $metavalue =~ s/\"/&quot;/g;
656 $metavalue =~ s/\:/&58;/g;
657
[27312]658 if($first_metaval) {
659 $first_metaval = 0;
660 } else {
661 $json_result_str .= ",";
662 }
663
[27313]664 $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
[27312]665
666 $metapos++;
[27366]667 $metavalue = $doc_rec->{$metaname}->[$metapos];
[27312]668 }
669 }
670
671 $json_result_str .= "]}"; # close metavals array and metatable record
672 }
673
674 $json_result_str .= "]}"; # close metatable array and docid record
675 }
676
677 $json_result_str .= "]"; # close array of docids
678
[27324]679 $gsdl_cgi->generate_ok_message($json_result_str."\n");
[27312]680}
681
682
[27157]683sub get_index_metadata
684{
685 my $self = shift @_;
686
[19499]687 my $username = $self->{'username'};
688 my $collect = $self->{'collect'};
689 my $gsdl_cgi = $self->{'gsdl_cgi'};
690 my $gsdlhome = $self->{'gsdlhome'};
691
[21715]692 # Obtain the collect dir
[23766]693 my $site = $self->{'site'};
694 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
695 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
[19499]696
697 # look up additional args
698 my $docid = $self->{'d'};
699 my $metaname = $self->{'metaname'};
700 my $metapos = $self->{'metapos'};
[23400]701 my $infodbtype = $self->{'infodbtype'};
[19499]702
[21715]703 # To people who know $collect_tail please add some comments
704 # Obtain path to the database
[19499]705 my $collect_tail = $collect;
[27336]706 $collect_tail =~ s/^.*[\/|\\]//;
[21564]707 my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
[23400]708 my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
[21715]709
710 # Read the docid entry
[23400]711 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
712
[21715]713 # Basically loop through and unescape_html the values
[19499]714 foreach my $k (keys %$doc_rec) {
715 my @escaped_v = ();
716 foreach my $v (@{$doc_rec->{$k}}) {
717 my $ev = &ghtml::unescape_html($v);
718 push(@escaped_v, $ev);
719 }
720 $doc_rec->{$k} = \@escaped_v;
721 }
722
[21715]723 # Obtain the specified metadata value
[31589]724 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
[19499]725 my $metavalue = $doc_rec->{$metaname}->[$metapos];
726 $gsdl_cgi->generate_ok_message("$metavalue");
[21715]727
[19293]728}
729
730
[25097]731sub get_import_metadata
732{
733 my $self = shift @_;
734
735 my $username = $self->{'username'};
736 my $collect = $self->{'collect'};
737 my $gsdl_cgi = $self->{'gsdl_cgi'};
738 my $gsdlhome = $self->{'gsdlhome'};
739
740 # Obtain the collect dir
741 my $site = $self->{'site'};
742 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
743 ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
744
745 # look up additional args
746 my $docid = $self->{'d'};
747 my $metaname = $self->{'metaname'};
[27176]748 my $metapos = $self->{'metapos'};
[31589]749 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/)); # gets the first value by default since metapos defaults to 0
[27176]750
[25097]751 my $infodbtype = $self->{'infodbtype'};
[27176]752 if (!defined $docid)
[25097]753 {
[27173]754 $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
[25097]755 }
756
757 # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
758 # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
759 my $metadata_xml_file;
760 my $import_filename = undef;
761
762
763 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
764 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
765 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
766
767 # This now stores the full pathname
768 $import_filename = $doc_rec->{'src-file'}->[0];
[28211]769 $import_filename = &util::placeholders_to_abspath($import_filename);
[25097]770
771 # figure out correct metadata.xml file [?]
772 # Assuming the metadata.xml file is next to the source file
773 # Note: This will not work if it is using the inherited metadata from the parent folder
774 my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
775 my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
776
[27176]777 $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
[25097]778
779}
780
781sub get_metadata_from_metadata_xml
782{
783 my $self = shift @_;
[27176]784 my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
[25097]785
786 my @rules =
787 (
788 _default => 'raw',
789 'Metadata' => \&gfmxml_metadata,
790 'FileName' => \&mxml_filename
791 );
792
793 my $parser = XML::Rules->new
794 (
795 rules => \@rules,
796 output_encoding => 'utf8'
797 );
798
799 my $xml_in = "";
800 if (!open(MIN,"<$metadata_xml_filename"))
801 {
802 $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
803 }
804 else
805 {
806 # Read them in
807 my $line;
808 while (defined ($line=<MIN>)) {
809 $xml_in .= $line;
810 }
811 close(MIN);
812
[27176]813 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
[25097]814
815 if(defined $parser->{'pad'}->{'metavalue'})
816 {
817 return $parser->{'pad'}->{'metavalue'};
818 }
819 else
820 {
821 return "";
822 }
823 }
824}
825
826sub gfmxml_metadata
827{
828 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
829
[27176]830 # no subsection support yet in metadata.xml
831
832 if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
[25097]833 {
[27176]834 if (!defined $parser->{'parameters'}->{'poscount'})
835 {
836 $parser->{'parameters'}->{'poscount'} = 0;
837 }
838 else
839 {
840 $parser->{'parameters'}->{'poscount'}++;
841 }
842
843 # gets the first value by default, since metapos defaults to 0
844 if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
845 {
846 if($parser->{'parameters'}->{'metapos'} > 0) {
847 print STDERR "@@@@ WARNING: non-zero metapos.\n";
[27333]848 print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
[27176]849 }
850 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
851 }
[25097]852 }
853}
854
[32061]855sub mxml_filename
856{
857 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
858
859 # Store the filename of the Current Fileset
860 # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
861 # FileName tag must come before Description tag
862 $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
863
864 # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
865 return [$tagname => $attrHash];
866}
867
[25097]868sub get_archives_metadata
869{
870 my $self = shift @_;
871
872 my $username = $self->{'username'};
873 my $collect = $self->{'collect'};
874 my $gsdl_cgi = $self->{'gsdl_cgi'};
[27324]875# my $gsdlhome = $self->{'gsdlhome'};
[25097]876 my $infodbtype = $self->{'infodbtype'};
877
878 # Obtain the collect dir
879 my $site = $self->{'site'};
880 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
881
882 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
883
884 # look up additional args
885 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
886 $docid_secnum = "" if (!defined $docid_secnum);
887
888 my $metaname = $self->{'metaname'};
889 my $metapos = $self->{'metapos'};
[31589]890 $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
[25097]891
892 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
893 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
894
895 # This now stores the full pathname
896 my $doc_filename = $doc_rec->{'doc-file'}->[0];
897
898 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
899
900}
901
[32062]902# unused and untested
903sub get_archives_text
904{
905 my $self = shift @_;
906
907 my $username = $self->{'username'};
908 my $collect = $self->{'collect'};
909 my $gsdl_cgi = $self->{'gsdl_cgi'};
910# my $gsdlhome = $self->{'gsdlhome'};
911 my $infodbtype = $self->{'infodbtype'};
912
913 # Obtain the collect dir
914 my $site = $self->{'site'};
915 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
916
917 my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
918
919 # look up additional args
920 my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
921 $docid_secnum = "" if (!defined $docid_secnum);
922
923 my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
924 my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
925
926 # This now stores the full pathname
927 my $doc_filename = $doc_rec->{'doc-file'}->[0];
928
929 my $metaname = undef;
930 my $metapos = -1;
931
932 $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
933
934}
935
[25097]936sub get_metadata_from_archive_xml
937{
938 my $self = shift @_;
939 my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
940
941 my @start_rules = ('Section' => \&dxml_start_section);
942
943 my @rules =
944 (
945 _default => 'raw',
946 'Metadata' => \&gfdxml_metadata
947 );
948
949 my $parser = XML::Rules->new
950 (
951 start_rules => \@start_rules,
952 rules => \@rules,
953 output_encoding => 'utf8'
954 );
955
956 my $xml_in = "";
957 if (!open(MIN,"<$doc_xml_filename"))
958 {
959 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
960 }
961 else
962 {
963 # Read them in
964 my $line;
965 while (defined ($line=<MIN>)) {
966 $xml_in .= $line;
967 }
968 close(MIN);
969
970 $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
971
972 if(defined $parser->{'pad'}->{'metavalue'})
973 {
974 return $parser->{'pad'}->{'metavalue'};
975 }
976 else
977 {
978 return "";
979 }
980 }
981}
982
[32062]983# unused and untested
984sub get_text_from_archive_xml
985{
986 my $self = shift @_;
987 my ($gsdl_cgi, $doc_xml_filename, $secid) = @_;
988
989 # To monitor which section/subsection number we are in
990 my @start_rules = ('Section' => \&dxml_start_section);
991
992 # set the callback functions for the elements in doc.xml we're interested in, <Content>
993 my @rules =
994 (
995 _default => 'raw',
996 'Content' => \&gfdxml_text # gfdxml = get from doc xml?
997 );
998
999 my $parser = XML::Rules->new
1000 (
1001 start_rules => \@start_rules,
1002 rules => \@rules,
1003 output_encoding => 'utf8'
1004 );
1005
1006 my $xml_in = "";
1007 if (!open(MIN,"<$doc_xml_filename"))
1008 {
1009 $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1010 }
1011 else
1012 {
1013 # Read them in
1014 my $line;
1015 while (defined ($line=<MIN>)) {
1016 $xml_in .= $line;
1017 }
1018 close(MIN);
1019
1020 $parser->parse($xml_in, {secid => $secid});
1021
1022 if(defined $parser->{'pad'}->{'textcontent'})
1023 {
1024 return $parser->{'pad'}->{'textcontent'};
1025 }
1026 else
1027 {
1028 return "";
1029 }
1030 }
1031}
1032
[25097]1033sub gfdxml_metadata
1034{
1035 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1036
1037 if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1038 {
1039 return;
1040 }
1041
1042 if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1043 {
1044 if (!defined $parser->{'parameters'}->{'poscount'})
1045 {
1046 $parser->{'parameters'}->{'poscount'} = 0;
1047 }
1048 else
1049 {
1050 $parser->{'parameters'}->{'poscount'}++;
1051 }
1052 }
1053
1054 if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1055 {
1056 $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
1057 }
1058}
1059
[32062]1060# unused and untested - for get_archives_text
1061sub gfdxml_text
1062{
1063 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1064
1065 if($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'})
1066 {
1067 $parser->{'pad'}->{'textcontent'} = $attrHash->{'_content'}; # the textnode content
[32061]1068
[32062]1069 }
1070 else {
1071 return;
1072 }
1073}
1074
[32061]1075sub dxml_start_section
1076{
1077 my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1078
1079 my $new_depth = scalar(@$contextArray);
1080
1081# print STDERR "**** START SECTION \n";
1082
1083 if ($new_depth == 1) {
1084 $parser->{'parameters'}->{'curr_section_depth'} = 1;
1085 $parser->{'parameters'}->{'curr_section_num'} = "";
1086 }
1087
1088 my $old_depth = $parser->{'parameters'}->{'curr_section_depth'};
1089 my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1090
1091 my $new_secnum;
1092
1093 if ($new_depth > $old_depth) {
1094 # child subsection
1095 $new_secnum = "$old_secnum.1";
1096 }
1097 elsif ($new_depth == $old_depth) {
1098 # sibling section => increase it's value by 1
1099 my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1100 $tail_num++;
1101 $new_secnum = $old_secnum;
1102 $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1103 }
1104 else {
[32062]1105 # back up to parent section => lop off tail
[32061]1106 $new_secnum = $old_secnum;
1107 $new_secnum =~ s/\.\d+$//;
1108 }
1109
1110 $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1111 $parser->{'parameters'}->{'curr_section_num'} = $new_secnum;
1112
[32062]1113 1;
[32061]1114}
1115
[19293]11161;
Note: See TracBrowser for help on using the repository browser.