root/main/trunk/greenstone2/perllib/cgiactions/metadataaction.pm @ 33311

Revision 33311, 35.2 KB (checked in by ak19, 13 months ago)

Bugfix to metadataaction, which hadn't been calculating subsections correctly past 1.x. If any section has subsections, then the next section wasn't being incremented at the correct time with the increment lagging. For example, 3, 3.1, 3.2 ... 3.6, then 3 again instead of being immediately changed to 4 when necessary.

Line 
1##########################################################################
2#
3# metadataaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package metadataaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34
35use JSON;
36
37# This class is conditionally expanded with set-metadata, remove-metadata and insert-metadata subroutines
38# defined in modmetadataaction.pm. The BEGIN code block determines whether the condition holds.
39# See
40# http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
41# http://www.perlmonks.org/?node_id=881761 - splitting module into multiple files
42# http://www.perlmonks.org/?node_id=524456 - merging hashes
43
44our $modmeta_action_table; # don't init to empty hash here, else it will overwrite whatever BEGIN sets this to
45                  # see http://stackoverflow.com/questions/3998619/what-is-the-role-of-the-begin-block-in-perl
46
47BEGIN {
48#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
49    require XML::Rules;
50
51    # if we're GS3, then GS3_AUTHENTICATED must be defined and set to true
52    # in order to have access to subroutines that modify metadata (the set-
53    # and remove- metadata subroutines).
54    # TODO: if we're GS2, then we continue to behave as before?
55
56    if(!defined $ENV{'GSDL3HOME'} || (defined $ENV{'GS3_AUTHENTICATED'} && $ENV{'GS3_AUTHENTICATED'} eq "true")) {
57    require modmetadataaction;
58    }
59    else {
60    $modmeta_action_table = {};
61    }
62}
63
64@metadataaction::ISA = ('baseaction');
65
66
67my $getmeta_action_table =
68{
69    # unused and untested
70    # when DocEdit=1, need to retrieve a doc's full text (or doc section's full text) from archives
71    "get-archives-text" => {
72    'compulsory-args' => [ "d" ] },
73    #'compulsory-args' => [ "d" ],
74    #'optional-args' => [ "section" ] },
75   
76    #GET METHODS
77    "get-import-metadata" => {
78        'compulsory-args' => [ "d", "metaname" ],
79        'optional-args'   => [ "metapos" ] },
80
81    "get-archives-metadata" => {
82        'compulsory-args' => [ "d", "metaname" ],
83        'optional-args'   => [ "metapos" ] },
84   
85    "get-index-metadata" => {
86        'compulsory-args' => [ "d", "metaname" ],
87        'optional-args'   => [ "metapos" ] },
88
89    "get-metadata" => { # alias for get-index-metadata
90        'compulsory-args' => [ "d", "metaname" ],
91        'optional-args'   => [ "metapos" ] },
92
93    "get-live-metadata" => {
94        'compulsory-args' => [ "d", "metaname" ],
95        'optional-args'   => [ ] },
96
97    "get-metadata-array" => { # where param can be ONE of: index (default), import, archives, live
98        'compulsory-args' => [ "json" ],
99        'optional-args'   => [ "where" ],
100        'help-string' => [
101        'metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]'
102        ]}
103};
104
105# To get the final action_table of all available subroutines in this class,
106# merge the get- and mod-metadata hashes. See http://www.perlmonks.org/?node_id=524456
107# Note that modmeta_action_table will be empty of subroutines if the user does not have permissions
108# to modify metadata.
109my $action_table = { %$getmeta_action_table, %$modmeta_action_table };
110
111
112sub new
113{
114    my $class = shift (@_);
115    my ($gsdl_cgi,$iis6_mode) = @_;
116
117    # Treat metavalue specially.  To transmit this through a GET request
118    # the Javascript side has url-encoded it, so here we need to decode
119    # it before proceeding
120
121    my $url_encoded_metavalue = $gsdl_cgi->param("metavalue");
122    my $url_decoded_metavalue = &unicode::url_decode($url_encoded_metavalue,1);
123#$gsdl_cgi->generate_message("@@@ metaaction new - DEBUG before utf82unicode: " . &unicode::debug_unicode_string($url_decoded_metavalue));
124   
125    my $unicode_array = &unicode::utf82unicode($url_decoded_metavalue);
126
127    $url_decoded_metavalue = join("",map(chr($_),@$unicode_array));
128
129#$gsdl_cgi->generate_message("@@@ metaaction new - DEBUG after utf82unicode: " . &unicode::debug_unicode_string($url_decoded_metavalue));
130   
131    $gsdl_cgi->param("metavalue",$url_decoded_metavalue);
132
133    # need to do the same with prevmetavalue
134    my $url_encoded_prevmetavalue = $gsdl_cgi->param("prevmetavalue");
135    my $url_decoded_prevmetavalue = &unicode::url_decode($url_encoded_prevmetavalue,1);
136    my $prevunicode_array = &unicode::utf82unicode($url_decoded_prevmetavalue);
137
138    $url_decoded_prevmetavalue = join("",map(chr($_),@$prevunicode_array));
139    $gsdl_cgi->param("prevmetavalue",$url_decoded_prevmetavalue);
140
141    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
142
143    return bless $self, $class;
144}
145
146
147sub get_live_metadata
148{
149    my $self = shift @_;
150
151    my $username  = $self->{'username'};
152    my $collect   = $self->{'collect'};
153    my $gsdl_cgi  = $self->{'gsdl_cgi'};
154    my $gsdlhome  = $self->{'gsdlhome'};
155    my $infodbtype = $self->{'infodbtype'};
156   
157    # live metadata gets/saves value scoped (prefixed) by the current usename
158    # so (for now) let's not bother to enforce authentication
159
160    # Obtain the collect dir
161    my $site = $self->{'site'};
162    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
163    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
164
165    # No locking collection when getting metadata, only when setting it
166#    $self->lock_collection($username, $collect); # Make sure the collection isn't locked by someone else
167
168    # look up additional args
169    my $docid  = $self->{'d'};
170    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
171       $gsdl_cgi->generate_error("No docid (d=...) specified.");
172    }
173
174    # Generate the dbkey
175    my $metaname  = $self->{'metaname'};
176    my $dbkey = "$docid.$metaname";
177
178    # To people who know $collect_tail please add some comments
179    # Obtain path to the database
180    my $collect_tail = $collect;
181    $collect_tail =~ s/^.*[\/|\\]//;
182    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
183    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
184   
185    # Obtain the content of the key
186    my $cmd = "gdbmget $infodb_file_path $dbkey";
187    if (open(GIN,"$cmd |") == 0) {
188        # Catch error if gdbmget failed
189    my $mess = "Failed to get metadata key: $metaname\n";
190    $mess .= "$!\n";
191
192    $gsdl_cgi->generate_error($mess);
193    }
194    else {
195    binmode(GIN,":utf8");
196        # Read everything in and concatenate them into $metavalue
197    my $metavalue = "";
198    my $line;
199    while (defined ($line=<GIN>)) {
200        $metavalue .= $line;
201    }
202    close(GIN);
203    chomp($metavalue); # Get rid off the tailing newlines
204    $gsdl_cgi->generate_ok_message("$metavalue");
205    }
206
207    # Release the lock once it is done
208#    $self->unlock_collection($username, $collect);
209}
210
211# just calls the index version
212sub get_metadata
213{
214    my $self = shift @_;
215    $self->get_index_metadata(@_);
216}
217
218# JSON version that will get the requested metadata values
219# from the requested source (index, import, archives or live)
220# One of the params is a JSON string and the return value is JSON too
221# http://forums.asp.net/t/1844684.aspx/1 - Web api method return json in string
222sub get_metadata_array
223{
224    my $self = shift @_;
225
226    my $where = $self->{'where'};
227    if (!$where || ($where =~ m/^\s*$/)) { # 0, "0", "" and undef are all false. All else is true.
228    # What is truth in perl: http://www.berkeleyinternet.com/perl/node11.html
229    # and http://www.perlmonks.org/?node_id=33638
230
231    $where = "index"; # default behaviour is to get the values from index
232    }
233
234    # Only when setting metadata do we perform authentication and do we lock the collection,
235    # not when getting metadata
236
237    # for get_meta_array, the where param can only be ONE of import, archives, index, live
238    if($where =~ m/index/) {
239    $self->_get_index_metadata_array(@_);
240    }
241    elsif($where =~ m/archives/) {
242    $self->_get_archives_metadata_array(@_);
243    }
244    elsif($where =~ m/import/) {
245    $self->_get_import_metadata_array(@_);
246    }
247    elsif($where =~ m/live/) {
248        $self->_get_live_metadata_array(@_);
249    }
250}
251
252# Unused at present. Added for completion. Tested.
253sub _get_import_metadata_array {
254   
255    my $self = shift @_;
256
257    my $collect   = $self->{'collect'};
258    my $gsdl_cgi  = $self->{'gsdl_cgi'};
259    my $site = $self->{'site'};
260    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
261   
262    # look up additional args
263    my $infodbtype = $self->{'infodbtype'};
264   
265    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
266    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
267    my $json_str      = $self->{'json'};
268    my $doc_array = decode_json $json_str;
269
270    my $json_result_str = "[";
271    my $first_doc_rec = 1;
272    foreach my $doc_array_rec ( @$doc_array ) {
273   
274    my $docid = $doc_array_rec->{'docid'}; # no subsection metadata support in metadata.xml, only toplevel meta
275   
276    if($first_doc_rec) {
277        $first_doc_rec = 0;
278    } else {
279        $json_result_str .= ",";
280    }
281    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
282
283    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
284    $json_result_str = $json_result_str . ",\"metatable\":[";
285
286    my $first_rec = 1;
287    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
288
289        # Read the docid entry     
290        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
291        # This now stores the full pathname
292        my $import_filename = $doc_rec->{'src-file'}->[0];
293        $import_filename = &util::placeholders_to_abspath($import_filename);
294
295        # figure out correct metadata.xml file [?]
296        # Assuming the metadata.xml file is next to the source file
297        # Note: This will not work if it is using the inherited metadata from the parent folder
298        my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
299        my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
300
301
302        if($first_rec) {
303        $first_rec = 0;
304        } else {
305        $json_result_str .= ",";       
306        }
307       
308        my $metaname  = $metatable_rec->{'metaname'};
309        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
310
311        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
312        if(!defined $metapos) {
313        $metapos = 0;
314        }
315
316        # Obtain the specified metadata value(s)
317        my $metavalue;
318
319        if($metapos ne "all") { # get the value at a single metapos
320        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
321
322        #print STDERR "**** Metafilename, metaname, metapos, sec_num: $metadata_xml_filename, $metaname, $metapos, $import_tailname\n";
323       
324        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
325
326        } else {
327        my $first_metaval = 1;
328        $metapos = 0;
329        $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
330
331        while (defined $metavalue && $metavalue ne "") {
332            if($first_metaval) {       
333            $first_metaval = 0;
334            } else {
335            $json_result_str .= ",";
336            }
337       
338            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
339
340            $metapos++;
341            $metavalue = $self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname);
342        }
343        }
344
345        $json_result_str .= "]}"; # close metavals array and metatable record
346    }
347   
348    $json_result_str .= "]}"; # close metatable array and docid record
349    }
350
351    $json_result_str .= "]"; # close array of docids
352    $gsdl_cgi->generate_ok_message($json_result_str."\n");
353}
354
355# Unused method, but included for completion. Tested, works. Takes a JSON string and returns a JSON string.
356# For more information on the format of the output, see _get_index_metadata_array, which is in use.
357sub _get_archives_metadata_array {
358
359    my $self = shift @_;
360
361    my $collect   = $self->{'collect'};
362    my $gsdl_cgi  = $self->{'gsdl_cgi'};
363    my $site = $self->{'site'};
364    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
365
366    # look up additional args   
367    my $infodbtype = $self->{'infodbtype'};
368
369    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
370    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
371
372    my $json_str      = $self->{'json'};
373    my $doc_array = decode_json $json_str;
374
375    my $json_result_str = "[";
376    my $first_doc_rec = 1;
377    foreach my $doc_array_rec ( @$doc_array ) {
378   
379    my $docid     = $doc_array_rec->{'docid'};
380   
381    if($first_doc_rec) {
382        $first_doc_rec = 0;
383    } else {
384        $json_result_str .= ",";
385    }
386    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
387
388    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
389    $json_result_str = $json_result_str . ",\"metatable\":[";
390
391    my $first_rec = 1;
392    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
393
394        my ($docid, $docid_secnum) = ($doc_array_rec->{'docid'} =~ m/^(.*?)(\..*)?$/);
395        $docid_secnum = "" if (!defined $docid_secnum);
396
397        # Read the docid entry     
398        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
399        # This now stores the full pathname
400        my $doc_filename = $doc_rec->{'doc-file'}->[0];
401        $doc_filename = &util::filename_cat($archive_dir, $doc_filename);
402
403        if($first_rec) {
404        $first_rec = 0;
405        } else {
406        $json_result_str .= ",";       
407        }
408       
409        my $metaname  = $metatable_rec->{'metaname'};
410        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
411
412        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
413        if(!defined $metapos) {
414        $metapos = 0;
415        }
416
417
418        # Obtain the specified metadata value(s)
419        my $metavalue;
420
421        if($metapos ne "all") { # get the value at a single metapos
422
423        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
424        #print STDERR "**** Docname, metaname, metapos, sec_num: $doc_filename, $metaname, $metapos, $docid_secnum\n";
425       
426        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
427
428        } else {
429        my $first_metaval = 1;
430        $metapos = 0;
431        $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
432
433        while (defined $metavalue && $metavalue ne "") {
434            if($first_metaval) {       
435            $first_metaval = 0;
436            } else {
437            $json_result_str .= ",";
438            }
439       
440            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
441
442            $metapos++;
443            $metavalue = $self->get_metadata_from_archive_xml($gsdl_cgi, $doc_filename, $metaname, $metapos, $docid_secnum);
444        }
445        }
446
447        $json_result_str .= "]}"; # close metavals array and metatable record
448    }
449   
450    $json_result_str .= "]}"; # close metatable array and docid record
451    }
452
453    $json_result_str .= "]"; # close array of docids
454    $gsdl_cgi->generate_ok_message($json_result_str."\n");
455}
456
457
458# Unused at present. Added for completion. Tested, but not sure if it retrieves metadata in the manner it's expected to.
459sub _get_live_metadata_array
460{
461    my $self = shift @_;
462
463    my $collect   = $self->{'collect'};
464    my $gsdl_cgi  = $self->{'gsdl_cgi'};
465    my $site = $self->{'site'};
466    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
467
468    # look up additional args   
469    my $infodbtype = $self->{'infodbtype'};
470   
471    # To people who know $collect_tail please add some comments
472    # Obtain the path to the database
473    my $collect_tail = $collect;
474    $collect_tail =~ s/^.*[\/|\\]//;
475    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
476    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, "live-$collect_tail", $index_text_directory);
477
478    my $json_str      = $self->{'json'};
479    my $doc_array = decode_json $json_str;
480
481    my $json_result_str = "[";
482    my $first_doc_rec = 1;
483
484    foreach my $doc_array_rec ( @$doc_array ) {
485   
486    my $docid     = $doc_array_rec->{'docid'};
487   
488    if($first_doc_rec) {
489        $first_doc_rec = 0;
490    } else {
491        $json_result_str .= ",";
492    }
493    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
494   
495    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
496    $json_result_str = $json_result_str . ",\"metatable\":[";
497   
498    my $first_rec = 1;
499    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
500        if($first_rec) {
501        $first_rec = 0;
502        } else {
503        $json_result_str .= ",";       
504        }
505       
506        my $metaname  = $metatable_rec->{'metaname'};
507        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
508       
509        # Generate the dbkey
510        my $dbkey = "$docid.$metaname";
511       
512        # metapos for get_live_metadata is always assumed to be "all".
513        # It's always going to get all the lines of metavalues associated with a metaname
514        # (It's the metaname itself that should contain an increment number, if there are to be multiple values).
515        #my $metapos = "all";
516        my $metapos = $metatable_rec->{'metapos'} || 0; # Can be 0... 1|all|undefined. Defaults to 0 if undefined/false
517        my $metavalue = "";
518       
519        # Obtain the content of the key
520        my $cmd = "gdbmget $infodb_file_path $dbkey";
521        if (open(GIN,"$cmd |") != 0) { # Success.
522       
523        binmode(GIN,":utf8");
524        # Read everything in and concatenate them into $metavalue       
525        my $line;
526        my $first_metaval = 1;
527        my $pos = 0;
528        while (defined ($line=<GIN>)) {
529            chomp($line); # Get rid off the tailing newlines
530           
531            if($metapos eq "all") {
532            if($first_metaval) {       
533                $first_metaval = 0;
534            } else {
535                $json_result_str .= ",";
536            }           
537            $metavalue = $line;
538            $json_result_str .= "{\"metapos\":\"$pos\",\"metavalue\":\"$metavalue\"}";
539            } elsif($metapos == $pos) {
540            $metavalue = $line;
541            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
542            last;
543            } # else, the current $pos is not the required $metapos
544            $pos += 1;
545        }
546        close(GIN);
547        } # else open cmd == 0 (failed) and metavals array will be empty [] for this metaname
548       
549        $json_result_str .= "]}"; # close metavals array and metatable record
550    }
551   
552    $json_result_str .= "]}"; # close metatable array and docid record
553    }
554
555    $json_result_str .= "]"; # close array of docids
556   
557    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
558}
559
560
561# Takes a JSON string and returns a JSON string
562# Request string is of the form:
563# http://localhost:8283/greenstone/cgi-bin/metadata-server.pl?a=get-metadata-array&c=demo&where=index&json=[{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metapos":"all"},{"metaname":"usertimestamp","metapos":"all"}, {"metaname":"usercomment","metapos":"all"}]}]
564# Resulting string is of the form:
565# [{"docid":"HASHc5bce2d6d3e5b04e470ec9","metatable":[{"metaname":"username","metavals":[{"metapos":"0","metavalue":"me"},{"metapos":"1","metavalue":"admin"}]},{"metaname":"usertimestamp","metavals":[{"metapos":"0","metavalue":"1367900586888"},{"metapos":"1","metavalue":"1367900616574"}]},{"metaname":"usercomment","metavals":[{"metapos":"0","metavalue":"Hi"},{"metapos":"1","metavalue":"Hello"}]}]}]
566sub _get_index_metadata_array
567{
568    my $self = shift @_;
569
570    my $collect   = $self->{'collect'};
571    my $gsdl_cgi  = $self->{'gsdl_cgi'};
572    my $site = $self->{'site'};
573    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
574
575    # look up additional args   
576    my $infodbtype = $self->{'infodbtype'};
577   
578    # To people who know $collect_tail please add some comments
579    # Obtain the path to the database
580    my $collect_tail = $collect;
581    $collect_tail =~ s/^.*[\/|\\]//;
582    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
583    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
584
585    my $json_str      = $self->{'json'};
586    my $doc_array = decode_json $json_str;
587
588    my $json_result_str = "[";
589    my $first_doc_rec = 1;
590
591    foreach my $doc_array_rec ( @$doc_array ) {
592   
593    my $docid     = $doc_array_rec->{'docid'};
594   
595    if($first_doc_rec) {
596        $first_doc_rec = 0;
597    } else {
598        $json_result_str .= ",";
599    }
600    $json_result_str = $json_result_str . "{\"docid\":\"" . $docid . "\""; 
601
602    my $metatable = $doc_array_rec->{'metatable'}; # a subarray, or need to generate an error saying JSON structure is wrong
603    $json_result_str = $json_result_str . ",\"metatable\":[";
604
605    my $first_rec = 1;
606    foreach my $metatable_rec ( @$metatable ) { # the subarray metatable is an array of hashmaps       
607        if($first_rec) {
608        $first_rec = 0;
609        } else {
610        $json_result_str .= ",";       
611        }
612       
613        my $metaname  = $metatable_rec->{'metaname'};
614        $json_result_str .= "{\"metaname\":\"$metaname\",\"metavals\":[";
615
616        my $metapos   = $metatable_rec->{'metapos'}; # 0... 1|all|undefined
617        if(!defined $metapos) {
618        $metapos = 0;
619        }
620
621         # Read the docid entry
622        my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
623 
624        # Basically loop through and unescape_html the values
625        foreach my $k (keys %$doc_rec) {
626        my @escaped_v = ();
627        foreach my $v (@{$doc_rec->{$k}}) {
628            my $ev = &ghtml::unescape_html($v);
629            push(@escaped_v, $ev);
630        }
631        $doc_rec->{$k} = \@escaped_v;
632        }
633
634        # Obtain the specified metadata value(s)
635        my $metavalue;
636
637        if($metapos ne "all") { # get the value at a single metapos
638
639        $metavalue = $doc_rec->{$metaname}->[$metapos];
640
641        # protect any double quotes and colons in the metavalue before putting it into JSON
642        $metavalue =~ s/\"/&quot;/g if defined $metavalue;
643        $metavalue =~ s/\:/&58;/g if defined $metavalue;
644
645        $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
646
647        } else {
648        my $first_metaval = 1;
649        $metapos = 0;
650        $metavalue = $doc_rec->{$metaname}->[$metapos];
651
652        while (defined $metavalue) {
653
654            # protect any double quotes and colons in the metavalue before putting it into JSON
655            $metavalue =~ s/\"/&quot;/g;
656            $metavalue =~ s/\:/&58;/g;
657
658            if($first_metaval) {       
659            $first_metaval = 0;
660            } else {
661            $json_result_str .= ",";
662            }
663       
664            $json_result_str .= "{\"metapos\":\"$metapos\",\"metavalue\":\"$metavalue\"}";
665
666            $metapos++;
667            $metavalue = $doc_rec->{$metaname}->[$metapos];
668        }
669        }
670
671        $json_result_str .= "]}"; # close metavals array and metatable record
672    }
673   
674    $json_result_str .= "]}"; # close metatable array and docid record
675    }
676
677    $json_result_str .= "]"; # close array of docids
678
679    $gsdl_cgi->generate_ok_message($json_result_str."\n");   
680}
681
682
683sub get_index_metadata
684{
685    my $self = shift @_;
686
687    my $username  = $self->{'username'};
688    my $collect   = $self->{'collect'};
689    my $gsdl_cgi  = $self->{'gsdl_cgi'};
690    my $gsdlhome  = $self->{'gsdlhome'};
691
692    # Obtain the collect dir
693    my $site = $self->{'site'};
694    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
695    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
696
697    # look up additional args
698    my $docid     = $self->{'d'};
699    my $metaname  = $self->{'metaname'};
700    my $metapos   = $self->{'metapos'};
701    my $infodbtype = $self->{'infodbtype'};
702
703    # To people who know $collect_tail please add some comments
704    # Obtain path to the database
705    my $collect_tail = $collect;
706    $collect_tail =~ s/^.*[\/|\\]//;
707    my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
708    my $infodb_file_path = &dbutil::get_infodb_file_path($infodbtype, $collect_tail, $index_text_directory);
709
710    # Read the docid entry
711    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $infodb_file_path, $docid);
712 
713    # Basically loop through and unescape_html the values
714    foreach my $k (keys %$doc_rec) {
715    my @escaped_v = ();
716    foreach my $v (@{$doc_rec->{$k}}) {
717        my $ev = &ghtml::unescape_html($v);
718        push(@escaped_v, $ev);
719    }
720    $doc_rec->{$k} = \@escaped_v;
721    }
722
723    # Obtain the specified metadata value
724    $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
725    my $metavalue = $doc_rec->{$metaname}->[$metapos];
726    $gsdl_cgi->generate_ok_message("$metavalue");
727   
728}
729
730
731sub get_import_metadata
732{
733    my $self = shift @_;
734
735    my $username  = $self->{'username'};
736    my $collect   = $self->{'collect'};
737    my $gsdl_cgi  = $self->{'gsdl_cgi'};
738    my $gsdlhome  = $self->{'gsdlhome'};
739
740    # Obtain the collect dir
741    my $site = $self->{'site'};
742    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
743    ##my $collect_dir = &util::filename_cat($gsdlhome, "collect");
744
745    # look up additional args
746    my $docid     = $self->{'d'};
747    my $metaname  = $self->{'metaname'};
748    my $metapos = $self->{'metapos'};
749    $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/)); # gets the first value by default since metapos defaults to 0
750
751    my $infodbtype = $self->{'infodbtype'};
752    if (!defined $docid)
753    {
754        $gsdl_cgi->generate_error("No docid (d=...) specified.\n");
755    }
756
757    # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
758    # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
759    my $metadata_xml_file;
760    my $import_filename = undef;
761   
762
763    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
764    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
765    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
766
767    # This now stores the full pathname
768    $import_filename = $doc_rec->{'src-file'}->[0];
769    $import_filename = &util::placeholders_to_abspath($import_filename);
770
771    # figure out correct metadata.xml file [?]
772    # Assuming the metadata.xml file is next to the source file
773    # Note: This will not work if it is using the inherited metadata from the parent folder
774    my ($import_tailname, $import_dirname) = File::Basename::fileparse($import_filename);
775    my $metadata_xml_filename = &util::filename_cat($import_dirname, "metadata.xml");
776
777    $gsdl_cgi->generate_ok_message($self->get_metadata_from_metadata_xml($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $import_tailname));
778
779}
780
781sub get_metadata_from_metadata_xml
782{
783    my $self = shift @_;
784    my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metapos, $src_file) = @_;
785   
786    my @rules =
787    (
788        _default => 'raw',
789        'Metadata' => \&gfmxml_metadata,
790        'FileName' => \&mxml_filename
791    );
792       
793    my $parser = XML::Rules->new
794    (
795        rules => \@rules,
796        output_encoding => 'utf8'
797    );
798   
799    my $xml_in = "";
800    if (!open(MIN,"<$metadata_xml_filename"))
801    {
802        $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
803    }
804    else
805    {
806        # Read them in
807        my $line;
808        while (defined ($line=<MIN>)) {
809            $xml_in .= $line;
810        }
811        close(MIN);
812
813        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, src_file => $src_file});
814       
815        if(defined $parser->{'pad'}->{'metavalue'})
816        {
817            return $parser->{'pad'}->{'metavalue'};
818        }
819        else
820        {
821            return "";
822        }
823    }
824}
825
826sub gfmxml_metadata
827{
828    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
829
830    # no subsection support yet in metadata.xml
831
832    if (($parser->{'parameters'}->{'src_file'} eq $parser->{'parameters'}->{'current_file'}) && $parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
833    {
834        if (!defined $parser->{'parameters'}->{'poscount'})
835        {
836            $parser->{'parameters'}->{'poscount'} = 0;
837        }
838        else
839        {
840            $parser->{'parameters'}->{'poscount'}++;
841        }
842   
843        # gets the first value by default, since metapos defaults to 0
844        if (($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
845        {
846            if($parser->{'parameters'}->{'metapos'} > 0) {
847            print STDERR "@@@@ WARNING: non-zero metapos.\n";
848            print STDERR "@@@@ Assuming SIMPLE collection and proceeding to retrieve the import meta at position: ".$parser->{'parameters'}->{'metapos'}.".\n";
849            }
850            $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
851        }
852    }
853}
854
855sub mxml_filename
856{
857    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
858
859    # Store the filename of the Current Fileset
860    # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
861    # FileName tag must come before Description tag
862    $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
863
864    # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
865    return [$tagname => $attrHash];
866}
867
868sub get_archives_metadata
869{
870    my $self = shift @_;
871
872    my $username  = $self->{'username'};
873    my $collect   = $self->{'collect'};
874    my $gsdl_cgi  = $self->{'gsdl_cgi'};
875#   my $gsdlhome  = $self->{'gsdlhome'};
876    my $infodbtype = $self->{'infodbtype'};
877
878    # Obtain the collect dir
879    my $site = $self->{'site'};
880    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
881   
882    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
883
884    # look up additional args
885    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
886    $docid_secnum = "" if (!defined $docid_secnum);
887   
888    my $metaname = $self->{'metaname'};
889    my $metapos = $self->{'metapos'};
890    $metapos = 0 if (!defined $metapos || ($metapos =~ m/^\s*$/));
891   
892    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
893    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
894
895    # This now stores the full pathname
896    my $doc_filename = $doc_rec->{'doc-file'}->[0];
897
898    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
899
900}
901
902# unused and untested
903sub get_archives_text
904{
905    my $self = shift @_;
906
907    my $username  = $self->{'username'};
908    my $collect   = $self->{'collect'};
909    my $gsdl_cgi  = $self->{'gsdl_cgi'};
910#   my $gsdlhome  = $self->{'gsdlhome'};
911    my $infodbtype = $self->{'infodbtype'};
912
913    # Obtain the collect dir
914    my $site = $self->{'site'};
915    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
916   
917    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
918
919    # look up additional args
920    my ($docid, $docid_secnum) = ($self->{'d'} =~ m/^(.*?)(\..*)?$/);
921    $docid_secnum = "" if (!defined $docid_secnum);
922   
923    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-doc", $archive_dir);
924    my $doc_rec = &dbutil::read_infodb_entry($infodbtype, $arcinfo_doc_filename, $docid);
925
926    # This now stores the full pathname
927    my $doc_filename = $doc_rec->{'doc-file'}->[0];
928   
929    my $metaname = undef;
930    my $metapos = -1;
931   
932    $gsdl_cgi->generate_ok_message($self->get_metadata_from_archive_xml($gsdl_cgi, &util::filename_cat($archive_dir, $doc_filename), $metaname, $metapos, $docid_secnum));
933
934}
935
936sub get_metadata_from_archive_xml
937{
938    my $self = shift @_;
939    my ($gsdl_cgi, $doc_xml_filename, $metaname, $metapos, $secid) = @_;
940   
941    my @start_rules = ('Section' => \&dxml_start_section);
942   
943    my @rules =
944    (
945        _default => 'raw',
946        'Metadata' => \&gfdxml_metadata
947    );
948       
949    my $parser = XML::Rules->new
950    (
951        start_rules => \@start_rules,
952        rules => \@rules,
953        output_encoding => 'utf8'
954    );
955   
956    my $xml_in = "";
957    if (!open(MIN,"<$doc_xml_filename"))
958    {
959        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
960    }
961    else
962    {
963        # Read them in
964        my $line;
965        while (defined ($line=<MIN>)) {
966            $xml_in .= $line;
967        }
968        close(MIN);
969
970        $parser->parse($xml_in, {metaname => $metaname, metapos => $metapos, secid => $secid});
971       
972        if(defined $parser->{'pad'}->{'metavalue'})
973        {
974            return $parser->{'pad'}->{'metavalue'};
975        }
976        else
977        {
978            return "";
979        }
980    }
981}
982
983# unused and untested
984sub get_text_from_archive_xml
985{
986    my $self = shift @_;
987    my ($gsdl_cgi, $doc_xml_filename, $secid) = @_;
988
989    # To monitor which section/subsection number we are in
990    my @start_rules = ('Section' => \&dxml_start_section);
991
992    # set the callback functions for the elements in doc.xml we're interested in, <Content>
993    my @rules =
994    (
995        _default => 'raw',
996        'Content' => \&gfdxml_text # gfdxml = get from doc xml?
997    );
998       
999    my $parser = XML::Rules->new
1000    (
1001        start_rules => \@start_rules,
1002        rules => \@rules,
1003        output_encoding => 'utf8'
1004    );
1005   
1006    my $xml_in = "";
1007    if (!open(MIN,"<$doc_xml_filename"))
1008    {
1009        $gsdl_cgi->generate_error("Unable to read in $doc_xml_filename: $!");
1010    }
1011    else
1012    {
1013        # Read them in
1014        my $line;
1015        while (defined ($line=<MIN>)) {
1016            $xml_in .= $line;
1017        }
1018        close(MIN);
1019
1020        $parser->parse($xml_in, {secid => $secid});
1021       
1022        if(defined $parser->{'pad'}->{'textcontent'})
1023        {
1024            return $parser->{'pad'}->{'textcontent'};
1025        }
1026        else
1027        {
1028            return "";
1029        }
1030    }
1031}
1032
1033sub gfdxml_metadata
1034{
1035    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1036   
1037    if(!($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'}))
1038    {
1039        return;
1040    }
1041
1042    if ($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'})
1043    {
1044        if (!defined $parser->{'parameters'}->{'poscount'})
1045        {
1046            $parser->{'parameters'}->{'poscount'} = 0;
1047        }
1048        else
1049        {
1050            $parser->{'parameters'}->{'poscount'}++;
1051        }
1052    }
1053
1054    if (($parser->{'parameters'}->{'metaname'} eq $attrHash->{'name'}) && ($parser->{'parameters'}->{'poscount'} == $parser->{'parameters'}->{'metapos'}))
1055    {   
1056        $parser->{'pad'}->{'metavalue'} = $attrHash->{'_content'};
1057    }
1058}
1059
1060# unused and untested - for get_archives_text
1061sub gfdxml_text
1062{
1063    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1064   
1065    if($parser->{'parameters'}->{'secid'} eq $parser->{'parameters'}->{'curr_section_num'})
1066    {
1067        $parser->{'pad'}->{'textcontent'} = $attrHash->{'_content'}; # the textnode content
1068
1069    }
1070    else {
1071        return;
1072    }
1073}
1074
1075sub dxml_start_section
1076{
1077    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
1078
1079    my $new_depth = scalar(@$contextArray);
1080
1081#   print STDERR "**** START SECTION \n";
1082   
1083    if ($new_depth == 1) {
1084    $parser->{'parameters'}->{'curr_section_depth'} = 1;
1085    $parser->{'parameters'}->{'curr_section_num'}   = "";
1086    }
1087
1088    my $old_depth  = $parser->{'parameters'}->{'curr_section_depth'};
1089    my $old_secnum = $parser->{'parameters'}->{'curr_section_num'};
1090   
1091    my $new_secnum;
1092
1093    if ($new_depth > $old_depth) {
1094    # child subsection
1095    $new_secnum = "$old_secnum.1";
1096    }
1097    elsif ($new_depth == $old_depth) {
1098    # sibling section => increase its value by 1
1099    my ($tail_num) = ($old_secnum =~ m/\.(\d+)$/);
1100    $tail_num++;
1101    $new_secnum = $old_secnum;
1102    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1103    }
1104    else {
1105    # back up to parent section => lop off tail
1106    $new_secnum = $old_secnum;
1107    $new_secnum =~ s/\.\d+$//;
1108    # remember to increment from parent section
1109    my ($tail_num) = ($new_secnum =~ m/\.(\d+)$/);
1110    $tail_num++;
1111    $new_secnum =~ s/\.(\d+)$/\.$tail_num/;
1112    }
1113
1114    $parser->{'parameters'}->{'curr_section_depth'} = $new_depth;
1115    $parser->{'parameters'}->{'curr_section_num'}   = $new_secnum;
1116
1117    #print STDERR "**** START SECTION - $new_secnum\n************************\n\n";
1118    1;
1119}
1120
11211;
Note: See TracBrowser for help on using the browser.