root/main/trunk/greenstone2/perllib/collConfigxml.pm @ 22485

Revision 22485, 15.2 KB (checked in by ak19, 10 years ago)

1. Dr Bainbridge fixed the database perl modules to all have the method read_info_keys (which reads the keys from the database into a map), so that dbutil.pm can have the same as a generic method. 2. buildConfigxml.pm only writes out the defaultIndex if it is set (to prevent an Uninitialised Variable warning message from Perl).

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# collConfigxml.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in configuration files of xml form
27
28package collConfigxml;
29use strict;
30no strict 'refs';
31no strict 'subs';
32
33# Wrapper that ensures the right version of XML::Parser is loaded given
34# the version of Perl being used.  Need to distinguish between Perl 5.6 and
35# Perl 5.8
36sub BEGIN {
37    my $perl_dir;
38
39    # Note: $] encodes the version number of perl
40    if ($]>=5.010) {
41    $perl_dir = "perl-5.10";
42    }
43    elsif ($]>5.008) {
44    # perl 5.8.1 or above
45    $perl_dir = "perl-5.8";
46    }
47    elsif ($]<5.008) {
48    # assume perl 5.6
49    $perl_dir = "perl-5.6";
50    }
51    else {
52    print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
53    print STDERR "         Please upgrade to a newer version of Perl.\n";
54    $perl_dir = "perl-5.8";
55    }
56
57    if ($ENV{'GSDLOS'} !~ /^windows$/i) {
58    # Use push to put this on the end, so an existing XML::Parser will be used by default
59    push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
60    }
61}
62
63use XML::Parser;
64
65# A mapping hash to resolve name discrepancy between gs2 and gs3.
66my $nameMap = {"key" => "value",
67           "creator" => "creator",
68           "maintainer" => "maintainer",
69           "public" => "public",
70           "infodb" => "infodbtype",
71           "defaultIndex" => "defaultindex",
72           "defaultLevel" => "defaultlevel",
73           "name" => "collectionname",
74           "description" => "collectionextra",
75           "smallicon" => "iconcollectionsmall",
76           "icon" => "iconcollection",
77           "level" => "levels",
78           "classifier" => "classify",
79           "indexSubcollection" => "indexsubcollections",
80           "indexLanguage" => "languages",
81           "defaultIndexLanguage" => "defaultlanguage",
82           "index" => "indexes",
83           "plugin" => "plugin",
84           "plugout" => "plugout",
85           "indexOption" => "indexoptions",
86           "searchType" => "searchtype",
87           "languageMetadata" => "languagemetadata",
88           "buildType" => "buildtype",
89           "buildOption" => "buildOption"
90           };
91# A hash structure which is returned by sub read_cfg_file.
92my $data = {};
93
94my $repeatedBlock = q/^(browse|pluginList)$/; #|buildOptionList)$/;
95
96# use those unique attribute values to locate the text within the elements
97# creator, public, maintainer and within a displayItem.
98my $currentLocation = "";
99my $stringexp = q/^(creator|maintainer|public|buildType)$/;
100my $displayItemNames = q/^(name|description)$/;
101 
102# For storing the attributes during the StartTag subroutine, so that
103# we can use it later in Text (or EndTag) subroutines
104my $currentAttrRef = undef;
105
106my $currentLevel = "";
107
108# Count the elements with same name within the same block
109# ("plugin", "option")
110my $currentIndex = 0;
111my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;
112my $arrayarrayexp= q/^(plugin|classifier)$/; #|buildOption)$/;
113my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
114my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
115
116my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;
117
118# Reads in the model collection configuration file, collectionConfig.xml,
119# into a structure which complies with the one used by gs2 (i.e. one read
120# in by &cfgread::read_cfg_file).
121sub read_cfg_file {
122    my ($filename) = @_;
123    $data = {};
124    if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
125        return undef;
126    }
127
128    # create XML::Parser object for parsing metadata.xml files
129    my $parser;
130    if ($]<5.008) {
131    # Perl 5.6
132    $parser = new XML::Parser('Style' => 'Stream',
133                  'Handlers' => {'Char' => \&Char,
134                         'Doctype' => \&Doctype
135                         });
136    }
137    else {
138    # Perl 5.8
139    $parser = new XML::Parser('Style' => 'Stream',
140                  'ProtocolEncoding' => 'ISO-8859-1',
141                  'Handlers' => {'Char' => \&Char,
142                         'Doctype' => \&Doctype
143                         });
144    }
145
146    if (!open (COLCFG, $filename)) {
147    print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
148    } else {
149
150      $parser->parsefile ($filename);# (COLCFG);
151      close (COLCFG);
152    }
153
154    #&Display;
155    return $data;
156}
157
158sub StartTag {
159# Those marked with #@ will not be executed at the same time when this sub is being called
160# so that if/elsif is used to avoid unnecessary tests
161    my ($expat, $element) = @_;
162   
163    # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
164    # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
165    # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
166    $currentAttrRef = \%_;
167
168    my $name = $_{'name'};
169    my $value = $_{'value'};
170    my $type = $_{'type'};
171
172    # for subcollections
173    my $filter = $_{'filter'};
174   
175    # was this just a flax thing??
176    my $assigned = $_{'assigned'};
177   
178    #@ Marking repeated block
179    if ($element =~ /$repeatedBlock/) {
180    $currentIndex = 0;
181    }
182
183    #@ handling block metadataList
184    elsif (defined $name and $name =~ /$stringexp/){
185      $currentLocation = $name;
186    }
187    #@ handling default search index/level/indexLanguage and languageMetadata
188    elsif ($element =~ /$defaults/) {
189      if (defined $name and $name =~ /\w/) {
190    $data->{$nameMap->{$element}} = $name;
191      }
192    }
193
194    #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2)
195    elsif($element eq "displayItemList") {
196    $currentLevel = "displayItemList"; # storing the parent if it is displayItemList
197    }
198    elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements
199    if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) {
200        # either when there is no "assigned" attribute, or when assigned=true (for displayItems):
201        $currentLocation = $name;
202    }
203    }
204
205    #@ Handling database type: gdbm or gdbm-txtgz, later jdbm.
206    elsif ($element eq "infodb") {
207      $data->{'infodbtype'} = $type;
208    }
209   
210    #@ Handling indexer: mgpp/mg/lucene; stringexp
211    elsif ($element eq "search") {
212      $data->{'buildtype'} = $type;
213    }
214
215    #@ Handling searchtype: plain,form; arrayexp
216    #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
217    #@ Handling searchtype: plain, form
218    #$currentLocation = $name; 
219    #}
220 
221    #@ Handle index|level|indexSubcollection|indexLanguage
222    elsif ($element =~ /$arrayexp/) {
223      my $key = $nameMap->{$element};   
224      if (!defined $data->{$key}) {
225    $data->{$key} = [];
226      }
227
228      push (@{$data->{$key}},$name);
229    }
230
231    #*****************************************
232    elsif ($element eq "buildOption") {
233        print STDERR "**** BUILD OPTION PAIR $name $value\n";
234        $data->{$name} = $value;
235    }   
236
237
238    #@ indexoptions: accentfold/casefold/stem; arrayexp
239    elsif ($element eq "indexOption") {
240      $currentLevel = "indexOption";
241    }
242    if ($currentLevel eq "indexOption" and $element eq "option") {
243      my $key = $nameMap->{$currentLevel}; 
244      if (!defined $data->{$key}) {
245    $data->{$key} = [];
246      }
247      push (@{$data->{$key}},$name);
248    }
249    #@ plugout options
250    elsif ($element eq "plugout") {
251    $currentLevel = "plugout";
252    my $key = $nameMap->{$currentLevel};   
253    if (!defined $data->{$key}) {
254        $data->{$key} = [];
255    }
256    if(defined $name and $name ne ""){
257        push (@{$data->{$key}},$name);
258    }
259    else{
260       push (@{$data->{$key}},"GreenstoneXMLPlugout");
261    }
262    }
263    if ($currentLevel eq "plugout" and $element eq "option") {     
264    my $key = $nameMap->{$currentLevel};
265    if (defined $name and $name ne ""){
266        push (@{$data->{$key}},$name);
267    }
268    if (defined $value and $value ne  ""){
269        push (@{$data->{$key}},$value);
270    }
271    }
272
273    #@ use hash of hash of strings: hashexp
274    elsif ($element =~ /$hashexp/) {
275      if (!defined $data->{$element}) {
276    $data->{$element} = {};
277      }
278      if (defined $name and $name =~ /\w/) {
279    if (defined $filter and $filter =~ /\w/) {
280      $data->{$element}->{$name} = $filter;
281
282    }
283      }
284    }
285
286    #@ Handling each classifier/plugin element
287    elsif ($element =~ /$arrayarrayexp/) {
288    # find the gs2 mapping name
289        $currentLevel = $element;
290        my $key = $nameMap->{$element};
291
292    # define an array of array of strings   foreach $k (@{$data->{$key}}) {
293    if (!defined $data->{$key}) {
294      $data->{$key} = [];
295    }
296    # Push classifier/plugin name (e.g. AZList) into $data as the first string
297    push (@{$data->{$key}->[$currentIndex]},$name);
298    if (defined $value and $value =~ /\w/) {
299        push (@{$data->{$key}->[$currentIndex]}, $value);
300        print "$value\n";
301    }
302    #print $currentIndex."indexup\n";
303      }
304   
305    #@ Handling the option elements in each classifier/plugin element (as the following strings)
306    elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
307    # find the gs2 mapping name for classifier and plugin
308        my $key = $nameMap->{$currentLevel};   
309
310    if (defined $name and $name =~ /\w/) {
311        push (@{$data->{$key}->[$currentIndex]}, $name);
312    }
313    if (defined $value and $value =~ /\w/) {
314            push (@{$data->{$key}->[$currentIndex]}, $value);
315    }
316
317    }
318   
319}
320
321sub EndTag {
322    my ($expat, $element) = @_;
323    my $endTags = q/^(browse|pluginList|displayItemList)$/; #|buildOptionList)$/;   
324    if ($element =~ /$endTags/) {
325        $currentIndex = 0;
326        $currentLevel = "";
327    }
328    # $arrayarrayexp contains classifier|plugin
329    elsif($element =~ /$arrayarrayexp/ ){
330        $currentIndex = $currentIndex + 1;
331    }
332}
333
334sub Text {
335    if (defined $currentLocation) {
336    #@ Handling block metadataList(creator, maintainer, public)
337    if($currentLocation =~ /$stringexp/){
338        #print $currentLocation;
339        my $key = $nameMap->{$currentLocation};
340        $data->{$key} = $_;
341        undef $currentLocation;
342    }
343   
344    #@ Handling displayItem metadata that are children of displayItemList
345    # that means we will be getting the collection's name and possibly description ('collectionextra' in GS2).
346    elsif($currentLevel eq "displayItemList" && $currentLocation =~ /$displayItemNames/) {
347        my $lang = $currentAttrRef->{'lang'};
348        my $name = $currentAttrRef->{'name'};
349       
350        # this is how data->collectionmeta's language is set in Greenstone 2.
351        # Need to be consistent, since export.pl accesses these values all in the same way
352        if(!defined $lang) {
353        $lang = 'default';
354        } else {
355        $lang = "[l=$lang]";
356        }
357       
358        if(defined $name and $name =~ /$displayItemNames/) { # attribute name = 'name' || 'description'
359        # using $nameMap->$name resolves to 'collectionname' if $name='name' and 'collectionextra' if $name='description'
360        $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang} = $_; # the value is the Text parsed
361        #print STDERR "***Found: $nameMap->{$name} collectionmeta, lang is $lang. Value: $data->{'collectionmeta'}->{$nameMap->{$name}}->{$lang}\n";
362        }
363        undef $currentLocation;
364    }
365 
366    #@ Handling searchtype: plain,form; arrayexp
367    elsif (defined $currentLocation and $currentLocation =~ /searchType/) {
368        # map 'searchType' into 'searchtype'
369        my $key = $nameMap->{$currentLocation};
370        # split it by ','
371        my ($plain, $form) = split (",", $_);
372       
373        if (!defined $data->{$key}) {
374        $data->{$key} = [];
375        }
376        if (defined $plain and $plain =~ /\w/) {
377        push @{ $data->{$key} }, $plain;
378        }
379        if (defined $form and $form =~ /\w/) {
380        push @{ $data->{$key} }, $form;
381        }
382    }
383    }   
384}
385
386# This sub is for debugging purposes
387sub Display {
388    # metadataList
389    foreach my $k (keys %{$data}) {
390    print STDERR "*** metadatalist key $k\n";
391    }
392 
393    print STDERR "*** creator: ".$data->{'creator'}."\n" if (defined $data->{'creator'});
394    print STDERR "*** maintainer: ".$data->{"maintainer"}."\n" if (defined $data->{"maintainer"});
395    print STDERR "*** public: ".$data->{"public"}."\n" if (defined $data->{"public"});
396    print STDERR "*** default index: ".$data->{"defaultindex"}."\n" if (defined $data->{"defaultindex"});
397    print STDERR "*** default level: ".$data->{"defaultlevel"}."\n" if (defined $data->{"defaultlevel"});
398    print STDERR "*** build type: ".$data->{"buildtype"}."\n" if (defined $data->{"buildtype"});
399    print STDERR "*** search types: \n";
400    print STDERR join(",",@{$data->{"searchtype"}})."\n" if (defined $data->{"searchtype"});
401    print STDERR "*** levels: \n";
402    print STDERR join(",",@{$data->{'levels'}})."\n" if (defined $data->{'levels'});
403    print STDERR "*** index subcollections: \n";
404    print STDERR join(",",@{$data->{'indexsubcollections'}})."\n" if (defined $data->{'indexsubcollections'});
405    print STDERR "*** indexes: \n";
406    print STDERR join(",",@{$data->{'indexes'}})."\n" if (defined $data->{'indexes'});
407    print STDERR "*** index options: \n";
408    print STDERR join(",",@{$data->{'indexoptions'}})."\n" if (defined $data->{'indexoptions'});
409    print STDERR "*** languages: \n";
410    print STDERR join(",",@{$data->{'languages'}})."\n" if (defined $data->{'languages'});
411    print STDERR "*** language metadata: \n";
412    print STDERR join(",",@{$data->{'languagemetadata'}})."\n" if (defined $data->{'languagemetadata'});
413 
414    print STDERR "*** Plugins: \n";
415    if (defined $data->{'plugin'}) {
416    foreach $a (@{$data->{'plugin'}}) {
417        print join(",",@$a);
418        print "\n";
419    }
420    }
421
422    #print STDERR "*** Build options: \n";
423    #if (defined $data->{'store_metadata_coverage'}) {
424    #foreach $a (@{$data->{'store_metadata_coverage'}}) {
425    #    print join(",",@$a,@$_);
426    #    print "\n";
427    #}
428    #}
429
430    if (defined $data->{'classify'}) {
431    print STDERR "*** Classifiers: \n";
432    map { print join(",",@$_)."\n"; } @{$data->{'classify'}};
433    }
434   
435    if (defined $data->{'subcollection'}) {
436    foreach my $key (keys %{$data->{'subcollection'}}) {
437        print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
438    }
439    }
440}
441# is this actually used??
442sub Doctype {
443    my ($expat, $name, $sysid, $pubid, $internal) = @_;
444
445    die if ($name !~ /^CollectionConfig$/);
446}
447
448# This Char function overrides the one in XML::Parser::Stream to overcome a
449# problem where $expat->{Text} is treated as the return value, slowing
450# things down significantly in some cases.
451sub Char {
452    if ($]<5.008) {
453    use bytes;  # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
454    }
455    $_[0]->{'Text'} .= $_[1];
456    return undef;
457}
458
459
460
461
462#########################################################
463
4641;
Note: See TracBrowser for help on using the browser.