###########################################################################
#
# collConfigxml.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# reads in configuration files of xml form

package collConfigxml;

use strict;
no strict 'refs';
no strict 'subs';

use XMLParser;

# Translation table between gs3 and gs2 naming.
# Key:   the gs3 element name as it appears in collectionConfig
# Value: the internal name for the option
my $nameMap = {
    'key'                  => 'value',
    'creator'              => 'creator',
    'maintainer'           => 'maintainer',
    'public'               => 'public',
    'infodb'               => 'infodbtype',
    'defaultIndex'         => 'defaultindex',
    'defaultLevel'         => 'defaultlevel',
    'name'                 => 'collectionname',
    'description'          => 'collectionextra',
    'smallicon'            => 'iconcollectionsmall',
    'icon'                 => 'iconcollection',
    'level'                => 'levels',
    'classifier'           => 'classify',
    'indexSubcollection'   => 'indexsubcollections',
    'indexLanguage'        => 'languages',
    'defaultIndexLanguage' => 'defaultlanguage',
    'index'                => 'indexes',
    'indexfieldoptions'    => 'indexfieldoptions',
    'sort'                 => 'sortfields',
    'facet'                => 'facetfields',
    'plugin'               => 'plugin',
    'plugout'              => 'plugout',
    'indexOption'          => 'indexoptions',
    'searchType'           => 'searchtype',
    'languageMetadata'     => 'languagemetadata',
    'buildType'            => 'buildtype',
    'orthogonalBuildTypes' => 'orthogonalbuildtypes',
};

# The hash structure handed back by sub read_cfg_file.
my $data = {};

# Pattern (kept as a plain string) naming the repeated block elements.
my $repeatedBlock = '^(browse|pluginList)$';

# Unique attribute values used to locate the text inside the elements
# creator, public, maintainer and inside a displayItem.
my $currentLocation  = '';
my $stringexp        = '^(creator|maintainer|public|buildType)$';
my $displayItemNames = '^(name|description)$';

# Options matching this pattern get set at top level.
my $topleveloptionexp = '^(importOption|buildOption)$';

# Attributes captured in the StartTag subroutine, kept so that they can
# be used later in the Text (or EndTag) subroutines.
my $currentAttrRef;

my $currentLevel = '';

# Counter for elements with the same name within the same block
# ("plugin", "option").
my $currentIndex = 0;

my $structexp = '^(index)$';
# structexp contains a hashmap of option(name, value) pairs per index name like allfields/ZZ or titles/TI
# e.g.
#
#
my $arrayexp = q/^(sort|facet|level|indexOption|indexSubcollection|indexLanguage|orthogonalBuildTypes)$/;
#my $arrayexp = q/^(index|sort|facet|level|indexOption|indexSubcollection|indexLanguage|orthogonalBuildTypes)$/;
my $arrayarrayexp = q/^(plugin|classifier)$/; #|buildOption)$/;
my $hashexp = q/^(subcollection)$/; # add other element names that should be represented by hash expressions here
my $hashhashexp = q/^(displayItem)$/; # add other (collectionmeta) element names that should be represented by hashes of hashes here.
my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;

# Reads in the model collection configuration file, collectionConfig.xml,
# into a structure which complies with the one used by gs2 (i.e. one read
# in by &cfgread::read_cfg_file).
#
# Parameters:
#   $filename - path of the file to read. It must end in
#               "collectionConfig.xml" and name an existing plain file.
#
# Returns:
#   undef when $filename is undefined, does not end in
#   "collectionConfig.xml", or is not a plain file. Otherwise returns the
#   file-level $data hash ref, which is reset to an empty hash at the start
#   of every call and is written to by the parser callbacks in this package
#   while the file is parsed. If the file cannot be opened for reading, a
#   message is printed to STDERR and the still-empty hash ref is returned.
#
# The call to parsefile is not wrapped in an eval, so anything it throws
# propagates to the caller.
sub read_cfg_file {
    my ($filename) = @_;
    $data = {};

    # An undefined filename is rejected up front so that it never reaches
    # the pattern match or the -f test.
    if (!defined $filename
        || $filename !~ /collectionConfig\.xml$/
        || !-f $filename) {
        return undef;
    }

    # Removed ProtocolEncoding (see MetadataXMLPlugin for details)

    # create XML::Parser object for parsing the collectionConfig.xml file
    my $parser = XML::Parser->new('Style' => 'Stream',
                                  'Pkg' => 'collConfigxml',
                                  'Handlers' => {'Char' => \&Char,
                                                 'Doctype' => \&Doctype
                                  });

    # The open is only a readability probe: the parser is handed the file
    # name, not this handle. Three-argument open with a lexical handle keeps
    # special characters in the path from being read as an open mode and
    # avoids a global bareword handle.
    my $cfg_fh;
    if (!open($cfg_fh, '<', $filename)) {
        print STDERR "collConfigxml::read_cfg_file couldn't read the cfg file $filename: $!\n";
    }
    else {
        close($cfg_fh);
        $parser->parsefile($filename);
    }

    #&Display;
    return $data;
}

sub StartTag {
# Those marked with #@ will not be executed at the same time when this sub is being called
# so that if/elsif is used to avoid unnecessary tests
    my ($expat, $element) = @_;

    # See http://search.cpan.org/~msergeant/XML-Parser-2.36/Parser.pm#Stream
    # %_ is a hash of all the attributes of this element, we want to store them so we can use the attributes
    # when the textnode contents of the element are parsed in the subroutine Text (that's the handler for Text).
$currentAttrRef = \%_; my $name = $_{'name'}; my $value = $_{'value'}; my $type = $_{'type'}; my $orthogonal = $_{'orthogonal'}; # for subcollections my $filter = $_{'filter'}; # was this just a flax thing?? my $assigned = $_{'assigned'}; #@ Marking repeated block if ($element =~ /$repeatedBlock/) { $currentIndex = 0; } #@ handling block metadataList elsif (defined $name and $name =~ /$stringexp/){ $currentLocation = $name; } #@ handling default search index/level/indexLanguage and languageMetadata elsif ($element =~ /$defaults/) { if (defined $name and $name =~ /\w/) { $data->{$nameMap->{$element}} = $name; } } #@ handling the displayItems name and description (known as collectionname and collectionextra in GS2) elsif($element eq "displayItemList") { $currentLevel = "displayItemList"; # storing the parent if it is displayItemList } elsif($element =~ /$hashhashexp/) { # can expand on this to check for other collectionmeta elements if((!defined $assigned) || (defined $assigned and $assigned =~ /\w/ and $assigned eq "true")) { # either when there is no "assigned" attribute, or when assigned=true (for displayItems): $currentLocation = $name; } } #@ Handling database type: gdbm or gdbm-txtgz, later jdbm. 
elsif ($element eq "infodb") { $data->{'infodbtype'} = $type; } #@ Handling indexer: mgpp/mg/lucene; stringexp #@ Handling orthogonal indexers: audioDB; arrayexp elsif ($element eq "search") { if ((defined $orthogonal) && ($orthogonal =~ m/^(true|on|1)$/i)) { push(@{$data->{'orthogonalbuildtypes'}},$type); } else { $data->{'buildtype'} = $type; } } elsif ($element eq "store_metadata_coverage") { ## print STDERR "*&*&*&*&*& HERE &*&*&*&*&*&*"; $data->{'store_metadata_coverage'} = $value; } #@ Handling searchtype: plain,form; arrayexp #elsif ($element eq "format" and defined $name and $name =~ /searchType/) { #@ Handling searchtype: plain, form #$currentLocation = $name; #} #@ Handle sort|facet|level|indexOption|indexSubcollection|indexLanguage elsif ($element =~ /$arrayexp/) { my $key = $nameMap->{$element}; # if (!defined $data->{$key}) { $data->{$key} = []; } if (defined $name) { push (@{$data->{$key}},$name); } } #@ Handle index which can have options as children to be put in a map: