###########################################################################
#
# cfgread4gs3.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# Reads in configuration files in XML form.

package cfgread4gs3;

use strict;
no strict 'refs';
no strict 'subs';

# Compile-time hook that makes sure a suitable XML::Parser can be found for
# the version of Perl being used.  It distinguishes between Perl 5.6 and
# Perl 5.8 (or later) and appends the matching bundled library directory
# to @INC.
BEGIN {
    # Note: $] encodes the version number of the running perl.
    # Everything except a pre-5.8 interpreter uses the perl-5.8 directory.
    my $perl_dir = "perl-5.8";

    if ($] < 5.008) {
        # assume perl 5.6
        $perl_dir = "perl-5.6";
    }
    elsif ($] == 5.008) {
        # exactly perl 5.8.0: still use the 5.8 directory, but complain
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
    }

    # The bundled directory is skipped when GSDLOS names windows.
    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        # Use push to put this on the end, so an existing XML::Parser
        # will be used by default.
        push (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir");
    }
}

use XML::Parser;

# A mapping hash to resolve name discrepancies between gs2 and gs3.
my $nameMap = {    # gs3 (collectionConfig.xml) name => gs2 key stored in $data
    "key"                  => "value",
    "creator"              => "creator",
    "maintainer"           => "maintainer",
    "public"               => "public",
    "defaultIndex"         => "defaultindex",
    "defaultLevel"         => "defaultlevel",
    "name"                 => "collectionname",
    "description"          => "collectionextra",
    "smallicon"            => "iconcollectionsmall",
    "icon"                 => "iconcollection",
    "level"                => "levels",
    "classifier"           => "classify",
    "indexSubcollection"   => "indexsubcollections",
    "indexLanguage"        => "languages",
    "defaultIndexLanguage" => "defaultlanguage",
    "index"                => "indexes",
    "plugin"               => "plugin",
    "indexOption"          => "indexoptions",
    "searchType"           => "searchtype",
    "languageMetadata"     => "languagemetadata",
};

# A hash structure which is returned by sub read_cfg_file.
my $data = {};

# Elements that enclose a run of classifier/plugin elements; entering or
# leaving one of them restarts the per-block element counter.
my $repeatedBlock = qr/^(browse|pluginList)$/;

# Use those unique attribute values to locate the text within the elements
# creator, public, maintainer.
my $currentLocation = "";
my $stringexp = qr/^(creator|maintainer|public)$/;

# Name of the element whose <option> children are currently being collected
# ("indexOption", "plugin" or "classifier"); empty when there is none.
my $currentLevel = "";

# Count the elements with same name within the same block
# ("plugin", "option")
my $currentIndex = 0;

# Elements stored as a list of name attributes.
my $arrayexp = qr/^(index|level|indexSubcollection|indexLanguage)$/;
# Elements stored as a list of lists: [name, option, value, ...].
my $arrayarrayexp = qr/^(plugin|classifier)$/;
# Elements stored as a single string taken from their name attribute.
my $defaults = qr/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;

# Start-tag handler.  It reads the element's attributes from %_ and records
# what it needs in $data and in the module-level parser state.
#   $expat   - the parser object (unused)
#   $element - name of the element being opened
sub StartTag {
    my ($expat, $element) = @_;

    my $name   = $_{'name'};
    my $value  = $_{'value'};
    my $type   = $_{'type'};
    my $filter = $_{'filter'};    # for subcollections

    # The branches marked with #@ are mutually exclusive, hence the
    # if/elsif chains: they avoid unnecessary tests.

    #@ Marking repeated block
    if ($element =~ /$repeatedBlock/) {
        $currentIndex = 0;
    }
    #@ handling block metadataList
    elsif (defined $name and $name =~ /$stringexp/) {
        $currentLocation = $name;
    }
    #@ handling default search index/level/indexLanguage and languageMetadata
    elsif ($element =~ /$defaults/) {
        if (defined $name and $name =~ /\w/) {
            $data->{$nameMap->{$element}} = $name;
        }
    }
    #@ Handling indexer: mgpp/mg/lucene; stringexp
    elsif ($element eq "search") {
        $data->{'buildtype'} = $type;
    }
    #@ Handling searchtype: plain,form; arrayexp
    #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
    #@ Handling searchtype: plain, form
    #$currentLocation = $name;
    #}
    #@ Handle index|level|indexSubcollection|indexLanguage
    elsif ($element =~ /$arrayexp/) {
        my $key = $nameMap->{$element};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        push (@{$data->{$key}}, $name);
    }
    #@ indexoptions: accentfold/casefold/stem; arrayexp
    elsif ($element eq "indexOption") {
        $currentLevel = "indexOption";
    }

    # NOTE(review): this "if" starts a second, independent chain, so the
    # tests below are evaluated even when a branch above already matched.
    # That is kept as found; confirm whether an "elsif" was intended.
    if ($currentLevel eq "indexOption" and $element eq "option") {
        my $key = $nameMap->{$currentLevel};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        push (@{$data->{$key}}, $name);
    }
    #@ use hash of hash of strings: hashexp
    elsif ($element eq "subcollection") {
        if (!defined $data->{'subcollection'}) {
            $data->{'subcollection'} = {};
        }
        if (defined $name and $name =~ /\w/) {
            if (defined $filter and $filter =~ /\w/) {
                $data->{'subcollection'}->{$name} = $filter;
            }
        }
    }
    #@ Handling each classifier/plugin element
    elsif ($element =~ /$arrayarrayexp/) {
        # find the gs2 mapping name
        $currentLevel = $element;
        my $key = $nameMap->{$element};

        # define an array of array of strings
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        # Push classifier/plugin name (e.g. AZList) into $data as the first string
        push (@{$data->{$key}->[$currentIndex]}, $name);

        #print $currentIndex."indexup\n";
    }
    #@ Handling the option elements in each classifier/plugin element (as the following strings)
    elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
        # find the gs2 mapping name for classifier and plugin
        my $key = $nameMap->{$currentLevel};

        if (defined $name and $name =~ /\w/) {
            push (@{$data->{$key}->[$currentIndex]}, $name);
        }
        if (defined $value and $value =~ /\w/) {
            push (@{$data->{$key}->[$currentIndex]}, $value);
        }
    }
}

# End-tag handler.  It keeps the parser state in step with the document:
#   </browse>, </pluginList>  - restart the element counter and forget the
#                               current level
#   </indexOption>            - forget the current level, so that <option>
#                               elements met afterwards are not recorded as
#                               index options
#   </classifier>, </plugin>  - advance to the next slot of the list of lists
sub EndTag {
    my ($expat, $element) = @_;

    if ($element =~ /$repeatedBlock/) {
        $currentIndex = 0;
        $currentLevel = "";
    }
    elsif ($element eq "indexOption") {
        $currentLevel = "";
    }
    # $arrayarrayexp contains classifier|plugin
    elsif ($element =~ /$arrayarrayexp/) {
        $currentIndex = $currentIndex + 1;
    }
}

# Text handler.  The character data is read from $_.
sub Text {
    #@ Handling block metadataList(creator, maintainer, public)
    if (defined $currentLocation and $currentLocation =~ /$stringexp/) {
        #print $currentLocation;
        my $key = $nameMap->{$currentLocation};
        $data->{$key} = $_;
        # one-shot: the next piece of text must not overwrite the value
        undef $currentLocation;
    }

    #@ Handling searchtype: plain,form; arrayexp
    # NOTE(review): StartTag only ever sets $currentLocation to creator,
    # maintainer or public (its searchType branch is commented out), so this
    # branch is not reached by the code in this file.
    if (defined $currentLocation and $currentLocation =~ /searchType/) {
        # map 'searchType' into 'searchtype'
        my $key = $nameMap->{$currentLocation};
        # split it by ','
        my ($plain, $form) = split (",", $_);

        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        if (defined $plain and $plain =~ /\w/) {
            push @{ $data->{$key} }, $plain;
        }
        if (defined $form and $form =~ /\w/) {
            push @{ $data->{$key} }, $form;
        }
    }
}

# This sub is for debugging purposes: it prints the contents of $data to the
# currently selected output handle, one setting per line.
sub Display {
    # settings stored as plain strings (metadataList, defaults, build type)
    foreach my $key ('creator', 'maintainer', 'public',
                     'defaultindex', 'defaultlevel', 'buildtype') {
        print $data->{$key}."\n" if (defined $data->{$key});
    }

    # settings stored as lists of strings
    foreach my $key ('searchtype', 'levels', 'indexsubcollections',
                     'indexes', 'indexoptions', 'languages') {
        print join(",", @{$data->{$key}})."\n" if (defined $data->{$key});
    }

    # StartTag stores languagemetadata as a plain string, so it must be
    # printed as one rather than dereferenced as a list.
    print $data->{'languagemetadata'}."\n" if (defined $data->{'languagemetadata'});

    # plugins and classifiers are lists of lists: name followed by options
    if (defined $data->{'plugin'}) {
        foreach my $plugin (@{$data->{'plugin'}}) {
            print join(",", @$plugin);
            print "\n";
        }
    }
    if (defined $data->{'classify'}) {
        print "Classifiers: \n";
        foreach my $classifier (@{$data->{'classify'}}) {
            print join(",", @$classifier)."\n";
        }
    }

    if (defined $data->{'subcollection'}) {
        foreach my $key (keys %{$data->{'subcollection'}}) {
            print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
        }
    }
}

# Doctype handler: dies unless the declared document type name is
# "DirectoryMetadata" or "GreenstoneDirectoryMetadata".
# NOTE(review): a configuration file that declares any other DOCTYPE name
# makes this die - confirm that is what is wanted for collectionConfig.xml.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
    # to be processed as well as the "DirectoryMetadata" files which should now
    # be created by import.pl
    die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);
}

# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
sub Char {
    # NOTE(review): "use bytes" is a lexically scoped pragma, so as written
    # it presumably only covers this otherwise empty block and not the
    # append below - confirm before relying on it.
    if ($]<5.008) {
        use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
    }
    $_[0]->{'Text'} .= $_[1];
    return undef;
}

# Reads in the model collection configuration file, collectionConfig.xml,
# into a structure which complies with the one used by gs2 (i.e. one read
# in by &cfgread::read_cfg_file).
sub read_cfg_file {
    # Parses $filename and returns the hash reference $data filled in by the
    # parser handlers defined in this file.
    #
    #   $filename - path of the file to read; it must end in
    #               "collectionConfig.xml" and be a plain file
    #
    # Returns undef when $filename fails either test.  When the file cannot
    # be opened, a message is printed to STDERR and the freshly emptied
    # $data is still returned.
    my ($filename) = @_;
    # start from an empty result for every call
    $data = {};

    if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) {
        return undef;
    }

    # create the XML::Parser object ('Stream' style) used to parse the file;
    # the Char and Doctype handlers of this file are installed explicitly
    my $parser;
    if ($]<5.008) {
        # Perl 5.6
        $parser = new XML::Parser('Style' => 'Stream',
                                  'Handlers' => {'Char' => \&Char,
                                                 'Doctype' => \&Doctype
                                                 });
    } else {
        # Perl 5.8
        $parser = new XML::Parser('Style' => 'Stream',
                                  'ProtocolEncoding' => 'ISO-8859-1',
                                  'Handlers' => {'Char' => \&Char,
                                                 'Doctype' => \&Doctype
                                                 });
    }

    # The open only checks that the file is readable: parsefile is given the
    # file name, not the COLCFG handle.
    # NOTE(review): the message below names "cfgread::" although this
    # package is cfgread4gs3.
    if (!open (COLCFG, $filename)) {
        print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n";
    } else {
        $parser->parsefile ($filename);# (COLCFG);
        close (COLCFG);
    }

    #&Display; print "***********";
    return $data;
}

sub write_line {
    # Prints the elements of the array reference $line, joined without a
    # separator and followed by a newline, to $filehandle.
    # The callers in this file pass the handle by name (the string
    # 'COLCFG'), which depends on "no strict 'refs'" being in effect.
    my ($filehandle, $line) = @_;
    print $filehandle join ("", @$line), "\n";
}

# Create the buildConfig.xml file for a specific collection
#
#   $buildoutfile - path of the file to write (opened for writing, so an
#                   existing file is overwritten)
#   $buildcfg     - hash reference holding the build settings
#   $collectcfg   - hash reference holding the collection settings
#
# Dies, after printing a message to STDERR, when $buildoutfile cannot be
# opened.
#
# NOTE(review): as the code appears here, most of the literals passed to
# write_line are empty strings and several locals ($line, $service_type,
# $indexstem, $index, $defaultindex, $stemindexes, $maxnumeric, $level,
# $default_lang, $default_lang_short, $default_subcol, $subcol, $name, ...)
# are assigned but not referenced again.  Presumably the markup those
# literals once held has been lost from this copy - confirm against the
# upstream file before changing or relying on this sub.
sub write_cfg_file {
    # information needed from $collectcfg include: defaultindex, defaultlevel, classifiers,
    # the rest is from $buildcfg
    my ($buildoutfile, $buildcfg, $collectcfg) = @_;
    my $line = [];

    if (!open (COLCFG, ">$buildoutfile")) {
        print STDERR "cfgread::write_cfg_file couldn't write the cfg file $buildoutfile\n";
        die;
    }

    &write_line('COLCFG', [""]);

    # output building metadata to build config file
    # the build type defaults to "mgpp" when $buildcfg does not name one
    my $buildtype;
    if (defined $buildcfg->{"buildtype"}) {
        $buildtype = $buildcfg->{"buildtype"};
    } else {
        $buildtype = "mgpp";
    }
    # $numdocs stays undefined when $buildcfg has no "numdocs" entry
    my $numdocs;
    if (defined $buildcfg->{"numdocs"}) {
        $numdocs = $buildcfg->{"numdocs"};
    }
    &write_line('COLCFG', [""]);
    &write_line('COLCFG', ["", $numdocs, ""]);
    &write_line('COLCFG', ["", $buildtype, ""]);
    &write_line('COLCFG', [""]);

    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # output serviceRackList
    &write_line('COLCFG', [""]);

    # This serviceRack enables the collection to provide the oai metadata retrieve service, which is served by the OAIMetadataRetrieve.java class
    # For each collection, we write the following serviceRack in the collection's buildConfig.xml file as follows and also specify the metadata format this oai service provides in the rack. But whether this service is going to be put in use depends on its name appearing in the OAIConfig.xml.
    &write_line('COLCFG', [""]);
    # What metadata sets to support is collection specific and is specified in each collection's buildConfig.xml file. To support other metadata schema, simply add an OAIMetadataFormat element here.
    # The support of unqualified Dublin Core metadata set is mandatory in the oai specification.
    &write_line('COLCFG', [""]);
    &write_line('COLCFG', ["oai_dc"]);
    &write_line('COLCFG', ["dc"]);
    &write_line('COLCFG', [""]);
    &write_line('COLCFG', [""]);

    # do the search service
    &write_line('COLCFG', [""]);
    if (defined $buildcfg->{'indexstem'}) {
        my $indexstem = $buildcfg->{'indexstem'};
        &write_line('COLCFG', [""]);
    }

    #indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    # $first is a flag reused by each section below to spot the first entry
    my $first = 1;
    # "mg" builds read their map from "indexmap", the others from "indexfieldmap"
    my $maptype = "indexfieldmap";
    if ($buildtype eq "mg") {
        $maptype = "indexmap";
    }
    if (defined $buildcfg->{$maptype}) {
        my $indexmap_t = $buildcfg->{$maptype};
        foreach my $i (@$indexmap_t) {
            # each entry has the form "name->shortname"
            my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/;
            $indexmap->{$k} = $v;
            push @indexlist, $k;
            if ($first) {
                # the first entry is the default until overridden below
                $defaultindex = $v;
                $first = 0;
            }
        }
        # now if the user has assigned a default index, we use it
        if (defined $collectcfg->{"defaultindex"}) {
            $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}};
        }
    } else {
        print STDERR "$maptype not defined";
    }

    #for each index in indexList, write them out
    &write_line('COLCFG', [""]);
    foreach my $i (@indexlist) {
        my $index = $indexmap->{$i};
        &write_line('COLCFG', [""]);
    }
    &write_line('COLCFG', [""]);

    # do default index only for mg
    if ($buildtype eq "mg") {
        &write_line('COLCFG', [""]);
    }

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        &write_line('COLCFG', [""]);
        my $stemindexes = 3; # default is stem and casefold
        # only an all-digit value from $buildcfg replaces the default
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        &write_line('COLCFG', [""]);

        my $maxnumeric = 4; # default
        # only an all-digit value from $buildcfg replaces the default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        &write_line('COLCFG', [""]);

        &write_line('COLCFG', [""]);
    }

    # levelList
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "";
    my $default_retrieve_level = "Doc";#this is defaultGDBMLevel (also for the retrieve service)
    $first = 1;
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        if (defined $buildcfg->{'levelmap'}) {
            my $levelmap_t = $buildcfg->{'levelmap'};
            foreach my $l (@$levelmap_t) {
                # each entry has the form "name->shortname"
                my ($key, $val) = $l =~ /^(.*)\-\>(.*)$/;
                $levelmap->{$key} = $val;
                push @levellist, $key;
                if ($first) {
                    # the first entry is the default until overridden below
                    $default_search_level = $val;
                    $first = 0;
                }
            }
        }
        # now if the user has assigned a default level, we use it
        if (defined $collectcfg->{"defaultlevel"}) {
            $default_search_level = $levelmap->{$collectcfg->{"defaultlevel"}};
            $default_retrieve_level = $default_search_level;
        }
        #if (defined $buildcfg->{'textlevel'}) {
        #    $default_retrieve_level = $buildcfg->{'textlevel'};
        #}
    }
    #for each level in levelList, write them out
    if ($buildtype ne "mg") {
        &write_line('COLCFG', [""]);
        foreach my $lv (@levellist) {
            my $level = $levelmap->{$lv};
            &write_line('COLCFG', [""]);
        }
        &write_line('COLCFG', [""]);
    }
    # add in defaultLevel as the same level as indexLevelList, making the reading job easier
    if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
        &write_line('COLCFG', [""]);
    }
    &write_line('COLCFG', [""]);

    # do searchTypeList
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        &write_line('COLCFG', [""]);

        if (defined $buildcfg->{"searchtype"}) {
            # one line per configured search type
            my $searchtype_t = $buildcfg->{"searchtype"};
            foreach my $s (@$searchtype_t) {
                &write_line('COLCFG', [""]);
            }
        } else {
            # no search types configured: two lines are written instead
            &write_line('COLCFG', [""]);
            &write_line('COLCFG', [""]);
        }
        &write_line('COLCFG', [""]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    $first = 1;
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        &write_line('COLCFG', [""]);

        my $langmap_t = $buildcfg->{"languagemap"};
        foreach my $l (@$langmap_t) {
            # each entry has the form "name->shortname"
            my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;

            &write_line('COLCFG', [""]);
            if ($first) {
                $default_lang = $k; #name
                $default_lang_short = $v; #shortname
                $first = 0;
            }
        }

        &write_line('COLCFG', [""]);
        # now if the user has assigned a default language (as "en", "ru" etc.)
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        &write_line('COLCFG', [""]);
    }

    # do indexSubcollectionList
    my $default_subcol = "";# make it in sub scope to be used in the concatenation
    if (defined $buildcfg->{'subcollectionmap'}) {
        &write_line('COLCFG', [""]);
        my $subcolmap = {};
        my @subcollist = ();
        $first = 1;
        my $subcolmap_t = $buildcfg->{'subcollectionmap'};
        foreach my $l (@$subcolmap_t) {
            # each entry has the form "name->shortname"
            my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
            $subcolmap->{$k} = $v;
            push @subcollist, $k;
            if ($first) {
                $default_subcol = $v;
                $first = 0;
            }
        }
        foreach my $sl (@subcollist) {
            my $subcol = $subcolmap->{$sl};
            &write_line('COLCFG', [""]);
        }

        &write_line('COLCFG', [""]);
        &write_line('COLCFG', [""]);
    }

    # close off search service
    &write_line('COLCFG', [""]);

    # do the retrieve service
    &write_line('COLCFG', [""]);
    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        &write_line('COLCFG', [""]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        &write_line('COLCFG', [""]);
    }
    if ($buildtype eq "mg") {
        &write_line('COLCFG', [""]);
    }
    if (defined $buildcfg->{'indexstem'}) {
        my $indexstem = $buildcfg->{'indexstem'};
        &write_line('COLCFG', [""]);
    }
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        &write_line('COLCFG', [""]);
    }
    &write_line('COLCFG', [""]);

    # do the browse service
    # $count numbers the classifiers (CL1, CL2, ...); it also advances for
    # a phind classifier that is skipped below
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    my $classifiers = $collectcfg->{"classify"};
    foreach my $cl (@$classifiers) {
        my $name = "CL$count";
        $count++;
        # the first string of each classifier entry is its name
        my ($classname) = @$cl[0];
        if ($classname =~ /^phind$/i) {
            $phind=1;
            #should add it into coll config classifiers
            next;
        }

        # the opening lines are written once, before the first classifier
        if (not $started_classifiers) {
            &write_line('COLCFG', [""]);
            if (defined $buildcfg->{'indexstem'}) {
                my $indexstem = $buildcfg->{'indexstem'};
                &write_line('COLCFG', [""]);
            }
            &write_line('COLCFG', [""]);
            $started_classifiers = 1;
        }
        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            # NOTE(review): the statement below is reproduced exactly as it
            # appears in this copy of the file.  The loop header and body
            # look truncated (presumably everything between a "<" and a ">"
            # has been lost) - restore it from the upstream file.
            for (my $j=0; $j"]);
    }
    if ($started_classifiers) {
        # end the classifiers
        &write_line('COLCFG', [""]);
        # close off the Browse service
        &write_line('COLCFG', [""]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        # if phind classifier
        &write_line('COLCFG', [""]);
    }

    &write_line('COLCFG', [""]);
    &write_line('COLCFG', [""]);
    close (COLCFG);
}

#########################################################

1;