###########################################################################
#
# cfgread4gs3.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# Reads in configuration files of XML form.

package cfgread4gs3;

use strict;
no strict 'refs';
no strict 'subs';

# Makes sure the XML::Parser build matching the running Perl can be found
# before `use XML::Parser` below is compiled. Perl 5.6 and Perl 5.8 each
# have their own directory under $GSDLHOME/perllib/cpan.
BEGIN {
    # $] is the interpreter version as a number. Start from the 5.8
    # directory and only switch away from it for anything older.
    my $perl_dir = "perl-5.8";

    if ($] < 5.008) {
        # assume perl 5.6
        $perl_dir = "perl-5.6";
    }
    elsif (!($] > 5.008)) {
        # Exactly 5.8.0: the 5.8 directory is still used, with a warning.
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
    }

    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        # Appended (not prepended) so that an XML::Parser already reachable
        # through @INC is used by default.
        push @INC, "$ENV{'GSDLHOME'}/perllib/cpan/$perl_dir";
    }
}

use XML::Parser;

# A mapping hash to resolve name discrepancy between gs2 and gs3.
my $nameMap = {
    # element / metadata name in collectionConfig.xml => key used in $data
    "key"                  => "value",
    "creator"              => "creator",
    "maintainer"           => "maintainer",
    "public"               => "public",
    "defaultIndex"         => "defaultindex",
    "defaultLevel"         => "defaultlevel",
    "name"                 => "collectionname",
    "description"          => "collectionextra",
    "smallicon"            => "iconcollectionsmall",
    "icon"                 => "iconcollection",
    "level"                => "levels",
    "classifier"           => "classify",
    "indexSubcollection"   => "indexsubcollections",
    "indexLanguage"        => "languages",
    "defaultIndexLanguage" => "defaultlanguage",
    "index"                => "indexes",
    "plugin"               => "plugin",
    "indexOption"          => "indexoptions",
    "searchType"           => "searchtype",
    "languageMetadata"     => "languagemetadata",
};

# A hash structure which is returned by sub read_cfg_file.
my $data = {};

# Container elements: entering or leaving one restarts the plugin/classifier
# counter ($currentIndex).
my $repeatedBlock = q/^(browse|pluginList)$/;

# use those unique attribute values to locate the text within the elements
# creator, public, maintainer.
my $currentLocation = "";
my $stringexp = q/^(creator|maintainer|public)$/;

# Name of the element whose <option> children are currently being collected
# ("indexOption", "plugin" or "classifier"); "" when there is none.
my $currentLevel = "";

# Count the elements with same name within the same block
# ("plugin", "option")
my $currentIndex = 0;

# Elements whose 'name' attribute is appended to a flat list in $data.
my $arrayexp = q/^(index|level|indexSubcollection|indexLanguage)$/;

# Elements stored as a list of lists: one row per element, holding its name
# followed by the name/value of each of its options.
my $arrayarrayexp = q/^(plugin|classifier)$/;

# Elements whose 'name' attribute is stored as a single string in $data.
my $defaults = q/^(defaultIndex|defaultLevel|defaultIndexLanguage|languageMetadata)$/;

# Start-tag handler for XML::Parser's 'Stream' style.
#
# Arguments: ($expat, $element) - the parser object and the element name.
# The element's attributes are read from the global hash %_.
#
# Records the element in the module-level $data hash. The tests are split
# into TWO if/elsif chains, and both run on every call: the first chain ends
# at "indexOption", and the second starts with the test for an <option>
# inside an indexOption.
#
# Those marked with #@ will not be executed at the same time when this sub
# is being called so that if/elsif is used to avoid unnecessary tests
sub StartTag {
    my ($expat, $element) = @_;

    my $name  = $_{'name'};
    my $value = $_{'value'};
    my $type  = $_{'type'};

    # for subcollections
    my $filter = $_{'filter'};

    #@ Marking repeated block
    if ($element =~ /$repeatedBlock/) {
        $currentIndex = 0;
    }
    #@ handling block metadataList
    elsif (defined $name and $name =~ /$stringexp/) {
        # Remember which metadata item this is; Text() stores its content.
        $currentLocation = $name;
    }
    #@ handling default search index/level/indexLanguage and languageMetadata
    elsif ($element =~ /$defaults/) {
        if (defined $name and $name =~ /\w/) {
            $data->{$nameMap->{$element}} = $name;
        }
    }
    #@ Handling indexer: mgpp/mg/lucene; stringexp
    elsif ($element eq "search") {
        $data->{'buildtype'} = $type;
    }
    #@ Handling searchtype: plain,form; arrayexp
    #elsif ($element eq "format" and defined $name and $name =~ /searchType/) {
    #    #@ Handling searchtype: plain, form
    #    $currentLocation = $name;
    #}
    #@ Handle index|level|indexSubcollection|indexLanguage
    elsif ($element =~ /$arrayexp/) {
        my $key = $nameMap->{$element};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        push (@{$data->{$key}}, $name);
    }
    #@ indexoptions: accentfold/casefold/stem; arrayexp
    elsif ($element eq "indexOption") {
        $currentLevel = "indexOption";
    }

    # Second chain (evaluated regardless of what matched above).
    if ($currentLevel eq "indexOption" and $element eq "option") {
        my $key = $nameMap->{$currentLevel};
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        push (@{$data->{$key}}, $name);
    }
    #@ use hash of hash of strings: hashexp
    elsif ($element eq "subcollection") {
        if (!defined $data->{'subcollection'}) {
            $data->{'subcollection'} = {};
        }
        # Only recorded when both the name and the filter are non-blank.
        if (defined $name and $name =~ /\w/) {
            if (defined $filter and $filter =~ /\w/) {
                $data->{'subcollection'}->{$name} = $filter;
            }
        }
    }
    #@ Handling each classifier/plugin element
    elsif ($element =~ /$arrayarrayexp/) {
        # find the gs2 mapping name
        $currentLevel = $element;
        my $key = $nameMap->{$element};

        # define an array of array of strings
        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        # Push classifier/plugin name (e.g. AZList) into $data as the first string
        push (@{$data->{$key}->[$currentIndex]}, $name);
        #print $currentIndex."indexup\n";
    }
    #@ Handling the option elements in each classifier/plugin element (as the following strings)
    elsif ($currentLevel =~ /$arrayarrayexp/ and $element eq "option") {
        # find the gs2 mapping name for classifier and plugin
        my $key = $nameMap->{$currentLevel};

        if (defined $name and $name =~ /\w/) {
            push (@{$data->{$key}->[$currentIndex]}, $name);
        }
        if (defined $value and $value =~ /\w/) {
            push (@{$data->{$key}->[$currentIndex]}, $value);
        }
    }
}

# End-tag handler for XML::Parser's 'Stream' style.
#
# Arguments: ($expat, $element).
# Leaving a browse/pluginList block clears the counter and current level;
# leaving a plugin/classifier moves on to the next row.
sub EndTag {
    my ($expat, $element) = @_;

    my $endTags = q/^(browse|pluginList)$/;
    if ($element =~ /$endTags/) {
        $currentIndex = 0;
        $currentLevel = "";
    }
    # $arrayarrayexp contains classifier|plugin
    elsif ($element =~ /$arrayarrayexp/) {
        $currentIndex = $currentIndex + 1;
    }
}

# Text handler for XML::Parser's 'Stream' style; the text is read from $_.
sub Text {
    #@ Handling block metadataList(creator, maintainer, public)
    if (defined $currentLocation and $currentLocation =~ /$stringexp/) {
        #print $currentLocation;
        my $key = $nameMap->{$currentLocation};
        $data->{$key} = $_;
        # Consumed: later text must not overwrite the value.
        undef $currentLocation;
    }

    #@ Handling searchtype: plain,form; arrayexp
    # NOTE(review): nothing in StartTag sets $currentLocation to "searchType"
    # any more (that branch is commented out there), so this looks unreachable.
    if (defined $currentLocation and $currentLocation =~ /searchType/) {
        # map 'searchType' into 'searchtype'
        my $key = $nameMap->{$currentLocation};
        # split it by ','
        my ($plain, $form) = split (",", $_);

        if (!defined $data->{$key}) {
            $data->{$key} = [];
        }
        if (defined $plain and $plain =~ /\w/) {
            push @{ $data->{$key} }, $plain;
        }
        if (defined $form and $form =~ /\w/) {
            push @{ $data->{$key} }, $form;
        }
    }
}

# This sub is for debugging purposes.
# Prints whatever has been collected in $data to the currently selected
# output handle, one setting per line; list values are comma-separated.
sub Display {
    # metadataList and the other single-string settings
    foreach my $key ('creator', 'maintainer', 'public',
                     'defaultindex', 'defaultlevel', 'buildtype') {
        print $data->{$key}."\n" if (defined $data->{$key});
    }

    # settings stored as flat lists
    foreach my $key ('searchtype', 'levels', 'indexsubcollections',
                     'indexes', 'indexoptions', 'languages') {
        print join(",", @{$data->{$key}})."\n" if (defined $data->{$key});
    }

    # StartTag stores languagemetadata as a plain string, so it must not be
    # dereferenced as an array (that used to print an empty line instead).
    if (defined $data->{'languagemetadata'}) {
        my $langmeta = $data->{'languagemetadata'};
        if (ref($langmeta) eq 'ARRAY') {
            print join(",", @$langmeta)."\n";
        }
        else {
            print $langmeta."\n";
        }
    }

    if (defined $data->{'plugin'}) {
        # lexical loop variable: does not touch the global $a used by sort
        foreach my $plugin (@{$data->{'plugin'}}) {
            print join(",", @$plugin);
            print "\n";
        }
    }
    if (defined $data->{'classify'}) {
        print "Classifiers: \n";
        foreach my $classifier (@{$data->{'classify'}}) {
            print join(",", @$classifier)."\n";
        }
    }
    if (defined $data->{'subcollection'}) {
        foreach my $key (keys %{$data->{'subcollection'}}) {
            print "subcollection ".$key." ".$data->{'subcollection'}->{$key}."\n";
        }
    }
}

# Doctype handler: dies unless the declared document type is
# "DirectoryMetadata" or "GreenstoneDirectoryMetadata".
# NOTE(review): these names presumably come from the metadata.xml reader this
# module was modelled on - confirm they are intended for collectionConfig.xml.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    # allow the short-lived and badly named "GreenstoneDirectoryMetadata" files
    # to be processed as well as the "DirectoryMetadata" files which should now
    # be created by import.pl
    die if ($name !~ /^(Greenstone)?DirectoryMetadata$/);
}

# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
sub Char {
    if ($]<5.008) {
        use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
        # NOTE(review): `use bytes` is lexically scoped to this block, so it
        # does not cover the append below. Left as is to keep behaviour.
    }
    $_[0]->{'Text'} .= $_[1];
    return undef;
}

# Reads in the model collection configuration file, collectionConfig.xml,
# into a structure which complies with the one used by gs2 (i.e. one read
# in by &cfgread::read_cfg_file).
# ---------------------------------------------------------------------------
# Overview of the three subs that follow (code below is left untouched).
#
# read_cfg_file($filename)
#   Resets the module-level $data hash. Returns undef unless $filename ends
#   in "collectionConfig.xml" and is a plain file. Otherwise it creates an
#   XML::Parser in 'Stream' style with the Char and Doctype handlers of this
#   module (plus ProtocolEncoding ISO-8859-1 when $] is not below 5.008),
#   parses the file with parsefile($filename) and returns $data.
#   The COLCFG handle is opened only as a readability check; if that open
#   fails a message goes to STDERR and the still-empty $data is returned.
#
# write_line($filehandle, $line)
#   Joins the strings of the array ref $line without a separator and prints
#   them, followed by "\n", to $filehandle. Callers in this file pass the
#   handle name 'COLCFG' as a string.
#
# write_cfg_file($buildoutfile, $buildcfg, $collectcfg, $disable_OAI)
#   Opens $buildoutfile for writing (prints to STDERR and dies on failure)
#   and writes the build configuration through write_line, then closes it.
#   Values it reads:
#     $buildcfg->{buildtype}      defaults to "mgpp"; "mg" and "lucene"
#                                 select the service type "MG" / "Lucene",
#                                 anything else "MGPP"
#     $buildcfg->{numdocs}        written as is (may be undefined)
#     $disable_OAI                the extra OAI group is written when == 0
#     $buildcfg->{indexstem}      optional, checked in several sections
#     $buildcfg->{indexfieldmap}  ("indexmap" for mg) entries "name->short";
#                                 the first short name is the default index
#                                 unless $collectcfg->{defaultindex} is set
#     $buildcfg->{stemindexes}    default 3, used only if all digits
#     $buildcfg->{maxnumeric}     default 4, used only if all digits
#     $buildcfg->{levelmap}       mgpp/lucene only; first entry gives the
#                                 default search/retrieve/gdbm level
#     $buildcfg->{textlevel}      overrides the retrieve and gdbm levels
#     $buildcfg->{searchtype}     mgpp/lucene only
#     $buildcfg->{languagemap}, $collectcfg->{defaultlanguage}
#     $buildcfg->{subcollectionmap}
#     $collectcfg->{classify}     one entry per classifier; a class name
#                                 matching /^phind$/i is skipped and only
#                                 sets the $phind flag
#
# NOTE(review): in this copy almost every write_line literal is an empty
#   string, several locals ($indexstem, $index, $level, ...) are assigned but
#   never used, and the classifier loop near the end reads
#   `for (my $j=0; $j"]); }`, which is not complete Perl. Presumably the XML
#   markup inside the literals and the body of that loop were lost when the
#   text was extracted - restore them from the repository copy before
#   changing any code in this section.
# ---------------------------------------------------------------------------
sub read_cfg_file { my ($filename) = @_; $data = {}; if ($filename !~ /collectionConfig\.xml$/ || !-f $filename) { return undef; } # create XML::Parser object for parsing metadata.xml files my $parser; if ($]<5.008) { # Perl 5.6 $parser = new XML::Parser('Style' => 'Stream', 'Handlers' => {'Char' => \&Char, 'Doctype' => \&Doctype }); } else { # Perl 5.8 $parser = new XML::Parser('Style' => 'Stream', 'ProtocolEncoding' => 'ISO-8859-1', 'Handlers' => {'Char' => \&Char, 'Doctype' => \&Doctype }); } if (!open (COLCFG, $filename)) { print STDERR "cfgread::read_cfg_file couldn't read the cfg file $filename\n"; } else { $parser->parsefile ($filename);# (COLCFG); close (COLCFG); } #print "*** collectionConfig.xml internal ***\n"; #&Display; return $data; } sub write_line { my ($filehandle, $line) = @_; print $filehandle join ("", @$line), "\n"; } # Create the buildConfig.xml file for a specific collection sub write_cfg_file { # this sub is called in make_auxiliary_files() in basebuilder.pm # the received args: $buildoutfile - destination file: buildConfig.xml # $buildcfg - all build options, eg, disable_OAI # $collectcfg - contents of collectionConfig.xml read in by read_cfg_file sub in cfgread4gs3.pm. 
my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_; my $line = []; if (!open (COLCFG, ">$buildoutfile")) { print STDERR "cfgread4gs3::write_cfg_file couldn't write the build config file $buildoutfile\n"; die; } &write_line('COLCFG', [""]); # output building metadata to build config file my $buildtype; if (defined $buildcfg->{"buildtype"}) { $buildtype = $buildcfg->{"buildtype"}; } else { $buildtype = "mgpp"; } my $numdocs; if (defined $buildcfg->{"numdocs"}) { $numdocs = $buildcfg->{"numdocs"}; } &write_line('COLCFG', [""]); &write_line('COLCFG', ["", $numdocs, ""]); &write_line('COLCFG', ["", $buildtype, ""]); &write_line('COLCFG', [""]); my $service_type = "MGPP"; if ($buildtype eq "mg") { $service_type = "MG"; } elsif ($buildtype eq "lucene") { $service_type = "Lucene"; } # output serviceRackList &write_line('COLCFG', [""]); # This serviceRack enables the collection to provide the oai metadata retrieve service, which is served by the OAIPMH.java class # For each collection, we write the following serviceRack in the collection's buildConfig.xml file if the 'disable_OAI' argument is not checked in the GLI (or equivalently, a 'disable_OAI' flag is not specified on the command line). There are also other configurations in the OAIConfig.xml. 
if ($disable_OAI == 0) { &write_line('COLCFG', [""]); if (defined $buildcfg->{'indexstem'}) { my $indexstem = $buildcfg->{'indexstem'}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); } # do the search service &write_line('COLCFG', [""]); if (defined $buildcfg->{'indexstem'}) { my $indexstem = $buildcfg->{'indexstem'}; &write_line('COLCFG', [""]); } #indexes # maps index name to shortname my $indexmap = {}; # keeps the order for indexes my @indexlist = (); my $defaultindex = ""; my $first = 1; my $maptype = "indexfieldmap"; if ($buildtype eq "mg") { $maptype = "indexmap"; } #map {print $_."\n"} keys %$buildcfg; if (defined $buildcfg->{$maptype}) { my $indexmap_t = $buildcfg->{$maptype}; foreach my $i (@$indexmap_t) { my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/; $indexmap->{$k} = $v; push @indexlist, $k; if ($first) { $defaultindex = $v; $first = 0; } } # now if the user has assigned a default index, we use it if (defined $collectcfg->{"defaultindex"}) { $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}}; } } else { print STDERR "$maptype not defined"; } #for each index in indexList, write them out &write_line('COLCFG', [""]); foreach my $i (@indexlist) { my $index = $indexmap->{$i}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); # do default index only for mg if ($buildtype eq "mg") { &write_line('COLCFG', [""]); } # do indexOptionList if ($buildtype eq "mg" || $buildtype eq "mgpp") { &write_line('COLCFG', [""]); my $stemindexes = 3; # default is stem and casefold if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) { $stemindexes = $buildcfg->{'stemindexes'}; } &write_line('COLCFG', [""]); my $maxnumeric = 4; # default if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) { $maxnumeric = $buildcfg->{'maxnumeric'}; } &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); } # levelList my $levelmap = {}; my @levellist = (); my $default_search_level = "Doc"; my $default_retrieve_level 
= "Doc"; my $default_gdbm_level = "Doc"; $first = 1; if ($buildtype eq "mgpp" || $buildtype eq "lucene") { if (defined $buildcfg->{'levelmap'}) { my $levelmap_t = $buildcfg->{'levelmap'}; foreach my $l (@$levelmap_t) { my ($key, $val) = $l =~ /^(.*)\-\>(.*)$/; $levelmap->{$key} = $val; push @levellist, $key; if ($first) { # let default search level follow the first level in the level list $default_search_level = $val; # retrieve/GDBM levels may get modified later if text level is defined $default_retrieve_level = $val; $default_gdbm_level = $val; $first = 0; } } } # even if the user has assigned a default level, we ignore it. Why? # I don't know, but it seems it's the way how the serving works #if (defined $collectcfg->{"defaultlevel"}) { # $default_search_level = $levelmap->{$collectcfg->{"defaultlevel"}}; # $default_retrieve_level = $default_search_level; #} if (defined $buildcfg->{'textlevel'}) { # let the retrieve/gdbm levels always follow the textlevel $default_retrieve_level = $buildcfg->{'textlevel'}; $default_gdbm_level = $buildcfg->{'textlevel'}; } } #for each level in levelList, write them out if ($buildtype ne "mg") { &write_line('COLCFG', [""]); foreach my $lv (@levellist) { my $level = $levelmap->{$lv}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); } # add in defaultLevel as the same level as indexLevelList, making the reading job easier if ($buildtype eq "lucene" || $buildtype eq "mgpp") { &write_line('COLCFG', [""]); } if ($buildtype eq "lucene" || $buildtype eq "mgpp") { # make the GDBM level &write_line('COLCFG', [""]); } # do searchTypeList if ($buildtype eq "mgpp" || $buildtype eq "lucene") { &write_line('COLCFG', [""]); if (defined $buildcfg->{"searchtype"}) { my $searchtype_t = $buildcfg->{"searchtype"}; foreach my $s (@$searchtype_t) { &write_line('COLCFG', [""]); } } else { &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); } # do indexLanguageList [in collect.cfg: languages; in build.cfg: 
languagemap] $first = 1; my $default_lang = ""; my $default_lang_short = ""; if (defined $buildcfg->{"languagemap"}) { &write_line('COLCFG', [""]); my $langmap_t = $buildcfg->{"languagemap"}; foreach my $l (@$langmap_t) { my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/; &write_line('COLCFG', [""]); if ($first) { $default_lang = $k; #name $default_lang_short = $v; #shortname $first = 0; } } &write_line('COLCFG', [""]); # now if the user has assigned a default language (as "en", "ru" etc.) if (defined $collectcfg->{"defaultlanguage"}) { $default_lang = $collectcfg->{"defaultlanguage"}; } &write_line('COLCFG', [""]); } # do indexSubcollectionList my $default_subcol = "";# make it in sub scope to be used in the concatenation if (defined $buildcfg->{'subcollectionmap'}) { &write_line('COLCFG', [""]); my $subcolmap = {}; my @subcollist = (); $first = 1; my $subcolmap_t = $buildcfg->{'subcollectionmap'}; foreach my $l (@$subcolmap_t) { my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/; $subcolmap->{$k} = $v; push @subcollist, $k; if ($first) { $default_subcol = $v; $first = 0; } } foreach my $sl (@subcollist) { my $subcol = $subcolmap->{$sl}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); } # close off search service &write_line('COLCFG', [""]); # do the retrieve service &write_line('COLCFG', [""]); # do default index if (defined $buildcfg->{"languagemap"}) { &write_line('COLCFG', [""]); } if (defined $buildcfg->{'subcollectionmap'}) { &write_line('COLCFG', [""]); } if ($buildtype eq "mg") { &write_line('COLCFG', [""]); } if (defined $buildcfg->{'indexstem'}) { my $indexstem = $buildcfg->{'indexstem'}; &write_line('COLCFG', [""]); } if ($buildtype eq "mgpp" || $buildtype eq "lucene") { &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); # do the browse service my $count = 1; my $phind = 0; my $started_classifiers = 0; my $classifiers = $collectcfg->{"classify"}; foreach my $cl (@$classifiers) { my $name = "CL$count"; $count++; my ($classname) = 
@$cl[0]; if ($classname =~ /^phind$/i) { $phind=1; #should add it into coll config classifiers next; } if (not $started_classifiers) { &write_line('COLCFG', [""]); if (defined $buildcfg->{'indexstem'}) { my $indexstem = $buildcfg->{'indexstem'}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); $started_classifiers = 1; } my $content = ''; #use buttonname first, then metadata if ($classname eq "DateList") { $content = "Date"; } else { for (my $j=0; $j"]); } if ($started_classifiers) { # end the classifiers &write_line('COLCFG', [""]); # close off the Browse service &write_line('COLCFG', [""]); } # the phind classifier is a separate service if ($phind) { # if phind classifier &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); close (COLCFG); } ######################################################### 1;