###########################################################################
#
# buildConfigxml.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# Reads in buildConfig.xml.
# Note: only the parts that are currently used (e.g. by the incremental
# build code) are implemented, so the resulting data is not a full
# representation of buildConfig.xml.

package buildConfigxml;

use strict;
no strict 'refs';
no strict 'subs';

# Wrapper that makes sure the XML::Parser matching the running Perl can be
# loaded.  Perl 5.6 and Perl 5.8 each get their own perllib/cpan
# sub-directory.
BEGIN {
    # $] encodes the version number of perl.  Start from the 5.8 directory
    # and only deviate for an older interpreter.
    my $cpan_subdir = "perl-5.8";

    if ($] < 5.008) {
        # assume perl 5.6
        $cpan_subdir = "perl-5.6";
    }
    elsif ($] == 5.008) {
        # exactly perl 5.8.0: still use the 5.8 directory, but complain
        print STDERR "Warning: Perl 5.8.0 is not a maintained release.\n";
        print STDERR " Please upgrade to a newer version of Perl.\n";
    }

    unless ($ENV{'GSDLOS'} =~ /^windows$/i) {
        # Appended with push (not unshift) so that an XML::Parser found
        # earlier in @INC is used by default.
        push @INC, "$ENV{'GSDLHOME'}/perllib/cpan/$cpan_subdir";
    }
}

use XML::Parser;

# Maps the metadata names found in buildConfig.xml (gs3) onto the key names
# used in the gs2-style hash.
my $nameMap = {
    "numDocs"   => "numdocs",
    "buildType" => "buildtype",
};

# The hash structure which is returned by sub read_cfg_file.
my $data = {};

# State shared by the parser handlers:
#   $currentLocation - 'name' attribute of the element whose text is wanted
#   $stringexp       - pattern selecting the 'name' values we care about
#   $indexmap_name   - key of $data that index entries are pushed onto
#   $haveindexfields - true when index names are also kept in 'indexfields'
my $currentLocation = "";
my $stringexp = q/^(buildType|numDocs)$/;
my $indexmap_name = "";
my $haveindexfields = 0;

# Reads in the build configuration file, buildConfig.xml, into a structure
# which complies with the one used by gs2 (i.e. one read in by
# &cfgread::read_cfg_file).
sub read_cfg_file {
    # Parses a buildConfig.xml file into the file-level hash $data and
    # returns a reference to it.
    #   $filename - path of the file; its name must end in "buildConfig.xml"
    # Returns undef when the name does not match or no such plain file
    # exists.  Otherwise returns the hash ref, which stays empty if the
    # file could not be opened.  Keys filled in by the handlers below:
    # numdocs, buildtype, and either indexmap (buildtype mg) or
    # indexfieldmap + indexfields (any other buildtype).
    my ($filename) = @_;

    # Reset ALL per-parse state, not only $data.  These are file-level
    # variables, so without this a second call would inherit e.g.
    # $haveindexfields from the previous file and add an 'indexfields'
    # list to an mg configuration.
    $data = {};
    $currentLocation = "";
    $indexmap_name = "";
    $haveindexfields = 0;

    if (!defined $filename
        || $filename !~ /buildConfig\.xml$/
        || !-f $filename) {
        return undef;
    }

    # create the XML::Parser object for parsing buildConfig.xml
    my %parser_args = (
        'Style'    => 'Stream',
        'Handlers' => { 'Char' => \&Char, 'Doctype' => \&Doctype },
    );
    if ($] >= 5.008) {
        # Perl 5.8 and above additionally get an explicit protocol encoding
        $parser_args{'ProtocolEncoding'} = 'ISO-8859-1';
    }
    my $parser = XML::Parser->new(%parser_args);

    # Probe readability with a lexical handle and three-argument open: the
    # file name is taken literally, and the bareword COLCFG handle used by
    # write_cfg_file is left alone.  The parser re-opens the file itself.
    my $probe;
    if (!open($probe, '<', $filename)) {
        print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n";
    }
    else {
        close($probe);
        $parser->parsefile($filename);
    }

    #&Display;
    return $data;
}

sub StartTag {
    # Stream-style handler called for every opening tag.
    #   $expat   - the parser object
    #   $element - name of the element being opened
    # The element's attributes are read from %_ (see XML::Parser, Stream
    # style).
    #
    # Those marked with #@ will not be executed at the same time when this
    # sub is being called, so that if/elsif is used to avoid unnecessary
    # tests.
    my ($expat, $element) = @_;

    my $name = $_{'name'};
    my $shortname = $_{'shortname'};

    #@ handling block metadataList
    if (defined $name and $name =~ /$stringexp/) {
        # remember where we are; the value will be retrieved later in Text
        $currentLocation = $name;
    }
    #@ handle indexes - store indexmap (mg) or indexfields and
    #@ indexfieldmap (mgpp/lucene)
    elsif ($element =~ /^indexList$/) {
        # set up the data arrays
        # this assumes that the build type has been read already, which is
        # currently the order we save the file in.
        if (defined $data->{'buildtype'} && $data->{'buildtype'} eq "mg") {
            $indexmap_name = "indexmap";
            $haveindexfields = 0;
            if (!defined $data->{"indexmap"}) {
                $data->{"indexmap"} = [];
            }
        }
        else {
            $indexmap_name = "indexfieldmap";
            $haveindexfields = 1;
            if (!defined $data->{"indexfieldmap"}) {
                $data->{"indexfieldmap"} = [];
            }
            if (!defined $data->{"indexfields"}) {
                $data->{"indexfields"} = [];
            }
        }
    }
    # NOTE(review): this matches ANY element whose name contains "index"
    # and that carries both a name and a shortname attribute, not only
    # <index>.  Left unchanged; confirm that is intended.
    elsif ($element =~ /index/) {
        # store each index in the map
        if (defined $name && defined $shortname) {
            push @{$data->{$indexmap_name}}, "$name->$shortname";
            if ($haveindexfields) {
                push @{$data->{'indexfields'}}, $name;
            }
        }
    }
}

sub EndTag {
    # Stream-style handler called for every closing tag.
    my ($expat, $element) = @_;

    # An element selected by StartTag but holding no text never reaches
    # Text, which would leave $currentLocation armed and make the next
    # piece of inter-element whitespace the stored value.  Dropping the
    # pending location here prevents that; it is a no-op when Text has
    # already consumed it.
    $currentLocation = "";
}

sub Text {
    # Stream-style handler called with the accumulated character data in
    # $_.  Stores it under the gs2 key when StartTag flagged the enclosing
    # element as one we want.
    if (defined $currentLocation) {
        #@ Handling block metadataList(numDocs, buildType)
        if ($currentLocation =~ /$stringexp/) {
            #print $currentLocation;
            my $key = $nameMap->{$currentLocation};
            $data->{$key} = $_;
            # consumed: do not capture any later text
            $currentLocation = "";
        }
    }
}

# This sub is for debugging purposes
sub Display {
    # Prints every key of $data that is currently defined, one per line.
    print "NumDocs = ".$data->{'numdocs'}."\n"
        if (defined $data->{'numdocs'});
    print "BuildType = ".$data->{'buildtype'}."\n"
        if (defined $data->{'buildtype'});
    print "IndexMap = ". join(" ",@{$data->{'indexmap'}})."\n"
        if (defined $data->{'indexmap'});
    print "IndexFieldMap = ". join(" ",@{$data->{'indexfieldmap'}})."\n"
        if (defined $data->{'indexfieldmap'});
    print "IndexFields = ". join(" ",@{$data->{'indexfields'}})."\n"
        if (defined $data->{'indexfields'});
}

# is this actually used??
# (It is registered as the Doctype handler in read_cfg_file.)
sub Doctype {
    # Aborts the parse when the document declares a DOCTYPE other than
    # "buildConfig".
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    if ($name !~ /^buildConfig$/) {
        die "buildConfigxml::Doctype unexpected document type '$name' (expected 'buildConfig')\n";
    }
}

# This Char function overrides the one in XML::Parser::Stream to overcome a
# problem where $expat->{Text} is treated as the return value, slowing
# things down significantly in some cases.
sub Char {
    # Character-data handler registered in read_cfg_file: appends the new
    # chunk ($_[1]) to the text buffer kept on the parser object ($_[0]).
    # NOTE(review): 'use bytes' is a lexically scoped pragma, so inside this
    # otherwise empty block it does not cover the concatenation below --
    # confirm whether it was meant to.
    if ($]<5.008) {
        use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6
    }
    $_[0]->{'Text'} .= $_[1];
    # explicit undef so the text buffer is not the return value
    return undef;
}

sub write_line {
    # Prints the strings in the array ref $line, joined without separator
    # and followed by a newline, to $filehandle.  The callers in this file
    # pass the handle by name ('COLCFG'), which works because strict refs
    # is switched off for this package.
    my ($filehandle, $line) = @_;
    print $filehandle join ("", @$line), "\n";
}

# Create the buildConfig.xml file for a specific collection
sub write_cfg_file {
    # this sub is called in make_auxiliary_files() in basebuilder.pm
    # the received args: $buildoutfile - destination file: buildConfig.xml
    #                    $buildcfg - all build options, eg, disable_OAI
    #                    $collectcfg - contents of collectionConfig.xml read in by read_cfg_file sub in buildConfigxml.pm.
    #                    $disable_OAI - compared numerically with 0 below; the OAI section is written only when it is 0
    # Dies if the destination file cannot be opened for writing.
    #
    # NOTE(review): read_cfg_file in this file only accepts buildConfig.xml,
    # so $collectcfg presumably comes from a different reader -- confirm.
    #
    # NOTE(review): in this copy of the file every string literal handed to
    # write_line is empty, although the comments describe serviceRack,
    # index, level, ... elements and several variables computed below
    # ($service_type, $indexstem, $defaultindex, ...) are never used.  The
    # markup that presumably lived inside those literals appears to have
    # been lost, and the classifier loop near the end is cut off mid
    # statement.  TODO: restore both from version control before relying on
    # this sub.
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;
    # NOTE(review): $line is not used anywhere in the visible code.
    my $line = [];

    if (!open (COLCFG, ">$buildoutfile")) {
        print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n";
        die;
    }

    &write_line('COLCFG', [""]);
    # output building metadata to build config file
    # buildtype falls back to "mgpp" when the build options do not name one
    my $buildtype;
    if (defined $buildcfg->{"buildtype"}) {
        $buildtype = $buildcfg->{"buildtype"};
    } else {
        $buildtype = "mgpp";
    }
    # numdocs stays undefined when the build options do not supply it
    my $numdocs;
    if (defined $buildcfg->{"numdocs"}) {
        $numdocs = $buildcfg->{"numdocs"};
    }
    &write_line('COLCFG', [""]);
    &write_line('COLCFG', ["", $numdocs, ""]);
    &write_line('COLCFG', ["", $buildtype, ""]);
    &write_line('COLCFG', [""]);

    # service name prefix derived from the build type
    my $service_type = "MGPP";
    if ($buildtype eq "mg") {
        $service_type = "MG";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # output serviceRackList
    &write_line('COLCFG', [""]);

    # This serviceRack enables the collection to provide the oai metadata retrieve service, which is served by the OAIPMH.java class
    # For each collection, we write the following serviceRack in the collection's buildConfig.xml file if the 'disable_OAI' argument is not checked in the GLI (or equivalently, a 'disable_OAI' flag is not specified on the command line). There are also other configurations in the OAIConfig.xml.
    if ($disable_OAI == 0) {
        &write_line('COLCFG', [""]);
        if (defined $buildcfg->{'indexstem'}) {
            my $indexstem = $buildcfg->{'indexstem'};
            &write_line('COLCFG', [""]);
        }
        &write_line('COLCFG', [""]);
    }

    # do the search service
    &write_line('COLCFG', [""]);
    if (defined $buildcfg->{'indexstem'}) {
        my $indexstem = $buildcfg->{'indexstem'};
        &write_line('COLCFG', [""]);
    }

    #indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    my $first = 1;
    # mg stores its indexes under 'indexmap', every other build type under 'indexfieldmap'
    my $maptype = "indexfieldmap";
    if ($buildtype eq "mg") {
        $maptype = "indexmap";
    }

    #map {print $_."\n"} keys %$buildcfg;

    if (defined $buildcfg->{$maptype}) {
        my $indexmap_t = $buildcfg->{$maptype};
        foreach my $i (@$indexmap_t) {
            # each entry has the form "name->shortname"
            my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/;
            $indexmap->{$k} = $v;
            push @indexlist, $k;
            # the first entry's shortname is the default index unless overridden below
            if ($first) {
                $defaultindex = $v;
                $first = 0;
            }
        }
        # now if the user has assigned a default index, we use it
        if (defined $collectcfg->{"defaultindex"}) {
            $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}};
        }
    } else {
        print STDERR "$maptype not defined";
    }

    #for each index in indexList, write them out
    &write_line('COLCFG', [""]);
    foreach my $i (@indexlist) {
        my $index = $indexmap->{$i};
        &write_line('COLCFG', [""]);
    }
    &write_line('COLCFG', [""]);

    &write_line('COLCFG', [""]);

    # do indexOptionList
    if ($buildtype eq "mg" || $buildtype eq "mgpp") {
        &write_line('COLCFG', [""]);
        # a build option is only honoured when it is all digits
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        &write_line('COLCFG', [""]);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        &write_line('COLCFG', [""]);

        &write_line('COLCFG', [""]);
    }

    # levelList
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "Doc";
    my $default_retrieve_level = "Doc";
    my $default_db_level = "Doc";
    $first = 1;
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        if (defined $buildcfg->{'levelmap'}) {
            my $levelmap_t = $buildcfg->{'levelmap'};
            foreach my $l (@$levelmap_t) {
                # each entry has the form "name->shortname"
                my ($key, $val) = $l =~ /^(.*)\-\>(.*)$/;
                $levelmap->{$key} = $val;
                push @levellist, $key;
                if ($first) {
                    # let default search level follow the first level in the level list
                    $default_search_level = $val;
                    # retrieve/database levels may get modified later if text level is defined
                    $default_retrieve_level = $val;
                    $default_db_level = $val;
                    $first = 0;
                }
            }
        }
        # the default level assigned by the user is no longer ignored [Shaoqun], but the retrieval level stays the same.
        # NOTE(review): only the guard and the retrieve-level line appear
        # to be commented out; the assignment between them is treated as
        # live code here, which means it runs even when "defaultlevel" is
        # not set -- confirm against version control.
        #if (defined $collectcfg->{"defaultlevel"}) {
        $default_search_level = $levelmap->{$collectcfg->{"defaultlevel"}};
        #    $default_retrieve_level = $default_search_level;
        #}

        if (defined $buildcfg->{'textlevel'}) {
            # let the retrieve/database levels always follow the textlevel
            $default_retrieve_level = $buildcfg->{'textlevel'};
            $default_db_level = $buildcfg->{'textlevel'};
        }
    }

    #for each level in levelList, write them out
    if ($buildtype ne "mg") {
        &write_line('COLCFG', [""]);
        foreach my $lv (@levellist) {
            my $level = $levelmap->{$lv};
            &write_line('COLCFG', [""]);
        }
        &write_line('COLCFG', [""]);
    }
    # add in defaultLevel as the same level as indexLevelList, making the reading job easier
    if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
        &write_line('COLCFG', [""]);
    }
    if ($buildtype eq "lucene" || $buildtype eq "mgpp") {
        &write_line('COLCFG', [""]);
    }

    # do searchTypeList
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        &write_line('COLCFG', [""]);

        if (defined $buildcfg->{"searchtype"}) {
            my $searchtype_t = $buildcfg->{"searchtype"};
            foreach my $s (@$searchtype_t) {
                &write_line('COLCFG', [""]);
            }
        } else {
            # no search types configured: two lines are written instead
            &write_line('COLCFG', [""]);
            &write_line('COLCFG', [""]);
        }
        &write_line('COLCFG', [""]);
    }

    # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap]
    $first = 1;
    my $default_lang = "";
    my $default_lang_short = "";
    if (defined $buildcfg->{"languagemap"}) {
        &write_line('COLCFG', [""]);

        my $langmap_t = $buildcfg->{"languagemap"};
        foreach my $l (@$langmap_t) {
            # each entry has the form "name->shortname"
            my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;

            &write_line('COLCFG', [""]);
            if ($first) {
                $default_lang = $k; #name
                $default_lang_short = $v; #shortname
                $first = 0;
            }
        }

        &write_line('COLCFG', [""]);
        # now if the user has assigned a default language (as "en", "ru" etc.)
        if (defined $collectcfg->{"defaultlanguage"}) {
            $default_lang = $collectcfg->{"defaultlanguage"};
        }
        &write_line('COLCFG', [""]);
    }

    # do indexSubcollectionList
    my $default_subcol = "";# make it in sub scope to be used in the concatenation
    if (defined $buildcfg->{'subcollectionmap'}) {
        &write_line('COLCFG', [""]);
        my $subcolmap = {};
        my @subcollist = ();
        $first = 1;
        my $subcolmap_t = $buildcfg->{'subcollectionmap'};
        foreach my $l (@$subcolmap_t) {
            # each entry has the form "name->shortname"
            my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
            $subcolmap->{$k} = $v;
            push @subcollist, $k;
            # the first entry's shortname becomes the default subcollection
            if ($first) {
                $default_subcol = $v;
                $first = 0;
            }
        }
        foreach my $sl (@subcollist) {
            my $subcol = $subcolmap->{$sl};
            &write_line('COLCFG', [""]);
        }

        &write_line('COLCFG', [""]);
        &write_line('COLCFG', [""]);
    }

    # close off search service
    &write_line('COLCFG', [""]);

    # do the retrieve service
    &write_line('COLCFG', [""]);

    # do default index
    if (defined $buildcfg->{"languagemap"}) {
        &write_line('COLCFG', [""]);
    }
    if (defined $buildcfg->{'subcollectionmap'}) {
        &write_line('COLCFG', [""]);
    }
    if ($buildtype eq "mg") {
        &write_line('COLCFG', [""]);
    }

    if (defined $buildcfg->{'indexstem'}) {
        my $indexstem = $buildcfg->{'indexstem'};
        &write_line('COLCFG', [""]);
    }
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        &write_line('COLCFG', [""]);
    }
    &write_line('COLCFG', [""]);

    # do the browse service
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;

    # each classifier entry is an array ref whose first element is the classifier name
    my $classifiers = $collectcfg->{"classify"};
    foreach my $cl (@$classifiers) {
        # classifiers are numbered CL1, CL2, ...; the counter also advances for phind
        my $name = "CL$count";
        $count++;
        my ($classname) = @$cl[0];
        if ($classname =~ /^phind$/i) {
            $phind=1;
            #should add it into coll config classifiers
            next;
        }

        # the browse section is opened lazily, when the first non-phind classifier is met
        if (not $started_classifiers) {
            &write_line('COLCFG', [""]);
            if (defined $buildcfg->{'indexstem'}) {
                my $indexstem = $buildcfg->{'indexstem'};
                &write_line('COLCFG', [""]);
            }
            &write_line('COLCFG', [""]);
            $started_classifiers = 1;
        }
        my $content = ''; #use buttonname first, then metadata
        if ($classname eq "DateList") {
            $content = "Date";
        } else {
            # NOTE(review): the statement below is truncated in this copy of
            # the file (loop condition, loop body and the following
            # write_line call are missing).  Reproduced unchanged; TODO
            # restore from version control.
            for (my $j=0; $j"]); }
    if ($started_classifiers) {
        # end the classifiers
        &write_line('COLCFG', [""]);
        # close off the Browse service
        &write_line('COLCFG', [""]);
    }

    # the phind classifier is a separate service
    if ($phind) {
        # if phind classifier
        &write_line('COLCFG', [""]);
    }

    &write_line('COLCFG', [""]);
    &write_line('COLCFG', [""]);
    close (COLCFG);
}

#########################################################

1;