########################################################################### # # buildConfigxml.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # reads in buildConfig.xml # Note, only implemented the bits that are currently used, eg by incremental # build code. # The resulting data is not a full representation on buildConfig.xml. package buildConfigxml; use strict; no strict 'refs'; no strict 'subs'; use XMLParser; # A mapping hash to resolve name discrepancy between gs2 and gs3. my $nameMap = {"numDocs" => "numdocs", "buildType" => "buildtype", "orthogonalBuildTypes" => "orthogonalbuildtypes" }; # A hash structure which is returned by sub read_cfg_file. my $data = {}; # use those unique attribute values to locate the text within the elements my $currentLocation = ""; my $stringexp = q/^(buildType|numDocs)$/; my $arrayexp = q/^(orthogonalBuildTypes)$/; my $indexmap_name = ""; my $haveindexfields = 0; # Reads in the model collection configuration file, collectionConfig.xml, # into a structure which complies with the one used by gs2 (i.e. one read # in by &cfgread::read_cfg_file). sub read_cfg_file { my ($filename) = @_; $data = {}; if ($filename !~ /buildConfig\.xml$/ || !-f $filename) { return undef; } # Removed ProtocolEncoding (see MetadataXMLPlugin for details) # create XML::Parser object for parsing metadata.xml files my $parser = new XML::Parser('Style' => 'Stream', 'Pkg' => 'buildConfigxml', 'Handlers' => {'Char' => \&Char, 'Doctype' => \&Doctype }); if (!open (COLCFG, $filename)) { print STDERR "buildConfigxml::read_cfg_file couldn't read the cfg file $filename\n"; } else { $parser->parsefile ($filename);# (COLCFG); close (COLCFG); } #&Display; return $data; } sub StartTag { # Those marked with #@ will not be executed at the same time when this sub is being called # so that if/elsif is used to avoid unnecessary tests my ($expat, $element) = @_; my $name = $_{'name'}; my $shortname = $_{'shortname'}; #@ handling block metadataList if (defined $name && (($name =~ m/$stringexp/) || ($name =~ m/$arrayexp/))) { $currentLocation = $name; # the value will be retrieved later in Text sub } #@ handle indexes - store indexmap (mg) or indexfields and indexfieldmap (mgpp/lucene/solr) elsif ($element =~ /^indexList$/) { # set up the data arrays # this assumes that the build type has been read already, which is # currently the order we save the file in. if ($data->{'buildtype'} eq "mg") { $indexmap_name = "indexmap"; if (!defined $data->{"indexmap"}) { $data->{"indexmap"} = []; } } else { # mgpp, lucene or solr $indexmap_name = "indexfieldmap"; $haveindexfields = 1; if (!defined $data->{"indexfieldmap"}) { $data->{"indexfieldmap"} = []; } if (!defined $data->{"indexfields"}) { $data->{"indexfields"} = []; } } } elsif ($element =~ /index/) { # store each index in the map if (defined $name && defined $shortname) { push @{$data->{$indexmap_name}}, "$name->$shortname"; if ($haveindexfields) { push @{$data->{'indexfields'}}, $name; } } } } sub EndTag { my ($expat, $element) = @_; } sub Text { if (defined $currentLocation) { #@ Handling block metadataList(numDocs, buildType) if ($currentLocation =~ /$stringexp/) { #print $currentLocation; my $key = $nameMap->{$currentLocation}; $data->{$key} = $_; undef $currentLocation; } elsif ($currentLocation =~ /$arrayexp/) { #print $currentLocation; my $key = $nameMap->{$currentLocation}; push(@{$data->{$key}},$_); undef $currentLocation; } } } # This sub is for debugging purposes sub Display { print "NumDocs = ".$data->{'numdocs'}."\n" if (defined $data->{'numdocs'}); print "BuildType = ".$data->{'buildtype'}."\n" if (defined $data->{'buildtype'}); print "OrthogonalBuildTypes = ".join(",",@{$data->{'orthogonalbuildtypes'}})."\n" if (defined $data->{'orthogonalbuildtypes'}); print "IndexMap = ". join(" ",@{$data->{'indexmap'}})."\n" if (defined $data->{'indexmap'}); print "IndexFieldMap = ". join(" ",@{$data->{'indexfieldmap'}})."\n" if (defined $data->{'indexfieldmap'}); print "IndexFields = ". join(" ",@{$data->{'indexfields'}})."\n" if (defined $data->{'indexfields'}); } # is this actually used?? sub Doctype { my ($expat, $name, $sysid, $pubid, $internal) = @_; die if ($name !~ /^buildConfig$/); } # This Char function overrides the one in XML::Parser::Stream to overcome a # problem where $expat->{Text} is treated as the return value, slowing # things down significantly in some cases. sub Char { if ($]<5.008) { use bytes; # Necessary to prevent encoding issues with XML::Parser 2.31+ and Perl 5.6 } $_[0]->{'Text'} .= $_[1]; return undef; } sub write_line { my ($filehandle, $line) = @_; print $filehandle join ("", @$line), "\n"; } sub search_and_retrieve_settings { my ($buildcfg,$collectcfg) = @_; my $settings = {}; if (defined $buildcfg->{"buildtype"}) { $settings->{'buildtype'} = $buildcfg->{"buildtype"}; } else { $settings->{'buildtype'} = "mgpp"; } my $buildtype = $settings->{'buildtype'}; if (defined $collectcfg->{"orthogonalbuildtypes"}) { # Note the use of collectcfg, not bulidcfg $settings->{'orthogonalbuildtypes'} = $collectcfg->{"orthogonalbuildtypes"}; } else { $settings->{'orthogonalbuildtypes '}= []; } if (defined $buildcfg->{"numdocs"}) { $settings->{'numdocs'} = $buildcfg->{"numdocs"}; } else { $settings->{'numdocs'} = 0; } my $service_type = "MGPP"; if ($buildtype eq "mg") { $service_type = "MG"; } elsif ($buildtype eq "lucene") { $service_type = "Lucene"; } elsif ($buildtype eq "solr") { $service_type = "Solr"; } $settings->{'service_type'} = $service_type; if (defined $buildcfg->{"infodbtype"}) { $settings->{'infodbtype'} = $buildcfg->{'infodbtype'}; } else { $settings->{'infodbtype'} = "gdbm"; } #-- # indexes #-- my $indexmap = {}; # maps index name to shortname my $indexlist = []; # keeps the order for indexes my $defaultindex = ""; my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap"; if (defined $buildcfg->{$maptype}) { my $first = 1; my $indexmap_t = $buildcfg->{$maptype}; foreach my $i (@$indexmap_t) { my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/; $indexmap->{$k} = $v; push @$indexlist, $k; if ($first) { $defaultindex = $v; $first = 0; } } # now if the user has assigned a default index, we use it if (defined $collectcfg->{"defaultindex"}) { $defaultindex = $indexmap->{$collectcfg->{"defaultindex"}}; } } else { print STDERR "$maptype not defined\n"; } $settings->{'num_indexes'} = $buildcfg->{'num_indexes'}; $settings->{'defaultindex'} = $defaultindex; $settings->{'indexmap'} = $indexmap; $settings->{'indexlist'} = $indexlist; #-- # default lang #-- $settings->{'default_lang'} = ""; $settings->{'default_lang_short'} = ""; if (defined $buildcfg->{"languagemap"}) { my $langmap_t = $buildcfg->{"languagemap"}; if ((defined $langmap_t) && (scalar(@$langmap_t)>=1)) { my $l = $langmap_t->[0]; my ($k, $v) = $l =~ m/^(.*)\-\>(.*)$/; $settings->{'default_lang'} = $k; #name $settings->{'default_lang_short'} = $v; #short name } # now if the user has assigned a default language (as "en", "ru" etc.) if (defined $collectcfg->{"defaultlanguage"}) { $settings->{'default_lang'} = $collectcfg->{"defaultlanguage"}; # what about default_lang_short ?? #### } } # default subcol $settings->{'default_subcol'} = ""; if (defined $buildcfg->{'subcollectionmap'}) { my $subcolmap_t = $buildcfg->{'subcollectionmap'}; if ((defined $subcolmap_t) && (scalar(@$subcolmap_t)>=1)) { my $l = $subcolmap_t->[0]; my ($k, $v) = $l =~ m/^(.*)\-\>(.*)$/; $settings->{'default_subcol'} = $v; } } #-- # indexstem #-- if (defined $buildcfg->{'indexstem'}) { $settings->{'indexstem'} = $buildcfg->{'indexstem'}; } #-- # levelList #-- my $levelmap = {}; my $levellist = []; my $default_search_level = "Doc"; my $default_retrieve_level = "Doc"; my $default_db_level = "Doc"; if ($buildtype eq "mgpp" || $buildtype eq "lucene" || $buildtype eq "solr") { if (defined $buildcfg->{'levelmap'}) { my $first = 1; my $levelmap_t = $buildcfg->{'levelmap'}; foreach my $l (@$levelmap_t) { my ($key, $val) = $l =~ /^(.*)\-\>(.*)$/; $levelmap->{$key} = $val; push @$levellist, $key; if ($first) { # let default search level follow the first level in the level list $default_search_level = $val; # retrieve/database levels may get modified later if text level is defined $default_retrieve_level = $val; $default_db_level = $val; $first = 0; } } } # the default level assigned by the user is no longer ignored [Shaoqun], but the retrievel level stays the same. if (defined $collectcfg->{"defaultlevel"}) { $default_search_level = $levelmap->{$collectcfg->{"defaultlevel"}}; # $default_retrieve_level = $default_search_level; } if (defined $buildcfg->{'textlevel'}) { # let the retrieve/database levels always follow the textlevel $default_retrieve_level = $buildcfg->{'textlevel'}; $default_db_level = $buildcfg->{'textlevel'}; } } $settings->{'levelmap'} = $levelmap; $settings->{'levellist'} = $levellist; $settings->{'default_search_level'} = $default_search_level if $default_search_level; $settings->{'default_retrieve_level'} = $default_retrieve_level; $settings->{'default_db_level'} = $default_db_level; return $settings; } sub write_search_servicerack { my ($buildcfg,$settings) = @_; my $buildtype = $settings->{'buildtype'}; my $infodbtype = $settings->{'infodbtype'}; my $service_type = $settings->{'service_type'}; # there's no searching and therefore no search services if there are no indexes return if($settings->{'num_indexes'} <= 0); # do the search service &write_line('COLCFG', [""]); if (defined $buildcfg->{'indexstem'}) { my $indexstem = $buildcfg->{'indexstem'}; &write_line('COLCFG', [""]); } if (defined $buildcfg->{'infodbtype'}) { &write_line('COLCFG', [""]); } #indexes my $indexmap = $settings->{'indexmap'}; my $indexlist = $settings->{'indexlist'}; my $defaultindex = $settings->{'defaultindex'}; #for each index in indexList, write them out &write_line('COLCFG', [""]); foreach my $i (@$indexlist) { my $index = $indexmap->{$i}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); #$defaultindex = "ZZ" if (!$defaultindex); # index allfields by default if ($defaultindex) { &write_line('COLCFG', [""]); } # do indexOptionList if ($buildtype eq "mg" || $buildtype eq "mgpp") { &write_line('COLCFG', [""]); my $stemindexes = 3; # default is stem and casefold if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) { $stemindexes = $buildcfg->{'stemindexes'}; } &write_line('COLCFG', [""]); my $maxnumeric = 4; # default if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) { $maxnumeric = $buildcfg->{'maxnumeric'}; } &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); } #-- # levelList #-- my $levelmap = $settings->{'levelmap'}; my $levellist = $settings->{'levellist'}; my $default_search_level = $settings->{'default_search_level'}; my $default_retrieve_level = $settings->{'default_retrieve_level'}; my $default_db_level = $settings->{'default_db_level'}; #for each level in levelList, write them out if ($buildtype ne "mg") { &write_line('COLCFG', [""]); foreach my $lv (@$levellist) { my $level = $levelmap->{$lv}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); } # add in defaultLevel as the same level as indexLevelList, making the reading job easier if ($buildtype eq "lucene" || $buildtype eq "mgpp" || $buildtype eq "solr") { &write_line('COLCFG', [""]); } if ($buildtype eq "lucene" || $buildtype eq "mgpp" || $buildtype eq "solr") { &write_line('COLCFG', [""]); } # do searchTypeList if ($buildtype eq "mgpp" || $buildtype eq "lucene" || $buildtype eq "solr") { &write_line('COLCFG', [""]); if (defined $buildcfg->{"searchtype"}) { my $searchtype_t = $buildcfg->{"searchtype"}; foreach my $s (@$searchtype_t) { &write_line('COLCFG', [""]); } } else { &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); } # do indexLanguageList [in collect.cfg: languages; in build.cfg: languagemap] my $default_lang = $settings->{'default_lang'}; my $default_lang_short = $settings->{'default_lang_short'}; if (defined $buildcfg->{"languagemap"}) { &write_line('COLCFG', [""]); my $langmap_t = $buildcfg->{"languagemap"}; foreach my $l (@$langmap_t) { my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); } # do indexSubcollectionList my $default_subcol = $settings->{'default_subcol'}; if (defined $buildcfg->{'subcollectionmap'}) { &write_line('COLCFG', [""]); my $subcolmap = {}; my @subcollist = (); my $subcolmap_t = $buildcfg->{'subcollectionmap'}; foreach my $l (@$subcolmap_t) { my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/; $subcolmap->{$k} = $v; push @subcollist, $k; } foreach my $sl (@subcollist) { my $subcol = $subcolmap->{$sl}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); } # close off search service &write_line('COLCFG', [""]); } sub write_orthogonalsearch_serviceracks { my ($buildcfg,$settings) = @_; #return if($settings->{'num_indexes'} <= 0); # no search if no indexes my $infodbtype = $settings->{'infodbtype'}; my $orthogonalbuildtypes = $settings->{'orthogonalbuildtypes'}; foreach my $obt (@$orthogonalbuildtypes) { $obt =~ s/^(.)/\u$1/; # capitialize initial letter $obt =~ s/-(.)/\u$1/g; # change any hyphenated words to cap next letter &write_line('COLCFG', [""]); &write_line('COLCFG',[""]); &write_line('COLCFG', [""]); } } sub write_retrieve_servicerack { my ($buildcfg,$settings) = @_; my $buildtype = $settings->{'buildtype'}; my $infodbtype = $settings->{'infodbtype'}; my $service_type = $settings->{'service_type'}; # do the retrieve service &write_line('COLCFG', [""]); # do default index if (defined $buildcfg->{"languagemap"}) { my $default_lang = $settings->{'default_lang'}; &write_line('COLCFG', [""]); } if (defined $buildcfg->{'subcollectionmap'}) { my $default_subcol = $settings->{'default_subcol'}; &write_line('COLCFG', [""]); } if ($buildtype eq "mg") { my $defaultindex = $settings->{'defaultindex'}; &write_line('COLCFG', [""]); } if (defined $buildcfg->{'indexstem'}) { my $indexstem = $buildcfg->{'indexstem'}; &write_line('COLCFG', [""]); } if ($buildtype eq "mgpp" || $buildtype eq "lucene" || $buildtype eq "solr") { my $default_retrieve_level = $settings->{'default_retrieve_level'}; &write_line('COLCFG', [""]); } if (defined $buildcfg->{'infodbtype'}) { &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); } # Create the buildConfig.xml file for a specific collection sub write_cfg_file { # this sub is called in make_auxiliary_files() in basebuilder.pm # the received args: $buildoutfile - destination file: buildConfig.xml # $buildcfg - all build options, # $collectcfg - contents of collectionConfig.xml read in by read_cfg_file sub in buildConfigxml.pm. my ($buildoutfile, $buildcfg, $collectcfg) = @_; my $line = []; if (!open (COLCFG, ">$buildoutfile")) { print STDERR "buildConfigxml::write_cfg_file couldn't write the build config file $buildoutfile\n"; die; } my $settings = search_and_retrieve_settings($buildcfg,$collectcfg); my $buildtype = $settings->{'buildtype'}; my $orthogonalbuildtypes = $settings->{'orthogonalbuildtypes'}; my $numdocs = $settings->{'numdocs'}; &write_line('COLCFG', [""]); # output building metadata to build config file &write_line('COLCFG', [""]); &write_line('COLCFG', ["", $numdocs, ""]); &write_line('COLCFG', ["", $buildtype, ""]); foreach my $obt (@$orthogonalbuildtypes) { &write_line('COLCFG', ["", $obt, ""]); } if (defined $buildcfg->{'indexstem'}) { &write_line('COLCFG', ["", $buildcfg->{"indexstem"}, ""]); } if (defined $buildcfg->{'infodbtype'}) { &write_line('COLCFG', ["", $buildcfg->{"infodbtype"}, ""]); } if (defined $buildcfg->{'earliestdatestamp'}) { &write_line('COLCFG', ["", $buildcfg->{"earliestdatestamp"}, ""]); } &write_line('COLCFG', [""]); # output serviceRackList &write_line('COLCFG', [""]); write_search_servicerack($buildcfg,$settings); # add in orthogonalbuildtypes write_orthogonalsearch_serviceracks($buildcfg,$settings); write_retrieve_servicerack($buildcfg,$settings); # do the browse service my $count = 1; my $phind = 0; my $started_classifiers = 0; my $classifiers = $collectcfg->{"classify"}; foreach my $cl (@$classifiers) { my $name = "CL$count"; $count++; my ($classname) = @$cl[0]; if ($classname =~ /^phind$/i) { $phind=1; #should add it into coll config classifiers next; } if (not $started_classifiers) { &write_line('COLCFG', [""]); if (defined $buildcfg->{'indexstem'}) { my $indexstem = $buildcfg->{'indexstem'}; &write_line('COLCFG', [""]); } if (defined $buildcfg->{'infodbtype'}) { my $infodbtype = $buildcfg->{'infodbtype'}; &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); $started_classifiers = 1; } my $content = ''; #use buttonname first, then metadata if ($classname eq "DateList") { $content = "Date"; } else { for (my $j=0; $j"]); } if ($started_classifiers) { # end the classifiers &write_line('COLCFG', [""]); # close off the Browse service &write_line('COLCFG', [""]); } # the phind classifier is a separate service if ($phind) { # if phind classifier &write_line('COLCFG', [""]); } &write_line('COLCFG', [""]); &write_line('COLCFG', [""]); close (COLCFG); } ######################################################### 1;