###########################################################################
#
# colcfg.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# reads in a collection configuration file

package colcfg;

use cfgread;
use cfgread4gs3;
use gsprintf 'gsprintf';
use strict;

# NOTE(review): get_collect_cfg_name and use_collection call util::* but this
# file does not load util itself; presumably every caller has already loaded
# it -- confirm before relying on this module stand-alone.

# the collection configuration file data is stored in the form
#
# {'infodbtype'}->string
# {'creator'}->string
# {'public'}->string
# {'defaultindex'}->string
# {'importdir'}->string
# {'archivedir'}->string
# {'cachedir'}->string
# {'builddir'}->string
# {'removeold'}->string
# {'textcompress'}->string
# {'buildtype'}->string
# {'maxnumeric'}->string
# {'separate_cjk'}->string
# {'sections_index_document_metadata'}->string (always, unless_section_metadata_exists)
# {'maintainer'}->array of strings
# {'languages'}->array of strings
# {'indexsubcollections'}->array of strings
# {'indexes'}->array of strings
# {'indexoptions'}->array of strings (stem, casefold, accentfold)
# {'dontbuild'}->array of strings
# {'dontgdbm'}->array of strings
# {'mirror'}->array of strings
# {'phind'}->array of strings
# {'plugout'}->array of strings
# {'levels'}->array of strings (for mgpp eg Section, Paragraph)
# {'searchtype'}->array of strings (for mgpp, form or plain)
# {'subcollection'}->hash of key-value pairs
# {'acquire'}->array of arrays of strings
# {'plugin'}->array of arrays of strings
# {'classify'}->array of arrays of strings
# {'collectionmeta'}->hash of key->hash of param-value -used
# for language specification
#    for example, collectionmeta->collectionname->default->demo
#                                               ->mi->maori demo

# Reads a collect.cfg style file via cfgread::read_cfg_file and returns
# whatever that call returns.
#
# $filename - path of the configuration file to read.
#
# The five patterns select configuration keys by the kind of value they hold,
# in the order used by the layout comment above: plain strings, arrays of
# strings, hashes, arrays of arrays, and hashes of hashes.
sub read_collect_cfg {
    my ($filename) = @_;

    return &cfgread::read_cfg_file ($filename,
        q/^(infodbtype|creator|public|defaultindex|importdir|/ .
        q/archivedir|cachedir|builddir|removeold|/ .
        q/textcompress|buildtype|no_text|keepold|gzip|/ .
        q/verbosity|remove_empty_classifications|OIDtype|OIDmetadata|/ .
        q/groupsize|maxdocs|debug|mode|saveas|/ .
        q/sortmeta|removesuffix|removeprefix|create_images|/ .
        q/maxnumeric|languagemetadata|/ .
        q/no_strip_html|index|sections_index_document_metadata|/ .
        q/store_metadata_coverage)$/,
        # Anchored with ^ like every other pattern here; without it any key
        # that merely ends in one of these names would be read as an array.
        q/^(maintainer|languages|indexsubcollections|/ .
        q/indexes|indexoptions|dontbuild|dontgdbm|mirror|levels|plugout|/ .
        q/searchtype|searchtypes)$/,
        q/^(subcollection|format)$/,
        q/^(acquire|plugin|classify)$/,
        q/^(collectionmeta)$/);
}

# Reads a collectionConfig.xml style file by delegating to
# cfgread4gs3::read_cfg_file and returns that call's result.
#
# $filename - path of the XML configuration file to read.
sub read_collection_cfg_xml {
    my ($filename) = @_;

    return &cfgread4gs3::read_cfg_file ($filename);
}

# Placeholder: writing collectionConfig.xml is not implemented in this module.
# Takes no action and returns nothing.
sub write_collection_cfg_xml {
    return;
}

# Writes the build configuration XML by delegating to
# cfgread4gs3::write_cfg_file and returns that call's result.
#
# $buildoutfile - output file to write.
# $buildcfg     - build configuration data.
# $collectcfg   - collection configuration data.
# $disable_OAI  - passed through unchanged to cfgread4gs3::write_cfg_file.
sub write_build_cfg_xml {
    my ($buildoutfile, $buildcfg, $collectcfg, $disable_OAI) = @_;

    return &cfgread4gs3::write_cfg_file ($buildoutfile, $buildcfg,
                                         $collectcfg, $disable_OAI);
}

# Writes collection configuration data to a collect.cfg style file via
# cfgread::write_cfg_file.
#
# $filename - path of the configuration file to write.
# $data     - configuration data, laid out as described at the top of this
#             file.
#
# The key patterns mirror the ones in read_collect_cfg so that every key that
# can be read can also be written back.  'language_metadata' is accepted in
# addition to 'languagemetadata' because earlier versions of this pattern
# spelt the key that way.
sub write_collect_cfg {
    my ($filename, $data) = @_;

    return &cfgread::write_cfg_file ($filename, $data,
        q/^(infodbtype|creator|public|defaultindex|importdir|/ .
        q/archivedir|cachedir|builddir|removeold|/ .
        q/textcompress|buildtype|no_text|keepold|gzip|/ .
        q/verbosity|remove_empty_classifications|OIDtype|OIDmetadata|/ .
        q/groupsize|maxdocs|debug|mode|saveas|/ .
        q/sortmeta|removesuffix|removeprefix|create_images|/ .
        # Each fragment must end in '|': a missing separator here used to
        # fuse two alternatives into one key name that never matched.
        q/maxnumeric|languagemetadata|language_metadata|/ .
        q/no_strip_html|index|sections_index_document_metadata|/ .
        q/store_metadata_coverage)$/,
        q/^(maintainer|languages|indexsubcollections|/ .
        q/indexes|indexoptions|dontbuild|dontgdbm|mirror|levels|plugout|/ .
        q/searchtype|searchtypes)$/,
        q/^(subcollection|format)$/,
        q/^(acquire|plugin|classify)$/,
        q/^(collectionmeta)$/);
}

# the build configuration file data is stored in the form
#
# {'infodbtype'}->string
# {'builddate'}->string
# {'buildtype'}->string
# {'metadata'}->array of strings
# {'languages'}->array of strings
# {'numdocs'}->string
# {'numsections'}->string
# {'numwords'}->string
# {'numbytes'}->string
# {'maxnumeric'}->string
# {'indexfields'}->array of strings
# {'indexfieldmap'}->array of strings in the form "field->FI"
# {'indexmap'} -> array of strings
# {'indexlevels'} -> array of strings
# {'stemindexes'} -> string (int)
# {'textlevel'}->string
# {'levelmap'} -> array of strings in the form "level->shortname"

# Reads a build configuration file via cfgread::read_cfg_file and returns
# whatever that call returns.
#
# $filename - path of the build configuration file to read.
#
# The first pattern lists the string-valued keys, the second the keys holding
# arrays of strings.
sub read_build_cfg {
    my ($filename) = @_;

    return &cfgread::read_cfg_file ($filename,
        q/^(infodbtype|builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes|separate_cjk)$/,
        q/^(indexmap|subcollectionmap|languagemap|notbuilt|indexfields|indexfieldmap|indexlevels|levelmap)$/);
}

# Writes build configuration data via cfgread::write_cfg_file, using the same
# key patterns as read_build_cfg.
#
# $filename - path of the build configuration file to write.
# $data     - build configuration data, laid out as described above.
sub write_build_cfg {
    my ($filename, $data) = @_;

    return &cfgread::write_cfg_file ($filename, $data,
        q/^(infodbtype|builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes|separate_cjk)$/,
        q/^(indexmap|subcollectionmap|languagemap|notbuilt|indexfields|indexfieldmap|indexlevels|levelmap)$/);
}

# Locates the configuration file of the current collection.
#
# $out - output handle handed to gsprintf for the error message.
#
# Uses the GSDLCOLLECTDIR and GSDLCOLLECTION environment variables.  Returns
# the two-element list (filename, type), where type is "gs2" for custom.cfg or
# collect.cfg and "gs3" for collectionConfig.xml.  Dies when none of the three
# candidate files exists.
sub get_collect_cfg_name {
    my ($out) = @_;

    # First check if there's a
    # gsdl/collect/COLLECTION/custom/COLLECTION/etc/custom.cfg file. This
    # customization was added for DLC by Stefan, 30/6/2007.
    my $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "custom",
                                              $ENV{'GSDLCOLLECTION'}, "etc",
                                              "custom.cfg");
    if (-e $configfilename) {
        return ($configfilename, "gs2");
    }

    # Now check if there is a collect.cfg file in the usual place, i.e. it is gs2.
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc",
                                           "collect.cfg");
    if (-e $configfilename) {
        return ($configfilename, "gs2");
    }

    # If we get to here we check if there is a collectionConfig.xml file,
    # i.e. it is gs3.
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc",
                                           "collectionConfig.xml");
    if (-e $configfilename) {
        return ($configfilename, "gs3");
    }

    # Error. No collection configuration file.
    # Report first, then die unconditionally: the failure must not depend on
    # whatever value gsprintf happens to return.
    &gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
    die "colcfg::get_collect_cfg_name: no collection configuration file found";
}

# Selects the collection to work on and returns the result of the util call
# that was used.
#
# $site       - site name; when defined and non-empty,
#               util::use_site_collection is called, otherwise
#               util::use_collection.
# $collection - collection name.
# $collectdir - collect directory, passed through unchanged.
sub use_collection {
    my ($site, $collection, $collectdir) = @_;

    if ((defined $site) && ($site ne "")) {
        return &util::use_site_collection($site, $collection, $collectdir);
    }
    else {
        return &util::use_collection($collection, $collectdir);
    }
}

1;