source: main/trunk/greenstone2/perllib/colcfg.pm@ 31899

Last change on this file since 31899 was 31396, checked in by ak19, 7 years ago

2 general bugfixes. Bugs found when preparing to test implementation of OAI deletion policy. 1. buildcol.pl needs to accept but toss OIDtype flag and value, so that we can run full-rebuild.pl and incremental-rebuild.pl with this flag (which will then pass the flag to the appropriate import script, which needs it, and the appropriate buildcol script which used to reject it with an error message). 2. OIDtype and OIDmetadata can end up all lowercase in the collect.cfg file when created by GLI. However, this is not recognised in the perl code, which expects OIDtype and OIDmetadata and sets up keys into hashes with this. Fixed the code to deal with changes to these two alone (not making it case insensitive in general).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 11.9 KB
Line 
1###########################################################################
2#
3# colcfg.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads/writes the collection configuration files:
27# collect.cfg/collectionConfig.xml and build.cfg/buildConfig.xml
28
29package colcfg;
30
31use cfgread;
32use gsprintf 'gsprintf';
33use util;
34use FileUtils;
35
36use strict;
37
38# the collection configuration file data is stored in the form
39#
40# {'infodbtype'}->string
41# {'creator'}->string
42# {'public'}->string
43# {'complexmeta'}->string (true, false)
44# {'defaultindex'}->string
45# {'importdir'}->string
46# {'archivedir'}->string
47# {'cachedir'}->string
48# {'builddir'}->string
49# {'removeold'}->string
50# {'textcompress'}->string
51# {'buildtype'}->string
52# {'orthogonalbuildtypes'}->array of strings
53# {'maxnumeric'}->string
54# {'separate_cjk'}->string
55# {'sections_index_document_metadata'}->string (never, always, unless_section_metadata_exists)
56# {'sections_sort_on_document_metadata'}->string (never, always, unless_section_metadata_exists)
57# {'languagemetadata'} -> string
58# {'maintainer'}->array of strings
59# {'languages'}->array of strings
60# {'indexsubcollections'}->array of strings
61# {'indexes'}->array of strings
62# {'indexoptions'}->array of strings (stem, casefold, accentfold, separate_cjk)
63# {'dontbuild'}->array of strings
64# {'dontgdbm'}->array of strings
65# {'mirror'}->array of strings
66# {'phind'}->array of strings
67# {'plugout'}->array of strings
68# {'levels'}->array of strings (for mgpp eg Section, Paragraph)
69# {'searchtype'}->array of strings (for mgpp, form or plain)
70# {'sortfields'}->array of strings (for lucene)
71# {'subcollection'}->hash of key-value pairs
72
73# {'acquire'}->array of arrays of strings
74# {'plugin'}->array of arrays of strings
75# {'classify'}->array of arrays of strings
76
# {'collectionmeta'}->hash of key->hash of param-value, where the param
#                     is used for language specification.
# For example: collectionmeta->collectionname->default->demo
#                                            ->mi->maori demo
81
# Convenience reader for the collection configuration file.
#
# $filename - path of the configuration file to read
# $gs_mode  - "gs2" (collect.cfg) or "gs3" (collectionConfig.xml)
#
# Returns the result of the mode-specific reader, evaluated in scalar
# context.  For any other mode a two-line diagnostic is printed to STDERR
# and undef is returned.
sub read_collection_cfg {
    my ($filename, $gs_mode) = @_;

    # Mode name -> reader.  The references are taken at call time, so the
    # readers may be defined further down in this file.
    my %reader_for = (
        "gs2" => \&colcfg::read_collect_cfg,
        "gs3" => \&colcfg::read_collection_cfg_xml,
    );

    my $reader = $reader_for{$gs_mode};
    unless (defined $reader) {
        print STDERR "Failed to read collection configuration file\n";
        print STDERR " Unrecognized mode: $gs_mode\n";
        return undef;
    }

    # Assigning to a scalar keeps the reader call in scalar context.
    my $parsed_cfg = $reader->($filename);
    return $parsed_cfg;
}
100
# Convenience writer for the collection configuration file.
# (The original author was unsure whether anything calls this.)
#
# $filename        - path of the configuration file to write
# $collectcfg_data - configuration data handed to the mode-specific writer
# $gs_mode         - "gs2" (collect.cfg) or "gs3" (collectionConfig.xml)
#
# Returns whatever the selected writer returns.  For any other mode a
# two-line diagnostic is printed to STDERR and the result of the final
# print is returned.
sub write_collection_cfg {
    my ($filename, $collectcfg_data, $gs_mode) = @_;

    return colcfg::write_collect_cfg($filename, $collectcfg_data)
        if $gs_mode eq "gs2";

    return colcfg::write_collection_cfg_xml($filename, $collectcfg_data)
        if $gs_mode eq "gs3";

    print STDERR "Failed to write collection configuration file\n";
    print STDERR " Unrecognized mode: $gs_mode\n";
}
116
117# the build configuration file data is stored in the form
118#
119# {'infodbtype'}->string
120# {'builddate'}->string
121# {'buildtype'}->string
122# {'orthogonalbuildtypes'}->array of strings
123# {'metadata'}->array of strings
124# {'languages'}->array of strings
125# {'numdocs'}->string
126# {'numsections'}->string
127# {'numwords'}->string
128# {'numbytes'}->string
129# {'maxnumeric'}->string
130# {'indexfields'}->array of strings
131# {'indexfieldmap'}->array of strings in the form "field->FI"
132# {'indexmap'} -> array of strings
133# {'indexlevels'} -> array of strings
134# {'indexsortfields'} -> array of strings
135# {'indexsortfieldmap'} -> array of strings in the form "field->byFI"
136# {'stemindexes'} -> string (int)
137# {'textlevel'}->string
138# {'levelmap'} -> array of strings in the form "level->shortname"
139
# Convenience reader for the build configuration file.
#
# $filename - path of the configuration file to read
# $gs_mode  - "gs2" (build.cfg) or "gs3" (buildConfig.xml)
#
# Returns the result of the mode-specific reader, evaluated in scalar
# context.  For any other mode a two-line diagnostic is printed to STDERR
# and undef is returned.
sub read_building_cfg {
    my ($filename, $gs_mode) = @_;

    return scalar(colcfg::read_build_cfg($filename))
        if $gs_mode eq "gs2";

    return scalar(colcfg::read_build_cfg_xml($filename))
        if $gs_mode eq "gs3";

    print STDERR "Failed to read building configuration file\n";
    print STDERR " Unrecognized mode: $gs_mode\n";
    return undef;
}
158
159# convenience method for writing out either build.cfg/buildConfig.xml
160# haven't got one, as gs3 version needs extra parameters
161#sub write_building_cfg {}
162
163##############################
164### gs2/gs3 specific methods
165###############################
166
167#####################################
168### collect.cfg/collectionConfig.xml
169#####################################
170
# gs2: read in collect.cfg.
#
# Passes $filename to cfgread::read_cfg_file_unicode together with five key
# patterns and returns whatever that call returns.  Going by the data layout
# described at the top of this file, the patterns select, in order: plain
# string keys, array-of-strings keys, hash keys, array-of-arrays keys and
# hash-of-hashes keys (the actual handling lives in cfgread -- confirm there).
sub read_collect_cfg {
    my ($filename) = @_;

    # NOTE(review): 'othogonalbuildtypes' looks like a misspelling of
    # 'orthogonalbuildtypes' (which is listed with the array keys below).
    # It is kept as-is so the set of accepted keys does not change.
    my $string_keys =
        q/^(infodbtype|creator|public|complexmeta|defaultindex|importdir|/ .
        q/archivedir|exportdir|cachedir|builddir|removeold|/ .
        q/textcompress|buildtype|othogonalbuildtypes|no_text|keepold|gzip|/ .
        q/verbosity|remove_empty_classifications|OIDtype|OIDmetadata|oidtype|oidmetadata|/ .
        q/groupsize|maxdocs|debug|mode|saveas|saveas_options|/ .
        q/sortmeta|removesuffix|removeprefix|create_images|/ .
        q/maxnumeric|languagemetadata|/ .
        q/no_strip_html|index|sections_index_document_metadata|sections_sort_on_document_metadata|/ .
        q/store_metadata_coverage|indexname|indexlevel)$/;

    # Anchored at the start like every other pattern here; without the '^'
    # any key that merely ends in one of these names would also match.
    my $array_keys =
        q/^(maintainer|languages|indexsubcollections|orthogonalbuildtypes|/ .
        q/indexes|indexoptions|dontbuild|dontgdbm|mirror|levels|sortfields|plugout|/ .
        q/searchtype|searchtypes)$/;

    my $hash_keys           = q/^(subcollection|format)$/;
    my $array_of_array_keys = q/^(acquire|plugin|classify)$/;
    my $hash_of_hash_keys   = q/^(collectionmeta)$/;

    return cfgread::read_cfg_file_unicode($filename,
                                          $string_keys,
                                          $array_keys,
                                          $hash_keys,
                                          $array_of_array_keys,
                                          $hash_of_hash_keys);
}
192
# gs2: write out collect.cfg.
#
# Passes $filename and $data to cfgread::write_cfg_file together with five
# key patterns and returns whatever that call returns.  Going by the data
# layout described at the top of this file, the patterns select, in order:
# plain string keys, array-of-strings keys, hash keys, array-of-arrays keys
# and hash-of-hashes keys (the actual handling lives in cfgread -- confirm
# there).
#
# The string pattern used to be malformed: a missing '|' fused
# 'languagemetadata' and 'no_strip_html' into a single alternative, and the
# group was closed before 'store_metadata_coverage', leaving an unmatched ')'
# that stops the pattern from compiling.  Both are repaired here.  The key
# lists also now cover every key read_collect_cfg accepts (apart from its
# misspelled 'othogonalbuildtypes'), so a read/write round trip does not
# drop settings.
sub write_collect_cfg {
    my ($filename, $data) = @_;

    my $string_keys =
        q/^(infodbtype|creator|public|complexmeta|defaultindex|importdir|/ .
        q/archivedir|exportdir|cachedir|builddir|removeold|/ .
        q/textcompress|buildtype|no_text|keepold|gzip|/ .
        q/verbosity|remove_empty_classifications|OIDtype|OIDmetadata|oidtype|oidmetadata|/ .
        q/groupsize|maxdocs|debug|mode|saveas|saveas_options|/ .
        q/sortmeta|removesuffix|removeprefix|create_images|/ .
        q/maxnumeric|languagemetadata|/ .
        q/no_strip_html|index|sections_index_document_metadata|sections_sort_on_document_metadata|/ .
        q/store_metadata_coverage|indexname|indexlevel)$/;

    my $array_keys =
        q/^(maintainer|languages|indexsubcollections|orthogonalbuildtypes|/ .
        q/indexes|indexoptions|dontbuild|dontgdbm|mirror|levels|sortfields|plugout|/ .
        q/searchtype|searchtypes)$/;

    my $hash_keys           = q/^(subcollection|format)$/;
    my $array_of_array_keys = q/^(acquire|plugin|classify)$/;
    my $hash_of_hash_keys   = q/^(collectionmeta)$/;

    return cfgread::write_cfg_file($filename, $data,
                                   $string_keys,
                                   $array_keys,
                                   $hash_keys,
                                   $array_of_array_keys,
                                   $hash_of_hash_keys);
}
214
# gs3: read in collectionConfig.xml.
#
# $filename - path of the XML configuration file
#
# Loads the collConfigxml module when called (not when this file is loaded)
# and returns whatever collConfigxml::read_cfg_file returns for $filename.
sub read_collection_cfg_xml {
    my $filename = $_[0];

    require collConfigxml;

    return collConfigxml::read_cfg_file($filename);
}
222
# gs3: write out collectionConfig.xml.
#
# NOTE(review): not implemented.  The sub ignores its arguments, writes
# nothing, and returns an empty list (undef in scalar context), so a "gs3"
# write through write_collection_cfg is silently a no-op.
sub write_collection_cfg_xml {
    return;
}
227
228#####################################
229### build.cfg/buildConfig.xml
230######################################
231
# gs2: read build.cfg.
#
# $filename - path of the build configuration file
#
# Passes $filename to cfgread::read_cfg_file with two key patterns (single
# string values first, arrays of strings second, per the build data layout
# described earlier in this file) and returns whatever that call returns.
sub read_build_cfg {
    my ($filename) = @_;

    my $string_keys =
        q/^(earliestdatestamp|infodbtype|builddate|buildtype|/ .
        q/numdocs|numsections|numwords|numbytes|maxnumeric|/ .
        q/textlevel|indexstem|stemindexes|separate_cjk)$/;

    my $array_keys =
        q/^(indexmap|subcollectionmap|languagemap|orthogonalbuildtypes|/ .
        q/notbuilt|indexfields|indexfieldmap|indexlevels|levelmap|/ .
        q/indexsortfields|indexsortfieldmap)$/;

    return cfgread::read_cfg_file($filename, $string_keys, $array_keys);
}
241
# gs2: write build.cfg.
#
# $filename - path of the build configuration file to write
# $data     - build configuration data handed to cfgread::write_cfg_file
#
# Uses two key patterns (single string values first, arrays of strings
# second, per the build data layout described earlier in this file) and
# returns whatever cfgread::write_cfg_file returns.
sub write_build_cfg {
    my ($filename, $data) = @_;

    my $string_keys =
        q/^(earliestdatestamp|infodbtype|builddate|buildtype|/ .
        q/numdocs|numsections|numwords|numbytes|maxnumeric|/ .
        q/textlevel|indexstem|stemindexes|separate_cjk)$/;

    my $array_keys =
        q/^(indexmap|subcollectionmap|languagemap|orthogonalbuildtypes|/ .
        q/notbuilt|indexfields|indexfieldmap|indexlevels|levelmap|/ .
        q/indexsortfields|indexsortfieldmap)$/;

    return cfgread::write_cfg_file($filename, $data, $string_keys, $array_keys);
}
250
# gs3: read buildConfig.xml.
#
# $filename - path of the XML build configuration file
#
# Loads the buildConfigxml module when called (not when this file is loaded)
# and returns whatever buildConfigxml::read_cfg_file returns for $filename.
sub read_build_cfg_xml {
    my $filename = $_[0];

    require buildConfigxml;

    return buildConfigxml::read_cfg_file($filename);
}
259
# gs3: write buildConfig.xml.
#
# $buildoutfile - path of the buildConfig.xml file to write
# $buildcfg     - build configuration data
# $collectcfg   - collection configuration data
#
# Loads the buildConfigxml module when called and forwards all three
# arguments, unchanged and in order, to buildConfigxml::write_cfg_file,
# returning its result.
sub write_build_cfg_xml {
    my ($buildoutfile, $buildcfg, $collectcfg) = @_;

    require buildConfigxml;

    my @writer_args = ($buildoutfile, $buildcfg, $collectcfg);
    return buildConfigxml::write_cfg_file(@writer_args);
}
267
268
# Locate the collection configuration file and work out the gs mode from
# which file exists.
#
# $out - output handle passed to gsprintf for the error message
#
# The candidates are probed in this order, all below $ENV{'GSDLCOLLECTDIR'}:
#   1. custom/$ENV{'GSDLCOLLECTION'}/etc/custom.cfg  -> "gs2"
#      (customization added for DLC by Stefan, 30/6/2007)
#   2. etc/collectionConfig.xml                      -> "gs3"
#   3. etc/collect.cfg                               -> "gs2"
#
# Returns the two-element list (filename, mode) for the first candidate
# that exists.  If none exists, reports {common.cannot_find_cfg_file} for
# the last candidate through gsprintf and dies.
sub get_collect_cfg_name_old {
    my ($out) = @_;

    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    # [ path components, gs mode ] in priority order.
    my @candidates = (
        [ [$collectdir, "custom", $ENV{'GSDLCOLLECTION'}, "etc", "custom.cfg"], "gs2" ],
        [ [$collectdir, "etc", "collectionConfig.xml"],                         "gs3" ],
        [ [$collectdir, "etc", "collect.cfg"],                                  "gs2" ],
    );

    my $configfilename;
    foreach my $candidate (@candidates) {
        my ($path_parts, $gs_mode) = @$candidate;
        $configfilename = FileUtils::filenameConcatenate(@$path_parts);
        return ($configfilename, $gs_mode) if -e $configfilename;
    }

    # Error. No collection configuration file.
    # The die must not depend on gsprintf's return value: if reporting
    # fails we still have no configuration file to hand back.
    gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
    die;
}
297
# Locate the collection configuration file for a known gs version.
#
# The version has to be supplied because a GS2 collection ported into GS3
# can contain both collect.cfg AND collectionConfig.xml, in which case
# get_collect_cfg_name_old would pick the file for the wrong version.
#
# $out        - output handle passed to gsprintf for the error message
# $gs_version - "gs2" or "gs3"
#
# Candidates, all below $ENV{'GSDLCOLLECTDIR'}, probed in order:
#   gs2: custom/$ENV{'GSDLCOLLECTION'}/etc/custom.cfg
#        (customization added for DLC by Stefan, 30/6/2007),
#        then etc/collect.cfg
#   gs3: etc/collectionConfig.xml
#
# Returns the filename of the first candidate that exists.  If none exists
# (or $gs_version is neither "gs2" nor "gs3", in which case there are no
# candidates and the reported filename is undefined), reports
# {common.cannot_find_cfg_file} through gsprintf and dies.
sub get_collect_cfg_name {
    my ($out, $gs_version) = @_;

    my $collectdir = $ENV{'GSDLCOLLECTDIR'};

    # Path components of each candidate, in priority order.
    my @candidates;
    if ($gs_version eq "gs2") {
        @candidates = (
            [$collectdir, "custom", $ENV{'GSDLCOLLECTION'}, "etc", "custom.cfg"],
            [$collectdir, "etc", "collect.cfg"],
        );
    }
    elsif ($gs_version eq "gs3") {
        @candidates = (
            [$collectdir, "etc", "collectionConfig.xml"],
        );
    }

    my $configfilename;
    foreach my $path_parts (@candidates) {
        $configfilename = FileUtils::filenameConcatenate(@$path_parts);
        return $configfilename if -e $configfilename;
    }

    # Error. No collection configuration file.
    # The die must not depend on gsprintf's return value: if reporting
    # fails we still have no configuration file to hand back.
    gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
    die;
}
338
339
340
# Select the collection to work on.
#
# $site       - site name; may be undef or "" when there is no site
# $collection - collection name
# $collectdir - collect directory
#
# With a non-empty $site the call goes to util::use_site_collection
# (site, collection, collectdir); otherwise to util::use_collection
# (collection, collectdir).  Returns whatever the chosen util routine
# returns.
sub use_collection {
    my ($site, $collection, $collectdir) = @_;

    my $has_site = defined($site) && $site ne "";

    return util::use_collection($collection, $collectdir)
        unless $has_site;

    return util::use_site_collection($site, $collection, $collectdir);
}
353
354
3551;
356
357
Note: See TracBrowser for help on using the repository browser.