source: gs2-extensions/parallel-building/trunk/src/perllib/colcfg.pm@ 24682

Last change on this file since 24682 was 24682, checked in by jmt12, 13 years ago

Restored the complexmeta flag needed to allow parallel importing with metadata.xml file to work properly

File size: 9.8 KB
Line 
1###########################################################################
2#
3# colcfg.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads/writes the collection configuration files:
27# collect.cfg/collectionConfig.xml and build.cfg/buildConfig.xml
28
29package colcfg;
30
31use cfgread;
32use gsprintf 'gsprintf';
33use strict;
34
35# the collection configuration file data is stored in the form
36#
37# {'infodbtype'}->string
38# {'creator'}->string
39# {'public'}->string
40# {'defaultindex'}->string
41# {'importdir'}->string
42# {'archivedir'}->string
43# {'cachedir'}->string
44# {'builddir'}->string
45# {'removeold'}->string
46# {'textcompress'}->string
47# {'buildtype'}->string
48# {'orthogonalbuildtypes'}->array of strings
49# {'maxnumeric'}->string
50# {'separate_cjk'}->string
51# {'sections_index_document_metadata'}->string (always, unless_section_metadata_exists)
52# {'languagemetadata'} -> string
53# {'maintainer'}->array of strings
54# {'languages'}->array of strings
55# {'indexsubcollections'}->array of strings
56# {'indexes'}->array of strings
57# {'indexoptions'}->array of strings (stem, casefold, accentfold)
58# {'dontbuild'}->array of strings
59# {'dontgdbm'}->array of strings
60# {'mirror'}->array of strings
61# {'phind'}->array of strings
62# {'plugout'}->array of strings
63# {'levels'}->array of strings (for mgpp eg Section, Paragraph)
64# {'searchtype'}->array of strings (for mgpp, form or plain)
65
66# {'subcollection'}->hash of key-value pairs
67
68# {'acquire'}->array of arrays of strings
69# {'plugin'}->array of arrays of strings
70# {'classify'}->array of arrays of strings
71
72# {'collectionmeta'}->hash of key->hash of param-value -used
73# for language specification
74# for example, collectionmeta->collectionname->default->demo
75# ->mi->maori demo
76
# Convenience front end for reading a collection configuration file.
#
#   $filename - path of the configuration file to read
#   $gs_mode  - "gs2" (collect.cfg) or "gs3" (collectionConfig.xml)
#
# Returns the value produced by the mode-specific reader (called in scalar
# context).  For any other mode a two-line message is printed on STDERR and
# undef is returned.
sub read_collection_cfg {
    my ($filename, $gs_mode) = @_;

    if ($gs_mode eq "gs2") {
        my $gs2_cfg = &colcfg::read_collect_cfg($filename);
        return $gs2_cfg;
    }

    if ($gs_mode eq "gs3") {
        my $gs3_cfg = &colcfg::read_collection_cfg_xml($filename);
        return $gs3_cfg;
    }

    # Neither gs2 nor gs3: report the problem and hand back undef.
    print STDERR "Failed to read collection configuration file\n";
    print STDERR " Unrecognized mode: $gs_mode\n";

    my $no_cfg = undef;
    return $no_cfg;
}
95
# Convenience front end for writing a collection configuration file.
# (The original author was unsure whether anything still calls this.)
#
#   $filename        - path of the configuration file to write
#   $collectcfg_data - configuration data to write out
#   $gs_mode         - "gs2" (collect.cfg) or "gs3" (collectionConfig.xml)
#
# Delegates to the mode-specific writer.  For any other mode a two-line
# message is printed on STDERR and nothing is written.
sub write_collection_cfg {
    my ($filename, $collectcfg_data, $gs_mode) = @_;

    return &colcfg::write_collect_cfg($filename, $collectcfg_data)
        if $gs_mode eq "gs2";

    return &colcfg::write_collection_cfg_xml($filename, $collectcfg_data)
        if $gs_mode eq "gs3";

    # Neither gs2 nor gs3: report the problem.
    print STDERR "Failed to write collection configuration file\n";
    print STDERR " Unrecognized mode: $gs_mode\n";
}
111
112# the build configuration file data is stored in the form
113#
114# {'infodbtype'}->string
115# {'builddate'}->string
116# {'buildtype'}->string
117# {'orthogonalbuildtypes'}->array of strings
118# {'metadata'}->array of strings
119# {'languages'}->array of strings
120# {'numdocs'}->string
121# {'numsections'}->string
122# {'numwords'}->string
123# {'numbytes'}->string
124# {'maxnumeric'}->string
125# {'indexfields'}->array of strings
126# {'indexfieldmap'}->array of strings in the form "field->FI"
127# {'indexmap'} -> array of strings
128# {'indexlevels'} -> array of strings
129# {'stemindexes'} -> string (int)
130# {'textlevel'}->string
131# {'levelmap'} -> array of strings in the form "level->shortname"
132
# Convenience front end for reading a build configuration file.
#
#   $filename - path of the configuration file to read
#   $gs_mode  - "gs2" (build.cfg) or "gs3" (buildConfig.xml)
#
# Returns the value produced by the mode-specific reader (called in scalar
# context).  For any other mode a two-line message is printed on STDERR and
# undef is returned.
sub read_building_cfg {
    my ($filename, $gs_mode) = @_;

    # One reader per supported mode.
    my %reader_for = (
        "gs2" => \&colcfg::read_build_cfg,
        "gs3" => \&colcfg::read_build_cfg_xml,
    );

    my $buildcfg = undef;

    if (exists $reader_for{$gs_mode}) {
        $buildcfg = $reader_for{$gs_mode}->($filename);
    }
    else {
        print STDERR "Failed to read building configuration file\n";
        print STDERR " Unrecognized mode: $gs_mode\n";
    }

    return $buildcfg;
}
151
152# convenience method for writing out either build.cfg/buildConfig.xml
153# haven't got one, as gs3 version needs extra parameters
154#sub write_building_cfg {}
155
156##############################
157### gs2/gs3 specific methods
158###############################
159
160#####################################
161### collect.cfg/collectionConfig.xml
162#####################################
163
# gs2: read in collect.cfg.
#
#   $filename - path of the collect.cfg file
#
# Passes $filename to cfgread::read_cfg_file together with five patterns that
# classify configuration keys.  Going by the layout described at the top of
# this file they select, in argument order: plain strings, arrays of strings,
# hashes, arrays of arrays and hashes of hashes.  Returns whatever
# cfgread::read_cfg_file returns.
#
# All five patterns are anchored at both ends.  The array-of-strings pattern
# used to lack its leading '^', so any key that merely ended in one of the
# listed names (e.g. "...levels") was accepted as an array key.
#
# NOTE(review): 'othogonalbuildtypes' in the string list looks like a
# misspelling of 'orthogonalbuildtypes', which is (correctly) in the array
# list.  It is kept as-is; correcting the spelling here would make the key
# match the string list instead -- confirm it can simply be dropped.
sub read_collect_cfg {
    my ($filename) = @_;

    # Keys holding a single string value.
    my @string_keys = qw(
        infodbtype creator public complexmeta defaultindex importdir
        archivedir cachedir builddir removeold
        textcompress buildtype othogonalbuildtypes no_text keepold gzip
        verbosity remove_empty_classifications OIDtype OIDmetadata
        groupsize maxdocs debug mode saveas
        sortmeta removesuffix removeprefix create_images
        maxnumeric languagemetadata
        no_strip_html index sections_index_document_metadata
        store_metadata_coverage indexname indexlevel
    );

    # Keys holding an array of strings.
    my @array_keys = qw(
        maintainer languages indexsubcollections orthogonalbuildtypes
        indexes indexoptions dontbuild dontgdbm mirror levels plugout
        searchtype searchtypes
    );

    # Building the alternations with join() keeps every name separated by
    # exactly one '|', whatever the line layout above.
    my $string_pattern = '^(' . join('|', @string_keys) . ')$';
    my $array_pattern  = '^(' . join('|', @array_keys) . ')$';

    return &cfgread::read_cfg_file(
        $filename,
        $string_pattern,
        $array_pattern,
        q/^(subcollection|format)$/,
        q/^(acquire|plugin|classify)$/,
        q/^(collectionmeta)$/
    );
}
185
# gs2: write out collect.cfg.
#
#   $filename - path of the collect.cfg file to write
#   $data     - configuration data to write out
#
# Passes $filename and $data to cfgread::write_cfg_file together with five
# patterns that classify configuration keys.  Going by the layout described
# at the top of this file they select, in argument order: plain strings,
# arrays of strings, hashes, arrays of arrays and hashes of hashes.
#
# The string pattern used to be assembled from fragments, one of which was
# missing its trailing '|'.  That fused two names into the single alternative
# 'languagemetadatano_strip_html', so neither 'languagemetadata' nor
# 'no_strip_html' could match.  The names are now joined programmatically.
#
# NOTE(review): read_collect_cfg additionally accepts
# 'store_metadata_coverage', 'indexname', 'indexlevel' (strings) and
# 'plugout' (array), none of which are listed here -- confirm whether
# leaving them out of the written file is intentional.
sub write_collect_cfg {
    my ($filename, $data) = @_;

    # Keys holding a single string value.
    my @string_keys = qw(
        infodbtype creator public complexmeta defaultindex importdir
        archivedir cachedir builddir removeold
        textcompress buildtype no_text keepold gzip
        verbosity remove_empty_classifications OIDtype OIDmetadata
        groupsize maxdocs debug mode saveas
        sortmeta removesuffix removeprefix create_images
        maxnumeric languagemetadata
        no_strip_html index sections_index_document_metadata
    );

    # Keys holding an array of strings.
    my @array_keys = qw(
        maintainer languages indexsubcollections orthogonalbuildtypes
        indexes indexoptions dontbuild dontgdbm mirror levels
        searchtype searchtypes
    );

    my $string_pattern = '^(' . join('|', @string_keys) . ')$';
    my $array_pattern  = '^(' . join('|', @array_keys) . ')$';

    return &cfgread::write_cfg_file(
        $filename,
        $data,
        $string_pattern,
        $array_pattern,
        q/^(subcollection|format)$/,
        q/^(acquire|plugin|classify)$/,
        q/^(collectionmeta)$/
    );
}
206
# gs3: read in collectionConfig.xml.
#
# Takes the path of the XML file and returns whatever
# collConfigxml::read_cfg_file returns for it.
sub read_collection_cfg_xml {
    my $xml_path = shift;

    # collConfigxml is loaded on demand, the first time this sub runs.
    require collConfigxml;

    return &collConfigxml::read_cfg_file($xml_path);
}
214
# gs3: write out collectionConfig.xml.
#
# TODO: not implemented.  This is an empty placeholder: it ignores its
# arguments, writes nothing and returns nothing, so write_collection_cfg in
# "gs3" mode currently has no effect.
sub write_collection_cfg_xml {
    return;
}
219
220#####################################
221### build.cfg/buildConfig.xml
222######################################
223
# gs2: read build.cfg.
#
# Passes $filename to cfgread::read_cfg_file with two key patterns: the first
# lists the build.cfg keys stored as plain strings, the second those stored
# as arrays of strings.  Returns whatever cfgread::read_cfg_file returns.
sub read_build_cfg {
    my ($filename) = @_;

    my @string_keys = qw(
        earliestdatestamp infodbtype builddate buildtype
        numdocs numsections numwords numbytes
        maxnumeric textlevel indexstem stemindexes separate_cjk
    );
    my @array_keys = qw(
        indexmap subcollectionmap languagemap orthogonalbuildtypes
        notbuilt indexfields indexfieldmap indexlevels levelmap
    );

    my $string_pattern = '^(' . join('|', @string_keys) . ')$';
    my $array_pattern  = '^(' . join('|', @array_keys) . ')$';

    return &cfgread::read_cfg_file($filename, $string_pattern, $array_pattern);
}
233
# gs2: write build.cfg.
#
# Passes $filename and $data to cfgread::write_cfg_file with two key
# patterns: the first lists the build.cfg keys stored as plain strings, the
# second those stored as arrays of strings.
sub write_build_cfg {
    my ($filename, $data) = @_;

    my $string_pattern = '^(' . join('|',
        'earliestdatestamp', 'infodbtype', 'builddate', 'buildtype',
        'numdocs', 'numsections', 'numwords', 'numbytes',
        'maxnumeric', 'textlevel', 'indexstem', 'stemindexes',
        'separate_cjk') . ')$';

    my $array_pattern = '^(' . join('|',
        'indexmap', 'subcollectionmap', 'languagemap',
        'orthogonalbuildtypes', 'notbuilt', 'indexfields',
        'indexfieldmap', 'indexlevels', 'levelmap') . ')$';

    return &cfgread::write_cfg_file($filename, $data,
                                    $string_pattern, $array_pattern);
}
242
# gs3: read buildConfig.xml.
#
# Takes the path of the XML file and returns whatever
# buildConfigxml::read_cfg_file returns for it.
sub read_build_cfg_xml {
    my $xml_path = shift;

    # buildConfigxml is loaded on demand, the first time it is needed.
    require buildConfigxml;

    return &buildConfigxml::read_cfg_file($xml_path);
}
251
# gs3: write buildConfig.xml.
#
#   $buildoutfile - first argument handed to the writer (by its name, the
#                   output file)
#   $buildcfg     - build configuration data
#   $collectcfg   - collection configuration data
#
# Forwards exactly these three values to buildConfigxml::write_cfg_file and
# returns its result.
sub write_build_cfg_xml {
    my $buildoutfile = shift;
    my $buildcfg     = shift;
    my $collectcfg   = shift;

    # buildConfigxml is loaded on demand, the first time it is needed.
    require buildConfigxml;

    return &buildConfigxml::write_cfg_file($buildoutfile, $buildcfg, $collectcfg);
}
259
260
# Work out which collection configuration file to use, and the Greenstone
# mode that goes with it.
#
#   $out - handle passed to gsprintf for the error message
#
# The following files are probed below $ENV{'GSDLCOLLECTDIR'}, in order, and
# the first one that exists wins:
#
#   custom/$ENV{'GSDLCOLLECTION'}/etc/custom.cfg  -> "gs2"
#       (customization added for DLC by Stefan, 30/6/2007)
#   etc/collect.cfg                               -> "gs2"
#   etc/collectionConfig.xml                      -> "gs3"
#
# Returns the two-element list (filename, mode).  If none of the files
# exists, an error naming the last file tried is reported through gsprintf
# and the sub dies.
#
# The die is unconditional.  It used to be chained to gsprintf with '&&', so
# a false return from gsprintf skipped the die and the sub handed back that
# false value instead of aborting.
sub get_collect_cfg_name {
    my ($out) = @_;

    # Path components relative to GSDLCOLLECTDIR, paired with the mode.
    my @candidates = (
        [ "gs2", "custom", $ENV{'GSDLCOLLECTION'}, "etc", "custom.cfg" ],
        [ "gs2", "etc", "collect.cfg" ],
        [ "gs3", "etc", "collectionConfig.xml" ],
    );

    my $configfilename;
    foreach my $candidate (@candidates) {
        my ($gs_mode, @path_parts) = @$candidate;

        $configfilename = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, @path_parts);
        return ($configfilename, $gs_mode) if -e $configfilename;
    }

    # Error. No collection configuration file.
    &gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
    die;
}
290
291
# Select a collection, going through the site-aware helper when a site name
# is supplied.
#
#   $site       - site name; undef or "" means "no site"
#   $collection - collection name
#   $collectdir - collect directory
#
# Returns the result of util::use_site_collection when $site is a non-empty
# string, otherwise the result of util::use_collection.
sub use_collection {
    my ($site, $collection, $collectdir) = @_;

    my $no_site = !defined($site) || $site eq "";

    return &util::use_collection($collection, $collectdir) if $no_site;

    return &util::use_site_collection($site, $collection, $collectdir);
}
304
305
3061;
307
308
Note: See TracBrowser for help on using the repository browser.