source: main/trunk/greenstone2/perllib/colcfg.pm@ 24193

Last change on this file since 24193 was 23939, checked in by ak19, 13 years ago

GS3's OAIserver passes final official oaiserver validation tests: to do with earliestDatestamp. 1. Perl code (inexport, basebuilder, colcfg, buildconfigxml.pm perl files) write out the earliestDatestamp into GS3's buildconfig.xml. Whenever a full-build is performed, the archives directory is recreated. At this stage, inexport creates a new file in archives called earliestDatestamp containing the current time. Whenever an incremental build is performed, this file already exists in archive, so it is left untouched, preserving the time of the full-build (which is the earliestDatestamp). The other perl files are concerned with obtaining this value from the archives directory and writing it out to the build config file. 2. doc.pm and BasePlugout.pm write out the current date and time for each document processed under the new fields oailastmodified and oailastmodifieddate. Changes made in this commit are related to GS3 java src code changes that work in tandem.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.6 KB
Line 
1###########################################################################
2#
3# colcfg.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads/writes the collection configuration files:
27# collect.cfg/collectionConfig.xml and build.cfg/buildConfig.xml
28
29package colcfg;
30
31use cfgread;
32use gsprintf 'gsprintf';
33use strict;
34
35# the collection configuration file data is stored in the form
36#
37# {'infodbtype'}->string
38# {'creator'}->string
39# {'public'}->string
40# {'defaultindex'}->string
41# {'importdir'}->string
42# {'archivedir'}->string
43# {'cachedir'}->string
44# {'builddir'}->string
45# {'removeold'}->string
46# {'textcompress'}->string
47# {'buildtype'}->string
48# {'maxnumeric'}->string
49# {'separate_cjk'}->string
50# {'sections_index_document_metadata'}->string (always, unless_section_metadata_exists)
51# {'languagemetadata'} -> string
52# {'maintainer'}->array of strings
53# {'languages'}->array of strings
54# {'indexsubcollections'}->array of strings
55# {'indexes'}->array of strings
56# {'indexoptions'}->array of strings (stem, casefold, accentfold)
57# {'dontbuild'}->array of strings
58# {'dontgdbm'}->array of strings
59# {'mirror'}->array of strings
60# {'phind'}->array of strings
61# {'plugout'}->array of strings
62# {'levels'}->array of strings (for mgpp eg Section, Paragraph)
63# {'searchtype'}->array of strings (for mgpp, form or plain)
64
65# {'subcollection'}->hash of key-value pairs
66
67# {'acquire'}->array of arrays of strings
68# {'plugin'}->array of arrays of strings
69# {'classify'}->array of arrays of strings
70
71# {'collectionmeta'}->hash of key->hash of param-value -used
72# for language specification
73# for example, collectionmeta->collectionname->default->demo
74# ->mi->maori demo
75
# Convenience method for reading in either collect.cfg (gs2) or
# collectionConfig.xml (gs3).
#
#   $filename - path of the collection configuration file
#   $gs_mode  - "gs2" or "gs3"; selects which reader is called
#
# Returns the value produced by the selected reader (called in scalar
# context).  For any other mode an error is printed to STDERR and undef
# is returned.
sub read_collection_cfg {
    my ($filename, $gs_mode) = @_;

    my %reader_for = (
        "gs2" => \&colcfg::read_collect_cfg,
        "gs3" => \&colcfg::read_collection_cfg_xml,
    );

    my $collectcfg = undef;
    my $reader     = $reader_for{$gs_mode};

    if (defined $reader) {
        # Scalar assignment keeps the reader in scalar context regardless
        # of how this sub itself was called.
        $collectcfg = $reader->($filename);
    }
    else {
        print STDERR "Failed to read collection configuration file\n";
        print STDERR " Unrecognized mode: $gs_mode\n";
    }

    return $collectcfg;
}
94
# Convenience method for writing out either collect.cfg (gs2) or
# collectionConfig.xml (gs3).
# (Original author's note: is this ever used??)
#
#   $filename        - path of the configuration file to write
#   $collectcfg_data - configuration data handed to the writer
#   $gs_mode         - "gs2" or "gs3"; selects which writer is called
#
# Returns whatever the selected writer returns.  For any other mode an
# error is printed to STDERR (and the result of the last print is the
# sub's value, as before).
sub write_collection_cfg {
    my ($filename, $collectcfg_data, $gs_mode) = @_;

    return colcfg::write_collect_cfg($filename, $collectcfg_data)
        if $gs_mode eq "gs2";

    return colcfg::write_collection_cfg_xml($filename, $collectcfg_data)
        if $gs_mode eq "gs3";

    print STDERR "Failed to write collection configuration file\n";
    print STDERR " Unrecognized mode: $gs_mode\n";
}
110
111# the build configuration file data is stored in the form
112#
113# {'infodbtype'}->string
114# {'builddate'}->string
115# {'buildtype'}->string
116# {'metadata'}->array of strings
117# {'languages'}->array of strings
118# {'numdocs'}->string
119# {'numsections'}->string
120# {'numwords'}->string
121# {'numbytes'}->string
122# {'maxnumeric'}->string
123# {'indexfields'}->array of strings
124# {'indexfieldmap'}->array of strings in the form "field->FI"
125# {'indexmap'} -> array of strings
126# {'indexlevels'} -> array of strings
127# {'stemindexes'} -> string (int)
128# {'textlevel'}->string
129# {'levelmap'} -> array of strings in the form "level->shortname"
130
# Convenience method for reading in either build.cfg (gs2) or
# buildConfig.xml (gs3).
#
#   $filename - path of the build configuration file
#   $gs_mode  - "gs2" or "gs3"; selects which reader is called
#
# Returns the value produced by the selected reader (called in scalar
# context).  For any other mode an error is printed to STDERR and undef
# is returned.
sub read_building_cfg {
    my ($filename, $gs_mode) = @_;

    # scalar() keeps the readers in scalar context whatever context this
    # sub was called in.
    if ($gs_mode eq "gs2") {
        return scalar colcfg::read_build_cfg($filename);
    }
    if ($gs_mode eq "gs3") {
        return scalar colcfg::read_build_cfg_xml($filename);
    }

    print STDERR "Failed to read building configuration file\n";
    print STDERR " Unrecognized mode: $gs_mode\n";

    # A single undef (not an empty list) is what callers have always
    # received here, even in list context.
    return undef;
}
149
150# convenience method for writing out either build.cfg/buildConfig.xml
151# haven't got one, as gs3 version needs extra parameters
152#sub write_building_cfg {}
153
154##############################
155### gs2/gs3 specific methods
156###############################
157
158#####################################
159### collect.cfg/collectionConfig.xml
160#####################################
161
# gs2: read in collect.cfg.
#
#   $filename - path of the collect.cfg file
#
# Hands the file to cfgread::read_cfg_file together with five key
# patterns.  Going by the data-structure description at the top of this
# file, they select, in order: plain string values, arrays of strings,
# hashes (subcollection, format), arrays of arrays (acquire, plugin,
# classify) and hashes of hashes (collectionmeta).
#
# Every pattern is anchored at both ends.  The array-of-strings pattern
# used to lack its leading '^', so any key that merely ended in one of the
# listed names was accepted as that kind of entry.
#
# Returns whatever cfgread::read_cfg_file returns.
sub read_collect_cfg {
    my ($filename) = @_;

    my @string_keys = qw(
        infodbtype creator public defaultindex importdir
        archivedir cachedir builddir removeold
        textcompress buildtype no_text keepold gzip
        verbosity remove_empty_classifications OIDtype OIDmetadata
        groupsize maxdocs debug mode saveas
        sortmeta removesuffix removeprefix create_images
        maxnumeric languagemetadata
        no_strip_html index sections_index_document_metadata
        store_metadata_coverage
    );

    my @array_keys = qw(
        maintainer languages indexsubcollections
        indexes indexoptions dontbuild dontgdbm mirror levels plugout
        searchtype searchtypes
    );

    # Building the alternations with join() makes it impossible to drop a
    # '|' between two names.
    my $string_pattern = '^(' . join('|', @string_keys) . ')$';
    my $array_pattern  = '^(' . join('|', @array_keys) . ')$';

    return cfgread::read_cfg_file($filename,
                                  $string_pattern,
                                  $array_pattern,
                                  q/^(subcollection|format)$/,
                                  q/^(acquire|plugin|classify)$/,
                                  q/^(collectionmeta)$/);
}
183
# gs2: write out collect.cfg.
#
#   $filename - path of the collect.cfg file to write
#   $data     - collection configuration data
#
# Hands the data to cfgread::write_cfg_file together with five key
# patterns.  Going by the data-structure description at the top of this
# file, they select, in order: plain string values, arrays of strings,
# hashes (subcollection, format), arrays of arrays (acquire, plugin,
# classify) and hashes of hashes (collectionmeta).
#
# Two defects of the earlier pattern lists are fixed here:
#   * a missing '|' fused "languagemetadata" and "no_strip_html" into the
#     single alternative "languagemetadatano_strip_html", so neither key
#     could ever match;
#   * "store_metadata_coverage" and "plugout" are accepted when reading
#     collect.cfg but were absent from the patterns used for writing.
#
# Returns whatever cfgread::write_cfg_file returns.
sub write_collect_cfg {
    my ($filename, $data) = @_;

    my @string_keys = qw(
        infodbtype creator public defaultindex importdir
        archivedir cachedir builddir removeold
        textcompress buildtype no_text keepold gzip
        verbosity remove_empty_classifications OIDtype OIDmetadata
        groupsize maxdocs debug mode saveas
        sortmeta removesuffix removeprefix create_images
        maxnumeric languagemetadata
        no_strip_html index sections_index_document_metadata
        store_metadata_coverage
    );

    my @array_keys = qw(
        maintainer languages indexsubcollections
        indexes indexoptions dontbuild dontgdbm mirror levels plugout
        searchtype searchtypes
    );

    # Building the alternations with join() makes it impossible to drop a
    # '|' between two names.
    my $string_pattern = '^(' . join('|', @string_keys) . ')$';
    my $array_pattern  = '^(' . join('|', @array_keys) . ')$';

    return cfgread::write_cfg_file($filename, $data,
                                   $string_pattern,
                                   $array_pattern,
                                   q/^(subcollection|format)$/,
                                   q/^(acquire|plugin|classify)$/,
                                   q/^(collectionmeta)$/);
}
204
# gs3: read in collectionConfig.xml.
#
#   $filename - path of the collectionConfig.xml file
#
# The collConfigxml module is loaded on demand, the first time this sub
# runs.  Returns whatever collConfigxml::read_cfg_file returns.
sub read_collection_cfg_xml {
    my ($xml_path) = @_;

    require collConfigxml;

    return collConfigxml::read_cfg_file($xml_path);
}
212
# gs3: write out collectionConfig.xml.
#
# NOT IMPLEMENTED.  This sub has never written anything; it used to be an
# empty body, so a request to write a gs3 collection configuration looked
# as if it had succeeded.  It now reports on STDERR that nothing was
# written.
#
#   $filename        - path the caller wanted written (only reported)
#   $collectcfg_data - configuration data (ignored)
#
# Returns nothing (empty list / undef), exactly as the empty stub did.
sub write_collection_cfg_xml {
    my ($filename, $collectcfg_data) = @_;

    my $target = defined $filename ? $filename : "(no filename given)";

    print STDERR "Failed to write collection configuration file\n";
    print STDERR " Writing collectionConfig.xml is not implemented: $target\n";

    return;
}
217
218#####################################
219### build.cfg/buildConfig.xml
220######################################
221
# gs2: read build.cfg.
#
#   $filename - path of the build.cfg file
#
# Hands the file to cfgread::read_cfg_file with two key patterns; going
# by the build-configuration description earlier in this file, the first
# lists the single-string entries and the second the array-of-strings
# entries.
#
# Returns whatever cfgread::read_cfg_file returns.
sub read_build_cfg {
    my ($filename) = @_;

    my $string_keys = q/^(earliestdatestamp|infodbtype|builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes|separate_cjk)$/;
    my $array_keys  = q/^(indexmap|subcollectionmap|languagemap|notbuilt|indexfields|indexfieldmap|indexlevels|levelmap)$/;

    return cfgread::read_cfg_file($filename, $string_keys, $array_keys);
}
231
# gs2: write build.cfg.
#
#   $filename - path of the build.cfg file to write
#   $data     - build configuration data
#
# Hands the data to cfgread::write_cfg_file with the same two key patterns
# that are used for reading build.cfg: single-string entries first,
# array-of-strings entries second.
#
# Returns whatever cfgread::write_cfg_file returns.
sub write_build_cfg {
    my ($filename, $data) = @_;

    my $string_keys = q/^(earliestdatestamp|infodbtype|builddate|buildtype|numdocs|numsections|numwords|numbytes|maxnumeric|textlevel|indexstem|stemindexes|separate_cjk)$/;
    my $array_keys  = q/^(indexmap|subcollectionmap|languagemap|notbuilt|indexfields|indexfieldmap|indexlevels|levelmap)$/;

    return cfgread::write_cfg_file($filename, $data, $string_keys, $array_keys);
}
240
# gs3: read buildConfig.xml.
#
#   $filename - path of the buildConfig.xml file
#
# The buildConfigxml module is loaded on demand, the first time it is
# needed.  Returns whatever buildConfigxml::read_cfg_file returns.
sub read_build_cfg_xml {
    my ($xml_path) = @_;

    require buildConfigxml;

    return buildConfigxml::read_cfg_file($xml_path);
}
249
# gs3: write buildConfig.xml.
#
#   $buildoutfile - path of the buildConfig.xml file to write
#   $buildcfg     - build configuration data
#   $collectcfg   - collection configuration data, passed along as well
#
# The buildConfigxml module is loaded on demand, the first time it is
# needed.  Returns whatever buildConfigxml::write_cfg_file returns.
sub write_build_cfg_xml {
    my ($out_path, $build_data, $collect_data) = @_;

    require buildConfigxml;

    return buildConfigxml::write_cfg_file($out_path, $build_data, $collect_data);
}
257
258
# Method to check for the filename of the collection configuration file,
# and with it the Greenstone mode.
#
#   $out - passed through to gsprintf as its first argument when no
#          configuration file can be found (presumably the output handle)
#
# The following files are tried in this order, all below
# $ENV{'GSDLCOLLECTDIR'}; the first one that exists wins:
#
#   1. custom/$ENV{'GSDLCOLLECTION'}/etc/custom.cfg   -> "gs2"
#      (gsdl/collect/COLLECTION/custom/COLLECTION/etc/custom.cfg; this
#      customization was added for DLC by Stefan, 30/6/2007)
#   2. etc/collect.cfg                                -> "gs2"
#   3. etc/collectionConfig.xml                       -> "gs3"
#
# Returns the two-element list (filename, mode).
#
# Dies if none of the files exists, after reporting the last filename
# tried through gsprintf.  The die used to be written as
# "gsprintf(...) && die" and was therefore skipped whenever gsprintf
# returned a false value, letting the sub fall through and return that
# false value instead of a (filename, mode) pair.
sub get_collect_cfg_name {
    my ($out) = @_;

    my @candidates = (
        [ "gs2", $ENV{'GSDLCOLLECTDIR'}, "custom", $ENV{'GSDLCOLLECTION'}, "etc", "custom.cfg" ],
        [ "gs2", $ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg" ],
        [ "gs3", $ENV{'GSDLCOLLECTDIR'}, "etc", "collectionConfig.xml" ],
    );

    my $configfilename;
    for my $candidate (@candidates) {
        my ($gs_mode, @path_parts) = @$candidate;

        $configfilename = util::filename_cat(@path_parts);
        return ($configfilename, $gs_mode) if -e $configfilename;
    }

    # Error. No collection configuration file.
    gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename);
    die "colcfg::get_collect_cfg_name: no collection configuration file found";
}
288
289
# Selects a collection, going through the site-aware helper when a site
# name is given.
#
#   $site       - site name; undef or "" means "no site"
#   $collection - collection name
#   $collectdir - collect directory
#
# With a non-empty $site this returns
# util::use_site_collection($site, $collection, $collectdir); otherwise it
# returns util::use_collection($collection, $collectdir).
sub use_collection {
    my ($site, $collection, $collectdir) = @_;

    my $has_site = (defined $site) && ($site ne "");

    return $has_site
        ? util::use_site_collection($site, $collection, $collectdir)
        : util::use_collection($collection, $collectdir);
}
302
303
3041;
305
306
Note: See TracBrowser for help on using the repository browser.