source: main/tags/2.51/gsdl/perllib/colcfg.pm@ 32629

Last change on this file since 32629 was 6584, checked in by kjdon, 20 years ago

Fiddled around with segmenting for chinese text. Haven't changed how the
segmentation is done, or what character ranges are used.
But when it's done is now controlled by the collect.cfg. There is a new
option, separate_cjk, values true or false, default false. Segmentation
is only done if this is set to true. This is passed as a global option to
all plugins by the import.pl script, so the user just needs to add it
once to the config file, not as an option to all plugins.
The queryaction uses this option too to determine whether or not to segment
the query.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1###########################################################################
2#
3# colcfg.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# reads in a collection configuration file
27
28package colcfg;
29
30use cfgread;
31
32
33# the collection configuration file data is stored in the form
34#
35# {'creator'}->string
36# {'public'}->string
37# {'defaultindex'}->string
38# {'importdir'}->string
39# {'archivedir'}->string
40# {'cachedir'}->string
41# {'builddir'}->string
42# {'removeold'}->string
43# {'textcompress'}->string
44# {'buildtype'}->string
45# {'maxnumeric'}->string
46# {'separate_cjk'}->string
47
48# {'maintainer'}->array of strings
49# {'languages'}->array of strings
50# {'indexsubcollections'}->array of strings
51# {'indexes'}->array of strings
52# {'dontbuild'}->array of strings
53# {'dontgdbm'}->array of strings
54# {'mirror'}->array of strings
55# {'phind'}->array of strings
56# {'levels'}->array of strings (for mgpp eg Section, Paragraph)
57# {'searchtype'}->array of strings (for mgpp, form or plain)
58
59# {'subcollection'}->hash of key-value pairs
60
61# {'acquire'}->array of arrays of strings
62# {'plugin'}->array of arrays of strings
63# {'classify'}->array of arrays of strings
64
65# {'collectionmeta'}->hash of key->hash of param-value -used
66# for language specification
67# for example, collectionmeta->collectionname->default->demo
68# ->mi->maori demo
69
# Reads a collection configuration file (collect.cfg).
#
# Parameters:
#   $filename - path of the configuration file to read
#
# Returns whatever cfgread::read_cfg_file returns for that file.  The five
# patterns handed to it name, in order, the options stored as plain strings,
# arrays of strings, hashes, arrays of arrays and hashes of hashes (matching
# the data layout described in the comment above).
sub read_collect_cfg {
    my ($filename) = @_;

    # Options holding a single string.  Building the alternation with join
    # guarantees a '|' between every pair of names, so neighbouring options
    # such as gzip and verbosity cannot be fused into one bogus alternative.
    my $stringexp = '^(' . join('|', qw(
        creator public defaultindex importdir
        archivedir cachedir builddir removeold
        textcompress buildtype no_text keepold gzip
        verbosity allclassifications OIDtype maxdocs
        groupsize sortmeta debug mode create_images
        maxnumeric separate_cjk
    )) . ')$';

    # Options holding an array of strings.  Anchored at both ends, like every
    # other pattern here, so only the exact option names match.
    my $arrayexp = '^(' . join('|', qw(
        maintainer languages indexsubcollections
        indexes dontbuild dontgdbm mirror phind levels searchtype
    )) . ')$';

    my $hashexp       = '^(subcollection|format)$';      # hash of key-value pairs
    my $arrayarrayexp = '^(acquire|plugin|classify)$';   # array of arrays of strings
    my $hashhashexp   = '^(collectionmeta)$';            # hash of key->hash

    return &cfgread::read_cfg_file($filename, $stringexp, $arrayexp,
                                   $hashexp, $arrayarrayexp, $hashhashexp);
}
86
# Writes collection configuration data out to a file.
#
# Parameters:
#   $filename - path of the configuration file to write
#   $data     - the configuration data, in the layout described above
#
# The work is delegated to cfgread::write_cfg_file, which is given one
# pattern per storage kind (string, array, hash, array of arrays, hash of
# hashes); its result is passed back to the caller.
#
# NOTE(review): the option names listed here are not the same as those in
# read_collect_cfg (mirror, phind and format are absent, collectdir appears
# only here) -- confirm that this is intentional.
sub write_collect_cfg {
    my ($filename, $data) = @_;

    my @string_opts = qw(
        creator public defaultindex importdir
        archivedir cachedir builddir removeold
        textcompress buildtype collectdir no_text
        allclassifications maxnumeric separate_cjk
    );
    my @array_opts = qw(
        maintainer languages indexsubcollections
        indexes dontbuild dontgdbm levels searchtype
    );

    # one anchored alternation per storage kind, in the order cfgread expects
    my @patterns = (
        '^(' . join('|', @string_opts) . ')$',
        '^(' . join('|', @array_opts) . ')$',
        '^(subcollection)$',
        '^(acquire|plugin|classify)$',
        '^(collectionmeta)$',
    );

    return &cfgread::write_cfg_file($filename, $data, @patterns);
}
101
102
103# the build configuration file data is stored in the form
104#
105# {'builddate'}->string
106# {'buildtype'}->string
107# {'metadata'}->array of strings
108# {'languages'}->array of strings
109# {'numdocs'}->string
110# {'numwords'}->string
111# {'numbytes'}->string
112# {'maxnumeric'}->string
113# {'indexfields'}->array of strings
114# {'indexfieldmap'}->array of strings in the form "field->FI"
115# {'indexmap'} -> array of strings
116# {'indexlevels'} -> array of strings
117# {'textlevel'}->string
# Reads a build configuration file.
#
# Parameters:
#   $filename - path of the build configuration file to read
#
# Hands the file to cfgread::read_cfg_file together with two patterns: the
# first names the options stored as single strings, the second those stored
# as arrays of strings.  The result of that call is returned unchanged.
sub read_build_cfg {
    my ($filename) = @_;

    my @string_opts = qw(builddate buildtype numdocs numwords numbytes
                         maxnumeric textlevel);
    my @array_opts  = qw(metadata languages indexfields indexfieldmap
                         indexmap indexlevels);

    my $stringexp = '^(' . join('|', @string_opts) . ')$';
    my $arrayexp  = '^(' . join('|', @array_opts) . ')$';

    return &cfgread::read_cfg_file($filename, $stringexp, $arrayexp);
}
125
# Writes build configuration data out to a file.
#
# Parameters:
#   $filename - path of the build configuration file to write
#   $data     - the build configuration data, in the layout described above
#
# Delegates to cfgread::write_cfg_file with the same two patterns that
# read_build_cfg uses (string-valued options, then array-valued options) and
# passes its result back to the caller.
sub write_build_cfg {
    my ($filename, $data) = @_;

    # options stored as a single string
    my $stringexp = '^(' . join('|', qw(
        builddate buildtype numdocs numwords numbytes maxnumeric textlevel
    )) . ')$';

    # Options stored as arrays of strings.  indexfields and indexfieldmap are
    # two separate options; joining the names guarantees the '|' between
    # them, so both are recognised rather than a single fused name.
    my $arrayexp = '^(' . join('|', qw(
        metadata languages indexfields indexfieldmap indexmap indexlevels
    )) . ')$';

    return &cfgread::write_cfg_file($filename, $data, $stringexp, $arrayexp);
}
133
134
1351;
136
Note: See TracBrowser for help on using the repository browser.