source: trunk/indexers/mg/src/scripts/mgbuild.sh@ 3745

Last change on this file since 3745 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.6 KB
Line 
1#!/bin/csh -f
2###########################################################################
3#
4# mgbuild.sh -- Script used to build mg text collection.
5# Copyright (C) 1994 Neil Sharman
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20#
21# @(#)mgbuild.sh 1.9 16 Mar 1994
22#
23###########################################################################
24# mg.build
25# Creates a Full Text Retrieval database
26#
27# @(#)mgbuild.sh 1.9 16 Mar 1994
28#
29
# Flag passed to the get program on each pass; empty until something asks
# for it (the -c option below is one such place).
set complex = ""

# Walk the command line.
#   -s WORD   remember WORD in $source
#   -d WORD   remember WORD in $mgdata
#   -g WORD   remember WORD in $get
#   -c        set $complex to "-text"
# An option that expects a WORD but is the last argument is dropped without
# complaint.  The first argument that is not one of these options becomes
# $text; any further such arguments are discarded.
while ($#argv >= 1)
    switch ("$1")
    case -s:
        shift
        if ($#argv >= 1) then
            set source = $1
            shift
        endif
        breaksw
    case -d:
        shift
        if ($#argv >= 1) then
            set mgdata = $1
            shift
        endif
        breaksw
    case -g:
        shift
        if ($#argv >= 1) then
            set get = $1
            shift
        endif
        breaksw
    case -c:
        set complex = "-text"
        shift
        breaksw
    default:
        if (! $?text) then
            set text = $1
        endif
        shift
        breaksw
    endsw
end
63
# No collection name was found on the command line: print the usage summary
# and stop with a failure status.
if (! $?text) then
    set prog = $0
    echo "USAGE:"
    echo " "$prog:t" [-s config-script] [-g get-program] [-d mgdata dir] [-c] source"
    echo ""
    echo " The config-script is only needed if a non-standard build is required."
    echo " The get-program defaults to mg_get if not specified."
    exit 1
endif
73
# Directory this script was started from; the mg_* programs are run from
# the same directory.
set bindir = $0
set bindir = $bindir:h
# The :h modifier leaves a word that contains no "/" unchanged, so a bare
# script name (for example when run as "csh mgbuild.sh") would otherwise be
# used as if it were a directory.  Fall back to the current directory.
if ("$bindir" == "$0") then
    set bindir = .
endif

# if $pipe == 1 then pipe in the source text using $get and $text otherwise
# read the source text directly from the file names specified in $input_files
set pipe = 1

# Without -g, use mg_get from the script's directory and always pass it the
# "-text" flag.
if (! $?get) then
    set complex = "-text"
    set get = $bindir/mg_get
endif

# A directory given with -d overrides any MGDATA found in the environment.
if ($?mgdata) then
    setenv MGDATA $mgdata
endif

# Everything below dereferences $MGDATA.  Stop with a readable message
# rather than letting csh abort on an undefined variable.
if (! $?MGDATA) then
    echo "mgbuild.sh: MGDATA is not set; use -d or set it in the environment"
    exit 1
endif

# An optional <text>.chunks file lists the input files that are read
# directly when $pipe is not 1.
if (-e $MGDATA/${text}.chunks) then
    set input_files = `cat $MGDATA/${text}.chunks`
endif
93
###########################################################################
# Default build parameters.  Any of them may be changed by the config
# script named with -s, which is sourced once the defaults are in place.
###########################################################################

# Stemming method, as a bit mask:
#   bit 0 = case folding
#   bit 1 = S stemmer
set stem_method = 3

# [RPAP - Jan 97: Stem Index Change]
# do_indexes == 1: build the collection with full indexes to a blocked file.
# This overrides stem_method, which is then set to 0.
# Any other value: do not build with indexes.
set do_indexes = 0

# do_text == 1: do the text component of building an mg database.
set do_text = 1

# do_invf == 1: do the inversion component of building an mg database.
set do_invf = 1

# do_pass1 == 1: run pass 1 of the build.
set do_pass1 = 1

# do_pass2 == 1: run pass 2 of the build.
set do_pass2 = 1

# Amount of memory to use for the pass 2 inversion.
set invf_mem = 32

# Number of interim chunks of inverted file that may be written to disc
# before they are merged into the invf file.
set num_chunks = 3

# Level of the inverted file that will be generated.
set invf_level = 2

# strip_sgml == 1: SGML tags are stripped in the inversion phase.
# Otherwise SGML tags are kept.
set strip_sgml = 1

# Interval between trace entries, in Mb.  If this variable is not set, no
# trace entries are generated.
set trace = 10

# Number of bits of precision to be given to the approximate weights.
set weight_bits = 6

# Command-line arguments for the mg_compression_dict program.
#### Note: -S is set so novel words can be encoded, as this will happen if
#### mgmerge is used.
set mcd = -S

# Let the config script, if one was given, modify the parameters above.
# (This has to stay a multi-line if: the one-line form would substitute
# ${source} even when the variable is not set.)
if ($?source) then
    source ${source}
endif

# [RPAP - Jan 97: Stem Index Change]
# Building with indexes forces stem_method to 0.
if ("$do_indexes" == "1") then
    set stem_method = 0
endif
170
171
# Collection name: the text name, with "-p" appended when the inverted file
# level is 3.
set coll_name = ${text}
if ("$invf_level" == "3") then
    set coll_name = ${text}-p
endif

# Create the collection directory under $MGDATA unless something with that
# name already exists.  A failed mkdir stops the build here, in line with
# the status check that follows every other command in this script.
if (! -e $MGDATA/${coll_name}) then
    mkdir $MGDATA/${coll_name}
    if ("$status" != "0") exit 1
endif

# Base name handed to the mg programs with -f: <collection>/<collection>.
set bname = ${coll_name}/${coll_name}
186
187
# Assemble the mg_passes argument lists for pass 1 and pass 2.  Both start
# from the base name and inverted file level; options are then appended in
# a fixed order depending on the build parameters.
set pass1 = (-f $bname -$invf_level -m $invf_mem -s $stem_method)
set pass2 = (-f $bname -$invf_level -c $num_chunks)

# -G is added when SGML stripping is switched on.
if ($strip_sgml) then
    set pass1 = ($pass1 -G)
    set pass2 = ($pass2 -G)
endif

# -t <interval> is added when a trace interval is set.
if ($?trace) then
    set pass1 = ($pass1 -t $trace)
    set pass2 = ($pass2 -t $trace)
endif

# -T1 / -T2 are added when the text component is to be built.
if ($do_text) then
    set pass1 = ($pass1 -T1)
    set pass2 = ($pass2 -T2)
endif

# -I1 / -I2 are added when the inversion component is to be built.
if ($do_invf) then
    set pass1 = ($pass1 -I1)
    set pass2 = ($pass2 -I2)
endif

# -n <name> is added when the config script has set $trace_name.
if ($?trace_name) then
    set pass1 = ($pass1 -n $trace_name)
    set pass2 = ($pass2 -n $trace_name)
endif

# -C <value> is added to pass 2 only, when the config script has set
# $comp_stats.
if ($?comp_stats) then
    set pass2 = ($pass2 -C $comp_stats)
endif
220
# Opening banner: host name, date, and what is being built into what.
echo "-----------------------------------"
echo "`uname -n`, `date`"
echo "${text} --> ${bname}"
echo "-----------------------------------"

# When the text is piped in through the get program and $complex is not
# empty, run the get program's -init step first; give up if it fails.
if ($pipe && ("$complex" != "")) then
    echo "$get $text -init"
    $get $text -init
    if ($status != 0) exit 1
    echo "-----------------------------------"
endif
233
# Pass 1: run mg_passes over the source text, then mg_perf_hash_build and
# mg_compression_dict.  Each command is echoed before it runs and the
# script stops with status 1 as soon as one of them fails.
if ($do_pass1) then
    if (! $pipe) then
        # Read the source text straight from the files in $input_files.
        echo mg_passes ${pass1} ${input_files}
        $bindir/mg_passes $pass1 $input_files
        if ($status != 0) exit 1
    else if ($?pass1filter) then
        # Pipe the text from the get program through the pass 1 filter.
        echo "$get $text $complex | $pass1filter | mg_passes ${pass1}"
        $get $text $complex | $pass1filter | $bindir/mg_passes $pass1
        if ($status != 0) exit 1
    else
        # Pipe the text from the get program directly into mg_passes.
        echo "$get $text $complex | mg_passes ${pass1}"
        $get $text $complex | $bindir/mg_passes $pass1
        if ($status != 0) exit 1
    endif
    echo "-----------------------------------"

    echo "mg_perf_hash_build -f ${bname}"
    $bindir/mg_perf_hash_build -f $bname
    if ($status != 0) exit 1

    echo "-----------------------------------"
    echo "mg_compression_dict -f ${bname} ${mcd}"
    $bindir/mg_compression_dict -f $bname $mcd
    if ($status != 0) exit 1
    echo "-----------------------------------"
endif
262
# Pass 2: run mg_passes over the source text a second time with the pass 2
# argument list.  The command is echoed before it runs and the script stops
# with status 1 if it fails.
if ($do_pass2) then
    if (! $pipe) then
        # Read the source text straight from the files in $input_files.
        echo mg_passes ${pass2} ${input_files}
        $bindir/mg_passes $pass2 $input_files
        if ($status != 0) exit 1
    else if ($?pass2filter) then
        # Pipe the text from the get program through the pass 2 filter.
        echo "$get $text $complex | $pass2filter | mg_passes ${pass2}"
        $get $text $complex | $pass2filter | $bindir/mg_passes $pass2
        if ($status != 0) exit 1
    else
        # Pipe the text from the get program directly into mg_passes.
        echo "$get $text $complex | mg_passes ${pass2}"
        $get $text $complex | $bindir/mg_passes $pass2
        if ($status != 0) exit 1
    endif
    echo "-----------------------------------"
endif
281
# Run mg_weights_build with the configured number of weight bits.
echo "mg_weights_build -f ${bname} -b ${weight_bits}"
$bindir/mg_weights_build -f $bname -b $weight_bits
if ($status != 0) exit 1

echo "-----------------------------------"

# Run mg_invf_dict with -b 4096.
echo "mg_invf_dict -f ${bname} -b 4096"
$bindir/mg_invf_dict -f $bname -b 4096
if ($status != 0) exit 1

echo "-----------------------------------"

# [RPAP - Jan 97: Stem Index Change]
# When indexes were requested, run mg_stem_idx three times, with -s1, -s2
# and -s3 in turn.  A blank line is printed between consecutive runs.
if ("$do_indexes" == "1") then
    foreach stem_level (1 2 3)
        if ($stem_level != 1) echo ""
        echo "mg_stem_idx -f ${bname} -b 4096 -s${stem_level}"
        $bindir/mg_stem_idx -f $bname -b 4096 -s${stem_level}
        if ($status != 0) exit 1
    end

    echo "-----------------------------------"
endif
314
# Run mgstat -E on the finished collection.
echo "mgstat -f ${bname} -E"
$bindir/mgstat -f $bname -E
if ($status != 0) exit 1

# Counterpart of the -init step: when the text was piped in through the get
# program and $complex is not empty, run the get program's -cleanup step.
if ($pipe && ("$complex" != "")) then
    echo "-----------------------------------"
    echo "$get $text -cleanup"
    $get $text -cleanup
    if ($status != 0) exit 1
endif

# Closing banner: host name and date.
echo "-----------------------------------"
echo "`uname -n`, `date`"
echo "-----------------------------------"

# This script does not build the fast-loading compression dictionary; print
# the command that would.
echo "-- The fast-loading compression dictionary has not been built."
echo "-- If you wish to build it, execute the following command:"
echo "-- "$bindir/mg_fast_comp_dict -f ${bname}

echo "-----------------------------------"

echo ""
339
Note: See TracBrowser for help on using the repository browser.