source: trunk/indexers/mgpp/text/mg_files.h@ 13477

Last change on this file since 13477 was 13477, checked in by shaoqun, 17 years ago

added code for accentfolding

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 9.8 KB
Line 
1/**************************************************************************
2 *
3 * mg_files.h -- Routines for handling files for the auxiliary programs
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 **************************************************************************/
21
22#ifndef MG_FILES_H
23#define MG_FILES_H
24
25#include "sysfuncs.h"
26
/* Magic numbers for the different types of files.
 *
 * A magic number is a 32-bit value made of four bytes, most significant
 * byte first.  Every magic number defined here starts with 'M','G'. */

/* Pack four byte values into one magic number.
 *
 * Each argument is converted to unsigned long and masked to 8 bits before
 * it is shifted.  Shifting the raw int arguments (as was done previously)
 * overflows a signed int when the first byte is 0x80 or above, and a
 * negative or out-of-range argument would corrupt the neighbouring bytes. */
#define GEN_MAGIC(a,b,c,d) \
  ((((unsigned long)(a) & 0xffUL) << 24) | \
   (((unsigned long)(b) & 0xffUL) << 16) | \
   (((unsigned long)(c) & 0xffUL) << 8)  | \
    ((unsigned long)(d) & 0xffUL))

/* Common two-byte prefix; only used for comparison in IS_MAGIC. */
#define MAGIC_XXXX         GEN_MAGIC('M','G', 0 , 0 )

#define MAGIC_STATS_DICT   GEN_MAGIC('M','G','S','D')
#define MAGIC_AUX_DICT     GEN_MAGIC('M','G','A','D')
#define MAGIC_FAST_DICT    GEN_MAGIC('M','G','F','D')
#define MAGIC_DICT         GEN_MAGIC('M','G','D', 0 )
#define MAGIC_STEM_BUILD   GEN_MAGIC('M','G','S', 0 )
#define MAGIC_HASH         GEN_MAGIC('M','G','H', 0 )
#define MAGIC_STEM         GEN_MAGIC('M','G','s', 0 )
#define MAGIC_CHUNK        GEN_MAGIC('M','G','C', 0 )
#define MAGIC_CHUNK_TRANS  GEN_MAGIC('M','G','c', 0 )
#define MAGIC_TEXT         GEN_MAGIC('M','G','T', 0 )
#define MAGIC_TEXI         GEN_MAGIC('M','G','t', 0 )
#define MAGIC_TEXT_LEVELS  GEN_MAGIC('M','G','t','L')
#define MAGIC_TEXI_WGT     GEN_MAGIC('M','G','t','W')
#define MAGIC_INVF         GEN_MAGIC('M','G','I', 0 )
#define MAGIC_INVF_LEVELS  GEN_MAGIC('M','G','I','L')
#define MAGIC_INVI         GEN_MAGIC('M','G','i', 0 )
#define MAGIC_WGHT         GEN_MAGIC('M','G','W', 0 )
#define MAGIC_WGHT_APPROX  GEN_MAGIC('M','G','w', 0 )
#define MAGIC_PARAGRAPH    GEN_MAGIC('M','G','P', 0 )

/* Magic number 'M','G','s',x: the MAGIC_STEM prefix with x as last byte. */
#define MAGIC_STEM_GEN(x)  GEN_MAGIC('M','G','s', x)

/* Non-zero when bits 16..31 of a are the 'M','G' prefix.  Uses plain
 * unsigned long (as GEN_MAGIC does) rather than the non-standard u_long
 * typedef, so the macro does not depend on what <sys/types.h> exposes. */
#define IS_MAGIC(a) ((((unsigned long)(a)) & 0xffff0000UL) == MAGIC_XXXX)
54
55
56/* err_mode values for open_file and create_file (and for the
   open_named_file / create_named_file variants).  They select what
   happens when the operation fails:
     MG_ABORT    : an error message is generated and the program aborted.
     MG_MESSAGE  : a message is generated and NULL is returned.
     MG_CONTINUE : NULL is returned. */
57#define MG_ABORT 0
58#define MG_MESSAGE 1
59#define MG_CONTINUE 2
60
61
62
63
64
65/* File suffixes.  Each suffix has two spellings: an abbreviated one used
   when SHORT_SUFFIX is defined, and a long descriptive one otherwise. */
66
67
68/* The compression dictionary built by txt.pass1 */
69#ifdef SHORT_SUFFIX
70# define TEXT_STATS_DICT_SUFFIX ".tsd"
71#else
72# define TEXT_STATS_DICT_SUFFIX ".text.stats"
73#endif
74
75/* The compression dictionary built by text.pass1 and comp_dict.process */
76#ifdef SHORT_SUFFIX
77# define TEXT_DICT_SUFFIX ".td"
78#else
79# define TEXT_DICT_SUFFIX ".text.dict"
80#endif
81
82/* The compression dictionary built by mg_make_fast_dict */
83#ifdef SHORT_SUFFIX
84# define TEXT_DICT_FAST_SUFFIX ".tdf"
85#else
86# define TEXT_DICT_FAST_SUFFIX ".text.dict.fast"
87#endif
88
89/* The auxiliary dictionary built by text.pass2 */
90#ifdef SHORT_SUFFIX
91# define TEXT_DICT_AUX_SUFFIX ".tda"
92#else
93# define TEXT_DICT_AUX_SUFFIX ".text.dict.aux"
94#endif
95
96/* The compressed text built by text.pass2 */
97#ifdef SHORT_SUFFIX
98# define TEXT_SUFFIX ".t"
99#else
100# define TEXT_SUFFIX ".text"
101#endif
102
103/* The combined compressed text index and document weight file */
104#ifdef SHORT_SUFFIX
105# define TEXT_IDX_WGT_SUFFIX ".tiw"
106#else
107# define TEXT_IDX_WGT_SUFFIX ".text.idx.wgt"
108#endif
109
110/* The compressed text index file */
111#ifdef SHORT_SUFFIX
112# define TEXT_IDX_SUFFIX ".ti"
113#else
114# define TEXT_IDX_SUFFIX ".text.idx"
115#endif
116
117/* The document levels in the text */
118#ifdef SHORT_SUFFIX
119# define TEXT_LEVEL_SUFFIX ".tl"
120#else
121# define TEXT_LEVEL_SUFFIX ".text.level"
122#endif
123
124/* The dictionary of stemmed words built by invf.pass1 and ivf.pass1 */
125#ifdef SHORT_SUFFIX
126# define INVF_DICT_SUFFIX ".id"
127#else
128# define INVF_DICT_SUFFIX ".invf.dict"
129#endif
130
131/* The dictionary of stemmed words built by stem.process */
132#ifdef SHORT_SUFFIX
133# define INVF_DICT_BLOCKED_SUFFIX ".idb"
134#else
135# define INVF_DICT_BLOCKED_SUFFIX ".invf.dict.blocked"
136#endif
137
138/* The exact document weights file built by make.weights, invf.pass2,
139 or ivf.pass2 */
140#ifdef SHORT_SUFFIX
141# define WEIGHTS_SUFFIX ".w"
142#else
143# define WEIGHTS_SUFFIX ".weight"
144#endif
145
146/* The approximate weights file built by make.weights */
147#ifdef SHORT_SUFFIX
148# define APPROX_WEIGHTS_SUFFIX ".wa"
149#else
150# define APPROX_WEIGHTS_SUFFIX ".weight.approx"
151#endif
152
153/* The inverted file built by invf.pass2 or ivf.pass2 */
154#ifdef SHORT_SUFFIX
155# define INVF_SUFFIX ".i"
156#else
157# define INVF_SUFFIX ".invf"
158#endif
159
160/* The level information for the inverted text */
161#ifdef SHORT_SUFFIX
162# define INVF_LEVEL_SUFFIX ".il"
163#else
164# define INVF_LEVEL_SUFFIX ".invf.level"
165#endif
166
167/* The inverted file index built by invf.pass2 or ivf.pass2 */
168#ifdef SHORT_SUFFIX
169# define INVF_IDX_SUFFIX ".ii"
170#else
171# define INVF_IDX_SUFFIX ".invf.idx"
172#endif
173
174/* The inverted file chunk descriptor built by ivf.pass1 */
175#ifdef SHORT_SUFFIX
176# define INVF_CHUNK_SUFFIX ".ic"
177#else
178# define INVF_CHUNK_SUFFIX ".invf.chunk"
179#endif
180
181/* The word index translation file built by ivf.pass1 */
182#ifdef SHORT_SUFFIX
183# define INVF_CHUNK_TRANS_SUFFIX ".ict"
184#else
185# define INVF_CHUNK_TRANS_SUFFIX ".invf.chunk.trans"
186#endif
187
188/* The hashed stemmed dictionary built by make.perf_hash */
189#ifdef SHORT_SUFFIX
190# define INVF_DICT_HASH_SUFFIX ".idh"
191#else
192# define INVF_DICT_HASH_SUFFIX ".invf.dict.hash"
193#endif
194
195/* The paragraph descriptor file built by invf.pass1 or ivf.pass1 */
196#ifdef SHORT_SUFFIX
197# define INVF_PARAGRAPH_SUFFIX ".ip"
198#else
199# define INVF_PARAGRAPH_SUFFIX ".invf.paragraph"
200#endif
201
202/* The trace file built by mg.builder. */
203#ifdef SHORT_SUFFIX
204# define TRACE_SUFFIX ".trc"
205#else
206# define TRACE_SUFFIX ".trace"
207#endif
208
209/* The compression stats file built by mg.builder. */
210#ifdef SHORT_SUFFIX
211# define COMPRESSION_STATS_SUFFIX ".cs"
212#else
213# define COMPRESSION_STATS_SUFFIX ".compression.stats"
214#endif
215
216/* [RPAP - Jan 97: Stem Index Change] */
217/* [JFG - Mar 06: Accent folding patch]
218 * Use the pattern with make_suffix.  Unlike the suffixes above this is a
   * format pattern containing a %d conversion, not a literal suffix.
   * NOTE(review): the %d is presumably filled from make_suffix's
   * suffix_arg -- confirm against the implementation. */
219/* The casefolded index into the stemmed dictionary */
220#ifdef SHORT_SUFFIX
221# define INVF_DICT_BLOCKED_SUFFIX_PAT ".ib%d"
222#else
223# define INVF_DICT_BLOCKED_SUFFIX_PAT ".invf.dict.blocked.%d"
224#endif
225
226
227/* [RPAP - Feb 97: WIN32 Port]
   Format string with three %s conversions for composing a file name.
   The default form puts a '/' between the first and second strings; the
   __WIN32__ form joins all three directly.
   NOTE(review): the three strings are presumably base path, name and
   suffix, with the WIN32 base path carrying its own separator -- confirm
   against make_name. */
228#ifdef __WIN32__
229#define FILE_NAME_FORMAT "%s%s%s"
230#else
231#define FILE_NAME_FORMAT "%s/%s%s"
232#endif
233
234
235/* This sets the base path for all file operations; bp is the new base path. */
236void set_basepath (const char *bp);
237
238
239/* Returns the currently defined base path (the one set with set_basepath). */
240char *get_basepath (void);
241
242
243
244
245/* This generates the name of a file from name and suffix. It places the
246 result in the buffer specified, or if that is NULL it uses a static buffer.
    NOTE(review): presumably returns a pointer to whichever buffer was
    written, and the static buffer is reused by later calls -- confirm. */
247char *make_name (const char *name, const char *suffix, char *buffer);
248
249/* [JFG - Mar 06: Accent folding patch] */
250/* This generates the suffix of a file from a pattern such as
251 INVF_DICT_BLOCKED_SUFFIX_PAT. It places the suffix in the buffer specified
    or if that is NULL it uses a static buffer.
    NOTE(review): suffix_arg presumably supplies the value for the
    pattern's %d conversion -- confirm against the implementation. */
252char *make_suffix (const char *suffix_format, const char suffix_arg, char *buffer);
253
254
255
256
257
258/* This will open the file called name and check its magic number
259 against magic_num. err_mode may take on the following values
260 MG_ABORT : causes an error message to be generated and the
261 program aborted if there is an error.
262 MG_MESSAGE : causes a message to be generated and a NULL value to
263 be returned if there is an error.
264 MG_CONTINUE : causes a NULL value to be returned if there is an error.
265
266 On success it returns the FILE *. On failure it will return a NULL value
267 and possibly generate an error message, or it will exit the program with
268 an error message. */
269FILE *open_named_file (const char *name, const char *mode,
270 u_long magic_num, int err_mode);
271
272
273
274
275/* This will open the file given by name and suffix and check its magic
276 number against magic_num.
277 err_mode may take on the following values
278 MG_ABORT : causes an error message to be generated and the
279 program aborted if there is an error.
280 MG_MESSAGE : causes a message to be generated and a NULL value to
281 be returned if there is an error.
282 MG_CONTINUE : causes a NULL value to be returned if there is an error.
283
284 On success it returns the FILE *. On failure it will return a NULL value
285 and possibly generate an error message, or it will exit the program with
286 an error message. */
287FILE *open_file (const char *name, const char *suffix, const char *mode,
288 u_long magic_num, int err_mode);
289
290
291
292
293
294/* This will create the file called name and set its magic number to
295 magic_num.
296 err_mode may take on the following values
297 MG_ABORT : causes an error message to be generated and the
298 program aborted if there is an error.
299 MG_MESSAGE : causes a message to be generated and a NULL value to
300 be returned if there is an error.
301 MG_CONTINUE : causes a NULL value to be returned if there is an error.
302
303 On success it returns the FILE *. On failure it will return a NULL value
304 and possibly generate an error message, or it will exit the program with
305 an error message. */
306FILE *create_named_file (const char *name, const char *mode,
307 u_long magic_num, int err_mode);
308
309
310
311/* This will create the file given by name and suffix and set its magic
312 number to magic_num.
313 err_mode may take on the following values
314 MG_ABORT : causes an error message to be generated and the
315 program aborted if there is an error.
316 MG_MESSAGE : causes a message to be generated and a NULL value to
317 be returned if there is an error.
318 MG_CONTINUE : causes a NULL value to be returned if there is an error.
319
320 On success it returns the FILE *. On failure it will return a NULL value
321 and possibly generate an error message, or it will exit the program with
322 an error message. */
323FILE *create_file (const char *name, const char *suffix, const char *mode,
324 u_long magic_num, int err_mode);
325
326
327
328
329#endif
Note: See TracBrowser for help on using the repository browser.