source: main/tags/2.80/indexers/mg/src/text/build.h@ 24541

Last change on this file since 24541 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1/**************************************************************************
2 *
3 * build.h -- Global information for the passes of mg_passes
4 * Copyright (C) 1994 Neil Sharman, Alistair Moffat and Lachlan Andrew
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: build.h 3745 2003-02-20 21:20:24Z mdewsnip $
21 *
22 **************************************************************************/
23
24
25#ifndef H_BUILD
26#define H_BUILD
27
28#include "longlong.h"
29
30#define TERMPARAGRAPH '\003' /* octal 003, i.e. ASCII ETX; presumably marks the end of a paragraph -- TODO confirm */
31
32
33int init_special (char *file_name);
34int init_text_1 (char *file_name);
35int init_text_2 (char *file_name);
36int init_invf_1 (char *file_name);
37int init_invf_2 (char *file_name);
38int init_ivf_1 (char *file_name);
39int init_ivf_2 (char *file_name);
40/*
41 * Applies to each of the init_* routines declared above.
42 * file_name IN The name of the dictionary file.
43 * returns COMPALLOK if all is ok, or
44 * COMPERROR on any error, e.g. cannot read file.
45 */
46
47
48
49
50
51int process_special (u_char * s_in, int l_in);
52int process_text_1 (u_char * s_in, int l_in);
53int process_text_2 (u_char * s_in, int l_in);
54int process_invf_1 (u_char * s_in, int l_in);
55int process_invf_2 (u_char * s_in, int l_in);
56int process_ivf_1 (u_char * s_in, int l_in);
57int process_ivf_2 (u_char * s_in, int l_in);
58/*
59 * Applies to each process_* routine above (u_char is not declared in this file).
60 * s_in IN The binary string to be compressed.
61 * l_in IN The number of characters in s_in.
62 * returns COMPALLOK if all is ok, or
63 * COMPERROR on any error, e.g. cannot read file.
64 *
65 * NOTE(review): this comment made the caller responsible for s_out being long
66 * enough, but no s_out parameter is declared here -- presumably stale; confirm.
67 */
68
69
70
71
72
73int done_special (char *filename);
74int done_text_1 (char *filename);
75int done_text_2 (char *filename);
76int done_invf_1 (char *filename);
77int done_invf_2 (char *filename);
78int done_ivf_1 (char *filename);
79int done_ivf_2 (char *filename);
80/*
81 * filename IN NOTE(review): undocumented; presumably as file_name for init_* -- confirm.
82 * returns COMPALLOK if all is ok, or COMPERROR on any error, e.g. cannot write file.
83 */
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98extern char InvfLevel;
99/*
100 * Determines the level of the inverted file. It can take on the
101 * values 1, 2, or 3.
102 *
103 * Level 1: The inverted file contains only document numbers, making it possible
104 * to do only boolean queries.
105 *
106 * Level 2: The inverted file also contains word counts per document, making it
107 * possible to do cosine ranked queries.
108 *
109 * Level 3: The inverted file contains word positions.
110 */
111
112
113extern unsigned long buf_size;
114/*
115 * The size of the document input buffer (units unstated; presumably bytes -- TODO confirm).
116 */
117
118extern unsigned long ChunkLimit;
119/*
120 * The maximum number of chunks that can be written to disk.
121 */
122
123extern unsigned long invf_buffer_size;
124/*
125 * The amount of memory to allocate to the inversion buffer.
126 */
127
128extern char SkipSGML;
129/*
130 * Flag: 1 if SGML tags are to be considered non-words when building the
131 * inverted file. (Only the value 1 is documented here.)
132 */
133
134extern char MakeWeights;
135/*
136 * Flag: 1 if the weights file should be generated.
137 */
138
139extern FILE *Comp_Stats;
140/* File pointer to the file where compression stats should be sent.
141 * NOTE(review): FILE needs <stdio.h>, which is not included here -- confirm
142 * that every includer (or longlong.h) provides it. */
143
144extern int comp_stat_point;
145/*
146 * Generate a compression stat entry every comp_stat_point bytes.
147 */
148
149extern mg_ullong bytes_processed;
150/*
151 * The number of bytes processed. NOTE: This excludes document separators.
152 */
153
154extern mg_ullong bytes_received;
155/*
156 * The number of bytes received. NOTE: This includes document separators.
157 */
158
159
160extern int stemmer_num;
161/*
162 * Selects which stemmer is used when stemming words for the inverted file.
163 * See stemmer.h (the valid values are not listed in this header).
164 */
165
166extern int stem_method;
167/*
168 * Selects the stemming method applied to words for the inverted file.
169 * See stemmer.h (the valid values are not listed in this header).
170 */
171#endif
Note: See TracBrowser for help on using the repository browser.