/**************************************************************************
 *
 * backend.h -- Underlying routines and data structures for mgquery
 * Copyright (C) 1994  Neil Sharman
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: backend.h 7401 2004-05-24 21:18:51Z kjdon $
 *
 **************************************************************************/

#ifndef BACKEND_H
#define BACKEND_H

#include "sysfuncs.h"

#include "timing.h"
#include "lists.h"
#include "term_lists.h"
#include "query_term_list.h"	/* [RPAP - Feb 97: Term Frequency] */
#include "mg.h"
#include "invf.h"
#include "text.h"

39 | typedef struct invf_data
|
---|
40 | {
|
---|
41 | File *InvfFile;
|
---|
42 | unsigned long N;
|
---|
43 | unsigned long Nstatic; /* N parameter for decoding inverted file entries */
|
---|
44 | struct invf_file_header ifh;
|
---|
45 | }
|
---|
46 | invf_data;
|
---|
47 |
|
---|
48 | typedef struct text_data
|
---|
49 | {
|
---|
50 | File *TextFile;
|
---|
51 | File *TextIdxFile;
|
---|
52 | File *TextIdxWgtFile;
|
---|
53 | long current_pos;
|
---|
54 | struct
|
---|
55 | {
|
---|
56 | unsigned long Start;
|
---|
57 | float Weight;
|
---|
58 | }
|
---|
59 | *idx_data;
|
---|
60 | compressed_text_header cth;
|
---|
61 | }
|
---|
62 | text_data;
|
---|
63 |
|
---|
64 |
|
---|
65 | typedef struct auxiliary_dict
|
---|
66 | {
|
---|
67 | aux_frags_header afh[2];
|
---|
68 | u_char *word_data[2];
|
---|
69 | u_char **words[2];
|
---|
70 | int blk_start[2][33], blk_end[2][33]; /* blk_start and blk_end are required
|
---|
71 | for the hybrid methods */
|
---|
72 | }
|
---|
73 | auxiliary_dict;
|
---|
74 |
|
---|
75 |
|
---|
76 | typedef struct compression_dict
|
---|
77 | {
|
---|
78 | compression_dict_header cdh;
|
---|
79 | comp_frags_header *cfh[2];
|
---|
80 | unsigned long MemForCompDict;
|
---|
81 | u_char ***values[2];
|
---|
82 | u_char *escape[2];
|
---|
83 | huff_data *chars_huff[2];
|
---|
84 | u_long **chars_vals[2];
|
---|
85 | huff_data *lens_huff[2];
|
---|
86 | u_long **lens_vals[2];
|
---|
87 | auxiliary_dict *ad;
|
---|
88 | int fast_loaded;
|
---|
89 | }
|
---|
90 | compression_dict;
|
---|
91 |
|
---|
92 |
|
---|
93 | typedef struct stemmed_idx /* [RPAP - Jan 97: Stem Index Change] */
|
---|
94 | {
|
---|
95 | File *stem_idx_file;
|
---|
96 | struct stem_idx_header sih;
|
---|
97 | u_char **index;
|
---|
98 | unsigned long *pos;
|
---|
99 | int active;
|
---|
100 | u_char *buffer;
|
---|
101 | unsigned long MemForStemIdx;
|
---|
102 | }
|
---|
103 | stemmed_idx;
|
---|
104 |
|
---|
105 |
|
---|
106 | typedef struct stemmed_dict
|
---|
107 | {
|
---|
108 | File *stem_file;
|
---|
109 | struct stem_dict_header sdh;
|
---|
110 | u_char **index;
|
---|
111 | unsigned long *pos;
|
---|
112 | int active;
|
---|
113 | u_char *buffer;
|
---|
114 | unsigned long MemForStemDict;
|
---|
115 |
|
---|
116 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
117 | stemmed_idx *stem1;
|
---|
118 | stemmed_idx *stem2;
|
---|
119 | stemmed_idx *stem3;
|
---|
120 | }
|
---|
121 | stemmed_dict;
|
---|
122 |
|
---|
123 |
|
---|
/* Approximate document-weight data.
   NOTE(review): the roles of L, B, bits, table and mask are not stated
   in this header; the member names suggest packed weights decoded via
   a lookup table -- confirm against the weights loader. */
typedef struct approx_weights_data
{
  double L;
  double B;
  unsigned long *DocWeights;
  char bits;
  float *table;
  unsigned long mask;
  unsigned long MemForWeights;	/* NOTE(review): presumably bytes of memory
				   used for the weights -- confirm */
  unsigned long num_of_docs;
} approx_weights_data;

/* Options for a ranked query; passed to RankedQuery(). */
typedef struct RankedQueryInfo
{
  int QueryFreqs;
  int Exact;			/* use exact weights for ranking or not */
  long MaxDocsToRetrieve;	/* may be -1 for all */
  long MaxParasToRetrieve;
  int Sort;
  char AccumMethod;		/* 'A' = array, 'S' = splay tree, 'H' = hash_table */
  long MaxAccums;		/* may be -1 for all */
  long MaxTerms;		/* may be -1 for all */
  int StopAtMaxAccum;		/* Stop at maximum accumulator or not */
  long HashTblSize;
  char *skip_dump;
} RankedQueryInfo;

/* Options for a boolean query; passed to BooleanQuery(). */
typedef struct BooleanQueryInfo
{
  long MaxDocsToRetrieve;	/* NOTE(review): the same-named member of
				   RankedQueryInfo allows -1 for "all";
				   confirm the same applies here */
} BooleanQueryInfo;

/* [TS:24/Aug/94] - maximum number of characters in term string */
#define MAXTERMSTRLEN 1023

166 | typedef struct query_data
|
---|
167 | {
|
---|
168 | stemmed_dict *sd;
|
---|
169 | compression_dict *cd;
|
---|
170 | approx_weights_data *awd;
|
---|
171 | invf_data *id;
|
---|
172 | text_data *td;
|
---|
173 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
174 | int *paragraph;
|
---|
175 | #endif
|
---|
176 | char *pathname;
|
---|
177 | char *textpathname; /* [RJM 06/97: text filename] */
|
---|
178 | File *File_text;
|
---|
179 | File *File_comp_dict;
|
---|
180 | File *File_aux_dict;
|
---|
181 | File *File_fast_comp_dict;
|
---|
182 | File *File_text_idx_wgt;
|
---|
183 | File *File_text_idx;
|
---|
184 | File *File_stem;
|
---|
185 |
|
---|
186 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
187 | File *File_stem1;
|
---|
188 | File *File_stem2;
|
---|
189 | File *File_stem3;
|
---|
190 |
|
---|
191 | File *File_invf;
|
---|
192 | File *File_weight_approx;
|
---|
193 | unsigned long mem_in_use, max_mem_in_use;
|
---|
194 | unsigned long num_of_ptrs, tot_num_of_ptrs;
|
---|
195 | unsigned long num_of_terms, tot_num_of_terms;
|
---|
196 | unsigned long num_of_accum, tot_num_of_accum;
|
---|
197 | unsigned long num_of_ans, tot_num_of_ans;
|
---|
198 | unsigned long hops_taken, tot_hops_taken;
|
---|
199 | unsigned long text_idx_lookups, tot_text_idx_lookups;
|
---|
200 | unsigned long max_buffers;
|
---|
201 | unsigned doc_pos;
|
---|
202 | unsigned buf_in_use;
|
---|
203 | DocList *DL;
|
---|
204 | TermList *TL; /* [TS:Oct/94] - so term list for query can easily be accessed */
|
---|
205 | u_char *TextBuffer;
|
---|
206 | int TextBufferLen;
|
---|
207 | QueryTermList *QTL; /* [RPAP - Feb 97: Term Frequency] */
|
---|
208 | }
|
---|
209 | query_data;
|
---|
210 |
|
---|
211 |
|
---|
212 |
|
---|
213 | typedef struct InitQueryTimes
|
---|
214 | {
|
---|
215 | ProgTime Start;
|
---|
216 | ProgTime StemDict;
|
---|
217 | ProgTime ApproxWeights;
|
---|
218 | ProgTime CompDict;
|
---|
219 | ProgTime Invf;
|
---|
220 | ProgTime Text;
|
---|
221 | }
|
---|
222 | InitQueryTimes;
|
---|
223 |
|
---|
224 |
|
---|
225 | /* [RJM 06/97: text filename] */
|
---|
226 | query_data *InitQuerySystem (char *dir, char *name, char *textname, InitQueryTimes * iqt);
|
---|
227 |
|
---|
228 | void ChangeMemInUse (query_data * qd, long delta);
|
---|
229 |
|
---|
230 | void FinishQuerySystem (query_data * qd);
|
---|
231 |
|
---|
232 | void ResetFileStats (query_data * qd);
|
---|
233 |
|
---|
234 | void TransFileStats (query_data * qd);
|
---|
235 |
|
---|
236 | void RankedQuery (query_data * qd, char *Query, RankedQueryInfo * rqi);
|
---|
237 |
|
---|
238 | void BooleanQuery (query_data * qd, char *Query, BooleanQueryInfo * bqi,
|
---|
239 | int stem_method);
|
---|
240 |
|
---|
241 | void DocnumsQuery (query_data * qd, char *QueryLine);
|
---|
242 |
|
---|
243 | void FreeTextBuffer (query_data * qd);
|
---|
244 |
|
---|
245 | void FreeQueryDocs (query_data * qd);
|
---|
246 |
|
---|
247 | int LoadCompressedText (query_data * qd, int max_mem);
|
---|
248 |
|
---|
249 | int GetDocNum (query_data * qd);
|
---|
250 |
|
---|
251 | float GetDocWeight (query_data * qd);
|
---|
252 |
|
---|
253 | long GetDocCompLength (query_data * qd);
|
---|
254 |
|
---|
255 | u_char *GetDocText (query_data * qd, unsigned long *len);
|
---|
256 |
|
---|
257 | DocEntry *GetDocChain (query_data * qd);
|
---|
258 |
|
---|
259 | int NextDoc (query_data * qd);
|
---|
260 |
|
---|
#endif /* BACKEND_H */