source: trunk/gsdl/packages/mg-1.3d/src/text/backend.h@ 30

Last change on this file since 30 was 13, checked in by rjmcnab, 26 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.1 KB
Line 
/**************************************************************************
 *
 * backend.h -- Underlying routines and datastructures for mgquery
 * Copyright (C) 1994  Neil Sharman
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: backend.h 13 1998-11-17 09:36:00Z rjmcnab $
 *
 **************************************************************************/

25#ifndef BACKEND_H
26#define BACKEND_H
27
28#include "sysfuncs.h"
29
30#include "timing.h"
31#include "lists.h"
32#include "term_lists.h"
33#include "query_term_list.h" /* [RPAP - Feb 97: Term Frequency] */
34#include "mg.h"
35#include "invf.h"
36#include "text.h"
37
38
39typedef struct invf_data
40 {
41 File *InvfFile;
42 unsigned long N;
43 unsigned long Nstatic; /* N parameter for decoding inverted file entries */
44 struct invf_file_header ifh;
45 }
46invf_data;
47
48typedef struct text_data
49 {
50 File *TextFile;
51 File *TextIdxFile;
52 File *TextIdxWgtFile;
53 long current_pos;
54 struct
55 {
56 unsigned long Start;
57 float Weight;
58 }
59 *idx_data;
60 compressed_text_header cth;
61 }
62text_data;
63
64
65typedef struct auxiliary_dict
66 {
67 aux_frags_header afh[2];
68 u_char *word_data[2];
69 u_char **words[2];
70 int blk_start[2][33], blk_end[2][33]; /* blk_start and blk_end are required
71 for the hybrid methods */
72 }
73auxiliary_dict;
74
75
76typedef struct compression_dict
77 {
78 compression_dict_header cdh;
79 comp_frags_header *cfh[2];
80 unsigned long MemForCompDict;
81 u_char ***values[2];
82 u_char *escape[2];
83 huff_data *chars_huff[2];
84 u_long **chars_vals[2];
85 huff_data *lens_huff[2];
86 u_long **lens_vals[2];
87 auxiliary_dict *ad;
88 int fast_loaded;
89 }
90compression_dict;
91
92
93typedef struct stemmed_idx /* [RPAP - Jan 97: Stem Index Change] */
94 {
95 File *stem_idx_file;
96 struct stem_idx_header sih;
97 u_char **index;
98 unsigned long *pos;
99 int active;
100 u_char *buffer;
101 unsigned long MemForStemIdx;
102 }
103stemmed_idx;
104
105
106typedef struct stemmed_dict
107 {
108 File *stem_file;
109 struct stem_dict_header sdh;
110 u_char **index;
111 unsigned long *pos;
112 int active;
113 u_char *buffer;
114 unsigned long MemForStemDict;
115
116 /* [RPAP - Jan 97: Stem Index Change] */
117 stemmed_idx *stem1;
118 stemmed_idx *stem2;
119 stemmed_idx *stem3;
120 }
121stemmed_dict;
122
123
/*
 * approx_weights_data -- data for approximate document weights (referenced
 * from query_data as awd).  The individual fields are not documented in
 * this header.  NOTE(review): presumably L and B parameterise the weight
 * approximation and bits/mask describe how DocWeights is packed -- confirm
 * against the code that loads the weights file.
 */
typedef struct approx_weights_data {
  double L;
  double B;
  unsigned long *DocWeights;
  char bits;
  float *table;
  unsigned long mask;
  unsigned long MemForWeights;
  unsigned long num_of_docs;
} approx_weights_data;

/*
 * RankedQueryInfo -- options passed to RankedQuery().
 */
typedef struct RankedQueryInfo {
  int QueryFreqs;
  int Exact;                /* use exact weights for ranking or not */
  long MaxDocsToRetrieve;   /* may be -1 for all */
  long MaxParasToRetrieve;
  int Sort;
  char AccumMethod;         /* 'A' = array, 'S' = splay tree, 'H' = hash_table */
  long MaxAccums;           /* may be -1 for all */
  long MaxTerms;            /* may be -1 for all */
  int StopAtMaxAccum;       /* Stop at maximum accumulator or not */
  long HashTblSize;
  char *skip_dump;
} RankedQueryInfo;

/*
 * BooleanQueryInfo -- options passed to BooleanQuery().
 */
typedef struct BooleanQueryInfo {
  long MaxDocsToRetrieve;   /* NOTE(review): RankedQueryInfo documents -1 as
                               "all" for its field of the same name; confirm
                               the same convention applies here */
} BooleanQueryInfo;

/* [TS:24/Aug/94] - maximum number of characters in term string */
#define MAXTERMSTRLEN 1023

166typedef struct query_data
167 {
168 stemmed_dict *sd;
169 compression_dict *cd;
170 approx_weights_data *awd;
171 invf_data *id;
172 text_data *td;
173 char *pathname;
174 char *textpathname; /* [RJM 06/97: text filename] */
175 File *File_text;
176 File *File_comp_dict;
177 File *File_aux_dict;
178 File *File_fast_comp_dict;
179 File *File_text_idx_wgt;
180 File *File_text_idx;
181 File *File_stem;
182
183 /* [RPAP - Jan 97: Stem Index Change] */
184 File *File_stem1;
185 File *File_stem2;
186 File *File_stem3;
187
188 File *File_invf;
189 File *File_weight_approx;
190 unsigned long mem_in_use, max_mem_in_use;
191 unsigned long num_of_ptrs, tot_num_of_ptrs;
192 unsigned long num_of_terms, tot_num_of_terms;
193 unsigned long num_of_accum, tot_num_of_accum;
194 unsigned long num_of_ans, tot_num_of_ans;
195 unsigned long hops_taken, tot_hops_taken;
196 unsigned long text_idx_lookups, tot_text_idx_lookups;
197 unsigned long max_buffers;
198 unsigned doc_pos;
199 unsigned buf_in_use;
200 DocList *DL;
201 TermList *TL; /* [TS:Oct/94] - so term list for query can easily be accessed */
202 u_char *TextBuffer;
203 int TextBufferLen;
204 QueryTermList *QTL; /* [RPAP - Feb 97: Term Frequency] */
205 }
206query_data;
207
208
209
210typedef struct InitQueryTimes
211 {
212 ProgTime Start;
213 ProgTime StemDict;
214 ProgTime ApproxWeights;
215 ProgTime CompDict;
216 ProgTime Invf;
217 ProgTime Text;
218 }
219InitQueryTimes;
220
221
222/* [RJM 06/97: text filename] */
223query_data *InitQuerySystem (char *dir, char *name, char *textname, InitQueryTimes * iqt);
224
225void ChangeMemInUse (query_data * qd, long delta);
226
227void FinishQuerySystem (query_data * qd);
228
229void ResetFileStats (query_data * qd);
230
231void TransFileStats (query_data * qd);
232
233void ChangeMemInUse (query_data * qd, long delta);
234
235void RankedQuery (query_data * qd, char *Query, RankedQueryInfo * rqi);
236
237void BooleanQuery (query_data * qd, char *Query, BooleanQueryInfo * bqi,
238 int stem_method);
239
240void DocnumsQuery (query_data * qd, char *QueryLine);
241
242void FreeTextBuffer (query_data * qd);
243
244void FreeQueryDocs (query_data * qd);
245
246int LoadCompressedText (query_data * qd, int max_mem);
247
248int GetDocNum (query_data * qd);
249
250float GetDocWeight (query_data * qd);
251
252long GetDocCompLength (query_data * qd);
253
254u_char *GetDocText (query_data * qd, unsigned long *len);
255
256DocEntry *GetDocChain (query_data * qd);
257
258int NextDoc (query_data * qd);
259
260#endif
Note: See TracBrowser for help on using the repository browser.