source: main/trunk/greenstone2/common-src/indexers/mg/src/text/backend.h@ 25147

Last change on this file since 25147 was 25147, checked in by kjdon, 12 years ago

merged 64_bit_Greenstone branch into trunk, rev 25139

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.1 KB
Line 
1/**************************************************************************
2 *
3 * backend.h -- Underlying routines and datastructures for mgquery
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: backend.h 25147 2012-02-28 00:59:00Z kjdon $
21 *
22 **************************************************************************/
23
24
25#ifndef BACKEND_H
26#define BACKEND_H
27
28#include "sysfuncs.h"
29
30#include "timing.h"
31#include "lists.h"
32#include "term_lists.h"
33#include "query_term_list.h" /* [RPAP - Feb 97: Term Frequency] */
34#include "mg.h"
35#include "invf.h"
36#include "text.h"
37
38
39typedef struct invf_data
40 {
41 File *InvfFile;
42 mg_u_long N;
43 mg_u_long Nstatic; /* N parameter for decoding inverted file entries */
44 struct invf_file_header ifh;
45 }
46invf_data;
47
48typedef struct text_data
49 {
50 File *TextFile;
51 File *TextIdxFile;
52 File *TextIdxWgtFile;
53 mg_s_long current_pos;
54 struct
55 {
56 mg_u_long Start;
57 float Weight;
58 }
59 *idx_data;
60 compressed_text_header cth;
61 }
62text_data;
63
64
65typedef struct auxiliary_dict
66 {
67 aux_frags_header afh[2];
68 u_char *word_data[2];
69 u_char **words[2];
70 int blk_start[2][33], blk_end[2][33]; /* blk_start and blk_end are required
71 for the hybrid methods */
72 }
73auxiliary_dict;
74
75
76typedef struct compression_dict
77 {
78 compression_dict_header cdh;
79 comp_frags_header *cfh[2];
80 mg_u_long MemForCompDict;
81 u_char ***values[2];
82 u_char *escape[2];
83 huff_data *chars_huff[2];
84 mg_u_long **chars_vals[2];
85 huff_data *lens_huff[2];
86 mg_u_long **lens_vals[2];
87 auxiliary_dict *ad;
88 int fast_loaded;
89 }
90compression_dict;
91
92
93typedef struct stemmed_idx /* [RPAP - Jan 97: Stem Index Change] */
94 {
95 File *stem_idx_file;
96 struct stem_idx_header sih;
97 u_char **index;
98 mg_u_long *pos;
99 int active;
100 u_char *buffer;
101 mg_u_long MemForStemIdx;
102 }
103stemmed_idx;
104
105
106typedef struct stemmed_dict
107 {
108 File *stem_file;
109 struct stem_dict_header sdh;
110 u_char **index;
111 mg_u_long *pos;
112 int active;
113 u_char *buffer;
114 mg_u_long MemForStemDict;
115
116 /* [RPAP - Jan 97: Stem Index Change] */
117 stemmed_idx *stem1;
118 stemmed_idx *stem2;
119 stemmed_idx *stem3;
120 }
121stemmed_dict;
122
123
124typedef struct approx_weights_data
125 {
126 double L;
127 double B;
128 mg_u_long *DocWeights;
129 char bits;
130 float *table;
131 mg_u_long mask;
132 mg_u_long MemForWeights;
133 mg_u_long num_of_docs;
134 }
135approx_weights_data;
136
137
138typedef struct RankedQueryInfo
139 {
140 int QueryFreqs;
141 int Exact; /* use exact weights for ranking or not */
142 mg_s_long MaxDocsToRetrieve; /* may be -1 for all */
143 mg_s_long MaxParasToRetrieve;
144 int Sort;
145 char AccumMethod; /* 'A' = array, 'S' = splay tree, 'H' = hash_table */
146 mg_s_long MaxAccums; /* may be -1 for all */
147 mg_s_long MaxTerms; /* may be -1 for all */
148 int StopAtMaxAccum; /* Stop at maximum accumulator or not */
149 mg_s_long HashTblSize;
150 char *skip_dump;
151 }
152RankedQueryInfo;
153
154
155
156typedef struct BooleanQueryInfo
157 {
158 mg_s_long MaxDocsToRetrieve;
159 }
160BooleanQueryInfo;
161
162
/* [TS:24/Aug/94] - maximum number of characters in term string */
#define MAXTERMSTRLEN 1023
165
166typedef struct query_data
167 {
168 stemmed_dict *sd;
169 compression_dict *cd;
170 approx_weights_data *awd;
171 invf_data *id;
172 text_data *td;
173#if defined(PARADOCNUM) || defined(NZDL)
174 int *paragraph;
175#endif
176 char *pathname;
177 char *textpathname; /* [RJM 06/97: text filename] */
178 File *File_text;
179 File *File_comp_dict;
180 File *File_aux_dict;
181 File *File_fast_comp_dict;
182 File *File_text_idx_wgt;
183 File *File_text_idx;
184 File *File_stem;
185
186 /* [RPAP - Jan 97: Stem Index Change] */
187 File *File_stem1;
188 File *File_stem2;
189 File *File_stem3;
190
191 File *File_invf;
192 File *File_weight_approx;
193 mg_u_long mem_in_use, max_mem_in_use;
194 mg_u_long num_of_ptrs, tot_num_of_ptrs;
195 mg_u_long num_of_terms, tot_num_of_terms;
196 mg_u_long num_of_accum, tot_num_of_accum;
197 mg_u_long num_of_ans, tot_num_of_ans;
198 mg_u_long hops_taken, tot_hops_taken;
199 mg_u_long text_idx_lookups, tot_text_idx_lookups;
200 mg_u_long max_buffers;
201 unsigned doc_pos;
202 unsigned buf_in_use;
203 DocList *DL;
204 TermList *TL; /* [TS:Oct/94] - so term list for query can easily be accessed */
205 u_char *TextBuffer;
206 int TextBufferLen;
207 QueryTermList *QTL; /* [RPAP - Feb 97: Term Frequency] */
208 }
209query_data;
210
211
212
213typedef struct InitQueryTimes
214 {
215 ProgTime Start;
216 ProgTime StemDict;
217 ProgTime ApproxWeights;
218 ProgTime CompDict;
219 ProgTime Invf;
220 ProgTime Text;
221 }
222InitQueryTimes;
223
224
225/* [RJM 06/97: text filename] */
226query_data *InitQuerySystem (char *dir, char *name, char *textname, InitQueryTimes * iqt);
227
228void ChangeMemInUse (query_data * qd, mg_s_long delta);
229
230void FinishQuerySystem (query_data * qd);
231
232void ResetFileStats (query_data * qd);
233
234void TransFileStats (query_data * qd);
235
236void RankedQuery (query_data * qd, char *Query, RankedQueryInfo * rqi);
237
238void BooleanQuery (query_data * qd, char *Query, BooleanQueryInfo * bqi,
239 int stem_method);
240
241void DocnumsQuery (query_data * qd, char *QueryLine);
242
243void FreeTextBuffer (query_data * qd);
244
245void FreeQueryDocs (query_data * qd);
246
247int LoadCompressedText (query_data * qd, int max_mem);
248
249int GetDocNum (query_data * qd);
250
251float GetDocWeight (query_data * qd);
252
253mg_s_long GetDocCompLength (query_data * qd);
254
255u_char *GetDocText (query_data * qd, mg_u_long *len);
256
257DocEntry *GetDocChain (query_data * qd);
258
259int NextDoc (query_data * qd);
260
261#endif
Note: See TracBrowser for help on using the repository browser.