source: gsdl/trunk/trunk/mg/src/text/backend.c@ 16583

Last change on this file since 16583 was 16583, checked in by davidb, 16 years ago

Undoing change committed in r16582

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 20.5 KB
Line 
1/**************************************************************************
2 *
3 * backend.c -- Underlying routines for mgquery
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: backend.c 16583 2008-07-29 10:20:36Z davidb $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "memlib.h"
27#include "messages.h"
28#include "timing.h"
29#include "filestats.h"
30#include "sptree.h"
31#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
32
33
34#include "mg_files.h"
35#include "mg.h"
36#include "invf.h"
37#include "text.h"
38#include "lists.h"
39#include "backend.h"
40#include "stem_search.h"
41#include "invf_get.h"
42#include "text_get.h"
43#include "weights.h"
44#include "locallib.h"
45#include "mg_errors.h"
46
47
48static File *
49OpenFile (char *base, char *suffix, unsigned long magic, int *ok)
50{
51 char FileName[512];
52 File *F;
53 sprintf (FileName, "%s%s", base, suffix);
54 if (!(F = Fopen (FileName, "rb", 0))) /* [RPAP - Feb 97: WIN32 Port] */
55 {
56 mg_errno = MG_NOFILE;
57 MgErrorData (FileName);
58 if (ok)
59 *ok = 0;
60 return (NULL);
61 }
62 if (magic)
63 {
64 unsigned long m;
65 if (fread ((char *) &m, sizeof (m), 1, F->f) == 0)
66 {
67 mg_errno = MG_READERR;
68 MgErrorData (FileName);
69 if (ok)
70 *ok = 0;
71 Fclose (F);
72 return (NULL);
73 }
74 NTOHUL(m); /* [RPAP - Jan 97: Endian Ordering] */
75 if (m != magic)
76 {
77 mg_errno = MG_BADMAGIC;
78 MgErrorData (FileName);
79 if (ok)
80 *ok = 0;
81 Fclose (F);
82 return (NULL);
83 }
84 }
85 return (F);
86}
87
88
89static int
90open_all_files (query_data * qd)
91{
92 int ok = 1;
93
94 qd->File_text = OpenFile (qd->textpathname, TEXT_SUFFIX, /* [RJM 06/97: text filename] */
95 MAGIC_TEXT, &ok);
96 qd->File_fast_comp_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
97 TEXT_DICT_FAST_SUFFIX, MAGIC_FAST_DICT, NULL);
98 if (!qd->File_fast_comp_dict)
99 {
100 qd->File_comp_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
101 TEXT_DICT_SUFFIX, MAGIC_DICT, &ok);
102 qd->File_aux_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
103 TEXT_DICT_AUX_SUFFIX, MAGIC_AUX_DICT, NULL);
104 }
105 else
106 qd->File_comp_dict = qd->File_aux_dict = NULL;
107
108 qd->File_stem = OpenFile (qd->pathname, INVF_DICT_BLOCKED_SUFFIX,
109 MAGIC_STEM, &ok);
110
111 /* [RPAP - Jan 97: Stem Index Change]
112 These will fail if collection not built with stem indexes */
113 qd->File_stem1 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_1_SUFFIX,
114 MAGIC_STEM_1, NULL);
115 qd->File_stem2 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_2_SUFFIX,
116 MAGIC_STEM_2, NULL);
117 qd->File_stem3 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_3_SUFFIX,
118 MAGIC_STEM_3, NULL);
119
120 qd->File_invf = OpenFile (qd->pathname, INVF_SUFFIX,
121 MAGIC_INVF, &ok);
122
123 /* These will fail if a level 1 inverted file was created because there
124 will be no document weights */
125 qd->File_text_idx_wgt = OpenFile (qd->pathname, TEXT_IDX_WGT_SUFFIX,
126 MAGIC_TEXI_WGT, NULL);
127 qd->File_weight_approx = OpenFile (qd->pathname, APPROX_WEIGHTS_SUFFIX,
128 MAGIC_WGHT_APPROX, NULL);
129 if (qd->File_text_idx_wgt == NULL && qd->File_weight_approx == NULL)
130 qd->File_text_idx = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
131 TEXT_IDX_SUFFIX, MAGIC_TEXI, NULL);
132 else
133 qd->File_text_idx = NULL;
134
135
136 if (!ok)
137 {
138 Fclose (qd->File_text);
139 if (qd->File_fast_comp_dict)
140 Fclose (qd->File_fast_comp_dict);
141 if (qd->File_comp_dict)
142 Fclose (qd->File_comp_dict);
143 Fclose (qd->File_stem);
144
145 /* [RPAP - Jan 97: Stem Index Change] */
146 if (qd->File_stem1)
147 Fclose (qd->File_stem1);
148 if (qd->File_stem2)
149 Fclose (qd->File_stem2);
150 if (qd->File_stem3)
151 Fclose (qd->File_stem3);
152
153 Fclose (qd->File_invf);
154 if (qd->File_text_idx_wgt)
155 Fclose (qd->File_text_idx_wgt);
156 if (qd->File_weight_approx)
157 Fclose (qd->File_weight_approx);
158 if (qd->File_text_idx)
159 Fclose (qd->File_text_idx);
160 return (-1);
161 }
162 return (0);
163
164}
165
166static void
167close_all_files (query_data * qd)
168{
169 Fclose (qd->File_text);
170 if (qd->File_fast_comp_dict)
171 Fclose (qd->File_fast_comp_dict);
172 if (qd->File_aux_dict)
173 Fclose (qd->File_aux_dict);
174 if (qd->File_comp_dict)
175 Fclose (qd->File_comp_dict);
176 Fclose (qd->File_stem);
177
178 /* [RPAP - Jan 97: Stem Index Change] */
179 if (qd->File_stem1)
180 Fclose (qd->File_stem1);
181 if (qd->File_stem2)
182 Fclose (qd->File_stem2);
183 if (qd->File_stem3)
184 Fclose (qd->File_stem3);
185
186 Fclose (qd->File_invf);
187 if (qd->File_text_idx_wgt)
188 Fclose (qd->File_text_idx_wgt);
189 if (qd->File_weight_approx)
190 Fclose (qd->File_weight_approx);
191 if (qd->File_text_idx)
192 Fclose (qd->File_text_idx);
193}
194
195/* If textname is equal to null then name will be used instead */
196/* [RJM 06/97: text filename] */
197query_data *
198InitQuerySystem (char *dir, char *name, char *textname, InitQueryTimes * iqt)
199{
200 query_data *qd;
201 char *s;
202
203 if (textname == NULL) textname = name; /* [RJM 06/97: text filename] */
204
205 if (!(qd = Xmalloc (sizeof (query_data))))
206 {
207 mg_errno = MG_NOMEM;
208 return (NULL);
209 }
210
211 bzero ((char *) qd, sizeof (*qd));
212
213 qd->mem_in_use = qd->max_mem_in_use = 0;
214
215 qd->doc_pos = qd->buf_in_use = 0;
216 qd->TextBufferLen = 0;
217 qd->DL = NULL;
218
219 /* [RPAP - Feb 97: Term Frequency] */
220 qd->TL = NULL;
221 qd->QTL = NULL;
222
223 qd->TextBuffer = NULL;
224
225 qd->tot_hops_taken = 0;
226 qd->tot_num_of_ptrs = 0;
227 qd->tot_num_of_accum = 0;
228 qd->tot_num_of_terms = 0;
229 qd->tot_num_of_ans = 0;
230 qd->tot_text_idx_lookups = 0;
231
232 qd->hops_taken = 0;
233 qd->num_of_ptrs = 0;
234 qd->num_of_accum = 0;
235 qd->num_of_terms = 0;
236 qd->num_of_ans = 0;
237 qd->text_idx_lookups = 0;
238
239 qd->pathname = NULL; /* RJM 06/97: text filename] */
240 qd->textpathname = NULL; /* RJM 06/97: text filename] */
241
242 s = strrchr (dir, '/');
243 if (s && *(s + 1) == '\0')
244 {
245 /* [RJM 06/97: text filename] */
246 if (!(qd->pathname = Xmalloc (strlen (dir) + strlen (name) + 1)) ||
247 !(qd->textpathname = Xmalloc (strlen (dir) + strlen (textname) + 1)))
248 {
249 mg_errno = MG_NOMEM;
250 if (qd->pathname) Xfree (qd->pathname); /* [RJM 06/97: text filename] */
251 Xfree (qd);
252 return (NULL);
253 }
254 sprintf (qd->pathname, "%s%s", dir, name);
255 sprintf (qd->textpathname, "%s%s", dir, textname); /* [RJM 06/97: text filename] */
256 }
257
258 else
259 {
260 /* [RJM 06/97: text filename] */
261 if (!(qd->pathname = Xmalloc (strlen (dir) + strlen (name) + 2)) ||
262 !(qd->textpathname = Xmalloc (strlen (dir) + strlen (textname) + 2)))
263 {
264 mg_errno = MG_NOMEM;
265 if (qd->pathname) Xfree (qd->pathname); /* [RJM 06/97: text filename] */
266 Xfree (qd);
267 return (NULL);
268 }
269/* [RPAP - Feb 97: WIN32 Port] */
270#ifdef __WIN32__
271 if (dir == NULL || dir[0] == '\0') {
272 sprintf (qd->pathname, "%s", name);
273 sprintf (qd->textpathname, "%s", textname); /* [RJM 06/97: text filename] */
274 } else {
275 sprintf (qd->pathname, "%s%s", dir, name);
276 sprintf (qd->textpathname, "%s%s", dir, textname); /* [RJM 06/97: text filename] */
277 }
278#else
279 sprintf (qd->pathname, "%s/%s", dir, name);
280 sprintf (qd->textpathname, "%s/%s", dir, textname); /* [RJM 06/97: text filename] */
281#endif
282 }
283
284 if (open_all_files (qd) == -1)
285 {
286 Xfree (qd->pathname);
287 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
288 Xfree (qd);
289 return (NULL);
290 }
291
292 if (iqt)
293 GetTime (&iqt->Start);
294
295 /* Initialise the stemmed dictionary system */
296 if (!(qd->sd = ReadStemDictBlk (qd->File_stem)))
297 {
298 close_all_files (qd);
299 Xfree (qd->pathname);
300 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
301 Xfree (qd);
302 return (NULL);
303 }
304
305 /* [RPAP - Jan 97: Stem Index Change] */
306 if ((qd->sd->sdh.indexed & 7) && qd->File_stem1 && qd->File_stem2 && qd->File_stem3)
307 {
308 if (!(qd->sd->stem1 = ReadStemIdxBlk (qd->File_stem1)))
309 {
310 FreeStemDict (qd->sd);
311 close_all_files (qd);
312 Xfree (qd->pathname);
313 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
314 Xfree (qd);
315 return (NULL);
316 }
317 if (!(qd->sd->stem2 = ReadStemIdxBlk (qd->File_stem2)))
318 {
319 FreeStemDict (qd->sd);
320 close_all_files (qd);
321 Xfree (qd->pathname);
322 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
323 Xfree (qd);
324 return (NULL);
325 }
326 if (!(qd->sd->stem3 = ReadStemIdxBlk (qd->File_stem3)))
327 {
328 FreeStemDict (qd->sd);
329 close_all_files (qd);
330 Xfree (qd->pathname);
331 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
332 Xfree (qd);
333 return (NULL);
334 }
335 }
336 else if (qd->sd->sdh.indexed != 0)
337 {
338 FreeStemDict (qd->sd);
339 close_all_files (qd);
340 Xfree (qd->pathname);
341 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
342 Xfree (qd);
343 return (NULL);
344 }
345 else
346 {
347 if (qd->File_stem1)
348 Fclose (qd->File_stem1);
349 if (qd->File_stem2)
350 Fclose (qd->File_stem2);
351 if (qd->File_stem3)
352 Fclose (qd->File_stem3);
353 qd->File_stem1 = NULL;
354 qd->File_stem2 = NULL;
355 qd->File_stem3 = NULL;
356 qd->sd->stem1 = NULL;
357 qd->sd->stem2 = NULL;
358 qd->sd->stem3 = NULL;
359 }
360
361 if (iqt)
362 GetTime (&iqt->StemDict);
363 if (qd->File_weight_approx)
364 {
365 if (!(qd->awd = LoadDocWeights (qd->File_weight_approx,
366 qd->sd->sdh.num_of_docs)))
367 {
368 FreeStemDict (qd->sd);
369 close_all_files (qd);
370 Xfree (qd->pathname);
371 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
372 Xfree (qd);
373 return (NULL);
374 }
375 }
376 else
377 qd->awd = NULL;
378
379
380 if (iqt)
381 GetTime (&iqt->ApproxWeights);
382
383 if (!(qd->cd = LoadCompDict (qd->File_comp_dict, qd->File_aux_dict,
384 qd->File_fast_comp_dict)))
385 {
386 if (qd->awd)
387 FreeWeights (qd->awd);
388 FreeStemDict (qd->sd);
389 close_all_files (qd);
390 Xfree (qd->pathname);
391 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
392 Xfree (qd);
393 return (NULL);
394 }
395
396 if (iqt)
397 GetTime (&iqt->CompDict);
398
399 if (!(qd->id = InitInvfFile (qd->File_invf, qd->sd)))
400 {
401 FreeCompDict (qd->cd);
402 if (qd->awd)
403 FreeWeights (qd->awd);
404 FreeStemDict (qd->sd);
405 close_all_files (qd);
406 Xfree (qd->pathname);
407 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
408 Xfree (qd);
409 return (NULL);
410 }
411 if ((qd->File_text_idx_wgt == NULL || qd->File_weight_approx == NULL) &&
412 qd->id->ifh.InvfLevel >= 2)
413 {
414 FreeInvfData (qd->id);
415 FreeCompDict (qd->cd);
416 if (qd->awd)
417 FreeWeights (qd->awd);
418 FreeStemDict (qd->sd);
419 close_all_files (qd);
420 Xfree (qd->pathname);
421 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
422 Xfree (qd);
423 mg_errno = MG_INVERSION;
424 return (NULL);
425 }
426 if (iqt)
427 GetTime (&iqt->Invf);
428
429 if (!(qd->td = LoadTextData (qd->File_text, qd->File_text_idx_wgt,
430 qd->File_text_idx)))
431 {
432 FreeInvfData (qd->id);
433 FreeCompDict (qd->cd);
434 if (qd->awd)
435 FreeWeights (qd->awd);
436 FreeStemDict (qd->sd);
437 close_all_files (qd);
438 Xfree (qd->pathname);
439 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
440 Xfree (qd);
441 return (NULL);
442 }
443
444/* [RPAP - Feb 97: NZDL Additions] */
445#if defined(PARADOCNUM) || defined(NZDL)
446
447/*
448
449This code is based on the TREC_MODE code below to read the .paragraph
450file to determine what document numbers correspond to what paragraphs.
451This code is more space efficient, reading in the .paragraph file
452into memory as an accumulate docnum array. Eg. the .paragraph may contain
453
454 [5 3 6 4 7 9 4]
455
456indicating the first document has 5 paragraphs, the next 3, etc.
457This will be stored in memory as
458
459 [0 5 8 14 18 25 34 38]
460
461so a binary search can be performed. The first 0 is for convenience;
462it prevents testing boundary conditions.
463
464
465The TREC_MODE code does this differently; it stores the array
466
467 [1 1 1 1 1 2 2 2 3 3 3 3 3 3 ....]
468
469allowing directy paragraph to docnum conversion, at the expense
470of memory.
471
472*/
473 qd->paragraph = NULL;
474
475 if (qd->id->ifh.InvfLevel == 3)
476 {
477 unsigned long magic;
478 FILE *paragraph;
479 int i;
480 char paraFile[512];
481
482 sprintf(paraFile, "%s%s", qd->pathname, INVF_PARAGRAPH_SUFFIX);
483 paragraph = fopen(paraFile, "rb");
484 if (!paragraph)
485 FatalError(1, "Unable to open 'paraFile'.", paraFile);
486
487 fread((void *)&magic, sizeof(magic), 1, paragraph);
488 qd->paragraph = Xmalloc((qd->td->cth.num_of_docs+1)*sizeof(int));
489 qd->paragraph[0] = 0;
490 for (i = 1; i <= qd->td->cth.num_of_docs; i++)
491 {
492 int count;
493
494 if (fread((void *)&count, sizeof(count), 1, paragraph) != 1)
495 FatalError(1, "Unexpected EOF while reading '%s'.", paraFile);
496 NTOHSI(count); /* [RPAP - Jan 97: Endian Ordering] */
497 qd->paragraph[i] = qd->paragraph[i-1]+count;
498 }
499
500 fclose (paragraph); /* [RJM 07/98: Memory Leak] */
501 }
502
503
504#endif
505
506#ifdef TREC_MODE
507 {
508 extern char *trec_ids;
509 extern long *trec_paras;
510 int size;
511 char FileName[512];
512 FILE *f;
513 if (!strstr (qd->pathname, "trec"))
514 goto error;
515 sprintf (FileName, "%s%s", qd->pathname, ".DOCIDS");
516 if (!(f = fopen (FileName, "rb"))) /* [RPAP - Feb 97: WIN32 Port] */
517 {
518 Message ("Unable to open \"%s\"", FileName);
519 goto error;
520 }
521 fseek (f, 0, 2);
522 size = ftell (f);
523 fseek (f, 0, 0);
524 trec_ids = Xmalloc (size);
525 if (!trec_ids)
526 {
527 fclose (f);
528 goto error;
529 }
530 fread (trec_ids, 1, size, f);
531 fclose (f);
532 if (qd->id->ifh.InvfLevel == 3)
533 {
534 int i, d;
535 unsigned long magic;
536 trec_paras = Xmalloc (qd->sd->sdh.num_of_docs * sizeof (long));
537 if (!trec_paras)
538 {
539 Xfree (trec_ids);
540 trec_ids = NULL;
541 goto error;
542 }
543 sprintf (FileName, "%s%s", qd->pathname, INVF_PARAGRAPH_SUFFIX);
544 if (!(f = fopen (FileName, "rb"))) /* [RPAP - Feb 97: WIN32 Port] */
545 {
546 Message ("Unable to open \"%s\"", FileName);
547 goto error;
548 }
549 if (fread ((char *) &magic, sizeof (magic), 1, f) != 1 ||
550 NTOHUL(magic) != MAGIC_PARAGRAPH) /* [RPAP - Jan 97: Endian Ordering] */
551 {
552 fclose (f);
553 Message ("Bad magic number in \"%s\"", FileName);
554 goto error;
555 }
556
557 for (d = i = 0; i < qd->td->cth.num_of_docs; i++)
558 {
559 int count;
560 if (fread ((char *) &count, sizeof (count), 1, f) != 1)
561 {
562 fclose (f);
563 goto error;
564 }
565 NTOHSI(count); /* [RPAP - Jan 97: Endian Ordering] */
566 while (count--)
567 trec_paras[d++] = i;
568 }
569 fclose (f);
570 }
571 goto ok;
572 error:
573 if (trec_ids)
574 Xfree (trec_ids);
575 if (trec_paras)
576 Xfree (trec_paras);
577 trec_ids = NULL;
578 trec_paras = NULL;
579 ok:
580 ;
581 }
582#endif
583
584 if (iqt)
585 GetTime (&iqt->Text);
586
587 return (qd);
588}
589
590
591
592
593
594
595/*
596 * Change the amount of memory currently in use
597 *
598 */
599void
600ChangeMemInUse (query_data * qd, long delta)
601{
602 qd->mem_in_use += delta;
603 if (qd->mem_in_use > qd->max_mem_in_use)
604 qd->max_mem_in_use = qd->mem_in_use;
605}
606
607
608void
609FinishQuerySystem (query_data * qd)
610{
611/* [RJM 07/98: Memory Leak] */
612#if defined(PARADOCNUM) || defined(NZDL)
613 if (qd->paragraph != NULL) {
614 Xfree (qd->paragraph);
615 qd->paragraph = NULL;
616 }
617#endif
618
619 FreeTextData (qd->td);
620 FreeInvfData (qd->id);
621 FreeCompDict (qd->cd);
622 if (qd->awd)
623 FreeWeights (qd->awd);
624 FreeStemDict (qd->sd);
625 close_all_files (qd);
626 Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
627 Xfree (qd->pathname);
628 FreeQueryDocs (qd);
629 if (qd->TL != NULL) FreeTermList(&qd->TL); /* [RJM 07/98: Memory Leak] */
630 if (qd->QTL != NULL) FreeQueryTermList(&qd->QTL); /* [RJM 07/98: Memory Leak] */
631 Xfree (qd);
632
633 /* other global stuff hanging around */
634 MgErrorDeinit ();
635}
636
637
638void
639ResetFileStats (query_data * qd)
640{
641 ZeroFileStats (qd->File_text);
642 if (qd->File_comp_dict)
643 ZeroFileStats (qd->File_comp_dict);
644 if (qd->File_fast_comp_dict)
645 ZeroFileStats (qd->File_fast_comp_dict);
646 ZeroFileStats (qd->File_stem);
647
648 /* [RPAP - Jan 97: Stem Index Change] */
649 if (qd->File_stem1)
650 ZeroFileStats (qd->File_stem1);
651 if (qd->File_stem2)
652 ZeroFileStats (qd->File_stem2);
653 if (qd->File_stem3)
654 ZeroFileStats (qd->File_stem3);
655
656 ZeroFileStats (qd->File_invf);
657 if (qd->File_text_idx_wgt)
658 ZeroFileStats (qd->File_text_idx_wgt);
659 if (qd->File_weight_approx)
660 ZeroFileStats (qd->File_weight_approx);
661 if (qd->File_text_idx)
662 ZeroFileStats (qd->File_text_idx);
663}
664
665
666void
667TransFileStats (query_data * qd)
668{
669 qd->File_text->Current = qd->File_text->Cumulative;
670 if (qd->File_comp_dict)
671 qd->File_comp_dict->Current = qd->File_comp_dict->Cumulative;
672 if (qd->File_fast_comp_dict)
673 qd->File_fast_comp_dict->Current = qd->File_fast_comp_dict->Cumulative;
674 qd->File_stem->Current = qd->File_stem->Cumulative;
675
676 /* [RPAP - Jan 97: Stem Index Change] */
677 if (qd->File_stem1)
678 qd->File_stem1->Current = qd->File_stem1->Cumulative;
679 if (qd->File_stem2)
680 qd->File_stem2->Current = qd->File_stem2->Cumulative;
681 if (qd->File_stem3)
682 qd->File_stem3->Current = qd->File_stem3->Cumulative;
683
684 qd->File_invf->Current = qd->File_invf->Cumulative;
685 if (qd->File_text_idx_wgt)
686 qd->File_text_idx_wgt->Current = qd->File_text_idx_wgt->Cumulative;
687 if (qd->File_weight_approx)
688 qd->File_weight_approx->Current = qd->File_weight_approx->Cumulative;
689 if (qd->File_text_idx)
690 qd->File_text_idx->Current = qd->File_text_idx->Cumulative;
691}
692
693
694void
695FreeTextBuffer (query_data * qd)
696{
697 if (qd->TextBuffer)
698 {
699 Xfree (qd->TextBuffer);
700 ChangeMemInUse (qd, -qd->TextBufferLen);
701 }
702 qd->TextBuffer = NULL;
703 qd->TextBufferLen = 0;
704}
705
706void
707FreeQueryDocs (query_data * qd)
708{
709 qd->doc_pos = 0;
710 qd->buf_in_use = 0;
711 if (qd->DL)
712 {
713 int i;
714 for (i = 0; i < qd->DL->num; i++)
715 if (qd->DL->DE[i].CompTextBuffer)
716 {
717 Xfree (qd->DL->DE[i].CompTextBuffer);
718 qd->DL->DE[i].CompTextBuffer = NULL;
719 ChangeMemInUse (qd, -qd->DL->DE[i].Len);
720 }
721 Xfree (qd->DL);
722 }
723 qd->DL = NULL;
724 FreeTextBuffer (qd);
725}
726
727int
728LoadCompressedText (query_data * qd, int max_mem)
729{
730 DocEntry *DE;
731 if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
732 return -1;
733
734 DE = &qd->DL->DE[qd->doc_pos];
735 if (!DE->CompTextBuffer)
736 {
737 int i;
738 DocEntry *de;
739 for (i = 0, de = qd->DL->DE; i < qd->DL->num; i++, de++)
740 if (de->CompTextBuffer)
741 {
742 Xfree (de->CompTextBuffer);
743 de->CompTextBuffer = NULL;
744 ChangeMemInUse (qd, -de->Len);
745 }
746 if (LoadBuffers (qd, &qd->DL->DE[qd->doc_pos], max_mem,
747 qd->DL->num - qd->doc_pos) == -1)
748 return -1;
749 }
750 return 0;
751}
752
753int
754GetDocNum (query_data * qd)
755{
756 if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
757 return -1;
758 return qd->DL->DE[qd->doc_pos].DocNum;
759}
760
761DocEntry *
762GetDocChain (query_data * qd)
763{
764 if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
765 return NULL;
766 return &(qd->DL->DE[qd->doc_pos]);
767}
768
769float
770GetDocWeight (query_data * qd)
771{
772 if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
773 return -1;
774 return qd->DL->DE[qd->doc_pos].Weight;
775}
776
777long
778GetDocCompLength (query_data * qd)
779{
780 if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
781 return -1;
782 return qd->DL->DE[qd->doc_pos].Len;
783}
784
785
786u_char *
787GetDocText (query_data * qd, unsigned long *len)
788{
789 DocEntry *DE;
790 int ULen;
791 if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
792 return NULL;
793
794 DE = &qd->DL->DE[qd->doc_pos];
795
796 if (!DE->CompTextBuffer)
797 {
798 fprintf (stderr, "The compressed text buffer is NULL\n");
799 mg_errno = MG_NOMEM;
800 return (NULL);
801 }
802
803 FreeTextBuffer (qd);
804
805 qd->TextBufferLen = (int) (qd->td->cth.ratio * 1.01 *
806 DE->Len) + 100;
807 if (!(qd->TextBuffer = Xmalloc (qd->TextBufferLen)))
808 {
809 fprintf (stderr, "No memory for TextBuffer\n");
810 mg_errno = MG_NOMEM;
811 return (NULL);
812 }
813
814 DecodeText (qd->cd, (u_char *) (DE->CompTextBuffer), DE->Len,
815 (u_char *) (qd->TextBuffer), &ULen);
816 qd->TextBuffer[ULen] = '\0';
817
818 if (ULen >= qd->TextBufferLen)
819 {
820 fprintf (stderr, "%d >= %d\n", ULen, qd->TextBufferLen);
821 mg_errno = MG_BUFTOOSMALL;
822 return (NULL);
823 }
824
825 if (len)
826 *len = ULen;
827
828 return qd->TextBuffer;
829}
830
831int
832NextDoc (query_data * qd)
833{
834 if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
835 return 0;
836 qd->doc_pos++;
837 return qd->doc_pos < qd->DL->num;
838}
Note: See TracBrowser for help on using the repository browser.