source: trunk/gsdl/src/mgpp/text/backend.cpp@ 711

Last change on this file since 711 was 711, checked in by cs025, 25 years ago

Changes to eradicate Xmalloc

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 22.8 KB
Line 
1/**************************************************************************
2 *
3 * backend.c -- Underlying routines for mgquery
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: backend.cpp 711 1999-10-17 23:43:31Z cs025 $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "memlib.h"
27#include "messages.h"
28#include "timing.h"
29#include "filestats.h"
30#include "sptree.h"
31#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
32
33
34#include "mg_files.h"
35#include "mg.h"
36#include "invf.h"
37#include "text.h"
38#include "lists.h"
39#include "backend.h"
40#include "stem_search.h"
41#include "StemBlock.h"
42#include "invf_get.h"
43#include "text_get.h"
44#include "weights.h"
45#include "locallib.h"
46#include "mg_errors.h"
47#include "DocEntry.h"
48
49static File *
50OpenFile (char *base, char *suffix, unsigned long magic, int *ok)
51{
52 char FileName[512];
53 File *F;
54 sprintf (FileName, "%s%s", base, suffix);
55 if (!(F = Fopen (FileName, "rb", 0))) /* [RPAP - Feb 97: WIN32 Port] */
56 {
57 mg_errno = MG_NOFILE;
58 MgErrorData (FileName);
59 if (ok)
60 *ok = 0;
61 return (NULL);
62 }
63 if (magic)
64 {
65 unsigned long m;
66 if (fread ((char *) &m, sizeof (m), 1, F->f) == 0)
67 {
68 mg_errno = MG_READERR;
69 MgErrorData (FileName);
70 if (ok)
71 *ok = 0;
72 Fclose (F);
73 return (NULL);
74 }
75 NTOHUL(m); /* [RPAP - Jan 97: Endian Ordering] */
76 if (m != magic)
77 {
78 mg_errno = MG_BADMAGIC;
79 MgErrorData (FileName);
80 if (ok)
81 *ok = 0;
82 Fclose (F);
83 return (NULL);
84 }
85 }
86 return (F);
87}
88
89/* --
90 -- GRB: Open text files only; based on open_all_files in MG1.2.1;
91 -- should only be called after open_index_files
92 --
93*/
94static int
95open_text_files (query_data * qd)
96{
97 int ok = 1;
98
99 qd->File_text = OpenFile (qd->textpathname, TEXT_SUFFIX, /* [RJM 06/97: text filename] */
100 MAGIC_TEXT, &ok);
101 qd->File_fast_comp_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
102 TEXT_DICT_FAST_SUFFIX, MAGIC_FAST_DICT, NULL);
103 if (!qd->File_fast_comp_dict)
104 {
105 qd->File_comp_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
106 TEXT_DICT_SUFFIX, MAGIC_DICT, &ok);
107 qd->File_aux_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
108 TEXT_DICT_AUX_SUFFIX, MAGIC_AUX_DICT, NULL);
109 }
110 else
111 qd->File_comp_dict = qd->File_aux_dict = NULL;
112
113 /* This will fail if a level 1 inverted file was created because there
114 will be no document weights */
115 qd->File_text_idx_wgt = OpenFile (qd->pathname, TEXT_IDX_WGT_SUFFIX,
116 MAGIC_TEXI_WGT, NULL);
117 if (qd->File_text_idx_wgt == NULL && qd->File_weight_approx == NULL)
118 qd->File_text_idx = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
119 TEXT_IDX_SUFFIX, MAGIC_TEXI, NULL);
120 else
121 qd->File_text_idx = NULL;
122
123 /**
124 * GRB: Additional check; if we didn't get idx_wgt and we are on
125 * a level 2 or later inversion fileset, then error.
126 * As the id structure may not have been initialised, only
127 * error if it has been; we'll assume that if it isn't
128 * initialised yet then something bespoke is happening and
129 * the programmer should deal with it;
130 * open_all_files() calls this fn after open_index_files,
131 * in which case id is NULL if when open_all_files is called
132 * inside __InitQuerySystem (ie. when MG is just run as normal).
133 * Programmers doing their own thing should prepare their
134 * code to cope with the 2-stage initialisation of level 2
135 * or higher indexes. The idx_wgt index is only used in
136 * retrieving text; it has no effect on indexed searches.
137 */
138 if (qd->id != NULL &&
139 ((qd->allfiles == 1 && qd->File_text_idx_wgt == NULL) ||
140 qd->File_weight_approx == NULL) &&
141 qd->id->ifh.InvfLevel >= 2)
142 { ok = 0;
143 }
144
145
146 if (!ok)
147 { if (qd->File_text)
148 Fclose (qd->File_text);
149 if (qd->File_fast_comp_dict)
150 Fclose (qd->File_fast_comp_dict);
151 if (qd->File_aux_dict)
152 Fclose (qd->File_aux_dict);
153 if (qd->File_comp_dict)
154 Fclose (qd->File_comp_dict);
155
156 if (qd->File_text_idx_wgt)
157 Fclose (qd->File_text_idx_wgt);
158 if (qd->File_text_idx)
159 Fclose (qd->File_text_idx);
160 return (-1);
161 }
162 return (0);
163
164}
165
166/* --
167 -- GRB: Open index files only; based on open_all_files in MG1.2.1
168 --
169*/
170static int open_index_files(query_data *qd)
171{
172 int ok = 1;
173
174 qd->File_text = NULL;
175 qd->File_fast_comp_dict = NULL;
176 qd->File_comp_dict = NULL;
177 qd->File_aux_dict = NULL;
178
179 qd->File_stem = OpenFile (qd->pathname, INVF_DICT_BLOCKED_SUFFIX,
180 MAGIC_STEM, &ok);
181
182 /* [RPAP - Jan 97: Stem Index Change]
183 These will fail if collection not built with stem indexes */
184 qd->File_stem1 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_1_SUFFIX,
185 MAGIC_STEM_1, NULL);
186 qd->File_stem2 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_2_SUFFIX,
187 MAGIC_STEM_2, NULL);
188 qd->File_stem3 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_3_SUFFIX,
189 MAGIC_STEM_3, NULL);
190
191 qd->File_invf = OpenFile (qd->pathname, INVF_SUFFIX,
192 MAGIC_INVF, &ok);
193
194 /* Ths will fail if a level 1 inverted file was created because there
195 will be no document weights */
196 qd->File_weight_approx = OpenFile (qd->pathname, APPROX_WEIGHTS_SUFFIX,
197 MAGIC_WGHT_APPROX, NULL);
198
199 qd->File_text_idx_wgt = NULL;
200 qd->File_text_idx = NULL;
201
202 if (!ok)
203 { Fclose (qd->File_stem);
204
205 /* [RPAP - Jan 97: Stem Index Change] */
206 if (qd->File_stem1)
207 Fclose (qd->File_stem1);
208 if (qd->File_stem2)
209 Fclose (qd->File_stem2);
210 if (qd->File_stem3)
211 Fclose (qd->File_stem3);
212
213 Fclose (qd->File_invf);
214 if (qd->File_weight_approx)
215 Fclose (qd->File_weight_approx);
216 return (-1);
217 }
218 return (0);
219
220}
221
222static int
223open_all_files (query_data * qd)
224{
225 int ok = 1;
226
227 if (open_index_files(qd) != 0)
228 {
229 return -1;
230 }
231 if (open_text_files(qd) != 0)
232 {
233 return -1;
234 }
235 return 0;
236}
237
238static void
239close_all_files (query_data * qd)
240{ if (qd->File_text)
241 Fclose (qd->File_text);
242 if (qd->File_fast_comp_dict)
243 Fclose (qd->File_fast_comp_dict);
244 if (qd->File_aux_dict)
245 Fclose (qd->File_aux_dict);
246 if (qd->File_comp_dict)
247 Fclose (qd->File_comp_dict);
248 Fclose (qd->File_stem);
249
250 /* [RPAP - Jan 97: Stem Index Change] */
251 if (qd->File_stem1)
252 Fclose (qd->File_stem1);
253 if (qd->File_stem2)
254 Fclose (qd->File_stem2);
255 if (qd->File_stem3)
256 Fclose (qd->File_stem3);
257
258 Fclose (qd->File_invf);
259 if (qd->File_text_idx_wgt)
260 Fclose (qd->File_text_idx_wgt);
261 if (qd->File_weight_approx)
262 Fclose (qd->File_weight_approx);
263 if (qd->File_text_idx)
264 Fclose (qd->File_text_idx);
265}
266
267/* If textname is equal to null then name will be used instead */
268/* [RJM 06/97: text filename] */
269
270/**
271 * The following define and the variable abortvar in the following function are designed
272 * to provide an exception-like recovery system for when an error occurs.
273 */
274#define abort(x) abortvar=x;goto Recovery;
275
276query_data *
277__InitQuerySystem (char *dir, char *name, char *textname, InitQueryTimes * iqt, int allfiles)
278{
279 query_data *qd;
280 char *s;
281 int abortvar;
282
283 if (textname == NULL) textname = name; /* [RJM 06/97: text filename] */
284
285 if (!(qd = new query_data))
286 {
287 abort(0);
288 }
289
290 bzero ((char *) qd, sizeof (*qd));
291
292 /* -- GRB: 13/09/99: note if all files were requested */
293 qd->allfiles = allfiles;
294
295 qd->mem_in_use = qd->max_mem_in_use = 0;
296
297 qd->doc_pos = qd->buf_in_use = 0;
298 qd->TextBufferLen = 0;
299 qd->DL = NULL;
300
301 /* [RPAP - Feb 97: Term Frequency] */
302 qd->TL = NULL;
303 qd->QTL = NULL;
304
305 qd->TextBuffer = NULL;
306
307 qd->tot_hops_taken = 0;
308 qd->tot_num_of_ptrs = 0;
309 qd->tot_num_of_accum = 0;
310 qd->tot_num_of_terms = 0;
311 qd->tot_num_of_ans = 0;
312 qd->tot_text_idx_lookups = 0;
313
314 qd->hops_taken = 0;
315 qd->num_of_ptrs = 0;
316 qd->num_of_accum = 0;
317 qd->num_of_terms = 0;
318 qd->num_of_ans = 0;
319 qd->text_idx_lookups = 0;
320
321 qd->pathname = NULL; /* RJM 06/97: text filename] */
322 qd->textpathname = NULL; /* RJM 06/97: text filename] */
323
324 s = strrchr (dir, '/');
325 if (s && *(s + 1) == '\0')
326 {
327 /* [RJM 06/97: text filename] */
328 if (!(qd->pathname = new char[strlen (dir) + strlen (name) + 1]) ||
329 !(qd->textpathname = new char[strlen (dir) + strlen (textname) + 1]))
330 {
331 abort(1);
332 }
333 sprintf (qd->pathname, "%s%s", dir, name);
334 sprintf (qd->textpathname, "%s%s", dir, textname); /* [RJM 06/97: text filename] */
335 }
336
337 else
338 {
339 /* [RJM 06/97: text filename] */
340 if (!(qd->pathname = new char[strlen (dir) + strlen (name) + 2]) ||
341 !(qd->textpathname = new char[strlen (dir) + strlen (textname) + 2]))
342 {
343 abort(1);
344 }
345/* [RPAP - Feb 97: WIN32 Port] */
346#ifdef __WIN32__
347 if (dir == NULL || dir[0] == '\0') {
348 sprintf (qd->pathname, "%s", name);
349 sprintf (qd->textpathname, "%s", textname); /* [RJM 06/97: text filename] */
350 } else {
351 sprintf (qd->pathname, "%s%s", dir, name);
352 sprintf (qd->textpathname, "%s%s", dir, textname); /* [RJM 06/97: text filename] */
353 }
354#else
355 sprintf (qd->pathname, "%s/%s", dir, name);
356 sprintf (qd->textpathname, "%s/%s", dir, textname); /* [RJM 06/97: text filename] */
357#endif
358 }
359
360 if (((allfiles == 1) && (open_all_files (qd) == -1)) ||
361 ((allfiles == 0) && (open_index_files (qd) == -1)))
362 {
363 abort(2);
364 return (NULL);
365 }
366
367 if (iqt)
368 GetTime (&iqt->Start);
369
370 /* Initialise the stemmed dictionary system */
371 if (!(qd->sd = ReadStemDictBlk (qd->File_stem, _MGErr)))
372 {
373 abort(3);
374 }
375
376 /* [RPAP - Jan 97: Stem Index Change] */
377 if ((qd->sd->sdh.indexed & 7) && qd->File_stem1 && qd->File_stem2 && qd->File_stem3)
378 {
379 if (!(qd->sd->stem1 = ReadStemIdxBlk (qd->File_stem1)))
380 {
381 abort(4);
382 }
383 if (!(qd->sd->stem2 = ReadStemIdxBlk (qd->File_stem2)))
384 {
385 abort(4);
386 }
387 if (!(qd->sd->stem3 = ReadStemIdxBlk (qd->File_stem3)))
388 {
389 abort(4);
390 }
391 }
392 else if (qd->sd->sdh.indexed != 0)
393 {
394 abort(4);
395 }
396 else
397 {
398 if (qd->File_stem1)
399 Fclose (qd->File_stem1);
400 if (qd->File_stem2)
401 Fclose (qd->File_stem2);
402 if (qd->File_stem3)
403 Fclose (qd->File_stem3);
404 qd->File_stem1 = NULL;
405 qd->File_stem2 = NULL;
406 qd->File_stem3 = NULL;
407 qd->sd->stem1 = NULL;
408 qd->sd->stem2 = NULL;
409 qd->sd->stem3 = NULL;
410 }
411
412 if (iqt)
413 GetTime (&iqt->StemDict);
414 if (qd->File_weight_approx)
415 {
416 if (!(qd->awd = LoadDocWeights (qd->File_weight_approx,
417 qd->sd->sdh.num_of_docs)))
418 {
419 abort(4);
420 }
421 }
422 else
423 qd->awd = NULL;
424
425
426 if (iqt)
427 GetTime (&iqt->ApproxWeights);
428
429 if ((allfiles == 1) &&
430 !(qd->cd = LoadCompDict (qd->File_comp_dict, qd->File_aux_dict,
431 qd->File_fast_comp_dict)))
432 {
433 abort(5);
434 }
435 else if (allfiles == 0)
436 {
437 qd->cd = NULL;
438 }
439
440 if (iqt)
441 GetTime (&iqt->CompDict);
442
443 if (!(qd->id = InitInvfFile (qd->File_invf, qd->sd)))
444 {
445 abort(6);
446 }
447 if (((allfiles == 1 && qd->File_text_idx_wgt == NULL) ||
448 qd->File_weight_approx == NULL) &&
449 qd->id->ifh.InvfLevel >= 2)
450 {
451 abort(7);
452 }
453 if (iqt)
454 GetTime (&iqt->Invf);
455
456 if ((allfiles == 1) &&
457 !(qd->td = LoadTextData (qd->File_text, qd->File_text_idx_wgt,
458 qd->File_text_idx)))
459 {
460 abort(8);
461 }
462 else if (allfiles == 0)
463 {
464 qd->td = NULL;
465 }
466
467/* [RPAP - Feb 97: NZDL Additions] */
468#if defined(PARADOCNUM) || defined(NZDL)
469
470/*
471
472This code is based on the TREC_MODE code below to read the .paragraph
473file to determine what document numbers correspond to what paragraphs.
474This code is more space efficient, reading in the .paragraph file
475into memory as an accumulate docnum array. Eg. the .paragraph may contain
476
477 [5 3 6 4 7 9 4]
478
479indicating the first document has 5 paragraphs, the next 3, etc.
480This will be stored in memory as
481
482 [0 5 8 14 18 25 34 38]
483
484so a binary search can be performed. The first 0 is for convenience;
485it prevents testing boundary conditions.
486
487
488The TREC_MODE code does this differently; it stores the array
489
490 [1 1 1 1 1 2 2 2 3 3 3 3 3 3 ....]
491
492allowing directy paragraph to docnum conversion, at the expense
493of memory.
494
495*/
496 qd->paragraph = NULL;
497
498 if ((allfiles == 1) && (qd->id->ifh.InvfLevel == 3))
499 {
500 unsigned long magic;
501 FILE *paragraph;
502 int i;
503 char paraFile[512];
504
505 sprintf(paraFile, "%s%s", qd->pathname, INVF_PARAGRAPH_SUFFIX);
506 paragraph = fopen(paraFile, "rb");
507 if (!paragraph)
508 FatalError(1, "Unable to open 'paraFile'.", paraFile);
509
510 fread((void *)&magic, sizeof(magic), 1, paragraph);
511 qd->paragraph = new int[qd->td->cth.num_of_docs+1];
512 qd->paragraph[0] = 0;
513 for (i = 1; i <= qd->td->cth.num_of_docs; i++)
514 {
515 int count;
516
517 if (fread((void *)&count, sizeof(count), 1, paragraph) != 1)
518 FatalError(1, "Unexpected EOF while reading '%s'.", paraFile);
519 NTOHSI(count); /* [RPAP - Jan 97: Endian Ordering] */
520 qd->paragraph[i] = qd->paragraph[i-1]+count;
521 }
522
523 fclose (paragraph); /* [RJM 07/98: Memory Leak] */
524 }
525
526
527#endif
528
529#ifdef TREC_MODE
530 {
531 extern char *trec_ids;
532 extern long *trec_paras;
533 int size;
534 char FileName[512];
535 FILE *f;
536 if (!strstr (qd->pathname, "trec"))
537 goto error;
538 sprintf (FileName, "%s%s", qd->pathname, ".DOCIDS");
539 if (!(f = fopen (FileName, "rb"))) /* [RPAP - Feb 97: WIN32 Port] */
540 {
541 Message ("Unable to open \"%s\"", FileName);
542 goto error;
543 }
544 fseek (f, 0, 2);
545 size = ftell (f);
546 fseek (f, 0, 0);
547 trec_ids = new char[size];
548 if (!trec_ids)
549 {
550 fclose (f);
551 goto error;
552 }
553 fread (trec_ids, 1, size, f);
554 fclose (f);
555 if ((allfiles == 1) && (qd->id->ifh.InvfLevel == 3))
556 {
557 int i, d;
558 unsigned long magic;
559 trec_paras = new long[qd->sd->sdh.num_of_docs];
560 if (!trec_paras)
561 {
562 delete trec_ids;
563 trec_ids = NULL;
564 goto error;
565 }
566 sprintf (FileName, "%s%s", qd->pathname, INVF_PARAGRAPH_SUFFIX);
567 if (!(f = fopen (FileName, "rb"))) /* [RPAP - Feb 97: WIN32 Port] */
568 {
569 Message ("Unable to open \"%s\"", FileName);
570 goto error;
571 }
572 if (fread ((char *) &magic, sizeof (magic), 1, f) != 1 ||
573 NTOHUL(magic) != MAGIC_PARAGRAPH) /* [RPAP - Jan 97: Endian Ordering] */
574 {
575 fclose (f);
576 Message ("Bad magic number in \"%s\"", FileName);
577 goto error;
578 }
579
580 for (d = i = 0; i < qd->td->cth.num_of_docs; i++)
581 {
582 int count;
583 if (fread ((char *) &count, sizeof (count), 1, f) != 1)
584 {
585 fclose (f);
586 goto error;
587 }
588 NTOHSI(count); /* [RPAP - Jan 97: Endian Ordering] */
589 while (count--)
590 trec_paras[d++] = i;
591 }
592 fclose (f);
593 }
594 goto ok;
595 error:
596 if (trec_ids)
597 delete trec_ids;
598 if (trec_paras)
599 delete (trec_paras);
600 trec_ids = NULL;
601 trec_paras = NULL;
602 ok:
603 ;
604 }
605#endif
606
607 if (iqt)
608 GetTime (&iqt->Text);
609
610 return (qd);
611
612 Recovery:
613 switch (abortvar)
614 {
615 case 8: // NB: 8 indicates a failure after the loading of the inverted file
616 case 7: // NB: 7 indicates a failure during the loading of the inverted file
617 FreeInvfData (qd->id);
618 case 6:
619 if (qd->cd)
620 FreeCompDict (qd->cd);
621 case 5:
622 if (qd->awd)
623 {
624 FreeWeights (qd->awd);
625 }
626 case 4:
627 FreeStemDict (qd->sd);
628 case 3:
629 close_all_files (qd);
630 case 2:
631 delete (qd->textpathname); /* [RJM 06/97: text filename] */
632 case 1:
633 if (qd->pathname)
634 delete (qd->pathname); /* [RJM 06/97: text filename] */
635 delete qd;
636 break;
637 }
638
639 /* Set mg error status for particular failures; NB not every failure results in an
640 * mg error value being set.
641 */
642 if (abortvar < 2)
643 mg_errno = MG_NOMEM;
644 else if (abortvar == 7)
645 mg_errno = MG_INVERSION;
646 return (NULL);
647}
648
649
650
651query_data *
652InitQuerySystem (char *dir, char *name, char *textname, InitQueryTimes * iqt)
653{ return __InitQuerySystem(dir, name, textname, iqt, 1);
654}
655
656// This function initialises the query system without loading the text dictionary etc.
657query_data *
658InitQuerySystemNT (char *dir, char *name, char *textname, InitQueryTimes * iqt)
659{ return __InitQuerySystem(dir, name, textname, iqt, 0);
660}
661
662/*
663 * Change the amount of memory currently in use
664 *
665 */
666void
667ChangeMemInUse (query_data * qd, long delta)
668{
669 qd->mem_in_use += delta;
670 if (qd->mem_in_use > qd->max_mem_in_use)
671 qd->max_mem_in_use = qd->mem_in_use;
672}
673
674
675void
676FinishQuerySystem (query_data * qd)
677{
678/* [RJM 07/98: Memory Leak] */
679#if defined(PARADOCNUM) || defined(NZDL)
680 if (qd->paragraph != NULL) {
681 delete (qd->paragraph);
682 qd->paragraph = NULL;
683 }
684#endif
685
686 if (qd->td != NULL)
687 {
688 FreeTextData (qd->td);
689 }
690 FreeInvfData (qd->id);
691 if (qd->cd)
692 FreeCompDict (qd->cd);
693
694 if (qd->awd)
695 FreeWeights (qd->awd);
696 FreeStemDict (qd->sd);
697 close_all_files (qd);
698 delete (qd->textpathname); /* [RJM 06/97: text filename] */
699 delete (qd->pathname);
700 QueryData_FreeQueryDocs (qd);
701 if (qd->TL != NULL) TermList_destroy(&qd->TL); /* [RJM 07/98: Memory Leak] */
702 if (qd->QTL != NULL) QueryTermList_free(&qd->QTL); /* [RJM 07/98: Memory Leak] */
703 delete qd;
704
705 /* other global stuff hanging around */
706 MgErrorDeinit ();
707}
708
709
710void
711ResetFileStats (query_data * qd)
712{
713 if (qd->File_text)
714 ZeroFileStats (qd->File_text);
715 if (qd->File_comp_dict)
716 ZeroFileStats (qd->File_comp_dict);
717 if (qd->File_fast_comp_dict)
718 ZeroFileStats (qd->File_fast_comp_dict);
719 ZeroFileStats (qd->File_stem);
720
721 /* [RPAP - Jan 97: Stem Index Change] */
722 if (qd->File_stem1)
723 ZeroFileStats (qd->File_stem1);
724 if (qd->File_stem2)
725 ZeroFileStats (qd->File_stem2);
726 if (qd->File_stem3)
727 ZeroFileStats (qd->File_stem3);
728
729 ZeroFileStats (qd->File_invf);
730 if (qd->File_text_idx_wgt)
731 ZeroFileStats (qd->File_text_idx_wgt);
732 if (qd->File_weight_approx)
733 ZeroFileStats (qd->File_weight_approx);
734 if (qd->File_text_idx)
735 ZeroFileStats (qd->File_text_idx);
736}
737
738
739void
740TransFileStats (query_data * qd)
741{
742 qd->File_text->Current = qd->File_text->Cumulative;
743 if (qd->File_comp_dict)
744 qd->File_comp_dict->Current = qd->File_comp_dict->Cumulative;
745 if (qd->File_fast_comp_dict)
746 qd->File_fast_comp_dict->Current = qd->File_fast_comp_dict->Cumulative;
747 qd->File_stem->Current = qd->File_stem->Cumulative;
748
749 /* [RPAP - Jan 97: Stem Index Change] */
750 if (qd->File_stem1)
751 qd->File_stem1->Current = qd->File_stem1->Cumulative;
752 if (qd->File_stem2)
753 qd->File_stem2->Current = qd->File_stem2->Cumulative;
754 if (qd->File_stem3)
755 qd->File_stem3->Current = qd->File_stem3->Cumulative;
756
757 qd->File_invf->Current = qd->File_invf->Cumulative;
758 if (qd->File_text_idx_wgt)
759 qd->File_text_idx_wgt->Current = qd->File_text_idx_wgt->Cumulative;
760 if (qd->File_weight_approx)
761 qd->File_weight_approx->Current = qd->File_weight_approx->Cumulative;
762 if (qd->File_text_idx)
763 qd->File_text_idx->Current = qd->File_text_idx->Cumulative;
764}
765
766
/**
 * Block: Query Document handling;
 *        Provides basic loading/unloading facilities for individual
 *        documents, usually for presentation purposes (searches
 *        are performed on inverted files).
 */
773
774void
775QueryData_FreeTextBuffer (query_data * qd)
776{
777 if (qd->TextBuffer)
778 {
779 delete qd->TextBuffer;
780 ChangeMemInUse (qd, -qd->TextBufferLen);
781 }
782 qd->TextBuffer = NULL;
783 qd->TextBufferLen = 0;
784}
785
786void
787QueryData_FreeQueryDocs (query_data * qd)
788{
789 qd->doc_pos = 0;
790 qd->buf_in_use = 0;
791 if (qd->DL)
792 {
793 //int i;
794 DocList_FreeTextBuffers(qd->DL, qd);
795 /*
796 for (i = 0; i < qd->DL->size(); i++)
797 DocEntry_FreeTextBuffer(&qd->DL->DE[i], qd);
798
799 if (qd->DL->DE[i].CompTextBuffer)
800 {
801 delete qd->DL->DE[i].CompTextBuffer;
802 qd->DL->DE[i].CompTextBuffer = NULL;
803 ChangeMemInUse (qd, -qd->DL->DE[i].Len);
804 }
805 */
806 DocList_destroy(qd->DL);
807 }
808 qd->DL = NULL;
809 QueryData_FreeTextBuffer (qd);
810}
811
812int
813LoadCompressedText (query_data * qd, int max_mem)
814{
815 DocEntry *DE;
816
817 if (qd->DL == NULL || qd->doc_pos >= qd->DL->size())
818 return -1;
819
820 /**
821 * GRB: added test for td (text data) per addition of loading
822 * indexes only; td may now be NULL; this returns a
823 * special value to indicated that the text was not
824 * loaded; mgquery etc now test on that value
825 * 17/09/99
826 */
827 if (qd->td == NULL)
828 return -2;
829
830 DE = qd->DL->member(qd->doc_pos);
831
832 if (!DocEntry_TextLoaded(DE))
833 {
834 //int i;
835 //DocEntry *de;
836
837 DocList_FreeTextBuffers(qd->DL, qd);
838 /*
839 for (i = 0, de = qd->DL->DE; i < qd->DL->size(); i++, de++)
840 DocEntry_FreeTextBuffer(de);
841 */
842
843 if (LoadBuffers (qd, qd->DL->member(qd->doc_pos), max_mem,
844 qd->DL->size() - qd->doc_pos) == -1)
845 return -1;
846 }
847 return 0;
848}
849
850
851int
852GetDocNum (query_data * qd)
853{
854 if (qd->DL == NULL || qd->doc_pos >= qd->DL->size())
855 return -1;
856 return qd->DL->member(qd->doc_pos)->docNum();
857}
858
859DocEntry *
860GetDocChain (query_data * qd)
861{
862 if (qd->DL == NULL || qd->doc_pos >= qd->DL->size())
863 return NULL;
864 return qd->DL->member(qd->doc_pos);
865}
866
867float
868GetDocWeight (query_data * qd)
869{
870 if (qd->DL == NULL || qd->doc_pos >= qd->DL->size())
871 return -1;
872 return qd->DL->member(qd->doc_pos)->docWeight();
873}
874
875long
876GetDocCompLength (query_data * qd)
877{
878 if (qd->DL == NULL || qd->doc_pos >= qd->DL->size())
879 return -1;
880 return qd->DL->DE[qd->doc_pos].Len;
881}
882
883
884u_char *
885GetDocText (query_data * qd, unsigned long *len)
886{
887 DocEntry *DE;
888 int ULen;
889 if (qd->DL == NULL || qd->doc_pos >= qd->DL->size())
890 return NULL;
891
892 DE = qd->DL->member(qd->doc_pos);
893
894 if (!DocEntry_TextLoaded(DE))
895 {
896 fprintf (stderr, "The compressed text buffer is NULL\n");
897 mg_errno = MG_NOMEM;
898 return (NULL);
899 }
900
901 QueryData_FreeTextBuffer (qd);
902
903 qd->TextBufferLen = (int) (qd->td->cth.ratio * 1.01 *
904 DocEntry_length(DE)) + 100;
905 if (!(qd->TextBuffer = new unsigned char[qd->TextBufferLen]))
906 {
907 fprintf (stderr, "No memory for TextBuffer\n");
908 mg_errno = MG_NOMEM;
909 return (NULL);
910 }
911
912 DecodeText (qd->cd, (u_char *) DocEntry_TextBuffer(DE), DE->Len,
913 (u_char *) (qd->TextBuffer), &ULen);
914 qd->TextBuffer[ULen] = '\0';
915
916 if (ULen >= qd->TextBufferLen)
917 {
918 fprintf (stderr, "%d >= %d\n", ULen, qd->TextBufferLen);
919 mg_errno = MG_BUFTOOSMALL;
920 return (NULL);
921 }
922
923 if (len)
924 *len = ULen;
925
926 return qd->TextBuffer;
927}
928
929int
930NextDoc (query_data * qd)
931{
932 if (qd->DL == NULL || qd->doc_pos >= qd->DL->size())
933 return 0;
934 qd->doc_pos++;
935 return qd->doc_pos < qd->DL->size();
936}
937
938
Note: See TracBrowser for help on using the repository browser.