[3745] | 1 | /**************************************************************************
|
---|
| 2 | *
|
---|
| 3 | * backend.c -- Underlying routines for mgquery
|
---|
| 4 | * Copyright (C) 1994 Neil Sharman
|
---|
| 5 | *
|
---|
| 6 | * This program is free software; you can redistribute it and/or modify
|
---|
| 7 | * it under the terms of the GNU General Public License as published by
|
---|
| 8 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 9 | * (at your option) any later version.
|
---|
| 10 | *
|
---|
| 11 | * This program is distributed in the hope that it will be useful,
|
---|
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | * GNU General Public License for more details.
|
---|
| 15 | *
|
---|
| 16 | * You should have received a copy of the GNU General Public License
|
---|
| 17 | * along with this program; if not, write to the Free Software
|
---|
| 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 19 | *
|
---|
| 20 | * $Id: backend.c 23508 2010-12-17 01:04:10Z sjm84 $
|
---|
| 21 | *
|
---|
| 22 | **************************************************************************/
|
---|
| 23 |
|
---|
| 24 | #include "sysfuncs.h"
|
---|
| 25 |
|
---|
| 26 | #include "memlib.h"
|
---|
| 27 | #include "messages.h"
|
---|
| 28 | #include "timing.h"
|
---|
| 29 | #include "filestats.h"
|
---|
| 30 | #include "sptree.h"
|
---|
| 31 | #include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 32 |
|
---|
| 33 |
|
---|
| 34 | #include "mg_files.h"
|
---|
| 35 | #include "mg.h"
|
---|
| 36 | #include "invf.h"
|
---|
| 37 | #include "text.h"
|
---|
| 38 | #include "lists.h"
|
---|
| 39 | #include "backend.h"
|
---|
| 40 | #include "stem_search.h"
|
---|
| 41 | #include "invf_get.h"
|
---|
| 42 | #include "text_get.h"
|
---|
| 43 | #include "weights.h"
|
---|
| 44 | #include "locallib.h"
|
---|
| 45 | #include "mg_errors.h"
|
---|
| 46 |
|
---|
| 47 |
|
---|
| 48 | static File *
|
---|
[23508] | 49 | OpenFile (char *base, char *suffix, mg_u_long magic, int *ok)
|
---|
[3745] | 50 | {
|
---|
| 51 | char FileName[512];
|
---|
| 52 | File *F;
|
---|
| 53 | sprintf (FileName, "%s%s", base, suffix);
|
---|
| 54 | if (!(F = Fopen (FileName, "rb", 0))) /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 55 | {
|
---|
| 56 | mg_errno = MG_NOFILE;
|
---|
| 57 | MgErrorData (FileName);
|
---|
| 58 | if (ok)
|
---|
| 59 | *ok = 0;
|
---|
| 60 | return (NULL);
|
---|
| 61 | }
|
---|
| 62 | if (magic)
|
---|
| 63 | {
|
---|
[23508] | 64 | mg_u_long m;
|
---|
[3745] | 65 | if (fread ((char *) &m, sizeof (m), 1, F->f) == 0)
|
---|
| 66 | {
|
---|
| 67 | mg_errno = MG_READERR;
|
---|
| 68 | MgErrorData (FileName);
|
---|
| 69 | if (ok)
|
---|
| 70 | *ok = 0;
|
---|
| 71 | Fclose (F);
|
---|
| 72 | return (NULL);
|
---|
| 73 | }
|
---|
| 74 | NTOHUL(m); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 75 | if (m != magic)
|
---|
| 76 | {
|
---|
| 77 | mg_errno = MG_BADMAGIC;
|
---|
| 78 | MgErrorData (FileName);
|
---|
| 79 | if (ok)
|
---|
| 80 | *ok = 0;
|
---|
| 81 | Fclose (F);
|
---|
| 82 | return (NULL);
|
---|
| 83 | }
|
---|
| 84 | }
|
---|
| 85 | return (F);
|
---|
| 86 | }
|
---|
| 87 |
|
---|
| 88 |
|
---|
| 89 | static int
|
---|
| 90 | open_all_files (query_data * qd)
|
---|
| 91 | {
|
---|
| 92 | int ok = 1;
|
---|
| 93 |
|
---|
| 94 | qd->File_text = OpenFile (qd->textpathname, TEXT_SUFFIX, /* [RJM 06/97: text filename] */
|
---|
| 95 | MAGIC_TEXT, &ok);
|
---|
| 96 | qd->File_fast_comp_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
|
---|
| 97 | TEXT_DICT_FAST_SUFFIX, MAGIC_FAST_DICT, NULL);
|
---|
| 98 | if (!qd->File_fast_comp_dict)
|
---|
| 99 | {
|
---|
| 100 | qd->File_comp_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
|
---|
| 101 | TEXT_DICT_SUFFIX, MAGIC_DICT, &ok);
|
---|
| 102 | qd->File_aux_dict = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
|
---|
| 103 | TEXT_DICT_AUX_SUFFIX, MAGIC_AUX_DICT, NULL);
|
---|
| 104 | }
|
---|
| 105 | else
|
---|
| 106 | qd->File_comp_dict = qd->File_aux_dict = NULL;
|
---|
| 107 |
|
---|
| 108 | qd->File_stem = OpenFile (qd->pathname, INVF_DICT_BLOCKED_SUFFIX,
|
---|
| 109 | MAGIC_STEM, &ok);
|
---|
| 110 |
|
---|
| 111 | /* [RPAP - Jan 97: Stem Index Change]
|
---|
| 112 | These will fail if collection not built with stem indexes */
|
---|
| 113 | qd->File_stem1 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_1_SUFFIX,
|
---|
| 114 | MAGIC_STEM_1, NULL);
|
---|
| 115 | qd->File_stem2 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_2_SUFFIX,
|
---|
| 116 | MAGIC_STEM_2, NULL);
|
---|
| 117 | qd->File_stem3 = OpenFile (qd->pathname, INVF_DICT_BLOCKED_3_SUFFIX,
|
---|
| 118 | MAGIC_STEM_3, NULL);
|
---|
| 119 |
|
---|
| 120 | qd->File_invf = OpenFile (qd->pathname, INVF_SUFFIX,
|
---|
| 121 | MAGIC_INVF, &ok);
|
---|
| 122 |
|
---|
| 123 | /* These will fail if a level 1 inverted file was created because there
|
---|
| 124 | will be no document weights */
|
---|
| 125 | qd->File_text_idx_wgt = OpenFile (qd->pathname, TEXT_IDX_WGT_SUFFIX,
|
---|
| 126 | MAGIC_TEXI_WGT, NULL);
|
---|
| 127 | qd->File_weight_approx = OpenFile (qd->pathname, APPROX_WEIGHTS_SUFFIX,
|
---|
| 128 | MAGIC_WGHT_APPROX, NULL);
|
---|
| 129 | if (qd->File_text_idx_wgt == NULL && qd->File_weight_approx == NULL)
|
---|
| 130 | qd->File_text_idx = OpenFile (qd->textpathname, /* [RJM 06/97: text filename] */
|
---|
| 131 | TEXT_IDX_SUFFIX, MAGIC_TEXI, NULL);
|
---|
| 132 | else
|
---|
| 133 | qd->File_text_idx = NULL;
|
---|
| 134 |
|
---|
| 135 |
|
---|
| 136 | if (!ok)
|
---|
| 137 | {
|
---|
| 138 | Fclose (qd->File_text);
|
---|
| 139 | if (qd->File_fast_comp_dict)
|
---|
| 140 | Fclose (qd->File_fast_comp_dict);
|
---|
| 141 | if (qd->File_comp_dict)
|
---|
| 142 | Fclose (qd->File_comp_dict);
|
---|
| 143 | Fclose (qd->File_stem);
|
---|
| 144 |
|
---|
| 145 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
| 146 | if (qd->File_stem1)
|
---|
| 147 | Fclose (qd->File_stem1);
|
---|
| 148 | if (qd->File_stem2)
|
---|
| 149 | Fclose (qd->File_stem2);
|
---|
| 150 | if (qd->File_stem3)
|
---|
| 151 | Fclose (qd->File_stem3);
|
---|
| 152 |
|
---|
| 153 | Fclose (qd->File_invf);
|
---|
| 154 | if (qd->File_text_idx_wgt)
|
---|
| 155 | Fclose (qd->File_text_idx_wgt);
|
---|
| 156 | if (qd->File_weight_approx)
|
---|
| 157 | Fclose (qd->File_weight_approx);
|
---|
| 158 | if (qd->File_text_idx)
|
---|
| 159 | Fclose (qd->File_text_idx);
|
---|
| 160 | return (-1);
|
---|
| 161 | }
|
---|
| 162 | return (0);
|
---|
| 163 |
|
---|
| 164 | }
|
---|
| 165 |
|
---|
| 166 | static void
|
---|
| 167 | close_all_files (query_data * qd)
|
---|
| 168 | {
|
---|
| 169 | Fclose (qd->File_text);
|
---|
| 170 | if (qd->File_fast_comp_dict)
|
---|
| 171 | Fclose (qd->File_fast_comp_dict);
|
---|
| 172 | if (qd->File_aux_dict)
|
---|
| 173 | Fclose (qd->File_aux_dict);
|
---|
| 174 | if (qd->File_comp_dict)
|
---|
| 175 | Fclose (qd->File_comp_dict);
|
---|
| 176 | Fclose (qd->File_stem);
|
---|
| 177 |
|
---|
| 178 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
| 179 | if (qd->File_stem1)
|
---|
| 180 | Fclose (qd->File_stem1);
|
---|
| 181 | if (qd->File_stem2)
|
---|
| 182 | Fclose (qd->File_stem2);
|
---|
| 183 | if (qd->File_stem3)
|
---|
| 184 | Fclose (qd->File_stem3);
|
---|
| 185 |
|
---|
| 186 | Fclose (qd->File_invf);
|
---|
| 187 | if (qd->File_text_idx_wgt)
|
---|
| 188 | Fclose (qd->File_text_idx_wgt);
|
---|
| 189 | if (qd->File_weight_approx)
|
---|
| 190 | Fclose (qd->File_weight_approx);
|
---|
| 191 | if (qd->File_text_idx)
|
---|
| 192 | Fclose (qd->File_text_idx);
|
---|
| 193 | }
|
---|
| 194 |
|
---|
| 195 | /* If textname is equal to null then name will be used instead */
|
---|
| 196 | /* [RJM 06/97: text filename] */
|
---|
| 197 | query_data *
|
---|
| 198 | InitQuerySystem (char *dir, char *name, char *textname, InitQueryTimes * iqt)
|
---|
| 199 | {
|
---|
| 200 | query_data *qd;
|
---|
| 201 | char *s;
|
---|
| 202 |
|
---|
| 203 | if (textname == NULL) textname = name; /* [RJM 06/97: text filename] */
|
---|
| 204 |
|
---|
| 205 | if (!(qd = Xmalloc (sizeof (query_data))))
|
---|
| 206 | {
|
---|
| 207 | mg_errno = MG_NOMEM;
|
---|
| 208 | return (NULL);
|
---|
| 209 | }
|
---|
| 210 |
|
---|
| 211 | bzero ((char *) qd, sizeof (*qd));
|
---|
| 212 |
|
---|
| 213 | qd->mem_in_use = qd->max_mem_in_use = 0;
|
---|
| 214 |
|
---|
| 215 | qd->doc_pos = qd->buf_in_use = 0;
|
---|
| 216 | qd->TextBufferLen = 0;
|
---|
| 217 | qd->DL = NULL;
|
---|
| 218 |
|
---|
| 219 | /* [RPAP - Feb 97: Term Frequency] */
|
---|
| 220 | qd->TL = NULL;
|
---|
| 221 | qd->QTL = NULL;
|
---|
| 222 |
|
---|
| 223 | qd->TextBuffer = NULL;
|
---|
| 224 |
|
---|
| 225 | qd->tot_hops_taken = 0;
|
---|
| 226 | qd->tot_num_of_ptrs = 0;
|
---|
| 227 | qd->tot_num_of_accum = 0;
|
---|
| 228 | qd->tot_num_of_terms = 0;
|
---|
| 229 | qd->tot_num_of_ans = 0;
|
---|
| 230 | qd->tot_text_idx_lookups = 0;
|
---|
| 231 |
|
---|
| 232 | qd->hops_taken = 0;
|
---|
| 233 | qd->num_of_ptrs = 0;
|
---|
| 234 | qd->num_of_accum = 0;
|
---|
| 235 | qd->num_of_terms = 0;
|
---|
| 236 | qd->num_of_ans = 0;
|
---|
| 237 | qd->text_idx_lookups = 0;
|
---|
| 238 |
|
---|
| 239 | qd->pathname = NULL; /* RJM 06/97: text filename] */
|
---|
| 240 | qd->textpathname = NULL; /* RJM 06/97: text filename] */
|
---|
| 241 |
|
---|
| 242 | s = strrchr (dir, '/');
|
---|
| 243 | if (s && *(s + 1) == '\0')
|
---|
| 244 | {
|
---|
| 245 | /* [RJM 06/97: text filename] */
|
---|
| 246 | if (!(qd->pathname = Xmalloc (strlen (dir) + strlen (name) + 1)) ||
|
---|
| 247 | !(qd->textpathname = Xmalloc (strlen (dir) + strlen (textname) + 1)))
|
---|
| 248 | {
|
---|
| 249 | mg_errno = MG_NOMEM;
|
---|
| 250 | if (qd->pathname) Xfree (qd->pathname); /* [RJM 06/97: text filename] */
|
---|
| 251 | Xfree (qd);
|
---|
| 252 | return (NULL);
|
---|
| 253 | }
|
---|
| 254 | sprintf (qd->pathname, "%s%s", dir, name);
|
---|
| 255 | sprintf (qd->textpathname, "%s%s", dir, textname); /* [RJM 06/97: text filename] */
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 | else
|
---|
| 259 | {
|
---|
| 260 | /* [RJM 06/97: text filename] */
|
---|
| 261 | if (!(qd->pathname = Xmalloc (strlen (dir) + strlen (name) + 2)) ||
|
---|
| 262 | !(qd->textpathname = Xmalloc (strlen (dir) + strlen (textname) + 2)))
|
---|
| 263 | {
|
---|
| 264 | mg_errno = MG_NOMEM;
|
---|
| 265 | if (qd->pathname) Xfree (qd->pathname); /* [RJM 06/97: text filename] */
|
---|
| 266 | Xfree (qd);
|
---|
| 267 | return (NULL);
|
---|
| 268 | }
|
---|
| 269 | /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 270 | #ifdef __WIN32__
|
---|
| 271 | if (dir == NULL || dir[0] == '\0') {
|
---|
| 272 | sprintf (qd->pathname, "%s", name);
|
---|
| 273 | sprintf (qd->textpathname, "%s", textname); /* [RJM 06/97: text filename] */
|
---|
| 274 | } else {
|
---|
| 275 | sprintf (qd->pathname, "%s%s", dir, name);
|
---|
| 276 | sprintf (qd->textpathname, "%s%s", dir, textname); /* [RJM 06/97: text filename] */
|
---|
| 277 | }
|
---|
| 278 | #else
|
---|
| 279 | sprintf (qd->pathname, "%s/%s", dir, name);
|
---|
| 280 | sprintf (qd->textpathname, "%s/%s", dir, textname); /* [RJM 06/97: text filename] */
|
---|
| 281 | #endif
|
---|
| 282 | }
|
---|
| 283 |
|
---|
| 284 | if (open_all_files (qd) == -1)
|
---|
| 285 | {
|
---|
| 286 | Xfree (qd->pathname);
|
---|
| 287 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 288 | Xfree (qd);
|
---|
| 289 | return (NULL);
|
---|
| 290 | }
|
---|
| 291 |
|
---|
| 292 | if (iqt)
|
---|
| 293 | GetTime (&iqt->Start);
|
---|
| 294 |
|
---|
| 295 | /* Initialise the stemmed dictionary system */
|
---|
| 296 | if (!(qd->sd = ReadStemDictBlk (qd->File_stem)))
|
---|
| 297 | {
|
---|
| 298 | close_all_files (qd);
|
---|
| 299 | Xfree (qd->pathname);
|
---|
| 300 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 301 | Xfree (qd);
|
---|
| 302 | return (NULL);
|
---|
| 303 | }
|
---|
| 304 |
|
---|
| 305 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
| 306 | if ((qd->sd->sdh.indexed & 7) && qd->File_stem1 && qd->File_stem2 && qd->File_stem3)
|
---|
| 307 | {
|
---|
| 308 | if (!(qd->sd->stem1 = ReadStemIdxBlk (qd->File_stem1)))
|
---|
| 309 | {
|
---|
| 310 | FreeStemDict (qd->sd);
|
---|
| 311 | close_all_files (qd);
|
---|
| 312 | Xfree (qd->pathname);
|
---|
| 313 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 314 | Xfree (qd);
|
---|
| 315 | return (NULL);
|
---|
| 316 | }
|
---|
| 317 | if (!(qd->sd->stem2 = ReadStemIdxBlk (qd->File_stem2)))
|
---|
| 318 | {
|
---|
| 319 | FreeStemDict (qd->sd);
|
---|
| 320 | close_all_files (qd);
|
---|
| 321 | Xfree (qd->pathname);
|
---|
| 322 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 323 | Xfree (qd);
|
---|
| 324 | return (NULL);
|
---|
| 325 | }
|
---|
| 326 | if (!(qd->sd->stem3 = ReadStemIdxBlk (qd->File_stem3)))
|
---|
| 327 | {
|
---|
| 328 | FreeStemDict (qd->sd);
|
---|
| 329 | close_all_files (qd);
|
---|
| 330 | Xfree (qd->pathname);
|
---|
| 331 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 332 | Xfree (qd);
|
---|
| 333 | return (NULL);
|
---|
| 334 | }
|
---|
| 335 | }
|
---|
| 336 | else if (qd->sd->sdh.indexed != 0)
|
---|
| 337 | {
|
---|
| 338 | FreeStemDict (qd->sd);
|
---|
| 339 | close_all_files (qd);
|
---|
| 340 | Xfree (qd->pathname);
|
---|
| 341 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 342 | Xfree (qd);
|
---|
| 343 | return (NULL);
|
---|
| 344 | }
|
---|
| 345 | else
|
---|
| 346 | {
|
---|
| 347 | if (qd->File_stem1)
|
---|
| 348 | Fclose (qd->File_stem1);
|
---|
| 349 | if (qd->File_stem2)
|
---|
| 350 | Fclose (qd->File_stem2);
|
---|
| 351 | if (qd->File_stem3)
|
---|
| 352 | Fclose (qd->File_stem3);
|
---|
| 353 | qd->File_stem1 = NULL;
|
---|
| 354 | qd->File_stem2 = NULL;
|
---|
| 355 | qd->File_stem3 = NULL;
|
---|
| 356 | qd->sd->stem1 = NULL;
|
---|
| 357 | qd->sd->stem2 = NULL;
|
---|
| 358 | qd->sd->stem3 = NULL;
|
---|
| 359 | }
|
---|
| 360 |
|
---|
| 361 | if (iqt)
|
---|
| 362 | GetTime (&iqt->StemDict);
|
---|
| 363 | if (qd->File_weight_approx)
|
---|
| 364 | {
|
---|
| 365 | if (!(qd->awd = LoadDocWeights (qd->File_weight_approx,
|
---|
| 366 | qd->sd->sdh.num_of_docs)))
|
---|
| 367 | {
|
---|
| 368 | FreeStemDict (qd->sd);
|
---|
| 369 | close_all_files (qd);
|
---|
| 370 | Xfree (qd->pathname);
|
---|
| 371 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 372 | Xfree (qd);
|
---|
| 373 | return (NULL);
|
---|
| 374 | }
|
---|
| 375 | }
|
---|
| 376 | else
|
---|
| 377 | qd->awd = NULL;
|
---|
| 378 |
|
---|
| 379 |
|
---|
| 380 | if (iqt)
|
---|
| 381 | GetTime (&iqt->ApproxWeights);
|
---|
| 382 |
|
---|
| 383 | if (!(qd->cd = LoadCompDict (qd->File_comp_dict, qd->File_aux_dict,
|
---|
| 384 | qd->File_fast_comp_dict)))
|
---|
| 385 | {
|
---|
| 386 | if (qd->awd)
|
---|
| 387 | FreeWeights (qd->awd);
|
---|
| 388 | FreeStemDict (qd->sd);
|
---|
| 389 | close_all_files (qd);
|
---|
| 390 | Xfree (qd->pathname);
|
---|
| 391 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 392 | Xfree (qd);
|
---|
| 393 | return (NULL);
|
---|
| 394 | }
|
---|
| 395 |
|
---|
| 396 | if (iqt)
|
---|
| 397 | GetTime (&iqt->CompDict);
|
---|
| 398 |
|
---|
| 399 | if (!(qd->id = InitInvfFile (qd->File_invf, qd->sd)))
|
---|
| 400 | {
|
---|
| 401 | FreeCompDict (qd->cd);
|
---|
| 402 | if (qd->awd)
|
---|
| 403 | FreeWeights (qd->awd);
|
---|
| 404 | FreeStemDict (qd->sd);
|
---|
| 405 | close_all_files (qd);
|
---|
| 406 | Xfree (qd->pathname);
|
---|
| 407 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 408 | Xfree (qd);
|
---|
| 409 | return (NULL);
|
---|
| 410 | }
|
---|
| 411 | if ((qd->File_text_idx_wgt == NULL || qd->File_weight_approx == NULL) &&
|
---|
| 412 | qd->id->ifh.InvfLevel >= 2)
|
---|
| 413 | {
|
---|
| 414 | FreeInvfData (qd->id);
|
---|
| 415 | FreeCompDict (qd->cd);
|
---|
| 416 | if (qd->awd)
|
---|
| 417 | FreeWeights (qd->awd);
|
---|
| 418 | FreeStemDict (qd->sd);
|
---|
| 419 | close_all_files (qd);
|
---|
| 420 | Xfree (qd->pathname);
|
---|
| 421 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 422 | Xfree (qd);
|
---|
| 423 | mg_errno = MG_INVERSION;
|
---|
| 424 | return (NULL);
|
---|
| 425 | }
|
---|
| 426 | if (iqt)
|
---|
| 427 | GetTime (&iqt->Invf);
|
---|
| 428 |
|
---|
| 429 | if (!(qd->td = LoadTextData (qd->File_text, qd->File_text_idx_wgt,
|
---|
| 430 | qd->File_text_idx)))
|
---|
| 431 | {
|
---|
| 432 | FreeInvfData (qd->id);
|
---|
| 433 | FreeCompDict (qd->cd);
|
---|
| 434 | if (qd->awd)
|
---|
| 435 | FreeWeights (qd->awd);
|
---|
| 436 | FreeStemDict (qd->sd);
|
---|
| 437 | close_all_files (qd);
|
---|
| 438 | Xfree (qd->pathname);
|
---|
| 439 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 440 | Xfree (qd);
|
---|
| 441 | return (NULL);
|
---|
| 442 | }
|
---|
| 443 |
|
---|
| 444 | /* [RPAP - Feb 97: NZDL Additions] */
|
---|
| 445 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
| 446 |
|
---|
| 447 | /*
|
---|
| 448 |
|
---|
| 449 | This code is based on the TREC_MODE code below to read the .paragraph
|
---|
| 450 | file to determine what document numbers correspond to what paragraphs.
|
---|
| 451 | This code is more space efficient, reading in the .paragraph file
|
---|
| 452 | into memory as an accumulate docnum array. Eg. the .paragraph may contain
|
---|
| 453 |
|
---|
| 454 | [5 3 6 4 7 9 4]
|
---|
| 455 |
|
---|
| 456 | indicating the first document has 5 paragraphs, the next 3, etc.
|
---|
| 457 | This will be stored in memory as
|
---|
| 458 |
|
---|
| 459 | [0 5 8 14 18 25 34 38]
|
---|
| 460 |
|
---|
| 461 | so a binary search can be performed. The first 0 is for convenience;
|
---|
| 462 | it prevents testing boundary conditions.
|
---|
| 463 |
|
---|
| 464 |
|
---|
| 465 | The TREC_MODE code does this differently; it stores the array
|
---|
| 466 |
|
---|
| 467 | [1 1 1 1 1 2 2 2 3 3 3 3 3 3 ....]
|
---|
| 468 |
|
---|
| 469 | allowing directy paragraph to docnum conversion, at the expense
|
---|
| 470 | of memory.
|
---|
| 471 |
|
---|
| 472 | */
|
---|
| 473 | qd->paragraph = NULL;
|
---|
| 474 |
|
---|
| 475 | if (qd->id->ifh.InvfLevel == 3)
|
---|
| 476 | {
|
---|
[23508] | 477 | mg_u_long magic;
|
---|
[3745] | 478 | FILE *paragraph;
|
---|
| 479 | int i;
|
---|
| 480 | char paraFile[512];
|
---|
| 481 |
|
---|
| 482 | sprintf(paraFile, "%s%s", qd->pathname, INVF_PARAGRAPH_SUFFIX);
|
---|
| 483 | paragraph = fopen(paraFile, "rb");
|
---|
| 484 | if (!paragraph)
|
---|
| 485 | FatalError(1, "Unable to open 'paraFile'.", paraFile);
|
---|
| 486 |
|
---|
| 487 | fread((void *)&magic, sizeof(magic), 1, paragraph);
|
---|
| 488 | qd->paragraph = Xmalloc((qd->td->cth.num_of_docs+1)*sizeof(int));
|
---|
| 489 | qd->paragraph[0] = 0;
|
---|
| 490 | for (i = 1; i <= qd->td->cth.num_of_docs; i++)
|
---|
| 491 | {
|
---|
| 492 | int count;
|
---|
| 493 |
|
---|
| 494 | if (fread((void *)&count, sizeof(count), 1, paragraph) != 1)
|
---|
| 495 | FatalError(1, "Unexpected EOF while reading '%s'.", paraFile);
|
---|
| 496 | NTOHSI(count); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 497 | qd->paragraph[i] = qd->paragraph[i-1]+count;
|
---|
| 498 | }
|
---|
| 499 |
|
---|
| 500 | fclose (paragraph); /* [RJM 07/98: Memory Leak] */
|
---|
| 501 | }
|
---|
| 502 |
|
---|
| 503 |
|
---|
| 504 | #endif
|
---|
| 505 |
|
---|
| 506 | #ifdef TREC_MODE
|
---|
| 507 | {
|
---|
| 508 | extern char *trec_ids;
|
---|
[23508] | 509 | extern mg_s_long *trec_paras;
|
---|
[3745] | 510 | int size;
|
---|
| 511 | char FileName[512];
|
---|
| 512 | FILE *f;
|
---|
| 513 | if (!strstr (qd->pathname, "trec"))
|
---|
| 514 | goto error;
|
---|
| 515 | sprintf (FileName, "%s%s", qd->pathname, ".DOCIDS");
|
---|
| 516 | if (!(f = fopen (FileName, "rb"))) /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 517 | {
|
---|
| 518 | Message ("Unable to open \"%s\"", FileName);
|
---|
| 519 | goto error;
|
---|
| 520 | }
|
---|
| 521 | fseek (f, 0, 2);
|
---|
| 522 | size = ftell (f);
|
---|
| 523 | fseek (f, 0, 0);
|
---|
| 524 | trec_ids = Xmalloc (size);
|
---|
| 525 | if (!trec_ids)
|
---|
| 526 | {
|
---|
| 527 | fclose (f);
|
---|
| 528 | goto error;
|
---|
| 529 | }
|
---|
| 530 | fread (trec_ids, 1, size, f);
|
---|
| 531 | fclose (f);
|
---|
| 532 | if (qd->id->ifh.InvfLevel == 3)
|
---|
| 533 | {
|
---|
| 534 | int i, d;
|
---|
[23508] | 535 | mg_u_long magic;
|
---|
| 536 | trec_paras = Xmalloc (qd->sd->sdh.num_of_docs * sizeof (mg_s_long));
|
---|
[3745] | 537 | if (!trec_paras)
|
---|
| 538 | {
|
---|
| 539 | Xfree (trec_ids);
|
---|
| 540 | trec_ids = NULL;
|
---|
| 541 | goto error;
|
---|
| 542 | }
|
---|
| 543 | sprintf (FileName, "%s%s", qd->pathname, INVF_PARAGRAPH_SUFFIX);
|
---|
| 544 | if (!(f = fopen (FileName, "rb"))) /* [RPAP - Feb 97: WIN32 Port] */
|
---|
| 545 | {
|
---|
| 546 | Message ("Unable to open \"%s\"", FileName);
|
---|
| 547 | goto error;
|
---|
| 548 | }
|
---|
| 549 | if (fread ((char *) &magic, sizeof (magic), 1, f) != 1 ||
|
---|
| 550 | NTOHUL(magic) != MAGIC_PARAGRAPH) /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 551 | {
|
---|
| 552 | fclose (f);
|
---|
| 553 | Message ("Bad magic number in \"%s\"", FileName);
|
---|
| 554 | goto error;
|
---|
| 555 | }
|
---|
| 556 |
|
---|
| 557 | for (d = i = 0; i < qd->td->cth.num_of_docs; i++)
|
---|
| 558 | {
|
---|
| 559 | int count;
|
---|
| 560 | if (fread ((char *) &count, sizeof (count), 1, f) != 1)
|
---|
| 561 | {
|
---|
| 562 | fclose (f);
|
---|
| 563 | goto error;
|
---|
| 564 | }
|
---|
| 565 | NTOHSI(count); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 566 | while (count--)
|
---|
| 567 | trec_paras[d++] = i;
|
---|
| 568 | }
|
---|
| 569 | fclose (f);
|
---|
| 570 | }
|
---|
| 571 | goto ok;
|
---|
| 572 | error:
|
---|
| 573 | if (trec_ids)
|
---|
| 574 | Xfree (trec_ids);
|
---|
| 575 | if (trec_paras)
|
---|
| 576 | Xfree (trec_paras);
|
---|
| 577 | trec_ids = NULL;
|
---|
| 578 | trec_paras = NULL;
|
---|
| 579 | ok:
|
---|
| 580 | ;
|
---|
| 581 | }
|
---|
| 582 | #endif
|
---|
| 583 |
|
---|
| 584 | if (iqt)
|
---|
| 585 | GetTime (&iqt->Text);
|
---|
| 586 |
|
---|
| 587 | return (qd);
|
---|
| 588 | }
|
---|
| 589 |
|
---|
| 590 |
|
---|
| 591 |
|
---|
| 592 |
|
---|
| 593 |
|
---|
| 594 |
|
---|
| 595 | /*
|
---|
| 596 | * Change the amount of memory currently in use
|
---|
| 597 | *
|
---|
| 598 | */
|
---|
| 599 | void
|
---|
[23508] | 600 | ChangeMemInUse (query_data * qd, mg_s_long delta)
|
---|
[3745] | 601 | {
|
---|
| 602 | qd->mem_in_use += delta;
|
---|
| 603 | if (qd->mem_in_use > qd->max_mem_in_use)
|
---|
| 604 | qd->max_mem_in_use = qd->mem_in_use;
|
---|
| 605 | }
|
---|
| 606 |
|
---|
| 607 |
|
---|
| 608 | void
|
---|
| 609 | FinishQuerySystem (query_data * qd)
|
---|
| 610 | {
|
---|
| 611 | /* [RJM 07/98: Memory Leak] */
|
---|
| 612 | #if defined(PARADOCNUM) || defined(NZDL)
|
---|
| 613 | if (qd->paragraph != NULL) {
|
---|
| 614 | Xfree (qd->paragraph);
|
---|
| 615 | qd->paragraph = NULL;
|
---|
| 616 | }
|
---|
| 617 | #endif
|
---|
| 618 |
|
---|
| 619 | FreeTextData (qd->td);
|
---|
| 620 | FreeInvfData (qd->id);
|
---|
| 621 | FreeCompDict (qd->cd);
|
---|
| 622 | if (qd->awd)
|
---|
| 623 | FreeWeights (qd->awd);
|
---|
| 624 | FreeStemDict (qd->sd);
|
---|
| 625 | close_all_files (qd);
|
---|
| 626 | Xfree (qd->textpathname); /* [RJM 06/97: text filename] */
|
---|
| 627 | Xfree (qd->pathname);
|
---|
| 628 | FreeQueryDocs (qd);
|
---|
| 629 | if (qd->TL != NULL) FreeTermList(&qd->TL); /* [RJM 07/98: Memory Leak] */
|
---|
| 630 | if (qd->QTL != NULL) FreeQueryTermList(&qd->QTL); /* [RJM 07/98: Memory Leak] */
|
---|
| 631 | Xfree (qd);
|
---|
| 632 |
|
---|
| 633 | /* other global stuff hanging around */
|
---|
| 634 | MgErrorDeinit ();
|
---|
| 635 | }
|
---|
| 636 |
|
---|
| 637 |
|
---|
| 638 | void
|
---|
| 639 | ResetFileStats (query_data * qd)
|
---|
| 640 | {
|
---|
| 641 | ZeroFileStats (qd->File_text);
|
---|
| 642 | if (qd->File_comp_dict)
|
---|
| 643 | ZeroFileStats (qd->File_comp_dict);
|
---|
| 644 | if (qd->File_fast_comp_dict)
|
---|
| 645 | ZeroFileStats (qd->File_fast_comp_dict);
|
---|
| 646 | ZeroFileStats (qd->File_stem);
|
---|
| 647 |
|
---|
| 648 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
| 649 | if (qd->File_stem1)
|
---|
| 650 | ZeroFileStats (qd->File_stem1);
|
---|
| 651 | if (qd->File_stem2)
|
---|
| 652 | ZeroFileStats (qd->File_stem2);
|
---|
| 653 | if (qd->File_stem3)
|
---|
| 654 | ZeroFileStats (qd->File_stem3);
|
---|
| 655 |
|
---|
| 656 | ZeroFileStats (qd->File_invf);
|
---|
| 657 | if (qd->File_text_idx_wgt)
|
---|
| 658 | ZeroFileStats (qd->File_text_idx_wgt);
|
---|
| 659 | if (qd->File_weight_approx)
|
---|
| 660 | ZeroFileStats (qd->File_weight_approx);
|
---|
| 661 | if (qd->File_text_idx)
|
---|
| 662 | ZeroFileStats (qd->File_text_idx);
|
---|
| 663 | }
|
---|
| 664 |
|
---|
| 665 |
|
---|
| 666 | void
|
---|
| 667 | TransFileStats (query_data * qd)
|
---|
| 668 | {
|
---|
| 669 | qd->File_text->Current = qd->File_text->Cumulative;
|
---|
| 670 | if (qd->File_comp_dict)
|
---|
| 671 | qd->File_comp_dict->Current = qd->File_comp_dict->Cumulative;
|
---|
| 672 | if (qd->File_fast_comp_dict)
|
---|
| 673 | qd->File_fast_comp_dict->Current = qd->File_fast_comp_dict->Cumulative;
|
---|
| 674 | qd->File_stem->Current = qd->File_stem->Cumulative;
|
---|
| 675 |
|
---|
| 676 | /* [RPAP - Jan 97: Stem Index Change] */
|
---|
| 677 | if (qd->File_stem1)
|
---|
| 678 | qd->File_stem1->Current = qd->File_stem1->Cumulative;
|
---|
| 679 | if (qd->File_stem2)
|
---|
| 680 | qd->File_stem2->Current = qd->File_stem2->Cumulative;
|
---|
| 681 | if (qd->File_stem3)
|
---|
| 682 | qd->File_stem3->Current = qd->File_stem3->Cumulative;
|
---|
| 683 |
|
---|
| 684 | qd->File_invf->Current = qd->File_invf->Cumulative;
|
---|
| 685 | if (qd->File_text_idx_wgt)
|
---|
| 686 | qd->File_text_idx_wgt->Current = qd->File_text_idx_wgt->Cumulative;
|
---|
| 687 | if (qd->File_weight_approx)
|
---|
| 688 | qd->File_weight_approx->Current = qd->File_weight_approx->Cumulative;
|
---|
| 689 | if (qd->File_text_idx)
|
---|
| 690 | qd->File_text_idx->Current = qd->File_text_idx->Cumulative;
|
---|
| 691 | }
|
---|
| 692 |
|
---|
| 693 |
|
---|
| 694 | void
|
---|
| 695 | FreeTextBuffer (query_data * qd)
|
---|
| 696 | {
|
---|
| 697 | if (qd->TextBuffer)
|
---|
| 698 | {
|
---|
| 699 | Xfree (qd->TextBuffer);
|
---|
| 700 | ChangeMemInUse (qd, -qd->TextBufferLen);
|
---|
| 701 | }
|
---|
| 702 | qd->TextBuffer = NULL;
|
---|
| 703 | qd->TextBufferLen = 0;
|
---|
| 704 | }
|
---|
| 705 |
|
---|
| 706 | void
|
---|
| 707 | FreeQueryDocs (query_data * qd)
|
---|
| 708 | {
|
---|
| 709 | qd->doc_pos = 0;
|
---|
| 710 | qd->buf_in_use = 0;
|
---|
| 711 | if (qd->DL)
|
---|
| 712 | {
|
---|
| 713 | int i;
|
---|
| 714 | for (i = 0; i < qd->DL->num; i++)
|
---|
| 715 | if (qd->DL->DE[i].CompTextBuffer)
|
---|
| 716 | {
|
---|
| 717 | Xfree (qd->DL->DE[i].CompTextBuffer);
|
---|
| 718 | qd->DL->DE[i].CompTextBuffer = NULL;
|
---|
| 719 | ChangeMemInUse (qd, -qd->DL->DE[i].Len);
|
---|
| 720 | }
|
---|
| 721 | Xfree (qd->DL);
|
---|
| 722 | }
|
---|
| 723 | qd->DL = NULL;
|
---|
| 724 | FreeTextBuffer (qd);
|
---|
| 725 | }
|
---|
| 726 |
|
---|
| 727 | int
|
---|
| 728 | LoadCompressedText (query_data * qd, int max_mem)
|
---|
| 729 | {
|
---|
| 730 | DocEntry *DE;
|
---|
| 731 | if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
|
---|
| 732 | return -1;
|
---|
| 733 |
|
---|
| 734 | DE = &qd->DL->DE[qd->doc_pos];
|
---|
| 735 | if (!DE->CompTextBuffer)
|
---|
| 736 | {
|
---|
| 737 | int i;
|
---|
| 738 | DocEntry *de;
|
---|
| 739 | for (i = 0, de = qd->DL->DE; i < qd->DL->num; i++, de++)
|
---|
| 740 | if (de->CompTextBuffer)
|
---|
| 741 | {
|
---|
| 742 | Xfree (de->CompTextBuffer);
|
---|
| 743 | de->CompTextBuffer = NULL;
|
---|
| 744 | ChangeMemInUse (qd, -de->Len);
|
---|
| 745 | }
|
---|
| 746 | if (LoadBuffers (qd, &qd->DL->DE[qd->doc_pos], max_mem,
|
---|
| 747 | qd->DL->num - qd->doc_pos) == -1)
|
---|
| 748 | return -1;
|
---|
| 749 | }
|
---|
| 750 | return 0;
|
---|
| 751 | }
|
---|
| 752 |
|
---|
| 753 | int
|
---|
| 754 | GetDocNum (query_data * qd)
|
---|
| 755 | {
|
---|
| 756 | if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
|
---|
| 757 | return -1;
|
---|
| 758 | return qd->DL->DE[qd->doc_pos].DocNum;
|
---|
| 759 | }
|
---|
| 760 |
|
---|
| 761 | DocEntry *
|
---|
| 762 | GetDocChain (query_data * qd)
|
---|
| 763 | {
|
---|
| 764 | if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
|
---|
| 765 | return NULL;
|
---|
| 766 | return &(qd->DL->DE[qd->doc_pos]);
|
---|
| 767 | }
|
---|
| 768 |
|
---|
| 769 | float
|
---|
| 770 | GetDocWeight (query_data * qd)
|
---|
| 771 | {
|
---|
| 772 | if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
|
---|
| 773 | return -1;
|
---|
| 774 | return qd->DL->DE[qd->doc_pos].Weight;
|
---|
| 775 | }
|
---|
| 776 |
|
---|
[23508] | 777 | mg_s_long
|
---|
[3745] | 778 | GetDocCompLength (query_data * qd)
|
---|
| 779 | {
|
---|
| 780 | if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
|
---|
| 781 | return -1;
|
---|
| 782 | return qd->DL->DE[qd->doc_pos].Len;
|
---|
| 783 | }
|
---|
| 784 |
|
---|
| 785 |
|
---|
| 786 | u_char *
|
---|
[23508] | 787 | GetDocText (query_data * qd, mg_u_long *len)
|
---|
[3745] | 788 | {
|
---|
| 789 | DocEntry *DE;
|
---|
| 790 | int ULen;
|
---|
| 791 | if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
|
---|
| 792 | return NULL;
|
---|
| 793 |
|
---|
| 794 | DE = &qd->DL->DE[qd->doc_pos];
|
---|
| 795 |
|
---|
| 796 | if (!DE->CompTextBuffer)
|
---|
| 797 | {
|
---|
| 798 | fprintf (stderr, "The compressed text buffer is NULL\n");
|
---|
| 799 | mg_errno = MG_NOMEM;
|
---|
| 800 | return (NULL);
|
---|
| 801 | }
|
---|
| 802 |
|
---|
| 803 | FreeTextBuffer (qd);
|
---|
| 804 |
|
---|
| 805 | qd->TextBufferLen = (int) (qd->td->cth.ratio * 1.01 *
|
---|
| 806 | DE->Len) + 100;
|
---|
| 807 | if (!(qd->TextBuffer = Xmalloc (qd->TextBufferLen)))
|
---|
| 808 | {
|
---|
| 809 | fprintf (stderr, "No memory for TextBuffer\n");
|
---|
| 810 | mg_errno = MG_NOMEM;
|
---|
| 811 | return (NULL);
|
---|
| 812 | }
|
---|
| 813 |
|
---|
| 814 | DecodeText (qd->cd, (u_char *) (DE->CompTextBuffer), DE->Len,
|
---|
| 815 | (u_char *) (qd->TextBuffer), &ULen);
|
---|
| 816 | qd->TextBuffer[ULen] = '\0';
|
---|
| 817 |
|
---|
| 818 | if (ULen >= qd->TextBufferLen)
|
---|
| 819 | {
|
---|
| 820 | fprintf (stderr, "%d >= %d\n", ULen, qd->TextBufferLen);
|
---|
| 821 | mg_errno = MG_BUFTOOSMALL;
|
---|
| 822 | return (NULL);
|
---|
| 823 | }
|
---|
| 824 |
|
---|
| 825 | if (len)
|
---|
| 826 | *len = ULen;
|
---|
| 827 |
|
---|
| 828 | return qd->TextBuffer;
|
---|
| 829 | }
|
---|
| 830 |
|
---|
| 831 | int
|
---|
| 832 | NextDoc (query_data * qd)
|
---|
| 833 | {
|
---|
| 834 | if (qd->DL == NULL || qd->doc_pos >= qd->DL->num)
|
---|
| 835 | return 0;
|
---|
| 836 | qd->doc_pos++;
|
---|
| 837 | return qd->doc_pos < qd->DL->num;
|
---|
| 838 | }
|
---|