source: trunk/gsdl/packages/mg-1.3d/src/text/stem_search.c@ 30

Last change on this file since 30 was 13, checked in by rjmcnab, 26 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 19.5 KB
Line 
1/**************************************************************************
2 *
3 * stem_search.c -- Functions for searching the blocked stemmed dictionary
4 * Copyright (C) 1994 Neil Sharman
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: stem_search.c 13 1998-11-17 09:36:00Z rjmcnab $
21 *
22 **************************************************************************/
23
24#include "sysfuncs.h"
25
26#include "memlib.h"
27#include "messages.h"
28#include "filestats.h"
29#include "timing.h"
30#include "local_strings.h"
31#include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
32
33#include "mg.h"
34#include "invf.h"
35#include "text.h"
36#include "lists.h"
37#include "backend.h"
38#include "words.h"
39#include "locallib.h"
40#include "stem_search.h"
41#include "mg_errors.h"
42#include "term_lists.h"
43#include "stemmer.h"
44
45
46/*
47 $Log$
48 Revision 1.1 1998/11/17 09:35:39 rjmcnab
49 *** empty log message ***
50
51 * Revision 1.3 1994/10/20 03:57:04 tes
52 * I have rewritten the boolean query optimiser and abstracted out the
53 * components of the boolean query.
54 *
55 * Revision 1.2 1994/09/20 04:42:08 tes
56 * For version 1.1
57 *
58 */
59
/* RCS identification string for this file.  It points at a string literal,
   so the pointee is const-qualified to stop accidental writes through it. */
static const char *RCSID = "$Id: stem_search.c 13 1998-11-17 09:36:00Z rjmcnab $";
61
62
63stemmed_dict *
64ReadStemDictBlk (File * stem_file)
65{
66 unsigned long i;
67 stemmed_dict *sd;
68 u_char *buffer;
69
70 if (!(sd = Xmalloc (sizeof (stemmed_dict))))
71 {
72 mg_errno = MG_NOMEM;
73 return (NULL);
74 }
75
76 sd->stem_file = stem_file;
77 sd->MemForStemDict = 0;
78
79 Fread (&sd->sdh, sizeof (sd->sdh), 1, stem_file);
80 /* [RPAP - Jan 97: Endian Ordering] */
81 NTOHUL(sd->sdh.lookback);
82 NTOHUL(sd->sdh.block_size);
83 NTOHUL(sd->sdh.num_blocks);
84 NTOHUL(sd->sdh.blocks_start);
85 NTOHUL(sd->sdh.index_chars);
86 NTOHUL(sd->sdh.num_of_docs);
87 NTOHUL(sd->sdh.static_num_of_docs);
88 NTOHUL(sd->sdh.num_of_words);
89 NTOHUL(sd->sdh.stem_method);
90 NTOHUL(sd->sdh.indexed);
91
92 if (!(buffer = Xmalloc (sd->sdh.index_chars)))
93 {
94 Xfree (sd);
95 mg_errno = MG_NOMEM;
96 return (NULL);
97 };
98 sd->MemForStemDict += sd->sdh.index_chars;
99
100 if (!(sd->index = Xmalloc (sd->sdh.num_blocks * sizeof (*sd->index))))
101 {
102 Xfree (sd);
103 Xfree (buffer);
104 mg_errno = MG_NOMEM;
105 return (NULL);
106 };
107 sd->MemForStemDict += sd->sdh.num_blocks * sizeof (*sd->index);
108
109 if (!(sd->pos = Xmalloc (sd->sdh.num_blocks * sizeof (*sd->pos))))
110 {
111 Xfree (sd);
112 Xfree (buffer);
113 Xfree (sd->index);
114 mg_errno = MG_NOMEM;
115 return (NULL);
116 };
117 sd->MemForStemDict += sd->sdh.num_blocks * sizeof (*sd->pos);
118
119 if (!(sd->buffer = Xmalloc (sd->sdh.block_size * sizeof (*sd->buffer))))
120 {
121 Xfree (sd);
122 Xfree (buffer);
123 Xfree (sd->index);
124 Xfree (sd->buffer);
125 mg_errno = MG_NOMEM;
126 return (NULL);
127 };
128 sd->MemForStemDict += sd->sdh.block_size * sizeof (*sd->buffer);
129
130 sd->active = -1;
131
132 for (i = 0; i < sd->sdh.num_blocks; i++)
133 {
134 register u_char len;
135 sd->index[i] = buffer;
136 len = Getc (stem_file);
137 *buffer++ = len;
138 Fread (buffer, sizeof (u_char), len, stem_file);
139 buffer += len;
140 Fread (&sd->pos[i], sizeof (*sd->pos), 1, stem_file);
141 NTOHUL(sd->pos[i]); /* [RPAP - Jan 97: Endian Ordering] */
142 }
143
144 mg_errno = MG_NOERROR;
145
146 /* fprintf (stderr, "mem for stem dict = %i\n", sd->MemForStemDict); */
147
148 return sd;
149}
150
151
152/* [RPAP - Jan 97: Stem Index Change] */
153stemmed_idx *
154ReadStemIdxBlk (File * stem_idx_file)
155{
156 unsigned long i;
157 stemmed_idx *si;
158 u_char *buffer;
159
160 if (!(si = Xmalloc (sizeof (stemmed_idx))))
161 {
162 mg_errno = MG_NOMEM;
163 return (NULL);
164 }
165
166 si->stem_idx_file = stem_idx_file;
167 si->MemForStemIdx = 0;
168
169 Fread (&si->sih, sizeof (si->sih), 1, stem_idx_file);
170 /* [RPAP - Jan 97: Endian Ordering] */
171 NTOHUL(si->sih.lookback);
172 NTOHUL(si->sih.block_size);
173 NTOHUL(si->sih.num_blocks);
174 NTOHUL(si->sih.blocks_start);
175 NTOHUL(si->sih.index_chars);
176 NTOHUL(si->sih.num_of_words);
177
178 if (!(buffer = Xmalloc (si->sih.index_chars)))
179 {
180 Xfree (si);
181 mg_errno = MG_NOMEM;
182 return (NULL);
183 };
184 si->MemForStemIdx += si->sih.index_chars;
185
186 if (!(si->index = Xmalloc (si->sih.num_blocks * sizeof (*si->index))))
187 {
188 Xfree (si);
189 Xfree (buffer);
190 mg_errno = MG_NOMEM;
191 return (NULL);
192 };
193 si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->index);
194
195 if (!(si->pos = Xmalloc (si->sih.num_blocks * sizeof (*si->pos))))
196 {
197 Xfree (si->index);
198 Xfree (si);
199 Xfree (buffer);
200 mg_errno = MG_NOMEM;
201 return (NULL);
202 };
203 si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->pos);
204
205 if (!(si->buffer = Xmalloc (si->sih.block_size * sizeof (*si->buffer))))
206 {
207 Xfree (buffer);
208 Xfree (si->index);
209 Xfree (si->buffer);
210 Xfree (si);
211 mg_errno = MG_NOMEM;
212 return (NULL);
213 };
214 si->MemForStemIdx += si->sih.block_size * sizeof (*si->buffer);
215
216 si->active = -1;
217
218 for (i = 0; i < si->sih.num_blocks; i++)
219 {
220 register u_char len;
221 si->index[i] = buffer;
222 len = Getc (stem_idx_file);
223 *buffer++ = len;
224 Fread (buffer, sizeof (u_char), len, stem_idx_file);
225 buffer += len;
226 Fread (&si->pos[i], sizeof (*si->pos), 1, stem_idx_file);
227 NTOHUL(si->pos[i]); /* [RPAP - Jan 97: Endian Ordering] */
228 }
229 mg_errno = MG_NOERROR;
230
231 /* fprintf (stderr, "mem for stem idx = %i\n", si->MemForStemIdx); */
232
233 return si;
234}
235
236
237/* [RPAP - Jan 97: Stem Index Change] */
238/* word should be appropriately stemed */
239static int
240GetIdxBlock (stemmed_idx * si, u_char * word)
241{
242 register int lo = 0, hi = si->sih.num_blocks - 1;
243 register int mid = 0, c = 0;
244
245 while (lo <= hi)
246 {
247 mid = (lo + hi) / 2;
248 c = casecompare (word, si->index[mid]);
249 if (c < 0)
250 hi = mid - 1;
251 else if (c > 0)
252 lo = mid + 1;
253 else
254 return mid;
255 }
256 return hi < 0 ? 0 : (c < 0 ? mid - 1 : mid);
257}
258
259
260static int
261GetBlock (stemmed_dict * sd, u_char * Word)
262{
263 register int lo = 0, hi = sd->sdh.num_blocks - 1;
264 register int mid = 0, c = 0;
265 while (lo <= hi)
266 {
267 mid = (lo + hi) / 2;
268 c = casecompare (Word, sd->index[mid]); /* [RPAP - Jan 97: Stem Index Change] */
269 if (c < 0)
270 hi = mid - 1;
271 else if (c > 0)
272 lo = mid + 1;
273 else
274 return mid;
275 }
276 return hi < 0 ? 0 : (c < 0 ? mid - 1 : mid);
277}
278
279
280/*
281 * This function looks up a word in the stemmed dictionary, it returns -1
282 * if the word cound not be found, and 0 if it successfully finds the word.
283 * If count is non-null the ulong it is pointing to is set to the number of
284 * occurances of the stemmed word in the collection. i.e wcnt.
285 * If doc_count is non-null the ulong it is pointing to is set to the number
286 * of documents that the word occurs in. i.e fcnt
287 * If invf_ptr is non-null the ulong it is pointing to is set to the position
288 * of the inverted file where the entry for this word start.
289 */
290int
291FindWord (stemmed_dict * sd, u_char * Word, unsigned long *count,
292 unsigned long *doc_count, unsigned long *invf_ptr,
293 unsigned long *invf_len)
294{
295 register int lo, hi, mid, c;
296 register unsigned int res;
297 int block, num_indexes;
298 unsigned long *first_word, *last_invf_len;
299 unsigned short *num_words;
300 u_char *base;
301 unsigned short *index;
302 u_char prev[MAXSTEMLEN + 1];
303
304 block = GetBlock (sd, Word);
305 /* [RPAP - Jan 97: Endian Ordering] */
306 if (sd->active != sd->pos[block])
307 {
308 int i;
309
310 Fseek (sd->stem_file, sd->pos[block] + sd->sdh.blocks_start, 0);
311 Fread (sd->buffer, sd->sdh.block_size, sizeof (u_char), sd->stem_file);
312 sd->active = sd->pos[block];
313
314 /* [RPAP - Jan 97: Endian Ordering] */
315 first_word = (unsigned long *) (sd->buffer);
316 NTOHUL(*first_word);
317 last_invf_len = (unsigned long *) (first_word + 1);
318 NTOHUL(*last_invf_len);
319 num_words = (unsigned short *) (last_invf_len + 1);
320 NTOHUS(*num_words);
321 index = num_words + 1;
322 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;
323
324 for (i = 0; i < num_indexes; i++)
325 NTOHUS(index[i]);
326 }
327 else
328 {
329 first_word = (unsigned long *) (sd->buffer);
330 last_invf_len = (unsigned long *) (first_word + 1);
331 num_words = (unsigned short *) (last_invf_len + 1);
332 index = num_words + 1;
333 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;
334 }
335 base = (u_char *) (index + num_indexes);
336
337 lo = 0;
338 hi = num_indexes - 1;
339 while (lo <= hi)
340 {
341 mid = (lo + hi) / 2;
342 c = casecompare (Word, base + index[mid] + 1); /* [RPAP - Jan 97: Stem Index Change] */
343 if (c < 0)
344 hi = mid - 1;
345 else if (c > 0)
346 lo = mid + 1;
347 else
348 {
349 hi = mid;
350 break;
351 }
352 }
353 if (hi < 0)
354 hi = 0;
355
356 res = hi * sd->sdh.lookback;
357 base += index[hi];
358
359 for (;;)
360 {
361 unsigned copy, suff;
362 unsigned long invfp;
363 if (res >= *num_words)
364 return (-1);
365 copy = *base++;
366 suff = *base++;
367 bcopy ((char *) base, (char *) (prev + copy + 1), suff);
368 base += suff;
369 *prev = copy + suff;
370
371 c = casecompare (Word, prev); /* [RPAP - Jan 97: Stem Index Change] */
372 if (c < 0)
373 return (-1);
374
375 if (c == 0 && doc_count)
376 {
377 bcopy ((char *) base, (char *) doc_count, sizeof (*doc_count));
378 NTOHUL(*doc_count); /* [RPAP - Jan 97: Endian Ordering] */
379 }
380 base += sizeof (*doc_count);
381
382 if (c == 0 && count)
383 {
384 bcopy ((char *) base, (char *) count, sizeof (*count));
385 NTOHUL(*count); /* [RPAP - Jan 97: Endian Ordering] */
386 }
387 base += sizeof (*count);
388
389 if (c == 0 && invf_ptr)
390 {
391 bcopy ((char *) base, (char *) &invfp, sizeof (invf_ptr));
392 NTOHUL(invfp); /* [RPAP - Jan 97: Endian Ordering] */
393 *invf_ptr = invfp;
394 }
395 base += sizeof (*invf_ptr);
396
397 if (c == 0)
398 {
399 /* Calculate invf_len is necessary */
400 unsigned long next_invfp;
401 if (!invf_len)
402 return (*first_word + res);
403
404 /* If the current word is the last word of the block the get the
405 length from last_invf_len */
406 if (res == *num_words - 1)
407 {
408 *invf_len = *last_invf_len;
409 return (*first_word + res);
410 }
411
412 /* Skip over most of the next word to get to the invf_ptr */
413 base++;
414 suff = *base++;
415 base += suff + sizeof (unsigned long) * 2;
416 bcopy ((char *) base, (char *) &next_invfp, sizeof (next_invfp));
417 NTOHUL(next_invfp); /* [RPAP - Jan 97: Endian Ordering] */
418 *invf_len = next_invfp - invfp;
419 return (*first_word + res);
420 }
421 res++;
422 }
423}
424
425
426/* [RPAP - Jan 97: Stem Index Change] */
427int
428FindWords (stemmed_dict * sd, u_char * sWord, int stem_method, TermList ** tl)
429{
430 register unsigned int res;
431 unsigned int idx_res;
432 unsigned copy, suff;
433 int j, k;
434
435 int block, num_indexes;
436 unsigned long *first_word, *last_invf_len;
437 unsigned short *num_words;
438 u_char *base;
439 unsigned short *index;
440 u_char prev[MAXSTEMLEN + 1];
441
442 int idx_block, idx_num_indexes;
443 unsigned long *idx_first_word;
444 unsigned short *idx_num_words;
445 u_char *idx_base;
446 unsigned short *idx_index;
447 u_char idx_prev[MAXSTEMLEN + 1];
448
449 unsigned int num_entries, num_cases;
450 unsigned short blk_index, offset;
451 stemmed_idx * si = NULL;
452
453 if (stem_method == 1)
454 si = sd->stem1;
455 else if (stem_method == 2)
456 si = sd->stem2;
457 else
458 si = sd->stem3;
459
460 /* Locate block */
461 idx_block = GetIdxBlock (si, sWord);
462
463 /* [RPAP - Jan 97: Endian Ordering] */
464 if (si->active != si->pos[idx_block])
465 {
466 Fseek (si->stem_idx_file, si->pos[idx_block] + si->sih.blocks_start, 0);
467 Fread (si->buffer, si->sih.block_size, sizeof (u_char), si->stem_idx_file);
468 si->active = si->pos[idx_block];
469
470 idx_first_word = (unsigned long *) (si->buffer);
471 NTOHUL(*idx_first_word); /* [RPAP - Jan 97: Endian Ordering] */
472 idx_num_words = (unsigned short *) (idx_first_word + 1);
473 NTOHUS(*idx_num_words); /* [RPAP - Jan 97: Endian Ordering] */
474 idx_index = idx_num_words + 1;
475 idx_num_indexes = ((*idx_num_words - 1) / si->sih.lookback) + 1;
476
477 /* [RPAP - Jan 97: Endian Ordering] */
478 for (j = 0; j < idx_num_indexes; j++)
479 NTOHUS(idx_index[j]);
480 }
481 else
482 {
483 idx_first_word = (unsigned long *) (si->buffer);
484 idx_num_words = (unsigned short *) (idx_first_word + 1);
485 idx_index = idx_num_words + 1;
486 idx_num_indexes = ((*idx_num_words - 1) / si->sih.lookback) + 1;
487 }
488 idx_base = (u_char *) (idx_index + idx_num_indexes);
489
490 {
491 /* Locate 3-in-4 block */
492 register int lo, hi, mid, c;
493 lo = 0;
494 hi = idx_num_indexes - 1;
495 while (lo <= hi)
496 {
497 mid = (lo + hi) / 2;
498 c = casecompare (sWord, idx_base + idx_index[mid] + 1);
499 if (c < 0)
500 hi = mid - 1;
501 else if (c > 0)
502 lo = mid + 1;
503 else
504 {
505 hi = mid;
506 break;
507 }
508 }
509 if (hi < 0)
510 hi = 0;
511
512 idx_res = hi * si->sih.lookback;
513 idx_base += idx_index[hi];
514 }
515
516 /* Locate actual word entry */
517 for (;;)
518 {
519 int c;
520 if (idx_res >= *idx_num_words)
521 return (-1);
522 copy = *idx_base++;
523 suff = *idx_base++;
524 bcopy ((char *) idx_base, (char *) (idx_prev + copy + 1), suff);
525 idx_base += suff;
526 *idx_prev = copy + suff;
527
528 c = casecompare (sWord, idx_prev);
529 if (c < 0)
530 return (-1);
531
532 bcopy ((char *) idx_base, (char *) &num_entries, sizeof (num_entries));
533 NTOHUI(num_entries); /* [RPAP - Jan 97: Endian Ordering] */
534 idx_base += sizeof (num_entries);
535
536 if (c > 0)
537 idx_base += num_entries * (sizeof (num_cases) + sizeof (block) +
538 sizeof (blk_index) + sizeof (offset));
539
540 else
541 break;
542
543 idx_res++;
544 }
545
546 for (k = 0; k < num_entries; k++)
547 {
548 unsigned copy, suff;
549 unsigned long invfp;
550 /* Read next stem index pos */
551 bcopy ((char *) idx_base, (char *) &num_cases, sizeof (num_cases));
552 NTOHUI(num_cases); /* [RPAP - Jan 97: Endian Ordering] */
553 idx_base += sizeof (num_cases);
554 bcopy ((char *) idx_base, (char *) &block, sizeof (block));
555 NTOHUI(block); /* [RPAP - Jan 97: Endian Ordering] */
556 idx_base += sizeof (block);
557 bcopy ((char *) idx_base, (char *) &blk_index, sizeof (blk_index));
558 NTOHUS(blk_index); /* [RPAP - Jan 97: Endian Ordering] */
559 idx_base += sizeof (blk_index);
560 bcopy ((char *) idx_base, (char *) &offset, sizeof (offset));
561 NTOHUS(offset); /* [RPAP - Jan 97: Endian Ordering] */
562 idx_base += sizeof (offset);
563
564 /* [RPAP - Jan 97: Endian Ordering] */
565 if (sd->active != sd->pos[block])
566 {
567 Fseek (sd->stem_file, sd->pos[block] + sd->sdh.blocks_start, 0);
568 Fread (sd->buffer, sd->sdh.block_size, sizeof (u_char), sd->stem_file);
569 sd->active = sd->pos[block];
570
571 first_word = (unsigned long *) (sd->buffer);
572 NTOHUL(*first_word); /* [RPAP - Jan 97: Endian Ordering] */
573 last_invf_len = (unsigned long *) (first_word + 1);
574 NTOHUL(*last_invf_len); /* [RPAP - Jan 97: Endian Ordering] */
575 num_words = (unsigned short *) (last_invf_len + 1);
576 NTOHUS(*num_words); /* [RPAP - Jan 97: Endian Ordering] */
577 index = num_words + 1;
578 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;
579
580 /* [RPAP - Jan 97: Endian Ordering] */
581 for (j = 0; j < num_indexes; j++)
582 NTOHUS(index[j]);
583 }
584 else
585 {
586 first_word = (unsigned long *) (sd->buffer);
587 last_invf_len = (unsigned long *) (first_word + 1);
588 num_words = (unsigned short *) (last_invf_len + 1);
589 index = num_words + 1;
590 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;
591 }
592 base = (u_char *) (index + num_indexes);
593
594 res = blk_index * sd->sdh.lookback;
595 base += index[blk_index];
596
597 for (j = 0; j < offset; j++)
598 {
599 copy = *base++;
600 suff = *base++;
601 bcopy ((char *) base, (char *) (prev + copy + 1), suff);
602 base += suff;
603 *prev = copy + suff;
604 base += sizeof (unsigned long); /* skip doc_count */
605 base += sizeof (unsigned long); /* skip count */
606 base += sizeof (unsigned long); /* skip invf_ptr */
607 res++;
608 }
609
610 for (j = 0; j < num_cases; j++)
611 {
612 TermEntry te;
613
614 if (res >= *num_words)
615 return (-1);
616 copy = *base++;
617 suff = *base++;
618 bcopy ((char *) base, (char *) (prev + copy + 1), suff);
619 base += suff;
620 *prev = copy + suff;
621
622 te.Word = copy_string (prev);
623 if (!te.Word)
624 FatalError (1, "Could NOT create memory to add term");
625 te.Stem = copy_string (prev);
626 if (!te.Stem)
627 FatalError (1, "Could NOT create memory to add term");
628 stemmer (2, te.Stem);
629
630 te.Count = 1;
631 te.WE.word_num = *first_word + res;
632 bcopy ((char *) base, (char *) &te.WE.doc_count, sizeof (te.WE.doc_count));
633 NTOHUL(te.WE.doc_count); /* [RPAP - Jan 97: Endian Ordering] */
634 te.WE.max_doc_count = te.WE.doc_count;
635 base += sizeof (te.WE.doc_count);
636
637 bcopy ((char *) base, (char *) &te.WE.count, sizeof (te.WE.count));
638 NTOHUL(te.WE.count);
639 base += sizeof (te.WE.count);
640
641 bcopy ((char *) base, (char *) &invfp, sizeof (te.WE.invf_ptr));
642 NTOHUL(invfp); /* [RPAP - Jan 97: Endian Ordering] */
643 te.WE.invf_ptr = invfp;
644 base += sizeof (te.WE.invf_ptr);
645
646 /* If the current word is the last word of the block the get the
647 length from last_invf_len */
648 if (res == *num_words - 1)
649 te.WE.invf_len = *last_invf_len;
650 else
651 {
652 unsigned long next_invfp;
653 u_char *oldbase = base;
654
655 /* Skip over most of the next word to get to the invf_ptr */
656 base++;
657 suff = *base++;
658 base += suff + sizeof (unsigned long) * 2;
659 bcopy ((char *) base, (char *) &next_invfp, sizeof (next_invfp));
660 NTOHUL(next_invfp); /* [RPAP - Jan 97: Endian Ordering] */
661 te.WE.invf_len = next_invfp - invfp;
662 base = oldbase;
663 }
664
665 /* Add term entry to term list */
666 AddTermEntry (tl, &te);
667
668 if (res == *num_words - 1 && j + 1 < num_cases)
669 {
670 int ii;
671 /* Read in next block */
672 block++;
673 Fseek (sd->stem_file, sd->pos[block] + sd->sdh.blocks_start, 0);
674 Fread (sd->buffer, sd->sdh.block_size, sizeof (u_char), sd->stem_file);
675 sd->active = sd->pos[block];
676
677 first_word = (unsigned long *) (sd->buffer);
678 NTOHUL(*first_word); /* [RPAP - Jan 97: Endian Ordering] */
679 last_invf_len = (unsigned long *) (first_word + 1);
680 NTOHUL(*last_invf_len); /* [RPAP - Jan 97: Endian Ordering] */
681 num_words = (unsigned short *) (last_invf_len + 1);
682 NTOHUS(*num_words); /* [RPAP - Jan 97: Endian Ordering] */
683 index = num_words + 1;
684 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;
685
686 /* [RPAP - Jan 97: Endian Ordering] */
687 for (ii = 0; ii < num_indexes; ii++)
688 NTOHUS(index[ii]);
689
690 base = (u_char *) (index + num_indexes);
691 base += index[0];
692 res = 0;
693 blk_index = 0;
694 }
695 else
696 res++;
697 } /* end for num_cases */
698 } /* end for num_entries */
699 return (*tl)->num;
700}
701
702
703void
704FreeStemDict (stemmed_dict * sd)
705{
706 /* [RPAP - Jan 97: Stem Index Change] */
707 if (sd->stem1)
708 FreeStemIdx (sd->stem1);
709 if (sd->stem2)
710 FreeStemIdx (sd->stem2);
711 if (sd->stem3)
712 FreeStemIdx (sd->stem3);
713
714 Xfree (sd->index[0]);
715 Xfree (sd->index);
716 Xfree (sd->buffer);
717 Xfree (sd->pos);
718 Xfree (sd);
719}
720
721/* [RPAP - Jan 97: Stem Index Change] */
722void
723FreeStemIdx (stemmed_idx * si)
724{
725 Xfree (si->index[0]);
726 Xfree (si->index);
727 Xfree (si->buffer);
728 Xfree (si->pos);
729 Xfree (si);
730}
Note: See TracBrowser for help on using the repository browser.