[439] | 1 | /**************************************************************************
|
---|
| 2 | *
|
---|
| 3 | * mg_stem_idx.c -- Memory efficient stem index builder
|
---|
| 4 | * Copyright (C) 1997 Ross Peeters
|
---|
| 5 | *
|
---|
| 6 | * This program is free software; you can redistribute it and/or modify
|
---|
| 7 | * it under the terms of the GNU General Public License as published by
|
---|
| 8 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 9 | * (at your option) any later version.
|
---|
| 10 | *
|
---|
| 11 | * This program is distributed in the hope that it will be useful,
|
---|
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | * GNU General Public License for more details.
|
---|
| 15 | *
|
---|
| 16 | * You should have received a copy of the GNU General Public License
|
---|
| 17 | * along with this program; if not, write to the Free Software
|
---|
| 18 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 19 | *
|
---|
| 20 | **************************************************************************/
|
---|
| 21 |
|
---|
| 22 | #include "sysfuncs.h"
|
---|
| 23 |
|
---|
| 24 | #include "memlib.h"
|
---|
| 25 | #include "messages.h"
|
---|
| 26 | #include "filestats.h"
|
---|
| 27 | #include "netorder.h" /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 28 |
|
---|
| 29 | #include "mg_files.h"
|
---|
| 30 | #include "invf.h"
|
---|
| 31 | #include "mg.h"
|
---|
| 32 | #include "locallib.h"
|
---|
| 33 | #include "backend.h" /* For struct stemmed_dict */
|
---|
| 34 | #include "words.h"
|
---|
| 35 | #include "stemmer.h"
|
---|
| 36 | #include "hash.h"
|
---|
| 37 | #include "local_strings.h"
|
---|
| 38 |
|
---|
| 39 | typedef struct PosEntry
|
---|
| 40 | {
|
---|
| 41 | unsigned int num_cases;
|
---|
| 42 | unsigned int blk;
|
---|
| 43 | unsigned short blk_index;
|
---|
| 44 | unsigned short offset;
|
---|
| 45 | }
|
---|
| 46 | PosEntry;
|
---|
| 47 |
|
---|
| 48 |
|
---|
| 49 | typedef struct PosList
|
---|
| 50 | {
|
---|
| 51 | unsigned int list_size;
|
---|
| 52 | unsigned int num_entries;
|
---|
| 53 | PosEntry PE[1];
|
---|
| 54 | }
|
---|
| 55 | PosList;
|
---|
| 56 |
|
---|
| 57 |
|
---|
| 58 | typedef struct idx_hash_rec
|
---|
| 59 | {
|
---|
| 60 | u_char *word;
|
---|
| 61 | PosList *PL;
|
---|
| 62 | }
|
---|
| 63 | idx_hash_rec;
|
---|
| 64 |
|
---|
| 65 |
|
---|
| 66 | #define POOL_SIZE 1024*1024
|
---|
| 67 | #define HASH_POOL_SIZE 8192
|
---|
| 68 | #define INITIAL_HASH_SIZE 7927
|
---|
| 69 |
|
---|
| 70 |
|
---|
| 71 | static unsigned long MaxMemInUse = 0;
|
---|
| 72 | static unsigned long MemInUse = 0;
|
---|
| 73 |
|
---|
| 74 | static idx_hash_rec **IdxHashTable;
|
---|
| 75 | static unsigned long IdxHashSize;
|
---|
| 76 | static unsigned long IdxHashUsed;
|
---|
| 77 | static u_char *IdxPool;
|
---|
| 78 | static int IdxPoolLeft;
|
---|
| 79 |
|
---|
| 80 | static idx_hash_rec *idx_hr_pool;
|
---|
| 81 | static int idx_hr_PoolLeft;
|
---|
| 82 |
|
---|
| 83 | static idx_hash_rec **idx_first_occr;
|
---|
| 84 | static int idx_max_first_occr;
|
---|
| 85 |
|
---|
| 86 | int block_size = 1024 * 4;
|
---|
| 87 |
|
---|
| 88 | int force = 0;
|
---|
| 89 |
|
---|
| 90 | static long lookback = 4;
|
---|
| 91 |
|
---|
| 92 | static void
|
---|
| 93 | ChangeMem (int Change)
|
---|
| 94 | {
|
---|
| 95 | MemInUse += Change;
|
---|
| 96 | if (MemInUse > MaxMemInUse)
|
---|
| 97 | MaxMemInUse = MemInUse;
|
---|
| 98 | }
|
---|
| 99 |
|
---|
| 100 |
|
---|
| 101 | /* =========================================================================
|
---|
| 102 | * Function: MakePosList
|
---|
| 103 | * Description:
|
---|
| 104 | * Input:
|
---|
| 105 | * Output:
|
---|
| 106 | * ========================================================================= */
|
---|
| 107 | PosList *
|
---|
| 108 | MakePosList (int n)
|
---|
| 109 | {
|
---|
| 110 | PosList *pl;
|
---|
| 111 | int list_size = (n == 0 ? 1 : n); /* always allocate at least one node */
|
---|
| 112 |
|
---|
| 113 | pl = Xmalloc (sizeof (PosList) + (list_size - 1) * sizeof (PosEntry));
|
---|
| 114 | if (!pl)
|
---|
| 115 | FatalError (1, "Unable to allocate term list");
|
---|
| 116 | ChangeMem (sizeof (PosList) + (list_size - 1) * sizeof (PosEntry));
|
---|
| 117 |
|
---|
| 118 | pl->num_entries = n;
|
---|
| 119 | pl->list_size = list_size;
|
---|
| 120 |
|
---|
| 121 | return pl;
|
---|
| 122 | }
|
---|
| 123 |
|
---|
| 124 | /* =========================================================================
|
---|
| 125 | * Function: ResizePosList
|
---|
| 126 | * Description:
|
---|
| 127 | * Input:
|
---|
| 128 | * Output:
|
---|
| 129 | * ========================================================================= */
|
---|
| 130 |
|
---|
| 131 | #define GROWTH_FACTOR 2
|
---|
| 132 | #define MIN_SIZE 2
|
---|
| 133 |
|
---|
| 134 | static void
|
---|
| 135 | ResizePosList (PosList ** pos_list)
|
---|
| 136 | {
|
---|
| 137 | PosList *pl = *pos_list;
|
---|
| 138 |
|
---|
| 139 | ChangeMem (-(sizeof (PosList) + (pl->list_size - 1) * sizeof (PosEntry)));
|
---|
| 140 | if (pl->num_entries > pl->list_size)
|
---|
| 141 | {
|
---|
| 142 | if (pl->list_size)
|
---|
| 143 | pl->list_size *= GROWTH_FACTOR;
|
---|
| 144 | else
|
---|
| 145 | pl->list_size = MIN_SIZE;
|
---|
| 146 | }
|
---|
| 147 | pl = Xrealloc (pl, sizeof (PosList) + (pl->list_size - 1) * sizeof (PosEntry));
|
---|
| 148 |
|
---|
| 149 | if (!pl)
|
---|
| 150 | FatalError (1, "Unable to resize pos list");
|
---|
| 151 | ChangeMem (sizeof (PosList) + (pl->list_size - 1) * sizeof (PosEntry));
|
---|
| 152 |
|
---|
| 153 | *pos_list = pl;
|
---|
| 154 | }
|
---|
| 155 |
|
---|
| 156 |
|
---|
| 157 | /* =========================================================================
|
---|
| 158 | * Function: FreePosList
|
---|
| 159 | * Description:
|
---|
| 160 | * Input:
|
---|
| 161 | * Output:
|
---|
| 162 | * ========================================================================= */
|
---|
| 163 |
|
---|
| 164 | void
|
---|
| 165 | FreePosList (PosList ** the_pl)
|
---|
| 166 | {
|
---|
| 167 | PosList *pl = *the_pl;
|
---|
| 168 |
|
---|
| 169 | ChangeMem (-(sizeof(PosList) + sizeof (PosEntry) * (pl->list_size - 1)));
|
---|
| 170 | Xfree (pl);
|
---|
| 171 |
|
---|
| 172 | *the_pl = NULL;
|
---|
| 173 | }
|
---|
| 174 |
|
---|
| 175 |
|
---|
| 176 | /* =========================================================================
|
---|
| 177 | * Function: ResetPosList
|
---|
| 178 | * Description:
|
---|
| 179 | * Input:
|
---|
| 180 | * Output:
|
---|
| 181 | * ========================================================================= */
|
---|
| 182 |
|
---|
| 183 | void
|
---|
| 184 | ResetPosList (PosList ** pl)
|
---|
| 185 | {
|
---|
| 186 | if (*pl)
|
---|
| 187 | FreePosList (pl);
|
---|
| 188 | *pl = MakePosList (0);
|
---|
| 189 | }
|
---|
| 190 |
|
---|
| 191 | /* =========================================================================
|
---|
| 192 | * Function: AddPosEntry
|
---|
| 193 | * Description:
|
---|
| 194 | * Input:
|
---|
| 195 | * Output:
|
---|
| 196 | * ========================================================================= */
|
---|
| 197 |
|
---|
| 198 | int
|
---|
| 199 | AddPosEntry (PosList ** pos_list, PosEntry * pe)
|
---|
| 200 | {
|
---|
| 201 | PosList *pl = *pos_list;
|
---|
| 202 |
|
---|
| 203 | pl->num_entries++;
|
---|
| 204 | ResizePosList (pos_list);
|
---|
| 205 | pl = *pos_list;
|
---|
| 206 |
|
---|
| 207 | /* copy the structure contents */
|
---|
| 208 | bcopy ((char *) pe, (char *) &(pl->PE[pl->num_entries - 1]), sizeof (PosEntry));
|
---|
| 209 |
|
---|
| 210 | return pl->num_entries - 1;
|
---|
| 211 | }
|
---|
| 212 |
|
---|
| 213 |
|
---|
| 214 | /* Modified from stem_search.c */
|
---|
| 215 | stemmed_dict *
|
---|
| 216 | ReadStemDictBlk (File * stem_file)
|
---|
| 217 | {
|
---|
| 218 | unsigned long i;
|
---|
| 219 | stemmed_dict *sd;
|
---|
| 220 | u_char *buffer;
|
---|
| 221 |
|
---|
| 222 | if (!(sd = Xmalloc (sizeof (stemmed_dict))))
|
---|
| 223 | FatalError (1, "Could not allocate memory for stemmed dict");
|
---|
| 224 |
|
---|
| 225 |
|
---|
| 226 | sd->stem_file = stem_file;
|
---|
| 227 | sd->MemForStemDict = 0;
|
---|
| 228 |
|
---|
| 229 | Fread (&sd->sdh, sizeof (sd->sdh), 1, stem_file);
|
---|
| 230 |
|
---|
| 231 | /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 232 | NTOHUL(sd->sdh.lookback);
|
---|
| 233 | NTOHUL(sd->sdh.block_size);
|
---|
| 234 | NTOHUL(sd->sdh.num_blocks);
|
---|
| 235 | NTOHUL(sd->sdh.blocks_start);
|
---|
| 236 | NTOHUL(sd->sdh.index_chars);
|
---|
| 237 | NTOHUL(sd->sdh.num_of_docs);
|
---|
| 238 | NTOHUL(sd->sdh.static_num_of_docs);
|
---|
| 239 | NTOHUL(sd->sdh.num_of_words);
|
---|
| 240 | NTOHUL(sd->sdh.stemmer_num);
|
---|
| 241 | NTOHUL(sd->sdh.stem_method);
|
---|
| 242 | NTOHUL(sd->sdh.indexed);
|
---|
| 243 |
|
---|
| 244 | if (!(buffer = Xmalloc (sd->sdh.index_chars)))
|
---|
| 245 | {
|
---|
| 246 | Xfree (sd);
|
---|
| 247 | FatalError (1, "Could not allocate memory for stemmed dict");
|
---|
| 248 | return (NULL);
|
---|
| 249 | };
|
---|
| 250 | sd->MemForStemDict += sd->sdh.index_chars;
|
---|
| 251 |
|
---|
| 252 | if (!(sd->index = Xmalloc (sd->sdh.num_blocks * sizeof (*sd->index))))
|
---|
| 253 | {
|
---|
| 254 | Xfree (sd);
|
---|
| 255 | Xfree (buffer);
|
---|
| 256 | FatalError (1, "Could not allocate memory for stemmed dict");
|
---|
| 257 | return (NULL);
|
---|
| 258 | };
|
---|
| 259 | sd->MemForStemDict += sd->sdh.num_blocks * sizeof (*sd->index);
|
---|
| 260 |
|
---|
| 261 | if (!(sd->pos = Xmalloc (sd->sdh.num_blocks * sizeof (*sd->pos))))
|
---|
| 262 | {
|
---|
| 263 | Xfree (sd);
|
---|
| 264 | Xfree (buffer);
|
---|
| 265 | Xfree (sd->index);
|
---|
| 266 | FatalError (1, "Could not allocate memory for stemmed dict");
|
---|
| 267 | return (NULL);
|
---|
| 268 | };
|
---|
| 269 | sd->MemForStemDict += sd->sdh.num_blocks * sizeof (*sd->pos);
|
---|
| 270 |
|
---|
| 271 | if (!(sd->buffer = Xmalloc (sd->sdh.block_size * sizeof (*sd->buffer))))
|
---|
| 272 | {
|
---|
| 273 | Xfree (sd);
|
---|
| 274 | Xfree (buffer);
|
---|
| 275 | Xfree (sd->index);
|
---|
| 276 | Xfree (sd->buffer);
|
---|
| 277 | FatalError (1, "Could not allocate memory for stemmed dict");
|
---|
| 278 | return (NULL);
|
---|
| 279 | };
|
---|
| 280 | sd->MemForStemDict += sd->sdh.block_size * sizeof (*sd->buffer);
|
---|
| 281 |
|
---|
| 282 | sd->active = -1;
|
---|
| 283 |
|
---|
| 284 | for (i = 0; i < sd->sdh.num_blocks; i++)
|
---|
| 285 | {
|
---|
| 286 | register u_char len;
|
---|
| 287 | sd->index[i] = buffer;
|
---|
| 288 | len = Getc (stem_file);
|
---|
| 289 | *buffer++ = len;
|
---|
| 290 | Fread (buffer, sizeof (u_char), len, stem_file);
|
---|
| 291 | buffer += len;
|
---|
| 292 | Fread (&sd->pos[i], sizeof (*sd->pos), 1, stem_file);
|
---|
| 293 | NTOHUL(sd->pos[i]); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 294 | }
|
---|
| 295 |
|
---|
| 296 | return sd;
|
---|
| 297 | }
|
---|
| 298 |
|
---|
| 299 |
|
---|
| 300 | void
|
---|
[2378] | 301 | mg_init_process ()
|
---|
[439] | 302 | {
|
---|
| 303 | /* Allocate memory for idx hash table */
|
---|
| 304 |
|
---|
| 305 | if (!(IdxPool = Xmalloc (POOL_SIZE)))
|
---|
| 306 | FatalError (1, "Unable to allocate memory for idx pool");
|
---|
| 307 | IdxPoolLeft = POOL_SIZE;
|
---|
| 308 | ChangeMem (POOL_SIZE);
|
---|
| 309 |
|
---|
| 310 | if (!(idx_hr_pool = Xmalloc (HASH_POOL_SIZE * sizeof (idx_hash_rec))))
|
---|
| 311 | FatalError (1, "Unable to allocate memory for idx pool");
|
---|
| 312 | idx_hr_PoolLeft = HASH_POOL_SIZE;
|
---|
| 313 | ChangeMem (HASH_POOL_SIZE * sizeof (idx_hash_rec));
|
---|
| 314 |
|
---|
| 315 | IdxHashSize = INITIAL_HASH_SIZE;
|
---|
| 316 | IdxHashUsed = 0;
|
---|
| 317 | if (!(IdxHashTable = Xmalloc (sizeof (idx_hash_rec *) * IdxHashSize)))
|
---|
| 318 | FatalError (1, "Unable to allocate memory for idx table");
|
---|
| 319 | ChangeMem (sizeof (idx_hash_rec *) * IdxHashSize);
|
---|
| 320 | bzero ((char *) IdxHashTable, sizeof (idx_hash_rec *) * IdxHashSize);
|
---|
| 321 | idx_max_first_occr = 8192;
|
---|
| 322 | if (!(idx_first_occr = Xmalloc (sizeof (idx_hash_rec *) * idx_max_first_occr)))
|
---|
| 323 | FatalError (1, "Unable to allocate memory for idx_first_occur");
|
---|
| 324 | ChangeMem (sizeof (idx_hash_rec *) * idx_max_first_occr);
|
---|
| 325 | }
|
---|
| 326 |
|
---|
| 327 |
|
---|
| 328 | void
|
---|
| 329 | PackIdxHashTable (void)
|
---|
| 330 | {
|
---|
| 331 | int s, d;
|
---|
| 332 | for (s = d = 0; s < IdxHashSize; s++)
|
---|
| 333 | if (IdxHashTable[s])
|
---|
| 334 | IdxHashTable[d++] = IdxHashTable[s];
|
---|
| 335 | ChangeMem (-sizeof (idx_hash_rec *) * IdxHashSize);
|
---|
| 336 | ChangeMem (sizeof (idx_hash_rec *) * IdxHashUsed);
|
---|
| 337 | if (!(IdxHashTable = Xrealloc (IdxHashTable, sizeof (idx_hash_rec *) * IdxHashUsed)))
|
---|
| 338 | FatalError (1, "Out of memory");
|
---|
| 339 | IdxHashSize = IdxHashUsed;
|
---|
| 340 | }
|
---|
| 341 |
|
---|
| 342 |
|
---|
| 343 | void
|
---|
| 344 | process_stem_dict (stemmed_dict * sd, int stem_method)
|
---|
| 345 | {
|
---|
| 346 | int block;
|
---|
| 347 | short blk_index = -1;
|
---|
| 348 | int wordnum = -1;
|
---|
| 349 | PosEntry *prevPE = NULL;
|
---|
| 350 | idx_hash_rec *prevIdx = NULL;
|
---|
| 351 | u_char word[MAXSTEMLEN + 1];
|
---|
| 352 | u_char prev[MAXSTEMLEN + 1];
|
---|
| 353 |
|
---|
| 354 | /* For each block in stem dict... */
|
---|
| 355 | for (block = 0; block < sd->sdh.num_blocks; block++)
|
---|
| 356 | {
|
---|
| 357 | register unsigned int res;
|
---|
| 358 | int num_indexes;
|
---|
| 359 | unsigned long *first_word, *last_invf_len;
|
---|
| 360 | unsigned short *num_words;
|
---|
| 361 | u_char *base;
|
---|
| 362 | unsigned short *index;
|
---|
| 363 |
|
---|
| 364 | /* Read block into buffer */
|
---|
| 365 | Fseek (sd->stem_file, sd->pos[block] + sd->sdh.blocks_start, 0);
|
---|
| 366 | Fread (sd->buffer, sd->sdh.block_size, sizeof (u_char), sd->stem_file);
|
---|
| 367 | sd->active = sd->pos[block];
|
---|
| 368 |
|
---|
| 369 | /* Move through block header */
|
---|
| 370 | first_word = (unsigned long *) (sd->buffer);
|
---|
| 371 | NTOHUL(*first_word); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 372 | last_invf_len = (unsigned long *) (first_word + 1);
|
---|
| 373 | NTOHUL(*last_invf_len); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 374 | num_words = (unsigned short *) (last_invf_len + 1);
|
---|
| 375 | NTOHUS(*num_words); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 376 | index = num_words + 1;
|
---|
| 377 | num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;
|
---|
| 378 |
|
---|
| 379 | {
|
---|
| 380 | /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 381 | int i;
|
---|
| 382 | for (i = 0; i < num_indexes; i++)
|
---|
| 383 | NTOHUS(index[i]);
|
---|
| 384 | }
|
---|
| 385 |
|
---|
| 386 | base = (u_char *) (index + num_indexes);
|
---|
| 387 | blk_index = -1;
|
---|
| 388 |
|
---|
| 389 | /* For each word in block... */
|
---|
| 390 | for (res = 0; res < *num_words; res++)
|
---|
| 391 | {
|
---|
| 392 | unsigned copy, suff;
|
---|
| 393 | register idx_hash_rec *idx_ent = 0;
|
---|
| 394 |
|
---|
| 395 | /* Update blk_index */
|
---|
| 396 | if (!(res % sd->sdh.lookback))
|
---|
| 397 | blk_index++;
|
---|
| 398 |
|
---|
| 399 | copy = *base++;
|
---|
| 400 | suff = *base++;
|
---|
| 401 | bcopy ((char *) base, (char *) (prev + copy + 1), suff);
|
---|
| 402 | base += suff;
|
---|
| 403 | *prev = copy + suff;
|
---|
| 404 |
|
---|
| 405 | /* Skip irrelevant word info */
|
---|
| 406 | base += sizeof (u_long);
|
---|
| 407 | base += sizeof (u_long);
|
---|
| 408 | base += sizeof (u_long);
|
---|
| 409 |
|
---|
| 410 | /* Stem word */
|
---|
| 411 | bcopy ((char *) prev, (char *) word, *prev + 1);
|
---|
| 412 | stemmer (stem_method, sd->sdh.stemmer_num, word);
|
---|
| 413 |
|
---|
| 414 | /* Check if word follows straight on from previous word */
|
---|
| 415 | if (prevIdx && !compare (word, prevIdx->word))
|
---|
| 416 | prevPE->num_cases++;
|
---|
| 417 |
|
---|
[3804] | 418 | else
|
---|
[439] | 419 | {
|
---|
| 420 | /* Search the idx hash table for word */
|
---|
| 421 | register unsigned long hashval, step;
|
---|
| 422 | register int hsize = IdxHashSize;
|
---|
| 423 | HASH (hashval, step, word, hsize);
|
---|
| 424 | for (;;)
|
---|
| 425 | {
|
---|
| 426 | register u_char *s1;
|
---|
| 427 | register u_char *s2;
|
---|
| 428 | register int len;
|
---|
| 429 | idx_ent = IdxHashTable[hashval];
|
---|
| 430 | if (!idx_ent)
|
---|
| 431 | {
|
---|
| 432 | /* Create a next entry in the hash table */
|
---|
| 433 | int len = *word + 1;
|
---|
| 434 | if (!idx_hr_PoolLeft)
|
---|
| 435 | {
|
---|
| 436 | if (!(idx_hr_pool = Xmalloc (HASH_POOL_SIZE *
|
---|
| 437 | sizeof (idx_hash_rec))))
|
---|
| 438 | FatalError (1, "Unable to allocate memory for pool");
|
---|
| 439 | idx_hr_PoolLeft = HASH_POOL_SIZE;
|
---|
| 440 | ChangeMem (HASH_POOL_SIZE * sizeof (idx_hash_rec));
|
---|
| 441 | }
|
---|
| 442 | idx_ent = idx_hr_pool++;
|
---|
| 443 | idx_hr_PoolLeft--;
|
---|
| 444 | if (len > IdxPoolLeft)
|
---|
| 445 | {
|
---|
| 446 | if (!(IdxPool = Xmalloc (POOL_SIZE)))
|
---|
| 447 | FatalError (1, "Unable to allocate memory for pool");
|
---|
| 448 | IdxPoolLeft = POOL_SIZE;
|
---|
| 449 | ChangeMem (POOL_SIZE);
|
---|
| 450 | }
|
---|
| 451 | wordnum++;
|
---|
| 452 |
|
---|
| 453 | idx_ent->word = IdxPool;
|
---|
| 454 | idx_ent->PL = MakePosList (0);
|
---|
| 455 | {
|
---|
| 456 | PosEntry PE;
|
---|
| 457 | PE.num_cases = 1;
|
---|
| 458 | PE.blk = block;
|
---|
| 459 | PE.blk_index = (unsigned short) blk_index;
|
---|
| 460 | PE.offset = res % sd->sdh.lookback;
|
---|
| 461 | AddPosEntry (&(idx_ent->PL), &PE);
|
---|
| 462 | }
|
---|
| 463 | prevIdx = idx_ent;
|
---|
| 464 | prevPE = &(idx_ent->PL->PE[idx_ent->PL->num_entries - 1]);
|
---|
| 465 |
|
---|
| 466 | bcopy ((char *) word, (char *) IdxPool, len);
|
---|
| 467 | IdxPool += len;
|
---|
| 468 | IdxPoolLeft -= len;
|
---|
| 469 | if (IdxHashUsed == idx_max_first_occr - 1)
|
---|
| 470 | {
|
---|
| 471 | ChangeMem (-sizeof (idx_hash_rec *) * idx_max_first_occr);
|
---|
| 472 | idx_max_first_occr *= 2;
|
---|
| 473 | if (!(idx_first_occr = Xrealloc (idx_first_occr, sizeof (idx_hash_rec *) *
|
---|
| 474 | idx_max_first_occr)))
|
---|
| 475 | FatalError (1, "Unable to allocate memory for idx_first_occr");
|
---|
| 476 | ChangeMem (sizeof (idx_hash_rec *) * idx_max_first_occr);
|
---|
| 477 | }
|
---|
| 478 | idx_first_occr[IdxHashUsed] = idx_ent;
|
---|
| 479 | IdxHashUsed++;
|
---|
| 480 | IdxHashTable[hashval] = idx_ent;
|
---|
| 481 | break;
|
---|
[3804] | 482 | } // if (!idx_ent)
|
---|
[439] | 483 |
|
---|
| 484 | /* Compare the words */
|
---|
| 485 | s1 = word;
|
---|
| 486 | s2 = idx_ent->word;
|
---|
| 487 | len = *s1 + 1;
|
---|
| 488 | for (; len; len--)
|
---|
| 489 | if (*s1++ != *s2++)
|
---|
| 490 | break;
|
---|
| 491 |
|
---|
| 492 | if (len)
|
---|
| 493 | {
|
---|
| 494 | /* Entry is not the right one - move to next hash index */
|
---|
| 495 | hashval = (hashval + step);
|
---|
| 496 | if (hashval >= hsize)
|
---|
| 497 | hashval -= hsize;
|
---|
| 498 | }
|
---|
| 499 | else
|
---|
| 500 | {
|
---|
| 501 | /* Entry is correct - added PosEntry to word */
|
---|
| 502 | PosEntry PE;
|
---|
| 503 | PE.num_cases = 1;
|
---|
| 504 | PE.blk = block;
|
---|
| 505 | PE.blk_index = (unsigned short) blk_index;
|
---|
| 506 | PE.offset = res % sd->sdh.lookback;
|
---|
| 507 | AddPosEntry (&(idx_ent->PL), &PE);
|
---|
| 508 | prevIdx = idx_ent;
|
---|
| 509 | prevPE = &(idx_ent->PL->PE[idx_ent->PL->num_entries - 1]);
|
---|
| 510 | break;
|
---|
| 511 | }
|
---|
[3804] | 512 | } // for (;;)
|
---|
| 513 | } // else (the word does not follow on from previous one)
|
---|
[439] | 514 |
|
---|
| 515 | if (IdxHashUsed >= IdxHashSize >> 1)
|
---|
| 516 | {
|
---|
| 517 | idx_hash_rec **ht;
|
---|
| 518 | unsigned long size;
|
---|
| 519 | unsigned long i;
|
---|
| 520 | size = prime (IdxHashSize * 2);
|
---|
| 521 | if (!(ht = Xmalloc (sizeof (idx_hash_rec *) * size)))
|
---|
| 522 | FatalError (1, "Unable to allocate memory for idx table");
|
---|
| 523 | bzero ((char *) ht, sizeof (idx_hash_rec *) * size);
|
---|
| 524 | ChangeMem (sizeof (idx_hash_rec *) * size);
|
---|
| 525 |
|
---|
| 526 | for (i = 0; i < IdxHashSize; i++)
|
---|
| 527 | if (IdxHashTable[i])
|
---|
| 528 | {
|
---|
| 529 | register u_char *wptr;
|
---|
| 530 | idx_hash_rec *ent;
|
---|
| 531 | register unsigned long hashval, step;
|
---|
| 532 |
|
---|
| 533 | wptr = IdxHashTable[i]->word;
|
---|
| 534 | HASH (hashval, step, wptr, size);
|
---|
| 535 | ent = ht[hashval];
|
---|
| 536 | while (ent)
|
---|
| 537 | {
|
---|
| 538 | hashval += step;
|
---|
| 539 | if (hashval >= size)
|
---|
| 540 | hashval -= size;
|
---|
| 541 | ent = ht[hashval];
|
---|
| 542 | }
|
---|
| 543 | ht[hashval] = IdxHashTable[i];
|
---|
| 544 | }
|
---|
| 545 | Xfree (IdxHashTable);
|
---|
| 546 | ChangeMem (-sizeof (idx_hash_rec *) * IdxHashSize);
|
---|
| 547 | IdxHashTable = ht;
|
---|
| 548 | IdxHashSize = size;
|
---|
| 549 | }
|
---|
| 550 |
|
---|
| 551 | } /* end for each word */
|
---|
| 552 |
|
---|
| 553 | } /* end for each block */
|
---|
| 554 |
|
---|
| 555 | }
|
---|
| 556 |
|
---|
| 557 | static int
|
---|
| 558 | idx_comp (const void *A, const void *B)
|
---|
| 559 | {
|
---|
| 560 | u_char *s1 = (*((idx_hash_rec **) A))->word;
|
---|
| 561 | u_char *s2 = (*((idx_hash_rec **) B))->word;
|
---|
| 562 | return (casecompare (s1, s2));
|
---|
| 563 | }
|
---|
| 564 |
|
---|
| 565 |
|
---|
| 566 | void
|
---|
| 567 | save_idx (char * filename, int stem_method)
|
---|
| 568 | {
|
---|
| 569 | char *FName;
|
---|
| 570 | unsigned long i, j, pos, First_word, num;
|
---|
| 571 | struct stem_idx_header sih;
|
---|
| 572 | u_char *buffer, *last_word = NULL;
|
---|
| 573 | unsigned short *pointers;
|
---|
| 574 | int buf_in_use;
|
---|
| 575 | unsigned short ptrs_in_use, word_num;
|
---|
| 576 | FILE *idbi = NULL, *tmp = NULL;
|
---|
| 577 |
|
---|
| 578 | FName = make_name (filename, ".tmp", NULL);
|
---|
| 579 | if (!(tmp = fopen (FName, "w+b")))
|
---|
| 580 | FatalError (1, "Unable to open \"%s\".\n", FName);
|
---|
| 581 |
|
---|
| 582 | /* Delete the file now */
|
---|
| 583 | unlink (FName);
|
---|
| 584 |
|
---|
| 585 | /* Create appropriate stem index file */
|
---|
| 586 | switch (stem_method)
|
---|
| 587 | {
|
---|
| 588 | case (1):
|
---|
| 589 | {
|
---|
| 590 | idbi = create_file (filename, INVF_DICT_BLOCKED_1_SUFFIX, "wb", MAGIC_STEM_1,
|
---|
| 591 | MG_ABORT);
|
---|
| 592 | break;
|
---|
| 593 | }
|
---|
| 594 |
|
---|
| 595 | case (2):
|
---|
| 596 | {
|
---|
| 597 | idbi = create_file (filename, INVF_DICT_BLOCKED_2_SUFFIX, "wb", MAGIC_STEM_2,
|
---|
| 598 | MG_ABORT);
|
---|
| 599 | break;
|
---|
| 600 | }
|
---|
| 601 | case (3):
|
---|
| 602 | {
|
---|
| 603 | idbi = create_file (filename, INVF_DICT_BLOCKED_3_SUFFIX, "wb", MAGIC_STEM_3,
|
---|
| 604 | MG_ABORT);
|
---|
| 605 | break;
|
---|
| 606 | }
|
---|
| 607 | }
|
---|
| 608 |
|
---|
| 609 | if (!idbi)
|
---|
| 610 | FatalError (1, "Could NOT create .invf.blocked.%d file", stem_method);
|
---|
| 611 |
|
---|
| 612 | PackIdxHashTable();
|
---|
| 613 | qsort (IdxHashTable, IdxHashUsed, sizeof (idx_hash_rec *), idx_comp);
|
---|
| 614 |
|
---|
| 615 | sih.lookback = lookback;
|
---|
| 616 | sih.block_size = block_size;
|
---|
| 617 | sih.num_blocks = 0;
|
---|
| 618 | sih.blocks_start = 0;
|
---|
| 619 | sih.index_chars = 0;
|
---|
| 620 | sih.num_of_words = IdxHashUsed;
|
---|
| 621 |
|
---|
| 622 | fwrite ((char *) &sih, sizeof (sih), 1, idbi);
|
---|
| 623 |
|
---|
| 624 | if (!(buffer = Xmalloc (block_size + 512)))
|
---|
| 625 | FatalError (1, "Unable to allocate memory for \"buffer\"\n");
|
---|
| 626 | if (!(pointers = Xmalloc (block_size + 512)))
|
---|
| 627 | FatalError (1, "Unable to allocate memory for \"buffer\"\n");
|
---|
| 628 |
|
---|
| 629 | buf_in_use = 0;
|
---|
| 630 | pos = 0;
|
---|
| 631 | word_num = 0;
|
---|
| 632 | ptrs_in_use = 0;
|
---|
| 633 | First_word = 0;
|
---|
| 634 |
|
---|
| 635 | /* For each word in the hashtable... */
|
---|
| 636 | for (i = 0; i < IdxHashUsed; i++)
|
---|
| 637 | {
|
---|
| 638 | register unsigned long extra, copy, suff;
|
---|
| 639 | register struct idx_hash_rec *ent = IdxHashTable[i];
|
---|
| 640 |
|
---|
| 641 | /* build a new word on top of prev */
|
---|
| 642 | if (last_word != NULL)
|
---|
| 643 | copy = prefixlen (last_word, ent->word);
|
---|
| 644 | else
|
---|
| 645 | copy = 0;
|
---|
| 646 | suff = *(ent->word) - copy;
|
---|
| 647 | last_word = ent->word;
|
---|
| 648 |
|
---|
| 649 | if (word_num % sih.lookback == 0)
|
---|
| 650 | /* Will need copy chars to add + a pointer in index */
|
---|
| 651 | extra = copy + sizeof (*pointers);
|
---|
| 652 | else
|
---|
| 653 | extra = 0;
|
---|
| 654 | if ((ptrs_in_use + 1) * sizeof (*pointers) + sizeof (ptrs_in_use) + extra +
|
---|
| 655 | buf_in_use + sizeof (First_word) + suff + 1 + sizeof (ent->PL->num_entries) +
|
---|
| 656 | ent->PL->num_entries * sizeof (PosEntry) > block_size)
|
---|
| 657 | {
|
---|
| 658 | /* Dump buffer to tmp file */
|
---|
| 659 | int chunk;
|
---|
| 660 | HTONUL(First_word); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 661 | HTONUS(word_num); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 662 | fwrite (&First_word, sizeof (First_word), 1, tmp);
|
---|
| 663 | fwrite (&word_num, sizeof (word_num), 1, tmp);
|
---|
| 664 | fwrite (pointers, sizeof (*pointers), ptrs_in_use, tmp);
|
---|
| 665 | fwrite (buffer, sizeof (u_char), buf_in_use, tmp);
|
---|
| 666 | bzero ((char *) buffer, block_size);
|
---|
| 667 | chunk = buf_in_use + ptrs_in_use * sizeof (*pointers) +
|
---|
| 668 | sizeof (ptrs_in_use) + sizeof (First_word);
|
---|
| 669 | if (force && chunk < block_size)
|
---|
| 670 | {
|
---|
| 671 | fwrite (buffer, sizeof (u_char), block_size - chunk, tmp);
|
---|
| 672 | chunk = block_size;
|
---|
| 673 | }
|
---|
| 674 |
|
---|
| 675 | pos += chunk;
|
---|
| 676 |
|
---|
| 677 | buf_in_use = 0;
|
---|
| 678 | word_num = 0;
|
---|
| 679 | ptrs_in_use = 0;
|
---|
| 680 | sih.num_blocks++;
|
---|
| 681 |
|
---|
| 682 | /* Check that entry will fit into new block */
|
---|
| 683 | if (sizeof (*pointers) + sizeof (ptrs_in_use) + extra + sizeof (First_word) +
|
---|
| 684 | suff + 1 + sizeof (ent->PL->num_entries) +
|
---|
| 685 | ent->PL->num_entries * sizeof (PosEntry) > block_size)
|
---|
| 686 | FatalError (1, "Block size to small");
|
---|
| 687 |
|
---|
[3804] | 688 | } // if size > block_size
|
---|
[439] | 689 |
|
---|
| 690 | if (word_num % sih.lookback == 0)
|
---|
| 691 | {
|
---|
| 692 | HTONUS2(buf_in_use, pointers[ptrs_in_use++]); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 693 | suff += copy;
|
---|
| 694 | copy = 0;
|
---|
| 695 | }
|
---|
| 696 |
|
---|
| 697 | /* Output Word information */
|
---|
| 698 | buffer[buf_in_use++] = copy;
|
---|
| 699 | buffer[buf_in_use++] = suff;
|
---|
| 700 | bcopy ((char *) (ent->word + copy + 1), (char *) (buffer + buf_in_use), suff);
|
---|
| 701 | buf_in_use += suff;
|
---|
| 702 | HTONUI(ent->PL->num_entries); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 703 | bcopy ((char *) &(ent->PL->num_entries), (char *) (buffer + buf_in_use), sizeof (ent->PL->num_entries));
|
---|
| 704 | NTOHUI(ent->PL->num_entries); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 705 | buf_in_use += sizeof (ent->PL->num_entries);
|
---|
| 706 |
|
---|
| 707 | for (j = 0; j < ent->PL->num_entries; j++)
|
---|
| 708 | {
|
---|
| 709 | register PosEntry *pe = &(ent->PL->PE[j]);
|
---|
| 710 | HTONUI(pe->num_cases); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 711 | bcopy ((char *) &(pe->num_cases), (char *) (buffer + buf_in_use), sizeof (pe->num_cases));
|
---|
| 712 | buf_in_use += sizeof (pe->num_cases);
|
---|
| 713 | HTONUI(pe->blk); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 714 | bcopy ((char *) &(pe->blk), (char *) (buffer + buf_in_use), sizeof (pe->blk));
|
---|
| 715 | buf_in_use += sizeof (pe->blk);
|
---|
| 716 | HTONUS(pe->blk_index); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 717 | bcopy ((char *) &(pe->blk_index), (char *) (buffer + buf_in_use), sizeof (pe->blk_index));
|
---|
| 718 | buf_in_use += sizeof (pe->blk_index);
|
---|
| 719 | HTONUS(pe->offset); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 720 | bcopy ((char *) &(pe->offset), (char *) (buffer + buf_in_use), sizeof (pe->offset));
|
---|
| 721 | buf_in_use += sizeof (pe->offset);
|
---|
| 722 | }
|
---|
| 723 |
|
---|
| 724 | if (buf_in_use + ptrs_in_use * sizeof (*pointers) +
|
---|
| 725 | sizeof (ptrs_in_use) > block_size)
|
---|
| 726 | FatalError (1, "Fatal Internal Error # 64209258\n");
|
---|
| 727 |
|
---|
[3804] | 728 | if (word_num == 0) // the first word in each block - word_num set to 0 when buffer written to tmp file
|
---|
[439] | 729 | {
|
---|
| 730 | /* Write word to main index */
|
---|
| 731 | fwrite (ent->word, sizeof (u_char), *(ent->word) + 1, idbi);
|
---|
| 732 | HTONUL(pos); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 733 | fwrite (&pos, sizeof (pos), 1, idbi);
|
---|
| 734 | NTOHUL(pos); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 735 | sih.index_chars += *(ent->word) + 1;
|
---|
| 736 | First_word = i;
|
---|
| 737 | }
|
---|
| 738 | word_num++;
|
---|
[3804] | 739 | } /* end for each word */ // for each hash entry
|
---|
[439] | 740 |
|
---|
| 741 | if (buf_in_use)
|
---|
| 742 | {
|
---|
| 743 | /* Write last buffer to tmp file */
|
---|
| 744 | int chunk;
|
---|
| 745 |
|
---|
| 746 | /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 747 | HTONUL(First_word);
|
---|
| 748 | HTONUS(word_num);
|
---|
| 749 |
|
---|
| 750 | fwrite (&First_word, sizeof (First_word), 1, tmp);
|
---|
| 751 | fwrite (&word_num, sizeof (word_num), 1, tmp);
|
---|
| 752 | fwrite (pointers, sizeof (*pointers), ptrs_in_use, tmp);
|
---|
| 753 | fwrite (buffer, sizeof (u_char), buf_in_use, tmp);
|
---|
| 754 | bzero ((char *) buffer, block_size);
|
---|
| 755 | chunk = buf_in_use + ptrs_in_use * sizeof (*pointers) +
|
---|
| 756 | sizeof (ptrs_in_use) + sizeof (First_word);
|
---|
| 757 | if (force && chunk < block_size)
|
---|
| 758 | {
|
---|
| 759 | fwrite (buffer, sizeof (u_char), block_size - chunk, tmp);
|
---|
| 760 | chunk = block_size;
|
---|
| 761 | }
|
---|
| 762 |
|
---|
| 763 | sih.num_blocks++;
|
---|
| 764 | }
|
---|
| 765 |
|
---|
| 766 | rewind (tmp);
|
---|
| 767 | sih.blocks_start = sih.index_chars + sizeof (u_long) + sizeof (sih) +
|
---|
| 768 | sih.num_blocks * sizeof (pos);
|
---|
| 769 | if (force)
|
---|
| 770 | {
|
---|
| 771 | int amount;
|
---|
| 772 | amount = sih.blocks_start % block_size;
|
---|
| 773 | if (amount != 0)
|
---|
| 774 | {
|
---|
| 775 | bzero ((char *) buffer, block_size);
|
---|
| 776 | fwrite (buffer, sizeof (u_char), block_size - amount, idbi);
|
---|
| 777 | sih.blocks_start += block_size - amount;
|
---|
| 778 | }
|
---|
| 779 | }
|
---|
| 780 |
|
---|
| 781 | while ((num = fread (buffer, sizeof (u_char), block_size, tmp)) != 0)
|
---|
| 782 | fwrite (buffer, sizeof (u_char), num, idbi);
|
---|
| 783 | fclose (tmp);
|
---|
| 784 |
|
---|
| 785 | /* skip over the magic number */
|
---|
| 786 | fseek (idbi, sizeof (u_long), 0);
|
---|
| 787 |
|
---|
| 788 | /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 789 | HTONUL(sih.lookback);
|
---|
| 790 | HTONUL(sih.block_size);
|
---|
| 791 | HTONUL(sih.num_blocks);
|
---|
| 792 | HTONUL(sih.blocks_start);
|
---|
| 793 | HTONUL(sih.index_chars);
|
---|
| 794 | HTONUL(sih.num_of_words);
|
---|
| 795 |
|
---|
| 796 | fwrite (&sih, sizeof (sih), 1, idbi);
|
---|
| 797 | fclose (idbi);
|
---|
| 798 |
|
---|
| 799 | #ifndef SILENT
|
---|
| 800 | Message ("Stem %d:\n", stem_method);
|
---|
| 801 | Message (" Block size : %10d\n", block_size);
|
---|
| 802 | Message (" Num_blocks : %10d\n", NTOHUL(sih.num_blocks)); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 803 | Message (" Max mem used : %10.1f Mb\n", (double) MaxMemInUse / 1024.0 / 1024.0);
|
---|
| 804 | Message (" Num_of_words : %10d\n", NTOHUL(sih.num_of_words)); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 805 | #endif
|
---|
| 806 | }
|
---|
| 807 |
|
---|
| 808 |
|
---|
| 809 | void
|
---|
| 810 | UpdateStemDict (char * filename, int stem_method)
|
---|
| 811 | {
|
---|
| 812 | FILE *idb;
|
---|
| 813 | struct stem_dict_header sdh;
|
---|
| 814 |
|
---|
| 815 | if (!(idb = open_file (filename, INVF_DICT_BLOCKED_SUFFIX, "r+b",
|
---|
| 816 | MAGIC_STEM, MG_CONTINUE)))
|
---|
| 817 | FatalError (1, "Could not update stemmed dict");
|
---|
| 818 |
|
---|
| 819 | fread ((char *) &sdh, sizeof (sdh), 1, idb);
|
---|
| 820 | NTOHUL(sdh.indexed); /* [RPAP - Jan 97: Endian Ordering] */
|
---|
| 821 | sdh.indexed |= 1 << (stem_method - 1);
|
---|
| 822 | HTONUL(sdh.indexed);
|
---|
| 823 | fseek (idb, sizeof (u_long), 0);
|
---|
| 824 | fwrite ((char *) &sdh, sizeof (sdh), 1, idb);
|
---|
| 825 | fclose (idb);
|
---|
| 826 | }
|
---|
| 827 |
|
---|
| 828 |
|
---|
| 829 |
|
---|
| 830 | /* Main */
|
---|
| 831 | int main (int argc, char **argv)
|
---|
| 832 | {
|
---|
| 833 | File *idb; /* File to .invf.dict.blocked */
|
---|
| 834 | char *filename = "";
|
---|
| 835 | stemmed_dict *sd; /* Stemmed dictionary */
|
---|
| 836 | int ch;
|
---|
| 837 | char path[512];
|
---|
| 838 | int stem_method = 0;
|
---|
| 839 |
|
---|
| 840 | msg_prefix = argv[0];
|
---|
| 841 | opterr = 0;
|
---|
| 842 | while ((ch = getopt (argc, argv, "f:d:b:hFs:")) != -1)
|
---|
| 843 | switch (ch)
|
---|
| 844 | {
|
---|
| 845 | case 'f': /* input file */
|
---|
| 846 | filename = optarg;
|
---|
| 847 | break;
|
---|
| 848 | case 'd':
|
---|
| 849 | set_basepath (optarg);
|
---|
| 850 | break;
|
---|
| 851 | case 'b':
|
---|
| 852 | block_size = atoi (optarg);
|
---|
| 853 | break;
|
---|
| 854 | case 'F':
|
---|
| 855 | force = 1;
|
---|
| 856 | break;
|
---|
| 857 | case 's':
|
---|
| 858 | stem_method = atoi (optarg);
|
---|
| 859 | break;
|
---|
| 860 | case 'h':
|
---|
| 861 | case '?':
|
---|
| 862 | fprintf (stderr, "usage: %s [-d directory] "
|
---|
| 863 | "[-b num] [-F] [-h] -s 1|2|3 -f name\n", argv[0]);
|
---|
| 864 | exit (1);
|
---|
| 865 | }
|
---|
| 866 |
|
---|
| 867 | if (stem_method < 1 || stem_method > 3)
|
---|
| 868 | FatalError (1, "Stem method must be 1, 2 or 3");
|
---|
| 869 |
|
---|
| 870 | /* Open required stem dict file */
|
---|
| 871 | sprintf (path, FILE_NAME_FORMAT, get_basepath (), filename, INVF_DICT_BLOCKED_SUFFIX);
|
---|
| 872 | if (!(idb = Fopen (path, "rb", MAGIC_STEM)))
|
---|
| 873 | FatalError (1, "Unable to open \"%s\"", path);
|
---|
| 874 |
|
---|
| 875 | /* Read in idb header and index to blocks */
|
---|
| 876 | if (!(sd = ReadStemDictBlk (idb)))
|
---|
| 877 | FatalError (1, "Could not read stemmed dictionary");
|
---|
| 878 |
|
---|
| 879 | /* Process stemmed dictionary */
|
---|
[2378] | 880 | mg_init_process ();
|
---|
[439] | 881 | process_stem_dict (sd, stem_method);
|
---|
| 882 | save_idx (filename, stem_method);
|
---|
| 883 |
|
---|
| 884 | /* Close stemmed dict */
|
---|
| 885 | Fclose (idb);
|
---|
| 886 |
|
---|
| 887 | /* Update stemmed dict */
|
---|
| 888 | UpdateStemDict (filename, stem_method);
|
---|
| 889 |
|
---|
| 890 | return 0;
|
---|
| 891 | }
|
---|
| 892 |
|
---|