Changeset 711 for trunk/gsdl/src/mgpp/text/stem_search.cpp
- Timestamp: 1999-10-18T12:43:31+13:00 (25 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/mgpp/text/stem_search.cpp
r655 r711 48 48 /* 49 49 $Log$ 50 Revision 1.2 1999/10/17 23:43:29 cs025 51 Changes to eradicate Xmalloc 52 50 53 Revision 1.1 1999/10/11 02:58:32 cs025 51 54 Base install of MG-PP … … 108 111 return (NULL); 109 112 } 110 /*111 si->stem_idx_file = stem_idx_file;112 si->MemForStemIdx = 0;113 114 Fread (&si->sih, sizeof (si->sih), 1, stem_idx_file);115 // [RPAP - Jan 97: Endian Ordering]116 NTOHUL(si->sih.lookback);117 NTOHUL(si->sih.block_size);118 NTOHUL(si->sih.num_blocks);119 NTOHUL(si->sih.blocks_start);120 NTOHUL(si->sih.index_chars);121 NTOHUL(si->sih.num_of_words);122 123 if (!(buffer = (u_char *) Xmalloc (si->sih.index_chars)))124 {125 Xfree (si);126 mg_errno = MG_NOMEM;127 return (NULL);128 };129 si->MemForStemIdx += si->sih.index_chars;130 131 if (!(si->index = (u_char **) Xmalloc (si->sih.num_blocks * sizeof (*si->index))))132 {133 Xfree (si);134 Xfree (buffer);135 mg_errno = MG_NOMEM;136 return (NULL);137 };138 si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->index);139 140 if (!(si->pos = (long unsigned int *) Xmalloc (si->sih.num_blocks * sizeof (*si->pos))))141 {142 Xfree (si->index);143 Xfree (si);144 Xfree (buffer);145 mg_errno = MG_NOMEM;146 return (NULL);147 };148 si->MemForStemIdx += si->sih.num_blocks * sizeof (*si->pos);149 150 if (!(si->buffer = (u_char *) Xmalloc (si->sih.block_size * sizeof (*si->buffer))))151 {152 Xfree (buffer);153 Xfree (si->index);154 Xfree (si->buffer);155 Xfree (si);156 mg_errno = MG_NOMEM;157 return (NULL);158 };159 si->MemForStemIdx += si->sih.block_size * sizeof (*si->buffer);160 161 si->active = -1;162 163 for (i = 0; i < si->sih.num_blocks; i++)164 {165 register u_char len;166 si->index[i] = buffer;167 len = Getc (stem_idx_file);168 *buffer++ = len;169 Fread (buffer, sizeof (u_char), len, stem_idx_file);170 buffer += len;171 Fread (&si->pos[i], sizeof (*si->pos), 1, stem_idx_file);172 NTOHUL(si->pos[i]); // [RPAP - Jan 97: Endian Ordering]173 }174 */175 113 mg_errno = MG_NOERROR; 176 114 … … 300 238 
StemBlock_read(sd, block, &first_word, &last_invf_len, &num_words, 301 239 &index, &num_indexes); 302 /* int i;303 304 Fseek (sd->stem_file, sd->pos[block] + sd->sdh.blocks_start, 0);305 Fread (sd->buffer, sd->sdh.block_size, sizeof (u_char), sd->stem_file);306 sd->active = sd->pos[block];307 308 // [RPAP - Jan 97: Endian Ordering]309 first_word = (unsigned long *) (sd->buffer);310 NTOHUL(*first_word);311 last_invf_len = (unsigned long *) (first_word + 1);312 NTOHUL(*last_invf_len);313 num_words = (unsigned short *) (last_invf_len + 1);314 NTOHUS(*num_words);315 index = num_words + 1;316 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;317 318 for (i = 0; i < num_indexes; i++)319 NTOHUS(index[i]);*/320 240 } 321 241 else … … 334 254 */ 335 255 hi = Word_BaseIndexNo(num_indexes, base, index, Word); 336 /*337 lo = 0;338 hi = num_indexes - 1;339 while (lo <= hi)340 {341 mid = (lo + hi) / 2;342 c = casecompare (Word, base + index[mid] + 1); // [RPAP - Jan 97: Stem Index Change]343 if (c < 0)344 hi = mid - 1;345 else if (c > 0)346 lo = mid + 1;347 else348 {349 hi = mid;350 break;351 }352 }353 if (hi < 0)354 hi = 0;355 */356 256 357 257 res = hi * sd->sdh.lookback; … … 365 265 while (res < *num_words) 366 266 { 367 //unsigned copy, suff;368 267 unsigned long invfp; 369 /* if (res >= *num_words)370 return (-1);*/371 268 StemBlock_ReadWordString(&base, prev); 372 /*373 copy = *base++;374 suff = *base++;375 bcopy ((void *) base, (void *) (prev + copy + 1), suff);376 base += suff;377 *prev = copy + suff;378 */379 269 380 270 c = casecompare (Word, prev); /* [RPAP - Jan 97: Stem Index Change] */ … … 407 297 { 408 298 /* Calculate invf_len is necessary */ 409 // unsigned long next_invfp;410 299 if (!invf_len) 411 300 return (*first_word + res); … … 421 310 /* Skip over most of the next word to get to the invf_ptr */ 422 311 *invf_len = Word_SkipToNextInvfPtr(&base, invfp); 423 /*424 base++;425 suff = *base++;426 base += suff + sizeof (unsigned long) * 2;427 bcopy ((void 
*) base, (void *) &next_invfp, sizeof (next_invfp));428 NTOHUL(next_invfp); // [RPAP - Jan 97: Endian Ordering]429 *invf_len = next_invfp - invfp;430 */431 312 return (*first_word + res); 432 313 } … … 507 388 StemIdx_block_readNext((void *) si->stem_idx_file, si, idx_block, &idx_first_word, &idx_num_words, 508 389 &idx_index, &idx_num_indexes, stemAbstractSeeker, stemAbstractReader); 509 /*510 Fseek (si->stem_idx_file, si->pos[idx_block] + si->sih.blocks_start, 0);511 Fread (si->buffer, si->sih.block_size, sizeof (u_char), si->stem_idx_file);512 si->active = si->pos[idx_block];513 514 idx_first_word = (unsigned long *) (si->buffer);515 NTOHUL(*idx_first_word); // [RPAP - Jan 97: Endian Ordering]516 idx_num_words = (unsigned short *) (idx_first_word + 1);517 NTOHUS(*idx_num_words); // [RPAP - Jan 97: Endian Ordering]518 idx_index = idx_num_words + 1;519 idx_num_indexes = ((*idx_num_words - 1) / si->sih.lookback) + 1;520 521 // [RPAP - Jan 97: Endian Ordering]522 for (j = 0; j < idx_num_indexes; j++)523 NTOHUS(idx_index[j]);524 */525 390 } 526 391 else … … 542 407 543 408 hi = Word_BaseIndexNo(idx_num_indexes, idx_base, idx_index, sWord); 544 /*545 lo = 0;546 hi = idx_num_indexes - 1;547 while (lo <= hi)548 {549 mid = (lo + hi) / 2;550 c = casecompare (sWord, idx_base + idx_index[mid] + 1);551 if (c < 0)552 hi = mid - 1;553 else if (c > 0)554 lo = mid + 1;555 else556 {557 hi = mid;558 break;559 }560 }561 if (hi < 0)562 hi = 0;563 */564 409 idx_res = hi * si->sih.lookback; 565 410 idx_base += idx_index[hi]; … … 573 418 return (-1); 574 419 StemIdx_ReadWordString(&idx_base, idx_prev); 575 576 /*577 copy = *idx_base++;578 suff = *idx_base++;579 bcopy ((void *) idx_base, (void *) (idx_prev + copy + 1), suff);580 idx_base += suff;581 *idx_prev = copy + suff;582 */583 420 584 421 c = casecompare (sWord, idx_prev); … … 608 445 */ 609 446 StemIdx_ReadPosEntry(&idx_base, &num_cases, (unsigned int *) &block, &blk_index, &offset); 610 /*611 bcopy ((void *) idx_base, (void *) 
&num_cases, sizeof (num_cases));612 NTOHUI(num_cases); // [RPAP - Jan 97: Endian Ordering]613 idx_base += sizeof (num_cases);614 bcopy ((void *) idx_base, (void *) &block, sizeof (block));615 NTOHUI(block); // [RPAP - Jan 97: Endian Ordering]616 idx_base += sizeof (block);617 bcopy ((void *) idx_base, (void *) &blk_index, sizeof (blk_index));618 NTOHUS(blk_index); // [RPAP - Jan 97: Endian Ordering]619 idx_base += sizeof (blk_index);620 bcopy ((void *) idx_base, (void *) &offset, sizeof (offset));621 NTOHUS(offset); // [RPAP - Jan 97: Endian Ordering]622 idx_base += sizeof (offset);623 */624 447 625 448 … … 636 459 StemBlock_read(sd, block, &first_word, &last_invf_len, &num_words, 637 460 &index, &num_indexes); 638 /*639 Fseek (sd->stem_file, sd->pos[block] + sd->sdh.blocks_start, 0);640 Fread (sd->buffer, sd->sdh.block_size, sizeof (u_char), sd->stem_file);641 sd->active = sd->pos[block];642 643 first_word = (unsigned long *) (sd->buffer);644 NTOHUL(*first_word); // [RPAP - Jan 97: Endian Ordering]645 last_invf_len = (unsigned long *) (first_word + 1);646 NTOHUL(*last_invf_len); // [RPAP - Jan 97: Endian Ordering]647 num_words = (unsigned short *) (last_invf_len + 1);648 NTOHUS(*num_words); // [RPAP - Jan 97: Endian Ordering]649 index = num_words + 1;650 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;651 652 // [RPAP - Jan 97: Endian Ordering]653 for (j = 0; j < num_indexes; j++)654 NTOHUS(index[j]);655 */656 461 } 657 462 else … … 676 481 StemBlock_ReadWordString(&base, prev); 677 482 678 /*679 copy = *base++;680 suff = *base++;681 bcopy ((void *) base, (void *) (prev + copy + 1), suff);682 base += suff;683 *prev = copy + suff;684 */685 483 base += sizeof (unsigned long); /* skip doc_count */ 686 484 base += sizeof (unsigned long); /* skip count */ … … 699 497 return (-1); 700 498 StemBlock_ReadWordString(&base, prev); 701 /*702 copy = *base++;703 suff = *base++;704 bcopy ((void *) base, (void *) (prev + copy + 1), suff);705 base += suff;706 *prev = copy 
+ suff;707 */708 499 709 500 te.Word = copy_string (prev); … … 737 528 else 738 529 { 739 //unsigned long next_invfp;740 530 u_char *oldbase = base; 741 531 742 532 /* Skip over most of the next word to get to the invf_ptr */ 743 533 te.WE.invf_len = Word_SkipToNextInvfPtr(&base, invfp); 744 /*745 base++;746 suff = *base++;747 base += suff + sizeof (unsigned long) * 2;748 bcopy ((void *) base, (void *) &next_invfp, sizeof (next_invfp));749 NTOHUL(next_invfp); // [RPAP - Jan 97: Endian Ordering]750 te.WE.invf_len = next_invfp - invfp;751 */752 534 base = oldbase; 753 535 } … … 764 546 if (res == *num_words - 1 && j + 1 < num_cases) 765 547 { 766 //int ii;767 548 /* Read in next block */ 768 549 block++; 769 550 StemBlock_read(sd, block, &first_word, &last_invf_len, &num_words, 770 551 &index, &num_indexes); 771 /*772 Fseek (sd->stem_file, sd->pos[block] + sd->sdh.blocks_start, 0);773 Fread (sd->buffer, sd->sdh.block_size, sizeof (u_char), sd->stem_file);774 sd->active = sd->pos[block];775 776 first_word = (unsigned long *) (sd->buffer);777 NTOHUL(*first_word); // [RPAP - Jan 97: Endian Ordering]778 last_invf_len = (unsigned long *) (first_word + 1);779 NTOHUL(*last_invf_len); // [RPAP - Jan 97: Endian Ordering]780 num_words = (unsigned short *) (last_invf_len + 1);781 NTOHUS(*num_words); // [RPAP - Jan 97: Endian Ordering]782 index = num_words + 1;783 num_indexes = ((*num_words - 1) / sd->sdh.lookback) + 1;784 785 // [RPAP - Jan 97: Endian Ordering]786 for (ii = 0; ii < num_indexes; ii++)787 NTOHUS(index[ii]);788 */789 552 base = (u_char *) (index + num_indexes); 790 553 base += index[0]; … … 811 574 FreeStemIdx (sd->stem3); 812 575 813 Xfree (sd->index[0]);814 Xfree (sd->index);815 Xfree (sd->buffer);816 Xfree (sd->pos);817 Xfree (sd);576 delete (sd->index[0]); 577 delete (sd->index); 578 delete (sd->buffer); 579 delete (sd->pos); 580 delete sd; 818 581 } 819 582 … … 822 585 FreeStemIdx (stemmed_idx * si) 823 586 { 824 Xfree (si->index[0]);825 Xfree (si->index);826 
Xfree (si->buffer);827 Xfree (si->pos);828 Xfree (si);829 } 587 delete si->index[0]; 588 delete si->index; 589 delete si->buffer; 590 delete si->pos; 591 delete si; 592 }
Note: See TracChangeset for help on using the changeset viewer.