source: trunk/gsdl/src/mgpp/text/Terms.cpp@ 1300

Last change on this file since 1300 was 1300, checked in by kjm18, 24 years ago

added full text browsing functionality

  • Property svn:keywords set to Author Date Id Revision
File size: 20.5 KB
Line 
1/**************************************************************************
2 *
3 * Terms.cpp -- Query related functions
4 * Copyright (C) 1999 Rodger McNab
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: Terms.cpp 1300 2000-07-24 02:46:11Z kjm18 $
21 *
22 **************************************************************************/
23
#include "Terms.h"
#include "words.h"
#include "stemmer.h"
#include "bitio_gen.h"
#include "bitio_m_stdio.h"

#include <string.h>  // memcpy
29
30void QueryInfo::Clear () {
31 UCArrayClear (docLevel);
32 maxDocs = 0;
33 sortByRank = true;
34 exactWeights = false;
35 needRankInfo = false;
36 needTermFreqs = false;
37}
38
39
40
41void TermFreqData::Clear () {
42 UCArrayClear (tag);
43 UCArrayClear (term);
44 stemMethod = 0;
45 matchDocs = 0;
46 termFreq = 0;
47}
48
49ostream &operator<< (ostream &s, const TermFreqData &t) {
50 s << "<" << t.tag << ">\"" << t.term << "\"stem("
51 << t.stemMethod << ")docs(" << t.matchDocs << ")"
52 << "count("<<t.termFreq<<")";
53 return s;
54}
55
56bool operator== (const TermFreqData &t1, const TermFreqData &t2) {
57 return ((t1.tag == t2.tag) &&
58 (t1.term == t2.term) &&
59 (t1.stemMethod == t2.stemMethod) &&
60 (t1.matchDocs == t2.matchDocs) &&
61 (t1.termFreq == t2.termFreq));
62}
63
64
65void QueryResult::Clear () {
66 docs.erase (docs.begin(), docs.end());
67 ranks.erase (ranks.begin(), ranks.end());
68 termFreqs.erase (termFreqs.begin(), termFreqs.end());
69}
70
71QueryResult::QueryResult () {
72 Clear ();
73}
74
75
76
77ostream &operator<< (ostream &s, const QueryResult &r) {
78 s << "docs: ";
79 unsigned long i;
80 for (i=0; i<r.docs.size(); i++)
81 s << r.docs[i] << ", ";
82
83 s << "\nranks: ";
84 for (i=0; i<r.ranks.size(); i++)
85 s << r.ranks[i] << ", ";
86
87 s << "\ntermFreqs: ";
88 for (i=0; i<r.termFreqs.size(); i++)
89 s << r.termFreqs[i] << ", ";
90 s << "\n\n";
91
92 return s;
93}
94
95
96bool operator== (const QueryResult &r1, const QueryResult &r2) {
97 return ((r1.docs == r2.docs) &&
98 (r1.ranks == r2.ranks) &&
99 (r1.termFreqs == r2.termFreqs));
100}
101
102//---------------------------------------------------
103// new ExtQueryResult stuff
104void ExtQueryResult::Clear () {
105 docs.erase (docs.begin(), docs.end());
106 levels.erase (levels.begin(), levels.end());
107 ranks.erase (ranks.begin(), ranks.end());
108 termFreqs.erase (termFreqs.begin(), termFreqs.end());
109}
110
111ExtQueryResult::ExtQueryResult () {
112 Clear ();
113}
114
115ostream &operator<< (ostream &s, const ExtQueryResult &r) {
116 s << "docs: ";
117 unsigned long i;
118 for (i=0; i<r.docs.size(); i++)
119 s << r.docs[i] << ", ";
120
121 s << "\nlevels: ";
122 for (i=0; i<r.levels.size(); i++)
123 s << r.levels[i] << ", ";
124
125
126 s << "\nranks: ";
127 for (i=0; i<r.ranks.size(); i++)
128 s << r.ranks[i] << ", ";
129
130 s << "\ntermFreqs: ";
131 for (i=0; i<r.termFreqs.size(); i++)
132 s << r.termFreqs[i] << ", ";
133 s << "\n\n";
134
135 return s;
136}
137
138
139bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2) {
140 return ((r1.docs == r2.docs) &&
141 (r1.levels == r2.levels) &&
142 (r1.ranks == r2.ranks) &&
143 (r1.termFreqs == r2.termFreqs));
144}
145
146//-------------------------------------------------------
147// new BrowseQueryResult stuff
148void BrowseQueryResult::Clear () {
149 termFreqs.erase (termFreqs.begin(), termFreqs.end());
150}
151
152BrowseQueryResult::BrowseQueryResult () {
153 Clear ();
154}
155
156
157
158ostream &operator<< (ostream &s, const BrowseQueryResult &r) {
159 s << "terms: ";
160 unsigned long i;
161 for (i=0; i<r.termFreqs.size(); i++)
162 s << r.termFreqs[i] << ", ";
163 s << "\n\n";
164 return s;
165}
166
167
168bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2) {
169 return ((r1.termFreqs == r2.termFreqs));
170
171}
172
173
174
175
176//--------------------------------------
177void FragData::Clear () {
178 matchDocs = 0;
179 fragNums.erase (fragNums.begin(), fragNums.end());
180 fragFreqs.erase (fragFreqs.begin(), fragFreqs.end());
181}
182
183
184
185
186void FindWordNumbers (IndexData &indexData,
187 const UCArray &term,
188 unsigned long stemMethod,
189 vector<unsigned long> &equivWords) {
190 equivWords.erase (equivWords.begin(), equivWords.end());
191
192 if (stemMethod == 0) {
193 // don't need to stem the word,
194 // find the word number for this term
195 unsigned long wordElNum = 0;
196 unsigned long numLevels = indexData.bdh.num_levels;
197 word_block_dict_el wordDictEl;
198 wordDictEl.SetNumLevels (numLevels);
199 if (SearchWordBlockDictEl (indexData.dictFile, indexData.biWords,
200 indexData.bdh.entries_per_wblk,
201 indexData.bdh.word_dict_size,
202 numLevels, term, wordDictEl, wordElNum))
203 equivWords.push_back (wordElNum);
204
205 return;
206
207 }
208
209
210 // need to stem this word and find it in the blocked stem index
211
212 unsigned char mgWord[MAXSTEMLEN + 1];
213 UCArray stemTerm;
214 unsigned long stemmerNum = 0;
215
216 if (stemMethod == 1) stemmerNum = indexData.sih1.stemmer_num;
217 else if (stemMethod == 2) stemmerNum = indexData.sih2.stemmer_num;
218 else if (stemMethod == 3) stemmerNum = indexData.sih3.stemmer_num;
219
220
221 // convert the word to an "mg word"
222 mgWord[0] = term.size();
223 bcopy ((char *)term.begin(), (char *)&mgWord[1], term.size());
224
225 // stem the word
226 stemmer (stemMethod, stemmerNum, mgWord);
227
228 // convert the result back to a UCArray
229 stemTerm.insert (stemTerm.end(), &mgWord[1], &mgWord[1] + mgWord[0]);
230
231 // need to look up this term in the appropriate dictionary
232 stem_block_dict_el stemDictEl;
233 unsigned long stemElNum;
234 if (stemMethod == 1) {
235 SearchStemBlockDictEl (indexData.stem1File,
236 indexData.sii1,
237 indexData.sih1.entries_per_block,
238 indexData.sih1.dict_size,
239 stemTerm,
240 stemDictEl,
241 stemElNum);
242
243 } else if (stemMethod == 2) {
244 SearchStemBlockDictEl (indexData.stem2File,
245 indexData.sii2,
246 indexData.sih2.entries_per_block,
247 indexData.sih2.dict_size,
248 stemTerm,
249 stemDictEl,
250 stemElNum);
251
252 } else if (stemMethod == 3) {
253 SearchStemBlockDictEl (indexData.stem3File,
254 indexData.sii3,
255 indexData.sih3.entries_per_block,
256 indexData.sih3.dict_size,
257 stemTerm,
258 stemDictEl,
259 stemElNum);
260 }
261
262 equivWords = stemDictEl.equivWords;
263}
264
265
266
// Read the inverted-file entry for word number termNum and collect
// the fragment numbers at which it occurs (and, when needFragFreqs is
// set, the within-fragment frequencies) into fragData.  When
// fragLimits is non-NULL, only occurrences falling inside one of the
// given fragment ranges are kept.  On dictionary-lookup failure
// fragData is left cleared.
void ReadTermFragData (IndexData &indexData,
		       bool needFragFreqs,
		       unsigned long termNum,
		       FragData &fragData,
		       FragRangeArray *fragLimits) {
  fragData.Clear();

  // look up the word in the dictionary
  unsigned long numLevels = indexData.bdh.num_levels;
  word_block_dict_el wordDictEl;
  wordDictEl.SetNumLevels (numLevels);
  if (!SearchWordBlockDictElNum (indexData.dictFile,
				 indexData.biWords,
				 indexData.bdh.entries_per_wblk,
				 indexData.bdh.word_dict_size,
				 numLevels,
				 termNum, wordDictEl))
    return; // nothing more to do

  // number of matching documents at the current document level
  fragData.matchDocs = wordDictEl.levelFreqs[indexData.curLevelNum];

  // seek to the appropriate place in the inverted file
  fseek (indexData.invfFile, wordDictEl.invf_ptr, SEEK_SET);
  stdio_bitio_buffer buffer (indexData.invfFile);

  // Bblock coding parameter for the fragment-number deltas
  unsigned long B = BIO_Bblock_Init (indexData.bdh.num_frags,
				     wordDictEl.frag_occur);
  unsigned long fragNum = 0;   // running fragment number (delta decoded)
  unsigned long termFreq = 0;

  unsigned long fragLimitI = 0;
  unsigned long i;
  for (i=0; i<wordDictEl.frag_occur; i++) {
    // fragment numbers are stored as Bblock-coded deltas
    fragNum += buffer.bblock_decode (B, NULL);
    // a word-level index stores no per-fragment frequency: it is 1 by
    // definition; otherwise the frequency is gamma coded
    if (!indexData.ifh.word_level_index) termFreq = buffer.gamma_decode (NULL);
    else termFreq = 1;

    // get the right fragment range (fragLimits is assumed sorted by
    // rangeStart — TODO confirm)
    if (fragLimits != NULL) {
      while (fragLimitI+1 < (*fragLimits).size() &&
	     fragNum > (*fragLimits)[fragLimitI+1].rangeStart) {
	fragLimitI++;
      }
    }

    // add the entry if it is within the limits
    if ((fragLimits == NULL) ||
	(fragLimitI < (*fragLimits).size() &&
	 fragNum > (*fragLimits)[fragLimitI].rangeStart &&
	 fragNum <= (*fragLimits)[fragLimitI].rangeEnd)) {
      fragData.fragNums.push_back (fragNum);
      if (needFragFreqs)
	fragData.fragFreqs.push_back (termFreq);
    }
  }

  buffer.done();
}
325
326
327void CombineFragData (bool needFragFreqs,
328 const FragData &f1,
329 const FragData &f2,
330 FragData &outFragData) {
331 outFragData.Clear();
332
333 // the new number of matching documents is the maximum
334 // of the two input matching number of documents -- it
335 // is assumed that these are at the same document level
336 outFragData.matchDocs = (f1.matchDocs > f2.matchDocs) ?
337 f1.matchDocs : f2.matchDocs;
338
339 // do or
340 unsigned long f1I = 0, f1Size = f1.fragNums.size();
341 unsigned long f2I = 0, f2Size = f2.fragNums.size();
342 while (f1I < f1Size || f2I < f2Size) {
343 if (f2I < f2Size &&
344 (f1I >= f1Size ||
345 f1.fragNums[f1I] > f2.fragNums[f2I])) {
346 // output f2I
347 outFragData.fragNums.push_back (f2.fragNums[f2I]);
348 if (needFragFreqs)
349 outFragData.fragFreqs.push_back (f2.fragFreqs[f2I]);
350 f2I++;
351
352 } else if (f1I < f1Size &&
353 (f2I >= f2Size ||
354 f1.fragNums[f1I] < f2.fragNums[f2I])) {
355 // output f1I
356 outFragData.fragNums.push_back (f1.fragNums[f1I]);
357 if (needFragFreqs)
358 outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]);
359 f1I++;
360
361 } else {
362 // must be equal combine f1I and f2I
363 outFragData.fragNums.push_back (f1.fragNums[f1I]);
364 if (needFragFreqs)
365 outFragData.fragFreqs.push_back (f1.fragFreqs[f1I]+f2.fragFreqs[f2I]);
366 f1I++;
367 f2I++;
368 }
369 }
370}
371
372
// Intersect fragData with comFragData in place (a proximity "and"):
// an entry of fragData survives only when its fragment number lies in
// the window (comFragNum+startRange, comFragNum+endRange] of some
// comFragData entry.  When fragLimits is non-NULL, the fragData entry
// must also fall inside the limit range containing the comFragData
// entry — apparently so a match cannot cross a tag boundary; confirm
// against callers.  Kept frequencies are the minimum of the two
// sides; matchDocs becomes the smaller of the two inputs' counts.
// Surviving entries are compacted to the front of fragData.
void AndCombineFragData (bool needFragFreqs,
			 FragData &fragData,
			 const FragData &comFragData,
			 signed long startRange,
			 signed long endRange,
			 const FragRangeArray *fragLimits) {
  // sanity check on range: ensure startRange <= endRange
  if (startRange > endRange) {
    signed long temp = endRange;
    endRange = startRange;
    startRange = temp;
  }

  // get min matchdocs
  if (comFragData.matchDocs < fragData.matchDocs)
    fragData.matchDocs = comFragData.matchDocs;

  unsigned long fragDataI = 0;
  unsigned long fragDataSize = fragData.fragNums.size();
  unsigned long comFragDataI = 0;
  unsigned long comFragDataSize = comFragData.fragNums.size();
  unsigned long fragLimitI = 0;
  unsigned long fragLimitSize = (fragLimits==NULL) ? 0 : (*fragLimits).size();
  unsigned long outI = 0;  // write cursor: results compacted in place

  while (fragDataI < fragDataSize &&
	 comFragDataI < comFragDataSize) {
    signed long fragNum = (signed long)fragData.fragNums[fragDataI];
    signed long comFragNum = (signed long)comFragData.fragNums[comFragDataI];

    // go to the right fragment limit (for the com frag)
    if (fragLimits != NULL) {
      while (fragLimitI+1 < fragLimitSize &&
	     comFragNum > (signed long)(*fragLimits)[fragLimitI+1].rangeStart) {
	fragLimitI++;
      }
    }

    // fragNum is at or before the window start (or before the current
    // limit range): advance the fragData side
    if (fragNum <= comFragNum+startRange ||
	(fragLimits!=NULL &&
	 fragNum<=(signed long)(*fragLimits)[fragLimitI].rangeStart)) {
      fragDataI++;

    // fragNum is past the window end (or past the current limit
    // range): advance the comFragData side
    } else if (fragNum > comFragNum+endRange ||
	       (fragLimits!=NULL &&
		fragNum>(signed long)(*fragLimits)[fragLimitI].rangeEnd)) {
      comFragDataI++;

    } else {
      // within the window and within the tag: keep this match — note
      // that the *comFragData* fragment number is what gets stored
      fragData.fragNums[outI] = comFragNum;
      if (needFragFreqs) {
	// keep the smaller of the two frequencies
	fragData.fragFreqs[outI] =
	  (fragData.fragFreqs[fragDataI] < comFragData.fragFreqs[comFragDataI]) ?
	  fragData.fragFreqs[fragDataI] : comFragData.fragFreqs[comFragDataI];
      }
      fragDataI++;
      comFragDataI++;
      outI++;
    }
  }

  // erase unused part of fragData
  fragData.fragNums.erase (fragData.fragNums.begin()+outI,
			   fragData.fragNums.end());
  if (needFragFreqs)
    fragData.fragFreqs.erase (fragData.fragFreqs.begin()+outI,
			      fragData.fragFreqs.end());
  else
    fragData.fragFreqs.erase (fragData.fragFreqs.begin(),
			      fragData.fragFreqs.end());
}
445
446
// Convert a term's fragment-level hit list into a document-level
// QueryResult: fragment numbers are mapped to document numbers at the
// current level, per-document frequencies are accumulated, and (when
// ranking is wanted) a TF-IDF style score Wqt*Wdt is recorded per
// document.  When queryInfo.needTermFreqs is set, a TermFreqData
// summary for the term is appended as well.  result is cleared first.
void FragsToQueryResult (IndexData &indexData,
			 const QueryInfo &queryInfo,
			 const FragData &termData,
			 const UCArray &tag,
			 const UCArray &term,
			 unsigned long stemMethod,
			 unsigned long termWeight,
			 QueryResult &result) {
  bool needRanks = (queryInfo.sortByRank || queryInfo.needRankInfo);

  result.Clear();

  // log (N / ft) — the inverse-document-frequency factor
  // NOTE(review): termData.matchDocs == 0 would divide by zero here;
  // presumably callers only pass terms that matched — confirm.
  unsigned long N = indexData.levels.levelInfo[indexData.curLevel].numEntries;
  float wordLog = log((double)N / (double)termData.matchDocs);

  // Wqt = fqt * log (N / ft)
  // note: terms are allowed to have a weight of zero so
  // they can be excluded from the ranking
  float Wqt = termWeight * wordLog;

  // Wdt = fdt * log (N / ft)
  float Wdt;

  unsigned long termDataI = 0;
  unsigned long termDataSize = termData.fragNums.size();
  unsigned long levelDocNum = 0;

  unsigned long termDocFreq = 0;       // frequency within the current doc
  unsigned long lastLevelDocNum = 0;   // 0 = no document started yet
  unsigned long overallwordfreq = 0;   // total frequency across all docs

  while (termDataI < termDataSize) {
    // map this fragment to a document number at the current level;
    // fragments that don't map are skipped
    if (indexData.levelConverter.FragToLevel (termData.fragNums[termDataI],
					      levelDocNum)) {
      if (levelDocNum != lastLevelDocNum) {
	if (lastLevelDocNum > 0) {
	  // add this doc information
	  if (needRanks) {
	    Wdt = termDocFreq * wordLog;
	    result.ranks.push_back (Wqt * Wdt);
	  }
	  result.docs.push_back (lastLevelDocNum);
	}

	lastLevelDocNum = levelDocNum;
	termDocFreq = 0;
      }

      // assumes fragFreqs runs parallel to fragNums — TODO confirm
      // callers always populate it when this function is used
      if (needRanks)
	termDocFreq += termData.fragFreqs[termDataI];
      overallwordfreq += termData.fragFreqs[termDataI];
    }

    termDataI++;
  }

  if (lastLevelDocNum > 0) {
    // add the last document information
    if (needRanks) {
      Wdt = termDocFreq * wordLog;
      result.ranks.push_back (Wqt * Wdt);
    }
    result.docs.push_back (lastLevelDocNum);
  }

  // add the term frequency information
  if (queryInfo.needTermFreqs) {
    TermFreqData termFreqData;
    termFreqData.tag = tag;
    termFreqData.term = term;
    termFreqData.stemMethod = stemMethod;
    termFreqData.matchDocs = termData.matchDocs;
    termFreqData.termFreq = overallwordfreq;
    result.termFreqs.push_back (termFreqData);
  }
}
524
// AND this term's fragment hits into an existing result: only
// documents already present in result (result.docs is assumed sorted
// ascending) that also contain this term survive; their ranks are
// incremented by this term's Wqt*Wdt contribution.  Surviving entries
// are compacted to the front of result.docs/ranks and the remainder
// erased.  When queryInfo.needTermFreqs is set, a TermFreqData
// summary for the term is appended.
void AndFragsToQueryResult (IndexData &indexData,
			    const QueryInfo &queryInfo,
			    const FragData &termData,
			    const UCArray &tag,
			    const UCArray &term,
			    unsigned long stemMethod,
			    unsigned long termWeight,
			    QueryResult &result) {
  bool needRanks = (queryInfo.sortByRank || queryInfo.needRankInfo);

  // log (N / ft) — NOTE(review): divides by zero if matchDocs == 0;
  // presumably callers only pass terms that matched — confirm.
  float wordLog =
    log((double)indexData.levels.levelInfo[indexData.curLevel].numEntries/
	(double)termData.matchDocs);

  // Wqt = fqt * log (N / ft)
  // note: terms are allowed to have a weight of zero so
  // they can be excluded from the ranking
  float Wqt = termWeight * wordLog;

  // Wdt = fdt * log (N / ft)
  float Wdt;

  unsigned long termDataI = 0;
  unsigned long termDataSize = termData.fragNums.size();
  unsigned long levelDocNum = 0;

  unsigned long termDocFreq = 0;       // frequency within the current doc
  unsigned long lastLevelDocNum = 0;   // 0 = no document started yet
  unsigned long overallwordfreq = 0;
  unsigned long resultI = 0;           // read cursor over existing result
  unsigned long resultSize = result.docs.size();
  unsigned long resultOutI = 0;        // write cursor (in-place compaction)


  while (termDataI < termDataSize) {
    // map this fragment to a document number at the current level
    if (indexData.levelConverter.FragToLevel (termData.fragNums[termDataI],
					      levelDocNum)) {
      if (levelDocNum != lastLevelDocNum) {
	if (lastLevelDocNum > 0) {
	  // add this doc information
	  Wdt = termDocFreq * wordLog;

	  // find this document number in the existing (sorted) result
	  while (resultI < resultSize &&
		 result.docs[resultI] < lastLevelDocNum)
	    resultI++;

	  // store the result only if the doc was already present (AND)
	  if (resultI < resultSize && result.docs[resultI] == lastLevelDocNum) {
	    result.docs[resultOutI] = lastLevelDocNum;
	    if (needRanks)
	      result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
	    resultI++;
	    resultOutI++;
	  }
	}

	lastLevelDocNum = levelDocNum;
	termDocFreq = 0;
      }

      // assumes fragFreqs runs parallel to fragNums — TODO confirm
      if (needRanks)
	termDocFreq += termData.fragFreqs[termDataI];
      overallwordfreq += termData.fragFreqs[termDataI];
    }

    termDataI++;
  } // while

  if (lastLevelDocNum > 0) {
    // add the last document information
    Wdt = termDocFreq * wordLog;

    // find this document number
    while (resultI < resultSize &&
	   result.docs[resultI] < lastLevelDocNum)
      resultI++;

    // store the result
    if (resultI < resultSize && result.docs[resultI] == lastLevelDocNum) {
      result.docs[resultOutI] = lastLevelDocNum;
      if (needRanks)
	result.ranks[resultOutI] = result.ranks[resultI] + Wqt * Wdt;
      resultI++;
      resultOutI++;
    }
  }

  // remove unneeded entries (docs that did not contain this term)
  result.docs.erase (result.docs.begin()+resultOutI, result.docs.end());
  if (needRanks)
    result.ranks.erase (result.ranks.begin()+resultOutI, result.ranks.end());
  else
    result.ranks.erase (result.ranks.begin(), result.ranks.end());

  // add the term frequency information
  if (queryInfo.needTermFreqs) {
    TermFreqData termFreqData;
    termFreqData.tag = tag;
    termFreqData.term = term;
    termFreqData.stemMethod = stemMethod;
    termFreqData.matchDocs = termData.matchDocs;
    termFreqData.termFreq = overallwordfreq;
    result.termFreqs.push_back (termFreqData);
  }
}
632
633
// Filter an existing result against a term's fragment hits: keep only
// documents (result.docs is assumed sorted ascending) that contain at
// least one of termData's fragments at the current level.  Ranks are
// carried across unchanged for surviving documents.  Surviving
// entries are compacted in place and the remainder erased.
void RemoveUnwantedResults (IndexData &indexData,
			    const QueryInfo &queryInfo,
			    const FragData &termData,
			    QueryResult &result) {
  bool needRanks = (queryInfo.sortByRank || queryInfo.needRankInfo);

  unsigned long termDataI = 0;
  unsigned long termDataSize = termData.fragNums.size();
  unsigned long levelDocNum = 0;

  unsigned long lastLevelDocNum = 0;   // 0 = no document started yet

  unsigned long resultI = 0;           // read cursor over existing result
  unsigned long resultSize = result.docs.size();
  unsigned long resultOutI = 0;        // write cursor (in-place compaction)

  while (termDataI < termDataSize) {
    // map this fragment to a document number at the current level
    if (indexData.levelConverter.FragToLevel (termData.fragNums[termDataI],
					      levelDocNum)) {
      if (levelDocNum != lastLevelDocNum) {
	if (lastLevelDocNum > 0) {
	  // find this document number in the existing (sorted) result
	  while (resultI < resultSize &&
		 result.docs[resultI] < lastLevelDocNum)
	    resultI++;

	  // keep the document only if it was already in the result
	  if (resultI < resultSize && result.docs[resultI] == lastLevelDocNum) {
	    result.docs[resultOutI] = lastLevelDocNum;
	    if (needRanks)
	      result.ranks[resultOutI] = result.ranks[resultI];
	    resultI++;
	    resultOutI++;
	  }
	}

	lastLevelDocNum = levelDocNum;
      }
    }

    termDataI++;
  }

  if (lastLevelDocNum > 0) {
    // handle the final document
    while (resultI < resultSize &&
	   result.docs[resultI] < lastLevelDocNum)
      resultI++;

    // store the result
    if (resultI < resultSize && result.docs[resultI] == lastLevelDocNum) {
      result.docs[resultOutI] = lastLevelDocNum;
      if (needRanks)
	result.ranks[resultOutI] = result.ranks[resultI];
      resultI++;
      resultOutI++;
    }
  }

  // remove unneeded entries
  result.docs.erase (result.docs.begin()+resultOutI, result.docs.end());
  if (needRanks)
    result.ranks.erase (result.ranks.begin()+resultOutI, result.ranks.end());
  else
    result.ranks.erase (result.ranks.begin(), result.ranks.end());
}
700
701
702
703//--------------------------------------------------------------
704// functions to support full text browse
705
706void FindNearestWordNumber (IndexData &indexData,
707 const UCArray &term,
708 unsigned long &number) {
709
710 // find the word number for this term
711 unsigned long wordElNum = 0;
712 unsigned long numLevels = indexData.bdh.num_levels;
713 word_block_dict_el wordDictEl;
714 wordDictEl.SetNumLevels (numLevels);
715 if (NearestSearchWordBlockDictEl (indexData.dictFile, indexData.biWords,
716 indexData.bdh.entries_per_wblk,
717 indexData.bdh.word_dict_size,
718 numLevels, term, wordDictEl, wordElNum))
719 number = wordElNum;
720
721}
722
723void GetTermList(IndexData &indexData,
724 unsigned long startTerm,
725 unsigned long numTerms,
726 TermFreqArray &terms) {
727
728 word_block_dict_el_array wordBlocks; // = new word_block_dict_el_array();
729 TermFreqData termdata;
730
731 terms.erase(terms.begin(), terms.end());
732
733 SearchWordBlockDictElNumRange (indexData.dictFile, indexData.biWords,
734 indexData.bdh.entries_per_wblk,
735 indexData.bdh.word_dict_size,
736 indexData.bdh.num_levels, startTerm,
737 numTerms, wordBlocks);
738
739 word_block_dict_el_array::iterator here = wordBlocks.begin();
740 word_block_dict_el_array::iterator end = wordBlocks.end();
741
742 while (here != end) {
743 termdata.Clear();
744 termdata.term = (*here).el;
745 termdata.termFreq = (*here).freq;
746 terms.push_back(termdata);
747 here++;
748 }
749
750}
751
// Fetch numTerms dictionary terms starting at word number startTerm,
// returning just the term strings in terms.
// NOTE(review): unlike the TermFreqArray overload above, terms is not
// cleared here first — presumably SearchWordBlockDictElNumRange
// handles that; confirm against its implementation.
void GetTermList(IndexData &indexData,
		 unsigned long startTerm,
		 unsigned long numTerms,
		 UCArrayVector &terms) {



  SearchWordBlockDictElNumRange (indexData.dictFile, indexData.biWords,
				 indexData.bdh.entries_per_wblk,
				 indexData.bdh.word_dict_size,
				 indexData.bdh.num_levels, startTerm,
				 numTerms, terms);

}
766
767
768
Note: See TracBrowser for help on using the repository browser.