Changeset 1300
- Timestamp:
- 2000-07-24T14:46:11+12:00 (24 years ago)
- Location:
- trunk/gsdl/src/mgpp/text
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/mgpp/text/MGQuery.cpp
r927 r1300 572 572 573 573 574 void BrowseQueryNode::Clear () { 575 UCArrayClear(term); 576 } 577 578 void BrowseQueryNode::Calculate (IndexData &indexData, BrowseQueryResult &result) const { 579 580 unsigned long number=0; 581 FindNearestWordNumber(indexData, term, number); 582 if (number + startPosition > 0 ) { 583 number = number+startPosition; 584 } 585 else { 586 number = 1; 587 } 588 589 GetTermList (indexData, number, numTerms, result.termFreqs); 590 591 } 592 593 594 595 void BrowseQueryNode::Free () { 596 Clear(); 597 } 598 599 600 void BrowseQueryNode::Print (ostream &s, int indent) const { 601 PrintIndentText(s, "BROWSEQUERYNODE\n", indent); 602 PrintIndent (s, indent+2); 603 s << "TERM:"<<term<<"\n"; 604 PrintIndent (s, indent+2); 605 s << "Start position: "<< startPosition<<", Num terms: "<< numTerms<<"\n"; 606 607 608 } 609 610 611 574 612 void MGQuery (IndexData &indexData, 575 613 const QueryInfo &queryInfo, … … 693 731 694 732 695 733 // new function for full text browsing, 734 void MGBrowseQuery (IndexData &indexData, UCArray &level, 735 const BrowseQueryNode &node, 736 BrowseQueryResult &result) { 737 738 indexData.LoadLevel(level); 739 node.Calculate(indexData, result); 740 741 } 742 743 744 745 746 747 748 -
trunk/gsdl/src/mgpp/text/MGQuery.h
r927 r1300 119 119 then 'the' has to be at position between -2 and -1 relative to 'cat'. 120 120 "the cat" could also be searched for by 'cat' with no range limits, then 'the' with range 0 to 1. 121 range values are relative to the gaps between words: 122 x y z X a b c 123 -3 -2 -1 0 1 2 3 124 121 125 */ 122 126 class TermNode { … … 157 161 }; 158 162 163 class BrowseQueryNode :public QueryNode { 164 public: 165 UCArray term; 166 signed long startPosition; 167 unsigned long numTerms; 168 169 void Clear(); 170 BrowseQueryNode () { Clear(); } 171 // ~BrowseQueryNode (); 172 173 void Calculate (IndexData &indexData, BrowseQueryResult &result) const; 174 void Free (); 175 void Print (ostream &s, int indent=0) const; 176 177 178 179 180 }; 159 181 160 182 void MGQuery (IndexData &indexData, … … 163 185 QueryResult &result); 164 186 165 // this function for retri ving results with both section doc nums187 // this function for retrieving results with both section doc nums 166 188 // and Document docnums 167 189 void MGQuery (IndexData &indexData, … … 171 193 172 194 195 // new function for full text browsing, 196 void MGBrowseQuery (IndexData &indexData, UCArray &level, 197 const BrowseQueryNode &node, 198 BrowseQueryResult &result); 199 173 200 #endif 201 202 203 204 205 206 207 208 209 210 -
trunk/gsdl/src/mgpp/text/Terms.cpp
r1124 r1300 143 143 (r1.termFreqs == r2.termFreqs)); 144 144 } 145 146 //------------------------------------------------------- 147 // new BrowseQueryResult stuff 148 void BrowseQueryResult::Clear () { 149 termFreqs.erase (termFreqs.begin(), termFreqs.end()); 150 } 151 152 BrowseQueryResult::BrowseQueryResult () { 153 Clear (); 154 } 155 156 157 158 ostream &operator<< (ostream &s, const BrowseQueryResult &r) { 159 s << "terms: "; 160 unsigned long i; 161 for (i=0; i<r.termFreqs.size(); i++) 162 s << r.termFreqs[i] << ", "; 163 s << "\n\n"; 164 return s; 165 } 166 167 168 bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2) { 169 return ((r1.termFreqs == r2.termFreqs)); 170 171 } 172 173 174 145 175 146 176 //-------------------------------------- … … 668 698 result.ranks.erase (result.ranks.begin(), result.ranks.end()); 669 699 } 700 701 702 703 //-------------------------------------------------------------- 704 // functions to support full text browse 705 706 void FindNearestWordNumber (IndexData &indexData, 707 const UCArray &term, 708 unsigned long &number) { 709 710 // find the word number for this term 711 unsigned long wordElNum = 0; 712 unsigned long numLevels = indexData.bdh.num_levels; 713 word_block_dict_el wordDictEl; 714 wordDictEl.SetNumLevels (numLevels); 715 if (NearestSearchWordBlockDictEl (indexData.dictFile, indexData.biWords, 716 indexData.bdh.entries_per_wblk, 717 indexData.bdh.word_dict_size, 718 numLevels, term, wordDictEl, wordElNum)) 719 number = wordElNum; 720 721 } 722 723 void GetTermList(IndexData &indexData, 724 unsigned long startTerm, 725 unsigned long numTerms, 726 TermFreqArray &terms) { 727 728 word_block_dict_el_array wordBlocks; // = new word_block_dict_el_array(); 729 TermFreqData termdata; 730 731 terms.erase(terms.begin(), terms.end()); 732 733 SearchWordBlockDictElNumRange (indexData.dictFile, indexData.biWords, 734 indexData.bdh.entries_per_wblk, 735 indexData.bdh.word_dict_size, 736 indexData.bdh.num_levels, startTerm, 737 numTerms, wordBlocks); 738 739 word_block_dict_el_array::iterator here = wordBlocks.begin(); 740 word_block_dict_el_array::iterator end = wordBlocks.end(); 741 742 while (here != end) { 743 termdata.Clear(); 744 termdata.term = (*here).el; 745 termdata.termFreq = (*here).freq; 746 terms.push_back(termdata); 747 here++; 748 } 749 750 } 751 752 void GetTermList(IndexData &indexData, 753 unsigned long startTerm, 754 unsigned long numTerms, 755 UCArrayVector &terms) { 756 757 758 759 SearchWordBlockDictElNumRange (indexData.dictFile, indexData.biWords, 760 indexData.bdh.entries_per_wblk, 761 indexData.bdh.word_dict_size, 762 indexData.bdh.num_levels, startTerm, 763 numTerms, terms); 764 765 } 766 767 768 -
trunk/gsdl/src/mgpp/text/Terms.h
r1124 r1300 163 163 QueryResult &result); 164 164 165 165 //----------------------------------------------------------------- 166 166 // new QueryResult class to handle retrieval of doc and level nums. 167 167 // Use this class with extended version of MGQuery … … 180 180 bool operator== (const ExtQueryResult &r1, const ExtQueryResult &r2); 181 181 182 //------------------------------------------------------------ 183 // new functions to handle full text browse 184 185 class BrowseQueryResult { 186 public: 187 TermFreqArray termFreqs; 188 void Clear(); 189 BrowseQueryResult (); 190 191 }; 192 193 194 ostream &operator<< (ostream &s, const BrowseQueryResult &r); 195 bool operator== (const BrowseQueryResult &r1, const BrowseQueryResult &r2); 196 197 void FindNearestWordNumber (IndexData &indexData, 198 const UCArray &term, 199 unsigned long &number); 200 201 void GetTermList(IndexData &indexData, 202 unsigned long startTerm, 203 unsigned long numTerms, 204 TermFreqArray &terms); 205 206 void GetTermList (IndexData &indexData, 207 unsigned long startTerm, 208 unsigned long numTerms, 209 UCArrayVector &terms); 210 182 211 #endif 183 212 213 214 215 216 -
trunk/gsdl/src/mgpp/text/invf.cpp
r856 r1300 513 513 514 514 515 // use the block dictionary functions for tag entries, and word block dict 516 // functions for word entries. 517 515 518 516 519 bool SearchBlockDictElNum (FILE *dictFile, … … 689 692 return false; 690 693 } 694 695 //---------------------------------------------------------------- 696 // functions for full text browse 697 698 bool NearestSearchWordBlockDictEl (FILE *dictFile, 699 const block_idx &bIdx, 700 unsigned long entriesPerBlock, 701 unsigned long dictSize, 702 unsigned long numLevels, 703 const UCArray &el, 704 word_block_dict_el &dictEl, 705 unsigned long &elNum) { 706 707 UCArrayClear (dictEl.el); 708 709 // find the block that contains the element 710 unsigned long blockIdxNum; 711 if (!SearchEl (bIdx, entriesPerBlock, el, 712 blockIdxNum, elNum)) 713 return false; 714 715 unsigned long blockEndElNum = elNum + entriesPerBlock; 716 if (blockEndElNum > dictSize) blockEndElNum = dictSize; 717 718 // look for the block 719 fseek (dictFile, bIdx[blockIdxNum].block_ptr, SEEK_SET); 720 while (elNum < blockEndElNum) { 721 dictEl.Read (dictFile, numLevels); 722 int res = DictCompare (el, dictEl.el); // look for the first word that is 723 // greater or equal to the el 724 if (res <= 0) { 725 return true; // found one 726 } 727 728 elNum++; 729 } 730 // it must be the last term 731 return true; 732 733 734 } 735 736 737 bool SearchWordBlockDictElNumRange (FILE *dictFile, 738 const block_idx &bIdx, 739 unsigned long entriesPerBlock, 740 unsigned long dictSize, 741 unsigned long numLevels, 742 unsigned long elNum, 743 unsigned long numWords, 744 UCArrayVector &terms) { 745 746 word_block_dict_el dictEl; 747 dictEl.SetNumLevels (numLevels); 748 UCArrayClear(dictEl.el); 749 750 terms.erase(terms.begin(), terms.end()); 751 752 if (elNum >= dictSize) return false; 753 754 // find the block that contains the element 755 unsigned long blockIdxNum, curElNum; 756 if (!SearchElNum (bIdx, entriesPerBlock, elNum, 757 blockIdxNum, curElNum)) 758 return false; 759 760 unsigned long lastElNum = elNum + numWords - 1; 761 if (lastElNum > dictSize) lastElNum = dictSize; 762 763 // look for the block 764 fseek (dictFile, bIdx[blockIdxNum].block_ptr, SEEK_SET); 765 766 // get the first term 767 do { 768 dictEl.Read (dictFile, numLevels); 769 } while (curElNum++ < elNum); 770 771 terms.push_back(dictEl.el); 772 while (curElNum <= lastElNum ) { 773 dictEl.Read(dictFile, numLevels); 774 terms.push_back(dictEl.el); 775 curElNum++; 776 } 777 778 779 return true; 780 } 781 782 // NOte: before each addition of dictEl to the array, the level freqs array 783 // is deleted, as this was causing problems - generating a seg fault, I think if 784 // the vector had to be reallocated or something. 785 // setNumLevels has to be called each time before a read, now, to set up the level 786 //freqs array. this is necessary. 787 bool SearchWordBlockDictElNumRange (FILE *dictFile, 788 const block_idx &bIdx, 789 unsigned long entriesPerBlock, 790 unsigned long dictSize, 791 unsigned long numLevels, 792 unsigned long elNum, 793 unsigned long numWords, 794 word_block_dict_el_array &terms) { 795 796 word_block_dict_el dictEl; 797 dictEl.SetNumLevels (numLevels); 798 UCArrayClear(dictEl.el); 799 800 block_dict_el elem; 801 terms.erase(terms.begin(), terms.end()); 802 803 if (elNum >= dictSize) return false; 804 805 // find the block that contains the element 806 unsigned long blockIdxNum, curElNum; 807 if (!SearchElNum (bIdx, entriesPerBlock, elNum, 808 blockIdxNum, curElNum)) 809 return false; 810 811 unsigned long lastElNum = elNum + numWords - 1; 812 if (lastElNum > dictSize) lastElNum = dictSize; 813 814 // look for the block 815 fseek (dictFile, bIdx[blockIdxNum].block_ptr, SEEK_SET); 816 // get the first term 817 do { 818 dictEl.Read (dictFile, numLevels); 819 } while (curElNum++ < elNum); 820 821 dictEl.levelFreqs = NULL; 822 terms.push_back(dictEl); 823 824 while (curElNum <= lastElNum ) { 825 dictEl.SetNumLevels(numLevels); 826 dictEl.Read(dictFile, numLevels); 827 dictEl.levelFreqs = NULL; 828 terms.push_back(dictEl); 829 curElNum++; 830 } 831 832 return true; 833 } 834 835 836 837 838 839 840 841 842 -
trunk/gsdl/src/mgpp/text/invf.h
r1122 r1300 158 158 }; 159 159 160 160 typedef vector<word_block_dict_el> word_block_dict_el_array; 161 161 162 162 struct block_idx_info { … … 302 302 unsigned long &elNum); 303 303 304 304 //---------------------------------------------------------- 305 306 // new functions for full text browse 307 308 bool NearestSearchWordBlockDictEl (FILE *dictFile, 309 const block_idx &bIdx, 310 unsigned long entriesPerBlock, 311 unsigned long dictSize, 312 unsigned long numLevels, 313 const UCArray &el, 314 word_block_dict_el &dictEl, 315 unsigned long &elNum); 316 317 // returns a list of word_block_dict_el, with no levelfreqs 318 bool SearchWordBlockDictElNumRange (FILE *dictFile, 319 const block_idx &bIdx, 320 unsigned long entriesPerBlock, 321 unsigned long dictSize, 322 unsigned long numLevels, 323 unsigned long elNum, 324 unsigned long numWords, 325 word_block_dict_el_array &terms); 326 327 // just returns a list of terms 328 bool SearchWordBlockDictElNumRange (FILE *dictFile, 329 const block_idx &bIdx, 330 unsigned long entriesPerBlock, 331 unsigned long dictSize, 332 unsigned long numLevels, 333 unsigned long elNum, 334 unsigned long numWords, 335 UCArrayVector &terms); 336 305 337 306 338
Note:
See TracChangeset
for help on using the changeset viewer.