Changeset 1873
- Timestamp:
- 2001-01-30T12:08:49+13:00 (23 years ago)
- Location:
- trunk/gsdl/src/phind/generate
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/phind/generate/phrase.cpp
r1631 r1873 180 180 // Does the phrase have a unique suffix/prefix extension? 181 181 // 182 // If uniqueSuffixExtensions is 1, then we do. 183 // If it is > 1 then we do not. 184 // If it is -1 then we have to check in the suffix array. 182 // For suffix: 183 // If uniqueSuffixExtensions is 1, then phrase has some unique expansion. 184 // If it is 0 then it does not. If it is -1 then we don't know, and have 185 // to calculate it from the suffix array. Same goes for prefixes. 185 186 186 187 int Phrase::hasUniqueSuffixExtension() { … … 189 190 if (uniqueSuffixExtension == -1) { 190 191 191 if (!suffixFound) { 192 findFirstAndLastSuffix(); 193 } 194 195 /* if (firstSuffixIndex == 56957) { 196 cout << "Testing suffix expansion: " << toString() << "\n"; 197 cout << "mode: " << phraseMode << endl; 198 } 199 */ 192 ensureSuffixFound(); 200 193 201 194 // pointers to the phrase's first and last occurances in te suffixArray … … 235 228 ensurePrefixFound(); 236 229 237 // pointers to the phrase's first and last occurances in t e prefixArray230 // pointers to the phrase's first and last occurances in the prefixArray 238 231 symbol *fst = firstPrefix - length; 239 232 symbol *lst = lastPrefix - length; … … 249 242 // in STOPWORDS mode, make sure there is a unique next content symbol 250 243 else { 251 // if (firstSuffixIndex == 56962) cout << "Testing 1: s" << *fst << " = s" << *lst << endl;252 244 uniquePrefixExtension = 0; 253 245 while ((*fst == *lst) && (*fst > LASTDELIMITER)) { … … 266 258 267 259 268 // Expand a phrase with a unique suffix/prefix extensionby 1 symbol260 // Expand a phrase with a unique suffix/prefix by 1 symbol 269 261 // 270 262 // Note that in STOPWORDS mode a "unique extension" means a unique … … 274 266 275 267 int Phrase::expandUniqueSuffixExtensionByOne() { 276 assert(forward);277 268 assert(suffixFound); 278 269 assert(uniqueSuffixExtension == 1); … … 299 290 300 291 int Phrase::expandUniquePrefixExtensionByOne() { 301 assert(back);302 292 
assert(prefixFound); 303 293 assert(uniquePrefixExtension == 1); … … 797 787 // Compare the length of two phrases 798 788 // 799 // Given two phrases, return true if the first is shorter ,800 // otherwise return false. For use with the STL sort function.789 // Given two phrases, return true if the first is shorter/longer, 790 // otherwise return false. For use in various sort functions. 801 791 802 792 bool isShorter(Phrase p1, Phrase p2) { … … 807 797 } 808 798 809 799 bool isLonger(Phrase p1, Phrase p2) { 800 if (p1.length > p2.length) { 801 return true; 802 } 803 return false; 804 } 805 806 -
trunk/gsdl/src/phind/generate/phrase.h
r1631 r1873 126 126 127 127 bool isShorter(Phrase p1, Phrase p2); 128 bool isLonger(Phrase p1, Phrase p2); 128 129 129 130 -
trunk/gsdl/src/phind/generate/suffix.cpp
r1683 r1873 1 1 /********************************************************************** 2 2 * 3 * suffix. h-- Extract the repeated phrases in the input using4 * suffix and prefix arrays.3 * suffix.cpp -- Extract the repeated phrases in the input using 4 * suffix and prefix arrays. 5 5 * 6 * Copyright 2000 Gordon W. Paynter ([email protected])6 * Copyright 2000 Gordon W. Paynter 7 7 * Copyright 2000 The New Zealand Digital Library Project 8 8 * … … 507 507 508 508 // 2.2 Sort the candidates by phrase length 509 // sort(candidates.begin(), candidates.end(), isShorter); 510 make_heap(candidates.begin(), candidates.end(), isShorter); 511 509 make_heap(candidates.begin(), candidates.end(), isLonger); 512 510 513 511 // 3. While candidates is non-empty, confirm the phrases it … … 516 514 517 515 // 3.1 Get next candidate 518 //Phrase c = candidates.front(); 519 //candidates.erase(candidates.begin()); 520 pop_heap(candidates.begin(), candidates.end(), isShorter); 516 pop_heap(candidates.begin(), candidates.end(), isLonger); 521 517 Phrase c = candidates.back(); 522 518 candidates.pop_back(); … … 538 534 // cout << "expanding prefix " << c.toString() << "=> "; 539 535 c.expandUniquePrefixExtensionByOne(); 540 //candidates.push_back(c);541 //sort(candidates.begin(), candidates.end(), isShorter);542 536 candidates.push_back(c); 543 push_heap(candidates.begin(), candidates.end(), is Shorter);537 push_heap(candidates.begin(), candidates.end(), isLonger); 544 538 } 545 539 … … 572 566 else if (c.hasUniqueSuffixExtension()) { 573 567 c.expandUniqueSuffixExtensionByOne(); 574 //candidates.push_back(c);575 //sort(candidates.begin(), candidates.end(), isShorter);576 568 candidates.push_back(c); 577 push_heap(candidates.begin(), candidates.end(), is Shorter);569 push_heap(candidates.begin(), candidates.end(), isLonger); 578 570 } 579 571
Note: See TracChangeset for help on using the changeset viewer.