Changeset 2807
- Timestamp:
- 2001-10-30T15:49:34+13:00 (22 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/phind/generate/suffix.cpp
r2806  r2807
   65     65      check *suffixCheck;
   66     66
          67    + // the length of the check array
          68    + cellcount checkLength;
   67     69
   68     70      // How many documents are in this collection?
    …      …
   91     93      int pointerCompare(const void *, const void *);
   92     94
   93           -
          95    + // some bit manipulation functions for the check arrays, defined below
          96    + int getSuffixCheck(cellindex suff);
          97    + void setSuffixCheck(cellindex suff);
          98    +
   94     99      // Functions for implementing "phrase memory". These let us "remember"
   95    100      // each phrase that we've expanded without using too much memory.
    …      …
  137    142      // Initialise the candidates, check array, and various variables.
  138    143      sort(candidates.begin(), candidates.end(), isShorter);
  139           - memset(suffixCheck, 0, sizeof(check)*inputLength);
  140           - //for (cellcount j = 0; j < inputLength; j++)
  141           - //  suffixCheck[j] = 0;
         144    + memset(suffixCheck, 0, sizeof(check)*checkLength);
  142    145      unsigned minimum_length = candidates.begin()->length;
  143    146
    …      …
  153    156      while (temp_phrase.length >= minimum_length && !shorter_found) {
  154    157        temp_phrase.ensureSuffixFound();
  155           -   if (suffixCheck[temp_phrase.firstSuffixIndex] == 0)
         158    +   //if (suffixCheck[temp_phrase.firstSuffixIndex] == 0)
         159    +   if (getSuffixCheck(temp_phrase.firstSuffixIndex)==0)
  156    160          temp_phrase.shortenByOneAtPrefix();
  157    161        else
    …      …
  166    170        candidate->ensureSuffixFound();
  167    171        for (cellcount k = candidate->firstSuffixIndex; k <= candidate->lastSuffixIndex; ++k)
  168           -     suffixCheck[k] = candidate->length;
         172    +     //suffixCheck[k] = candidate->length;
         173    +     setSuffixCheck(k);
  169    174      }
  170    175      }
    …      …
  249    254      }
  250    255
         256    + int getSuffixCheck(cellindex suff) {
         257    +   cellindex cell = suff >> 3;
         258    +   check remainder = suff & 0x07; // the last 3 bits
         259    +   if (suffixCheck[cell]& (1 << remainder)) {
         260    +     return 1;
         261    +   }
         262    +   return 0;
         263    + }
         264    + void setSuffixCheck(cellindex suff) {
         265    +   cellindex cell = suff >> 3;
         266    +   check remainder = suff & 0x07; // the last 3 bits
         267    +   suffixCheck[cell] |= (1 << remainder);
         268    +
         269    + }
  251    270
  252    271      // Read the clauses.numbers file into the "symbols" array.
    …      …
  786    805      qsort(prefixArray, contentLength, sizeof(symbol *), prefixCompare);
  787    806
  788           - suffixCheck = new check[contentLength];
         807    + checkLength = contentLength/8 + 1;
         808    + suffixCheck = new check[checkLength];
  789    809      if (suffixCheck == NULL) {
  790    810        cerr << "Suffix error: not enough memory to hold " << inputLength << " symbols." << endl;
  791    811        exit(2);
  792    812      }
  793           - //for (cellcount j = 0; j < contentLength; j++)
  794           - //  suffixCheck[j] = 0;
  795           - memset(suffixCheck, 0, sizeof(check)*contentLength);
         813    + memset(suffixCheck, 0, sizeof(check)*checkLength);
  796    814
  797    815      cout <<"\ngenerating the phrase hierarchy\n\n";
Note:
See TracChangeset
for help on using the changeset viewer.