Changeset 9604
- Timestamp: 2005-04-08T11:18:58+12:00 (19 years ago)
- Location: trunk/gsdl/src/phind/generate
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/src/phind/generate/phrase.cpp
r8362 r9604 185 185 186 186 stream << "s" << *s++; 187 for (cellcount i = 1; i < phrase.length; i++)187 for (cellcount i = 1; i < phrase.length; ++i) 188 188 stream << " s" << *s++; 189 189 … … 203 203 sprintf(str, "s%d", *s++); 204 204 205 for (cellcount i = 1; i < length; i++) {205 for (cellcount i = 1; i < length; ++i) { 206 206 sprintf(str, "%s s%d", str, *s++); 207 207 } … … 224 224 symbol *p = forward; 225 225 226 for (cellcount i = 0; i < length; i++) {226 for (cellcount i = 0; i < length; ++i) { 227 227 if (*p > *words) { 228 228 return 1; … … 230 230 return -1; 231 231 } else { 232 *p++;233 *words++;232 ++*p; 233 ++*words; 234 234 } 235 235 } … … 244 244 symbol *p = back; 245 245 246 for (cellcount i = 0; i < length; i++) {246 for (cellcount i = 0; i < length; ++i) { 247 247 if (*p > *words) { 248 248 return -1; … … 250 250 return 1; 251 251 } else { 252 *p--;253 *words--;252 --*p; 253 --*words; 254 254 } 255 255 } … … 294 294 break; 295 295 } 296 fst++;297 lst++;296 ++fst; 297 ++lst; 298 298 } 299 299 } … … 330 330 break; 331 331 } 332 fst--;333 lst--;332 --fst; 333 --lst; 334 334 } 335 335 } … … 361 361 362 362 while (*next < firstContentSymbol) { 363 next++;364 newlength++;363 ++next; 364 ++newlength; 365 365 } 366 366 increaseSuffixLength(newlength); … … 384 384 cellcount newlength = length + 1; 385 385 while (*next < firstContentSymbol) { 386 next--;387 newlength++;386 --next; 387 --newlength; 388 388 } 389 389 increasePrefixLength(newlength); … … 766 766 // Calculate the length over which the phrases match 767 767 while((*fst == *lst) && (*fst > LASTDELIMITER)) { 768 len++;768 ++len; 769 769 if (*fst > lastStopSymbol) content_len = len; 770 fst++;771 lst++;770 ++fst; 771 ++lst; 772 772 } 773 773 … … 809 809 // Calculate the length over which the phrases match 810 810 while((*fst == *lst) && (*fst > LASTDELIMITER)) { 811 len++;811 ++len; 812 812 if (*fst > lastStopSymbol) content_len = len; 813 fst--;814 lst--;813 --fst; 814 --lst; 815 815 } 816 816 -
trunk/gsdl/src/phind/generate/suffix.cpp
r8362 r9604 142 142 // Try to add each candidate to the results set, ignoring the non-minimal 143 143 for (vector<Phrase>::iterator candidate = candidates.begin(); 144 candidate != candidates.end(); candidate++) {144 candidate != candidates.end(); ++candidate) { 145 145 146 146 // Make a copy of candidate to mutilate while performing sub-phrase checks … … 187 187 // If the two elements are the same, examine the next one 188 188 while (*pa == *pb) { 189 *pa++;190 *pb++;189 ++*pa; 190 ++*pb; 191 191 } 192 192 … … 217 217 // If the two elements are the same, examine the next one 218 218 while (*pa == *pb) { 219 *pa--;220 *pb--;219 --*pa; 220 --*pb; 221 221 } 222 222 … … 275 275 symbol word; 276 276 while (inFile1 >> word) { 277 inputLength++;277 ++inputLength; 278 278 } 279 279 inFile1.close(); … … 305 305 symbols[next++] = word; 306 306 if (word == DOCUMENTSTART) { 307 numberOfDocuments++;307 ++numberOfDocuments; 308 308 } 309 309 } … … 332 332 333 333 // Initialise the document frequency array 334 // for (cellindex i = 0; i < numberOfDocuments; i++) {334 // for (cellindex i = 0; i < numberOfDocuments; ++i) { 335 335 // frequency[i] = 0; 336 336 //} … … 345 345 346 346 // search for the document in which each occurence of the phrase is found 347 for (cellcount j = p.firstSuffixIndex; j <= p.lastSuffixIndex; j++) {347 for (cellcount j = p.firstSuffixIndex; j <= p.lastSuffixIndex; ++j) { 348 348 349 349 // cout << "looking for phrase at suffixArray[" << j << "]\n"; … … 368 368 if (begin == end) { 369 369 if (frequency[begin] == 0) { 370 df++;370 ++df; 371 371 } 372 frequency[begin]++;372 ++frequency[begin]; 373 373 found = true; 374 374 } … … 384 384 if ((d == numberOfDocuments - 1) || (target < documentArray[d+1])) { 385 385 if (frequency[d] == 0) { 386 df++;386 ++df; 387 387 } 388 frequency[d]++;388 ++frequency[d]; 389 389 found = true; 390 390 } else { … … 455 455 456 456 // to begin with, everything is empty 457 // for (cellcount i = 0; i < inputLength; i++) {457 
// for (cellcount i = 0; i < inputLength; ++i) { 458 458 // phraseMemory[i] = 0; 459 459 //} … … 475 475 // create a char with just the bit corresponding to length set 476 476 unsigned char newbit = 1; 477 for (cellcount i = 1; i < length; i++) {477 for (cellcount i = 1; i < length; ++i) { 478 478 newbit <<= 1; 479 479 } … … 493 493 // create a char with just the bit corresponding to length set 494 494 unsigned char newbit = 1; 495 for (cellcount i = 1; i < length; i++) {495 for (cellcount i = 1; i < length; ++i) { 496 496 newbit <<= 1; 497 497 } … … 532 532 } 533 533 hashTableFile.open(hashTableFileName, ios::in | ios::out); 534 for (cellcount i = 0; i < bigPrime; i++) {534 for (cellcount i = 0; i < bigPrime; ++i) { 535 535 hashTableFile.write((char *) &example, sizeof(example)); 536 536 } … … 707 707 708 708 cellcount content=0; 709 for (cellcount i=0; i<inputLength; i++) {710 if (symbols[i]>=firstContent) content++;709 for (cellcount i=0; i<inputLength; ++i) { 710 if (symbols[i]>=firstContent) ++content; 711 711 } 712 712 … … 781 781 782 782 // Initialise prefix and suffix arrays, only use the needed suffixes 783 for (cellcount j = 0, here = 0; j < inputLength; j++) {783 for (cellcount j = 0, here = 0; j < inputLength; ++j) { 784 784 if (symbols[j]>=firstContent) { 785 785 suffixArray[here] = &symbols[j]; 786 786 prefixArray[here] = &symbols[j]; 787 here++;787 ++here; 788 788 } 789 789 } … … 812 812 // just scan through the input text to find the doc starts 813 813 cellindex d = 0; 814 for (cellindex i=0; i<inputLength; i++) {814 for (cellindex i=0; i<inputLength; ++i) { 815 815 if (symbols[i] == DOCUMENTSTART) { 816 816 documentArray[d] = &symbols[i]; 817 d++;817 ++d; 818 818 } 819 819 } … … 911 911 912 912 // write the results 913 for (cellcount k = 0; k < result.size(); k++) {913 for (cellcount k = 0; k < result.size(); ++k) { 914 914 if (k) { 915 915 phraseData << ","; … … 917 917 phraseData << result[k].firstSuffixIndex << "-" << result[k].length; 918 918 
outPhrase << result[k].firstSuffixIndex << " " << result[k].length << endl; 919 outPhraseCounter++;919 ++outPhraseCounter; 920 920 } 921 921 result.clear(); … … 926 926 927 927 // write the documents 928 for (cellcount m = 0, first = 1; m < numberOfDocuments; m++) {928 for (cellcount m = 0, first = 1; m < numberOfDocuments; ++m) { 929 929 if (documentFrequency[m]) { 930 930 if (first) { … … 946 946 947 947 phraseData << endl; 948 phraseCounter++;948 ++phraseCounter; 949 949 950 950 // feedback … … 967 967 968 968 // Start a new pass 969 phrasePass++;969 ++phrasePass; 970 970 if (verbosity) { 971 971 cout << "Starting pass " << phrasePass << endl; … … 1020 1020 phraseData << result.size() << ":"; 1021 1021 1022 for (cellcount i = 0; i < result.size(); i++) {1022 for (cellcount i = 0; i < result.size(); ++i) { 1023 1023 if (i) { 1024 1024 phraseData << ","; … … 1026 1026 phraseData << result[i].firstSuffixIndex << "-" << result[i].length; 1027 1027 outPhrase << result[i].firstSuffixIndex << " " << result[i].length << endl; 1028 outPhraseCounter++;1028 ++outPhraseCounter; 1029 1029 } 1030 1030 result.clear(); … … 1040 1040 1041 1041 // write the documents 1042 for (cellcount i = 0, first = 1; i < numberOfDocuments; i++) {1042 for (cellcount i = 0, first = 1; i < numberOfDocuments; ++i) { 1043 1043 if (documentFrequency[i]) { 1044 1044 if (first) { … … 1060 1060 1061 1061 phraseData << endl; 1062 phraseCounter++;1062 ++phraseCounter; 1063 1063 1064 1064 // feedback
Note: See TracChangeset for help on using the changeset viewer.