Changeset 1618 for trunk/gsdl
- Timestamp: 2000-10-27T09:21:28+13:00 (24 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/src/phind/generate/suffix.cpp
r1562 r1618 91 91 92 92 93 // how much output do we want? 94 int verbosity = 1; 95 93 96 94 97 int main (int argc, char * argv[]) { 95 98 96 99 // Command-line arguments 97 // argv[1] is the collectiondirectory100 // argv[1] is the phindex directory 98 101 // argv[2] is the maximum array symbol length (optional) 99 102 // argv[3] is the mode, where 1 is stopword mode (optional) … … 119 122 assert(phraseMode == STOPWORDS); 120 123 } 121 if (phraseMode == STOPWORDS) { 122 cout << "STOPWORDS mode: no phrase may begin or end with a stopword" << endl; 123 } else { 124 cout << "ALLPHRASE mode: extract every phrase that occurs more than once" << endl; 125 } 126 124 if (verbosity) { 125 if (phraseMode == STOPWORDS) { 126 cout << "STOPWORDS mode: no phrase may begin or end with a stopword" << endl; 127 } else { 128 cout << "ALLPHRASE mode: extract every phrase that occurs more than once" << endl; 129 } 130 } 127 131 128 132 // Read the statistics file … … 135 139 136 140 // Create the suffix & prefix arrays 137 cout << "Create suffix & prefix arrays for " << inputLength << " symbols" << endl; 141 if (verbosity) { 142 cout << "Create suffix & prefix arrays for " << inputLength << " symbols" << endl; 143 } 138 144 suffixArray = new (symbol *)[inputLength]; 139 145 prefixArray = new (symbol *)[inputLength]; … … 149 155 150 156 // Create the document arrays 151 cout << "Create document arrays for " << numberOfDocuments << " documents" << endl; 157 if (verbosity) { 158 cout << "Create document arrays for " << numberOfDocuments << " documents" << endl; 159 } 152 160 if (numberOfDocuments == 0) { 153 161 cerr << "There are no documents in this collection!" 
<< endl; … … 217 225 218 226 // PASS NUMBER 1 219 cout << endl<< "suffix: starting pass " << phrasePass << endl; 227 if (verbosity) { 228 cout << endl<< "suffix: starting pass " << phrasePass << endl; 229 } 220 230 221 231 // We need an input file, for phrases we are about to examine, and an … … 299 309 phraseData << ";"; 300 310 } 301 phraseData << "d" << i; 311 // Output the document number. Note that here we've numbered the 312 // N documents from 0 to N-1, but later they'll be 1-N. Thus we 313 // add 1 to the document id when we output it. 314 phraseData << "d" << (i+1); 315 // Next, output the frequency with which the document occurs, but 316 // only if it is > 1. 302 317 if (documentFrequency[i] > 1) { 303 318 phraseData << "," << documentFrequency[i]; … … 310 325 311 326 // feedback 312 if (phraseCounter % 1000 == 0) { 313 tmpString = p.toString(); 314 cout << "phrase " << phraseCounter << ": " 315 << "cell " << p.firstSuffixIndex << " - " << tmpString << endl; 316 delete [] tmpString; 327 if (verbosity) { 328 if (phraseCounter % 1000 == 0) { 329 tmpString = p.toString(); 330 cout << "phrase " << phraseCounter << ": " 331 << "cell " << p.firstSuffixIndex << " - " << tmpString << endl; 332 delete [] tmpString; 333 } 317 334 } 318 335 } … … 329 346 // Start a new pass 330 347 phrasePass++; 331 cout << endl << "Starting pass " << phrasePass << endl; 348 if (verbosity) { 349 cout << endl << "Starting pass " << phrasePass << endl; 350 } 332 351 333 352 // Open the input file … … 427 446 428 447 // feedback 429 if (phraseCounter % 1000 == 0) { 430 tmpString = p.toString(); 431 cout << "phrase " << phraseCounter << ": "<< "start " << start 432 << ", length " << length << " - " << tmpString << endl; 433 delete [] tmpString; 434 } 448 if (verbosity) { 449 if (phraseCounter % 1000 == 0) { 450 tmpString = p.toString(); 451 cout << "phrase " << phraseCounter << ": "<< "start " << start 452 << ", length " << length << " - " << tmpString << endl; 453 delete [] tmpString; 
454 } 455 } 456 435 457 } 436 458 … … 493 515 Phrase c = candidates.back(); 494 516 candidates.pop_back(); 495 496 /*497 if (c.firstSuffixIndex == 56962) {498 cout << endl << "candidate : " << c.toString() << endl;499 c.ensurePrefixFound();500 c.ensureSuffixFound();501 cout << c.firstSuffixIndex << " - " << c.length << " - " << c.toString() << endl;502 cout << "has unique suffix: " << c.hasUniqueSuffixExtension() << endl ;503 cout << "has unique prefix: " << c.hasUniquePrefixExtension() << endl;504 }505 */506 507 517 508 518 // 3.2 If we know there are no unique right extensions … … 668 678 char filename[FILENAME_MAX]; 669 679 sprintf(filename, "%s/clauses.numbers", collection); 670 cout << "Reading numbers from: " << filename << endl; 680 if (verbosity) { 681 cout << "Reading numbers from: " << filename << endl; 682 } 671 683 672 684 // Open the numbers file … … 753 765 754 766 // If the beginning and end of the interval are the same, 755 // then mwe've found the correct document767 // then we've found the correct document 756 768 if (begin == end) { 757 769 if (frequency[begin] == 0) { … … 915 927 916 928 // create the new hashtable 917 cout << "Initialising hashTable: " << hashTableFileName << endl; 929 if (verbosity > 1) { 930 cout << "Initialising hashTable: " << hashTableFileName << endl; 931 } 918 932 hashTableFile.open(hashTableFileName, ios::in | ios::out); 919 933 for (cellcount i = 0; i < bigPrime; i++) { … … 922 936 923 937 // create the list of phrases 924 cout << "Initialising list of hashtable entries: " << listOfEntriesName << endl; 938 if (verbosity > 1) { 939 cout << "Initialising list of hashtable entries: " << listOfEntriesName << endl; 940 } 925 941 listOfEntries.open(listOfEntriesName, ios::in | ios::out); 926 942 listOfEntries.write((char *) &example, sizeof(example));
Note: See TracChangeset for help on using the changeset viewer.