Changeset 1618 for trunk/gsdl


Ignore:
Timestamp:
2000-10-27T09:21:28+13:00 (24 years ago)
Author:
paynter
Message:

Better verbosity control. Fixed bug in document numbers (suffix numbers
documents from 0, whereas mgpp numbers them from 1).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/phind/generate/suffix.cpp

    r1562 r1618  
    9191
    9292
     93// how much output do we want?
     94int verbosity = 1;
     95
    9396
    9497int main (int argc, char * argv[]) {
    9598
    9699  // Command-line arguments
    97   // argv[1] is the collection directory
     100  // argv[1] is the phindex directory
    98101  // argv[2] is the maximum array symbol length (optional)
    99102  // argv[3] is the mode, where 1 is stopword mode (optional)
     
    119122    assert(phraseMode == STOPWORDS);
    120123  }
    121   if (phraseMode == STOPWORDS) {
    122     cout << "STOPWORDS mode: no phrase may begin or end with a stopword" << endl;
    123   } else {
    124     cout << "ALLPHRASE mode: extract every phrase that occurs more than once" << endl;
    125   }
    126  
     124  if (verbosity) {
     125    if (phraseMode == STOPWORDS) {
     126      cout << "STOPWORDS mode: no phrase may begin or end with a stopword" << endl;
     127    } else {
     128      cout << "ALLPHRASE mode: extract every phrase that occurs more than once" << endl;
     129    }
     130  }
    127131
    128132  // Read the statistics file
     
    135139
    136140  // Create the suffix & prefix arrays
    137   cout << "Create suffix & prefix arrays for " << inputLength << " symbols" << endl;
     141  if (verbosity) {
     142    cout << "Create suffix & prefix arrays for " << inputLength << " symbols" << endl;
     143  }
    138144  suffixArray = new (symbol *)[inputLength];
    139145  prefixArray = new (symbol *)[inputLength];
     
    149155
    150156  // Create the document arrays
    151   cout << "Create document arrays for " << numberOfDocuments << " documents" << endl;
     157  if (verbosity) {
     158    cout << "Create document arrays for " << numberOfDocuments << " documents" << endl;
     159  }
    152160  if (numberOfDocuments == 0) {
    153161    cerr << "There are no documents in this collection!" << endl;
     
    217225
    218226  // PASS NUMBER 1
    219   cout << endl<< "suffix: starting pass " << phrasePass << endl;
     227  if (verbosity) {
     228    cout << endl<< "suffix: starting pass " << phrasePass << endl;
     229  }
    220230
    221231  // We need an input file, for phrases we are about to examine, and an
     
    299309          phraseData << ";";
    300310        }
    301         phraseData << "d" << i;
     311        // Output the document number.  Note that here we've numbered the
     312        // N documents from 0 to N-1, but later they'll be 1-N.  Thus we
     313        // add 1 to the document id when we output it.
     314        phraseData << "d" << (i+1);
     315        // Next, output the frequency with which the document occurs, but
     316        // only if it is > 1.
    302317        if (documentFrequency[i] > 1) {
    303318          phraseData << "," << documentFrequency[i];
     
    310325
    311326    // feedback
    312     if (phraseCounter % 1000 == 0) {
    313       tmpString = p.toString();
    314       cout << "phrase " << phraseCounter << ": "
    315            << "cell " << p.firstSuffixIndex << " - " << tmpString << endl;
    316       delete [] tmpString;
     327    if (verbosity) {
     328      if (phraseCounter % 1000 == 0) {
     329        tmpString = p.toString();
     330        cout << "phrase " << phraseCounter << ": "
     331         << "cell " << p.firstSuffixIndex << " - " << tmpString << endl;
     332        delete [] tmpString;
     333      }
    317334    }
    318335      }
     
    329346    // Start a new pass
    330347    phrasePass++;
    331     cout << endl << "Starting pass " << phrasePass << endl;
     348    if (verbosity) {
     349      cout << endl << "Starting pass " << phrasePass << endl;
     350    }
    332351
    333352    // Open the input file
     
    427446
    428447      // feedback
    429       if (phraseCounter % 1000 == 0) {
    430     tmpString = p.toString();
    431     cout << "phrase " << phraseCounter << ": "<< "start " << start
    432          << ", length " << length << " - " << tmpString << endl;
    433     delete [] tmpString;
    434       }
     448      if (verbosity) {
     449    if (phraseCounter % 1000 == 0) {
     450      tmpString = p.toString();
     451      cout << "phrase " << phraseCounter << ": "<< "start " << start
     452           << ", length " << length << " - " << tmpString << endl;
     453      delete [] tmpString;
     454    }
     455      }
     456
    435457    }
    436458
     
    493515    Phrase c = candidates.back();
    494516    candidates.pop_back();
    495 
    496     /*
    497     if (c.firstSuffixIndex ==  56962) {
    498       cout << endl << "candidate : " << c.toString() << endl;
    499       c.ensurePrefixFound();
    500       c.ensureSuffixFound();
    501       cout << c.firstSuffixIndex << " - " << c.length << " - " << c.toString() << endl;
    502       cout << "has unique suffix: " << c.hasUniqueSuffixExtension() << endl ;
    503       cout << "has unique prefix: " << c.hasUniquePrefixExtension() << endl;
    504     }
    505     */
    506 
    507517
    508518    // 3.2 If we know there are no unique right extensions
     
    668678  char filename[FILENAME_MAX];
    669679  sprintf(filename, "%s/clauses.numbers", collection);
    670   cout << "Reading numbers from: " << filename << endl;
     680  if (verbosity) {
     681    cout << "Reading numbers from: " << filename << endl;
     682  }
    671683
    672684  // Open the numbers file
     
    753765
    754766      // If the beginning and end of the interval are the same,
    755       // thenm we've found the correct document
     767      // then we've found the correct document
    756768      if (begin == end) {
    757769    if (frequency[begin] == 0) {
     
    915927
    916928  // create the new hashtable
    917   cout << "Initialising hashTable: " << hashTableFileName << endl;
     929  if (verbosity > 1) {
     930    cout << "Initialising hashTable: " << hashTableFileName << endl;
     931  }
    918932  hashTableFile.open(hashTableFileName, ios::in | ios::out);
    919933  for (cellcount i = 0; i < bigPrime; i++) {
     
    922936
    923937  // create the list of phrases
    924   cout << "Initialising list of hashtable entries: " << listOfEntriesName << endl;
     938  if (verbosity > 1) {
     939    cout << "Initialising list of hashtable entries: " << listOfEntriesName << endl;
     940  }
    925941  listOfEntries.open(listOfEntriesName, ios::in | ios::out);
    926942  listOfEntries.write((char *) &example, sizeof(example));
Note: See TracChangeset for help on using the changeset viewer.