Context Navigation

← Previous Change
Next Change →

Changeset 1618 for trunk/gsdl

Timestamp:

2000-10-27T09:21:28+13:00 (24 years ago)

Author:

paynter

Message:

Better verbosity control. Fixed a bug in document numbering (suffix numbers
documents from 0, whereas mgpp numbers them from 1).

File:

1 edited

trunk/gsdl/src/phind/generate/suffix.cpp (modified) (14 diffs)

Legend:

Unmodified
Added
Removed

trunk/gsdl/src/phind/generate/suffix.cpp

-              r1562
+              r1618
+// how much output do we want?
+int verbosity = 1;
 int main (int argc, char * argv[]) {
   // Command-line arguments
-  // argv[1] is the collection directory
+  // argv[1] is the phindex directory
   // argv[2] is the maximum array symbol length (optional)
   // argv[3] is the mode, where 1 is stopword mode (optional)
 …
     assert(phraseMode == STOPWORDS);
+  }
-  if (phraseMode == STOPWORDS) {
-    cout << "STOPWORDS mode: no phrase may begin or end with a stopword" << endl;
-  } else {
-    cout << "ALLPHRASE mode: extract every phrase that occurs more than once" << endl;
-  }
+  if (verbosity) {
+    if (phraseMode == STOPWORDS) {
+      cout << "STOPWORDS mode: no phrase may begin or end with a stopword" << endl;
+    } else {
+      cout << "ALLPHRASE mode: extract every phrase that occurs more than once" << endl;
+    }
+  }
   // Read the statistics file
 …
   // Create the suffix & prefix arrays
-  cout << "Create suffix & prefix arrays for " << inputLength << " symbols" << endl;
+  if (verbosity) {
+    cout << "Create suffix & prefix arrays for " << inputLength << " symbols" << endl;
+  }
   suffixArray = new (symbol *)[inputLength];
   prefixArray = new (symbol *)[inputLength];
 …
   // Create the document arrays
-  cout << "Create document arrays for " << numberOfDocuments << " documents" << endl;
+  if (verbosity) {
+    cout << "Create document arrays for " << numberOfDocuments << " documents" << endl;
+  }
   if (numberOfDocuments == 0) {
     cerr << "There are no documents in this collection!" << endl;
 …
   // PASS NUMBER 1
-  cout << endl<< "suffix: starting pass " << phrasePass << endl;
+  if (verbosity) {
+    cout << endl<< "suffix: starting pass " << phrasePass << endl;
+  }
   // We need an input file, for phrases we are about to examine, and an
 …
           phraseData << ";";
+        }
-        phraseData << "d" << i;
+        // Output the document number.  Note that here we've numbered the
+        // N documents from 0 to N-1, but later they'll be 1-N.  Thus we
+        // add 1 to the document id when we output it.
+        phraseData << "d" << (i+1);
+        // Next, output the frequency with which the document occurs, but
+        // only if it is > 1.
         if (documentFrequency[i] > 1) {
           phraseData << "," << documentFrequency[i];
 …
     // feedback
+    if (phraseCounter % 1000 == 0) {
+      tmpString = p.toString();
+      cout << "phrase " << phraseCounter << ": "
+           << "cell " << p.firstSuffixIndex << " - " << tmpString << endl;
+      delete [] tmpString;
+    if (verbosity) {
+      if (phraseCounter % 1000 == 0) {
+        tmpString = p.toString();
+        cout << "phrase " << phraseCounter << ": "
+         << "cell " << p.firstSuffixIndex << " - " << tmpString << endl;
+        delete [] tmpString;
+      }
+    }
+      }
 …
     // Start a new pass
     phrasePass++;
-    cout << endl << "Starting pass " << phrasePass << endl;
+    if (verbosity) {
+      cout << endl << "Starting pass " << phrasePass << endl;
+    }
     // Open the input file
 …
       // feedback
+      if (phraseCounter % 1000 == 0) {
+    tmpString = p.toString();
+    cout << "phrase " << phraseCounter << ": "<< "start " << start
+         << ", length " << length << " - " << tmpString << endl;
+    delete [] tmpString;
+      }
+      if (verbosity) {
+    if (phraseCounter % 1000 == 0) {
+      tmpString = p.toString();
+      cout << "phrase " << phraseCounter << ": "<< "start " << start
+           << ", length " << length << " - " << tmpString << endl;
+      delete [] tmpString;
+    }
+      }
+    }
 …
     Phrase c = candidates.back();
     candidates.pop_back();
-    /*
-    if (c.firstSuffixIndex ==  56962) {
-      cout << endl << "candidate : " << c.toString() << endl;
-      c.ensurePrefixFound();
-      c.ensureSuffixFound();
-      cout << c.firstSuffixIndex << " - " << c.length << " - " << c.toString() << endl;
-      cout << "has unique suffix: " << c.hasUniqueSuffixExtension() << endl ;
-      cout << "has unique prefix: " << c.hasUniquePrefixExtension() << endl;
-    }
-    */
     // 3.2 If we know there are no unique right extensions
 …
   char filename[FILENAME_MAX];
   sprintf(filename, "%s/clauses.numbers", collection);
-  cout << "Reading numbers from: " << filename << endl;
+  if (verbosity) {
+    cout << "Reading numbers from: " << filename << endl;
+  }
   // Open the numbers file
 …
       // If the beginning and end of the interval are the same,
-      // thenm we've found the correct document
+      // then we've found the correct document
       if (begin == end) {
     if (frequency[begin] == 0) {
 …
   // create the new hashtable
-  cout << "Initialising hashTable: " << hashTableFileName << endl;
+  if (verbosity > 1) {
+    cout << "Initialising hashTable: " << hashTableFileName << endl;
+  }
   hashTableFile.open(hashTableFileName, ios::in | ios::out);
   for (cellcount i = 0; i < bigPrime; i++) {
 …
   // create the list of phrases
-  cout << "Initialising list of hashtable entries: " << listOfEntriesName << endl;
+  if (verbosity > 1) {
+    cout << "Initialising list of hashtable entries: " << listOfEntriesName << endl;
+  }
   listOfEntries.open(listOfEntriesName, ios::in | ios::out);
   listOfEntries.write((char *) &example, sizeof(example));

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 1618 for trunk/gsdl

Legend:

trunk/gsdl/src/phind/generate/suffix.cpp

Download in other formats: