Context Navigation

← Previous Change
Next Change →

suffix.cpp

Timestamp:

2001-06-01T14:51:29+12:00 (23 years ago)

Author:

sjboddie

Message:

Changes to get phind working under windows

File:

: 1 edited

trunk/gsdl/src/phind/generate/suffix.cpp (modified) (16 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/gsdl/src/phind/generate/suffix.cpp

-              r1882
+              r2487
 #include <assert.h>
-#include <fstream.h>
-#include <iostream.h>
 #include <math.h>
 #include <stdio.h>
 …
 #include <string.h>
+#include <algo.h>
+#include <heap.h>
+#include <vector.h>
+#if defined(GSDL_USE_IOS_H)
+#  include <fstream.h>
+#  include <iostream.h>
+#else
+#  include <fstream>
+#  include <iostream>
+#endif
+#if defined(GSDL_USE_STL_H)
+#  if defined(GSDL_USE_ALGO_H)
+#    include <algo.h>
+#  else
+#    include <algorithm.h>
+#  endif
+#  include <vector.h>
+#else
+#  include <algorithm>
+#  include <vector>
+#endif
+#include <stl_heap.h>
 #include "suffix.h"
 #include "phrase.h"
 // Global variables declared in suffix.h
 …
   // Create the suffix & prefix arrays
   suffixArray = new (symbol *)[inputLength];
   prefixArray = new (symbol *)[inputLength];
   suffixCheck = new (check)[inputLength];
   prefixCheck = new (check)[inputLength];
+  suffixArray = new symbol *[inputLength];
+  prefixArray = new symbol *[inputLength];
+  suffixCheck = new check[inputLength];
+  prefixCheck = new check[inputLength];
   if (prefixCheck == NULL) {
     cerr << "Suffix error: not enough memory to hold " << inputLength
 …
   // each phrase occurs in each document.  The number of documents in
   // which a phrase occurs is stored in df.
   frequency documentFrequency[numberOfDocuments];
+  frequency *documentFrequency = new frequency[numberOfDocuments];
   frequency df;
   // documentArray will be searched in order to discover which document
   // each phrase occurs in.
   documentArray = new (symbol *)[numberOfDocuments];
+  documentArray = new symbol *[numberOfDocuments];
   // Discover all the DOCUMENTSTART symbols and store as a phrase
 …
   // Iterate over the different symbols by working through the suffix array
   vector<Phrase> result;
   cellindex i = 0;
+  cellindex ij = 0;
   char *tmpString;
   while (i < inputLength) {
+  while (ij < inputLength) {
     // make a new phrase of length 1
     p = Phrase(suffixArray[i], 1, SUFFIX);
     p.findFirstAndLastSuffix(i, inputLength-1);
     // cout << "cell " << i << " - " << p.toString() << endl;
+    p = Phrase(suffixArray[ij], 1, SUFFIX);
+    p.findFirstAndLastSuffix(ij, inputLength-1);
+    // cout << "cell " << ij << " - " << p.toString() << endl;
     // We ignore this symbol if it occurs only once, if it is a delimiter,
 …
     // it explodes the size of the indexes.  So: would it be useful?
     if (!((p.suffixFrequency <= 1) ||
       // (*suffixArray[i] != 23054) ||
       (*suffixArray[i] <= LASTDELIMITER) ||
       ((phraseMode == STOPWORDS) && (*suffixArray[i] <= lastStopSymbol)))) {
+      // (*suffixArray[ij] != 23054) ||
+      (*suffixArray[ij] <= LASTDELIMITER) ||
+      ((phraseMode == STOPWORDS) && (*suffixArray[ij] <= lastStopSymbol)))) {
       // Get minimal expansions of the phrase
 …
     // Remember that we have expanded this phrase
     rememberThisPhrase(i, 1);
+    rememberThisPhrase(ij, 1);
     // write the phrase text
     tmpString = p.toString();
     phraseData << i << "-1:" << tmpString << ":" << p.suffixFrequency << ":"
+    phraseData << ij << "-1:" << tmpString << ":" << p.suffixFrequency << ":"
            << result.size() << ":";
     delete [] tmpString;
     // write the results
     for (cellcount i = 0; i < result.size(); i++) {
       if (i) {
+    for (cellcount k = 0; k < result.size(); k++) {
+      if (k) {
         phraseData << ",";
+      }
       phraseData << result[i].firstSuffixIndex << "-" << result[i].length;
       outPhrase << result[i].firstSuffixIndex << " " << result[i].length << endl;
+      phraseData << result[k].firstSuffixIndex << "-" << result[k].length;
+      outPhrase << result[k].firstSuffixIndex << " " << result[k].length << endl;
       outPhraseCounter++;
+    }
 …
     // write the documents
     for (cellcount i = 0, first = 1; i < numberOfDocuments; i++) {
       if (documentFrequency[i]) {
+    for (cellcount m = 0, first = 1; m < numberOfDocuments; m++) {
+      if (documentFrequency[m]) {
         if (first) {
           first = 0;
 …
         // N documents from 0 to N-1, but later they'll be 1-N.  Thus we
         // add 1 to the document id when we output it.
         phraseData << "d" << (i+1);
+        phraseData << "d" << (m+1);
         // Next, output the frequency with which the document occurs, but
         // only if it is > 1.
         if (documentFrequency[i] > 1) {
           phraseData << "," << documentFrequency[i];
+        if (documentFrequency[m] > 1) {
+          phraseData << "," << documentFrequency[m];
+        }
+      }
 …
+      }
+    }
    i = p.lastSuffixIndex + 1;
+   ij = p.lastSuffixIndex + 1;
+  }
   outPhrase.close();
 …
   deletePhraseMemory();
+  delete [] documentFrequency;
   delete [] symbols;
   delete [] suffixArray;
 …
       suffixCheck[i] = c.length;
+    }
     for (cellcount i = c.firstPrefixIndex; i <= c.lastPrefixIndex; i++) {
       prefixCheck[i] = c.length;
+    for (cellcount ik = c.firstPrefixIndex; ik <= c.lastPrefixIndex; ik++) {
+      prefixCheck[ik] = c.length;
+    }
+      }
 …
       suffixCheck[i] = c.length;
+    }
     for (cellcount i = c.firstPrefixIndex; i <= c.lastPrefixIndex; i++) {
       prefixCheck[i] = c.length;
+    for (cellcount ijk = c.firstPrefixIndex; ijk <= c.lastPrefixIndex; ijk++) {
+      prefixCheck[ijk] = c.length;
+    }
 …
     cout << "Allocating symbol arrays for " << inputLength << " symbols" << endl;
+  }
   symbols = new (symbol)[inputLength];
+  symbols = new symbol[inputLength];
   if (symbols == NULL) {
     cerr << "Suffix error: not enough memory to hold " << inputLength
 …
   // search for the document in which each occurrence of the phrase is found
   for (cellcount i = p.firstSuffixIndex; i <= p.lastSuffixIndex; i++) {
+  for (cellcount j = p.firstSuffixIndex; j <= p.lastSuffixIndex; j++) {
     // cout << "looking for phrase at suffixArray[" << i << "]\n";
+    // cout << "looking for phrase at suffixArray[" << j << "]\n";
     target = suffixArray[i];
+    target = suffixArray[j];
     begin = 0;
     end = numberOfDocuments - 1;
 …
 void initialisePhraseMemory() {
   phraseMemory = new (unsigned char)[inputLength];
+  phraseMemory = new unsigned char[inputLength];
   // to begin with, everything is empty

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 2487 for trunk/gsdl/src/phind/generate/suffix.cpp

Legend:

trunk/gsdl/src/phind/generate/suffix.cpp

Download in other formats: