Changeset 2807


Ignore:
Timestamp:
2001-10-30T15:49:34+13:00 (20 years ago)
Author:
kjm18
Message:

Changed suffixCheck to a bit array.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/phind/generate/suffix.cpp

    r2806 r2807  
    6565check    *suffixCheck;
    6666
     67// the length of the check array
     68cellcount checkLength;
    6769
    6870// How many documents are in this collection?
     
    9193int pointerCompare(const void *, const void *);
    9294
    93 
     95// some bit manipulation functions for the check arrays, defined below
     96int getSuffixCheck(cellindex suff);
     97void setSuffixCheck(cellindex suff);
     98                                           
    9499// Functions for implementing "phrase memory".  These let us "remember"
    95100// each phrase that we've expanded without using too much memory.
     
    137142  // Initialise the candidates, check array, and various variables.
    138143  sort(candidates.begin(), candidates.end(), isShorter);
    139   memset(suffixCheck, 0, sizeof(check)*inputLength);
    140   //for (cellcount j = 0; j < inputLength; j++)
    141   //  suffixCheck[j] = 0;
     144  memset(suffixCheck, 0, sizeof(check)*checkLength);
    142145  unsigned minimum_length = candidates.begin()->length;
    143146 
     
    153156    while (temp_phrase.length >= minimum_length && !shorter_found) {
    154157      temp_phrase.ensureSuffixFound();
    155       if (suffixCheck[temp_phrase.firstSuffixIndex] == 0)
     158      //if (suffixCheck[temp_phrase.firstSuffixIndex] == 0)
     159      if (getSuffixCheck(temp_phrase.firstSuffixIndex)==0)
    156160    temp_phrase.shortenByOneAtPrefix();
    157161      else
     
    166170      candidate->ensureSuffixFound();
    167171      for (cellcount k = candidate->firstSuffixIndex; k <= candidate->lastSuffixIndex; ++k)
    168     suffixCheck[k] = candidate->length;
     172    //suffixCheck[k] = candidate->length;
     173    setSuffixCheck(k);
    169174    }
    170175  }
     
    249254}
    250255
     256int getSuffixCheck(cellindex suff) { // Test the bit for suffix index suff in the bit-packed suffixCheck array; returns 1 if it is set, 0 otherwise.
     257  cellindex cell = suff >> 3; // array element holding the bit: each element covers 8 consecutive suffix indices
     258  check remainder = suff & 0x07; // the last 3 bits: bit position (0-7) within that element
     259  if (suffixCheck[cell]& (1 << remainder)) { // mask out the single bit belonging to this suffix
     260    return 1;
     261  }
     262  return 0;
     263}                                                                               
     264void setSuffixCheck(cellindex suff) {
     265  cellindex cell = suff >> 3;
     266  check remainder = suff & 0x07; // the last 3 bits
     267  suffixCheck[cell] |= (1 << remainder);
     268
     269
    251270
    252271// Read the clauses.numbers file into the "symbols" array.
     
    786805  qsort(prefixArray, contentLength, sizeof(symbol *), prefixCompare);
    787806
    788   suffixCheck = new check[contentLength];
     807  checkLength = contentLength/8 + 1;
     808  suffixCheck = new check[checkLength];
    789809  if (suffixCheck == NULL) {
    790810    cerr << "Suffix error: not enough memory to hold " << inputLength << " symbols." << endl;
    791811    exit(2);
    792812  } 
    793   //for (cellcount j = 0; j < contentLength; j++)
    794   //  suffixCheck[j] = 0;
    795   memset(suffixCheck, 0, sizeof(check)*contentLength);
     813  memset(suffixCheck, 0, sizeof(check)*checkLength);
    796814
    797815  cout <<"\ngenerating the phrase hierarchy\n\n";
Note: See TracChangeset for help on using the changeset viewer.