source: main/trunk/greenstone2/build-src/src/phind/generate/check.h@ 21356

Last change on this file since 21356 was 2958, checked in by sjboddie, 22 years ago

Minor change to get phind to compile under VC++ again.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.0 KB
Line 
1// check.h -- manipulate the suffixCheck array
2//
3// The suffixCheck array is used to prevent a phrase being added to
4// the results of getExpansions. It works by declaring an array that
5// has one bit for each entry in the suffix array, which is cleared at
6// the start of getExpansions. As getExpansions adds each phrase to
7// the result set, we set the bits in the suffixCheck array that
8// correspond to the positions of of the phrase in the suffixArray.
9
10// (The point is that getExpansions can later check whether a phrase
11// or its prefix has been seen before by looking up the bit
12// corresponding to its first occurence in the suffixCheck array: if
13// the bit is set, a subphrase of this phrase is already in the
14// results already, and the current phrase can be ignored.)
15
16#include "suffix.h"
17
18// The suffixCheck array
19#if defined (__WIN32__)
20typedef unsigned char check;
21#else
22#include <inttypes.h>
23typedef uint8_t check;
24#endif
25
26check *suffixCheck;
27
28// The length of the suffixCheck array
29cellcount checkLength;
30
31// Some useful bitmask constants
32const check ALLBITS = 255;
33const check LEFTMOSTBIT = static_cast<check>(ALLBITS << 7);
34const check LEFTMOSTBITS[] = {static_cast<check>(ALLBITS << 8),
35 static_cast<check>(ALLBITS << 7),
36 static_cast<check>(ALLBITS << 6),
37 static_cast<check>(ALLBITS << 5),
38 static_cast<check>(ALLBITS << 4),
39 static_cast<check>(ALLBITS << 3),
40 static_cast<check>(ALLBITS << 2),
41 static_cast<check>(ALLBITS << 1),
42 static_cast<check>(ALLBITS << 0)};
43
44
45// Allocate the initial memory for suffixCheck
46inline void allocateSuffixCheck(cellcount number_of_symbols)
47{
48 checkLength = number_of_symbols / (sizeof(check) * 8) + 1;
49 suffixCheck = new check[checkLength];
50 if (suffixCheck == NULL) {
51 cerr << "Suffix error: out of memory allocating suffixCheck array" << endl;
52 exit(2);
53 }
54 memset(suffixCheck, 0, sizeof(check) * checkLength);
55}
56
57
58// Set all bits in suffixCheck to 0
59inline void clearSuffixCheck()
60{
61 memset(suffixCheck, 0, sizeof(check) * checkLength);
62}
63
64
65// Get the value of a particular bit in suffixCheck
66inline int getSuffixCheck(cellindex position)
67{
68 cellindex cell = position >> 3;
69 check remainder = position & 0x07; // the last 3 bits
70 if (suffixCheck[cell] & (LEFTMOSTBIT >> remainder)) {
71 return 1;
72 }
73 return 0;
74}
75
76// Set the value of a bit in suffixCheck to 1
77inline void setSuffixCheck(cellindex suff)
78{
79 cellindex cell = suff >> 3;
80 check remainder = suff & 0x07u; // the last 3 bits
81 suffixCheck[cell] |= (LEFTMOSTBIT >> remainder);
82}
83
84
85// Set the bits in a cell in the suffixArray.
86// Bits are indexed 0-7.
87inline void setCellBits(cellindex cell_offset, unsigned first_bit, unsigned last_bit)
88{
89 unsigned number_of_bits = last_bit - first_bit + 1;
90 check mask = (LEFTMOSTBITS[number_of_bits] >> first_bit);
91 suffixCheck[cell_offset] |= mask;
92}
93
94
95inline void setSuffixCheck(cellindex first, cellindex last)
96{
97 // Find the first and last cells in which bits are set
98 cellindex first_cell = first >> 3;
99 cellindex last_cell = last >> 3;
100
101 // If all the values are in the same cell, set them
102 if (first_cell == last_cell) {
103 setCellBits(first_cell, (first & 0x07u), (last & 0x07u));
104 return;
105 }
106
107 // Set the bits in the first and last cells
108 setCellBits(first_cell, (first & 0x07u), 7);
109 setCellBits(last_cell, 0, (last & 0x07u));
110
111 // Set the bits in the intermediate cells
112 ++first_cell;
113 if(last_cell > first_cell)
114 memset(suffixCheck + first_cell, ALLBITS,
115 (last_cell - first_cell) * sizeof(check));
116}
117
118
119// Print the suffixCheck array (for debugging)
120void printSuffixCheck()
121{
122 for (cellcount i = 0; i < checkLength; ++i) {
123 cout << "cell " << i << " \t";
124 cout << "(" << (i * 8) << "-" << (i * 8 + 7) << ") \t";
125 for (cellindex j = 0; j < 8; ++j)
126 cout << getSuffixCheck(i * 8 + j);
127 cout << " (" << (unsigned int) suffixCheck[i] << ")\n";
128 }
129}
130
Note: See TracBrowser for help on using the repository browser.