source: trunk/gsdl/src/phind/generate/check.h@ 2867

Last change on this file since 2867 was 2867, checked in by paynter, 22 years ago

Moved all the sufficCheck functionality into the check.h header and
inlined it.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
1// check.h -- manipulate the suffixCheck array
2//
3// The suffixCheck array is used to prevent a phrase being added to
4// the results of getExpansions. It works by declaring an array that
5// has one bit for each entry in the suffix array, which is cleared at
6// the start of getExpansions. As getExpansions adds each phrase to
7// the result set, we set the bits in the suffixCheck array that
8// correspond to the positions of of the phrase in the suffixArray.
9
10// (The point is that getExpansions can later check whether a phrase
11// or its prefix has been seen before by looking up the bit
12// corresponding to its first occurence in the suffixCheck array: if
13// the bit is set, a subphrase of this phrase is already in the
14// results already, and the current phrase can be ignored.)
15
16#include "suffix.h"
17
18// The suffixCheck array
19typedef unsigned char check;
20check *suffixCheck;
21
22// The length of the suffixCheck array
23cellcount checkLength;
24
25// Some useful bitmask constants
26const check ALLBITS = 255;
27const check LEFTMOSTBIT = (ALLBITS << 7);
28const check LEFTMOSTBITS[] = {0,
29 (ALLBITS << 7), (ALLBITS << 6), (ALLBITS << 5), (ALLBITS << 4),
30 (ALLBITS << 3), (ALLBITS << 2), (ALLBITS << 1), ALLBITS};
31//const check LEFTMOSTBIT = 128;
32//const check LEFTMOSTBITS[] = {0, 128, 192, 224, 240, 248, 252, 254, 255};
33
34
35// Allocate the initial memory for suffixCheck
36inline void allocateSuffixCheck(cellcount number_of_symbols)
37{
38 checkLength = number_of_symbols / (sizeof(check) * 8) + 1;
39 suffixCheck = new check[checkLength];
40 if (suffixCheck == NULL) {
41 cerr << "Suffix error: out of memory allocating suffixCheck array" << endl;
42 exit(2);
43 }
44 memset(suffixCheck, 0, sizeof(check) * checkLength);
45}
46
47
48// Set all bits in suffixCheck to 0
49inline void clearSuffixCheck()
50{
51 memset(suffixCheck, 0, sizeof(check) * checkLength);
52}
53
54
55// Get the value of a particular bit in suffixCheck
56inline int getSuffixCheck(cellindex suff)
57{
58 cellindex cell = suff >> 3;
59 check remainder = suff & 0x07; // the last 3 bits
60 if (suffixCheck[cell] & (LEFTMOSTBIT >> remainder)) {
61 return 1;
62 }
63 return 0;
64}
65
66
67// Set the value of a bit in suffixCheck to 1
68inline void setSuffixCheck(cellindex suff)
69{
70 cellindex cell = suff >> 3;
71 check remainder = suff & 0x07; // the last 3 bits
72 suffixCheck[cell] |= (LEFTMOSTBIT >> remainder);
73}
74
75
76// Set the value of a range of bits in suffixCheck to 1
77void setSuffixCheck(cellindex first, cellindex last)
78{
79 for (cellcount i = first; i <= last; ++i)
80 setSuffixCheck(i);
81}
82
83
84// Set the bits in a cell in the suffixArray.
85// Bits are indexed 0-7.
86inline void setCellBits(cellindex cell_offset, unsigned first_bit, unsigned last_bit)
87{
88 unsigned number_of_bits = last_bit - first_bit + 1;
89 check mask = (LEFTMOSTBITS[number_of_bits] >> first_bit);
90 suffixCheck[cell_offset] |= mask;
91}
92
93
94inline void setSuffixCheck_new(cellindex first, cellindex last)
95{
96 // If only one bit is set, use simpler function
97 // We should be able to remove this case.
98 if (first == last) {
99 setSuffixCheck(first);
100 return;
101 }
102
103 // Find the first and last cells in which bits are set
104 cellindex first_cell = first >> 3;
105 cellindex last_cell = last >> 3;
106
107 // If all the values are in the same cell, set them
108 if (first_cell == last_cell) {
109 setCellBits(first_cell, (first & 0x07), (last & 0x07));
110 return;
111 }
112
113 // Set the bits in the first and last cells
114 setCellBits(first_cell, (first & 0x07), 7);
115 setCellBits(last_cell, 0, (last & 0x07));
116
117 // Set the bits in the intermediate cells
118 ++first_cell;
119 if(last_cell > first_cell)
120 memset(suffixCheck + first_cell, ALLBITS,
121 (last_cell - first_cell) * sizeof(check));
122}
123
124
125// Print the suffixCheck array (for debugging)
126void printSuffixCheck()
127{
128 for (cellcount i = 0; i < checkLength; ++i) {
129 cout << "cell " << i << " \t";
130 cout << "(" << (i * 8) << "-" << (i * 8 + 7) << ") \t";
131 for (cellindex j = 0; j < 8; ++j) {
132 cout << getSuffixCheck(i * 8 + j);
133 // cout << " (" << (i * 8 + j) << ") ";
134 }
135 cout << " (" << (unsigned int) suffixCheck[i] << ")\n";
136 }
137}
138
139
140// To test, put this at the start of main
141
142// allocateSuffixCheck(300);
143// clearSuffixCheck();
144// setSuffixCheck(20);
145// setSuffixCheck(35,50);
146// setSuffixCheck(68,150);
147// setSuffixCheck(170,174);
148// setSuffixCheck(220,230);
149// setSuffixCheck(247);
150// setSuffixCheck(256);
151// printSuffixCheck();
152// exit(0);
153
Note: See TracBrowser for help on using the repository browser.