/*
 * 08/26/2004
 *
 * TokenMap.java - Similar to a Map in Java, only designed specifically for
 * org.fife.ui.rsyntaxtextarea.Tokens.
 *
 * This library is distributed under a modified BSD license. See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea;

import java.util.Locale;

import javax.swing.text.Segment;


15 | /**
|
---|
16 | * A hash table for reserved words, etc. defined by a {@link TokenMaker}.
|
---|
17 | * This class is designed for the quick lookup of tokens, as it can compare
|
---|
18 | * <code>Segment</code>s without the need to allocate a new string.<p>
|
---|
19 | *
|
---|
20 | * The <code>org.fife.ui.rsyntaxtextarea</code> package uses this class to help
|
---|
21 | * identify reserved words in programming languages. An instance of
|
---|
22 | * {@link TokenMaker} will create and initialize an instance of this class
|
---|
23 | * containing all reserved words, data types, and all other words that need to
|
---|
24 | * be syntax-highlighted for that particular language. When the token maker
|
---|
25 | * parses a line and identifies an individual token, it is looked up in the
|
---|
26 | * <code>TokenMap</code> to see if it should be syntax-highlighted.
|
---|
27 | *
|
---|
28 | * @author Robert Futrell
|
---|
29 | * @version 0.6
|
---|
30 | */
|
---|
31 | public class TokenMap {
|
---|
32 |
|
---|
33 | private int size;
|
---|
34 | private TokenMapToken[] tokenMap;
|
---|
35 | private boolean ignoreCase;
|
---|
36 |
|
---|
37 | private static final int DEFAULT_TOKEN_MAP_SIZE = 52;
|
---|
38 |
|
---|
39 |
|
---|
40 | /**
|
---|
41 | * Constructs a new token map that is case-sensitive.
|
---|
42 | */
|
---|
43 | public TokenMap() {
|
---|
44 | this(DEFAULT_TOKEN_MAP_SIZE);
|
---|
45 | }
|
---|
46 |
|
---|
47 |
|
---|
48 | /**
|
---|
49 | * Constructs a new token map that is case-sensitive.
|
---|
50 | *
|
---|
51 | * @param size The size of the token map.
|
---|
52 | */
|
---|
53 | public TokenMap(int size) {
|
---|
54 | this(size, false);
|
---|
55 | }
|
---|
56 |
|
---|
57 |
|
---|
58 | /**
|
---|
59 | * Constructs a new token map.
|
---|
60 | *
|
---|
61 | * @param ignoreCase Whether or not this token map should ignore case
|
---|
62 | * when comparing tokens.
|
---|
63 | */
|
---|
64 | public TokenMap(boolean ignoreCase) {
|
---|
65 | this(DEFAULT_TOKEN_MAP_SIZE, ignoreCase);
|
---|
66 | }
|
---|
67 |
|
---|
68 |
|
---|
69 | /**
|
---|
70 | * Constructs a new token map.
|
---|
71 | *
|
---|
72 | * @param size The size of the token map.
|
---|
73 | * @param ignoreCase Whether or not this token map should ignore case
|
---|
74 | * when comparing tokens.
|
---|
75 | */
|
---|
76 | public TokenMap(int size, boolean ignoreCase) {
|
---|
77 | this.size = size;
|
---|
78 | tokenMap = new TokenMapToken[size];
|
---|
79 | this.ignoreCase = ignoreCase;
|
---|
80 | }
|
---|
81 |
|
---|
82 |
|
---|
83 | /**
|
---|
84 | * Adds a token to a specified bucket in the token map.
|
---|
85 | *
|
---|
86 | * @param bucket The bucket in which to add the token.
|
---|
87 | * @param token The token to add.
|
---|
88 | */
|
---|
89 | private void addTokenToBucket(int bucket, TokenMapToken token) {
|
---|
90 | TokenMapToken old = tokenMap[bucket];
|
---|
91 | token.nextToken = old;
|
---|
92 | tokenMap[bucket] = token;
|
---|
93 | }
|
---|
94 |
|
---|
95 |
|
---|
96 | /**
|
---|
97 | * Returns the token type associated with the given text, if the given
|
---|
98 | * text is in this token map. If it isn't, <code>-1</code> is returned.
|
---|
99 | *
|
---|
100 | * @param text The segment from which to get the text to compare.
|
---|
101 | * @param start The starting index in the segment of the text.
|
---|
102 | * @param end The ending index in the segment of the text.
|
---|
103 | * @return The token type associated with the given text, or
|
---|
104 | * <code>-1</code> if this token was not specified in this map.
|
---|
105 | */
|
---|
106 | public int get(Segment text, int start, int end) {
|
---|
107 | return get(text.array, start, end);
|
---|
108 | }
|
---|
109 |
|
---|
110 |
|
---|
111 | /**
|
---|
112 | * Returns the token type associated with the given text, if the given
|
---|
113 | * text is in this token map. If it isn't, <code>-1</code> is returned.
|
---|
114 | *
|
---|
115 | * @param array1 An array of characters containing the text.
|
---|
116 | * @param start The starting index in the array of the text.
|
---|
117 | * @param end The ending index in the array of the text.
|
---|
118 | * @return The token type associated with the given text, or
|
---|
119 | * <code>-1</code> if this token was not specified in this map.
|
---|
120 | */
|
---|
121 | public int get(char[] array1, int start, int end) {
|
---|
122 |
|
---|
123 | int length1 = end - start + 1;
|
---|
124 |
|
---|
125 | int hash = getHashCode(array1, start, length1);
|
---|
126 | TokenMapToken token = tokenMap[hash];
|
---|
127 |
|
---|
128 | char[] array2;
|
---|
129 | int offset2;
|
---|
130 | int offset1;
|
---|
131 | int length;
|
---|
132 |
|
---|
133 | /* We check whether or not to ignore case before doing any looping to
|
---|
134 | * minimize the number of extraneous comparisons we do. This makes
|
---|
135 | * for slightly redundant code, but it'll be a little more efficient.
|
---|
136 | */
|
---|
137 |
|
---|
138 | // If matches are case-sensitive (C, C++, Java, etc.)...
|
---|
139 | if (ignoreCase==false) {
|
---|
140 |
|
---|
141 | mainLoop:
|
---|
142 | while (token!=null) {
|
---|
143 | if (token.length==length1) {
|
---|
144 | array2 = token.text;
|
---|
145 | offset2 = token.offset;
|
---|
146 | offset1 = start;
|
---|
147 | length = length1;
|
---|
148 | while (length-- > 0) {
|
---|
149 | if (array1[offset1++]!=array2[offset2++]) {
|
---|
150 | token = token.nextToken;
|
---|
151 | continue mainLoop;
|
---|
152 | }
|
---|
153 | }
|
---|
154 | return token.tokenType;
|
---|
155 | }
|
---|
156 | token = token.nextToken;
|
---|
157 | }
|
---|
158 |
|
---|
159 | }
|
---|
160 |
|
---|
161 | // If matches are NOT case-sensitive (HTML)...
|
---|
162 | // Note that all tokens saved in this map were converted to
|
---|
163 | // lower-case already.
|
---|
164 | else {
|
---|
165 |
|
---|
166 | mainLoop2:
|
---|
167 | while (token!=null) {
|
---|
168 | if (token.length==length1) {
|
---|
169 | array2 = token.text;
|
---|
170 | offset2 = token.offset;
|
---|
171 | offset1 = start;
|
---|
172 | length = length1;
|
---|
173 | while (length-- > 0) {
|
---|
174 | if (RSyntaxUtilities.toLowerCase(
|
---|
175 | array1[offset1++]) != array2[offset2++]) {
|
---|
176 | token = token.nextToken;
|
---|
177 | continue mainLoop2;
|
---|
178 | }
|
---|
179 | }
|
---|
180 | return token.tokenType;
|
---|
181 | }
|
---|
182 | token = token.nextToken;
|
---|
183 | }
|
---|
184 |
|
---|
185 | }
|
---|
186 |
|
---|
187 | // Didn't match any of the tokens in the bucket.
|
---|
188 | return -1;
|
---|
189 |
|
---|
190 | }
|
---|
191 |
|
---|
192 |
|
---|
193 | /**
|
---|
194 | * Returns the hash code for a given string.
|
---|
195 | *
|
---|
196 | * @param text The text to hash.
|
---|
197 | * @param offset The offset into the text at which to start hashing.
|
---|
198 | * @param length The last character in the text to hash.
|
---|
199 | * @return The hash code.
|
---|
200 | */
|
---|
201 | private final int getHashCode(char[] text, int offset, int length) {
|
---|
202 | return (RSyntaxUtilities.toLowerCase(text[offset]) +
|
---|
203 | RSyntaxUtilities.toLowerCase(text[offset+length-1])) % size;
|
---|
204 | }
|
---|
205 |
|
---|
206 |
|
---|
207 | /**
|
---|
208 | * Returns whether this token map ignores case when checking for tokens.
|
---|
209 | * This property is set in the constructor and cannot be changed, as this
|
---|
210 | * is an intrinsic property of a particular programming language.
|
---|
211 | *
|
---|
212 | * @return Whether or not this token maker is ignoring case.
|
---|
213 | */
|
---|
214 | protected boolean isIgnoringCase() {
|
---|
215 | return ignoreCase;
|
---|
216 | }
|
---|
217 |
|
---|
218 |
|
---|
219 | /**
|
---|
220 | * Adds a string to this token map.
|
---|
221 | *
|
---|
222 | * @param string The string to add.
|
---|
223 | * @param tokenType The type of token the string is.
|
---|
224 | */
|
---|
225 | public void put(final String string, final int tokenType) {
|
---|
226 | if (isIgnoringCase())
|
---|
227 | put(string.toLowerCase().toCharArray(), tokenType);
|
---|
228 | else
|
---|
229 | put(string.toCharArray(), tokenType);
|
---|
230 | }
|
---|
231 |
|
---|
232 |
|
---|
233 | /**
|
---|
234 | * Adds a string to this token map. The char array passed-in will be used
|
---|
235 | * as the actual data for the token, so it may well be modified (such as
|
---|
236 | * lower-casing it if <code>ignoreCase</code> is <code>true</code>). This
|
---|
237 | * shouldn't be an issue though as this method is only called from the
|
---|
238 | * public <code>put</code> method, which allocates a new char array.
|
---|
239 | *
|
---|
240 | * @param string The string to add.
|
---|
241 | * @param tokenType The type of token the string is.
|
---|
242 | */
|
---|
243 | private void put(char[] string, int tokenType) {
|
---|
244 | int hashCode = getHashCode(string, 0, string.length);
|
---|
245 | addTokenToBucket(hashCode, new TokenMapToken(string, tokenType));
|
---|
246 | }
|
---|
247 |
|
---|
248 |
|
---|
249 | /**
|
---|
250 | * The "token" used by a token map. Note that this isn't the same thing
|
---|
251 | * as the {@link Token} class, but it's basically a 1-1 correspondence
|
---|
252 | * for reserved words, etc.
|
---|
253 | */
|
---|
254 | private static class TokenMapToken {
|
---|
255 |
|
---|
256 | char[] text;
|
---|
257 | int offset;
|
---|
258 | int length;
|
---|
259 | int tokenType;
|
---|
260 | TokenMapToken nextToken;
|
---|
261 |
|
---|
262 | TokenMapToken(char[] text, int tokenType) {
|
---|
263 | this.text = text;
|
---|
264 | this.offset = 0;
|
---|
265 | this.length = text.length;
|
---|
266 | this.tokenType = tokenType;
|
---|
267 | }
|
---|
268 |
|
---|
269 | public String toString() {
|
---|
270 | return "[TokenMapToken: " + new String(text,offset,length) + "]";
|
---|
271 | }
|
---|
272 |
|
---|
273 | }
|
---|
274 |
|
---|
275 |
|
---|
276 | } |
---|