source: other-projects/rsyntax-textarea/devel-packages/jflex-1.4.3/src/JFlex/CharClasses.java@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 11.5 KB
Line 
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * JFlex 1.4.3 *
3 * Copyright (C) 1998-2009 Gerwin Klein <[email protected]> *
4 * All rights reserved. *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License. See the file *
8 * COPYRIGHT for more information. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License along *
16 * with this program; if not, write to the Free Software Foundation, Inc., *
17 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
18 * *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21package JFlex;
22
23import java.util.*;
24
25
26/**
27 *
28 * @author Gerwin Klein
29 * @version JFlex 1.4.3, $Revision: 433 $, $Date: 2009-01-31 19:52:34 +1100 (Sat, 31 Jan 2009) $
30 */
31public class CharClasses {
32
33 /** debug flag (for char classes only) */
34 private static final boolean DEBUG = false;
35
36 /** the largest character that can be used in char classes */
37 public static final char maxChar = '\uFFFF';
38
39 /** the char classes */
40 private Vector /* of IntCharSet */ classes;
41
42 /** the largest character actually used in a specification */
43 private char maxCharUsed;
44
45 /**
46 * Constructs a new CharClass object that provides space for
47 * classes of characters from 0 to maxCharCode.
48 *
49 * Initially all characters are in class 0.
50 *
51 * @param maxCharCode the last character code to be
52 * considered. (127 for 7bit Lexers,
53 * 255 for 8bit Lexers and 0xFFFF
54 * for Unicode Lexers).
55 */
56 public CharClasses(int maxCharCode) {
57 if (maxCharCode < 0 || maxCharCode > 0xFFFF)
58 throw new IllegalArgumentException();
59
60 maxCharUsed = (char) maxCharCode;
61
62 classes = new Vector();
63 classes.addElement(new IntCharSet(new Interval((char) 0, maxChar)));
64 }
65
66
67 /**
68 * Returns the greatest Unicode value of the current input character set.
69 */
70 public char getMaxCharCode() {
71 return maxCharUsed;
72 }
73
74
75 /**
76 * Sets the largest Unicode value of the current input character set.
77 *
78 * @param charCode the largest character code, used for the scanner
79 * (i.e. %7bit, %8bit, %16bit etc.)
80 */
81 public void setMaxCharCode(int charCode) {
82 if (charCode < 0 || charCode > 0xFFFF)
83 throw new IllegalArgumentException();
84
85 maxCharUsed = (char) charCode;
86 }
87
88
89 /**
90 * Returns the current number of character classes.
91 */
92 public int getNumClasses() {
93 return classes.size();
94 }
95
96
97
98 /**
99 * Updates the current partition, so that the specified set of characters
100 * gets a new character class.
101 *
102 * Characters that are elements of <code>set</code> are not in the same
103 * equivalence class with characters that are not elements of <code>set</code>.
104 *
105 * @param set the set of characters to distinguish from the rest
106 * @param caseless if true upper/lower/title case are considered equivalent
107 */
108 public void makeClass(IntCharSet set, boolean caseless) {
109 if (caseless) set = set.getCaseless();
110
111 if ( DEBUG ) {
112 Out.dump("makeClass("+set+")");
113 dump();
114 }
115
116 int oldSize = classes.size();
117 for (int i = 0; i < oldSize; i++) {
118 IntCharSet x = (IntCharSet) classes.elementAt(i);
119
120 if (x.equals(set)) return;
121
122 IntCharSet and = x.and(set);
123
124 if ( and.containsElements() ) {
125 if ( x.equals(and) ) {
126 set.sub(and);
127 continue;
128 }
129 else if ( set.equals(and) ) {
130 x.sub(and);
131 classes.addElement(and);
132 if (DEBUG) {
133 Out.dump("makeClass(..) finished");
134 dump();
135 }
136 return;
137 }
138
139 set.sub(and);
140 x.sub(and);
141 classes.addElement(and);
142 }
143 }
144
145 if (DEBUG) {
146 Out.dump("makeClass(..) finished");
147 dump();
148 }
149 }
150
151
152 /**
153 * Returns the code of the character class the specified character belongs to.
154 */
155 public int getClassCode(char letter) {
156 int i = -1;
157 while (true) {
158 IntCharSet x = (IntCharSet) classes.elementAt(++i);
159 if ( x.contains(letter) ) return i;
160 }
161 }
162
163 /**
164 * Dump charclasses to the dump output stream
165 */
166 public void dump() {
167 Out.dump(toString());
168 }
169
170
171 /**
172 * Return a string representation of one char class
173 *
174 * @param theClass the index of the class to
175 */
176 public String toString(int theClass) {
177 return classes.elementAt(theClass).toString();
178 }
179
180
181 /**
182 * Return a string representation of the char classes
183 * stored in this class.
184 *
185 * Enumerates the classes by index.
186 */
187 public String toString() {
188 StringBuffer result = new StringBuffer("CharClasses:");
189
190 result.append(Out.NL);
191
192 for (int i = 0; i < classes.size(); i++)
193 result.append("class "+i+":"+Out.NL+classes.elementAt(i)+Out.NL);
194
195 return result.toString();
196 }
197
198
199 /**
200 * Creates a new character class for the single character <code>singleChar</code>.
201 *
202 * @param caseless if true upper/lower/title case are considered equivalent
203 */
204 public void makeClass(char singleChar, boolean caseless) {
205 makeClass(new IntCharSet(singleChar), caseless);
206 }
207
208
209 /**
210 * Creates a new character class for each character of the specified String.
211 *
212 * @param caseless if true upper/lower/title case are considered equivalent
213 */
214 public void makeClass(String str, boolean caseless) {
215 for (int i = 0; i < str.length(); i++) makeClass(str.charAt(i), caseless);
216 }
217
218
219 /**
220 * Updates the current partition, so that the specified set of characters
221 * gets a new character class.
222 *
223 * Characters that are elements of the set <code>v</code> are not in the same
224 * equivalence class with characters that are not elements of the set <code>v</code>.
225 *
226 * @param v a Vector of Interval objects.
227 * This Vector represents a set of characters. The set of characters is
228 * the union of all intervals in the Vector.
229 *
230 * @param caseless if true upper/lower/title case are considered equivalent
231 */
232 public void makeClass(Vector /* Interval */ v, boolean caseless) {
233 makeClass(new IntCharSet(v), caseless);
234 }
235
236
237 /**
238 * Updates the current partition, so that the set of all characters not contained in the specified
239 * set of characters gets a new character class.
240 *
241 * Characters that are elements of the set <code>v</code> are not in the same
242 * equivalence class with characters that are not elements of the set <code>v</code>.
243 *
244 * This method is equivalent to <code>makeClass(v)</code>
245 *
246 * @param v a Vector of Interval objects.
247 * This Vector represents a set of characters. The set of characters is
248 * the union of all intervals in the Vector.
249 *
250 * @param caseless if true upper/lower/title case are considered equivalent
251 */
252 public void makeClassNot(Vector v, boolean caseless) {
253 makeClass(new IntCharSet(v), caseless);
254 }
255
256
257 /**
258 * Returns an array that contains the character class codes of all characters
259 * in the specified set of input characters.
260 */
261 private int [] getClassCodes(IntCharSet set, boolean negate) {
262
263 if (DEBUG) {
264 Out.dump("getting class codes for "+set);
265 if (negate)
266 Out.dump("[negated]");
267 }
268
269 int size = classes.size();
270
271 // [fixme: optimize]
272 int temp [] = new int [size];
273 int length = 0;
274
275 for (int i = 0; i < size; i++) {
276 IntCharSet x = (IntCharSet) classes.elementAt(i);
277 if ( negate ) {
278 if ( !set.and(x).containsElements() ) {
279 temp[length++] = i;
280 if (DEBUG) Out.dump("code "+i);
281 }
282 }
283 else {
284 if ( set.and(x).containsElements() ) {
285 temp[length++] = i;
286 if (DEBUG) Out.dump("code "+i);
287 }
288 }
289 }
290
291 int result [] = new int [length];
292 System.arraycopy(temp, 0, result, 0, length);
293
294 return result;
295 }
296
297
298 /**
299 * Returns an array that contains the character class codes of all characters
300 * in the specified set of input characters.
301 *
302 * @param intervallVec a Vector of Intervals, the set of characters to get
303 * the class codes for
304 *
305 * @return an array with the class codes for intervallVec
306 */
307 public int [] getClassCodes(Vector /* Interval */ intervallVec) {
308 return getClassCodes(new IntCharSet(intervallVec), false);
309 }
310
311
312 /**
313 * Returns an array that contains the character class codes of all characters
314 * that are <strong>not</strong> in the specified set of input characters.
315 *
316 * @param intervallVec a Vector of Intervals, the complement of the
317 * set of characters to get the class codes for
318 *
319 * @return an array with the class codes for the complement of intervallVec
320 */
321 public int [] getNotClassCodes(Vector /* Interval */ intervallVec) {
322 return getClassCodes(new IntCharSet(intervallVec), true);
323 }
324
325
326 /**
327 * Check consistency of the stored classes [debug].
328 *
329 * all classes must be disjoint, checks if all characters
330 * have a class assigned.
331 */
332 public void check() {
333 for (int i = 0; i < classes.size(); i++)
334 for (int j = i+1; j < classes.size(); j++) {
335 IntCharSet x = (IntCharSet) classes.elementAt(i);
336 IntCharSet y = (IntCharSet) classes.elementAt(j);
337 if ( x.and(y).containsElements() ) {
338 System.out.println("Error: non disjoint char classes "+i+" and "+j);
339 System.out.println("class "+i+": "+x);
340 System.out.println("class "+j+": "+y);
341 }
342 }
343
344 // check if each character has a classcode
345 // (= if getClassCode terminates)
346 for (char c = 0; c < maxChar; c++) {
347 getClassCode(c);
348 if (c % 100 == 0) System.out.print(".");
349 }
350
351 getClassCode(maxChar);
352 }
353
354
355 /**
356 * Returns an array of all CharClassIntervalls in this
357 * char class collection.
358 *
359 * The array is ordered by char code, i.e.
360 * <code>result[i+1].start = result[i].end+1</code>
361 *
362 * Each CharClassInterval contains the number of the
363 * char class it belongs to.
364 */
365 public CharClassInterval [] getIntervals() {
366 int i, c;
367 int size = classes.size();
368 int numIntervalls = 0;
369
370 for (i = 0; i < size; i++)
371 numIntervalls+= ((IntCharSet) classes.elementAt(i)).numIntervalls();
372
373 CharClassInterval [] result = new CharClassInterval[numIntervalls];
374
375 i = 0;
376 c = 0;
377 while (i < numIntervalls) {
378 int code = getClassCode((char) c);
379 IntCharSet set = (IntCharSet) classes.elementAt(code);
380 Interval iv = set.getNext();
381
382 result[i++] = new CharClassInterval(iv.start, iv.end, code);
383 c = iv.end+1;
384 }
385
386 return result;
387 }
388}
Note: See TracBrowser for help on using the repository browser.