source: other-projects/rsyntax-textarea/devel-packages/jflex-1.4.3/src/JFlex/RegExp.java@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut an a text edit area for GLI that supports color syntax highlighting

File size: 9.8 KB
Line 
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * JFlex 1.4.3 *
3 * Copyright (C) 1998-2009 Gerwin Klein <[email protected]> *
4 * All rights reserved. *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License. See the file *
8 * COPYRIGHT for more information. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License along *
16 * with this program; if not, write to the Free Software Foundation, Inc., *
17 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
18 * *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21package JFlex;
22
23import java.util.Vector;
24
25/**
26 * Stores a regular expression of rules section in a JFlex-specification.
27 *
28 * This base class has no content other than its type.
29 *
30 * @author Gerwin Klein
31 * @version JFlex 1.4.3, $Revision: 433 $, $Date: 2009-01-31 19:52:34 +1100 (Sat, 31 Jan 2009) $
32 */
33public class RegExp {
34
35 /**
36 * The type of the regular expression. This field will be
37 * filled with values from class sym.java (generated by cup)
38 */
39 int type;
40
41
42 /**
43 * Create a new regular expression of the specified type.
44 *
45 * @param type a value from the cup generated class sym.
46 *
47 * @see JFlex.sym
48 */
49 public RegExp(int type) {
50 this.type = type;
51 }
52
53
54
55 /**
56 * Returns a String-representation of this regular expression
57 * with the specified indentation.
58 *
59 * @param tab a String that should contain only space characters and
60 * that is inserted in front of standard String-representation
61 * pf this object.
62 */
63 public String print(String tab) {
64 return tab+toString();
65 }
66
67
68 /**
69 * Returns a String-representation of this regular expression
70 */
71 public String toString() {
72 return "type = "+type;
73 }
74
75
76 /**
77 * Find out if this regexp is a char class or equivalent to one.
78 *
79 * @param macros for macro expansion
80 * @return true if the regexp is equivalent to a char class.
81 */
82 public boolean isCharClass(Macros macros) {
83 RegExp1 unary;
84 RegExp2 binary;
85
86 switch (type) {
87 case sym.CHAR:
88 case sym.CHAR_I:
89 case sym.CCLASS:
90 case sym.CCLASSNOT:
91 return true;
92
93 case sym.BAR:
94 binary = (RegExp2) this;
95 return binary.r1.isCharClass(macros) && binary.r2.isCharClass(macros);
96
97 case sym.MACROUSE:
98 unary = (RegExp1) this;
99 return macros.getDefinition((String) unary.content).isCharClass(macros);
100
101 default: return false;
102 }
103 }
104
105 /**
106 * The approximate number of NFA states this expression will need (only
107 * works correctly after macro expansion and without negation)
108 *
109 * @param macros macro table for expansion
110 */
111 public int size(Macros macros) {
112 RegExp1 unary;
113 RegExp2 binary;
114 RegExp content;
115
116 switch ( type ) {
117 case sym.BAR:
118 binary = (RegExp2) this;
119 return binary.r1.size(macros) + binary.r2.size(macros) + 2;
120
121 case sym.CONCAT:
122 binary = (RegExp2) this;
123 return binary.r1.size(macros) + binary.r2.size(macros);
124
125 case sym.STAR:
126 unary = (RegExp1) this;
127 content = (RegExp) unary.content;
128 return content.size(macros) + 2;
129
130 case sym.PLUS:
131 unary = (RegExp1) this;
132 content = (RegExp) unary.content;
133 return content.size(macros) + 2;
134
135 case sym.QUESTION:
136 unary = (RegExp1) this;
137 content = (RegExp) unary.content;
138 return content.size(macros);
139
140 case sym.BANG:
141 unary = (RegExp1) this;
142 content = (RegExp) unary.content;
143 return content.size(macros) * content.size(macros);
144 // this is only a very rough estimate (worst case 2^n)
145 // exact size too complicated (propably requires construction)
146
147 case sym.TILDE:
148 unary = (RegExp1) this;
149 content = (RegExp) unary.content;
150 return content.size(macros) * content.size(macros) * 3;
151 // see sym.BANG
152
153 case sym.STRING:
154 case sym.STRING_I:
155 unary = (RegExp1) this;
156 return ((String) unary.content).length()+1;
157
158 case sym.CHAR:
159 case sym.CHAR_I:
160 return 2;
161
162 case sym.CCLASS:
163 case sym.CCLASSNOT:
164 return 2;
165
166 case sym.MACROUSE:
167 unary = (RegExp1) this;
168 return macros.getDefinition((String) unary.content).size(macros);
169 }
170
171 throw new Error("unknown regexp type "+type);
172 }
173
174 /**
175 * @return the reverse of the specified string.
176 */
177 public final static String revString(String s) {
178 StringBuffer b = new StringBuffer(s.length());
179 for (int i=s.length()-1; i >= 0; i--) {
180 b.append(s.charAt(i));
181 }
182 return b.toString();
183 }
184
185 /**
186 * Recursively convert tilde (upto) expressions into negation and star.
187 *
188 * @param macros the macro table for expansion.
189 * @return new RegExp equivalent to the current one, but without upto expressions.
190 */
191 public final RegExp resolveTilde(Macros macros) {
192 RegExp1 unary;
193 RegExp2 binary;
194 RegExp content;
195
196 switch ( type ) {
197 case sym.BAR:
198 binary = (RegExp2) this;
199 return new RegExp2(sym.BAR, binary.r1.resolveTilde(macros),
200 binary.r2.resolveTilde(macros));
201
202 case sym.CONCAT:
203 binary = (RegExp2) this;
204 return new RegExp2(sym.CONCAT, binary.r1.resolveTilde(macros),
205 binary.r2.resolveTilde(macros));
206
207 case sym.STAR:
208 unary = (RegExp1) this;
209 content = (RegExp) unary.content;
210 return new RegExp1(sym.STAR, content.resolveTilde(macros));
211
212 case sym.PLUS:
213 unary = (RegExp1) this;
214 content = (RegExp) unary.content;
215 return new RegExp1(sym.PLUS, content.resolveTilde(macros));
216
217 case sym.QUESTION:
218 unary = (RegExp1) this;
219 content = (RegExp) unary.content;
220 return new RegExp1(sym.QUESTION, content.resolveTilde(macros));
221
222 case sym.BANG:
223 unary = (RegExp1) this;
224 content = (RegExp) unary.content;
225 return new RegExp1(sym.BANG, content.resolveTilde(macros));
226
227 case sym.TILDE:
228 // ~a = !([^]* a [^]*) a
229 // uses subexpression sharing
230 unary = (RegExp1) this;
231 content = ((RegExp) unary.content).resolveTilde(macros);
232
233 RegExp any_star = new RegExp1(sym.STAR, anyChar());
234 RegExp neg = new RegExp1(sym.BANG,
235 new RegExp2(sym.CONCAT, any_star,
236 new RegExp2(sym.CONCAT, content, any_star)));
237
238 return new RegExp2(sym.CONCAT, neg, content);
239
240 case sym.STRING:
241 case sym.STRING_I:
242 case sym.CHAR:
243 case sym.CHAR_I:
244 case sym.CCLASS:
245 case sym.CCLASSNOT:
246 unary = (RegExp1) this;
247 return new RegExp1(unary.type, unary.content);
248
249 case sym.MACROUSE:
250 unary = (RegExp1) this;
251 return macros.getDefinition((String) unary.content).resolveTilde(macros);
252 }
253
254 throw new Error("unknown regexp type "+type);
255 }
256
257
258 /**
259 * Returns a regexp that matches any character: <code>[^]</code>
260 * @return the regexp for <code>[^]</code>
261 */
262 public RegExp anyChar() {
263 // FIXME: there is some code duplication here with the parser
264 Vector list = new Vector();
265 list.addElement(new Interval((char)0,CharClasses.maxChar));
266 return new RegExp1(sym.CCLASS,list);
267 }
268
269
270 /**
271 * Create a new regexp that matches the reverse text of this one.
272 *
273 * @return the reverse regexp
274 */
275 public final RegExp rev(Macros macros) {
276 RegExp1 unary;
277 RegExp2 binary;
278 RegExp content;
279
280 switch ( type ) {
281 case sym.BAR:
282 binary = (RegExp2) this;
283 return new RegExp2(sym.BAR, binary.r1.rev(macros), binary.r2.rev(macros));
284
285 case sym.CONCAT:
286 binary = (RegExp2) this;
287 return new RegExp2(sym.CONCAT, binary.r2.rev(macros), binary.r1.rev(macros));
288
289 case sym.STAR:
290 unary = (RegExp1) this;
291 content = (RegExp) unary.content;
292 return new RegExp1(sym.STAR, content.rev(macros));
293
294 case sym.PLUS:
295 unary = (RegExp1) this;
296 content = (RegExp) unary.content;
297 return new RegExp1(sym.PLUS, content.rev(macros));
298
299 case sym.QUESTION:
300 unary = (RegExp1) this;
301 content = (RegExp) unary.content;
302 return new RegExp1(sym.QUESTION, content.rev(macros));
303
304 case sym.BANG:
305 unary = (RegExp1) this;
306 content = (RegExp) unary.content;
307 return new RegExp1(sym.BANG, content.rev(macros));
308
309 case sym.TILDE:
310 content = resolveTilde(macros);
311 return content.rev(macros);
312
313 case sym.STRING:
314 case sym.STRING_I:
315 unary = (RegExp1) this;
316 return new RegExp1(unary.type, revString((String) unary.content));
317
318 case sym.CHAR:
319 case sym.CHAR_I:
320 case sym.CCLASS:
321 case sym.CCLASSNOT:
322 unary = (RegExp1) this;
323 return new RegExp1(unary.type, unary.content);
324
325 case sym.MACROUSE:
326 unary = (RegExp1) this;
327 return macros.getDefinition((String) unary.content).rev(macros);
328 }
329
330 throw new Error("unknown regexp type "+type);
331 }
332}
Note: See TracBrowser for help on using the repository browser.