1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
---|
2 | * JFlex 1.4.3 *
|
---|
3 | * Copyright (C) 1998-2009 Gerwin Klein <[email protected]> *
|
---|
4 | * All rights reserved. *
|
---|
5 | * *
|
---|
6 | * This program is free software; you can redistribute it and/or modify *
|
---|
7 | * it under the terms of the GNU General Public License. See the file *
|
---|
8 | * COPYRIGHT for more information. *
|
---|
9 | * *
|
---|
10 | * This program is distributed in the hope that it will be useful, *
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
---|
13 | * GNU General Public License for more details. *
|
---|
14 | * *
|
---|
15 | * You should have received a copy of the GNU General Public License along *
|
---|
16 | * with this program; if not, write to the Free Software Foundation, Inc., *
|
---|
17 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
|
---|
18 | * *
|
---|
19 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
---|
20 |
|
---|
21 | package JFlex;
|
---|
22 |
|
---|
23 | import java.util.Vector;
|
---|
24 |
|
---|
25 | /**
|
---|
26 | * Stores a regular expression of rules section in a JFlex-specification.
|
---|
27 | *
|
---|
28 | * This base class has no content other than its type.
|
---|
29 | *
|
---|
30 | * @author Gerwin Klein
|
---|
31 | * @version JFlex 1.4.3, $Revision: 433 $, $Date: 2009-01-31 19:52:34 +1100 (Sat, 31 Jan 2009) $
|
---|
32 | */
|
---|
33 | public class RegExp {
|
---|
34 |
|
---|
35 | /**
|
---|
36 | * The type of the regular expression. This field will be
|
---|
37 | * filled with values from class sym.java (generated by cup)
|
---|
38 | */
|
---|
39 | int type;
|
---|
40 |
|
---|
41 |
|
---|
42 | /**
|
---|
43 | * Create a new regular expression of the specified type.
|
---|
44 | *
|
---|
45 | * @param type a value from the cup generated class sym.
|
---|
46 | *
|
---|
47 | * @see JFlex.sym
|
---|
48 | */
|
---|
49 | public RegExp(int type) {
|
---|
50 | this.type = type;
|
---|
51 | }
|
---|
52 |
|
---|
53 |
|
---|
54 |
|
---|
55 | /**
|
---|
56 | * Returns a String-representation of this regular expression
|
---|
57 | * with the specified indentation.
|
---|
58 | *
|
---|
59 | * @param tab a String that should contain only space characters and
|
---|
60 | * that is inserted in front of standard String-representation
|
---|
61 | * pf this object.
|
---|
62 | */
|
---|
63 | public String print(String tab) {
|
---|
64 | return tab+toString();
|
---|
65 | }
|
---|
66 |
|
---|
67 |
|
---|
68 | /**
|
---|
69 | * Returns a String-representation of this regular expression
|
---|
70 | */
|
---|
71 | public String toString() {
|
---|
72 | return "type = "+type;
|
---|
73 | }
|
---|
74 |
|
---|
75 |
|
---|
76 | /**
|
---|
77 | * Find out if this regexp is a char class or equivalent to one.
|
---|
78 | *
|
---|
79 | * @param macros for macro expansion
|
---|
80 | * @return true if the regexp is equivalent to a char class.
|
---|
81 | */
|
---|
82 | public boolean isCharClass(Macros macros) {
|
---|
83 | RegExp1 unary;
|
---|
84 | RegExp2 binary;
|
---|
85 |
|
---|
86 | switch (type) {
|
---|
87 | case sym.CHAR:
|
---|
88 | case sym.CHAR_I:
|
---|
89 | case sym.CCLASS:
|
---|
90 | case sym.CCLASSNOT:
|
---|
91 | return true;
|
---|
92 |
|
---|
93 | case sym.BAR:
|
---|
94 | binary = (RegExp2) this;
|
---|
95 | return binary.r1.isCharClass(macros) && binary.r2.isCharClass(macros);
|
---|
96 |
|
---|
97 | case sym.MACROUSE:
|
---|
98 | unary = (RegExp1) this;
|
---|
99 | return macros.getDefinition((String) unary.content).isCharClass(macros);
|
---|
100 |
|
---|
101 | default: return false;
|
---|
102 | }
|
---|
103 | }
|
---|
104 |
|
---|
105 | /**
|
---|
106 | * The approximate number of NFA states this expression will need (only
|
---|
107 | * works correctly after macro expansion and without negation)
|
---|
108 | *
|
---|
109 | * @param macros macro table for expansion
|
---|
110 | */
|
---|
111 | public int size(Macros macros) {
|
---|
112 | RegExp1 unary;
|
---|
113 | RegExp2 binary;
|
---|
114 | RegExp content;
|
---|
115 |
|
---|
116 | switch ( type ) {
|
---|
117 | case sym.BAR:
|
---|
118 | binary = (RegExp2) this;
|
---|
119 | return binary.r1.size(macros) + binary.r2.size(macros) + 2;
|
---|
120 |
|
---|
121 | case sym.CONCAT:
|
---|
122 | binary = (RegExp2) this;
|
---|
123 | return binary.r1.size(macros) + binary.r2.size(macros);
|
---|
124 |
|
---|
125 | case sym.STAR:
|
---|
126 | unary = (RegExp1) this;
|
---|
127 | content = (RegExp) unary.content;
|
---|
128 | return content.size(macros) + 2;
|
---|
129 |
|
---|
130 | case sym.PLUS:
|
---|
131 | unary = (RegExp1) this;
|
---|
132 | content = (RegExp) unary.content;
|
---|
133 | return content.size(macros) + 2;
|
---|
134 |
|
---|
135 | case sym.QUESTION:
|
---|
136 | unary = (RegExp1) this;
|
---|
137 | content = (RegExp) unary.content;
|
---|
138 | return content.size(macros);
|
---|
139 |
|
---|
140 | case sym.BANG:
|
---|
141 | unary = (RegExp1) this;
|
---|
142 | content = (RegExp) unary.content;
|
---|
143 | return content.size(macros) * content.size(macros);
|
---|
144 | // this is only a very rough estimate (worst case 2^n)
|
---|
145 | // exact size too complicated (propably requires construction)
|
---|
146 |
|
---|
147 | case sym.TILDE:
|
---|
148 | unary = (RegExp1) this;
|
---|
149 | content = (RegExp) unary.content;
|
---|
150 | return content.size(macros) * content.size(macros) * 3;
|
---|
151 | // see sym.BANG
|
---|
152 |
|
---|
153 | case sym.STRING:
|
---|
154 | case sym.STRING_I:
|
---|
155 | unary = (RegExp1) this;
|
---|
156 | return ((String) unary.content).length()+1;
|
---|
157 |
|
---|
158 | case sym.CHAR:
|
---|
159 | case sym.CHAR_I:
|
---|
160 | return 2;
|
---|
161 |
|
---|
162 | case sym.CCLASS:
|
---|
163 | case sym.CCLASSNOT:
|
---|
164 | return 2;
|
---|
165 |
|
---|
166 | case sym.MACROUSE:
|
---|
167 | unary = (RegExp1) this;
|
---|
168 | return macros.getDefinition((String) unary.content).size(macros);
|
---|
169 | }
|
---|
170 |
|
---|
171 | throw new Error("unknown regexp type "+type);
|
---|
172 | }
|
---|
173 |
|
---|
174 | /**
|
---|
175 | * @return the reverse of the specified string.
|
---|
176 | */
|
---|
177 | public final static String revString(String s) {
|
---|
178 | StringBuffer b = new StringBuffer(s.length());
|
---|
179 | for (int i=s.length()-1; i >= 0; i--) {
|
---|
180 | b.append(s.charAt(i));
|
---|
181 | }
|
---|
182 | return b.toString();
|
---|
183 | }
|
---|
184 |
|
---|
185 | /**
|
---|
186 | * Recursively convert tilde (upto) expressions into negation and star.
|
---|
187 | *
|
---|
188 | * @param macros the macro table for expansion.
|
---|
189 | * @return new RegExp equivalent to the current one, but without upto expressions.
|
---|
190 | */
|
---|
191 | public final RegExp resolveTilde(Macros macros) {
|
---|
192 | RegExp1 unary;
|
---|
193 | RegExp2 binary;
|
---|
194 | RegExp content;
|
---|
195 |
|
---|
196 | switch ( type ) {
|
---|
197 | case sym.BAR:
|
---|
198 | binary = (RegExp2) this;
|
---|
199 | return new RegExp2(sym.BAR, binary.r1.resolveTilde(macros),
|
---|
200 | binary.r2.resolveTilde(macros));
|
---|
201 |
|
---|
202 | case sym.CONCAT:
|
---|
203 | binary = (RegExp2) this;
|
---|
204 | return new RegExp2(sym.CONCAT, binary.r1.resolveTilde(macros),
|
---|
205 | binary.r2.resolveTilde(macros));
|
---|
206 |
|
---|
207 | case sym.STAR:
|
---|
208 | unary = (RegExp1) this;
|
---|
209 | content = (RegExp) unary.content;
|
---|
210 | return new RegExp1(sym.STAR, content.resolveTilde(macros));
|
---|
211 |
|
---|
212 | case sym.PLUS:
|
---|
213 | unary = (RegExp1) this;
|
---|
214 | content = (RegExp) unary.content;
|
---|
215 | return new RegExp1(sym.PLUS, content.resolveTilde(macros));
|
---|
216 |
|
---|
217 | case sym.QUESTION:
|
---|
218 | unary = (RegExp1) this;
|
---|
219 | content = (RegExp) unary.content;
|
---|
220 | return new RegExp1(sym.QUESTION, content.resolveTilde(macros));
|
---|
221 |
|
---|
222 | case sym.BANG:
|
---|
223 | unary = (RegExp1) this;
|
---|
224 | content = (RegExp) unary.content;
|
---|
225 | return new RegExp1(sym.BANG, content.resolveTilde(macros));
|
---|
226 |
|
---|
227 | case sym.TILDE:
|
---|
228 | // ~a = !([^]* a [^]*) a
|
---|
229 | // uses subexpression sharing
|
---|
230 | unary = (RegExp1) this;
|
---|
231 | content = ((RegExp) unary.content).resolveTilde(macros);
|
---|
232 |
|
---|
233 | RegExp any_star = new RegExp1(sym.STAR, anyChar());
|
---|
234 | RegExp neg = new RegExp1(sym.BANG,
|
---|
235 | new RegExp2(sym.CONCAT, any_star,
|
---|
236 | new RegExp2(sym.CONCAT, content, any_star)));
|
---|
237 |
|
---|
238 | return new RegExp2(sym.CONCAT, neg, content);
|
---|
239 |
|
---|
240 | case sym.STRING:
|
---|
241 | case sym.STRING_I:
|
---|
242 | case sym.CHAR:
|
---|
243 | case sym.CHAR_I:
|
---|
244 | case sym.CCLASS:
|
---|
245 | case sym.CCLASSNOT:
|
---|
246 | unary = (RegExp1) this;
|
---|
247 | return new RegExp1(unary.type, unary.content);
|
---|
248 |
|
---|
249 | case sym.MACROUSE:
|
---|
250 | unary = (RegExp1) this;
|
---|
251 | return macros.getDefinition((String) unary.content).resolveTilde(macros);
|
---|
252 | }
|
---|
253 |
|
---|
254 | throw new Error("unknown regexp type "+type);
|
---|
255 | }
|
---|
256 |
|
---|
257 |
|
---|
258 | /**
|
---|
259 | * Returns a regexp that matches any character: <code>[^]</code>
|
---|
260 | * @return the regexp for <code>[^]</code>
|
---|
261 | */
|
---|
262 | public RegExp anyChar() {
|
---|
263 | // FIXME: there is some code duplication here with the parser
|
---|
264 | Vector list = new Vector();
|
---|
265 | list.addElement(new Interval((char)0,CharClasses.maxChar));
|
---|
266 | return new RegExp1(sym.CCLASS,list);
|
---|
267 | }
|
---|
268 |
|
---|
269 |
|
---|
270 | /**
|
---|
271 | * Create a new regexp that matches the reverse text of this one.
|
---|
272 | *
|
---|
273 | * @return the reverse regexp
|
---|
274 | */
|
---|
275 | public final RegExp rev(Macros macros) {
|
---|
276 | RegExp1 unary;
|
---|
277 | RegExp2 binary;
|
---|
278 | RegExp content;
|
---|
279 |
|
---|
280 | switch ( type ) {
|
---|
281 | case sym.BAR:
|
---|
282 | binary = (RegExp2) this;
|
---|
283 | return new RegExp2(sym.BAR, binary.r1.rev(macros), binary.r2.rev(macros));
|
---|
284 |
|
---|
285 | case sym.CONCAT:
|
---|
286 | binary = (RegExp2) this;
|
---|
287 | return new RegExp2(sym.CONCAT, binary.r2.rev(macros), binary.r1.rev(macros));
|
---|
288 |
|
---|
289 | case sym.STAR:
|
---|
290 | unary = (RegExp1) this;
|
---|
291 | content = (RegExp) unary.content;
|
---|
292 | return new RegExp1(sym.STAR, content.rev(macros));
|
---|
293 |
|
---|
294 | case sym.PLUS:
|
---|
295 | unary = (RegExp1) this;
|
---|
296 | content = (RegExp) unary.content;
|
---|
297 | return new RegExp1(sym.PLUS, content.rev(macros));
|
---|
298 |
|
---|
299 | case sym.QUESTION:
|
---|
300 | unary = (RegExp1) this;
|
---|
301 | content = (RegExp) unary.content;
|
---|
302 | return new RegExp1(sym.QUESTION, content.rev(macros));
|
---|
303 |
|
---|
304 | case sym.BANG:
|
---|
305 | unary = (RegExp1) this;
|
---|
306 | content = (RegExp) unary.content;
|
---|
307 | return new RegExp1(sym.BANG, content.rev(macros));
|
---|
308 |
|
---|
309 | case sym.TILDE:
|
---|
310 | content = resolveTilde(macros);
|
---|
311 | return content.rev(macros);
|
---|
312 |
|
---|
313 | case sym.STRING:
|
---|
314 | case sym.STRING_I:
|
---|
315 | unary = (RegExp1) this;
|
---|
316 | return new RegExp1(unary.type, revString((String) unary.content));
|
---|
317 |
|
---|
318 | case sym.CHAR:
|
---|
319 | case sym.CHAR_I:
|
---|
320 | case sym.CCLASS:
|
---|
321 | case sym.CCLASSNOT:
|
---|
322 | unary = (RegExp1) this;
|
---|
323 | return new RegExp1(unary.type, unary.content);
|
---|
324 |
|
---|
325 | case sym.MACROUSE:
|
---|
326 | unary = (RegExp1) this;
|
---|
327 | return macros.getDefinition((String) unary.content).rev(macros);
|
---|
328 | }
|
---|
329 |
|
---|
330 | throw new Error("unknown regexp type "+type);
|
---|
331 | }
|
---|
332 | }
|
---|