1 | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
---|
2 | * JFlex 1.4.3 *
|
---|
3 | * Copyright (C) 1998-2009 Gerwin Klein <[email protected]> *
|
---|
4 | * All rights reserved. *
|
---|
5 | * *
|
---|
6 | * This program is free software; you can redistribute it and/or modify *
|
---|
7 | * it under the terms of the GNU General Public License. See the file *
|
---|
8 | * COPYRIGHT for more information. *
|
---|
9 | * *
|
---|
10 | * This program is distributed in the hope that it will be useful, *
|
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
---|
13 | * GNU General Public License for more details. *
|
---|
14 | * *
|
---|
15 | * You should have received a copy of the GNU General Public License along *
|
---|
16 | * with this program; if not, write to the Free Software Foundation, Inc., *
|
---|
17 | * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
|
---|
18 | * *
|
---|
19 | * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
---|
20 | package JFlex;
|
---|
21 |
|
---|
22 | import java.util.*;
|
---|
23 | import java.io.*;
|
---|
24 |
|
---|
25 |
|
---|
26 | /**
|
---|
27 | * NFA representation in JFlex.
|
---|
28 | *
|
---|
29 | * Contains algorithms RegExp -> NFA and NFA -> DFA.
|
---|
30 | *
|
---|
31 | * @author Gerwin Klein
|
---|
32 | * @version JFlex 1.4.3, $Revision: 433 $, $Date: 2009-01-31 19:52:34 +1100 (Sat, 31 Jan 2009) $
|
---|
33 | */
|
---|
34 | final public class NFA {
|
---|
35 |
|
---|
36 | /** table[current_state][next_char] is the set of states that can be reached
|
---|
37 | /* from current_state with an input next_char */
|
---|
38 | StateSet [][] table;
|
---|
39 |
|
---|
40 | /** epsilon[current_state] is the set of states that can be reached
|
---|
41 | /* from current_state via epsilon edges */
|
---|
42 | StateSet [] epsilon;
|
---|
43 |
|
---|
44 | /** isFinal[state] == true <=> state is a final state of the NFA */
|
---|
45 | boolean [] isFinal;
|
---|
46 |
|
---|
47 | /** action[current_state]: the action associated with the state
|
---|
48 | /* current_state (null, if there is no action for the state) */
|
---|
49 | Action [] action;
|
---|
50 |
|
---|
51 | /** the number of states in this NFA */
|
---|
52 | int numStates;
|
---|
53 |
|
---|
54 | /** the current maximum number of input characters */
|
---|
55 | int numInput;
|
---|
56 |
|
---|
57 | /** the number of lexical States. Lexical states have the indices
|
---|
58 | /* 0..numLexStates-1 in the transition table */
|
---|
59 | int numLexStates;
|
---|
60 |
|
---|
61 | /** estimated size of the NFA (before actual construction) */
|
---|
62 | int estSize = 256;
|
---|
63 |
|
---|
64 | Macros macros;
|
---|
65 | CharClasses classes;
|
---|
66 |
|
---|
67 | LexScan scanner;
|
---|
68 | RegExps regExps;
|
---|
69 |
|
---|
70 | // will be reused by several methods (avoids excessive object creation)
|
---|
71 | private static StateSetEnumerator states = new StateSetEnumerator();
|
---|
72 | private static StateSet tempStateSet = new StateSet();
|
---|
73 |
|
---|
74 | public NFA(int numInput, int estSize) {
|
---|
75 | this.numInput = numInput;
|
---|
76 | this.estSize = estSize;
|
---|
77 | numStates = 0;
|
---|
78 | epsilon = new StateSet [estSize];
|
---|
79 | action = new Action [estSize];
|
---|
80 | isFinal = new boolean [estSize];
|
---|
81 | table = new StateSet [estSize][numInput];
|
---|
82 | }
|
---|
83 |
|
---|
84 | /**
|
---|
85 | * Construct new NFA.
|
---|
86 | *
|
---|
87 | * Assumes that lookahead cases and numbers are already resolved in RegExps.
|
---|
88 | * @see RegExps#checkLookAheads()
|
---|
89 | */
|
---|
90 | public NFA(int numInput, LexScan scanner, RegExps regExps,
|
---|
91 | Macros macros, CharClasses classes) {
|
---|
92 | this(numInput, regExps.NFASize(macros)+2*scanner.states.number());
|
---|
93 |
|
---|
94 | this.scanner = scanner;
|
---|
95 | this.regExps = regExps;
|
---|
96 | this.macros = macros;
|
---|
97 | this.classes = classes;
|
---|
98 |
|
---|
99 | numLexStates = scanner.states.number();
|
---|
100 |
|
---|
101 | // ensureCapacity assumes correctly set up numStates.
|
---|
102 | int new_num = numEntryStates();
|
---|
103 | ensureCapacity(new_num);
|
---|
104 | numStates = new_num;
|
---|
105 | }
|
---|
106 |
|
---|
107 | public int numEntryStates() {
|
---|
108 | return 2*(numLexStates+regExps.gen_look_count);
|
---|
109 | }
|
---|
110 |
|
---|
111 | /**
|
---|
112 | * Add a standalone rule that has minimum priority, fires a transition
|
---|
113 | * on all single input characters and has a "print yytext" action.
|
---|
114 | */
|
---|
115 | public void addStandaloneRule() {
|
---|
116 | int start = numStates;
|
---|
117 | int end = numStates+1;
|
---|
118 |
|
---|
119 | for (int c = 0; c < classes.getNumClasses(); c++)
|
---|
120 | addTransition(start, c, end);
|
---|
121 |
|
---|
122 | for (int i = 0; i < numLexStates*2; i++)
|
---|
123 | addEpsilonTransition(i, start);
|
---|
124 |
|
---|
125 | action[end] = new Action("System.out.print(yytext());", Integer.MAX_VALUE);
|
---|
126 | isFinal[end] = true;
|
---|
127 | }
|
---|
128 |
|
---|
129 | /**
|
---|
130 | * Add a regexp to this NFA.
|
---|
131 | *
|
---|
132 | * @param regExpNum the number of the regexp to add.
|
---|
133 | */
|
---|
134 | public void addRegExp(int regExpNum) {
|
---|
135 |
|
---|
136 | if (Options.DEBUG)
|
---|
137 | Out.debug("Adding nfa for regexp "+regExpNum+" :"+Out.NL+regExps.getRegExp(regExpNum));
|
---|
138 |
|
---|
139 | IntPair nfa = insertNFA( regExps.getRegExp(regExpNum) );
|
---|
140 |
|
---|
141 | Enumeration lexStates = regExps.getStates(regExpNum).elements();
|
---|
142 |
|
---|
143 | if ( !lexStates.hasMoreElements() )
|
---|
144 | lexStates = scanner.states.getInclusiveStates();
|
---|
145 |
|
---|
146 | while ( lexStates.hasMoreElements() ) {
|
---|
147 | int stateNum = ((Integer)lexStates.nextElement()).intValue();
|
---|
148 |
|
---|
149 | if ( !regExps.isBOL(regExpNum) )
|
---|
150 | addEpsilonTransition(2*stateNum, nfa.start);
|
---|
151 |
|
---|
152 | addEpsilonTransition(2*stateNum+1, nfa.start);
|
---|
153 | }
|
---|
154 |
|
---|
155 |
|
---|
156 | if ( regExps.getLookAhead(regExpNum) != null ) {
|
---|
157 | Action a = regExps.getAction(regExpNum);
|
---|
158 |
|
---|
159 | if (a.lookAhead() == Action.FINITE_CHOICE) {
|
---|
160 | insertLookAheadChoices(nfa.end, a, regExps.getLookAhead(regExpNum));
|
---|
161 | // remove the original action from the collection: it will never
|
---|
162 | // be matched directly, only its copies will.
|
---|
163 | scanner.actions.remove(a);
|
---|
164 | }
|
---|
165 | else {
|
---|
166 | RegExp r1 = regExps.getRegExp(regExpNum);
|
---|
167 | RegExp r2 = regExps.getLookAhead(regExpNum);
|
---|
168 |
|
---|
169 | IntPair look = insertNFA(r2);
|
---|
170 |
|
---|
171 | addEpsilonTransition(nfa.end, look.start);
|
---|
172 |
|
---|
173 | action[look.end] = a;
|
---|
174 | isFinal[look.end] = true;
|
---|
175 |
|
---|
176 | if (a.lookAhead() == Action.GENERAL_LOOK) {
|
---|
177 | // base forward pass
|
---|
178 | IntPair forward = insertNFA(r1);
|
---|
179 | // lookahead backward pass
|
---|
180 | IntPair backward = insertNFA(r2.rev(macros));
|
---|
181 |
|
---|
182 | isFinal[forward.end] = true;
|
---|
183 | action[forward.end] = new Action(Action.FORWARD_ACTION);
|
---|
184 |
|
---|
185 | isFinal[backward.end] = true;
|
---|
186 | action[backward.end] = new Action(Action.BACKWARD_ACTION);
|
---|
187 |
|
---|
188 | int entry = 2*(regExps.getLookEntry(regExpNum) + numLexStates);
|
---|
189 | addEpsilonTransition(entry, forward.start);
|
---|
190 | addEpsilonTransition(entry+1, backward.start);
|
---|
191 |
|
---|
192 | a.setEntryState(entry);
|
---|
193 | }
|
---|
194 | }
|
---|
195 | }
|
---|
196 | else {
|
---|
197 | action[nfa.end] = regExps.getAction(regExpNum);
|
---|
198 | isFinal[nfa.end] = true;
|
---|
199 | }
|
---|
200 | }
|
---|
201 |
|
---|
202 | /**
|
---|
203 | * Insert NFAs for the (finitely many) fixed length lookahead choices.
|
---|
204 | *
|
---|
205 | * @param lookAhead a lookahead of which isFiniteChoice is true
|
---|
206 | * @param baseEnd the end state of the base expression NFA
|
---|
207 | * @param a the action of the expression
|
---|
208 | *
|
---|
209 | * @see SemCheck#isFiniteChoice(RegExp)
|
---|
210 | */
|
---|
211 | private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
|
---|
212 | if (lookAhead.type == sym.BAR) {
|
---|
213 | RegExp2 r = (RegExp2) lookAhead;
|
---|
214 | insertLookAheadChoices(baseEnd, a, r.r1);
|
---|
215 | insertLookAheadChoices(baseEnd, a, r.r2);
|
---|
216 | }
|
---|
217 | else if (lookAhead.type == sym.MACROUSE) {
|
---|
218 | RegExp1 r = (RegExp1) lookAhead;
|
---|
219 | insertLookAheadChoices(baseEnd, a, macros.getDefinition((String) r.content));
|
---|
220 | }
|
---|
221 | else {
|
---|
222 | int len = SemCheck.length(lookAhead);
|
---|
223 |
|
---|
224 | if (len >= 0) {
|
---|
225 | // termination case
|
---|
226 | IntPair look = insertNFA(lookAhead);
|
---|
227 |
|
---|
228 | addEpsilonTransition(baseEnd, look.start);
|
---|
229 |
|
---|
230 | Action x = a.copyChoice(len);
|
---|
231 | action[look.end] = x;
|
---|
232 | isFinal[look.end] = true;
|
---|
233 |
|
---|
234 | // add new copy to the collection of known actions such that
|
---|
235 | // it can be checked for the NEVER_MATCH warning.
|
---|
236 | scanner.actions.add(x);
|
---|
237 | }
|
---|
238 | else {
|
---|
239 | // should never happen
|
---|
240 | throw new Error("When inserting lookahead expression: unkown expression type "+lookAhead.type+" in "+lookAhead); //$NON-NLS-1$ //$NON-NLS-2$
|
---|
241 | }
|
---|
242 | }
|
---|
243 | }
|
---|
244 |
|
---|
245 | /**
|
---|
246 | * Make sure the NFA can contain at least newNumStates states.
|
---|
247 | *
|
---|
248 | * @param newNumStates the minimu number of states.
|
---|
249 | */
|
---|
250 | private void ensureCapacity(int newNumStates) {
|
---|
251 | int oldLength = epsilon.length;
|
---|
252 |
|
---|
253 | if ( newNumStates < oldLength ) return;
|
---|
254 |
|
---|
255 | int newStatesLength = Math.max(oldLength*2, newNumStates);
|
---|
256 |
|
---|
257 | boolean [] newFinal = new boolean [newStatesLength];
|
---|
258 | boolean [] newIsPush = new boolean [newStatesLength];
|
---|
259 | Action [] newAction = new Action [newStatesLength];
|
---|
260 | StateSet [] [] newTable = new StateSet [newStatesLength] [numInput];
|
---|
261 | StateSet [] newEpsilon = new StateSet [newStatesLength];
|
---|
262 |
|
---|
263 | System.arraycopy(isFinal,0,newFinal,0,numStates);
|
---|
264 | System.arraycopy(action,0,newAction,0,numStates);
|
---|
265 | System.arraycopy(epsilon,0,newEpsilon,0,numStates);
|
---|
266 | System.arraycopy(table,0,newTable,0,numStates);
|
---|
267 |
|
---|
268 | isFinal = newFinal;
|
---|
269 | action = newAction;
|
---|
270 | epsilon = newEpsilon;
|
---|
271 | table = newTable;
|
---|
272 | }
|
---|
273 |
|
---|
274 | public void addTransition(int start, int input, int dest) {
|
---|
275 | Out.debug("Adding transition ("+start+", "+input+", "+dest+")");
|
---|
276 |
|
---|
277 | int maxS = Math.max(start,dest)+1;
|
---|
278 |
|
---|
279 | ensureCapacity( maxS );
|
---|
280 |
|
---|
281 | if (maxS > numStates) numStates = maxS;
|
---|
282 |
|
---|
283 | if ( table[start][input] != null )
|
---|
284 | table[start][input].addState(dest);
|
---|
285 | else
|
---|
286 | table[start][input] = new StateSet(estSize,dest);
|
---|
287 | }
|
---|
288 |
|
---|
289 | public void addEpsilonTransition(int start, int dest) {
|
---|
290 | int max = Math.max(start,dest)+1;
|
---|
291 | ensureCapacity( max );
|
---|
292 | if (max > numStates) numStates = max;
|
---|
293 |
|
---|
294 | if (epsilon[start] != null)
|
---|
295 | epsilon[start].addState(dest);
|
---|
296 | else
|
---|
297 | epsilon[start] = new StateSet(estSize,dest);
|
---|
298 | }
|
---|
299 |
|
---|
300 |
|
---|
301 | /**
|
---|
302 | * Returns <code>true</code>, iff the specified set of states
|
---|
303 | * contains a final state.
|
---|
304 | *
|
---|
305 | * @param set the set of states that is tested for final states.
|
---|
306 | */
|
---|
307 | private boolean containsFinal(StateSet set) {
|
---|
308 | states.reset(set);
|
---|
309 |
|
---|
310 | while ( states.hasMoreElements() )
|
---|
311 | if ( isFinal[states.nextElement()] ) return true;
|
---|
312 |
|
---|
313 | return false;
|
---|
314 | }
|
---|
315 |
|
---|
316 |
|
---|
317 | /**
|
---|
318 | * Returns <code>true</code>, iff the specified set of states
|
---|
319 | * contains a pushback-state.
|
---|
320 | *
|
---|
321 | * @param set the set of states that is tested for pushback-states.
|
---|
322 | private boolean containsPushback(StateSet set) {
|
---|
323 | states.reset(set);
|
---|
324 |
|
---|
325 | while ( states.hasMoreElements() )
|
---|
326 | if ( isPushback[states.nextElement()] ) return true;
|
---|
327 |
|
---|
328 | return false;
|
---|
329 | }
|
---|
330 | */
|
---|
331 |
|
---|
332 | /**
|
---|
333 | * Returns the action with highest priority in the specified
|
---|
334 | * set of states.
|
---|
335 | *
|
---|
336 | * @param set the set of states for which to determine the action
|
---|
337 | */
|
---|
338 | private Action getAction(StateSet set) {
|
---|
339 |
|
---|
340 | states.reset(set);
|
---|
341 |
|
---|
342 | Action maxAction = null;
|
---|
343 |
|
---|
344 | Out.debug("Determining action of : "+set);
|
---|
345 |
|
---|
346 | while ( states.hasMoreElements() ) {
|
---|
347 |
|
---|
348 | Action currentAction = action[ states.nextElement() ];
|
---|
349 |
|
---|
350 | if ( currentAction != null ) {
|
---|
351 | if (maxAction == null)
|
---|
352 | maxAction = currentAction;
|
---|
353 | else
|
---|
354 | maxAction = maxAction.getHigherPriority(currentAction);
|
---|
355 | }
|
---|
356 |
|
---|
357 | }
|
---|
358 |
|
---|
359 | return maxAction;
|
---|
360 | }
|
---|
361 |
|
---|
362 |
|
---|
363 | /**
|
---|
364 | * Calculates the epsilon closure for a specified set of states.
|
---|
365 | *
|
---|
366 | * The epsilon closure for set a is the set of states that can be reached
|
---|
367 | * by epsilon edges from a.
|
---|
368 | *
|
---|
369 | * @param set the set of states to calculate the epsilon closure for
|
---|
370 | *
|
---|
371 | * @return the epsilon closure of the specified set of states
|
---|
372 | * in this NFA
|
---|
373 | */
|
---|
374 | private StateSet closure(int startState) {
|
---|
375 |
|
---|
376 | // Out.debug("Calculating closure of "+set);
|
---|
377 |
|
---|
378 | StateSet notvisited = tempStateSet;
|
---|
379 | StateSet closure = new StateSet(numStates,startState);
|
---|
380 |
|
---|
381 | notvisited.clear();
|
---|
382 | notvisited.addState(startState);
|
---|
383 |
|
---|
384 | while ( notvisited.containsElements() ) {
|
---|
385 | // Out.debug("closure is now "+closure);
|
---|
386 | // Out.debug("notvisited is "+notvisited);
|
---|
387 | int state = notvisited.getAndRemoveElement();
|
---|
388 | // Out.debug("removed element "+state+" of "+notvisited);
|
---|
389 | // Out.debug("epsilon[states] = "+epsilon[state]);
|
---|
390 | notvisited.add(closure.complement(epsilon[state]));
|
---|
391 | closure.add(epsilon[state]);
|
---|
392 | }
|
---|
393 |
|
---|
394 | // Out.debug("Closure is : "+closure);
|
---|
395 |
|
---|
396 | return closure;
|
---|
397 | }
|
---|
398 |
|
---|
399 | /**
|
---|
400 | * Returns the epsilon closure of a set of states
|
---|
401 | */
|
---|
402 | private StateSet closure(StateSet startStates) {
|
---|
403 | StateSet result = new StateSet(numStates);
|
---|
404 |
|
---|
405 | if (startStates != null) {
|
---|
406 | states.reset(startStates);
|
---|
407 | while (states.hasMoreElements())
|
---|
408 | result.add( closure(states.nextElement()) );
|
---|
409 | }
|
---|
410 |
|
---|
411 | return result;
|
---|
412 | }
|
---|
413 |
|
---|
414 |
|
---|
415 | private void epsilonFill() {
|
---|
416 | for (int i = 0; i < numStates; i++) {
|
---|
417 | epsilon[i] = closure(i);
|
---|
418 | }
|
---|
419 | }
|
---|
420 |
|
---|
421 | /**
|
---|
422 | * Calculates the set of states that can be reached from another
|
---|
423 | * set of states <code>start</code> with an specified input
|
---|
424 | * character <code>input</code>
|
---|
425 | *
|
---|
426 | * @param start the set of states to start from
|
---|
427 | * @param input the input character for which to search the next states
|
---|
428 | *
|
---|
429 | * @return the set of states that are reached from <code>start</code>
|
---|
430 | * via <code>input</code>
|
---|
431 | */
|
---|
432 | private StateSet DFAEdge(StateSet start, char input) {
|
---|
433 | // Out.debug("Calculating DFAEdge for state set "+start+" and input '"+input+"'");
|
---|
434 |
|
---|
435 | tempStateSet.clear();
|
---|
436 |
|
---|
437 | states.reset(start);
|
---|
438 | while ( states.hasMoreElements() )
|
---|
439 | tempStateSet.add( table[states.nextElement()][input] );
|
---|
440 |
|
---|
441 | StateSet result = new StateSet(tempStateSet);
|
---|
442 |
|
---|
443 | states.reset(tempStateSet);
|
---|
444 | while ( states.hasMoreElements() )
|
---|
445 | result.add( epsilon[states.nextElement()] );
|
---|
446 |
|
---|
447 | // Out.debug("DFAEdge is : "+result);
|
---|
448 |
|
---|
449 | return result;
|
---|
450 | }
|
---|
451 |
|
---|
452 |
|
---|
453 | /**
|
---|
454 | * Returns an DFA that accepts the same language as this NFA.
|
---|
455 | * This DFA is usually not minimal.
|
---|
456 | */
|
---|
457 | public DFA getDFA() {
|
---|
458 |
|
---|
459 | Hashtable dfaStates = new Hashtable(numStates);
|
---|
460 | Vector dfaVector = new Vector(numStates);
|
---|
461 |
|
---|
462 | DFA dfa = new DFA(numEntryStates(), numInput, numLexStates);
|
---|
463 |
|
---|
464 | int numDFAStates = 0;
|
---|
465 | int currentDFAState = 0;
|
---|
466 |
|
---|
467 | Out.println("Converting NFA to DFA : ");
|
---|
468 |
|
---|
469 | epsilonFill();
|
---|
470 |
|
---|
471 | StateSet currentState, newState;
|
---|
472 |
|
---|
473 | // create the initial states of the DFA
|
---|
474 | for ( int i = 0; i < numEntryStates(); i++ ) {
|
---|
475 | newState = epsilon[i];
|
---|
476 |
|
---|
477 | dfaStates.put(newState, new Integer(numDFAStates));
|
---|
478 | dfaVector.addElement(newState);
|
---|
479 |
|
---|
480 | dfa.setEntryState( i, numDFAStates );
|
---|
481 |
|
---|
482 | dfa.setFinal( numDFAStates, containsFinal(newState) );
|
---|
483 | dfa.setAction( numDFAStates, getAction(newState) );
|
---|
484 |
|
---|
485 | numDFAStates++;
|
---|
486 | }
|
---|
487 |
|
---|
488 | numDFAStates--;
|
---|
489 |
|
---|
490 | if (Options.DEBUG)
|
---|
491 | Out.debug("DFA start states are :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
|
---|
492 |
|
---|
493 | currentDFAState = 0;
|
---|
494 |
|
---|
495 | StateSet tempStateSet = NFA.tempStateSet;
|
---|
496 | StateSetEnumerator states = NFA.states;
|
---|
497 |
|
---|
498 | // will be reused
|
---|
499 | newState = new StateSet(numStates);
|
---|
500 |
|
---|
501 | while ( currentDFAState <= numDFAStates ) {
|
---|
502 |
|
---|
503 | currentState = (StateSet) dfaVector.elementAt(currentDFAState);
|
---|
504 |
|
---|
505 | for (char input = 0; input < numInput; input++) {
|
---|
506 |
|
---|
507 | // newState = DFAEdge(currentState, input);
|
---|
508 |
|
---|
509 | // inlining DFAEdge for performance:
|
---|
510 |
|
---|
511 | // Out.debug("Calculating DFAEdge for state set "+currentState+" and input '"+input+"'");
|
---|
512 |
|
---|
513 | tempStateSet.clear();
|
---|
514 | states.reset(currentState);
|
---|
515 | while ( states.hasMoreElements() )
|
---|
516 | tempStateSet.add( table[states.nextElement()][input] );
|
---|
517 |
|
---|
518 | newState.copy(tempStateSet);
|
---|
519 |
|
---|
520 | states.reset(tempStateSet);
|
---|
521 | while ( states.hasMoreElements() )
|
---|
522 | newState.add( epsilon[states.nextElement()] );
|
---|
523 |
|
---|
524 | // Out.debug("DFAEdge is : "+newState);
|
---|
525 |
|
---|
526 |
|
---|
527 | if ( newState.containsElements() ) {
|
---|
528 |
|
---|
529 | // Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
|
---|
530 |
|
---|
531 | // Out.debug("Looking for state set "+newState);
|
---|
532 | Integer nextDFAState = (Integer) dfaStates.get(newState);
|
---|
533 |
|
---|
534 | if ( nextDFAState != null ) {
|
---|
535 | // Out.debug("FOUND!");
|
---|
536 | dfa.addTransition(currentDFAState, input, nextDFAState.intValue());
|
---|
537 | }
|
---|
538 | else {
|
---|
539 | if (Options.progress) Out.print(".");
|
---|
540 | // Out.debug("NOT FOUND!");
|
---|
541 | // Out.debug("Table was "+dfaStates);
|
---|
542 | numDFAStates++;
|
---|
543 |
|
---|
544 | // make a new copy of newState to store in dfaStates
|
---|
545 | StateSet storeState = new StateSet(newState);
|
---|
546 |
|
---|
547 | dfaStates.put(storeState, new Integer(numDFAStates));
|
---|
548 | dfaVector.addElement(storeState);
|
---|
549 |
|
---|
550 | dfa.addTransition(currentDFAState, input, numDFAStates);
|
---|
551 | dfa.setFinal( numDFAStates, containsFinal(storeState) );
|
---|
552 | dfa.setAction( numDFAStates, getAction(storeState) );
|
---|
553 | }
|
---|
554 | }
|
---|
555 | }
|
---|
556 |
|
---|
557 | currentDFAState++;
|
---|
558 | }
|
---|
559 |
|
---|
560 | if (Options.verbose) Out.println("");
|
---|
561 |
|
---|
562 | return dfa;
|
---|
563 | }
|
---|
564 |
|
---|
565 |
|
---|
566 | public void dumpTable() {
|
---|
567 | Out.dump(toString());
|
---|
568 | }
|
---|
569 |
|
---|
570 | public String toString() {
|
---|
571 | StringBuffer result = new StringBuffer();
|
---|
572 |
|
---|
573 | for (int i=0; i < numStates; i++) {
|
---|
574 | result.append("State");
|
---|
575 | if ( isFinal[i] ) {
|
---|
576 | result.append("[FINAL");
|
---|
577 | String l = action[i].lookString();
|
---|
578 | if (!l.equals("")) {
|
---|
579 | result.append(", ");
|
---|
580 | result.append(l);
|
---|
581 | }
|
---|
582 | result.append("]");
|
---|
583 | }
|
---|
584 | result.append(" "+i+Out.NL);
|
---|
585 |
|
---|
586 | for (char input = 0; input < numInput; input++) {
|
---|
587 | if ( table[i][input] != null && table[i][input].containsElements() )
|
---|
588 | result.append(" with "+((int) input)+" in "+table[i][input]+Out.NL);
|
---|
589 | }
|
---|
590 |
|
---|
591 | if ( epsilon[i] != null && epsilon[i].containsElements() )
|
---|
592 | result.append(" with epsilon in "+epsilon[i]+Out.NL);
|
---|
593 | }
|
---|
594 |
|
---|
595 | return result.toString();
|
---|
596 | }
|
---|
597 |
|
---|
598 | public void writeDot(File file) {
|
---|
599 | try {
|
---|
600 | PrintWriter writer = new PrintWriter(new FileWriter(file));
|
---|
601 | writer.println(dotFormat());
|
---|
602 | writer.close();
|
---|
603 | }
|
---|
604 | catch (IOException e) {
|
---|
605 | Out.error(ErrorMessages.FILE_WRITE, file);
|
---|
606 | throw new GeneratorException();
|
---|
607 | }
|
---|
608 | }
|
---|
609 |
|
---|
610 | public String dotFormat() {
|
---|
611 | StringBuffer result = new StringBuffer();
|
---|
612 |
|
---|
613 | result.append("digraph NFA {"+Out.NL);
|
---|
614 | result.append("rankdir = LR"+Out.NL);
|
---|
615 |
|
---|
616 | for (int i=0; i < numStates; i++) {
|
---|
617 | if ( isFinal[i] ) {
|
---|
618 | result.append(i);
|
---|
619 | result.append(" [shape = doublecircle]");
|
---|
620 | result.append(Out.NL);
|
---|
621 | }
|
---|
622 | }
|
---|
623 |
|
---|
624 | for (int i=0; i < numStates; i++) {
|
---|
625 | for (int input = 0; input < numInput; input++) {
|
---|
626 | if ( table[i][input] != null ) {
|
---|
627 | StateSetEnumerator states = table[i][input].states();
|
---|
628 |
|
---|
629 | while (states.hasMoreElements()) {
|
---|
630 | int s = states.nextElement();
|
---|
631 | result.append(i+" -> "+s);
|
---|
632 | result.append(" [label=\""+classes.toString(input)+"\"]"+Out.NL);
|
---|
633 | }
|
---|
634 | }
|
---|
635 | }
|
---|
636 | if ( epsilon[i] != null ) {
|
---|
637 | StateSetEnumerator states = epsilon[i].states();
|
---|
638 | while (states.hasMoreElements()) {
|
---|
639 | int s = states.nextElement();
|
---|
640 | result.append(i+" -> "+s+" [style=dotted]"+Out.NL);
|
---|
641 | }
|
---|
642 | }
|
---|
643 | }
|
---|
644 |
|
---|
645 | result.append("}"+Out.NL);
|
---|
646 |
|
---|
647 | return result.toString();
|
---|
648 | }
|
---|
649 |
|
---|
650 |
|
---|
651 | //-----------------------------------------------------------------------
|
---|
652 | // Functions for constructing NFAs out of regular expressions.
|
---|
653 |
|
---|
654 | private void insertLetterNFA(boolean caseless, char letter, int start, int end) {
|
---|
655 | if (caseless) {
|
---|
656 | int lower = classes.getClassCode(Character.toLowerCase(letter));
|
---|
657 | int upper = classes.getClassCode(Character.toUpperCase(letter));
|
---|
658 | addTransition(start, lower, end);
|
---|
659 | if (upper != lower) addTransition(start, upper, end);
|
---|
660 | }
|
---|
661 | else {
|
---|
662 | addTransition(start, classes.getClassCode(letter), end);
|
---|
663 | }
|
---|
664 | }
|
---|
665 |
|
---|
666 | private IntPair insertStringNFA(boolean caseless, String letters) {
|
---|
667 | int start = numStates;
|
---|
668 | int i;
|
---|
669 |
|
---|
670 | for (i = 0; i < letters.length(); i++) {
|
---|
671 | if (caseless) {
|
---|
672 | char c = letters.charAt(i);
|
---|
673 | int lower = classes.getClassCode(Character.toLowerCase(c));
|
---|
674 | int upper = classes.getClassCode(Character.toUpperCase(c));
|
---|
675 | addTransition(i+start, lower, i+start+1);
|
---|
676 | if (upper != lower) addTransition(i+start, upper, i+start+1);
|
---|
677 | }
|
---|
678 | else {
|
---|
679 | addTransition(i+start, classes.getClassCode(letters.charAt(i)), i+start+1);
|
---|
680 | }
|
---|
681 | }
|
---|
682 |
|
---|
683 | return new IntPair(start, i+start);
|
---|
684 | }
|
---|
685 |
|
---|
686 |
|
---|
687 | private void insertClassNFA(Vector intervalls, int start, int end) {
|
---|
688 | // empty char class is ok:
|
---|
689 | if (intervalls == null) return;
|
---|
690 |
|
---|
691 | int [] cl = classes.getClassCodes(intervalls);
|
---|
692 | for (int i = 0; i < cl.length; i++)
|
---|
693 | addTransition(start, cl[i], end);
|
---|
694 | }
|
---|
695 |
|
---|
696 | private void insertNotClassNFA(Vector intervalls, int start, int end) {
|
---|
697 | int [] cl = classes.getNotClassCodes(intervalls);
|
---|
698 |
|
---|
699 | for (int i = 0; i < cl.length; i++)
|
---|
700 | addTransition(start, cl[i], end);
|
---|
701 | }
|
---|
702 |
|
---|
703 |
|
---|
704 | /**
|
---|
705 | * Constructs an NFA accepting the complement of the language
|
---|
706 | * of a given NFA.
|
---|
707 | *
|
---|
708 | * Converts the NFA into a DFA, then negates that DFA.
|
---|
709 | * Exponential state blowup possible and common.
|
---|
710 | *
|
---|
711 | * @param the NFA to construct the complement for.
|
---|
712 | *
|
---|
713 | * @return a pair of integers denoting the index of start
|
---|
714 | * and end state of the complement NFA.
|
---|
715 | */
|
---|
716 | private IntPair complement(IntPair nfa) {
|
---|
717 |
|
---|
718 | if (Options.DEBUG) {
|
---|
719 | Out.debug("complement for "+nfa);
|
---|
720 | Out.debug("NFA is :"+Out.NL+this);
|
---|
721 | }
|
---|
722 |
|
---|
723 | int dfaStart = nfa.end+1;
|
---|
724 |
|
---|
725 | // FIXME: only need epsilon closure of states reachable from nfa.start
|
---|
726 | epsilonFill();
|
---|
727 |
|
---|
728 | Hashtable dfaStates = new Hashtable(numStates);
|
---|
729 | Vector dfaVector = new Vector(numStates);
|
---|
730 |
|
---|
731 | int numDFAStates = 0;
|
---|
732 | int currentDFAState = 0;
|
---|
733 |
|
---|
734 | StateSet currentState, newState;
|
---|
735 |
|
---|
736 | newState = epsilon[nfa.start];
|
---|
737 | dfaStates.put(newState, new Integer(numDFAStates));
|
---|
738 | dfaVector.addElement(newState);
|
---|
739 |
|
---|
740 | if (Options.DEBUG)
|
---|
741 | Out.debug("pos DFA start state is :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
|
---|
742 |
|
---|
743 | currentDFAState = 0;
|
---|
744 |
|
---|
745 | while ( currentDFAState <= numDFAStates ) {
|
---|
746 |
|
---|
747 | currentState = (StateSet) dfaVector.elementAt(currentDFAState);
|
---|
748 |
|
---|
749 | for (char input = 0; input < numInput; input++) {
|
---|
750 | newState = DFAEdge(currentState, input);
|
---|
751 |
|
---|
752 | if ( newState.containsElements() ) {
|
---|
753 |
|
---|
754 | // Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
|
---|
755 |
|
---|
756 | // Out.debug("Looking for state set "+newState);
|
---|
757 | Integer nextDFAState = (Integer) dfaStates.get(newState);
|
---|
758 |
|
---|
759 | if ( nextDFAState != null ) {
|
---|
760 | // Out.debug("FOUND!");
|
---|
761 | addTransition(dfaStart+currentDFAState, input, dfaStart+nextDFAState.intValue());
|
---|
762 | }
|
---|
763 | else {
|
---|
764 | if (Options.dump) Out.print("+");
|
---|
765 | // Out.debug("NOT FOUND!");
|
---|
766 | // Out.debug("Table was "+dfaStates);
|
---|
767 | numDFAStates++;
|
---|
768 |
|
---|
769 | dfaStates.put(newState, new Integer(numDFAStates));
|
---|
770 | dfaVector.addElement(newState);
|
---|
771 |
|
---|
772 | addTransition(dfaStart+currentDFAState, input, dfaStart+numDFAStates);
|
---|
773 | }
|
---|
774 | }
|
---|
775 | }
|
---|
776 |
|
---|
777 | currentDFAState++;
|
---|
778 | }
|
---|
779 |
|
---|
780 | // We have a dfa accepting the positive regexp.
|
---|
781 |
|
---|
782 | // Now the complement:
|
---|
783 | if (Options.DEBUG)
|
---|
784 | Out.debug("dfa finished, nfa is now :"+Out.NL+this);
|
---|
785 |
|
---|
786 | int start = dfaStart+numDFAStates+1;
|
---|
787 | int error = dfaStart+numDFAStates+2;
|
---|
788 | int end = dfaStart+numDFAStates+3;
|
---|
789 |
|
---|
790 | addEpsilonTransition(start, dfaStart);
|
---|
791 |
|
---|
792 | for (int i = 0; i < numInput; i++)
|
---|
793 | addTransition(error, i, error);
|
---|
794 |
|
---|
795 | addEpsilonTransition(error, end);
|
---|
796 |
|
---|
797 | for (int s = 0; s <= numDFAStates; s++) {
|
---|
798 | currentState = (StateSet) dfaVector.elementAt(s);
|
---|
799 |
|
---|
800 | currentDFAState = dfaStart+s;
|
---|
801 |
|
---|
802 | // if it was not a final state, it is now in the complement
|
---|
803 | if (!currentState.isElement(nfa.end))
|
---|
804 | addEpsilonTransition(currentDFAState, end);
|
---|
805 |
|
---|
806 | // all inputs not present (formerly leading to an implicit error)
|
---|
807 | // now lead to an explicit (final) state accepting everything.
|
---|
808 | for (int i = 0; i < numInput; i++)
|
---|
809 | if (table[currentDFAState][i] == null)
|
---|
810 | addTransition(currentDFAState, i, error);
|
---|
811 | }
|
---|
812 |
|
---|
813 | // eliminate transitions leading to dead states
|
---|
814 | if (live == null || live.length < numStates) {
|
---|
815 | live = new boolean [2*numStates];
|
---|
816 | visited = new boolean [2*numStates];
|
---|
817 | }
|
---|
818 |
|
---|
819 | removeDead(dfaStart, end);
|
---|
820 |
|
---|
821 | if (Options.DEBUG)
|
---|
822 | Out.debug("complement finished, nfa ("+start+","+end+") is now :"+this);
|
---|
823 |
|
---|
824 | return new IntPair(start, end);
|
---|
825 | }
|
---|
826 |
|
---|
827 | // "global" data for use in method removeDead only:
|
---|
828 | // live[s] == false <=> no final state can be reached from s
|
---|
829 | private boolean [] live; // = new boolean [estSize];
|
---|
830 | private boolean [] visited; // = new boolean [estSize];
|
---|
831 |
|
---|
832 | private void removeDead(int start, int end) {
|
---|
833 | // Out.debug("removeDead ("+start+")");
|
---|
834 |
|
---|
835 | if ( visited[start] || live[start] ) return;
|
---|
836 | visited[start] = true;
|
---|
837 |
|
---|
838 | // Out.debug("not yet visited");
|
---|
839 |
|
---|
840 | if (closure(start).isElement(end))
|
---|
841 | live[start] = true;
|
---|
842 |
|
---|
843 | // Out.debug("is final :"+live[start]);
|
---|
844 |
|
---|
845 | for (int i = 0; i < numInput; i++) {
|
---|
846 | StateSet nextState = closure(table[start][i]);
|
---|
847 | StateSetEnumerator states = nextState.states();
|
---|
848 | while (states.hasMoreElements()) {
|
---|
849 | int next = states.nextElement();
|
---|
850 |
|
---|
851 | if (next != start) {
|
---|
852 | removeDead(next,end);
|
---|
853 |
|
---|
854 | if (live[next])
|
---|
855 | live[start] = true;
|
---|
856 | else
|
---|
857 | table[start][i] = null;
|
---|
858 | }
|
---|
859 | }
|
---|
860 | }
|
---|
861 |
|
---|
862 | StateSet nextState = closure(epsilon[start]);
|
---|
863 | StateSetEnumerator states = nextState.states();
|
---|
864 | while (states.hasMoreElements()) {
|
---|
865 | int next = states.nextElement();
|
---|
866 |
|
---|
867 | if (next != start) {
|
---|
868 | removeDead(next,end);
|
---|
869 |
|
---|
870 | if (live[next])
|
---|
871 | live[start] = true;
|
---|
872 | }
|
---|
873 | }
|
---|
874 |
|
---|
875 | // Out.debug("state "+start+" is live :"+live[start]);
|
---|
876 | }
|
---|
877 |
|
---|
878 |
|
---|
879 | /**
|
---|
880 | * Constructs a two state NFA for char class regexps,
|
---|
881 | * such that the NFA has
|
---|
882 | *
|
---|
883 | * exactly one start state,
|
---|
884 | * exactly one end state,
|
---|
885 | * no transitions leading out of the end state
|
---|
886 | * no transitions leading into the start state
|
---|
887 | *
|
---|
888 | * Assumes that regExp.isCharClass(macros) == true
|
---|
889 | *
|
---|
890 | * @param regExp the regular expression to construct the
|
---|
891 | * NFA for
|
---|
892 | *
|
---|
893 | * @return a pair of integers denoting the index of start
|
---|
894 | * and end state of the NFA.
|
---|
895 | */
|
---|
896 | private void insertCCLNFA(RegExp regExp, int start, int end) {
|
---|
897 | switch (regExp.type) {
|
---|
898 |
|
---|
899 | case sym.BAR:
|
---|
900 | RegExp2 r = (RegExp2) regExp;
|
---|
901 | insertCCLNFA(r.r1, start, end);
|
---|
902 | insertCCLNFA(r.r2, start, end);
|
---|
903 | return;
|
---|
904 |
|
---|
905 | case sym.CCLASS:
|
---|
906 | insertClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
|
---|
907 | return;
|
---|
908 |
|
---|
909 | case sym.CCLASSNOT:
|
---|
910 | insertNotClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
|
---|
911 | return;
|
---|
912 |
|
---|
913 | case sym.CHAR:
|
---|
914 | insertLetterNFA(
|
---|
915 | false, ((Character) ((RegExp1) regExp).content).charValue(),
|
---|
916 | start, end);
|
---|
917 | return;
|
---|
918 |
|
---|
919 | case sym.CHAR_I:
|
---|
920 | insertLetterNFA(
|
---|
921 | true, ((Character) ((RegExp1) regExp).content).charValue(),
|
---|
922 | start, end);
|
---|
923 | return;
|
---|
924 |
|
---|
925 | case sym.MACROUSE:
|
---|
926 | insertCCLNFA(macros.getDefinition((String) ((RegExp1) regExp).content),
|
---|
927 | start, end);
|
---|
928 | return;
|
---|
929 | }
|
---|
930 |
|
---|
931 | throw new Error("Unknown expression type "+regExp.type+" in NFA construction");
|
---|
932 | }
|
---|
933 |
|
---|
934 |
|
---|
935 | /**
|
---|
936 | * Constructs an NFA for regExp such that the NFA has
|
---|
937 | *
|
---|
938 | * exactly one start state,
|
---|
939 | * exactly one end state,
|
---|
940 | * no transitions leading out of the end state
|
---|
941 | * no transitions leading into the start state
|
---|
942 | *
|
---|
943 | * @param regExp the regular expression to construct the
|
---|
944 | * NFA for
|
---|
945 | *
|
---|
946 | * @return a pair of integers denoting the index of start
|
---|
947 | * and end state of the NFA.
|
---|
948 | */
|
---|
949 | public IntPair insertNFA(RegExp regExp) {
|
---|
950 |
|
---|
951 | IntPair nfa1, nfa2;
|
---|
952 | int start, end;
|
---|
953 | RegExp2 r;
|
---|
954 |
|
---|
955 | if (Options.DEBUG)
|
---|
956 | Out.debug("Inserting RegExp : "+regExp);
|
---|
957 |
|
---|
958 | if (regExp.isCharClass(macros)) {
|
---|
959 | start = numStates;
|
---|
960 | end = numStates+1;
|
---|
961 |
|
---|
962 | ensureCapacity(end+1);
|
---|
963 | if (end+1 > numStates) numStates = end+1;
|
---|
964 |
|
---|
965 | insertCCLNFA(regExp, start, end);
|
---|
966 |
|
---|
967 | return new IntPair(start, end);
|
---|
968 | }
|
---|
969 |
|
---|
970 | switch (regExp.type) {
|
---|
971 |
|
---|
972 | case sym.BAR:
|
---|
973 |
|
---|
974 | r = (RegExp2) regExp;
|
---|
975 |
|
---|
976 | nfa1 = insertNFA(r.r1);
|
---|
977 | nfa2 = insertNFA(r.r2);
|
---|
978 |
|
---|
979 | start = nfa2.end+1;
|
---|
980 | end = nfa2.end+2;
|
---|
981 |
|
---|
982 | addEpsilonTransition(start, nfa1.start);
|
---|
983 | addEpsilonTransition(start, nfa2.start);
|
---|
984 | addEpsilonTransition(nfa1.end, end);
|
---|
985 | addEpsilonTransition(nfa2.end, end);
|
---|
986 |
|
---|
987 | return new IntPair(start, end);
|
---|
988 |
|
---|
989 | case sym.CONCAT:
|
---|
990 |
|
---|
991 | r = (RegExp2) regExp;
|
---|
992 |
|
---|
993 | nfa1 = insertNFA(r.r1);
|
---|
994 | nfa2 = insertNFA(r.r2);
|
---|
995 |
|
---|
996 | addEpsilonTransition(nfa1.end, nfa2.start);
|
---|
997 |
|
---|
998 | return new IntPair(nfa1.start, nfa2.end);
|
---|
999 |
|
---|
1000 | case sym.STAR:
|
---|
1001 | nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
|
---|
1002 |
|
---|
1003 | start = nfa1.end+1;
|
---|
1004 | end = nfa1.end+2;
|
---|
1005 |
|
---|
1006 | addEpsilonTransition(nfa1.end, end);
|
---|
1007 | addEpsilonTransition(start, nfa1.start);
|
---|
1008 |
|
---|
1009 | addEpsilonTransition(start, end);
|
---|
1010 | addEpsilonTransition(nfa1.end, nfa1.start);
|
---|
1011 |
|
---|
1012 | return new IntPair(start, end);
|
---|
1013 |
|
---|
1014 | case sym.PLUS:
|
---|
1015 | nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
|
---|
1016 |
|
---|
1017 | start = nfa1.end+1;
|
---|
1018 | end = nfa1.end+2;
|
---|
1019 |
|
---|
1020 | addEpsilonTransition(nfa1.end, end);
|
---|
1021 | addEpsilonTransition(start, nfa1.start);
|
---|
1022 |
|
---|
1023 | addEpsilonTransition(nfa1.end, nfa1.start);
|
---|
1024 |
|
---|
1025 | return new IntPair(start, end);
|
---|
1026 |
|
---|
1027 | case sym.QUESTION:
|
---|
1028 | nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
|
---|
1029 |
|
---|
1030 | addEpsilonTransition(nfa1.start, nfa1.end);
|
---|
1031 |
|
---|
1032 | return new IntPair(nfa1.start, nfa1.end);
|
---|
1033 |
|
---|
1034 | case sym.BANG:
|
---|
1035 | return complement(insertNFA((RegExp) ((RegExp1) regExp).content));
|
---|
1036 |
|
---|
1037 | case sym.TILDE:
|
---|
1038 | return insertNFA(regExp.resolveTilde(macros));
|
---|
1039 |
|
---|
1040 | case sym.STRING:
|
---|
1041 | return insertStringNFA(false, (String) ((RegExp1) regExp).content );
|
---|
1042 |
|
---|
1043 | case sym.STRING_I:
|
---|
1044 | return insertStringNFA(true, (String) ((RegExp1) regExp).content );
|
---|
1045 |
|
---|
1046 | case sym.MACROUSE:
|
---|
1047 | return insertNFA(macros.getDefinition((String) ((RegExp1) regExp).content));
|
---|
1048 | }
|
---|
1049 |
|
---|
1050 | throw new Error("Unknown expression type "+regExp.type+" in NFA construction");
|
---|
1051 | }
|
---|
1052 | }
|
---|