source: other-projects/rsyntax-textarea/devel-packages/jflex-1.4.3/src/JFlex/NFA.java@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut an a text edit area for GLI that supports color syntax highlighting

File size: 30.4 KB
Line 
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * JFlex 1.4.3 *
3 * Copyright (C) 1998-2009 Gerwin Klein <[email protected]> *
4 * All rights reserved. *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License. See the file *
8 * COPYRIGHT for more information. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License along *
16 * with this program; if not, write to the Free Software Foundation, Inc., *
17 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
18 * *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20package JFlex;
21
22import java.util.*;
23import java.io.*;
24
25
26/**
27 * NFA representation in JFlex.
28 *
29 * Contains algorithms RegExp -> NFA and NFA -> DFA.
30 *
31 * @author Gerwin Klein
32 * @version JFlex 1.4.3, $Revision: 433 $, $Date: 2009-01-31 19:52:34 +1100 (Sat, 31 Jan 2009) $
33 */
34final public class NFA {
35
36 /** table[current_state][next_char] is the set of states that can be reached
37 /* from current_state with an input next_char */
38 StateSet [][] table;
39
40 /** epsilon[current_state] is the set of states that can be reached
41 /* from current_state via epsilon edges */
42 StateSet [] epsilon;
43
44 /** isFinal[state] == true <=> state is a final state of the NFA */
45 boolean [] isFinal;
46
47 /** action[current_state]: the action associated with the state
48 /* current_state (null, if there is no action for the state) */
49 Action [] action;
50
51 /** the number of states in this NFA */
52 int numStates;
53
54 /** the current maximum number of input characters */
55 int numInput;
56
57 /** the number of lexical States. Lexical states have the indices
58 /* 0..numLexStates-1 in the transition table */
59 int numLexStates;
60
61 /** estimated size of the NFA (before actual construction) */
62 int estSize = 256;
63
64 Macros macros;
65 CharClasses classes;
66
67 LexScan scanner;
68 RegExps regExps;
69
70 // will be reused by several methods (avoids excessive object creation)
71 private static StateSetEnumerator states = new StateSetEnumerator();
72 private static StateSet tempStateSet = new StateSet();
73
74 public NFA(int numInput, int estSize) {
75 this.numInput = numInput;
76 this.estSize = estSize;
77 numStates = 0;
78 epsilon = new StateSet [estSize];
79 action = new Action [estSize];
80 isFinal = new boolean [estSize];
81 table = new StateSet [estSize][numInput];
82 }
83
84 /**
85 * Construct new NFA.
86 *
87 * Assumes that lookahead cases and numbers are already resolved in RegExps.
88 * @see RegExps#checkLookAheads()
89 */
90 public NFA(int numInput, LexScan scanner, RegExps regExps,
91 Macros macros, CharClasses classes) {
92 this(numInput, regExps.NFASize(macros)+2*scanner.states.number());
93
94 this.scanner = scanner;
95 this.regExps = regExps;
96 this.macros = macros;
97 this.classes = classes;
98
99 numLexStates = scanner.states.number();
100
101 // ensureCapacity assumes correctly set up numStates.
102 int new_num = numEntryStates();
103 ensureCapacity(new_num);
104 numStates = new_num;
105 }
106
107 public int numEntryStates() {
108 return 2*(numLexStates+regExps.gen_look_count);
109 }
110
111 /**
112 * Add a standalone rule that has minimum priority, fires a transition
113 * on all single input characters and has a "print yytext" action.
114 */
115 public void addStandaloneRule() {
116 int start = numStates;
117 int end = numStates+1;
118
119 for (int c = 0; c < classes.getNumClasses(); c++)
120 addTransition(start, c, end);
121
122 for (int i = 0; i < numLexStates*2; i++)
123 addEpsilonTransition(i, start);
124
125 action[end] = new Action("System.out.print(yytext());", Integer.MAX_VALUE);
126 isFinal[end] = true;
127 }
128
129 /**
130 * Add a regexp to this NFA.
131 *
132 * @param regExpNum the number of the regexp to add.
133 */
134 public void addRegExp(int regExpNum) {
135
136 if (Options.DEBUG)
137 Out.debug("Adding nfa for regexp "+regExpNum+" :"+Out.NL+regExps.getRegExp(regExpNum));
138
139 IntPair nfa = insertNFA( regExps.getRegExp(regExpNum) );
140
141 Enumeration lexStates = regExps.getStates(regExpNum).elements();
142
143 if ( !lexStates.hasMoreElements() )
144 lexStates = scanner.states.getInclusiveStates();
145
146 while ( lexStates.hasMoreElements() ) {
147 int stateNum = ((Integer)lexStates.nextElement()).intValue();
148
149 if ( !regExps.isBOL(regExpNum) )
150 addEpsilonTransition(2*stateNum, nfa.start);
151
152 addEpsilonTransition(2*stateNum+1, nfa.start);
153 }
154
155
156 if ( regExps.getLookAhead(regExpNum) != null ) {
157 Action a = regExps.getAction(regExpNum);
158
159 if (a.lookAhead() == Action.FINITE_CHOICE) {
160 insertLookAheadChoices(nfa.end, a, regExps.getLookAhead(regExpNum));
161 // remove the original action from the collection: it will never
162 // be matched directly, only its copies will.
163 scanner.actions.remove(a);
164 }
165 else {
166 RegExp r1 = regExps.getRegExp(regExpNum);
167 RegExp r2 = regExps.getLookAhead(regExpNum);
168
169 IntPair look = insertNFA(r2);
170
171 addEpsilonTransition(nfa.end, look.start);
172
173 action[look.end] = a;
174 isFinal[look.end] = true;
175
176 if (a.lookAhead() == Action.GENERAL_LOOK) {
177 // base forward pass
178 IntPair forward = insertNFA(r1);
179 // lookahead backward pass
180 IntPair backward = insertNFA(r2.rev(macros));
181
182 isFinal[forward.end] = true;
183 action[forward.end] = new Action(Action.FORWARD_ACTION);
184
185 isFinal[backward.end] = true;
186 action[backward.end] = new Action(Action.BACKWARD_ACTION);
187
188 int entry = 2*(regExps.getLookEntry(regExpNum) + numLexStates);
189 addEpsilonTransition(entry, forward.start);
190 addEpsilonTransition(entry+1, backward.start);
191
192 a.setEntryState(entry);
193 }
194 }
195 }
196 else {
197 action[nfa.end] = regExps.getAction(regExpNum);
198 isFinal[nfa.end] = true;
199 }
200 }
201
202 /**
203 * Insert NFAs for the (finitely many) fixed length lookahead choices.
204 *
205 * @param lookAhead a lookahead of which isFiniteChoice is true
206 * @param baseEnd the end state of the base expression NFA
207 * @param a the action of the expression
208 *
209 * @see SemCheck#isFiniteChoice(RegExp)
210 */
211 private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
212 if (lookAhead.type == sym.BAR) {
213 RegExp2 r = (RegExp2) lookAhead;
214 insertLookAheadChoices(baseEnd, a, r.r1);
215 insertLookAheadChoices(baseEnd, a, r.r2);
216 }
217 else if (lookAhead.type == sym.MACROUSE) {
218 RegExp1 r = (RegExp1) lookAhead;
219 insertLookAheadChoices(baseEnd, a, macros.getDefinition((String) r.content));
220 }
221 else {
222 int len = SemCheck.length(lookAhead);
223
224 if (len >= 0) {
225 // termination case
226 IntPair look = insertNFA(lookAhead);
227
228 addEpsilonTransition(baseEnd, look.start);
229
230 Action x = a.copyChoice(len);
231 action[look.end] = x;
232 isFinal[look.end] = true;
233
234 // add new copy to the collection of known actions such that
235 // it can be checked for the NEVER_MATCH warning.
236 scanner.actions.add(x);
237 }
238 else {
239 // should never happen
240 throw new Error("When inserting lookahead expression: unkown expression type "+lookAhead.type+" in "+lookAhead); //$NON-NLS-1$ //$NON-NLS-2$
241 }
242 }
243 }
244
245 /**
246 * Make sure the NFA can contain at least newNumStates states.
247 *
248 * @param newNumStates the minimu number of states.
249 */
250 private void ensureCapacity(int newNumStates) {
251 int oldLength = epsilon.length;
252
253 if ( newNumStates < oldLength ) return;
254
255 int newStatesLength = Math.max(oldLength*2, newNumStates);
256
257 boolean [] newFinal = new boolean [newStatesLength];
258 boolean [] newIsPush = new boolean [newStatesLength];
259 Action [] newAction = new Action [newStatesLength];
260 StateSet [] [] newTable = new StateSet [newStatesLength] [numInput];
261 StateSet [] newEpsilon = new StateSet [newStatesLength];
262
263 System.arraycopy(isFinal,0,newFinal,0,numStates);
264 System.arraycopy(action,0,newAction,0,numStates);
265 System.arraycopy(epsilon,0,newEpsilon,0,numStates);
266 System.arraycopy(table,0,newTable,0,numStates);
267
268 isFinal = newFinal;
269 action = newAction;
270 epsilon = newEpsilon;
271 table = newTable;
272 }
273
274 public void addTransition(int start, int input, int dest) {
275 Out.debug("Adding transition ("+start+", "+input+", "+dest+")");
276
277 int maxS = Math.max(start,dest)+1;
278
279 ensureCapacity( maxS );
280
281 if (maxS > numStates) numStates = maxS;
282
283 if ( table[start][input] != null )
284 table[start][input].addState(dest);
285 else
286 table[start][input] = new StateSet(estSize,dest);
287 }
288
289 public void addEpsilonTransition(int start, int dest) {
290 int max = Math.max(start,dest)+1;
291 ensureCapacity( max );
292 if (max > numStates) numStates = max;
293
294 if (epsilon[start] != null)
295 epsilon[start].addState(dest);
296 else
297 epsilon[start] = new StateSet(estSize,dest);
298 }
299
300
301 /**
302 * Returns <code>true</code>, iff the specified set of states
303 * contains a final state.
304 *
305 * @param set the set of states that is tested for final states.
306 */
307 private boolean containsFinal(StateSet set) {
308 states.reset(set);
309
310 while ( states.hasMoreElements() )
311 if ( isFinal[states.nextElement()] ) return true;
312
313 return false;
314 }
315
316
317 /**
318 * Returns <code>true</code>, iff the specified set of states
319 * contains a pushback-state.
320 *
321 * @param set the set of states that is tested for pushback-states.
322 private boolean containsPushback(StateSet set) {
323 states.reset(set);
324
325 while ( states.hasMoreElements() )
326 if ( isPushback[states.nextElement()] ) return true;
327
328 return false;
329 }
330 */
331
332 /**
333 * Returns the action with highest priority in the specified
334 * set of states.
335 *
336 * @param set the set of states for which to determine the action
337 */
338 private Action getAction(StateSet set) {
339
340 states.reset(set);
341
342 Action maxAction = null;
343
344 Out.debug("Determining action of : "+set);
345
346 while ( states.hasMoreElements() ) {
347
348 Action currentAction = action[ states.nextElement() ];
349
350 if ( currentAction != null ) {
351 if (maxAction == null)
352 maxAction = currentAction;
353 else
354 maxAction = maxAction.getHigherPriority(currentAction);
355 }
356
357 }
358
359 return maxAction;
360 }
361
362
363 /**
364 * Calculates the epsilon closure for a specified set of states.
365 *
366 * The epsilon closure for set a is the set of states that can be reached
367 * by epsilon edges from a.
368 *
369 * @param set the set of states to calculate the epsilon closure for
370 *
371 * @return the epsilon closure of the specified set of states
372 * in this NFA
373 */
374 private StateSet closure(int startState) {
375
376 // Out.debug("Calculating closure of "+set);
377
378 StateSet notvisited = tempStateSet;
379 StateSet closure = new StateSet(numStates,startState);
380
381 notvisited.clear();
382 notvisited.addState(startState);
383
384 while ( notvisited.containsElements() ) {
385 // Out.debug("closure is now "+closure);
386 // Out.debug("notvisited is "+notvisited);
387 int state = notvisited.getAndRemoveElement();
388 // Out.debug("removed element "+state+" of "+notvisited);
389 // Out.debug("epsilon[states] = "+epsilon[state]);
390 notvisited.add(closure.complement(epsilon[state]));
391 closure.add(epsilon[state]);
392 }
393
394 // Out.debug("Closure is : "+closure);
395
396 return closure;
397 }
398
399 /**
400 * Returns the epsilon closure of a set of states
401 */
402 private StateSet closure(StateSet startStates) {
403 StateSet result = new StateSet(numStates);
404
405 if (startStates != null) {
406 states.reset(startStates);
407 while (states.hasMoreElements())
408 result.add( closure(states.nextElement()) );
409 }
410
411 return result;
412 }
413
414
415 private void epsilonFill() {
416 for (int i = 0; i < numStates; i++) {
417 epsilon[i] = closure(i);
418 }
419 }
420
421 /**
422 * Calculates the set of states that can be reached from another
423 * set of states <code>start</code> with an specified input
424 * character <code>input</code>
425 *
426 * @param start the set of states to start from
427 * @param input the input character for which to search the next states
428 *
429 * @return the set of states that are reached from <code>start</code>
430 * via <code>input</code>
431 */
432 private StateSet DFAEdge(StateSet start, char input) {
433 // Out.debug("Calculating DFAEdge for state set "+start+" and input '"+input+"'");
434
435 tempStateSet.clear();
436
437 states.reset(start);
438 while ( states.hasMoreElements() )
439 tempStateSet.add( table[states.nextElement()][input] );
440
441 StateSet result = new StateSet(tempStateSet);
442
443 states.reset(tempStateSet);
444 while ( states.hasMoreElements() )
445 result.add( epsilon[states.nextElement()] );
446
447 // Out.debug("DFAEdge is : "+result);
448
449 return result;
450 }
451
452
453 /**
454 * Returns an DFA that accepts the same language as this NFA.
455 * This DFA is usually not minimal.
456 */
457 public DFA getDFA() {
458
459 Hashtable dfaStates = new Hashtable(numStates);
460 Vector dfaVector = new Vector(numStates);
461
462 DFA dfa = new DFA(numEntryStates(), numInput, numLexStates);
463
464 int numDFAStates = 0;
465 int currentDFAState = 0;
466
467 Out.println("Converting NFA to DFA : ");
468
469 epsilonFill();
470
471 StateSet currentState, newState;
472
473 // create the initial states of the DFA
474 for ( int i = 0; i < numEntryStates(); i++ ) {
475 newState = epsilon[i];
476
477 dfaStates.put(newState, new Integer(numDFAStates));
478 dfaVector.addElement(newState);
479
480 dfa.setEntryState( i, numDFAStates );
481
482 dfa.setFinal( numDFAStates, containsFinal(newState) );
483 dfa.setAction( numDFAStates, getAction(newState) );
484
485 numDFAStates++;
486 }
487
488 numDFAStates--;
489
490 if (Options.DEBUG)
491 Out.debug("DFA start states are :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
492
493 currentDFAState = 0;
494
495 StateSet tempStateSet = NFA.tempStateSet;
496 StateSetEnumerator states = NFA.states;
497
498 // will be reused
499 newState = new StateSet(numStates);
500
501 while ( currentDFAState <= numDFAStates ) {
502
503 currentState = (StateSet) dfaVector.elementAt(currentDFAState);
504
505 for (char input = 0; input < numInput; input++) {
506
507 // newState = DFAEdge(currentState, input);
508
509 // inlining DFAEdge for performance:
510
511 // Out.debug("Calculating DFAEdge for state set "+currentState+" and input '"+input+"'");
512
513 tempStateSet.clear();
514 states.reset(currentState);
515 while ( states.hasMoreElements() )
516 tempStateSet.add( table[states.nextElement()][input] );
517
518 newState.copy(tempStateSet);
519
520 states.reset(tempStateSet);
521 while ( states.hasMoreElements() )
522 newState.add( epsilon[states.nextElement()] );
523
524 // Out.debug("DFAEdge is : "+newState);
525
526
527 if ( newState.containsElements() ) {
528
529 // Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
530
531 // Out.debug("Looking for state set "+newState);
532 Integer nextDFAState = (Integer) dfaStates.get(newState);
533
534 if ( nextDFAState != null ) {
535 // Out.debug("FOUND!");
536 dfa.addTransition(currentDFAState, input, nextDFAState.intValue());
537 }
538 else {
539 if (Options.progress) Out.print(".");
540 // Out.debug("NOT FOUND!");
541 // Out.debug("Table was "+dfaStates);
542 numDFAStates++;
543
544 // make a new copy of newState to store in dfaStates
545 StateSet storeState = new StateSet(newState);
546
547 dfaStates.put(storeState, new Integer(numDFAStates));
548 dfaVector.addElement(storeState);
549
550 dfa.addTransition(currentDFAState, input, numDFAStates);
551 dfa.setFinal( numDFAStates, containsFinal(storeState) );
552 dfa.setAction( numDFAStates, getAction(storeState) );
553 }
554 }
555 }
556
557 currentDFAState++;
558 }
559
560 if (Options.verbose) Out.println("");
561
562 return dfa;
563 }
564
565
566 public void dumpTable() {
567 Out.dump(toString());
568 }
569
570 public String toString() {
571 StringBuffer result = new StringBuffer();
572
573 for (int i=0; i < numStates; i++) {
574 result.append("State");
575 if ( isFinal[i] ) {
576 result.append("[FINAL");
577 String l = action[i].lookString();
578 if (!l.equals("")) {
579 result.append(", ");
580 result.append(l);
581 }
582 result.append("]");
583 }
584 result.append(" "+i+Out.NL);
585
586 for (char input = 0; input < numInput; input++) {
587 if ( table[i][input] != null && table[i][input].containsElements() )
588 result.append(" with "+((int) input)+" in "+table[i][input]+Out.NL);
589 }
590
591 if ( epsilon[i] != null && epsilon[i].containsElements() )
592 result.append(" with epsilon in "+epsilon[i]+Out.NL);
593 }
594
595 return result.toString();
596 }
597
598 public void writeDot(File file) {
599 try {
600 PrintWriter writer = new PrintWriter(new FileWriter(file));
601 writer.println(dotFormat());
602 writer.close();
603 }
604 catch (IOException e) {
605 Out.error(ErrorMessages.FILE_WRITE, file);
606 throw new GeneratorException();
607 }
608 }
609
610 public String dotFormat() {
611 StringBuffer result = new StringBuffer();
612
613 result.append("digraph NFA {"+Out.NL);
614 result.append("rankdir = LR"+Out.NL);
615
616 for (int i=0; i < numStates; i++) {
617 if ( isFinal[i] ) {
618 result.append(i);
619 result.append(" [shape = doublecircle]");
620 result.append(Out.NL);
621 }
622 }
623
624 for (int i=0; i < numStates; i++) {
625 for (int input = 0; input < numInput; input++) {
626 if ( table[i][input] != null ) {
627 StateSetEnumerator states = table[i][input].states();
628
629 while (states.hasMoreElements()) {
630 int s = states.nextElement();
631 result.append(i+" -> "+s);
632 result.append(" [label=\""+classes.toString(input)+"\"]"+Out.NL);
633 }
634 }
635 }
636 if ( epsilon[i] != null ) {
637 StateSetEnumerator states = epsilon[i].states();
638 while (states.hasMoreElements()) {
639 int s = states.nextElement();
640 result.append(i+" -> "+s+" [style=dotted]"+Out.NL);
641 }
642 }
643 }
644
645 result.append("}"+Out.NL);
646
647 return result.toString();
648 }
649
650
651 //-----------------------------------------------------------------------
652 // Functions for constructing NFAs out of regular expressions.
653
654 private void insertLetterNFA(boolean caseless, char letter, int start, int end) {
655 if (caseless) {
656 int lower = classes.getClassCode(Character.toLowerCase(letter));
657 int upper = classes.getClassCode(Character.toUpperCase(letter));
658 addTransition(start, lower, end);
659 if (upper != lower) addTransition(start, upper, end);
660 }
661 else {
662 addTransition(start, classes.getClassCode(letter), end);
663 }
664 }
665
666 private IntPair insertStringNFA(boolean caseless, String letters) {
667 int start = numStates;
668 int i;
669
670 for (i = 0; i < letters.length(); i++) {
671 if (caseless) {
672 char c = letters.charAt(i);
673 int lower = classes.getClassCode(Character.toLowerCase(c));
674 int upper = classes.getClassCode(Character.toUpperCase(c));
675 addTransition(i+start, lower, i+start+1);
676 if (upper != lower) addTransition(i+start, upper, i+start+1);
677 }
678 else {
679 addTransition(i+start, classes.getClassCode(letters.charAt(i)), i+start+1);
680 }
681 }
682
683 return new IntPair(start, i+start);
684 }
685
686
687 private void insertClassNFA(Vector intervalls, int start, int end) {
688 // empty char class is ok:
689 if (intervalls == null) return;
690
691 int [] cl = classes.getClassCodes(intervalls);
692 for (int i = 0; i < cl.length; i++)
693 addTransition(start, cl[i], end);
694 }
695
696 private void insertNotClassNFA(Vector intervalls, int start, int end) {
697 int [] cl = classes.getNotClassCodes(intervalls);
698
699 for (int i = 0; i < cl.length; i++)
700 addTransition(start, cl[i], end);
701 }
702
703
704 /**
705 * Constructs an NFA accepting the complement of the language
706 * of a given NFA.
707 *
708 * Converts the NFA into a DFA, then negates that DFA.
709 * Exponential state blowup possible and common.
710 *
711 * @param the NFA to construct the complement for.
712 *
713 * @return a pair of integers denoting the index of start
714 * and end state of the complement NFA.
715 */
716 private IntPair complement(IntPair nfa) {
717
718 if (Options.DEBUG) {
719 Out.debug("complement for "+nfa);
720 Out.debug("NFA is :"+Out.NL+this);
721 }
722
723 int dfaStart = nfa.end+1;
724
725 // FIXME: only need epsilon closure of states reachable from nfa.start
726 epsilonFill();
727
728 Hashtable dfaStates = new Hashtable(numStates);
729 Vector dfaVector = new Vector(numStates);
730
731 int numDFAStates = 0;
732 int currentDFAState = 0;
733
734 StateSet currentState, newState;
735
736 newState = epsilon[nfa.start];
737 dfaStates.put(newState, new Integer(numDFAStates));
738 dfaVector.addElement(newState);
739
740 if (Options.DEBUG)
741 Out.debug("pos DFA start state is :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
742
743 currentDFAState = 0;
744
745 while ( currentDFAState <= numDFAStates ) {
746
747 currentState = (StateSet) dfaVector.elementAt(currentDFAState);
748
749 for (char input = 0; input < numInput; input++) {
750 newState = DFAEdge(currentState, input);
751
752 if ( newState.containsElements() ) {
753
754 // Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
755
756 // Out.debug("Looking for state set "+newState);
757 Integer nextDFAState = (Integer) dfaStates.get(newState);
758
759 if ( nextDFAState != null ) {
760 // Out.debug("FOUND!");
761 addTransition(dfaStart+currentDFAState, input, dfaStart+nextDFAState.intValue());
762 }
763 else {
764 if (Options.dump) Out.print("+");
765 // Out.debug("NOT FOUND!");
766 // Out.debug("Table was "+dfaStates);
767 numDFAStates++;
768
769 dfaStates.put(newState, new Integer(numDFAStates));
770 dfaVector.addElement(newState);
771
772 addTransition(dfaStart+currentDFAState, input, dfaStart+numDFAStates);
773 }
774 }
775 }
776
777 currentDFAState++;
778 }
779
780 // We have a dfa accepting the positive regexp.
781
782 // Now the complement:
783 if (Options.DEBUG)
784 Out.debug("dfa finished, nfa is now :"+Out.NL+this);
785
786 int start = dfaStart+numDFAStates+1;
787 int error = dfaStart+numDFAStates+2;
788 int end = dfaStart+numDFAStates+3;
789
790 addEpsilonTransition(start, dfaStart);
791
792 for (int i = 0; i < numInput; i++)
793 addTransition(error, i, error);
794
795 addEpsilonTransition(error, end);
796
797 for (int s = 0; s <= numDFAStates; s++) {
798 currentState = (StateSet) dfaVector.elementAt(s);
799
800 currentDFAState = dfaStart+s;
801
802 // if it was not a final state, it is now in the complement
803 if (!currentState.isElement(nfa.end))
804 addEpsilonTransition(currentDFAState, end);
805
806 // all inputs not present (formerly leading to an implicit error)
807 // now lead to an explicit (final) state accepting everything.
808 for (int i = 0; i < numInput; i++)
809 if (table[currentDFAState][i] == null)
810 addTransition(currentDFAState, i, error);
811 }
812
813 // eliminate transitions leading to dead states
814 if (live == null || live.length < numStates) {
815 live = new boolean [2*numStates];
816 visited = new boolean [2*numStates];
817 }
818
819 removeDead(dfaStart, end);
820
821 if (Options.DEBUG)
822 Out.debug("complement finished, nfa ("+start+","+end+") is now :"+this);
823
824 return new IntPair(start, end);
825 }
826
827 // "global" data for use in method removeDead only:
828 // live[s] == false <=> no final state can be reached from s
829 private boolean [] live; // = new boolean [estSize];
830 private boolean [] visited; // = new boolean [estSize];
831
832 private void removeDead(int start, int end) {
833 // Out.debug("removeDead ("+start+")");
834
835 if ( visited[start] || live[start] ) return;
836 visited[start] = true;
837
838 // Out.debug("not yet visited");
839
840 if (closure(start).isElement(end))
841 live[start] = true;
842
843 // Out.debug("is final :"+live[start]);
844
845 for (int i = 0; i < numInput; i++) {
846 StateSet nextState = closure(table[start][i]);
847 StateSetEnumerator states = nextState.states();
848 while (states.hasMoreElements()) {
849 int next = states.nextElement();
850
851 if (next != start) {
852 removeDead(next,end);
853
854 if (live[next])
855 live[start] = true;
856 else
857 table[start][i] = null;
858 }
859 }
860 }
861
862 StateSet nextState = closure(epsilon[start]);
863 StateSetEnumerator states = nextState.states();
864 while (states.hasMoreElements()) {
865 int next = states.nextElement();
866
867 if (next != start) {
868 removeDead(next,end);
869
870 if (live[next])
871 live[start] = true;
872 }
873 }
874
875 // Out.debug("state "+start+" is live :"+live[start]);
876 }
877
878
879 /**
880 * Constructs a two state NFA for char class regexps,
881 * such that the NFA has
882 *
883 * exactly one start state,
884 * exactly one end state,
885 * no transitions leading out of the end state
886 * no transitions leading into the start state
887 *
888 * Assumes that regExp.isCharClass(macros) == true
889 *
890 * @param regExp the regular expression to construct the
891 * NFA for
892 *
893 * @return a pair of integers denoting the index of start
894 * and end state of the NFA.
895 */
896 private void insertCCLNFA(RegExp regExp, int start, int end) {
897 switch (regExp.type) {
898
899 case sym.BAR:
900 RegExp2 r = (RegExp2) regExp;
901 insertCCLNFA(r.r1, start, end);
902 insertCCLNFA(r.r2, start, end);
903 return;
904
905 case sym.CCLASS:
906 insertClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
907 return;
908
909 case sym.CCLASSNOT:
910 insertNotClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
911 return;
912
913 case sym.CHAR:
914 insertLetterNFA(
915 false, ((Character) ((RegExp1) regExp).content).charValue(),
916 start, end);
917 return;
918
919 case sym.CHAR_I:
920 insertLetterNFA(
921 true, ((Character) ((RegExp1) regExp).content).charValue(),
922 start, end);
923 return;
924
925 case sym.MACROUSE:
926 insertCCLNFA(macros.getDefinition((String) ((RegExp1) regExp).content),
927 start, end);
928 return;
929 }
930
931 throw new Error("Unknown expression type "+regExp.type+" in NFA construction");
932 }
933
934
935 /**
936 * Constructs an NFA for regExp such that the NFA has
937 *
938 * exactly one start state,
939 * exactly one end state,
940 * no transitions leading out of the end state
941 * no transitions leading into the start state
942 *
943 * @param regExp the regular expression to construct the
944 * NFA for
945 *
946 * @return a pair of integers denoting the index of start
947 * and end state of the NFA.
948 */
949 public IntPair insertNFA(RegExp regExp) {
950
951 IntPair nfa1, nfa2;
952 int start, end;
953 RegExp2 r;
954
955 if (Options.DEBUG)
956 Out.debug("Inserting RegExp : "+regExp);
957
958 if (regExp.isCharClass(macros)) {
959 start = numStates;
960 end = numStates+1;
961
962 ensureCapacity(end+1);
963 if (end+1 > numStates) numStates = end+1;
964
965 insertCCLNFA(regExp, start, end);
966
967 return new IntPair(start, end);
968 }
969
970 switch (regExp.type) {
971
972 case sym.BAR:
973
974 r = (RegExp2) regExp;
975
976 nfa1 = insertNFA(r.r1);
977 nfa2 = insertNFA(r.r2);
978
979 start = nfa2.end+1;
980 end = nfa2.end+2;
981
982 addEpsilonTransition(start, nfa1.start);
983 addEpsilonTransition(start, nfa2.start);
984 addEpsilonTransition(nfa1.end, end);
985 addEpsilonTransition(nfa2.end, end);
986
987 return new IntPair(start, end);
988
989 case sym.CONCAT:
990
991 r = (RegExp2) regExp;
992
993 nfa1 = insertNFA(r.r1);
994 nfa2 = insertNFA(r.r2);
995
996 addEpsilonTransition(nfa1.end, nfa2.start);
997
998 return new IntPair(nfa1.start, nfa2.end);
999
1000 case sym.STAR:
1001 nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
1002
1003 start = nfa1.end+1;
1004 end = nfa1.end+2;
1005
1006 addEpsilonTransition(nfa1.end, end);
1007 addEpsilonTransition(start, nfa1.start);
1008
1009 addEpsilonTransition(start, end);
1010 addEpsilonTransition(nfa1.end, nfa1.start);
1011
1012 return new IntPair(start, end);
1013
1014 case sym.PLUS:
1015 nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
1016
1017 start = nfa1.end+1;
1018 end = nfa1.end+2;
1019
1020 addEpsilonTransition(nfa1.end, end);
1021 addEpsilonTransition(start, nfa1.start);
1022
1023 addEpsilonTransition(nfa1.end, nfa1.start);
1024
1025 return new IntPair(start, end);
1026
1027 case sym.QUESTION:
1028 nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
1029
1030 addEpsilonTransition(nfa1.start, nfa1.end);
1031
1032 return new IntPair(nfa1.start, nfa1.end);
1033
1034 case sym.BANG:
1035 return complement(insertNFA((RegExp) ((RegExp1) regExp).content));
1036
1037 case sym.TILDE:
1038 return insertNFA(regExp.resolveTilde(macros));
1039
1040 case sym.STRING:
1041 return insertStringNFA(false, (String) ((RegExp1) regExp).content );
1042
1043 case sym.STRING_I:
1044 return insertStringNFA(true, (String) ((RegExp1) regExp).content );
1045
1046 case sym.MACROUSE:
1047 return insertNFA(macros.getDefinition((String) ((RegExp1) regExp).content));
1048 }
1049
1050 throw new Error("Unknown expression type "+regExp.type+" in NFA construction");
1051 }
1052}
Note: See TracBrowser for help on using the repository browser.