source: other-projects/rsyntax-textarea/devel-packages/jflex-1.4.3/src/java_cup/runtime/lr_parser.java@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut an a text edit area for GLI that supports color syntax highlighting

File size: 43.8 KB
Line 
1
2package java_cup.runtime;
3
4import java.util.Stack;
5
6/** This class implements a skeleton table driven LR parser. In general,
7 * LR parsers are a form of bottom up shift-reduce parsers. Shift-reduce
8 * parsers act by shifting input onto a parse stack until the Symbols
9 * matching the right hand side of a production appear on the top of the
10 * stack. Once this occurs, a reduce is performed. This involves removing
11 * the Symbols corresponding to the right hand side of the production
12 * (the so called "handle") and replacing them with the non-terminal from
13 * the left hand side of the production. <p>
14 *
15 * To control the decision of whether to shift or reduce at any given point,
16 * the parser uses a state machine (the "viable prefix recognition machine"
17 * built by the parser generator). The current state of the machine is placed
18 * on top of the parse stack (stored as part of a Symbol object representing
19 * a terminal or non terminal). The parse action table is consulted
20 * (using the current state and the current lookahead Symbol as indexes) to
21 * determine whether to shift or to reduce. When the parser shifts, it
22 * changes to a new state by pushing a new Symbol (containing a new state)
23 * onto the stack. When the parser reduces, it pops the handle (right hand
24 * side of a production) off the stack. This leaves the parser in the state
25 * it was in before any of those Symbols were matched. Next the reduce-goto
26 * table is consulted (using the new state and current lookahead Symbol as
27 * indexes) to determine a new state to go to. The parser then shifts to
28 * this goto state by pushing the left hand side Symbol of the production
29 * (also containing the new state) onto the stack.<p>
30 *
31 * This class actually provides four LR parsers. The methods parse() and
32 * debug_parse() provide two versions of the main parser (the only difference
33 * being that debug_parse() emits debugging trace messages as it parses).
34 * In addition to these main parsers, the error recovery mechanism uses two
35 * more. One of these is used to simulate "parsing ahead" in the input
36 * without carrying out actions (to verify that a potential error recovery
37 * has worked), and the other is used to parse through buffered "parse ahead"
38 * input in order to execute all actions and re-synchronize the actual parser
39 * configuration.<p>
40 *
41 * This is an abstract class which is normally filled out by a subclass
42 * generated by the JavaCup parser generator. In addition to supplying
43 * the actual parse tables, generated code also supplies methods which
44 * invoke various pieces of user supplied code, provide access to certain
45 * special Symbols (e.g., EOF and error), etc. Specifically, the following
46 * abstract methods are normally supplied by generated code:
47 * <dl compact>
48 * <dt> short[][] production_table()
49 * <dd> Provides a reference to the production table (indicating the index of
50 * the left hand side non terminal and the length of the right hand side
51 * for each production in the grammar).
52 * <dt> short[][] action_table()
53 * <dd> Provides a reference to the parse action table.
54 * <dt> short[][] reduce_table()
55 * <dd> Provides a reference to the reduce-goto table.
56 * <dt> int start_state()
57 * <dd> Indicates the index of the start state.
58 * <dt> int start_production()
59 * <dd> Indicates the index of the starting production.
60 * <dt> int EOF_sym()
61 * <dd> Indicates the index of the EOF Symbol.
62 * <dt> int error_sym()
63 * <dd> Indicates the index of the error Symbol.
64 * <dt> Symbol do_action()
65 * <dd> Executes a piece of user supplied action code. This always comes at
66 * the point of a reduce in the parse, so this code also allocates and
67 * fills in the left hand side non terminal Symbol object that is to be
68 * pushed onto the stack for the reduce.
69 * <dt> void init_actions()
70 * <dd> Code to initialize a special object that encapsulates user supplied
71 * actions (this object is used by do_action() to actually carry out the
72 * actions).
73 * </dl>
74 *
75 * In addition to these routines that <i>must</i> be supplied by the
76 * generated subclass there are also a series of routines that <i>may</i>
77 * be supplied. These include:
78 * <dl>
79 * <dt> Symbol scan()
80 * <dd> Used to get the next input Symbol from the scanner.
81 * <dt> Scanner getScanner()
82 * <dd> Used to provide a scanner for the default implementation of
83 * scan().
84 * <dt> int error_sync_size()
85 * <dd> This determines how many Symbols past the point of an error
86 * must be parsed without error in order to consider a recovery to
87 * be valid. This defaults to 3. Values less than 2 are not
88 * recommended.
89 * <dt> void report_error(String message, Object info)
90 * <dd> This method is called to report an error. The default implementation
91 * simply prints a message to System.err and where the error occurred.
92 * This method is often replaced in order to provide a more sophisticated
93 * error reporting mechanism.
94 * <dt> void report_fatal_error(String message, Object info)
95 * <dd> This method is called when a fatal error that cannot be recovered from
96 * is encountered. In the default implementation, it calls
97 * report_error() to emit a message, then throws an exception.
98 * <dt> void syntax_error(Symbol cur_token)
99 * <dd> This method is called as soon as syntax error is detected (but
100 * before recovery is attempted). In the default implementation it
101 * invokes: report_error("Syntax error", null);
102 * <dt> void unrecovered_syntax_error(Symbol cur_token)
103 * <dd> This method is called if syntax error recovery fails. In the default
104 * implementation it invokes:<br>
105 * report_fatal_error("Couldn't repair and continue parse", null);
106 * </dl>
107 *
108 * @see java_cup.runtime.Symbol
109 * @see java_cup.runtime.Symbol
110 * @see java_cup.runtime.virtual_parse_stack
111 * @version last updated: 7/3/96
112 * @author Frank Flannery
113 */
114
115public abstract class lr_parser {
116 /*-----------------------------------------------------------*/
117 /*--- Constructor(s) ----------------------------------------*/
118 /*-----------------------------------------------------------*/
119
120 /**
121 * Simple constructor.
122 */
123 public lr_parser() {
124 }
125
126 /**
127 * Constructor that sets the default scanner. [CSA/davidm]
128 */
129 public lr_parser(Scanner s) {
130 this(s,new DefaultSymbolFactory()); // TUM 20060327 old cup v10 Symbols as default
131 }
132 /**
133 * Constructor that sets the default scanner and a SymbolFactory
134 */
135 public lr_parser(Scanner s, SymbolFactory symfac) {
136 this(); // in case default constructor someday does something
137 symbolFactory = symfac;
138 setScanner(s);
139 }
140 public SymbolFactory symbolFactory;// = new DefaultSymbolFactory();
141 /**
142 * Whenever creation of a new Symbol is necessary, one should use this factory.
143 */
144 public SymbolFactory getSymbolFactory(){
145 return symbolFactory;
146 }
147 /*-----------------------------------------------------------*/
148 /*--- (Access to) Static (Class) Variables ------------------*/
149 /*-----------------------------------------------------------*/
150
151 /** The default number of Symbols after an error we much match to consider
152 * it recovered from.
153 */
154 protected final static int _error_sync_size = 3;
155
156 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
157
158 /** The number of Symbols after an error we much match to consider it
159 * recovered from.
160 */
161 protected int error_sync_size() {return _error_sync_size; }
162
163 /*-----------------------------------------------------------*/
164 /*--- (Access to) Instance Variables ------------------------*/
165 /*-----------------------------------------------------------*/
166
167 /** Table of production information (supplied by generated subclass).
168 * This table contains one entry per production and is indexed by
169 * the negative-encoded values (reduce actions) in the action_table.
170 * Each entry has two parts, the index of the non-terminal on the
171 * left hand side of the production, and the number of Symbols
172 * on the right hand side.
173 */
174 public abstract short[][] production_table();
175
176 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
177
178 /** The action table (supplied by generated subclass). This table is
179 * indexed by state and terminal number indicating what action is to
180 * be taken when the parser is in the given state (i.e., the given state
181 * is on top of the stack) and the given terminal is next on the input.
182 * States are indexed using the first dimension, however, the entries for
183 * a given state are compacted and stored in adjacent index, value pairs
184 * which are searched for rather than accessed directly (see get_action()).
185 * The actions stored in the table will be either shifts, reduces, or
186 * errors. Shifts are encoded as positive values (one greater than the
187 * state shifted to). Reduces are encoded as negative values (one less
188 * than the production reduced by). Error entries are denoted by zero.
189 *
190 * @see java_cup.runtime.lr_parser#get_action
191 */
192 public abstract short[][] action_table();
193
194 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
195
196 /** The reduce-goto table (supplied by generated subclass). This
197 * table is indexed by state and non-terminal number and contains
198 * state numbers. States are indexed using the first dimension, however,
199 * the entries for a given state are compacted and stored in adjacent
200 * index, value pairs which are searched for rather than accessed
201 * directly (see get_reduce()). When a reduce occurs, the handle
202 * (corresponding to the RHS of the matched production) is popped off
203 * the stack. The new top of stack indicates a state. This table is
204 * then indexed by that state and the LHS of the reducing production to
205 * indicate where to "shift" to.
206 *
207 * @see java_cup.runtime.lr_parser#get_reduce
208 */
209 public abstract short[][] reduce_table();
210
211 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
212
213 /** The index of the start state (supplied by generated subclass). */
214 public abstract int start_state();
215
216 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
217
218 /** The index of the start production (supplied by generated subclass). */
219 public abstract int start_production();
220
221 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
222
223 /** The index of the end of file terminal Symbol (supplied by generated
224 * subclass).
225 */
226 public abstract int EOF_sym();
227
228 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
229
230 /** The index of the special error Symbol (supplied by generated subclass). */
231 public abstract int error_sym();
232
233 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
234
235 /** Internal flag to indicate when parser should quit. */
236 protected boolean _done_parsing = false;
237
238 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
239
240 /** This method is called to indicate that the parser should quit. This is
241 * normally called by an accept action, but can be used to cancel parsing
242 * early in other circumstances if desired.
243 */
244 public void done_parsing()
245 {
246 _done_parsing = true;
247 }
248
249 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
250 /* Global parse state shared by parse(), error recovery, and
251 * debugging routines */
252 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
253
254 /** Indication of the index for top of stack (for use by actions). */
255 protected int tos;
256
257 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
258
259 /** The current lookahead Symbol. */
260 protected Symbol cur_token;
261
262 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
263
264 /** The parse stack itself. */
265 protected Stack stack = new Stack();
266
267 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
268
269 /** Direct reference to the production table. */
270 protected short[][] production_tab;
271
272 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
273
274 /** Direct reference to the action table. */
275 protected short[][] action_tab;
276
277 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
278
279 /** Direct reference to the reduce-goto table. */
280 protected short[][] reduce_tab;
281
282 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
283
284 /** This is the scanner object used by the default implementation
285 * of scan() to get Symbols. To avoid name conflicts with existing
286 * code, this field is private. [CSA/davidm] */
287 private Scanner _scanner;
288
289 /**
290 * Simple accessor method to set the default scanner.
291 */
292 public void setScanner(Scanner s) { _scanner = s; }
293
294 /**
295 * Simple accessor method to get the default scanner.
296 */
297 public Scanner getScanner() { return _scanner; }
298
299 /*-----------------------------------------------------------*/
300 /*--- General Methods ---------------------------------------*/
301 /*-----------------------------------------------------------*/
302
303 /** Perform a bit of user supplied action code (supplied by generated
304 * subclass). Actions are indexed by an internal action number assigned
305 * at parser generation time.
306 *
307 * @param act_num the internal index of the action to be performed.
308 * @param parser the parser object we are acting for.
309 * @param stack the parse stack of that object.
310 * @param top the index of the top element of the parse stack.
311 */
312 public abstract Symbol do_action(
313 int act_num,
314 lr_parser parser,
315 Stack stack,
316 int top)
317 throws java.lang.Exception;
318
319 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
320
321 /** User code for initialization inside the parser. Typically this
322 * initializes the scanner. This is called before the parser requests
323 * the first Symbol. Here this is just a placeholder for subclasses that
324 * might need this and we perform no action. This method is normally
325 * overridden by the generated code using this contents of the "init with"
326 * clause as its body.
327 */
328 public void user_init() throws java.lang.Exception { }
329
330 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
331
332 /** Initialize the action object. This is called before the parser does
333 * any parse actions. This is filled in by generated code to create
334 * an object that encapsulates all action code.
335 */
336 protected abstract void init_actions() throws java.lang.Exception;
337
338 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
339
340 /** Get the next Symbol from the input (supplied by generated subclass).
341 * Once end of file has been reached, all subsequent calls to scan
342 * should return an EOF Symbol (which is Symbol number 0). By default
343 * this method returns getScanner().next_token(); this implementation
344 * can be overriden by the generated parser using the code declared in
345 * the "scan with" clause. Do not recycle objects; every call to
346 * scan() should return a fresh object.
347 */
348 public Symbol scan() throws java.lang.Exception {
349 Symbol sym = getScanner().next_token();
350 return (sym!=null) ? sym : getSymbolFactory().newSymbol("END_OF_FILE",EOF_sym());
351 }
352
353 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
354
355 /** Report a fatal error. This method takes a message string and an
356 * additional object (to be used by specializations implemented in
357 * subclasses). Here in the base class a very simple implementation
358 * is provided which reports the error then throws an exception.
359 *
360 * @param message an error message.
361 * @param info an extra object reserved for use by specialized subclasses.
362 */
363 public void report_fatal_error(
364 String message,
365 Object info)
366 throws java.lang.Exception
367 {
368 /* stop parsing (not really necessary since we throw an exception, but) */
369 done_parsing();
370
371 /* use the normal error message reporting to put out the message */
372 report_error(message, info);
373
374 /* throw an exception */
375 throw new Exception("Can't recover from previous error(s)");
376 }
377
378 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
379
380 /** Report a non fatal error (or warning). This method takes a message
381 * string and an additional object (to be used by specializations
382 * implemented in subclasses). Here in the base class a very simple
383 * implementation is provided which simply prints the message to
384 * System.err.
385 *
386 * @param message an error message.
387 * @param info an extra object reserved for use by specialized subclasses.
388 */
389 public void report_error(String message, Object info)
390 {
391 System.err.print(message);
392 System.err.flush();
393 if (info instanceof Symbol)
394 if (((Symbol)info).left != -1)
395 System.err.println(" at character " + ((Symbol)info).left +
396 " of input");
397 else System.err.println("");
398 else System.err.println("");
399 }
400
401 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
402
403 /** This method is called when a syntax error has been detected and recovery
404 * is about to be invoked. Here in the base class we just emit a
405 * "Syntax error" error message.
406 *
407 * @param cur_token the current lookahead Symbol.
408 */
409 public void syntax_error(Symbol cur_token)
410 {
411 report_error("Syntax error", cur_token);
412 }
413
414 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
415
416 /** This method is called if it is determined that syntax error recovery
417 * has been unsuccessful. Here in the base class we report a fatal error.
418 *
419 * @param cur_token the current lookahead Symbol.
420 */
421 public void unrecovered_syntax_error(Symbol cur_token)
422 throws java.lang.Exception
423 {
424 report_fatal_error("Couldn't repair and continue parse", cur_token);
425 }
426
427 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
428
429 /** Fetch an action from the action table. The table is broken up into
430 * rows, one per state (rows are indexed directly by state number).
431 * Within each row, a list of index, value pairs are given (as sequential
432 * entries in the table), and the list is terminated by a default entry
433 * (denoted with a Symbol index of -1). To find the proper entry in a row
434 * we do a linear or binary search (depending on the size of the row).
435 *
436 * @param state the state index of the action being accessed.
437 * @param sym the Symbol index of the action being accessed.
438 */
439 protected final short get_action(int state, int sym)
440 {
441 short tag;
442 int first, last, probe;
443 short[] row = action_tab[state];
444
445 /* linear search if we are < 10 entries */
446 if (row.length < 20)
447 for (probe = 0; probe < row.length; probe++)
448 {
449 /* is this entry labeled with our Symbol or the default? */
450 tag = row[probe++];
451 if (tag == sym || tag == -1)
452 {
453 /* return the next entry */
454 return row[probe];
455 }
456 }
457 /* otherwise binary search */
458 else
459 {
460 first = 0;
461 last = (row.length-1)/2 - 1; /* leave out trailing default entry */
462 while (first <= last)
463 {
464 probe = (first+last)/2;
465 if (sym == row[probe*2])
466 return row[probe*2+1];
467 else if (sym > row[probe*2])
468 first = probe+1;
469 else
470 last = probe-1;
471 }
472
473 /* not found, use the default at the end */
474 return row[row.length-1];
475 }
476
477 /* shouldn't happened, but if we run off the end we return the
478 default (error == 0) */
479 return 0;
480 }
481
482 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
483
484 /** Fetch a state from the reduce-goto table. The table is broken up into
485 * rows, one per state (rows are indexed directly by state number).
486 * Within each row, a list of index, value pairs are given (as sequential
487 * entries in the table), and the list is terminated by a default entry
488 * (denoted with a Symbol index of -1). To find the proper entry in a row
489 * we do a linear search.
490 *
491 * @param state the state index of the entry being accessed.
492 * @param sym the Symbol index of the entry being accessed.
493 */
494 protected final short get_reduce(int state, int sym)
495 {
496 short tag;
497 short[] row = reduce_tab[state];
498
499 /* if we have a null row we go with the default */
500 if (row == null)
501 return -1;
502
503 for (int probe = 0; probe < row.length; probe++)
504 {
505 /* is this entry labeled with our Symbol or the default? */
506 tag = row[probe++];
507 if (tag == sym || tag == -1)
508 {
509 /* return the next entry */
510 return row[probe];
511 }
512 }
513 /* if we run off the end we return the default (error == -1) */
514 return -1;
515 }
516
517 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
518
519 /** This method provides the main parsing routine. It returns only when
520 * done_parsing() has been called (typically because the parser has
521 * accepted, or a fatal error has been reported). See the header
522 * documentation for the class regarding how shift/reduce parsers operate
523 * and how the various tables are used.
524 */
525 public Symbol parse() throws java.lang.Exception
526 {
527 /* the current action code */
528 int act;
529
530 /* the Symbol/stack element returned by a reduce */
531 Symbol lhs_sym = null;
532
533 /* information about production being reduced with */
534 short handle_size, lhs_sym_num;
535
536 /* set up direct reference to tables to drive the parser */
537
538 production_tab = production_table();
539 action_tab = action_table();
540 reduce_tab = reduce_table();
541
542 /* initialize the action encapsulation object */
543 init_actions();
544
545 /* do user initialization */
546 user_init();
547
548 /* get the first token */
549 cur_token = scan();
550
551 /* push dummy Symbol with start state to get us underway */
552 stack.removeAllElements();
553 stack.push(getSymbolFactory().startSymbol("START", 0, start_state()));
554 tos = 0;
555
556 /* continue until we are told to stop */
557 for (_done_parsing = false; !_done_parsing; )
558 {
559 /* Check current token for freshness. */
560 if (cur_token.used_by_parser)
561 throw new Error("Symbol recycling detected (fix your scanner).");
562
563 /* current state is always on the top of the stack */
564
565 /* look up action out of the current state with the current input */
566 act = get_action(((Symbol)stack.peek()).parse_state, cur_token.sym);
567
568 /* decode the action -- > 0 encodes shift */
569 if (act > 0)
570 {
571 /* shift to the encoded state by pushing it on the stack */
572 cur_token.parse_state = act-1;
573 cur_token.used_by_parser = true;
574 stack.push(cur_token);
575 tos++;
576
577 /* advance to the next Symbol */
578 cur_token = scan();
579 }
580 /* if its less than zero, then it encodes a reduce action */
581 else if (act < 0)
582 {
583 /* perform the action for the reduce */
584 lhs_sym = do_action((-act)-1, this, stack, tos);
585
586 /* look up information about the production */
587 lhs_sym_num = production_tab[(-act)-1][0];
588 handle_size = production_tab[(-act)-1][1];
589
590 /* pop the handle off the stack */
591 for (int i = 0; i < handle_size; i++)
592 {
593 stack.pop();
594 tos--;
595 }
596
597 /* look up the state to go to from the one popped back to */
598 act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
599
600 /* shift to that state */
601 lhs_sym.parse_state = act;
602 lhs_sym.used_by_parser = true;
603 stack.push(lhs_sym);
604 tos++;
605 }
606 /* finally if the entry is zero, we have an error */
607 else if (act == 0)
608 {
609 /* call user syntax error reporting routine */
610 syntax_error(cur_token);
611
612 /* try to error recover */
613 if (!error_recovery(false))
614 {
615 /* if that fails give up with a fatal syntax error */
616 unrecovered_syntax_error(cur_token);
617
618 /* just in case that wasn't fatal enough, end parse */
619 done_parsing();
620 } else {
621 lhs_sym = (Symbol)stack.peek();
622 }
623 }
624 }
625 return lhs_sym;
626 }
627
628 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
629
630 /** Write a debugging message to System.err for the debugging version
631 * of the parser.
632 *
633 * @param mess the text of the debugging message.
634 */
635 public void debug_message(String mess)
636 {
637 System.err.println(mess);
638 }
639
640 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
641
642 /** Dump the parse stack for debugging purposes. */
643 public void dump_stack()
644 {
645 if (stack == null)
646 {
647 debug_message("# Stack dump requested, but stack is null");
648 return;
649 }
650
651 debug_message("============ Parse Stack Dump ============");
652
653 /* dump the stack */
654 for (int i=0; i<stack.size(); i++)
655 {
656 debug_message("Symbol: " + ((Symbol)stack.elementAt(i)).sym +
657 " State: " + ((Symbol)stack.elementAt(i)).parse_state);
658 }
659 debug_message("==========================================");
660 }
661
662 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
663
664 /** Do debug output for a reduce.
665 *
666 * @param prod_num the production we are reducing with.
667 * @param nt_num the index of the LHS non terminal.
668 * @param rhs_size the size of the RHS.
669 */
670 public void debug_reduce(int prod_num, int nt_num, int rhs_size)
671 {
672 debug_message("# Reduce with prod #" + prod_num + " [NT=" + nt_num +
673 ", " + "SZ=" + rhs_size + "]");
674 }
675
676 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
677
678 /** Do debug output for shift.
679 *
680 * @param shift_tkn the Symbol being shifted onto the stack.
681 */
682 public void debug_shift(Symbol shift_tkn)
683 {
684 debug_message("# Shift under term #" + shift_tkn.sym +
685 " to state #" + shift_tkn.parse_state);
686 }
687
688 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
689
690 /** Do debug output for stack state. [CSA]
691 */
692 public void debug_stack() {
693 StringBuffer sb=new StringBuffer("## STACK:");
694 for (int i=0; i<stack.size(); i++) {
695 Symbol s = (Symbol) stack.elementAt(i);
696 sb.append(" <state "+s.parse_state+", sym "+s.sym+">");
697 if ((i%3)==2 || (i==(stack.size()-1))) {
698 debug_message(sb.toString());
699 sb = new StringBuffer(" ");
700 }
701 }
702 }
703
704 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
705
706 /** Perform a parse with debugging output. This does exactly the
707 * same things as parse(), except that it calls debug_shift() and
708 * debug_reduce() when shift and reduce moves are taken by the parser
709 * and produces various other debugging messages.
710 */
711 public Symbol debug_parse()
712 throws java.lang.Exception
713 {
714 /* the current action code */
715 int act;
716
717 /* the Symbol/stack element returned by a reduce */
718 Symbol lhs_sym = null;
719
720 /* information about production being reduced with */
721 short handle_size, lhs_sym_num;
722
723 /* set up direct reference to tables to drive the parser */
724 production_tab = production_table();
725 action_tab = action_table();
726 reduce_tab = reduce_table();
727
728 debug_message("# Initializing parser");
729
730 /* initialize the action encapsulation object */
731 init_actions();
732
733 /* do user initialization */
734 user_init();
735
736 /* the current Symbol */
737 cur_token = scan();
738
739 debug_message("# Current Symbol is #" + cur_token.sym);
740
741 /* push dummy Symbol with start state to get us underway */
742 stack.removeAllElements();
743 stack.push(getSymbolFactory().startSymbol("START",0, start_state()));
744 tos = 0;
745
746 /* continue until we are told to stop */
747 for (_done_parsing = false; !_done_parsing; )
748 {
749 /* Check current token for freshness. */
750 if (cur_token.used_by_parser)
751 throw new Error("Symbol recycling detected (fix your scanner).");
752
753 /* current state is always on the top of the stack */
754 //debug_stack();
755
756 /* look up action out of the current state with the current input */
757 act = get_action(((Symbol)stack.peek()).parse_state, cur_token.sym);
758
759 /* decode the action -- > 0 encodes shift */
760 if (act > 0)
761 {
762 /* shift to the encoded state by pushing it on the stack */
763 cur_token.parse_state = act-1;
764 cur_token.used_by_parser = true;
765 debug_shift(cur_token);
766 stack.push(cur_token);
767 tos++;
768
769 /* advance to the next Symbol */
770 cur_token = scan();
771 debug_message("# Current token is " + cur_token);
772 }
773 /* if its less than zero, then it encodes a reduce action */
774 else if (act < 0)
775 {
776 /* perform the action for the reduce */
777 lhs_sym = do_action((-act)-1, this, stack, tos);
778
779 /* look up information about the production */
780 lhs_sym_num = production_tab[(-act)-1][0];
781 handle_size = production_tab[(-act)-1][1];
782
783 debug_reduce((-act)-1, lhs_sym_num, handle_size);
784
785 /* pop the handle off the stack */
786 for (int i = 0; i < handle_size; i++)
787 {
788 stack.pop();
789 tos--;
790 }
791
792 /* look up the state to go to from the one popped back to */
793 act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
794 debug_message("# Reduce rule: top state " +
795 ((Symbol)stack.peek()).parse_state +
796 ", lhs sym " + lhs_sym_num + " -> state " + act);
797
798 /* shift to that state */
799 lhs_sym.parse_state = act;
800 lhs_sym.used_by_parser = true;
801 stack.push(lhs_sym);
802 tos++;
803
804 debug_message("# Goto state #" + act);
805 }
806 /* finally if the entry is zero, we have an error */
807 else if (act == 0)
808 {
809 /* call user syntax error reporting routine */
810 syntax_error(cur_token);
811
812 /* try to error recover */
813 if (!error_recovery(true))
814 {
815 /* if that fails give up with a fatal syntax error */
816 unrecovered_syntax_error(cur_token);
817
818 /* just in case that wasn't fatal enough, end parse */
819 done_parsing();
820 } else {
821 lhs_sym = (Symbol)stack.peek();
822 }
823 }
824 }
825 return lhs_sym;
826 }
827
828 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
829 /* Error recovery code */
830 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
831
832 /** Attempt to recover from a syntax error. This returns false if recovery
833 * fails, true if it succeeds. Recovery happens in 4 steps. First we
834 * pop the parse stack down to a point at which we have a shift out
835 * of the top-most state on the error Symbol. This represents the
836 * initial error recovery configuration. If no such configuration is
837 * found, then we fail. Next a small number of "lookahead" or "parse
838 * ahead" Symbols are read into a buffer. The size of this buffer is
839 * determined by error_sync_size() and determines how many Symbols beyond
840 * the error must be matched to consider the recovery a success. Next,
841 * we begin to discard Symbols in attempt to get past the point of error
842 * to a point where we can continue parsing. After each Symbol, we attempt
843 * to "parse ahead" though the buffered lookahead Symbols. The "parse ahead"
844 * process simulates that actual parse, but does not modify the real
845 * parser's configuration, nor execute any actions. If we can parse all
846 * the stored Symbols without error, then the recovery is considered a
847 * success. Once a successful recovery point is determined, we do an
848 * actual parse over the stored input -- modifying the real parse
849 * configuration and executing all actions. Finally, we return the the
850 * normal parser to continue with the overall parse.
851 *
852 * @param debug should we produce debugging messages as we parse.
853 */
854 protected boolean error_recovery(boolean debug)
855 throws java.lang.Exception
856 {
857 if (debug) debug_message("# Attempting error recovery");
858
859 /* first pop the stack back into a state that can shift on error and
860 do that shift (if that fails, we fail) */
861 if (!find_recovery_config(debug))
862 {
863 if (debug) debug_message("# Error recovery fails");
864 return false;
865 }
866
867 /* read ahead to create lookahead we can parse multiple times */
868 read_lookahead();
869
870 /* repeatedly try to parse forward until we make it the required dist */
871 for (;;)
872 {
873 /* try to parse forward, if it makes it, bail out of loop */
874 if (debug) debug_message("# Trying to parse ahead");
875 if (try_parse_ahead(debug))
876 {
877 break;
878 }
879
880 /* if we are now at EOF, we have failed */
881 if (lookahead[0].sym == EOF_sym())
882 {
883 if (debug) debug_message("# Error recovery fails at EOF");
884 return false;
885 }
886
887 /* otherwise, we consume another Symbol and try again */
888 // BUG FIX by Bruce Hutton
889 // Computer Science Department, University of Auckland,
890 // Auckland, New Zealand.
891 // It is the first token that is being consumed, not the one
892 // we were up to parsing
893 if (debug)
894 debug_message("# Consuming Symbol #" + lookahead[ 0 ].sym);
895 restart_lookahead();
896 }
897
898 /* we have consumed to a point where we can parse forward */
899 if (debug) debug_message("# Parse-ahead ok, going back to normal parse");
900
901 /* do the real parse (including actions) across the lookahead */
902 parse_lookahead(debug);
903
904 /* we have success */
905 return true;
906 }
907
908 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
909
910 /** Determine if we can shift under the special error Symbol out of the
911 * state currently on the top of the (real) parse stack.
912 */
913 protected boolean shift_under_error()
914 {
915 /* is there a shift under error Symbol */
916 return get_action(((Symbol)stack.peek()).parse_state, error_sym()) > 0;
917 }
918
919 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
920
921 /** Put the (real) parse stack into error recovery configuration by
922 * popping the stack down to a state that can shift on the special
923 * error Symbol, then doing the shift. If no suitable state exists on
924 * the stack we return false
925 *
926 * @param debug should we produce debugging messages as we parse.
927 */
928 protected boolean find_recovery_config(boolean debug)
929 {
930 Symbol error_token;
931 int act;
932
933 if (debug) debug_message("# Finding recovery state on stack");
934
935 /* Remember the right-position of the top symbol on the stack */
936 Symbol right = ((Symbol)stack.peek());// TUM 20060327 removed .right
937 Symbol left = right;// TUM 20060327 removed .left
938
939 /* pop down until we can shift under error Symbol */
940 while (!shift_under_error())
941 {
942 /* pop the stack */
943 if (debug)
944 debug_message("# Pop stack by one, state was # " +
945 ((Symbol)stack.peek()).parse_state);
946 left = ((Symbol)stack.pop()); // TUM 20060327 removed .left
947 tos--;
948
949 /* if we have hit bottom, we fail */
950 if (stack.empty())
951 {
952 if (debug) debug_message("# No recovery state found on stack");
953 return false;
954 }
955 }
956
957 /* state on top of the stack can shift under error, find the shift */
958 act = get_action(((Symbol)stack.peek()).parse_state, error_sym());
959 if (debug)
960 {
961 debug_message("# Recover state found (#" +
962 ((Symbol)stack.peek()).parse_state + ")");
963 debug_message("# Shifting on error to state #" + (act-1));
964 }
965
966 /* build and shift a special error Symbol */
967 error_token = getSymbolFactory().newSymbol("ERROR",error_sym(), left, right);
968 error_token.parse_state = act-1;
969 error_token.used_by_parser = true;
970 stack.push(error_token);
971 tos++;
972
973 return true;
974 }
975
976 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
977
978 /** Lookahead Symbols used for attempting error recovery "parse aheads". */
979 protected Symbol lookahead[];
980
981 /** Position in lookahead input buffer used for "parse ahead". */
982 protected int lookahead_pos;
983
984 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
985
986 /** Read from input to establish our buffer of "parse ahead" lookahead
987 * Symbols.
988 */
989 protected void read_lookahead() throws java.lang.Exception
990 {
991 /* create the lookahead array */
992 lookahead = new Symbol[error_sync_size()];
993
994 /* fill in the array */
995 for (int i = 0; i < error_sync_size(); i++)
996 {
997 lookahead[i] = cur_token;
998 cur_token = scan();
999 }
1000
1001 /* start at the beginning */
1002 lookahead_pos = 0;
1003 }
1004
1005 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1006
1007 /** Return the current lookahead in our error "parse ahead" buffer. */
1008 protected Symbol cur_err_token() { return lookahead[lookahead_pos]; }
1009
1010 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1011
1012 /** Advance to next "parse ahead" input Symbol. Return true if we have
1013 * input to advance to, false otherwise.
1014 */
1015 protected boolean advance_lookahead()
1016 {
1017 /* advance the input location */
1018 lookahead_pos++;
1019
1020 /* return true if we didn't go off the end */
1021 return lookahead_pos < error_sync_size();
1022 }
1023
1024 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1025
1026 /** Reset the parse ahead input to one Symbol past where we started error
1027 * recovery (this consumes one new Symbol from the real input).
1028 */
1029 protected void restart_lookahead() throws java.lang.Exception
1030 {
1031 /* move all the existing input over */
1032 for (int i = 1; i < error_sync_size(); i++)
1033 lookahead[i-1] = lookahead[i];
1034
1035 /* read a new Symbol into the last spot */
1036 // BUG Fix by Bruce Hutton
1037 // Computer Science Department, University of Auckland,
1038 // Auckland, New Zealand. [applied 5-sep-1999 by csa]
1039 // The following two lines were out of order!!
1040 lookahead[error_sync_size()-1] = cur_token;
1041 cur_token = scan();
1042
1043 /* reset our internal position marker */
1044 lookahead_pos = 0;
1045 }
1046
1047 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1048
1049 /** Do a simulated parse forward (a "parse ahead") from the current
1050 * stack configuration using stored lookahead input and a virtual parse
1051 * stack. Return true if we make it all the way through the stored
1052 * lookahead input without error. This basically simulates the action of
1053 * parse() using only our saved "parse ahead" input, and not executing any
1054 * actions.
1055 *
1056 * @param debug should we produce debugging messages as we parse.
1057 */
1058 protected boolean try_parse_ahead(boolean debug)
1059 throws java.lang.Exception
1060 {
1061 int act;
1062 short lhs, rhs_size;
1063
1064 /* create a virtual stack from the real parse stack */
1065 virtual_parse_stack vstack = new virtual_parse_stack(stack);
1066
1067 /* parse until we fail or get past the lookahead input */
1068 for (;;)
1069 {
1070 /* look up the action from the current state (on top of stack) */
1071 act = get_action(vstack.top(), cur_err_token().sym);
1072
1073 /* if its an error, we fail */
1074 if (act == 0) return false;
1075
1076 /* > 0 encodes a shift */
1077 if (act > 0)
1078 {
1079 /* push the new state on the stack */
1080 vstack.push(act-1);
1081
1082 if (debug) debug_message("# Parse-ahead shifts Symbol #" +
1083 cur_err_token().sym + " into state #" + (act-1));
1084
1085 /* advance simulated input, if we run off the end, we are done */
1086 if (!advance_lookahead()) return true;
1087 }
1088 /* < 0 encodes a reduce */
1089 else
1090 {
1091 /* if this is a reduce with the start production we are done */
1092 if ((-act)-1 == start_production())
1093 {
1094 if (debug) debug_message("# Parse-ahead accepts");
1095 return true;
1096 }
1097
1098 /* get the lhs Symbol and the rhs size */
1099 lhs = production_tab[(-act)-1][0];
1100 rhs_size = production_tab[(-act)-1][1];
1101
1102 /* pop handle off the stack */
1103 for (int i = 0; i < rhs_size; i++)
1104 vstack.pop();
1105
1106 if (debug)
1107 debug_message("# Parse-ahead reduces: handle size = " +
1108 rhs_size + " lhs = #" + lhs + " from state #" + vstack.top());
1109
1110 /* look up goto and push it onto the stack */
1111 vstack.push(get_reduce(vstack.top(), lhs));
1112 if (debug)
1113 debug_message("# Goto state #" + vstack.top());
1114 }
1115 }
1116 }
1117
1118 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1119
1120 /** Parse forward using stored lookahead Symbols. In this case we have
1121 * already verified that parsing will make it through the stored lookahead
1122 * Symbols and we are now getting back to the point at which we can hand
1123 * control back to the normal parser. Consequently, this version of the
1124 * parser performs all actions and modifies the real parse configuration.
1125 * This returns once we have consumed all the stored input or we accept.
1126 *
1127 * @param debug should we produce debugging messages as we parse.
1128 */
1129 protected void parse_lookahead(boolean debug)
1130 throws java.lang.Exception
1131 {
1132 /* the current action code */
1133 int act;
1134
1135 /* the Symbol/stack element returned by a reduce */
1136 Symbol lhs_sym = null;
1137
1138 /* information about production being reduced with */
1139 short handle_size, lhs_sym_num;
1140
1141 /* restart the saved input at the beginning */
1142 lookahead_pos = 0;
1143
1144 if (debug)
1145 {
1146 debug_message("# Reparsing saved input with actions");
1147 debug_message("# Current Symbol is #" + cur_err_token().sym);
1148 debug_message("# Current state is #" +
1149 ((Symbol)stack.peek()).parse_state);
1150 }
1151
1152 /* continue until we accept or have read all lookahead input */
1153 while(!_done_parsing)
1154 {
1155 /* current state is always on the top of the stack */
1156
1157 /* look up action out of the current state with the current input */
1158 act =
1159 get_action(((Symbol)stack.peek()).parse_state, cur_err_token().sym);
1160
1161 /* decode the action -- > 0 encodes shift */
1162 if (act > 0)
1163 {
1164 /* shift to the encoded state by pushing it on the stack */
1165 cur_err_token().parse_state = act-1;
1166 cur_err_token().used_by_parser = true;
1167 if (debug) debug_shift(cur_err_token());
1168 stack.push(cur_err_token());
1169 tos++;
1170
1171 /* advance to the next Symbol, if there is none, we are done */
1172 if (!advance_lookahead())
1173 {
1174 if (debug) debug_message("# Completed reparse");
1175
1176 /* scan next Symbol so we can continue parse */
1177 // BUGFIX by Chris Harris <[email protected]>:
1178 // correct a one-off error by commenting out
1179 // this next line.
1180 /*cur_token = scan();*/
1181
1182 /* go back to normal parser */
1183 return;
1184 }
1185
1186 if (debug)
1187 debug_message("# Current Symbol is #" + cur_err_token().sym);
1188 }
1189 /* if its less than zero, then it encodes a reduce action */
1190 else if (act < 0)
1191 {
1192 /* perform the action for the reduce */
1193 lhs_sym = do_action((-act)-1, this, stack, tos);
1194
1195 /* look up information about the production */
1196 lhs_sym_num = production_tab[(-act)-1][0];
1197 handle_size = production_tab[(-act)-1][1];
1198
1199 if (debug) debug_reduce((-act)-1, lhs_sym_num, handle_size);
1200
1201 /* pop the handle off the stack */
1202 for (int i = 0; i < handle_size; i++)
1203 {
1204 stack.pop();
1205 tos--;
1206 }
1207
1208 /* look up the state to go to from the one popped back to */
1209 act = get_reduce(((Symbol)stack.peek()).parse_state, lhs_sym_num);
1210
1211 /* shift to that state */
1212 lhs_sym.parse_state = act;
1213 lhs_sym.used_by_parser = true;
1214 stack.push(lhs_sym);
1215 tos++;
1216
1217 if (debug) debug_message("# Goto state #" + act);
1218
1219 }
1220 /* finally if the entry is zero, we have an error
1221 (shouldn't happen here, but...)*/
1222 else if (act == 0)
1223 {
1224 report_fatal_error("Syntax error", lhs_sym);
1225 return;
1226 }
1227 }
1228
1229
1230 }
1231
1232 /*-----------------------------------------------------------*/
1233
1234 /** Utility function: unpacks parse tables from strings */
1235 protected static short[][] unpackFromStrings(String[] sa)
1236 {
1237 // Concatanate initialization strings.
1238 StringBuffer sb = new StringBuffer(sa[0]);
1239 for (int i=1; i<sa.length; i++)
1240 sb.append(sa[i]);
1241 int n=0; // location in initialization string
1242 int size1 = (((int)sb.charAt(n))<<16) | ((int)sb.charAt(n+1)); n+=2;
1243 short[][] result = new short[size1][];
1244 for (int i=0; i<size1; i++) {
1245 int size2 = (((int)sb.charAt(n))<<16) | ((int)sb.charAt(n+1)); n+=2;
1246 result[i] = new short[size2];
1247 for (int j=0; j<size2; j++)
1248 result[i][j] = (short) (sb.charAt(n++)-2);
1249 }
1250 return result;
1251 }
1252}
1253
Note: See TracBrowser for help on using the repository browser.