source: release-kits/lirk3/resources/gs3-release-maker/apache-ant-1.6.5/src/etc/testcases/taskdefs/optional/antlr/java.g@ 14982

Last change on this file since 14982 was 14982, checked in by oranfry, 16 years ago

initial import of LiRK3

File size: 28.5 KB
Line 
/** Java 1.2 Recognizer
2 *
3 * Run 'java Main <directory full of java files>'
4 *
5 * Contributing authors:
6 * John Mitchell [email protected]
7 * Terence Parr [email protected]
8 * John Lilley [email protected]
9 * Scott Stanchfield [email protected]
10 * Markus Mohnen [email protected]
11 * Peter Williams [email protected]
12 *
13 * Version 1.00 December 9, 1997 -- initial release
14 * Version 1.01 December 10, 1997
15 * fixed bug in octal def (0..7 not 0..8)
16 * Version 1.10 August 1998 (parrt)
17 * added tree construction
18 * fixed definition of WS,comments for mac,pc,unix newlines
19 * added unary plus
20 * Version 1.11 (Nov 20, 1998)
21 * Added "shutup" option to turn off last ambig warning.
22 * Fixed inner class def to allow named class defs as statements
23 * synchronized requires compound not simple statement
24 * add [] after builtInType DOT class in primaryExpression
25 * "const" is reserved but not valid..removed from modifiers
26 * Version 1.12 (Feb 2, 1999)
27 * Changed LITERAL_xxx to xxx in tree grammar.
28 * Updated java.g to use tokens {...} now for 2.6.0 (new feature).
29 *
30 * Version 1.13 (Apr 23, 1999)
31 * Didn't have (stat)? for else clause in tree parser.
32 * Didn't gen ASTs for interface extends. Updated tree parser too.
33 * Updated to 2.6.0.
34 * Version 1.14 (Jun 20, 1999)
35 * Allowed final/abstract on local classes.
36 * Removed local interfaces from methods
37 * Put instanceof precedence where it belongs...in relationalExpr
38 * It also had expr not type as arg; fixed it.
39 * Missing ! on SEMI in classBlock
40 * fixed: (expr) + "string" was parsed incorrectly (+ as unary plus).
41 * fixed: didn't like Object[].class in parser or tree parser
42 * Version 1.15 (Jun 26, 1999)
43 * Screwed up rule with instanceof in it. :( Fixed.
44 * Tree parser didn't like (expr).something; fixed.
45 * Allowed multiple inheritance in tree grammar. oops.
46 * Version 1.16 (August 22, 1999)
47 * Extending an interface built a wacky tree: had extra EXTENDS.
48 * Tree grammar didn't allow multiple superinterfaces.
49 * Tree grammar didn't allow empty var initializer: {}
50 * Version 1.17 (October 12, 1999)
51 * ESC lexer rule allowed 399 max not 377 max.
52 * java.tree.g didn't handle the expression of synchronized
53 * statements.
54 *
55 * Version tracking now done with following ID:
56 *
57 *
58 * BUG:
59 * Doesn't like boolean.class!
60 *
61 * class Test {
62 * public static void main( String args[] ) {
63 * if (boolean.class.equals(boolean.class)) {
64 * System.out.println("works");
65 * }
66 * }
67 * }
68 *
69 * This grammar is in the PUBLIC DOMAIN
70 */
class JavaRecognizer extends Parser;

// Parser-wide ANTLR options.
options {
    k = 2;                           // two token lookahead
    exportVocab=Java;                // Call its vocabulary "Java"
    codeGenMakeSwitchThreshold = 2;  // Some optimizations
    codeGenBitsetTestThreshold = 3;
    defaultErrorHandler = false;     // Don't generate parser error handlers
    buildAST = true;
}

// Imaginary token types used as AST node types by the rules below; FINAL and
// ABSTRACT are bound to the "final"/"abstract" keyword literals.
tokens {
    BLOCK; MODIFIERS; OBJBLOCK; SLIST; CTOR_DEF; METHOD_DEF; VARIABLE_DEF;
    INSTANCE_INIT; STATIC_INIT; TYPE; CLASS_DEF; INTERFACE_DEF;
    PACKAGE_DEF; ARRAY_DECLARATOR; EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE;
    PARAMETERS; PARAMETER_DEF; LABELED_STAT; TYPECAST; INDEX_OP;
    POST_INC; POST_DEC; METHOD_CALL; EXPR; ARRAY_INIT;
    IMPORT; UNARY_MINUS; UNARY_PLUS; CASE_GROUP; ELIST; FOR_INIT; FOR_CONDITION;
    FOR_ITERATOR; EMPTY_STAT; FINAL="final"; ABSTRACT="abstract";
}

// Compilation Unit: In Java, this is a single file.  This is the start
// rule for this parser.
compilationUnit
    :   // A compilation unit starts with an optional package definition
        (   packageDefinition
        |   /* nothing */
        )

        // Next we have a series of zero or more import statements
        ( importDefinition )*

        // Wrapping things up with any number of class or interface
        // definitions
        ( typeDefinition )*

        // EOF is matched but kept out of the tree
        EOF!
    ;

// Package statement: "package" followed by an identifier.
// The "package" token becomes the root and is retyped to PACKAGE_DEF.
packageDefinition
    options {defaultErrorHandler = true;} // let ANTLR handle errors
    :   p:"package"^ {#p.setType(PACKAGE_DEF);} identifier SEMI!
    ;

// Import statement: import followed by a package or class name.
// The "import" token becomes the root and is retyped to IMPORT.
importDefinition
    options {defaultErrorHandler = true;}
    :   i:"import"^ {#i.setType(IMPORT);} identifierStar SEMI!
    ;

// A type definition in a file is either a class or interface definition.
// The modifiers subtree is kept out of the automatic tree ('!') and handed
// to the class/interface rule instead; a stray semicolon is also accepted.
typeDefinition
    options {defaultErrorHandler = true;}
    :   m:modifiers!
        (   classDefinition[#m]
        |   interfaceDefinition[#m]
        )
    |   SEMI!
    ;

/** A declaration is the creation of a reference or primitive-type variable
 *  Create a separate Type/Var tree for each var in the var list.
 */
declaration!
    :   m:modifiers t:typeSpec[false] v:variableDefinitions[#m,#t]
        {#declaration = #v;}
    ;

// A list of zero or more modifiers.  We could have used (modifier)* in
// place of a call to modifiers, but I thought it was a good idea to keep
// this rule separate so they can easily be collected in a Vector if
// someone so desires.
// The matched modifiers are wrapped under an imaginary MODIFIERS node.
modifiers
    :   ( modifier )*
        {#modifiers = #(#[MODIFIERS, "MODIFIERS"], #modifiers);}
    ;

// A type specification is a type name with possible brackets afterwards
// (which would make it an array type).
// addImagNode is passed through to the class / builtin variant unchanged.
typeSpec[boolean addImagNode]
    :   classTypeSpec[addImagNode]
    |   builtInTypeSpec[addImagNode]
    ;

// A class type specification is a class type with possible brackets afterwards
// (which would make it an array type).
// Each '[' becomes an ARRAY_DECLARATOR root; when addImagNode is true the
// result is additionally wrapped under an imaginary TYPE node.
classTypeSpec[boolean addImagNode]
    :   identifier (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
        {
            if ( addImagNode ) {
                #classTypeSpec = #(#[TYPE,"TYPE"], #classTypeSpec);
            }
        }
    ;

// A builtin type specification is a builtin type with possible brackets
// afterwards (which would make it an array type).
// Each '[' becomes an ARRAY_DECLARATOR root; when addImagNode is true the
// result is additionally wrapped under an imaginary TYPE node.
builtInTypeSpec[boolean addImagNode]
    :   builtInType (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
        {
            if ( addImagNode ) {
                #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec);
            }
        }
    ;

// A type name, which is either a (possibly qualified) class name or
// a primitive (builtin) type.
type
    :   identifier
    |   builtInType
    ;

// The primitive types ("void" is listed here alongside them).
builtInType
    :   "void"
    |   "boolean"
    |   "byte"
    |   "char"
    |   "short"
    |   "int"
    |   "float"
    |   "long"
    |   "double"
    ;

// A (possibly-qualified) java identifier.  We start with the first IDENT
// and expand its name by adding dots and following IDENTS.
// Each DOT becomes the new root of the name built so far.
identifier
    :   IDENT ( DOT^ IDENT )*
    ;

// A (possibly-qualified) name that may end in ".*"; used by importDefinition.
identifierStar
    :   IDENT
        ( DOT^ IDENT )*
        ( DOT^ STAR  )?
    ;

// modifiers for Java classes, interfaces, class/instance vars and methods
modifier
    :   "private"
    |   "public"
    |   "protected"
    |   "static"
    |   "transient"
    |   "final"
    |   "abstract"
    |   "native"
    |   "threadsafe"
    |   "synchronized"
//  |   "const"         // reserved word; leave out
    |   "volatile"
    ;

// Definition of a Java class.
// The caller supplies the already-parsed modifiers subtree; the rule is
// marked '!' and assembles the CLASS_DEF tree by hand.
classDefinition![AST modifiers]
    :   "class" IDENT
        // it _might_ have a superclass...
        sc:superClassClause
        // it might implement some interfaces...
        ic:implementsClause
        // now parse the body of the class
        cb:classBlock
        {#classDefinition = #(#[CLASS_DEF,"CLASS_DEF"],
                               modifiers,IDENT,sc,ic,cb);}
    ;

// Optional "extends <identifier>" of a class.  An EXTENDS_CLAUSE node is
// built whether or not the clause is present.
superClassClause!
    :   ( "extends" id:identifier )?
        {#superClassClause = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"],id);}
    ;

// Definition of a Java Interface.
// The caller supplies the already-parsed modifiers subtree; the rule is
// marked '!' and assembles the INTERFACE_DEF tree by hand.
interfaceDefinition![AST modifiers]
    :   "interface" IDENT
        // it might extend some other interfaces
        ie:interfaceExtends
        // now parse the body of the interface (looks like a class...)
        cb:classBlock
        {#interfaceDefinition = #(#[INTERFACE_DEF,"INTERFACE_DEF"],
                                   modifiers,IDENT,ie,cb);}
    ;

// This is the body of a class.  You can have fields and extra semicolons,
// That's about it (until you see what a field is...)
// The braces and stray semicolons are dropped; the fields are wrapped under
// an imaginary OBJBLOCK node.
classBlock
    :   LCURLY!
            ( field | SEMI! )*
        RCURLY!
        {#classBlock = #(#[OBJBLOCK, "OBJBLOCK"], #classBlock);}
    ;

// An interface can extend several other interfaces...
// The "extends" keyword and the commas are dropped; the names are wrapped
// under an imaginary EXTENDS_CLAUSE node (built even when the clause is absent).
interfaceExtends
    :   (
            "extends"!
            identifier ( COMMA! identifier )*
        )?
        {#interfaceExtends = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"],
                                #interfaceExtends);}
    ;

// A class can implement several interfaces...
// The "implements" keyword and the commas are dropped; the names are wrapped
// under an imaginary IMPLEMENTS_CLAUSE node (built even when the clause is absent).
implementsClause
    :   (
            "implements"! identifier ( COMMA! identifier )*
        )?
        {#implementsClause = #(#[IMPLEMENTS_CLAUSE,"IMPLEMENTS_CLAUSE"],
                                #implementsClause);}
    ;

// Now the various things that can be defined inside a class or interface...
// Note that not all of these are really valid in an interface (constructors,
// for example), and if this grammar were used for a compiler there would
// need to be some semantic checks to make sure we're doing the right thing...
// The rule is marked '!', so every alternative assigns #field explicitly.
field!
    :   // method, constructor, or variable declaration
        mods:modifiers
        (   h:ctorHead s:compoundStatement      // constructor
            {#field = #(#[CTOR_DEF,"CTOR_DEF"], mods, h, s);}

        |   cd:classDefinition[#mods]           // inner class
            {#field = #cd;}

        |   id:interfaceDefinition[#mods]       // inner interface
            {#field = #id;}

        |   t:typeSpec[false]                   // method or variable declaration(s)
            (   IDENT                           // the name of the method

                // parse the formal parameter declarations.
                LPAREN! param:parameterDeclarationList RPAREN!

                // brackets written after the parameter list extend the return type
                rt:returnTypeBrackersOnEndOfMethodHead[#t]

                // get the list of exceptions that this method is declared to throw
                (tc:throwsClause)?

                // a body, or just ';' when no body is given
                ( s2:compoundStatement | SEMI )
                {#field = #(#[METHOD_DEF,"METHOD_DEF"],
                             mods,
                             #(#[TYPE,"TYPE"],rt),
                             IDENT,
                             param,
                             tc,
                             s2);}
            |   v:variableDefinitions[#mods,#t] SEMI
                {#field = #v;}
            )
        )

    // "static { ... }" class initializer
    |   "static" s3:compoundStatement
        {#field = #(#[STATIC_INIT,"STATIC_INIT"], s3);}

    // "{ ... }" instance initializer
    |   s4:compoundStatement
        {#field = #(#[INSTANCE_INIT,"INSTANCE_INIT"], s4);}
    ;

// One or more comma-separated variable declarators sharing the same
// modifiers and type.  Each declarator receives its own duplicate (dupTree)
// of the modifiers and type subtrees.
variableDefinitions[AST mods, AST t]
    :   variableDeclarator[getASTFactory().dupTree(mods),
                           getASTFactory().dupTree(t)]
        (   COMMA!
            variableDeclarator[getASTFactory().dupTree(mods),
                               getASTFactory().dupTree(t)]
        )*
    ;

/** Declaration of a variable.  This can be a class/instance variable,
 *  or a local variable in a method
 *  It can also include possible initialization.
 */
variableDeclarator![AST mods, AST t]
    :   id:IDENT d:declaratorBrackets[t] v:varInitializer
        {#variableDeclarator = #(#[VARIABLE_DEF,"VARIABLE_DEF"], mods, #(#[TYPE,"TYPE"],d), id, v);}
    ;

// Brackets written after a variable name.  The tree starts out as the type
// passed in; each '[' then becomes an ARRAY_DECLARATOR root above it.
declaratorBrackets[AST typ]
    :   {#declaratorBrackets=typ;}
        (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
    ;

// Optional "= initializer" of a variable; ASSIGN is the root when present.
varInitializer
    :   ( ASSIGN^ initializer )?
    ;

// This is an initializer used to set up an array.
// The '{' becomes the root and is retyped to ARRAY_INIT.
arrayInitializer
    :   lc:LCURLY^ {#lc.setType(ARRAY_INIT);}
            (   initializer
                (
                    // CONFLICT: does a COMMA after an initializer start a new
                    //           initializer or start the option ',' at end?
                    //           ANTLR generates proper code by matching
                    //           the comma as soon as possible.
                    options {
                        warnWhenFollowAmbig = false;
                    }
                :
                    COMMA! initializer
                )*
                (COMMA!)?
            )?
        RCURLY!
    ;

// The two "things" that can initialize an array element are an expression
// and another (nested) array initializer.
initializer
    :   expression
    |   arrayInitializer
    ;

// This is the header of a constructor (see its use in the field rule).  It
// includes the name and parameters.
// This also watches for a list of exception classes in a "throws" clause.
ctorHead
    :   IDENT  // the name of the constructor

        // parse the formal parameter declarations.
        LPAREN! parameterDeclarationList RPAREN!

        // get the list of exceptions that this constructor is declared to throw
        (throwsClause)?
    ;

// This is a list of exception classes that the method is declared to throw.
// "throws" is the root; the commas are dropped.
throwsClause
    :   "throws"^ identifier ( COMMA! identifier )*
    ;

// Brackets written after a method's parameter list.  The tree starts out as
// the return type passed in; each '[' then becomes an ARRAY_DECLARATOR root
// above it.
returnTypeBrackersOnEndOfMethodHead[AST typ]
    :   {#returnTypeBrackersOnEndOfMethodHead = typ;}
        (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
    ;

// A list of formal parameters (possibly empty), wrapped under an imaginary
// PARAMETERS node; the commas are dropped.
parameterDeclarationList
    :   ( parameterDeclaration ( COMMA! parameterDeclaration )* )?
        {#parameterDeclarationList = #(#[PARAMETERS,"PARAMETERS"],
                                        #parameterDeclarationList);}
    ;

// A formal parameter: optional "final", a type, a name and optional trailing
// brackets.  Built by hand as PARAMETER_DEF(modifiers, TYPE(...), name).
parameterDeclaration!
    :   pm:parameterModifier t:typeSpec[false] id:IDENT
        pd:parameterDeclaratorBrackets[#t]
        {#parameterDeclaration = #(#[PARAMETER_DEF,"PARAMETER_DEF"],
                                    pm, #(#[TYPE,"TYPE"],pd), id);}
    ;

// Brackets written after a parameter name.  The tree starts out as the type
// passed in; each '[' then becomes an ARRAY_DECLARATOR root above it.
parameterDeclaratorBrackets[AST t]
    :   {#parameterDeclaratorBrackets = t;}
        (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)*
    ;

// Optional "final" on a parameter, wrapped under an imaginary MODIFIERS node
// (the node is built even when "final" is absent).
parameterModifier
    :   (f:"final")?
        {#parameterModifier = #(#[MODIFIERS,"MODIFIERS"], f);}
    ;

// Compound statement.  This is used in many contexts:
//   Inside a class definition prefixed with "static":
//      it is a class initializer
//   Inside a class definition without "static":
//      it is an instance initializer
//   As the body of a method
//   As a completely independent braced block of code inside a method
//      it starts a new scope for variable definitions
// The '{' becomes the root and is retyped to SLIST.

compoundStatement
    :   lc:LCURLY^ {#lc.setType(SLIST);}
            // include the (possibly-empty) list of statements
            (statement)*
        RCURLY!
    ;

// A single statement.  Keyword tokens marked '^' become the root of the
// statement's tree; punctuation marked '!' is dropped from it.
statement
    // A list of statements in curly braces -- start a new scope!
    :   compoundStatement

    // class definition
    |   classDefinition[#[MODIFIERS, "MODIFIERS"]]

    // final class definition
    |   "final"! classDefinition[#(#[MODIFIERS, "MODIFIERS"],#[FINAL,"final"])]

    // abstract class definition
    |   "abstract"! classDefinition[#(#[MODIFIERS, "MODIFIERS"],#[ABSTRACT,"abstract"])]

    // declarations are ambiguous with "ID DOT" relative to expression
    // statements.  Must backtrack to be sure.  Could use a semantic
    // predicate to test symbol table to see what the type was coming
    // up, but that's pretty hard without a symbol table ;)
    |   (declaration)=> declaration SEMI!

    // An expression statement.  This could be a method call,
    // assignment statement, or any other expression evaluated for
    // side-effects.
    |   expression SEMI!

    // Attach a label to the front of a statement
    |   IDENT c:COLON^ {#c.setType(LABELED_STAT);} statement

    // If-else statement
    |   "if"^ LPAREN! expression RPAREN! statement
        (
            // CONFLICT: the old "dangling-else" problem...
            //           ANTLR generates proper code matching
            //           as soon as possible.  Hush warning.
            options {
                warnWhenFollowAmbig = false;
            }
        :
            "else"! statement
        )?

    // For statement
    |   "for"^
            LPAREN!
                forInit SEMI!   // initializer
                forCond SEMI!   // condition test
                forIter         // updater
            RPAREN!
            statement           // statement to loop over

    // While statement
    |   "while"^ LPAREN! expression RPAREN! statement

    // do-while statement
    |   "do"^ statement "while"! LPAREN! expression RPAREN! SEMI!

    // get out of a loop (or switch)
    |   "break"^ (IDENT)? SEMI!

    // do next iteration of a loop
    |   "continue"^ (IDENT)? SEMI!

    // Return an expression
    |   "return"^ (expression)? SEMI!

    // switch/case statement
    |   "switch"^ LPAREN! expression RPAREN! LCURLY!
            ( casesGroup )*
        RCURLY!

    // exception try-catch block
    |   tryBlock

    // throw an exception
    |   "throw"^ expression SEMI!

    // synchronize a statement
    |   "synchronized"^ LPAREN! expression RPAREN! compoundStatement

    // empty statement
    |   s:SEMI {#s.setType(EMPTY_STAT);}
    ;

// One or more case/default labels followed by their statement list, wrapped
// under an imaginary CASE_GROUP node.
casesGroup
    :   (   // CONFLICT: to which case group do the statements bind?
            //           ANTLR generates proper code: it groups the
            //           many "case"/"default" labels together then
            //           follows them with the statements
            options {
                warnWhenFollowAmbig = false;
            }
            :
            aCase
        )+
        caseSList
        {#casesGroup = #(#[CASE_GROUP, "CASE_GROUP"], #casesGroup);}
    ;

// A single "case <expression>:" or "default:" label; the colon is dropped.
aCase
    :   ("case"^ expression | "default") COLON!
    ;

// The (possibly empty) statements of a case group, wrapped under an
// imaginary SLIST node.
caseSList
    :   (statement)*
        {#caseSList = #(#[SLIST,"SLIST"],#caseSList);}
    ;

// The initializer for a for loop, wrapped under an imaginary FOR_INIT node
// (built even when the initializer is empty).
forInit
        // if it looks like a declaration, it is
    :   (   (declaration)=> declaration
        // otherwise it could be an expression list...
        |   expressionList
        )?
        {#forInit = #(#[FOR_INIT,"FOR_INIT"],#forInit);}
    ;

// The optional condition of a for loop, wrapped under an imaginary
// FOR_CONDITION node.
forCond
    :   (expression)?
        {#forCond = #(#[FOR_CONDITION,"FOR_CONDITION"],#forCond);}
    ;

// The optional update expressions of a for loop, wrapped under an imaginary
// FOR_ITERATOR node.
forIter
    :   (expressionList)?
        {#forIter = #(#[FOR_ITERATOR,"FOR_ITERATOR"],#forIter);}
    ;

// an exception handler try/catch block
// NOTE(review): "finally" carries '^' inside this same rule, which looks like
// it re-roots the whole try tree under "finally" when the clause is present;
// confirm that any tree walker consuming this AST expects that shape.
tryBlock
    :   "try"^ compoundStatement
        (handler)*
        ( "finally"^ compoundStatement )?
    ;

// an exception handler: "catch" is the root, the parentheses are dropped
handler
    :   "catch"^ LPAREN! parameterDeclaration RPAREN! compoundStatement
    ;

// expressions
// Note that most of these expressions follow the pattern
//   thisLevelExpression :
//       nextHigherPrecedenceExpression
//           (OPERATOR nextHigherPrecedenceExpression)*
// which is a standard recursive definition for parsing an expression.
// The operators in java have the following precedences:
//    lowest  (13)  = *= /= %= += -= <<= >>= >>>= &= ^= |=
//            (12)  ?:
//            (11)  ||
//            (10)  &&
//            ( 9)  |
//            ( 8)  ^
//            ( 7)  &
//            ( 6)  == !=
//            ( 5)  < <= > >=
//            ( 4)  << >>
//            ( 3)  +(binary) -(binary)
//            ( 2)  * / %
//            ( 1)  ++ -- +(unary) -(unary)  ~  !  (type)
//                  []   () (method call)  . (dot -- identifier qualification)
//                  new   ()  (explicit parenthesis)
//
// the last two are not usually on a precedence chart; I put them in
// to point out that new has a higher precedence than '.', so you
// can validly use
//     new Frame().show()
//
// Note that the above precedence levels map to the rules below...
// Once you have a precedence chart, writing the appropriate rules as below
//   is usually very straightforward



// the mother of all expressions; the result is wrapped under an imaginary
// EXPR node
expression
    :   assignmentExpression
        {#expression = #(#[EXPR,"EXPR"],#expression);}
    ;

// This is a list of expressions, wrapped under an imaginary ELIST node;
// the commas are dropped.
expressionList
    :   expression (COMMA! expression)*
        {#expressionList = #(#[ELIST,"ELIST"], #expressionList);}
    ;

// assignment expression (level 13)
// The right-hand side recurses into this rule, so chained assignments
// nest to the right.
assignmentExpression
    :   conditionalExpression
        (   (   ASSIGN^
            |   PLUS_ASSIGN^
            |   MINUS_ASSIGN^
            |   STAR_ASSIGN^
            |   DIV_ASSIGN^
            |   MOD_ASSIGN^
            |   SR_ASSIGN^
            |   BSR_ASSIGN^
            |   SL_ASSIGN^
            |   BAND_ASSIGN^
            |   BXOR_ASSIGN^
            |   BOR_ASSIGN^
            )
            assignmentExpression
        )?
    ;

// conditional test (level 12); '?' is the root, the ':' is dropped
conditionalExpression
    :   logicalOrExpression
        ( QUESTION^ assignmentExpression COLON! conditionalExpression )?
    ;

// logical or (||)  (level 11)
logicalOrExpression
    :   logicalAndExpression (LOR^ logicalAndExpression)*
    ;

// logical and (&&)  (level 10)
logicalAndExpression
    :   inclusiveOrExpression (LAND^ inclusiveOrExpression)*
    ;

// bitwise or non-short-circuiting or (|)  (level 9)
inclusiveOrExpression
    :   exclusiveOrExpression (BOR^ exclusiveOrExpression)*
    ;

// exclusive or (^)  (level 8)
exclusiveOrExpression
    :   andExpression (BXOR^ andExpression)*
    ;

// bitwise or non-short-circuiting and (&)  (level 7)
andExpression
    :   equalityExpression (BAND^ equalityExpression)*
    ;

// equality/inequality (==/!=) (level 6)
equalityExpression
    :   relationalExpression ((NOT_EQUAL^ | EQUAL^) relationalExpression)*
    ;

// boolean relational expressions (level 5)
// Either a chain of < > <= >= comparisons, or a single "instanceof" test
// whose right-hand side is a type (wrapped in a TYPE node).
relationalExpression
    :   shiftExpression
        (   (   (   LT^
                |   GT^
                |   LE^
                |   GE^
                )
                shiftExpression
            )*
        |   "instanceof"^ typeSpec[true]
        )
    ;

// bit shift expressions (level 4)
shiftExpression
    :   additiveExpression ((SL^ | SR^ | BSR^) additiveExpression)*
    ;

// binary addition/subtraction (level 3)
additiveExpression
    :   multiplicativeExpression ((PLUS^ | MINUS^) multiplicativeExpression)*
    ;

// multiplication/division/modulo (level 2)
multiplicativeExpression
    :   unaryExpression ((STAR^ | DIV^ | MOD^ ) unaryExpression)*
    ;

// Prefix ++ / --, and unary minus / plus (retyped to UNARY_MINUS /
// UNARY_PLUS); everything else is delegated to unaryExpressionNotPlusMinus.
unaryExpression
    :   INC^ unaryExpression
    |   DEC^ unaryExpression
    |   MINUS^ {#MINUS.setType(UNARY_MINUS);} unaryExpression
    |   PLUS^  {#PLUS.setType(UNARY_PLUS);} unaryExpression
    |   unaryExpressionNotPlusMinus
    ;

// Bitwise/logical not, type casts, and postfix expressions.  For a cast the
// '(' becomes the root and is retyped to TYPECAST.
unaryExpressionNotPlusMinus
    :   BNOT^ unaryExpression
    |   LNOT^ unaryExpression

    |   (   // subrule allows option to shut off warnings
            options {
                // "(int" ambig with postfixExpr due to lack of sequence
                // info in linear approximate LL(k).  It's ok.  Shut up.
                generateAmbigWarnings=false;
            }
        :   // If typecast is built in type, must be numeric operand
            // Also, no reason to backtrack if type keyword like int, float...
            lpb:LPAREN^ {#lpb.setType(TYPECAST);} builtInTypeSpec[true] RPAREN!
            unaryExpression

            // Have to backtrack to see if operator follows.  If no operator
            // follows, it's a typecast.  No semantic checking needed to parse.
            // if it _looks_ like a cast, it _is_ a cast; else it's a "(expr)"
        |   (LPAREN classTypeSpec[true] RPAREN unaryExpressionNotPlusMinus)=>
            lp:LPAREN^ {#lp.setType(TYPECAST);} classTypeSpec[true] RPAREN!
            unaryExpressionNotPlusMinus

        |   postfixExpression
        )
    ;

// qualified names, array expressions, method invocation, post inc/dec
postfixExpression
    :   primaryExpression // start with a primary

        (   // qualified id (id.id.id.id...) -- build the name
            DOT^ (  IDENT
                 |  "this"
                 |  "class"
                 |  newExpression
                 |  "super" LPAREN ( expressionList )? RPAREN
                 )
            // the above line needs a semantic check to make sure "class"
            //   is the _last_ qualifier.

            // allow ClassName[].class
        |   ( lbc:LBRACK^ {#lbc.setType(ARRAY_DECLARATOR);} RBRACK! )+
            DOT^ "class"

            // an array indexing operation
        |   lb:LBRACK^ {#lb.setType(INDEX_OP);} expression RBRACK!

            // method invocation
            // The next line is not strictly proper; it allows x(3)(4) or
            //   x[2](4) which are not valid in Java.  If this grammar were used
            //   to validate a Java program a semantic check would be needed, or
            //   this rule would get really ugly...
        |   lp:LPAREN^ {#lp.setType(METHOD_CALL);}
                argList
            RPAREN!
        )*

        // possibly add on a post-increment or post-decrement.
        // allows INC/DEC on too much, but semantics can check
        (   in:INC^ {#in.setType(POST_INC);}
        |   de:DEC^ {#de.setType(POST_DEC);}
        |   // nothing
        )

        // look for int.class and int[].class
    |   builtInType
        ( lbt:LBRACK^ {#lbt.setType(ARRAY_DECLARATOR);} RBRACK! )*
        DOT^ "class"
    ;

// the basic element of an expression; the parentheses of "( expr )" are
// dropped from the tree
primaryExpression
    :   IDENT
    |   newExpression
    |   constant
    |   "super"
    |   "true"
    |   "false"
    |   "this"
    |   "null"
    |   LPAREN! assignmentExpression RPAREN!
    ;

/** object instantiation.
 *  Trees are built as illustrated by the following input/tree pairs:
 *
 *  new T()
 *
 *  new
 *   |
 *   T --  ELIST
 *           |
 *          arg1 -- arg2 -- .. -- argn
 *
 *  new int[]
 *
 *  new
 *   |
 *  int -- ARRAY_DECLARATOR
 *
 *  new int[] {1,2}
 *
 *  new
 *   |
 *  int -- ARRAY_DECLARATOR -- ARRAY_INIT
 *                                  |
 *                                EXPR -- EXPR
 *                                  |      |
 *                                  1      2
 *
 *  new int[3]
 *  new
 *   |
 *  int -- ARRAY_DECLARATOR
 *                |
 *              EXPR
 *                |
 *                3
 *
 *  new int[1][2]
 *
 *  new
 *   |
 *  int -- ARRAY_DECLARATOR
 *               |
 *         ARRAY_DECLARATOR -- EXPR
 *               |              |
 *             EXPR             1
 *               |
 *               2
 *
 */
newExpression
    :   "new"^ type
        (   LPAREN! argList RPAREN! (classBlock)?

            //java 1.1
            // Note: This will allow bad constructs like
            //    new int[4][][3] {exp,exp}.
            //    There needs to be a semantic check here...
            //    to make sure:
            //    a) [ expr ] and [ ] are not mixed
            //    b) [ expr ] and an init are not used together

        |   newArrayDeclarator (arrayInitializer)?
        )
    ;

// Argument list of a call or constructor invocation.  When there are no
// arguments an empty ELIST node is still produced.
argList
    :   (   expressionList
        |   /*nothing*/
            {#argList = #[ELIST,"ELIST"];}
        )
    ;

// One or more "[ expr? ]" dimensions after "new type"; each '[' becomes an
// ARRAY_DECLARATOR root.
newArrayDeclarator
    :   (
            // CONFLICT:
            // newExpression is a primaryExpression which can be
            // followed by an array index reference.  This is ok,
            // as the generated code will stay in this loop as
            // long as it sees an LBRACK (proper behavior)
            options {
                warnWhenFollowAmbig = false;
            }
        :
            lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);}
                (expression)?
            RBRACK!
        )+
    ;

// A literal constant: integer, character, string or floating point.
constant
    :   NUM_INT
    |   CHAR_LITERAL
    |   STRING_LITERAL
    |   NUM_FLOAT
    ;

//----------------------------------------------------------------------------
// The Java scanner
//----------------------------------------------------------------------------
class JavaLexer extends Lexer;

options {
    exportVocab=Java;      // call the vocabulary "Java"
    testLiterals=false;    // don't automatically test for literals
    k=4;                   // four characters of lookahead
}

// OPERATORS
// DOT has no rule of its own: NUM_INT matches '.' and sets the token type.
QUESTION        :   '?'     ;
LPAREN          :   '('     ;
RPAREN          :   ')'     ;
LBRACK          :   '['     ;
RBRACK          :   ']'     ;
LCURLY          :   '{'     ;
RCURLY          :   '}'     ;
COLON           :   ':'     ;
COMMA           :   ','     ;
//DOT           :   '.'     ;
ASSIGN          :   '='     ;
EQUAL           :   "=="    ;
LNOT            :   '!'     ;
BNOT            :   '~'     ;
NOT_EQUAL       :   "!="    ;
DIV             :   '/'     ;
DIV_ASSIGN      :   "/="    ;
PLUS            :   '+'     ;
PLUS_ASSIGN     :   "+="    ;
INC             :   "++"    ;
MINUS           :   '-'     ;
MINUS_ASSIGN    :   "-="    ;
DEC             :   "--"    ;
STAR            :   '*'     ;
STAR_ASSIGN     :   "*="    ;
MOD             :   '%'     ;
MOD_ASSIGN      :   "%="    ;
SR              :   ">>"    ;
SR_ASSIGN       :   ">>="   ;
BSR             :   ">>>"   ;
BSR_ASSIGN      :   ">>>="  ;
GE              :   ">="    ;
GT              :   ">"     ;
SL              :   "<<"    ;
SL_ASSIGN       :   "<<="   ;
LE              :   "<="    ;
LT              :   '<'     ;
BXOR            :   '^'     ;
BXOR_ASSIGN     :   "^="    ;
BOR             :   '|'     ;
BOR_ASSIGN      :   "|="    ;
LOR             :   "||"    ;
BAND            :   '&'     ;
BAND_ASSIGN     :   "&="    ;
LAND            :   "&&"    ;
SEMI            :   ';'     ;

// Whitespace -- ignored.  One whitespace character (or one "\r\n" pair) is
// matched per token; newlines bump the line counter via newline().
WS  :   (   ' '
        |   '\t'
        |   '\f'
        // handle newlines
        |   (   "\r\n"  // Evil DOS
            |   '\r'    // Macintosh
            |   '\n'    // Unix (the right way)
            )
            { newline(); }
        )
        { $setType(Token.SKIP); }
    ;

// Single-line comments -- skipped.
// The line terminator is optional so that a comment on the very last line
// of a file (no trailing newline) still lexes; newline() is only called when
// a terminator was actually consumed.
SL_COMMENT
    :   "//"
        (~('\n'|'\r'))*
        (   // match the terminator right away when one is present
            options {
                warnWhenFollowAmbig = false;
            }
        :   ('\n'|'\r'('\n')?)
            {newline();}
        )?
        {$setType(Token.SKIP);}
    ;

// multiple-line comments -- skipped; newlines inside still bump the line
// counter
ML_COMMENT
    :   "/*"
        (   /*  '\r' '\n' can be matched in one alternative or by matching
                '\r' in one iteration and '\n' in another.  I am trying to
                handle any flavor of newline that comes in, but the language
                that allows both "\r\n" and "\r" and "\n" to all be valid
                newline is ambiguous.  Consequently, the resulting grammar
                must be ambiguous.  I'm shutting this warning off.
             */
            options {
                generateAmbigWarnings=false;
            }
        :
            // a '*' only stays inside the comment when not followed by '/'
            { LA(2)!='/' }? '*'
        |   '\r' '\n'       {newline();}
        |   '\r'            {newline();}
        |   '\n'            {newline();}
        |   ~('*'|'\n'|'\r')
        )*
        "*/"
        {$setType(Token.SKIP);}
    ;

// character literals
// A backslash must go through ESC, and a raw line terminator is not allowed
// between the quotes, so both are excluded from the "any other char" set.
CHAR_LITERAL
    :   '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\''
    ;

// string literals
// A raw line terminator is not allowed inside a string; excluding it also
// keeps the lexer's line counter honest, since this rule never calls
// newline().
STRING_LITERAL
    :   '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"'
    ;

// escape sequence -- note that this is protected; it can only be called
//   from another lexer rule -- it will not ever directly return a token to
//   the parser
// There are various ambiguities hushed in this rule.  The optional
// octal digit matches should be matched here rather than letting
// them go back to STRING_LITERAL to be matched.  ANTLR does the
// right thing by matching immediately; hence, it's ok to shut off
// the FOLLOW ambig warnings.
// Octal escapes are \0..\377: up to three digits when the first is 0..3,
// up to two digits when the first is 4..7; every digit is octal.
protected
ESC
    :   '\\'
        (   'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
        |   ('0'..'3')
            (
                options {
                    warnWhenFollowAmbig = false;
                }
            :   ('0'..'7')
                (
                    options {
                        warnWhenFollowAmbig = false;
                    }
                :   '0'..'7'
                )?
            )?
        |   ('4'..'7')
            (
                options {
                    warnWhenFollowAmbig = false;
                }
            :   ('0'..'7')
            )?
        )
    ;

// hexadecimal digit (again, note it's protected!)
protected
HEX_DIGIT
    :   ('0'..'9'|'A'..'F'|'a'..'f')
    ;

// a dummy rule to force vocabulary to be all characters (except special
//   ones that ANTLR uses internally (0 to 2)
protected
VOCAB
    :   '\3'..'\377'
    ;

// an identifier.  Note that testLiterals is set to true!  This means
// that after we match the rule, we look in the literals table to see
// if it's a literal or really an identifier
IDENT
    options {testLiterals=true;}
    :   ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
    ;

// a numeric literal
// This one rule also produces DOT (a lone '.') and NUM_FLOAT by overriding
// the token type.  isDecimal records whether what has been matched so far
// may still continue as a floating point literal; it is cleared for hex
// literals so that "0x1F." is not swallowed as a float.
NUM_INT
    {boolean isDecimal=false;}
    :   '.' {_ttype = DOT;}
            (('0'..'9')+ (EXPONENT)? (FLOAT_SUFFIX)? { _ttype = NUM_FLOAT; })?
    |   (   '0' {isDecimal = true;} // special case for just '0'
            (   ('x'|'X') {isDecimal = false;}  // hex is never a float prefix
                (                                           // hex
                    // the 'e'|'E' and float suffix stuff look
                    // like hex digits, hence the (...)+ doesn't
                    // know when to stop: ambig.  ANTLR resolves
                    // it correctly by matching immediately.  It
                    // is therefore ok to hush warning.
                    options {
                        warnWhenFollowAmbig=false;
                    }
                :   HEX_DIGIT
                )+
            |   ('0'..'7')+                                 // octal
            )?
        |   ('1'..'9') ('0'..'9')*  {isDecimal=true;}       // non-zero decimal
        )
        (   ('l'|'L')

        // only check to see if it's a float if looks like decimal so far
        |   {isDecimal}?
            (   '.' ('0'..'9')* (EXPONENT)? (FLOAT_SUFFIX)?
            |   EXPONENT (FLOAT_SUFFIX)?
            |   FLOAT_SUFFIX
            )
            { _ttype = NUM_FLOAT; }
        )?
    ;

// a couple protected methods to assist in matching floating point numbers
// EXPONENT: 'e' or 'E', an optional sign, then one or more decimal digits.
protected
EXPONENT
    :   ('e'|'E') ('+'|'-')? ('0'..'9')+
    ;

// FLOAT_SUFFIX: the float ('f'/'F') or double ('d'/'D') literal suffix.
protected
FLOAT_SUFFIX
    :   'f'|'F'|'d'|'D'
    ;

Note: See TracBrowser for help on using the repository browser.