1 | /** Java 1.2 Recognizer
|
---|
2 | *
|
---|
3 | * Run 'java Main <directory full of java files>'
|
---|
4 | *
|
---|
5 | * Contributing authors:
|
---|
6 | * John Mitchell [email protected]
|
---|
7 | * Terence Parr [email protected]
|
---|
8 | * John Lilley [email protected]
|
---|
9 | * Scott Stanchfield [email protected]
|
---|
10 | * Markus Mohnen [email protected]
|
---|
11 | * Peter Williams [email protected]
|
---|
12 | *
|
---|
13 | * Version 1.00 December 9, 1997 -- initial release
|
---|
14 | * Version 1.01 December 10, 1997
|
---|
15 | * fixed bug in octal def (0..7 not 0..8)
|
---|
16 | * Version 1.10 August 1998 (parrt)
|
---|
17 | * added tree construction
|
---|
18 | * fixed definition of WS,comments for mac,pc,unix newlines
|
---|
19 | * added unary plus
|
---|
20 | * Version 1.11 (Nov 20, 1998)
|
---|
21 | * Added "shutup" option to turn off last ambig warning.
|
---|
22 | * Fixed inner class def to allow named class defs as statements
|
---|
23 | * synchronized requires compound not simple statement
|
---|
24 | * add [] after builtInType DOT class in primaryExpression
|
---|
25 | * "const" is reserved but not valid..removed from modifiers
|
---|
26 | * Version 1.12 (Feb 2, 1999)
|
---|
27 | * Changed LITERAL_xxx to xxx in tree grammar.
|
---|
28 | * Updated java.g to use tokens {...} now for 2.6.0 (new feature).
|
---|
29 | *
|
---|
30 | * Version 1.13 (Apr 23, 1999)
|
---|
31 | * Didn't have (stat)? for else clause in tree parser.
|
---|
32 | * Didn't gen ASTs for interface extends. Updated tree parser too.
|
---|
33 | * Updated to 2.6.0.
|
---|
34 | * Version 1.14 (Jun 20, 1999)
|
---|
35 | * Allowed final/abstract on local classes.
|
---|
36 | * Removed local interfaces from methods
|
---|
37 | * Put instanceof precedence where it belongs...in relationalExpr
|
---|
38 | * It also had expr not type as arg; fixed it.
|
---|
39 | * Missing ! on SEMI in classBlock
|
---|
40 | * fixed: (expr) + "string" was parsed incorrectly (+ as unary plus).
|
---|
41 | * fixed: didn't like Object[].class in parser or tree parser
|
---|
42 | * Version 1.15 (Jun 26, 1999)
|
---|
43 | * Screwed up rule with instanceof in it. :( Fixed.
|
---|
44 | * Tree parser didn't like (expr).something; fixed.
|
---|
45 | * Allowed multiple inheritance in tree grammar. oops.
|
---|
46 | * Version 1.16 (August 22, 1999)
|
---|
47 | * Extending an interface built a wacky tree: had extra EXTENDS.
|
---|
48 | * Tree grammar didn't allow multiple superinterfaces.
|
---|
49 | * Tree grammar didn't allow empty var initializer: {}
|
---|
50 | * Version 1.17 (October 12, 1999)
|
---|
51 | * ESC lexer rule allowed 399 max not 377 max.
|
---|
52 | * java.tree.g didn't handle the expression of synchronized
|
---|
53 | * statements.
|
---|
54 | *
|
---|
55 | * Version tracking now done with following ID:
|
---|
56 | *
|
---|
57 | *
|
---|
58 | * BUG:
|
---|
59 | * Doesn't like boolean.class!
|
---|
60 | *
|
---|
61 | * class Test {
|
---|
62 | * public static void main( String args[] ) {
|
---|
63 | * if (boolean.class.equals(boolean.class)) {
|
---|
64 | * System.out.println("works");
|
---|
65 | * }
|
---|
66 | * }
|
---|
67 | * }
|
---|
68 | *
|
---|
69 | * This grammar is in the PUBLIC DOMAIN
|
---|
70 | */
|
---|
// Parser section of the recognizer.  It builds an AST while parsing and
// exports its token vocabulary under the name "Java".
class JavaRecognizer extends Parser;

options {
    k                          = 2;      // two tokens of lookahead
    exportVocab                = Java;   // call the vocabulary "Java"
    codeGenMakeSwitchThreshold = 2;      // code-generation tuning
    codeGenBitsetTestThreshold = 3;      // code-generation tuning
    defaultErrorHandler        = false;  // don't generate parser error handlers
    buildAST                   = true;   // construct trees during the parse
}

// Node types used in tree-building actions below.  FINAL and ABSTRACT are
// bound to the "final" and "abstract" keywords; the rest have no literal.
// The declaration order is kept as-is.
tokens {
    BLOCK; MODIFIERS; OBJBLOCK; SLIST;
    CTOR_DEF; METHOD_DEF; VARIABLE_DEF;
    INSTANCE_INIT; STATIC_INIT;
    TYPE; CLASS_DEF; INTERFACE_DEF;
    PACKAGE_DEF; ARRAY_DECLARATOR;
    EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE;
    PARAMETERS; PARAMETER_DEF;
    LABELED_STAT; TYPECAST; INDEX_OP;
    POST_INC; POST_DEC; METHOD_CALL;
    EXPR; ARRAY_INIT;
    IMPORT; UNARY_MINUS; UNARY_PLUS;
    CASE_GROUP; ELIST;
    FOR_INIT; FOR_CONDITION; FOR_ITERATOR;
    EMPTY_STAT;
    FINAL="final"; ABSTRACT="abstract";
}
|
---|
90 |
|
---|
// Start rule: a compilation unit (one Java source file).
//   [package definition]  import*  typeDefinition*  EOF
compilationUnit
    :   ( packageDefinition )?    // optional package definition
        ( importDefinition )*     // zero or more imports
        ( typeDefinition )*       // zero or more class/interface definitions
        EOF!                      // matched, but kept out of the tree
    ;
|
---|
108 |
|
---|
109 |
|
---|
// Package statement: "package" identifier ';'
// The keyword node is retyped to PACKAGE_DEF and made the root; the
// semicolon is left out of the tree.
packageDefinition
    options { defaultErrorHandler = true; }   // let ANTLR handle errors here
    :   pkg:"package"^ { #pkg.setType(PACKAGE_DEF); }
        identifier
        SEMI!
    ;
|
---|
115 |
|
---|
116 |
|
---|
// Import statement: "import" followed by a package or class name
// (optionally ending in ".*") and ';'.  The keyword node is retyped to
// IMPORT and made the root.
importDefinition
    options { defaultErrorHandler = true; }
    :   imp:"import"^ { #imp.setType(IMPORT); }
        identifierStar
        SEMI!
    ;
|
---|
122 |
|
---|
// A top-level type definition: a class or an interface, or a stray ';'.
// The modifiers are parsed here (kept out of this rule's own tree) and
// handed to the class/interface rule, which places them in its tree.
typeDefinition
    options { defaultErrorHandler = true; }
    :   mods:modifiers!
        (   classDefinition[#mods]
        |   interfaceDefinition[#mods]
        )
    |   SEMI!
    ;
|
---|
132 |
|
---|
/** Declaration of one or more reference or primitive-type variables:
 *  modifiers, a type, then the declarator list.  Automatic tree building
 *  is off for this rule; its result is whatever variableDefinitions
 *  built, i.e. a separate type/variable tree for each declared variable.
 */
declaration!
    :   mods:modifiers
        ts:typeSpec[false]
        defs:variableDefinitions[#mods, #ts]
        { #declaration = #defs; }
    ;
|
---|
140 |
|
---|
// A (possibly empty) list of modifiers, gathered under a MODIFIERS node.
// This is a rule of its own rather than an inline (modifier)* so that the
// modifiers can easily be collected in one place.
modifiers
    :   ( modifier )*
        { #modifiers = #(#[MODIFIERS,"MODIFIERS"], #modifiers); }
    ;
|
---|
149 |
|
---|
150 |
|
---|
// A type specification: a class type or a builtin type, each optionally
// followed by brackets (which make it an array type).
//   addImagNode - passed through unchanged; when true the chosen sub-rule
//                 wraps its result in a TYPE node.
typeSpec[boolean addImagNode]
    :   classTypeSpec[addImagNode]
    |   builtInTypeSpec[addImagNode]
    ;
|
---|
157 |
|
---|
// A class type specification: a (possibly qualified) class name followed by
// zero or more "[]" pairs.  Each '[' is retyped to ARRAY_DECLARATOR and made
// the root; the ']' is dropped.
//   addImagNode - when true, the finished tree is wrapped in a TYPE node.
classTypeSpec[boolean addImagNode]
    :   identifier
        ( dim:LBRACK^ { #dim.setType(ARRAY_DECLARATOR); } RBRACK! )*
        {
            if ( addImagNode ) {
                #classTypeSpec = #(#[TYPE,"TYPE"], #classTypeSpec);
            }
        }
    ;
|
---|
168 |
|
---|
// A builtin type specification: a primitive type name followed by zero or
// more "[]" pairs.  Each '[' is retyped to ARRAY_DECLARATOR and made the
// root; the ']' is dropped.
//   addImagNode - when true, the finished tree is wrapped in a TYPE node.
builtInTypeSpec[boolean addImagNode]
    :   builtInType
        ( dim:LBRACK^ { #dim.setType(ARRAY_DECLARATOR); } RBRACK! )*
        {
            if ( addImagNode ) {
                #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec);
            }
        }
    ;
|
---|
179 |
|
---|
// A bare type name (no array brackets): either a possibly-qualified class
// name or a primitive (builtin) type.
type
    :   identifier
    |   builtInType
    ;
|
---|
186 |
|
---|
// The primitive type keywords.  "void" is listed here as well, so every
// rule that accepts a builtin type also accepts "void".
builtInType
    :   "void"
    |   "boolean"
    |   "byte"
    |   "char"
    |   "short"
    |   "int"
    |   "float"
    |   "long"
    |   "double"
    ;
|
---|
199 |
|
---|
// A possibly-qualified Java identifier: IDENT ( '.' IDENT )*
// Every DOT is marked as a root, so the name grows by wrapping what has
// been matched so far under each new DOT.
identifier
    :   IDENT
        ( DOT^ IDENT )*
    ;
|
---|
205 |
|
---|
// The name in an import statement: a possibly-qualified identifier that may
// end in ".*".  DOT nodes are roots, exactly as in the identifier rule.
identifierStar
    :   IDENT ( DOT^ IDENT )* ( DOT^ STAR )?
    ;
|
---|
211 |
|
---|
212 |
|
---|
// Modifiers for Java classes, interfaces, class/instance variables and
// methods.  No check is made here that a modifier is legal in its context.
// "const" is a reserved word but not a valid modifier, so it is left out.
// "strictfp" is accepted as well; it is appended last so the existing
// alternatives keep their order.
modifier
    :   "private"
    |   "public"
    |   "protected"
    |   "static"
    |   "transient"
    |   "final"
    |   "abstract"
    |   "native"
    |   "threadsafe"
    |   "synchronized"
    |   "volatile"
    |   "strictfp"
    ;
|
---|
228 |
|
---|
229 |
|
---|
// Definition of a Java class.
//   modifiers - the already-parsed MODIFIERS tree supplied by the caller.
// Automatic tree building is off; the rule assembles
//   #(CLASS_DEF modifiers IDENT EXTENDS_CLAUSE IMPLEMENTS_CLAUSE OBJBLOCK)
classDefinition![AST modifiers]
    :   "class" name:IDENT
        ext:superClassClause       // it _might_ have a superclass...
        impl:implementsClause      // it might implement some interfaces...
        body:classBlock            // the body of the class
        {
            #classDefinition =
                #(#[CLASS_DEF,"CLASS_DEF"], modifiers, name, ext, impl, body);
        }
    ;
|
---|
242 |
|
---|
// Optional "extends" clause of a class.  An EXTENDS_CLAUSE node is always
// built; it has the superclass name as its child only when one was given.
superClassClause!
    :   ( "extends" parent:identifier )?
        { #superClassClause = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"], parent); }
    ;
|
---|
247 |
|
---|
// Definition of a Java interface.
//   modifiers - the already-parsed MODIFIERS tree supplied by the caller.
// Automatic tree building is off; the rule assembles
//   #(INTERFACE_DEF modifiers IDENT EXTENDS_CLAUSE OBJBLOCK)
interfaceDefinition![AST modifiers]
    :   "interface" name:IDENT
        supers:interfaceExtends    // it might extend some other interfaces
        body:classBlock            // the body (looks like a class body)
        {
            #interfaceDefinition =
                #(#[INTERFACE_DEF,"INTERFACE_DEF"], modifiers, name, supers, body);
        }
    ;
|
---|
258 |
|
---|
259 |
|
---|
// The body of a class or interface: '{' (field | ';')* '}'.
// Braces and stray semicolons are dropped; the fields are gathered under an
// OBJBLOCK node.
classBlock
    :   LCURLY!
        (   field
        |   SEMI!
        )*
        RCURLY!
        { #classBlock = #(#[OBJBLOCK,"OBJBLOCK"], #classBlock); }
    ;
|
---|
268 |
|
---|
// Optional "extends" clause of an interface; several super-interfaces may be
// listed.  The keyword and commas are dropped and the names (if any) become
// the children of an EXTENDS_CLAUSE node, which is always built.
interfaceExtends
    :   ( "extends"! identifier ( COMMA! identifier )* )?
        {
            #interfaceExtends =
                #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"], #interfaceExtends);
        }
    ;
|
---|
278 |
|
---|
// Optional "implements" clause of a class; several interfaces may be listed.
// The keyword and commas are dropped and the names (if any) become the
// children of an IMPLEMENTS_CLAUSE node, which is always built.
implementsClause
    :   ( "implements"! identifier ( COMMA! identifier )* )?
        {
            #implementsClause =
                #(#[IMPLEMENTS_CLAUSE,"IMPLEMENTS_CLAUSE"], #implementsClause);
        }
    ;
|
---|
287 |
|
---|
// The things that can be defined inside a class or interface body.
// Not all of them are really valid in an interface (constructors, for
// example); a compiler built on this grammar would need semantic checks.
// Automatic tree building is off; each alternative assembles its own tree.
field!
    :   mods:modifiers
        (   // constructor
            head:ctorHead ctorBody:compoundStatement
            { #field = #(#[CTOR_DEF,"CTOR_DEF"], mods, head, ctorBody); }

        |   // inner class
            innerClass:classDefinition[#mods]
            { #field = #innerClass; }

        |   // inner interface
            innerIface:interfaceDefinition[#mods]
            { #field = #innerIface; }

        |   // method or variable declaration(s): both begin with a type
            ts:typeSpec[false]
            (   // method: name, formal parameters, optional brackets after
                // the parameter list, optional throws clause, body or ';'
                name:IDENT
                LPAREN! params:parameterDeclarationList RPAREN!
                ret:returnTypeBrackersOnEndOfMethodHead[#ts]
                ( thrown:throwsClause )?
                ( methodBody:compoundStatement | SEMI )
                {
                    #field = #(#[METHOD_DEF,"METHOD_DEF"],
                               mods,
                               #(#[TYPE,"TYPE"], ret),
                               name,
                               params,
                               thrown,
                               methodBody);
                }

            |   // variable declaration(s)
                vars:variableDefinitions[#mods, #ts] SEMI
                { #field = #vars; }
            )
        )

    |   // "static { ... }" class initializer
        "static" staticBody:compoundStatement
        { #field = #(#[STATIC_INIT,"STATIC_INIT"], staticBody); }

    |   // "{ ... }" instance initializer
        instanceBody:compoundStatement
        { #field = #(#[INSTANCE_INIT,"INSTANCE_INIT"], instanceBody); }
    ;
|
---|
337 |
|
---|
// A comma-separated list of variable declarators sharing one set of
// modifiers and one type.
//   mods - the MODIFIERS tree;  t - the type tree.
// Each declarator is handed its own copy (dupTree) of both trees.
variableDefinitions[AST mods, AST t]
    :   variableDeclarator[getASTFactory().dupTree(mods), getASTFactory().dupTree(t)]
        (   COMMA!
            variableDeclarator[getASTFactory().dupTree(mods), getASTFactory().dupTree(t)]
        )*
    ;
|
---|
346 |
|
---|
/** Declaration of a single variable: a class/instance variable or a local
 *  variable in a method, with optional brackets after the name and an
 *  optional initializer.  Builds
 *  #(VARIABLE_DEF mods #(TYPE type-with-brackets) IDENT initializer).
 */
variableDeclarator![AST mods, AST t]
    :   name:IDENT dims:declaratorBrackets[t] init:varInitializer
        {
            #variableDeclarator =
                #(#[VARIABLE_DEF,"VARIABLE_DEF"],
                  mods, #(#[TYPE,"TYPE"], dims), name, init);
        }
    ;
|
---|
355 |
|
---|
// Brackets written after a variable name ("int x[]").  The tree starts out
// as the supplied type; every '[' is retyped to ARRAY_DECLARATOR and made
// the new root above it, and the ']' is dropped.
declaratorBrackets[AST typ]
    :   { #declaratorBrackets = typ; }
        ( dim:LBRACK^ { #dim.setType(ARRAY_DECLARATOR); } RBRACK! )*
    ;
|
---|
360 |
|
---|
// Optional "= initializer" part of a variable declarator; when present the
// ASSIGN token is the root of the result.
varInitializer
    :   (   ASSIGN^ initializer
        )?
    ;
|
---|
364 |
|
---|
// An initializer used to set up an array:  '{' [ init (',' init)* [','] ] '}'
// The '{' is retyped to ARRAY_INIT and made the root; commas and the '}'
// are dropped.
arrayInitializer
    :   open:LCURLY^ { #open.setType(ARRAY_INIT); }
        (   initializer
            (   // CONFLICT: does a COMMA after an initializer start a new
            	// initializer or is it the optional trailing ','?
            	// ANTLR generates proper code by matching the comma as
            	// soon as possible, so the warning is switched off.
                options { warnWhenFollowAmbig = false; }
            :   COMMA! initializer
            )*
            ( COMMA! )?
        )?
        RCURLY!
    ;
|
---|
384 |
|
---|
385 |
|
---|
// What can initialize a variable or an array element: an expression, or
// another (nested) array initializer.
initializer
    :   expression
    |   arrayInitializer
    ;
|
---|
392 |
|
---|
// The header of a constructor: its name, the formal parameter list
// (parentheses are dropped from the tree) and an optional "throws" clause.
ctorHead
    :   IDENT                                       // the constructor name
        LPAREN! parameterDeclarationList RPAREN!    // formal parameters
        ( throwsClause )?                           // declared exceptions
    ;
|
---|
405 |
|
---|
// The list of exception classes a method or constructor is declared to
// throw.  "throws" is the root; the commas are dropped.
throwsClause
    :   "throws"^ identifier
        ( COMMA! identifier )*
    ;
|
---|
410 |
|
---|
411 |
|
---|
// Brackets written after a method's parameter list.  The tree starts out as
// the supplied return type; every '[' is retyped to ARRAY_DECLARATOR and
// made the new root above it, and the ']' is dropped.
returnTypeBrackersOnEndOfMethodHead[AST typ]
    :   { #returnTypeBrackersOnEndOfMethodHead = typ; }
        ( dim:LBRACK^ { #dim.setType(ARRAY_DECLARATOR); } RBRACK! )*
    ;
|
---|
416 |
|
---|
// A (possibly empty) comma-separated list of formal parameters.  Commas are
// dropped; the parameters become children of a PARAMETERS node, which is
// built even when the list is empty.
parameterDeclarationList
    :   (   parameterDeclaration
            ( COMMA! parameterDeclaration )*
        )?
        {
            #parameterDeclarationList =
                #(#[PARAMETERS,"PARAMETERS"], #parameterDeclarationList);
        }
    ;
|
---|
423 |
|
---|
// A formal parameter: optional "final", a type, a name and optional
// brackets after the name.  Builds
//   #(PARAMETER_DEF MODIFIERS #(TYPE type-with-brackets) IDENT)
parameterDeclaration!
    :   mods:parameterModifier
        ts:typeSpec[false]
        name:IDENT
        dims:parameterDeclaratorBrackets[#ts]
        {
            #parameterDeclaration =
                #(#[PARAMETER_DEF,"PARAMETER_DEF"],
                  mods, #(#[TYPE,"TYPE"], dims), name);
        }
    ;
|
---|
431 |
|
---|
// Brackets written after a parameter name.  The tree starts out as the
// supplied type; every '[' is retyped to ARRAY_DECLARATOR and made the new
// root above it, and the ']' is dropped.
parameterDeclaratorBrackets[AST t]
    :   { #parameterDeclaratorBrackets = t; }
        ( dim:LBRACK^ { #dim.setType(ARRAY_DECLARATOR); } RBRACK! )*
    ;
|
---|
436 |
|
---|
// The only modifier allowed on a formal parameter: an optional "final".
// A MODIFIERS node is always built; "final" is its child when present.
parameterModifier
    :   ( fin:"final" )?
        { #parameterModifier = #(#[MODIFIERS,"MODIFIERS"], fin); }
    ;
|
---|
441 |
|
---|
// Compound statement: '{' statement* '}'.  It is used in several contexts:
//   - inside a class definition, prefixed with "static": a class initializer
//   - inside a class definition, without "static": an instance initializer
//   - as the body of a method
//   - as a completely independent braced block of code inside a method,
//     where it starts a new scope for variable definitions
// The '{' is retyped to SLIST and made the root; the '}' is dropped.
compoundStatement
    :   open:LCURLY^ { #open.setType(SLIST); }
        ( statement )*      // the (possibly empty) list of statements
        RCURLY!
    ;
|
---|
457 |
|
---|
458 |
|
---|
// A single statement.  In most alternatives the leading keyword is the
// tree root and pure punctuation is dropped.
statement
    :   // a list of statements in curly braces -- start a new scope!
        compoundStatement

    |   // local class definition, without modifiers
        classDefinition[#[MODIFIERS, "MODIFIERS"]]

    |   // final local class definition
        "final"! classDefinition[#(#[MODIFIERS, "MODIFIERS"],#[FINAL,"final"])]

    |   // abstract local class definition
        "abstract"! classDefinition[#(#[MODIFIERS, "MODIFIERS"],#[ABSTRACT,"abstract"])]

    |   // Declarations are ambiguous with "ID DOT" relative to expression
        // statements, so a syntactic predicate backtracks to be sure.  A
        // semantic predicate on a symbol table could decide instead, but
        // there is no symbol table here.
        (declaration)=> declaration SEMI!

    |   // expression statement: a method call, assignment, or any other
        // expression evaluated for its side effects
        expression SEMI!

    |   // labeled statement; the ':' is retyped to LABELED_STAT and made root
        IDENT colon:COLON^ { #colon.setType(LABELED_STAT); } statement

    |   // if-else statement
        "if"^ LPAREN! expression RPAREN! statement
        (   // CONFLICT: the old "dangling-else" problem.  ANTLR generates
            // proper code by matching the "else" as soon as possible, so
            // the warning is hushed.
            options { warnWhenFollowAmbig = false; }
        :   "else"! statement
        )?

    |   // for statement: initializer; condition test; updater; loop body
        "for"^
        LPAREN! forInit SEMI! forCond SEMI! forIter RPAREN!
        statement

    |   // while statement
        "while"^ LPAREN! expression RPAREN! statement

    |   // do-while statement
        "do"^ statement "while"! LPAREN! expression RPAREN! SEMI!

    |   // get out of a loop (or switch)
        "break"^ ( IDENT )? SEMI!

    |   // do the next iteration of a loop
        "continue"^ ( IDENT )? SEMI!

    |   // return, optionally with an expression
        "return"^ ( expression )? SEMI!

    |   // switch/case statement
        "switch"^ LPAREN! expression RPAREN!
        LCURLY! ( casesGroup )* RCURLY!

    |   // exception try-catch block
        tryBlock

    |   // throw an exception
        "throw"^ expression SEMI!

    |   // synchronized block (requires a compound statement)
        "synchronized"^ LPAREN! expression RPAREN! compoundStatement

    |   // empty statement; the ';' is kept and retyped to EMPTY_STAT
        empty:SEMI { #empty.setType(EMPTY_STAT); }
    ;
|
---|
540 |
|
---|
541 |
|
---|
// One group inside a switch: one or more "case"/"default" labels followed by
// the statements they share, all gathered under a CASE_GROUP node.
casesGroup
    :   (   // CONFLICT: to which case group do the statements bind?
            // ANTLR generates proper code: it groups the many
            // "case"/"default" labels together and then follows them with
            // the statements.  The warning is switched off.
            options { warnWhenFollowAmbig = false; }
        :   aCase
        )+
        caseSList
        { #casesGroup = #(#[CASE_GROUP,"CASE_GROUP"], #casesGroup); }
    ;
|
---|
556 |
|
---|
// A single switch label: "case" expression ':'  or  "default" ':'.
// "case" is the root of its expression; the ':' is dropped.
aCase
    :   (   "case"^ expression
        |   "default"
        )
        COLON!
    ;
|
---|
560 |
|
---|
// The (possibly empty) statement list of a case group, gathered under an
// SLIST node.
caseSList
    :   ( statement )*
        { #caseSList = #(#[SLIST,"SLIST"], #caseSList); }
    ;
|
---|
565 |
|
---|
// The initializer part of a for loop (may be empty).  The result is always
// wrapped in a FOR_INIT node.
forInit
    :   (   // if it looks like a declaration, it is one
            (declaration)=> declaration
        |   // otherwise it could be an expression list
            expressionList
        )?
        { #forInit = #(#[FOR_INIT,"FOR_INIT"], #forInit); }
    ;
|
---|
575 |
|
---|
// The condition part of a for loop (may be empty); always wrapped in a
// FOR_CONDITION node.
forCond
    :   ( expression )?
        { #forCond = #(#[FOR_CONDITION,"FOR_CONDITION"], #forCond); }
    ;
|
---|
580 |
|
---|
// The updater part of a for loop (may be empty); always wrapped in a
// FOR_ITERATOR node.
forIter
    :   ( expressionList )?
        { #forIter = #(#[FOR_ITERATOR,"FOR_ITERATOR"], #forIter); }
    ;
|
---|
585 |
|
---|
// An exception-handling statement: "try" block, any number of catch
// handlers, and an optional finally clause.  "try" is the root of the tree;
// the handlers and the finally clause are its children.
tryBlock
    :   "try"^ compoundStatement
        ( handler )*
        ( finallyClause )?
    ;

// The finally clause of a try statement, with "finally" as the root of its
// own subtree.  It is a rule of its own on purpose: '^' makes a token the
// root of everything the *enclosing rule* has built so far, so a "finally"^
// written inline in tryBlock would end up as the root above "try".
finallyClause
    :   "finally"^ compoundStatement
    ;
|
---|
592 |
|
---|
593 |
|
---|
// A single exception handler: "catch" '(' parameter ')' block.
// "catch" is the root; the parentheses are dropped.
handler
    :   "catch"^
        LPAREN! parameterDeclaration RPAREN!
        compoundStatement
    ;
|
---|
598 |
|
---|
599 |
|
---|
600 | // expressions
|
---|
601 | // Note that most of these expressions follow the pattern
|
---|
602 | // thisLevelExpression :
|
---|
603 | // nextHigherPrecedenceExpression
|
---|
604 | // (OPERATOR nextHigherPrecedenceExpression)*
|
---|
// which is a standard recursive definition for parsing an expression.
|
---|
606 | // The operators in java have the following precedences:
|
---|
607 | // lowest (13) = *= /= %= += -= <<= >>= >>>= &= ^= |=
|
---|
608 | // (12) ?:
|
---|
609 | // (11) ||
|
---|
610 | // (10) &&
|
---|
611 | // ( 9) |
|
---|
612 | // ( 8) ^
|
---|
613 | // ( 7) &
|
---|
614 | // ( 6) == !=
|
---|
//     ( 5)  <  <=  >  >=  instanceof
|
---|
//     ( 4)  <<  >>  >>>
|
---|
617 | // ( 3) +(binary) -(binary)
|
---|
618 | // ( 2) * / %
|
---|
619 | // ( 1) ++ -- +(unary) -(unary) ~ ! (type)
|
---|
620 | // [] () (method call) . (dot -- identifier qualification)
|
---|
621 | // new () (explicit parenthesis)
|
---|
622 | //
|
---|
623 | // the last two are not usually on a precedence chart; I put them in
|
---|
624 | // to point out that new has a higher precedence than '.', so you
|
---|
// can validly use
|
---|
626 | // new Frame().show()
|
---|
627 | //
|
---|
628 | // Note that the above precedence levels map to the rules below...
|
---|
629 | // Once you have a precedence chart, writing the appropriate rules as below
|
---|
// is usually very straightforward
|
---|
631 |
|
---|
632 |
|
---|
633 |
|
---|
// Top of the expression hierarchy: any assignment expression, wrapped in an
// EXPR node.
expression
    :   assignmentExpression
        { #expression = #(#[EXPR,"EXPR"], #expression); }
    ;
|
---|
639 |
|
---|
640 |
|
---|
// A non-empty, comma-separated list of expressions.  Commas are dropped and
// the expressions become the children of an ELIST node.
expressionList
    :   expression
        ( COMMA! expression )*
        { #expressionList = #(#[ELIST,"ELIST"], #expressionList); }
    ;
|
---|
646 |
|
---|
647 |
|
---|
// Assignment expression (level 13).  The operator is the root, and the
// right-hand side recurses into this same rule, so a = b = c groups as
// a = (b = c).
assignmentExpression
    :   conditionalExpression
        (   (   ASSIGN^
            |   PLUS_ASSIGN^  | MINUS_ASSIGN^
            |   STAR_ASSIGN^  | DIV_ASSIGN^   | MOD_ASSIGN^
            |   SR_ASSIGN^    | BSR_ASSIGN^   | SL_ASSIGN^
            |   BAND_ASSIGN^  | BXOR_ASSIGN^  | BOR_ASSIGN^
            )
            assignmentExpression
        )?
    ;
|
---|
667 |
|
---|
668 |
|
---|
// Conditional test (level 12):  cond ? a : b
// '?' is the root; the ':' is dropped.
conditionalExpression
    :   logicalOrExpression
        (   QUESTION^ assignmentExpression
            COLON! conditionalExpression
        )?
    ;
|
---|
674 |
|
---|
675 |
|
---|
// Logical or, "||" (level 11); each operator is made the root.
logicalOrExpression
    :   logicalAndExpression
        ( LOR^ logicalAndExpression )*
    ;
|
---|
680 |
|
---|
681 |
|
---|
// Logical and, "&&" (level 10); each operator is made the root.
logicalAndExpression
    :   inclusiveOrExpression
        ( LAND^ inclusiveOrExpression )*
    ;
|
---|
686 |
|
---|
687 |
|
---|
// Bitwise or non-short-circuiting or, "|" (level 9); each operator is made
// the root.
inclusiveOrExpression
    :   exclusiveOrExpression
        ( BOR^ exclusiveOrExpression )*
    ;
|
---|
692 |
|
---|
693 |
|
---|
// Exclusive or, "^" (level 8); each operator is made the root.
exclusiveOrExpression
    :   andExpression
        ( BXOR^ andExpression )*
    ;
|
---|
698 |
|
---|
699 |
|
---|
// Bitwise or non-short-circuiting and, "&" (level 7); each operator is made
// the root.
andExpression
    :   equalityExpression
        ( BAND^ equalityExpression )*
    ;
|
---|
704 |
|
---|
705 |
|
---|
// Equality / inequality, "==" and "!=" (level 6); each operator is made the
// root.
equalityExpression
    :   relationalExpression
        (   ( NOT_EQUAL^ | EQUAL^ )
            relationalExpression
        )*
    ;
|
---|
710 |
|
---|
711 |
|
---|
// Relational expressions (level 5): a chain of  <  >  <=  >=  comparisons,
// or a single "instanceof" test whose right-hand side is a type (wrapped in
// a TYPE node), not an expression.
relationalExpression
    :   shiftExpression
        (   ( ( LT^ | GT^ | LE^ | GE^ ) shiftExpression )*
        |   "instanceof"^ typeSpec[true]
        )
    ;
|
---|
725 |
|
---|
726 |
|
---|
// Bit shift expressions, SL / SR / BSR (level 4); each operator is made the
// root.
shiftExpression
    :   additiveExpression
        (   ( SL^ | SR^ | BSR^ )
            additiveExpression
        )*
    ;
|
---|
731 |
|
---|
732 |
|
---|
// Binary addition / subtraction (level 3); each operator is made the root.
additiveExpression
    :   multiplicativeExpression
        (   ( PLUS^ | MINUS^ )
            multiplicativeExpression
        )*
    ;
|
---|
737 |
|
---|
738 |
|
---|
// Multiplication / division / modulo (level 2); each operator is made the
// root.
multiplicativeExpression
    :   unaryExpression
        (   ( STAR^ | DIV^ | MOD^ )
            unaryExpression
        )*
    ;
|
---|
743 |
|
---|
// Prefix ++ and --, unary minus and unary plus.  The sign tokens are retyped
// to UNARY_MINUS / UNARY_PLUS.  Everything else is handled by
// unaryExpressionNotPlusMinus.
unaryExpression
    :   INC^ unaryExpression
    |   DEC^ unaryExpression
    |   neg:MINUS^ { #neg.setType(UNARY_MINUS); } unaryExpression
    |   pos:PLUS^  { #pos.setType(UNARY_PLUS); }  unaryExpression
    |   unaryExpressionNotPlusMinus
    ;
|
---|
751 |
|
---|
// Unary expressions that do not start with a sign: bitwise/logical not,
// type casts, and postfix expressions.  For a cast the '(' is retyped to
// TYPECAST and made the root; the ')' is dropped.
unaryExpressionNotPlusMinus
    :   BNOT^ unaryExpression
    |   LNOT^ unaryExpression

    |   (   // The subrule exists so the warning can be shut off: "(int" is
            // ambiguous with postfixExpression because linear approximate
            // LL(k) lacks sequence information.  It's ok.
            options { generateAmbigWarnings = false; }

        :   // Cast to a builtin type: the operand must be numeric, and
            // there is no reason to backtrack on a keyword like int, float...
            primCast:LPAREN^ { #primCast.setType(TYPECAST); }
            builtInTypeSpec[true] RPAREN!
            unaryExpression

        |   // Cast to a class type.  Have to backtrack to see whether an
            // operator follows; if none does, it's a typecast.  No semantic
            // checking is needed to parse: if it _looks_ like a cast, it
            // _is_ a cast; otherwise it's a "(expr)".
            ( LPAREN classTypeSpec[true] RPAREN unaryExpressionNotPlusMinus )=>
            refCast:LPAREN^ { #refCast.setType(TYPECAST); }
            classTypeSpec[true] RPAREN!
            unaryExpressionNotPlusMinus

        |   postfixExpression
        )
    ;
|
---|
777 |
|
---|
// Qualified names, array expressions, method invocation, post inc/dec.
// Every postfix expression starts with a primary, followed by any number of
// selectors and an optional post-increment or post-decrement.
postfixExpression
    :   primaryExpression   // start with a primary

        (   // qualified id (id.id.id.id...) -- build the name
            DOT^
            (   IDENT
            |   "this"
            |   "class"
            |   newExpression
            |   "super" LPAREN ( expressionList )? RPAREN
            )
            // the above needs a semantic check to make sure "class"
            // is the _last_ qualifier.

        |   // allow ClassName[].class
            ( lbc:LBRACK^ { #lbc.setType(ARRAY_DECLARATOR); } RBRACK! )+
            DOT^ "class"

        |   // an array indexing operation
            lb:LBRACK^ { #lb.setType(INDEX_OP); } expression RBRACK!

        |   // method invocation
            // This is not strictly proper; it allows x(3)(4) or x[2](4),
            // which are not valid in Java.  If this grammar were used to
            // validate a Java program a semantic check would be needed, or
            // this rule would get really ugly...
            lp:LPAREN^ { #lp.setType(METHOD_CALL); }
            argList
            RPAREN!
        )*

        // possibly add on a post-increment or post-decrement.
        // allows INC/DEC on too much, but semantics can check
        (   in:INC^ { #in.setType(POST_INC); }
        |   de:DEC^ { #de.setType(POST_DEC); }
        |   // nothing
        )
    ;

// The basic element of an expression.
primaryExpression
    :   IDENT
    |   newExpression
    |   constant
    |   "super"
    |   "true"
    |   "false"
    |   "this"
    |   "null"
    |   LPAREN! assignmentExpression RPAREN!

    |   // Class literals of builtin types: int.class and int[].class.
        // They are primaries (rather than a separate alternative of
        // postfixExpression) so that the postfix selectors can follow them,
        // as in boolean.class.equals(boolean.class).
        builtInType
        ( lbt:LBRACK^ { #lbt.setType(ARRAY_DECLARATOR); } RBRACK! )*
        DOT^ "class"
    ;
|
---|
834 |
|
---|
835 | /** object instantiation: "new", a type, then either a parenthesized argument list or array dimensions.
|
---|
836 | * "new" is the root of the tree. Trees are built as illustrated by the following input/tree pairs:
|
---|
837 | *
|
---|
838 | *  new T()
|
---|
839 | *
|
---|
840 | *  new
|
---|
841 | *   |
|
---|
842 | *   T --  ELIST
|
---|
843 | *           |
|
---|
844 | *          arg1 -- arg2 -- .. -- argn
|
---|
845 | *
|
---|
846 | *  new int[]
|
---|
847 | *
|
---|
848 | *  new
|
---|
849 | *   |
|
---|
850 | *  int -- ARRAY_DECLARATOR
|
---|
851 | *
|
---|
852 | *  new int[] {1,2}
|
---|
853 | *
|
---|
854 | *  new
|
---|
855 | *   |
|
---|
856 | *  int -- ARRAY_DECLARATOR -- ARRAY_INIT
|
---|
857 | *                                  |
|
---|
858 | *                                EXPR -- EXPR
|
---|
859 | *                                  |      |
|
---|
860 | *                                  1      2
|
---|
861 | *
|
---|
862 | *  new int[3]
|
---|
863 | *  new
|
---|
864 | *   |
|
---|
865 | *  int -- ARRAY_DECLARATOR
|
---|
866 | *                |
|
---|
867 | *              EXPR
|
---|
868 | *                |
|
---|
869 | *                3
|
---|
870 | *
|
---|
871 | *  new int[1][2]
|
---|
872 | *
|
---|
873 | *  new
|
---|
874 | *   |
|
---|
875 | *  int -- ARRAY_DECLARATOR
|
---|
876 | *               |
|
---|
877 | *         ARRAY_DECLARATOR -- EXPR
|
---|
878 | *               |              |
|
---|
879 | *             EXPR             2
|
---|
880 | *               |
|
---|
881 | *               1
|
---|
882 | * (each LBRACK^ in newArrayDeclarator becomes the root of what that rule has built so far, so the first dimension ends up nested innermost)
|
---|
883 | */
|
---|
884 | newExpression
|
---|
885 | : "new"^ type
|
---|
886 | ( LPAREN! argList RPAREN! (classBlock)? // argument list in parentheses; the optional classBlock is presumably an anonymous class body -- confirm against classBlock
|
---|
887 |
|
---|
888 | //java 1.1
|
---|
889 | // Note: This will allow bad constructs like
|
---|
890 | // new int[4][][3] {exp,exp}.
|
---|
891 | // There needs to be a semantic check here...
|
---|
892 | // to make sure:
|
---|
893 | // a) [ expr ] and [ ] are not mixed
|
---|
894 | // b) [ expr ] and an init are not used together
|
---|
895 |
|
---|
896 | | newArrayDeclarator (arrayInitializer)? // array creation: one or more dimensions, then an optional initializer
|
---|
897 | )
|
---|
898 | ;
|
---|
899 | // argument list between parentheses: either an expressionList or, when nothing is there, a lone ELIST node created by the action below
|
---|
900 | argList
|
---|
901 | : ( expressionList
|
---|
902 | | /*nothing*/
|
---|
903 | {#argList = #[ELIST,"ELIST"];} // make a fresh ELIST node the whole result of this rule
|
---|
904 | )
|
---|
905 | ;
|
---|
906 | // one or more "[ expression? ]" groups following the type of a "new" expression
|
---|
907 | newArrayDeclarator
|
---|
908 | : (
|
---|
909 | // CONFLICT:
|
---|
910 | // newExpression is a primaryExpression which can be
|
---|
911 | // followed by an array index reference. This is ok,
|
---|
912 | // as the generated code will stay in this loop as
|
---|
913 | // long as it sees an LBRACK (proper behavior)
|
---|
914 | options {
|
---|
915 | warnWhenFollowAmbig = false;
|
---|
916 | }
|
---|
917 | :
|
---|
918 | lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} // '[' becomes the tree root and is retyped to ARRAY_DECLARATOR
|
---|
919 | (expression)? // the dimension size may be left out
|
---|
920 | RBRACK! // ']' is matched but kept out of the tree
|
---|
921 | )+
|
---|
922 | ;
|
---|
923 | // a literal constant: any one of the four literal token types produced by the lexer
|
---|
924 | constant
|
---|
925 | : NUM_INT
|
---|
926 | | NUM_FLOAT
|
---|
927 | | STRING_LITERAL
|
---|
928 | | CHAR_LITERAL
|
---|
929 | ;
|
---|
930 |
|
---|
931 |
|
---|
932 | //----------------------------------------------------------------------------
|
---|
933 | // The Java scanner
|
---|
934 | //----------------------------------------------------------------------------
|
---|
935 | class JavaLexer extends Lexer;
|
---|
936 | // lexer-wide options; a rule can override them for itself (IDENT sets testLiterals=true)
|
---|
937 | options {
|
---|
938 | exportVocab=Java; // call the vocabulary "Java"
|
---|
939 | testLiterals=false; // don't automatically test for literals
|
---|
940 | k=4; // four characters of lookahead; the longest operator below, ">>>=", is four characters long
|
---|
941 | }
|
---|
942 |
|
---|
943 |
|
---|
944 |
|
---|
945 | // OPERATORS -- one token rule per operator or separator
|
---|
946 | QUESTION : '?' ;
|
---|
947 | LPAREN : '(' ;
|
---|
948 | RPAREN : ')' ;
|
---|
949 | LBRACK : '[' ;
|
---|
950 | RBRACK : ']' ;
|
---|
951 | LCURLY : '{' ;
|
---|
952 | RCURLY : '}' ;
|
---|
953 | COLON : ':' ;
|
---|
954 | COMMA : ',' ;
|
---|
955 | //DOT : '.' ; -- left disabled: '.' is matched by the first alternative of NUM_INT, which sets the token type to DOT
|
---|
956 | ASSIGN : '=' ;
|
---|
957 | EQUAL : "==" ;
|
---|
958 | LNOT : '!' ;
|
---|
959 | BNOT : '~' ;
|
---|
960 | NOT_EQUAL : "!=" ;
|
---|
961 | DIV : '/' ;
|
---|
962 | DIV_ASSIGN : "/=" ;
|
---|
963 | PLUS : '+' ;
|
---|
964 | PLUS_ASSIGN : "+=" ;
|
---|
965 | INC : "++" ;
|
---|
966 | MINUS : '-' ;
|
---|
967 | MINUS_ASSIGN : "-=" ;
|
---|
968 | DEC : "--" ;
|
---|
969 | STAR : '*' ;
|
---|
970 | STAR_ASSIGN : "*=" ;
|
---|
971 | MOD : '%' ;
|
---|
972 | MOD_ASSIGN : "%=" ;
|
---|
973 | SR : ">>" ;
|
---|
974 | SR_ASSIGN : ">>=" ;
|
---|
975 | BSR : ">>>" ;
|
---|
976 | BSR_ASSIGN : ">>>=" ;
|
---|
977 | GE : ">=" ;
|
---|
978 | GT : ">" ;
|
---|
979 | SL : "<<" ;
|
---|
980 | SL_ASSIGN : "<<=" ;
|
---|
981 | LE : "<=" ;
|
---|
982 | LT : '<' ;
|
---|
983 | BXOR : '^' ;
|
---|
984 | BXOR_ASSIGN : "^=" ;
|
---|
985 | BOR : '|' ;
|
---|
986 | BOR_ASSIGN : "|=" ;
|
---|
987 | LOR : "||" ;
|
---|
988 | BAND : '&' ;
|
---|
989 | BAND_ASSIGN : "&=" ;
|
---|
990 | LAND : "&&" ;
|
---|
991 | SEMI : ';' ;
|
---|
992 |
|
---|
993 |
|
---|
994 | // Whitespace -- ignored. Each match is a single space, tab, form feed or line terminator.
|
---|
995 | WS : ( ' '
|
---|
996 | | '\t'
|
---|
997 | | '\f'
|
---|
998 | // handle newlines
|
---|
999 | | ( "\r\n" // Evil DOS
|
---|
1000 | | '\r' // Macintosh
|
---|
1001 | | '\n' // Unix (the right way)
|
---|
1002 | )
|
---|
1003 | { newline(); } // runs once per line terminator, whichever of the three forms matched
|
---|
1004 | )
|
---|
1005 | { _ttype = Token.SKIP; } // mark the token as skipped so it is not returned to the parser
|
---|
1006 | ;
|
---|
1007 | // Single-line comments: "//", the rest of the line, and the line terminator when there is one.
|
---|
1008 | // The terminator is optional so that a comment on the last line of a file that has no trailing newline still lexes.
|
---|
1009 | SL_COMMENT
|
---|
1010 | : "//" (~('\n'|'\r'))*
|
---|
1011 | ( options { warnWhenFollowAmbig = false; } : ('\n'|'\r'('\n')?) {newline();} )? // matching the terminator here is the intended choice; newline() runs only when one was consumed
|
---|
1012 | {$setType(Token.SKIP);} // comments are skipped, not returned to the parser
|
---|
1013 | ;
|
---|
1014 |
|
---|
1015 | // multiple-line comments: "/*" ... "*/"; newline() is called for every line terminator inside
|
---|
1016 | ML_COMMENT
|
---|
1017 | : "/*"
|
---|
1018 | ( /* '\r' '\n' can be matched in one alternative or by matching
|
---|
1019 | '\r' in one iteration and '\n' in another. I am trying to
|
---|
1020 | handle any flavor of newline that comes in, but the language
|
---|
1021 | that allows both "\r\n" and "\r" and "\n" to all be valid
|
---|
1022 | newline is ambiguous. Consequently, the resulting grammar
|
---|
1023 | must be ambiguous. I'm shutting this warning off.
|
---|
1024 | */
|
---|
1025 | options {
|
---|
1026 | generateAmbigWarnings=false;
|
---|
1027 | }
|
---|
1028 | :
|
---|
1029 | { LA(2)!='/' }? '*' // a '*' belongs to the comment body only when the character after it is not '/'
|
---|
1030 | | '\r' '\n' {newline();}
|
---|
1031 | | '\r' {newline();}
|
---|
1032 | | '\n' {newline();}
|
---|
1033 | | ~('*'|'\n'|'\r') // any other character
|
---|
1034 | )*
|
---|
1035 | "*/"
|
---|
1036 | {$setType(Token.SKIP);} // comments are skipped, not returned to the parser
|
---|
1037 | ;
|
---|
1038 | // character literals: exactly one escape sequence or one plain character between single quotes.
|
---|
1039 | // A backslash is only accepted through ESC, and a raw line terminator is rejected, so an
|
---|
1040 | // unterminated literal cannot swallow the end of the line (which would also bypass newline()).
|
---|
1041 | CHAR_LITERAL
|
---|
1042 | : '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\''
|
---|
1043 | ;
|
---|
1044 | // string literals: escape sequences and plain characters between double quotes.
|
---|
1045 | // A raw line terminator is not allowed inside the quotes; matching one here would also bypass newline() and skew line numbers.
|
---|
1046 | STRING_LITERAL
|
---|
1047 | : '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"'
|
---|
1048 | ;
|
---|
1049 |
|
---|
1050 |
|
---|
1051 | // escape sequence -- note that this is protected; it can only be called
|
---|
1052 | // from another lexer rule -- it will not ever directly return a token to
|
---|
1053 | // the parser
|
---|
1054 | // There are various ambiguities hushed in this rule. The optional
|
---|
1055 | // '0'..'7' octal digit matches should be matched here rather than letting
|
---|
1056 | // them go back to STRING_LITERAL to be matched. ANTLR does the
|
---|
1057 | // right thing by matching immediately; hence, it's ok to shut off
|
---|
1058 | // the FOLLOW ambig warnings.
|
---|
1059 | protected
|
---|
1060 | ESC
|
---|
1061 | : '\\'
|
---|
1062 | ( 'n'
|
---|
1063 | | 'r'
|
---|
1064 | | 't'
|
---|
1065 | | 'b'
|
---|
1066 | | 'f'
|
---|
1067 | | '"'
|
---|
1068 | | '\''
|
---|
1069 | | '\\'
|
---|
1070 | | ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
|
---|
1071 | | ('0'..'3') // octal escape starting with 0..3: up to three octal digits in total
|
---|
1072 | (
|
---|
1073 | options {
|
---|
1074 | warnWhenFollowAmbig = false;
|
---|
1075 | }
|
---|
1076 | : ('0'..'7')
|
---|
1077 | (
|
---|
1078 | options {
|
---|
1079 | warnWhenFollowAmbig = false;
|
---|
1080 | }
|
---|
1081 | : '0'..'7'
|
---|
1082 | )?
|
---|
1083 | )?
|
---|
1084 | | ('4'..'7') // octal escape starting with 4..7: at most two octal digits in total
|
---|
1085 | (
|
---|
1086 | options {
|
---|
1087 | warnWhenFollowAmbig = false;
|
---|
1088 | }
|
---|
1089 | : ('0'..'7') // octal digits only; '8' and '9' are not part of the escape
|
---|
1090 | )?
|
---|
1091 | )
|
---|
1092 | ;
|
---|
1093 |
|
---|
1094 |
|
---|
1095 | // a single hexadecimal digit in either letter case; protected, so only other lexer rules can use it
|
---|
1096 | protected
|
---|
1097 | HEX_DIGIT
|
---|
1098 | : ( 'a'..'f' | 'A'..'F' | '0'..'9' )
|
---|
1099 | ;
|
---|
1100 |
|
---|
1101 |
|
---|
1102 | // a dummy rule to force vocabulary to be all characters (except the special
|
---|
1103 | // ones that ANTLR uses internally, 0 to 2)
|
---|
1104 | protected
|
---|
1105 | VOCAB
|
---|
1106 | : '\3'..'\377' // octal 3 through octal 377
|
---|
1107 | ;
|
---|
1108 |
|
---|
1109 |
|
---|
1110 | // an identifier. Note that testLiterals is set to true for this rule (it is false lexer-wide)! This means
|
---|
1111 | // that after we match the rule, we look in the literals table to see
|
---|
1112 | // if it's a literal or really an identifier
|
---|
1113 | IDENT
|
---|
1114 | options {testLiterals=true;}
|
---|
1115 | : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')* // digits are allowed everywhere except the first character
|
---|
1116 | ;
|
---|
1117 | // This rule yields three token types: NUM_INT by default, DOT for a lone '.', and NUM_FLOAT
|
---|
1118 | // for anything with a fraction, exponent or float suffix.
|
---|
1119 | // a numeric literal. NOTE(review): isDecimal stays true after a hex or octal prefix, so the float check below still runs for those -- confirm this is intended.
|
---|
1120 | NUM_INT
|
---|
1121 | {boolean isDecimal=false;}
|
---|
1122 | : '.' {_ttype = DOT;}
|
---|
1123 | (('0'..'9')+ (EXPONENT)? (FLOAT_SUFFIX)? { _ttype = NUM_FLOAT; })?
|
---|
1124 | | ( '0' {isDecimal = true;} // special case for just '0'
|
---|
1125 | ( ('x'|'X')
|
---|
1126 | ( // hex
|
---|
1127 | // the 'e'|'E' and float suffix stuff look
|
---|
1128 | // like hex digits, hence the (...)+ doesn't
|
---|
1129 | // know when to stop: ambig. ANTLR resolves
|
---|
1130 | // it correctly by matching immediately. It
|
---|
1131 | // is therefore ok to hush warning.
|
---|
1132 | options {
|
---|
1133 | warnWhenFollowAmbig=false;
|
---|
1134 | }
|
---|
1135 | : HEX_DIGIT
|
---|
1136 | )+
|
---|
1137 | | ('0'..'7')+ // octal
|
---|
1138 | )?
|
---|
1139 | | ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal
|
---|
1140 | )
|
---|
1141 | ( ('l'|'L')
|
---|
1142 |
|
---|
1143 | // only check to see if it's a float if looks like decimal so far
|
---|
1144 | | {isDecimal}?
|
---|
1145 | ( '.' ('0'..'9')* (EXPONENT)? (FLOAT_SUFFIX)?
|
---|
1146 | | EXPONENT (FLOAT_SUFFIX)?
|
---|
1147 | | FLOAT_SUFFIX
|
---|
1148 | )
|
---|
1149 | { _ttype = NUM_FLOAT; }
|
---|
1150 | )?
|
---|
1151 | ;
|
---|
1152 |
|
---|
1153 |
|
---|
1154 | // protected helper rule for floating point numbers: an exponent marker, an optional sign, then one or more decimal digits
|
---|
1155 | protected
|
---|
1156 | EXPONENT
|
---|
1157 | : ( 'E' | 'e' ) ( '-' | '+' )? ( '0'..'9' )+
|
---|
1158 | ;
|
---|
1159 |
|
---|
1160 | // protected helper rule for floating point numbers: the one-letter type suffix, in either case
|
---|
1161 | protected
|
---|
1162 | FLOAT_SUFFIX
|
---|
1163 | : ( 'd' | 'D' | 'f' | 'F' )
|
---|
1164 | ;
|
---|
1165 |
|
---|