1 | /*
|
---|
2 | This example comes from a short article series in the Linux
|
---|
3 | Gazette by Richard A. Sevenich and Christopher Lopes, titled
|
---|
4 | "Compiler Construction Tools". The article series starts at
|
---|
5 |
|
---|
6 | http://www.linuxgazette.com/issue39/sevenich.html
|
---|
7 |
|
---|
8 | Small changes and updates to newest JFlex+Cup versions
|
---|
9 | by Gerwin Klein
|
---|
10 | */
|
---|
11 |
|
---|
12 | /*
|
---|
13 | Commented By: Christopher Lopes
|
---|
14 | File Name: lcalc.flex
|
---|
15 | To Create: > jflex lcalc.flex
|
---|
16 |
|
---|
17 | and then after the parser is created
|
---|
18 | > javac Lexer.java
|
---|
19 | */
|
---|
20 |
|
---|
21 | /* --------------------------Usercode Section------------------------ */
|
---|
22 |
|
---|
23 | import java_cup.runtime.*;
|
---|
24 |
|
---|
25 | %%
|
---|
26 |
|
---|
27 | /* -----------------Options and Declarations Section----------------- */
|
---|
28 |
|
---|
29 | /*
|
---|
30 | The name of the class JFlex will create will be Lexer.
|
---|
31 | JFlex will write the generated code to the file Lexer.java.
|
---|
32 | */
|
---|
33 | %class Lexer
|
---|
34 |
|
---|
35 | /*
|
---|
36 | The current line number can be accessed with the variable yyline
|
---|
37 | and the current column number with the variable yycolumn.
|
---|
38 | */
|
---|
39 | %line
|
---|
40 | %column
|
---|
41 |
|
---|
42 | /*
|
---|
43 | Switches to CUP compatibility mode to interface with a CUP
|
---|
44 | generated parser.
|
---|
45 | */
|
---|
46 | %cup
|
---|
47 |
|
---|
48 | /*
|
---|
49 | Declarations
|
---|
50 |
|
---|
51 | Code between %{ and %}, both of which must be at the beginning of a
|
---|
52 | line, will be copied verbatim into the lexer class source.
|
---|
53 | Here you declare member variables and functions that are used inside
|
---|
54 | scanner actions.
|
---|
55 | */
|
---|
56 | %{
|
---|
/**
 * Builds a java_cup.runtime.Symbol for the current token when the token
 * carries no value.
 *
 * @param type token id to tag the symbol with (the rules pass constants
 *             from the class sym, e.g. sym.SEMI)
 * @return a new Symbol constructed from type and the scanner's current
 *         yyline and yycolumn
 */
private Symbol symbol(int type) {
    final int line = yyline;
    final int column = yycolumn;
    return new Symbol(type, line, column);
}
63 |
|
---|
/**
 * Builds a java_cup.runtime.Symbol for the current token and attaches a
 * value to it.
 *
 * @param type  token id to tag the symbol with (the rules pass constants
 *              from the class sym, e.g. sym.NUMBER)
 * @param value object handed to the Symbol constructor as the token's value
 * @return a new Symbol constructed from type, the scanner's current yyline
 *         and yycolumn, and value
 */
private Symbol symbol(int type, Object value) {
    final Symbol token = new Symbol(type, yyline, yycolumn, value);
    return token;
}
69 | %}
|
---|
70 |
|
---|
71 |
|
---|
72 | /*
|
---|
73 | Macro Declarations
|
---|
74 |
|
---|
75 | These declarations are regular expressions that will be used later
|
---|
76 | in the Lexical Rules Section.
|
---|
77 | */
|
---|
78 |
|
---|
79 | /* A line terminator is a \r (carriage return), a \n (line feed), or
|
---|
80 | the two-character sequence \r\n. */
|
---|
81 | LineTerminator = \r|\n|\r\n
|
---|
82 |
|
---|
83 | /* White space is a line terminator, space, tab, or form feed. */
|
---|
84 | WhiteSpace = {LineTerminator} | [ \t\f]
|
---|
85 |
|
---|
86 | /* An integer literal is either a single zero, or a digit between one
|
---|
87 | and nine followed by zero or more digits between zero and nine (so a
|
---|
88 | literal with a leading zero, such as 007, is not matched as one token). */
|
---|
89 | dec_int_lit = 0 | [1-9][0-9]*
|
---|
90 |
|
---|
91 | /* An identifier is a word beginning with a letter between A and
|
---|
92 | Z, a and z, or an underscore, followed by zero or more characters
|
---|
93 | between A and Z, a and z, zero and nine, or an underscore. */
|
---|
94 | dec_int_id = [A-Za-z_][A-Za-z_0-9]*
|
---|
95 |
|
---|
96 | %%
|
---|
97 | /* ------------------------Lexical Rules Section---------------------- */
|
---|
98 |
|
---|
99 | /*
|
---|
100 | This section contains regular expressions and actions, i.e. Java
|
---|
101 | code, that will be executed when the scanner matches the associated
|
---|
102 | regular expression. */
|
---|
103 |
|
---|
104 | /* YYINITIAL is the state at which the lexer begins scanning. So
|
---|
105 | these regular expressions will only be matched if the scanner is in
|
---|
106 | the start state YYINITIAL. */
|
---|
107 |
|
---|
<YYINITIAL> {

    /* Statement terminator: return the token SEMI declared in the class
       sym. Nothing is echoed for it. */
    ";"              { return symbol(sym.SEMI); }

    /* Operators and parentheses: echo the token padded with spaces, then
       return the matching token declared in the class sym. */
    "+"              { System.out.print(" + "); return symbol(sym.PLUS); }
    "-"              { System.out.print(" - "); return symbol(sym.MINUS); }
    "*"              { System.out.print(" * "); return symbol(sym.TIMES); }
    "/"              { System.out.print(" / "); return symbol(sym.DIVIDE); }
    "("              { System.out.print(" ( "); return symbol(sym.LPAREN); }
    ")"              { System.out.print(" ) "); return symbol(sym.RPAREN); }

    /* Integer literal: echo the matched text and return the token NUMBER
       whose value is the text of yytext() converted to an Integer.
       Integer.valueOf replaces the deprecated Integer(String) constructor;
       like that constructor it throws NumberFormatException when the
       literal does not fit in an int. */
    {dec_int_lit}    { System.out.print(yytext());
                       return symbol(sym.NUMBER, Integer.valueOf(yytext())); }

    /* Identifier: echo the matched text and return the token ID with the
       default value one that is given to all identifiers.
       Integer.valueOf replaces the deprecated Integer(int) constructor. */
    {dec_int_id}     { System.out.print(yytext());
                       return symbol(sym.ID, Integer.valueOf(1)); }

    /* Whitespace produces no token. */
    {WhiteSpace}     { /* just skip what was found, do nothing */ }
}
138 |
|
---|
139 |
|
---|
/* Catch-all rule, declared outside the <YYINITIAL> group: no other rule
   matched the input, so throw an Error whose message contains the illegal
   character that was found. */
[^]    { final String offending = yytext();
         throw new Error("Illegal character <" + offending + ">"); }