source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/PythonTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 15.6 KB
/*
 * 12/06/2005
 *
 * PythonTokenMaker.java - Token maker for the Python programming language.
 *
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.AbstractJFlexTokenMaker;
import org.fife.ui.rsyntaxtextarea.DefaultToken;
import org.fife.ui.rsyntaxtextarea.Token;
import org.fife.ui.rsyntaxtextarea.TokenMaker;


/**
 * Scanner for the Python programming language.
 *
 * @author Robert Futrell
 * @version 0.3
 */
%%

%public
%class PythonTokenMaker
%extends AbstractJFlexTokenMaker
%implements TokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token


%{


	/**
	 * Constructor.  This must be here because JFlex does not generate a
	 * no-parameter constructor.
	 */
	public PythonTokenMaker() {
		super();
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int tokenType) {
		addToken(zzStartRead, zzMarkedPos-1, tokenType);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int start, int end, int tokenType) {
		int so = start + offsetShift;
		addToken(zzBuffer, start,end, tokenType, so);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param array The character array.
	 * @param start The starting offset in the array.
	 * @param end The ending offset in the array.
	 * @param tokenType The token's type.
	 * @param startOffset The offset in the document at which this token
	 *        occurs.
	 */
	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
		super.addToken(array, start,end, tokenType, startOffset);
		zzStartRead = zzMarkedPos;
	}


	/**
	 * Returns the text to place at the beginning and end of a
	 * line to "comment" it in this programming language.
	 *
	 * @return The start and end strings to add to a line to "comment"
	 *         it out.
	 */
	public String[] getLineCommentStartAndEnd() {
		return new String[] { "#", null };
	}
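
	/*
	 * Illustrative note (not part of the original file): the returned pair
	 * means a line is commented out by prepending "#" and appending nothing;
	 * the end string is null because Python only has line comments.  For
	 * example, "x = 1" becomes "#x = 1".
	 */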


	/**
	 * Returns the first token in the linked list of tokens generated
	 * from <code>text</code>.  This method must be implemented by
	 * subclasses so they can correctly implement syntax highlighting.
	 *
	 * @param text The text from which to get tokens.
	 * @param initialTokenType The token type we should start with.
	 * @param startOffset The offset into the document at which
	 *        <code>text</code> starts.
	 * @return The first <code>Token</code> in a linked list representing
	 *         the syntax highlighted text.
	 */
	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

		resetTokenList();
		this.offsetShift = -text.offset + startOffset;

		// Start off in the proper state.
		int state = Token.NULL;
		switch (initialTokenType) {
			case Token.LITERAL_STRING_DOUBLE_QUOTE:
				state = LONG_STRING_2;
				break;
			case Token.LITERAL_CHAR:
				state = LONG_STRING_1;
				break;
			default:
				state = Token.NULL;
		}

		s = text;
		try {
			yyreset(zzReader);
			yybegin(state);
			return yylex();
		} catch (IOException ioe) {
			ioe.printStackTrace();
			return new DefaultToken();
		}

	}
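
	/*
	 * Minimal usage sketch (illustrative only, not part of the original
	 * file).  It assumes the Token API bundled with this RSyntaxTextArea
	 * version exposes isPaintable(), getNextToken(), getLexeme() and the
	 * public "type" field; adjust the calls if the local API differs.
	 *
	 *   PythonTokenMaker tm = new PythonTokenMaker();
	 *   String code = "def foo(): pass";
	 *   Segment line = new Segment(code.toCharArray(), 0, code.length());
	 *   for (Token t = tm.getTokenList(line, Token.NULL, 0);
	 *           t != null && t.isPaintable(); t = t.getNextToken()) {
	 *       System.out.println(t.type + "\t'" + t.getLexeme() + "'");
	 *   }
	 */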


	/**
	 * Resets the scanner to read from a new input stream.
	 * Does not close the old reader.
	 *
	 * All internal variables are reset, the old input stream
	 * <b>cannot</b> be reused (internal buffer is discarded and lost).
	 * Lexical state is set to <tt>YY_INITIAL</tt>.
	 *
	 * @param reader the new input stream
	 */
	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
		// 's' has been updated.
		zzBuffer = s.array;
		/*
		 * We replaced the line below with the two below it because zzRefill
		 * no longer "refills" the buffer (since the way we do it, it's always
		 * "full" the first time through, since it points to the segment's
		 * array).  So, we assign zzEndRead here.
		 */
		//zzStartRead = zzEndRead = s.offset;
		zzStartRead = s.offset;
		zzEndRead = zzStartRead + s.count - 1;
		zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
		zzLexicalState = YYINITIAL;
		zzReader = reader;
		zzAtBOL = true;
		zzAtEOF = false;
	}


	/**
	 * Refills the input buffer.
	 *
	 * @return <code>true</code> if EOF was reached, otherwise
	 *         <code>false</code>.
	 * @exception IOException if any I/O-Error occurs.
	 */
	private boolean zzRefill() throws java.io.IOException {
		return zzCurrentPos>=s.offset+s.count;
	}


%}

/* This part is taken from http://www.python.org/doc/2.2.3/ref/grammar.txt */
identifier = (({letter}|"_")({letter}|{digit}|"_")*)
letter = ({lowercase}|{uppercase})
lowercase = ([a-z])
uppercase = ([A-Z])
digit = ([0-9])
stringliteral = ({stringprefix}?{shortstring})
stringprefix = ("r"|"u"[rR]?|"R"|"U"[rR]?)
shortstring1 = ([\']{shortstring1item}*[\']?)
shortstring2 = ([\"]{shortstring2item}*[\"]?)
shortstring = ({shortstring1}|{shortstring2})
shortstring1item = ({shortstring1char}|{escapeseq})
shortstring2item = ({shortstring2char}|{escapeseq})
shortstring1char = ([^\\\n\'])
shortstring2char = ([^\\\n\"])
escapeseq = ([\\].)
longinteger = ({integer}[lL])
integer = ({decimalinteger}|{octinteger}|{hexinteger})
decimalinteger = ({nonzerodigit}{digit}*|"0")
octinteger = ("0"{octdigit}+)
hexinteger = ("0"[xX]{hexdigit}+)
nonzerodigit = ([1-9])
octdigit = ([0-7])
hexdigit = ({digit}|[a-f]|[A-F])
floatnumber = ({pointfloat}|{exponentfloat})
pointfloat = ({intpart}?{fraction}|{intpart}".")
exponentfloat = (({intpart}|{pointfloat}){exponent})
intpart = ({digit}+)
fraction = ("."{digit}+)
exponent = ([eE][\+\-]?{digit}+)
imagnumber = (({floatnumber}|{intpart})[jJ])
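
/*
 * Illustrative examples (not part of the original grammar file) of what the
 * Python-grammar macros above match:
 *
 *   {stringliteral}    'abc'   "abc"   r'raw\n'   u"text"
 *   {longinteger}      42L
 *   {decimalinteger}   0   7   123
 *   {octinteger}       0755            {hexinteger}   0x1F
 *   {floatnumber}      3.14   .5   10.   1e-9
 *   {imagnumber}       2j   3.5J
 */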

ErrorNumberFormat = ({digit}{NonSeparator}+)
NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")

LongStringStart1 = ({stringprefix}?\'\'\')
LongStringStart2 = ({stringprefix}?\"\"\")

LineTerminator = (\n)
WhiteSpace = ([ \t\f])

LineComment = ("#".*)


%state LONG_STRING_1
%state LONG_STRING_2


%%

/* Keywords */
<YYINITIAL> "and"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "as"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "assert"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "break"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "class"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "continue"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "def"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "del"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "elif"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "else"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "except"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "exec"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "finally"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "for"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "from"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "global"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "if"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "import"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "in"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "is"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "lambda"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "not"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "or"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "pass"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "print"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "raise"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "return"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "try"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "while"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "yield"	{ addToken(Token.RESERVED_WORD); }

/* Data types. */
<YYINITIAL> "char"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "double"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "float"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "int"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "long"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "short"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "signed"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "unsigned"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "void"	{ addToken(Token.DATA_TYPE); }

/* Standard functions */
<YYINITIAL> "abs"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "apply"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "bool"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "buffer"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "callable"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "chr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "classmethod"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "cmp"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "coerce"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "compile"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "complex"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "delattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "dict"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "dir"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "divmod"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "enumerate"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "eval"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "execfile"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "file"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "filter"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "float"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "getattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "globals"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "hasattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "hash"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "hex"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "id"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "input"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "int"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "intern"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "isinstance"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "issubclass"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "iter"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "len"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "list"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "locals"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "long"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "map"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "max"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "min"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "object"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "oct"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "open"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "ord"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "pow"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "property"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "range"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "raw_input"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "reduce"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "reload"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "repr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "round"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "setattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "slice"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "staticmethod"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "str"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "sum"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "super"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "tuple"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "type"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "unichr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "unicode"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "vars"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "xrange"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "zip"	{ addToken(Token.FUNCTION); }


<YYINITIAL> {

	{LineTerminator}	{ addNullToken(); return firstToken; }

	{identifier}	{ addToken(Token.IDENTIFIER); }

	{WhiteSpace}+	{ addToken(Token.WHITESPACE); }

	/* String/Character Literals. */
	{stringliteral}	{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	{LongStringStart1}	{ yybegin(LONG_STRING_1); addToken(Token.LITERAL_CHAR); }
	{LongStringStart2}	{ yybegin(LONG_STRING_2); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* Comment Literals. */
	{LineComment}	{ addToken(Token.COMMENT_EOL); }

	/* Separators. */
	"("	{ addToken(Token.SEPARATOR); }
	")"	{ addToken(Token.SEPARATOR); }
	"["	{ addToken(Token.SEPARATOR); }
	"]"	{ addToken(Token.SEPARATOR); }
	"{"	{ addToken(Token.SEPARATOR); }
	"}"	{ addToken(Token.SEPARATOR); }

	/* Operators. */
	"="	{ addToken(Token.OPERATOR); }
	"+"	{ addToken(Token.OPERATOR); }
	"-"	{ addToken(Token.OPERATOR); }
	"*"	{ addToken(Token.OPERATOR); }
	"/"	{ addToken(Token.OPERATOR); }
	"%"	{ addToken(Token.OPERATOR); }
	"**"	{ addToken(Token.OPERATOR); }
	"~"	{ addToken(Token.OPERATOR); }
	"<"	{ addToken(Token.OPERATOR); }
	">"	{ addToken(Token.OPERATOR); }
	"<<"	{ addToken(Token.OPERATOR); }
	">>"	{ addToken(Token.OPERATOR); }
	"=="	{ addToken(Token.OPERATOR); }
	"+="	{ addToken(Token.OPERATOR); }
	"-="	{ addToken(Token.OPERATOR); }
	"*="	{ addToken(Token.OPERATOR); }
	"/="	{ addToken(Token.OPERATOR); }
	"%="	{ addToken(Token.OPERATOR); }
	">>="	{ addToken(Token.OPERATOR); }
	"<<="	{ addToken(Token.OPERATOR); }
	"^"	{ addToken(Token.OPERATOR); }
	"&"	{ addToken(Token.OPERATOR); }
	"&&"	{ addToken(Token.OPERATOR); }
	"|"	{ addToken(Token.OPERATOR); }
	"||"	{ addToken(Token.OPERATOR); }
	"?"	{ addToken(Token.OPERATOR); }
	":"	{ addToken(Token.OPERATOR); }
	","	{ addToken(Token.OPERATOR); }
	"!"	{ addToken(Token.OPERATOR); }
	"++"	{ addToken(Token.OPERATOR); }
	"--"	{ addToken(Token.OPERATOR); }
	"."	{ addToken(Token.OPERATOR); }

	/* Numbers */
	{longinteger}|{integer}	{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{floatnumber}|{imagnumber}	{ addToken(Token.LITERAL_NUMBER_FLOAT); }
	{ErrorNumberFormat}	{ addToken(Token.ERROR_NUMBER_FORMAT); }

	/* Other punctuation, we'll highlight it as "identifiers." */
	"@"	{ addToken(Token.IDENTIFIER); }
	";"	{ addToken(Token.IDENTIFIER); }

	/* Ended with a line not in a string or comment. */
	<<EOF>>	{ addNullToken(); return firstToken; }

	/* Catch any other (unhandled) characters and flag them as bad. */
	.	{ addToken(Token.ERROR_IDENTIFIER); }

}

<LONG_STRING_1> {
	[^\']+	{ addToken(Token.LITERAL_CHAR); }
	"'''"	{ yybegin(YYINITIAL); addToken(Token.LITERAL_CHAR); }
	"'"	{ addToken(Token.LITERAL_CHAR); }
	<<EOF>>	{
			if (firstToken==null) {
				addToken(Token.LITERAL_CHAR);
			}
			return firstToken;
		}
}

<LONG_STRING_2> {
	[^\"]+	{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	\"\"\"	{ yybegin(YYINITIAL); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	\"	{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	<<EOF>>	{
			if (firstToken==null) {
				addToken(Token.LITERAL_STRING_DOUBLE_QUOTE);
			}
			return firstToken;
		}
}
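
/*
 * Illustrative note (not part of the original grammar): a triple-quoted
 * string spanning several lines is carried across calls by the type of the
 * last token on a line.  The calling editor is expected to pass that type
 * back into getTokenList() as initialTokenType, which maps LITERAL_CHAR to
 * LONG_STRING_1 and LITERAL_STRING_DOUBLE_QUOTE to LONG_STRING_2 above, so
 * the next line resumes inside the string, e.g.:
 *
 *   s = """first line
 *   second line is still highlighted as a string"""
 */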