source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/PythonTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 15.6 KB
/*
 * 12/06/2005
 *
 * PythonTokenMaker.java - Token maker for the Python programming language.
 *
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.AbstractJFlexTokenMaker;
import org.fife.ui.rsyntaxtextarea.DefaultToken;
import org.fife.ui.rsyntaxtextarea.Token;
import org.fife.ui.rsyntaxtextarea.TokenMaker;


/**
 * Scanner for the Python programming language.
 *
 * @author Robert Futrell
 * @version 0.3
 */
%%

%public
%class PythonTokenMaker
%extends AbstractJFlexTokenMaker
%implements TokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token


%{


	/**
	 * Constructor.  This must be here because JFlex does not generate a
	 * no-parameter constructor.
	 */
	public PythonTokenMaker() {
		super();
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int tokenType) {
		addToken(zzStartRead, zzMarkedPos-1, tokenType);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param tokenType The token's type.
	 */
	private void addToken(int start, int end, int tokenType) {
		int so = start + offsetShift;
		addToken(zzBuffer, start,end, tokenType, so);
	}


	/**
	 * Adds the token specified to the current linked list of tokens.
	 *
	 * @param array The character array.
	 * @param start The starting offset in the array.
	 * @param end The ending offset in the array.
	 * @param tokenType The token's type.
	 * @param startOffset The offset in the document at which this token
	 *        occurs.
	 */
	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
		super.addToken(array, start,end, tokenType, startOffset);
		zzStartRead = zzMarkedPos;
	}


	/**
	 * Returns the text to place at the beginning and end of a
	 * line to "comment" it in this programming language.
	 *
	 * @return The start and end strings to add to a line to "comment"
	 *         it out.
	 */
	public String[] getLineCommentStartAndEnd() {
		return new String[] { "#", null };
	}
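
	/*
	 * Illustrative note (not part of the original file): the returned pair
	 * means a line is commented out by prepending "#" and appending nothing;
	 * the end string is null because Python only has line comments.  For
	 * example, "x = 1" becomes "#x = 1".
	 */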


	/**
	 * Returns the first token in the linked list of tokens generated
	 * from <code>text</code>.  This method must be implemented by
	 * subclasses so they can correctly implement syntax highlighting.
	 *
	 * @param text The text from which to get tokens.
	 * @param initialTokenType The token type we should start with.
	 * @param startOffset The offset into the document at which
	 *        <code>text</code> starts.
	 * @return The first <code>Token</code> in a linked list representing
	 *         the syntax highlighted text.
	 */
	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

		resetTokenList();
		this.offsetShift = -text.offset + startOffset;

		// Start off in the proper state.
		int state = Token.NULL;
		switch (initialTokenType) {
			case Token.LITERAL_STRING_DOUBLE_QUOTE:
				state = LONG_STRING_2;
				break;
			case Token.LITERAL_CHAR:
				state = LONG_STRING_1;
				break;
			default:
				state = Token.NULL;
		}

		s = text;
		try {
			yyreset(zzReader);
			yybegin(state);
			return yylex();
		} catch (IOException ioe) {
			ioe.printStackTrace();
			return new DefaultToken();
		}

	}
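
	/*
	 * Minimal usage sketch (illustrative only, not part of the original
	 * file).  It assumes the Token API bundled with this RSyntaxTextArea
	 * version exposes isPaintable(), getNextToken(), getLexeme() and the
	 * public "type" field; adjust the calls if the local API differs.
	 *
	 *   PythonTokenMaker tm = new PythonTokenMaker();
	 *   String code = "def foo(): pass";
	 *   Segment line = new Segment(code.toCharArray(), 0, code.length());
	 *   for (Token t = tm.getTokenList(line, Token.NULL, 0);
	 *           t != null && t.isPaintable(); t = t.getNextToken()) {
	 *       System.out.println(t.type + "\t'" + t.getLexeme() + "'");
	 *   }
	 */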


	/**
	 * Resets the scanner to read from a new input stream.
	 * Does not close the old reader.
	 *
	 * All internal variables are reset, the old input stream
	 * <b>cannot</b> be reused (internal buffer is discarded and lost).
	 * Lexical state is set to <tt>YY_INITIAL</tt>.
	 *
	 * @param reader the new input stream
	 */
	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
		// 's' has been updated.
		zzBuffer = s.array;
		/*
		 * We replaced the line below with the two below it because zzRefill
		 * no longer "refills" the buffer (since the way we do it, it's always
		 * "full" the first time through, since it points to the segment's
		 * array).  So, we assign zzEndRead here.
		 */
		//zzStartRead = zzEndRead = s.offset;
		zzStartRead = s.offset;
		zzEndRead = zzStartRead + s.count - 1;
		zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
		zzLexicalState = YYINITIAL;
		zzReader = reader;
		zzAtBOL = true;
		zzAtEOF = false;
	}


	/**
	 * Refills the input buffer.
	 *
	 * @return <code>true</code> if EOF was reached, otherwise
	 *         <code>false</code>.
	 * @exception IOException if any I/O-Error occurs.
	 */
	private boolean zzRefill() throws java.io.IOException {
		return zzCurrentPos>=s.offset+s.count;
	}


%}

/* This part is taken from http://www.python.org/doc/2.2.3/ref/grammar.txt */
identifier = (({letter}|"_")({letter}|{digit}|"_")*)
letter = ({lowercase}|{uppercase})
lowercase = ([a-z])
uppercase = ([A-Z])
digit = ([0-9])
stringliteral = ({stringprefix}?{shortstring})
stringprefix = ("r"|"u"[rR]?|"R"|"U"[rR]?)
shortstring1 = ([\']{shortstring1item}*[\']?)
shortstring2 = ([\"]{shortstring2item}*[\"]?)
shortstring = ({shortstring1}|{shortstring2})
shortstring1item = ({shortstring1char}|{escapeseq})
shortstring2item = ({shortstring2char}|{escapeseq})
shortstring1char = ([^\\\n\'])
shortstring2char = ([^\\\n\"])
escapeseq = ([\\].)
longinteger = ({integer}[lL])
integer = ({decimalinteger}|{octinteger}|{hexinteger})
decimalinteger = ({nonzerodigit}{digit}*|"0")
octinteger = ("0"{octdigit}+)
hexinteger = ("0"[xX]{hexdigit}+)
nonzerodigit = ([1-9])
octdigit = ([0-7])
hexdigit = ({digit}|[a-f]|[A-F])
floatnumber = ({pointfloat}|{exponentfloat})
pointfloat = ({intpart}?{fraction}|{intpart}".")
exponentfloat = (({intpart}|{pointfloat}){exponent})
intpart = ({digit}+)
fraction = ("."{digit}+)
exponent = ([eE][\+\-]?{digit}+)
imagnumber = (({floatnumber}|{intpart})[jJ])
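
/*
 * Illustrative examples (not part of the original grammar file) of what the
 * Python-grammar macros above match:
 *
 *   {stringliteral}    'abc'   "abc"   r'raw\n'   u"text"
 *   {longinteger}      42L
 *   {decimalinteger}   0   7   123
 *   {octinteger}       0755            {hexinteger}   0x1F
 *   {floatnumber}      3.14   .5   10.   1e-9
 *   {imagnumber}       2j   3.5J
 */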

ErrorNumberFormat = ({digit}{NonSeparator}+)
NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")

LongStringStart1 = ({stringprefix}?\'\'\')
LongStringStart2 = ({stringprefix}?\"\"\")

LineTerminator = (\n)
WhiteSpace = ([ \t\f])

LineComment = ("#".*)


%state LONG_STRING_1
%state LONG_STRING_2


%%

/* Keywords */
<YYINITIAL> "and"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "as"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "assert"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "break"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "class"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "continue"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "def"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "del"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "elif"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "else"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "except"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "exec"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "finally"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "for"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "from"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "global"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "if"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "import"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "in"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "is"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "lambda"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "not"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "or"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "pass"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "print"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "raise"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "return"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "try"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "while"	{ addToken(Token.RESERVED_WORD); }
<YYINITIAL> "yield"	{ addToken(Token.RESERVED_WORD); }

/* Data types. */
<YYINITIAL> "char"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "double"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "float"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "int"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "long"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "short"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "signed"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "unsigned"	{ addToken(Token.DATA_TYPE); }
<YYINITIAL> "void"	{ addToken(Token.DATA_TYPE); }

/* Standard functions */
<YYINITIAL> "abs"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "apply"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "bool"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "buffer"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "callable"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "chr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "classmethod"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "cmp"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "coerce"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "compile"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "complex"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "delattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "dict"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "dir"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "divmod"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "enumerate"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "eval"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "execfile"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "file"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "filter"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "float"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "getattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "globals"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "hasattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "hash"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "hex"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "id"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "input"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "int"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "intern"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "isinstance"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "issubclass"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "iter"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "len"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "list"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "locals"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "long"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "map"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "max"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "min"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "object"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "oct"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "open"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "ord"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "pow"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "property"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "range"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "raw_input"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "reduce"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "reload"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "repr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "round"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "setattr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "slice"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "staticmethod"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "str"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "sum"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "super"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "tuple"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "type"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "unichr"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "unicode"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "vars"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "xrange"	{ addToken(Token.FUNCTION); }
<YYINITIAL> "zip"	{ addToken(Token.FUNCTION); }


<YYINITIAL> {

	{LineTerminator}	{ addNullToken(); return firstToken; }

	{identifier}	{ addToken(Token.IDENTIFIER); }

	{WhiteSpace}+	{ addToken(Token.WHITESPACE); }

	/* String/Character Literals. */
	{stringliteral}	{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	{LongStringStart1}	{ yybegin(LONG_STRING_1); addToken(Token.LITERAL_CHAR); }
	{LongStringStart2}	{ yybegin(LONG_STRING_2); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* Comment Literals. */
	{LineComment}	{ addToken(Token.COMMENT_EOL); }

	/* Separators. */
	"("	{ addToken(Token.SEPARATOR); }
	")"	{ addToken(Token.SEPARATOR); }
	"["	{ addToken(Token.SEPARATOR); }
	"]"	{ addToken(Token.SEPARATOR); }
	"{"	{ addToken(Token.SEPARATOR); }
	"}"	{ addToken(Token.SEPARATOR); }

	/* Operators. */
	"="	{ addToken(Token.OPERATOR); }
	"+"	{ addToken(Token.OPERATOR); }
	"-"	{ addToken(Token.OPERATOR); }
	"*"	{ addToken(Token.OPERATOR); }
	"/"	{ addToken(Token.OPERATOR); }
	"%"	{ addToken(Token.OPERATOR); }
	"**"	{ addToken(Token.OPERATOR); }
	"~"	{ addToken(Token.OPERATOR); }
	"<"	{ addToken(Token.OPERATOR); }
	">"	{ addToken(Token.OPERATOR); }
	"<<"	{ addToken(Token.OPERATOR); }
	">>"	{ addToken(Token.OPERATOR); }
	"=="	{ addToken(Token.OPERATOR); }
	"+="	{ addToken(Token.OPERATOR); }
	"-="	{ addToken(Token.OPERATOR); }
	"*="	{ addToken(Token.OPERATOR); }
	"/="	{ addToken(Token.OPERATOR); }
	"%="	{ addToken(Token.OPERATOR); }
	">>="	{ addToken(Token.OPERATOR); }
	"<<="	{ addToken(Token.OPERATOR); }
	"^"	{ addToken(Token.OPERATOR); }
	"&"	{ addToken(Token.OPERATOR); }
	"&&"	{ addToken(Token.OPERATOR); }
	"|"	{ addToken(Token.OPERATOR); }
	"||"	{ addToken(Token.OPERATOR); }
	"?"	{ addToken(Token.OPERATOR); }
	":"	{ addToken(Token.OPERATOR); }
	","	{ addToken(Token.OPERATOR); }
	"!"	{ addToken(Token.OPERATOR); }
	"++"	{ addToken(Token.OPERATOR); }
	"--"	{ addToken(Token.OPERATOR); }
	"."	{ addToken(Token.OPERATOR); }

	/* Numbers */
	{longinteger}|{integer}	{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{floatnumber}|{imagnumber}	{ addToken(Token.LITERAL_NUMBER_FLOAT); }
	{ErrorNumberFormat}	{ addToken(Token.ERROR_NUMBER_FORMAT); }

	/* Other punctuation, we'll highlight it as "identifiers." */
	"@"	{ addToken(Token.IDENTIFIER); }
	";"	{ addToken(Token.IDENTIFIER); }

	/* Ended with a line not in a string or comment. */
	<<EOF>>	{ addNullToken(); return firstToken; }

	/* Catch any other (unhandled) characters and flag them as bad. */
	.	{ addToken(Token.ERROR_IDENTIFIER); }

}

<LONG_STRING_1> {
	[^\']+	{ addToken(Token.LITERAL_CHAR); }
	"'''"	{ yybegin(YYINITIAL); addToken(Token.LITERAL_CHAR); }
	"'"	{ addToken(Token.LITERAL_CHAR); }
	<<EOF>>	{
			if (firstToken==null) {
				addToken(Token.LITERAL_CHAR);
			}
			return firstToken;
		}
}

<LONG_STRING_2> {
	[^\"]+	{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	\"\"\"	{ yybegin(YYINITIAL); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	\"	{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	<<EOF>>	{
			if (firstToken==null) {
				addToken(Token.LITERAL_STRING_DOUBLE_QUOTE);
			}
			return firstToken;
		}
}
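
/*
 * Illustrative note (not part of the original grammar): a triple-quoted
 * string spanning several lines is carried across calls by the type of the
 * last token on a line.  The calling editor is expected to pass that type
 * back into getTokenList() as initialTokenType, which maps LITERAL_CHAR to
 * LONG_STRING_1 and LITERAL_STRING_DOUBLE_QUOTE to LONG_STRING_2 above, so
 * the next line resumes inside the string, e.g.:
 *
 *   s = """first line
 *   second line is still highlighted as a string"""
 */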