Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

LispTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut at a text edit area for GLI that supports color syntax highlighting
File size: 12.8 KB

Line
1	/*
2	* 11/13/2004
3	*
4	* LispTokenMaker.java - Scanner for the Lisp programming language.
5	*
6	* This library is distributed under a modified BSD license. See the included
7	* RSyntaxTextArea.License.txt file for details.
8	*/
9	package org.fife.ui.rsyntaxtextarea.modes;
10
11	import java.io.*;
12	import javax.swing.text.Segment;
13
14	import org.fife.ui.rsyntaxtextarea.*;
15
16
17	/**
18	* Scanner for the Lisp programming language.<p>
19	*
20	* This implementation was created using
21	* <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22	* was modified for performance. Memory allocation needs to be almost
23	* completely removed to be competitive with the handwritten lexers (subclasses
24	* of <code>AbstractTokenMaker</code>), so this class has been modified so that
25	* Strings are never allocated (via yytext()), and the scanner never has to
26	* worry about refilling its buffer (needlessly copying chars around).
27	* We can achieve this because RText always scans exactly 1 line of tokens at a
28	* time, and hands the scanner this line as an array of characters (a Segment
29	* really). Since tokens contain pointers to char arrays instead of Strings
30	* holding their contents, there is no need for allocating new memory for
31	* Strings.<p>
32	*
33	* The actual algorithm generated for scanning has, of course, not been
34	* modified.<p>
35	*
36	* If you wish to regenerate this file yourself, keep in mind the following:
37	* <ul>
38	* <li>The generated <code>LispTokenMaker.java</code> file will contain two
39	* definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40	* You should hand-delete the second of each definition (the ones
41	* generated by the lexer), as these generated methods modify the input
42	* buffer, which we'll never have to do.</li>
43	* <li>You should also change the declaration/definition of zzBuffer to NOT
44	* be initialized. This is a needless memory allocation for us since we
45	* will be pointing the array somewhere else anyway.</li>
46	* <li>You should NOT call <code>yylex()</code> on the generated scanner
47	* directly; rather, you should use <code>getTokenList</code> as you would
48	* with any other <code>TokenMaker</code> instance.</li>
49	* </ul>
50	*
51	* @author Robert Futrell
52	* @version 0.5
53	*
54	*/
55	%%
56
57	%public
58	%class LispTokenMaker
59	%extends AbstractJFlexTokenMaker
60	%unicode
61	%type org.fife.ui.rsyntaxtextarea.Token
62
63
64	%{
65
66
67	/**
68	* Constructor. This must be here because JFlex does not generate a
69	* no-parameter constructor. The body is intentionally empty.
70	*/
71	public LispTokenMaker() {
72	}
73
74
75	/**
76	* Adds a hyperlink token to the current linked list of tokens; {@link #addToken(int, int, int)} is the non-hyperlink variant.
77	* @param start The offset in <code>zzBuffer</code> at which the token starts.
78	* @param end The offset in <code>zzBuffer</code> at which the token ends.
79	* @param tokenType The token's type.
80	*/
81	private void addHyperlinkToken(int start, int end, int tokenType) {
82	int so = start + offsetShift; // convert the array offset into a document offset
83	addToken(zzBuffer, start,end, tokenType, so, true); // true: flag the token as a hyperlink
84	}
85
86
87	/**
88	* Adds a token spanning <code>zzStartRead</code> through <code>zzMarkedPos-1</code> to the current linked list of tokens.
89	*
90	* @param tokenType The token's type.
91	*/
92	private void addToken(int tokenType) {
93	addToken(zzStartRead, zzMarkedPos-1, tokenType); // NOTE(review): presumably the text of the current match -- confirm against the generated scanner
94	}
95
96
97	/**
98	* Adds a non-hyperlink token to the current linked list of tokens; {@link #addHyperlinkToken(int, int, int)} is the hyperlink variant.
99	* @param start The offset in <code>zzBuffer</code> at which the token starts.
100	* @param end The offset in <code>zzBuffer</code> at which the token ends.
101	* @param tokenType The token's type.
102	*/
103	private void addToken(int start, int end, int tokenType) {
104	int so = start + offsetShift; // convert the array offset into a document offset
105	addToken(zzBuffer, start,end, tokenType, so, false); // false: not a hyperlink
106	}
107
108
109	/**
110	* Adds the token specified to the current linked list of tokens, then
111	* sets <code>zzStartRead</code> to <code>zzMarkedPos</code>.
112	* @param array The character array.
113	* @param start The starting offset in the array.
114	* @param end The ending offset in the array.
115	* @param tokenType The token's type.
116	* @param startOffset The offset in the document at which this token
117	* occurs.
118	* @param hyperlink Whether this token is a hyperlink.
119	*/
120	public void addToken(char[] array, int start, int end, int tokenType,
121	int startOffset, boolean hyperlink) {
122	super.addToken(array, start,end, tokenType, startOffset, hyperlink);
123	zzStartRead = zzMarkedPos; // the next token's text begins where this match ended
124	}
125
126
127	/**
128	* Returns the text to place at the beginning and end of a
129	* line to "comment" it in this programming language.
130	*
131	* @return The start and end strings to add to a line to "comment"
132	* it out: <code>";"</code> to start, and <code>null</code> for the end.
133	*/
134	public String[] getLineCommentStartAndEnd() {
135	return new String[] { ";", null };
136	}
137
138
139	/**
140	* Returns the first token in the linked list of tokens generated
141	* from <code>text</code>. This method must be implemented by
142	* subclasses so they can correctly implement syntax highlighting.
143	*
144	* @param text The text from which to get tokens.
145	* @param initialTokenType The token type we should start with; selects the MLC or STRING state, otherwise the default state.
146	* @param startOffset The offset into the document at which
147	* <code>text</code> starts.
148	* @return The first <code>Token</code> in a linked list representing
149	* the syntax highlighted text, or a new <code>DefaultToken</code> if scanning throws an <code>IOException</code>.
150	*/
151	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
152
153	resetTokenList();
154	this.offsetShift = -text.offset + startOffset; // added to an array offset to obtain a document offset
155
156	// Start off in the proper state.
157	int state = Token.NULL; // NOTE(review): a token type used as a lexical state; presumably equal to YYINITIAL -- confirm
158	switch (initialTokenType) {
159	case Token.COMMENT_MULTILINE:
160	state = MLC;
161	start = text.offset; // the continued comment's token begins at the start of this segment
162	break;
163	case Token.LITERAL_STRING_DOUBLE_QUOTE:
164	state = STRING;
165	start = text.offset; // the continued string's token begins at the start of this segment
166	break;
167	default:
168	state = Token.NULL;
169	}
170
171	s = text; // yyreset() takes its buffer and positions from s
172	try {
173	yyreset(zzReader);
174	yybegin(state);
175	return yylex();
176	} catch (IOException ioe) {
177	ioe.printStackTrace();
178	return new DefaultToken();
179	}
180
181	}
182
183
184	/**
185	* Stands in for the generated buffer refill: nothing is copied, this only tests for the end of the segment.
186	*
187	* @return <code>true</code> if <code>zzCurrentPos</code> is at or past the end of segment <code>s</code>, otherwise
188	* <code>false</code>.
189	* @exception IOException declared in the signature; this body does not throw it.
190	*/
191	private boolean zzRefill() throws java.io.IOException {
192	return zzCurrentPos>=s.offset+s.count;
193	}
194
195
196	/**
197	* Resets the scanner to scan the segment currently stored in <code>s</code>.
198	* Does not close the old reader.
199	*
200	* The buffer is pointed at the segment's array (nothing is copied), the
201	* read positions are reset to the segment's offset, and the
202	* lexical state is set to <tt>YYINITIAL</tt>.
203	*
204	* @param reader the new input stream; this method only stores it in <code>zzReader</code>
205	*/
206	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
207	// 's' has been updated.
208	zzBuffer = s.array;
209	/*
210	* We replaced the line below with the two below it because zzRefill
211	* no longer "refills" the buffer (since the way we do it, it's always
212	* "full" the first time through, since it points to the segment's
213	* array). So, we assign zzEndRead here.
214	*/
215	//zzStartRead = zzEndRead = s.offset;
216	zzStartRead = s.offset;
217	zzEndRead = zzStartRead + s.count - 1; // index of the segment's last character
218	zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
219	zzLexicalState = YYINITIAL;
220	zzReader = reader;
221	zzAtBOL = true;
222	zzAtEOF = false;
223	}
224
225
226	%}
227
228	Letter = [A-Za-z]
229	LetterOrUnderscore = ({Letter}|"_")
230	NonzeroDigit = [1-9]
231	Digit = ("0"|{NonzeroDigit})
232	HexDigit = ({Digit}|[A-Fa-f])
233	OctalDigit = ([0-7])
234	EscapedSourceCharacter = ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
235	NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
236	IdentifierStart = ({LetterOrUnderscore}|"$")
237	IdentifierPart = ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))
238	/* Line structure. */
239	LineTerminator = (\n)
240	WhiteSpace = ([ \t\f])
241	/* Comment delimiters: block comments run from #| to |#, line comments start at ; */
242	MLCBegin = "#|"
243	MLCEnd = "|#"
244	LineCommentBegin = ";"
245	/* Numeric literals; a literal immediately followed by non-separator characters is an error. */
246	IntegerHelper1 = (({NonzeroDigit}{Digit}*)|"0")
247	IntegerHelper2 = ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
248	IntegerLiteral = ({IntegerHelper1}[lL]?)
249	HexLiteral = ({IntegerHelper2}[lL]?)
250	FloatHelper1 = ([fFdD]?)
251	FloatHelper2 = ([eE][+-]?{Digit}+{FloatHelper1})
252	FloatLiteral1 = ({Digit}+"."({FloatHelper1}|{FloatHelper2}|{Digit}+({FloatHelper1}|{FloatHelper2})))
253	FloatLiteral2 = ("."{Digit}+({FloatHelper1}|{FloatHelper2}))
254	FloatLiteral3 = ({Digit}+{FloatHelper2})
255	FloatLiteral = ({FloatLiteral1}|{FloatLiteral2}|{FloatLiteral3}|({Digit}+[fFdD]))
256	ErrorNumberFormat = (({IntegerLiteral}|{HexLiteral}|{FloatLiteral}){NonSeparator}+)
257	/* Parentheses are the only separators. */
258	Separator = ([\(\)])
259	/* Operators. */
260	NonAssignmentOperator = ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"?"|">>"|"!"|"&"|"=="|":"|">>"|"~"|"|"|"&&"|">>>")
261	AssignmentOperator = ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
262	Operator = ({NonAssignmentOperator}|{AssignmentOperator})
263	/* Identifiers; any other run of non-separator characters is an error identifier. */
264	Identifier = ({IdentifierStart}{IdentifierPart}*)
265	ErrorIdentifier = ({NonSeparator}+)
266	/* URLs, emitted as hyperlink tokens inside comments. */
267	URLGenDelim = ([:\/\?#\[\]@])
268	URLSubDelim = ([\!\$&'\(\)\*\+,;=])
269	URLUnreserved = ({LetterOrUnderscore}|{Digit}|[\-\.\~])
270	URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
271	URLCharacters = ({URLCharacter}*)
272	URLEndCharacter = ([\/\$]|{Letter}|{Digit})
273	URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
274
275
276	%state STRING
277	%state MLC
278	%state EOL_COMMENT
279
280	%%
281
282	<YYINITIAL> {
283	/* Defining forms. */
284	"defclass" |
285	"defconstant" |
286	"defgeneric" |
287	"define-compiler-macro" |
288	"define-condition" |
289	"define-method-combination" |
290	"define-modify-macro" |
291	"define-setf-expander" |
292	"define-symbol-macro" |
293	"defmacro" |
294	"defmethod" |
295	"defpackage" |
296	"defparameter" |
297	"defsetf" |
298	"defstruct" |
299	"deftype" |
300	"defun" |
301	"defvar" { addToken(Token.RESERVED_WORD); }
302	/* Other reserved words. */
303	"abort" |
304	"assert" |
305	"block" |
306	"break" |
307	"case" |
308	"catch" |
309	"ccase" |
310	"cerror" |
311	"cond" |
312	"ctypecase" |
313	"declaim" |
314	"declare" |
315	"do" |
316	"do*" |
317	"do-all-symbols" |
318	"do-external-symbols" |
319	"do-symbols" |
320	"dolist" |
321	"dotimes" |
322	"ecase" |
323	"error" |
324	"etypecase" |
325	"eval-when" |
326	"flet" |
327	"handler-bind" |
328	"handler-case" |
329	"if" |
330	"ignore-errors" |
331	"in-package" |
332	"labels" |
333	"lambda" |
334	"let" |
335	"let*" |
336	"locally" |
337	"loop" |
338	"macrolet" |
339	"multiple-value-bind" |
340	"proclaim" |
341	"prog" |
342	"prog*" |
343	"prog1" |
344	"prog2" |
345	"progn" |
346	"progv" |
347	"provide" |
348	"require" |
349	"restart-bind" |
350	"restart-case" |
351	"restart-name" |
352	"return" |
353	"return-from" |
354	"signal" |
355	"symbol-macrolet" |
356	"tagbody" |
357	"the" |
358	"throw" |
359	"typecase" |
360	"unless" |
361	"unwind-protect" |
362	"when" |
363	"with-accessors" |
364	"with-compilation-unit" |
365	"with-condition-restarts" |
366	"with-hash-table-iterator" |
367	"with-input-from-string" |
368	"with-open-file" |
369	"with-open-stream" |
370	"with-output-to-string" |
371	"with-package-iterator" |
372	"with-simple-restart" |
373	"with-slots" |
374	"with-standard-io-syntax" { addToken(Token.RESERVED_WORD); }
375
376	{LineTerminator} { addNullToken(); return firstToken; }
377	{Identifier} { addToken(Token.IDENTIFIER); }
378	{WhiteSpace}+ { addToken(Token.WHITESPACE); }
379	[\"] { start = zzMarkedPos-1; yybegin(STRING); }
380	{MLCBegin} { start = zzMarkedPos-2; yybegin(MLC); }
381	{LineCommentBegin} { start = zzMarkedPos-1; yybegin(EOL_COMMENT); }
382	{Separator} { addToken(Token.SEPARATOR); }
383	{Operator} { addToken(Token.OPERATOR); }
384
385	/* Numbers */
386	{IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
387	{HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
388	{FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
389	{ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
390
391	{ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
392
393	/* Ended with a line not in a string or comment. */
394	<<EOF>> { addNullToken(); return firstToken; }
395
396	/* Catch any other (unhandled) characters and flag them as bad. */
397	. { addToken(Token.ERROR_IDENTIFIER); }
398
399	}
400
401
402	<STRING> {
403	[^\n\\\"]+ { /* Ordinary string characters: keep scanning, no token yet. */ }
404	\n { /* The line ends inside the string: emit the string text so far and stop. */ addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
405	\\.? { /* Skip escaped chars. */ }
406	\" { /* Closing quote: emit the string token and resume normal scanning. */ yybegin(YYINITIAL); addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE); }
407	<<EOF>> { /* Input ends inside the string: emit the string text so far and stop. */ addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
408	}
409
410
411	<MLC> {
412	/* Plain comment text. h, w and f are excluded here so that a URL can begin a fresh match. */
413	[^hwf\n\|]+ {}
414	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
415	[hwf] {}
416	/* The line ends inside the comment, the comment closes, or a lone | appears. */
417	\n { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
418	{MLCEnd} { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
419	\| {}
420	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
421
422	}
423
424
425	<EOL_COMMENT> {
426	[^hwf\n]+ { /* Plain comment text; h, w and f are left out so a URL can begin a fresh match. */ }
427	{URL} { /* Emit the comment text before the URL, then the URL itself as a hyperlink token. */ int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
428	[hwf] { /* An h, w or f that did not begin a URL. */ }
429	\n { /* End of line ends the comment. */ addToken(start,zzStartRead-1, Token.COMMENT_EOL); addNullToken(); return firstToken; }
430	<<EOF>> { /* End of input ends the comment. */ addToken(start,zzStartRead-1, Token.COMMENT_EOL); addNullToken(); return firstToken; }
431
432	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format