Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

CSharpTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut at a text edit area for GLI that supports color syntax highlighting
File size: 17.8 KB

Line
1	/*
2	* 11/13/2004
3	*
4	* CSharpTokenMaker.java - An object that can take a chunk of text and return
5	* a linked list of tokens representing it in the C# programming language.
6	*
7	* This library is distributed under a modified BSD license. See the included
8	* RSyntaxTextArea.License.txt file for details.
9	*/
10	package org.fife.ui.rsyntaxtextarea.modes;
11
12	import java.io.*;
13	import javax.swing.text.Segment;
14
15	import org.fife.ui.rsyntaxtextarea.*;
16
17
18	/**
19	* A lexer for the C# programming language.
20	*
21	* This implementation was created using
22	* <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
23	* was modified for performance. Memory allocation needs to be almost
24	* completely removed to be competitive with the handwritten lexers (subclasses
25	* of <code>AbstractTokenMaker</code>), so this class has been modified so that
26	* Strings are never allocated (via yytext()), and the scanner never has to
27	* worry about refilling its buffer (needlessly copying chars around).
28	* We can achieve this because RText always scans exactly 1 line of tokens at a
29	* time, and hands the scanner this line as an array of characters (a Segment
30	* really). Since tokens contain pointers to char arrays instead of Strings
31	* holding their contents, there is no need for allocating new memory for
32	* Strings.<p>
33	*
34	* The actual algorithm generated for scanning has, of course, not been
35	* modified.<p>
36	*
37	* If you wish to regenerate this file yourself, keep in mind the following:
38	* <ul>
39	* <li>The generated <code>CSharpTokenMaker.java</code> file will contain two
40	* definitions of both <code>zzRefill</code> and <code>yyreset</code>.
41	* You should hand-delete the second of each definition (the ones
42	* generated by the lexer), as these generated methods modify the input
43	* buffer, which we'll never have to do.</li>
44	* <li>You should also change the declaration/definition of zzBuffer to NOT
45	* be initialized. This is a needless memory allocation for us since we
46	* will be pointing the array somewhere else anyway.</li>
47	* <li>You should NOT call <code>yylex()</code> on the generated scanner
48	* directly; rather, you should use <code>getTokenList</code> as you would
49	* with any other <code>TokenMaker</code> instance.</li>
50	* </ul>
51	*
52	* @author Robert Futrell
53	* @version 0.5
54	*
55	*/
56	%%
57
58	%public
59	%class CSharpTokenMaker
60	%extends AbstractJFlexCTokenMaker
61	%unicode
62	%type org.fife.ui.rsyntaxtextarea.Token
63
64
65	%{
66
67
68	/**
69	* Constructor. This must be declared explicitly because JFlex does not
70	* generate a no-parameter constructor for the scanner class.
71	*/
72	public CSharpTokenMaker() {
73	super();
74	}
75
76
77	/**
78	* Appends a token for <code>zzBuffer[start..end]</code> to the token list,
79	* passing <code>true</code> as the final flag (presumably "hyperlink" - confirm in superclass).
80	* @param tokenType The token's type.
81	* @see #addToken(int, int, int)
82	*/
83	private void addHyperlinkToken(int start, int end, int tokenType) {
84	final int documentOffset = offsetShift + start;
85	addToken(zzBuffer, start, end, tokenType, documentOffset, true);
86	}
87
88
89	/**
90	* Adds a token of the given type spanning the text just matched, i.e.
91	* buffer positions <code>zzStartRead</code> through <code>zzMarkedPos-1</code>.
92	* @param tokenType The token's type.
93	*/
94	private void addToken(int tokenType) {
95	addToken(zzStartRead, zzMarkedPos-1, tokenType);
96	}
97
98
99	/**
100	* Appends a token for <code>zzBuffer[start..end]</code>; its document offset
101	* is <code>start</code> shifted by <code>offsetShift</code>.
102	* @param tokenType The token's type.
103	*/
104	private void addToken(int start, int end, int tokenType) {
105	final int documentOffset = offsetShift + start;
106	addToken(zzBuffer, start, end, tokenType, documentOffset);
107	}
108
109
110	/**
111	* Adds the token specified to the current linked list of tokens, then
112	* moves the scanner's start-of-match marker up to the end of the match.
113	* @param array The character array.
114	* @param start The starting offset in the array.
115	* @param end The ending offset in the array.
116	* @param tokenType The token's type.
117	* @param startOffset The offset in the document at which this token
118	* occurs.
119	*/
120	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
121	super.addToken(array, start,end, tokenType, startOffset);
122	zzStartRead = zzMarkedPos; // the next token begins where this match ended
123	}
124
125
126	/**
127	* Returns the text to place at the beginning and end of a
128	* line to "comment" it in this programming language.
129	*
130	* @return A two-element array: the start string (<code>"//"</code>) and the
131	* end string (<code>null</code> here) to add to a line to comment it out.
132	*/
133	public String[] getLineCommentStartAndEnd() {
134	return new String[] { "//", null };
135	}
136
137
138	/**
139	* Tokenizes <code>text</code> and returns the head of the resulting linked
140	* list of tokens. The scanner is re-pointed at <code>text</code> and started
141	* in the lexical state implied by <code>initialTokenType</code>.
142	*
143	* @param text The text from which to get tokens.
144	* @param initialTokenType The token type we should start with.
145	* @param startOffset The offset into the document at which
146	* <code>text</code> starts.
147	* @return The first <code>Token</code> in a linked list representing
148	* the syntax highlighted text.
149	*/
150	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
151
152	resetTokenList();
153	this.offsetShift = startOffset - text.offset;
154
155	// Start inside a verbatim string or delimited comment when the initial
156	// token type says so; otherwise start in the default state.
157	final int lexState;
158	if (initialTokenType == Token.LITERAL_STRING_DOUBLE_QUOTE) {
159	lexState = VERBATIMSTRING;
160	start = text.offset;
161	}
162	else if (initialTokenType == Token.COMMENT_MULTILINE) {
163	lexState = DELIMITEDCOMMENT;
164	start = text.offset;
165	}
166	else {
167	lexState = Token.NULL;
168	}
169
170	s = text;
171	try {
172	yyreset(zzReader);
173	yybegin(lexState);
174	return yylex();
175	} catch (IOException scanFailure) {
176	scanFailure.printStackTrace();
177	return new DefaultToken();
178	}
179
180	}
181
182
183	/**
184	* Reports whether the scan position has reached the end of the segment.
185	* Nothing is refilled here; the method only compares positions.
186	* @return <code>true</code> if EOF was reached, otherwise
187	* <code>false</code>.
188	* @exception IOException declared, but never thrown by this implementation.
189	*/
190	private boolean zzRefill() throws java.io.IOException {
191	return zzCurrentPos>=s.offset+s.count;
192	}
193
194
195	/**
196	* Points the scanner at the current segment <code>s</code> and resets its
197	* position state. Nothing is read from <code>reader</code> here; it is only
198	* stored in <code>zzReader</code>.
199	* The buffer becomes <code>s.array</code>, the start/current/marked/pushback
200	* positions become <code>s.offset</code>, and the lexical state is set to
201	* <tt>YYINITIAL</tt>.
202	*
203	* @param reader the new input stream
204	*/
205	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
206	// 's' has been updated.
207	zzBuffer = s.array;
208	/*
209	* We replaced the line below with the two below it because zzRefill
210	* no longer "refills" the buffer (since the way we do it, it's always
211	* "full" the first time through, since it points to the segment's
212	* array). So, we assign zzEndRead here.
213	*/
214	//zzStartRead = zzEndRead = s.offset;
215	zzStartRead = s.offset;
216	zzEndRead = zzStartRead + s.count - 1;
217	zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
218	zzLexicalState = YYINITIAL;
219	zzReader = reader;
220	zzAtBOL = true;
221	zzAtEOF = false;
222	}
223
224
225	%}
226
227	/* C.1.1 - Line terminators. */
228	NewlineCharacter = ([\n])
229
230	/* C.1.2 - Whitespace. */
231	Whitespace = ([\t ]+)
232
233	/* C.1.3 - Comments */
234	InputCharacter = ([^\n])
235	InputCharacters = ({InputCharacter}+)
236	DocumentationCommentStart = ("///")
237	SingleLineComment = ("//"([^/]{InputCharacters}?)?)
238	DelimitedCommentStart = ("/*")
239	DelimitedCommentEnd = ("*/")
240
241	/* C.1.5 - Unicode character escape sequences (4-digit \u or 8-digit \U). */
242	UnicodeEscape1 = ("\\u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
243	UnicodeEscape2 = ("\\U"{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit})
244	UnicodeEscapeSequence = ({UnicodeEscape1}|{UnicodeEscape2})
245
246	/* C.1.6 - Identifiers. */
247	LetterCharacter = ([A-Za-z]) /* Not accurate - many more Unicode letters, Unicode escapes */
248	/*
249	CombiningCharacter = ()
250	*/
251	DecimalDigitCharacter = ([0-9])
252	ConnectingCharacter = ([_\-]) /* NOTE(review): '-' here makes "a-b" lex as one identifier - confirm intent */
253	/*
254	FormattingCharacter = ()
255	*/
256	/*
257	IdentifierPartCharacter = ({LetterCharacter}|{DecimalDigitCharacter}|{ConnectingCharacter}|{CombiningCharacter}|{FormattingCharacter})
258	*/
259	IdentifierPartCharacter = ({LetterCharacter}|{DecimalDigitCharacter}|{ConnectingCharacter})
260	IdentifierPartCharacters = ({IdentifierPartCharacter}+)
261	IdentifierStartCharacter = ({LetterCharacter}|[_])
262	IdentifierOrKeyword = ({IdentifierStartCharacter}{IdentifierPartCharacters}?)
263	Identifier = ("@"?{IdentifierOrKeyword})
264	/* NOTE: The two below aren't from the C# spec, but we add them so we can */
265	/* highlight errors. Parentheses are excluded so "foo(" is not one error token. */
266	NonSeparator = (([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\"))
267	ErrorIdentifier = ({NonSeparator}+)
268
269	/* C.1.8 - Literals. */
270	BooleanLiteral = ("true"|"false")
271	DecimalDigit = ([0-9])
272	DecimalDigits = ({DecimalDigit}+)
273	IntegerTypeSuffix = (([uU][lL]?)|([lL][uU]?))
274	DecimalIntegerLiteral = ({DecimalDigits}{IntegerTypeSuffix}?)
275	HexDigit = ([0-9A-Fa-f])
276	HexDigits = ({HexDigit}+)
277	HexadecimalIntegerLiteral = ("0"[xX]{HexDigits}{IntegerTypeSuffix}?)
278	Sign = ([+\-])
279	ExponentPart = ([eE]{Sign}?{DecimalDigits})
280	RealTypeSuffix = ([fFdDmM])
281	RealHelper1 = ({DecimalDigits}"."{DecimalDigits}{ExponentPart}?{RealTypeSuffix}?)
282	RealHelper2 = ("."{DecimalDigits}{ExponentPart}?{RealTypeSuffix}?)
283	RealHelper3 = ({DecimalDigits}{ExponentPart}{RealTypeSuffix}?)
284	RealHelper4 = ({DecimalDigits}{RealTypeSuffix})
285	RealLiteral = ({RealHelper1}|{RealHelper2}|{RealHelper3}|{RealHelper4})
286	ErrorNumberFormat = (({DecimalIntegerLiteral}|{HexadecimalIntegerLiteral}|{RealLiteral}){NonSeparator}+)
287	SingleCharacter = ([^\'\\\n])
288	SimpleEscapeSequence = ("\\"[\'\"\\0abfnrtv])
289	HexadecimalEscapeSequence = ("\\x"{HexDigit}{HexDigit}?{HexDigit}?{HexDigit}?)
290	Character = ({SingleCharacter}|{SimpleEscapeSequence}|{HexadecimalEscapeSequence}|{UnicodeEscapeSequence})
291	UnclosedCharacterLiteral = ("'"{Character})
292	CharacterLiteral = ({UnclosedCharacterLiteral}"'")
293	ErrorUnclosedCharacterLiteral = ("'"[^\'\n]*)
294	ErrorCharacterLiteral = ("''"|{ErrorUnclosedCharacterLiteral}[\'])
295	QuoteEscapeSequence = ("\"\"")
296	SingleVerbatimStringLiteralCharacter = ([^\"])
297	VerbatimStringLiteralStart = ("@\"")
298	SingleRegularStringLiteralCharacter = ([^\"\\\n])
299	RegularStringLiteralCharacter = ({SingleRegularStringLiteralCharacter}|{SimpleEscapeSequence}|{HexadecimalEscapeSequence}|{UnicodeEscapeSequence})
300	RegularStringLiteralCharacters = ({RegularStringLiteralCharacter}+)
301	RegularStringLiteral = ([\"]{RegularStringLiteralCharacters}?[\"])
302	UnclosedRegularStringLiteral = ([\"]([\\].|[^\\\"])*[^\"]?)
303	ErrorRegularStringLiteral = ({UnclosedRegularStringLiteral}[\"])
304
305	/* C.1.9 - Operators and Punctuators. */
306	OOPHelper1 = (":")
307	OOPHelper2 = ("+"|"-"|"*"|"/"|"%"|"&"|"|"|"^"|"!"|"~")
308	OOPHelper3 = ("="|"<"|">"|"?"|"++"|"--"|"&&"|"||"|"<<"|">>")
309	OOPHelper4 = ("=="|"!="|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&=")
310	OOPHelper5 = ("|="|"^="|"<<="|">>="|"->")
311	OperatorOrPunctuator = ({OOPHelper1}|{OOPHelper2}|{OOPHelper3}|{OOPHelper4}|{OOPHelper5})
312	/* NOTE: We distinguish between operators and separators (punctuators), but */
313	/* the C# spec doesn't, so the stuff below isn't in the spec. */
314	Separator = ([\(\)\{\}\[\]])
315	Separator2 = ([,;])
316
317	/* C.1.10 - Pre-processing Directives. */
318	/* NOTE: We don't do ALL of the PP stuff here as it's unnecessary */
319	/* for us to know the difference between declarations, diagnostics, */
320	/* regions, etc. Only PPDirective is referenced by the rules below. */
321	ConditionalSymbol = ({IdentifierOrKeyword}) /* Not correct - excludes "true" and "false". */
322	PPNewLine = ({Whitespace}?{SingleLineComment}?{NewlineCharacter})
323	PPPrimaryExpression = ({IdentifierOrKeyword}|({Whitespace}?{PPExpression}{Whitespace}?))
324	PPUnaryExpression = ({PPPrimaryExpression}|("!"{Whitespace}?{PPUnaryExpression}))
325	PPEqualityExpression = ({PPUnaryExpression}|({Whitespace}?"=="{Whitespace}?{PPUnaryExpression})|({Whitespace}?"!="{Whitespace}?{PPUnaryExpression}))
326	PPAndExpression = ({PPEqualityExpression}|({Whitespace}?"&&"{Whitespace}?{PPEqualityExpression}))
327	PPOrExpression = ({PPAndExpression}|({Whitespace}?"||"{Whitespace}?{PPAndExpression}))
328	PPExpression = ({Whitespace}?{PPOrExpression}{Whitespace}?)
329	PPWord = ("define"|"undef"|"if"|"elif"|"else"|"endif"|"line"|"error"|"warning"|"region"|"endregion")
330	PPDirective = ({Whitespace}?"#"{Whitespace}?{PPWord}{InputCharacter}*)
331
332	/* URL matching, for hyperlinks inside comments (not in C# spec) */
333	URLGenDelim = ([:\/\?#\[\]@])
334	URLSubDelim = ([\!\$&'\(\)\*\+,;=])
335	URLUnreserved = ([A-Za-z_]|{DecimalDigitCharacter}|[\-\.\~])
336	URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
337	URLCharacters = ({URLCharacter}*)
338	URLEndCharacter = ([\/\$]|[A-Za-z0-9])
339	URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
340
341
342	%state DELIMITEDCOMMENT
343	%state DOCUMENTCOMMENT
344	%state VERBATIMSTRING
345
346	%%
347
348	<YYINITIAL> {
349
350	/* Keywords ("|" as an action means: same action as the next rule). */
351	"abstract" |
352	"as" |
353	"base" |
354	"break" |
355	"case" |
356	"catch" |
357	"checked" |
358	"class" |
359	"const" |
360	"continue" |
361	"decimal" |
362	"default" |
363	"delegate" |
364	"do" |
365	"else" |
366	"enum" |
367	"event" |
368	"explicit" |
369	"extern" |
370	"finally" |
371	"fixed" |
372	"for" |
373	"foreach" |
374	"goto" |
375	"if" |
376	"implicit" |
377	"in" |
378	"interface" |
379	"internal" |
380	"is" |
381	"lock" |
382	"namespace" |
383	"new" |
384	"null" |
385	"object" |
386	"operator" |
387	"out" |
388	"override" |
389	"params" |
390	"private" |
391	"protected" |
392	"public" |
393	"readonly" |
394	"ref" |
395	"return" |
396	"sealed" |
397	"sizeof" |
398	"stackalloc" |
399	"static" |
400	"string" |
401	"struct" |
402	"switch" |
403	"this" |
404	"throw" |
405	"try" |
406	"typeof" |
407	"unchecked" |
408	"unsafe" |
409	"using" |
410	"virtual" |
411	"void" |
412	"volatile" |
413	"while" { addToken(Token.RESERVED_WORD); }
414
415	/* Data types. */
416	"bool" |
417	"byte" |
418	"char" |
419	"double" |
420	"float" |
421	"int" |
422	"long" |
423	"object" |
424	"sbyte" |
425	"short" |
426	"string" |
427	"uint" |
428	"ulong" |
429	"ushort" { addToken(Token.DATA_TYPE); }
430
431
432	{NewlineCharacter} { addNullToken(); return firstToken; }
433
434	{BooleanLiteral} { addToken(Token.LITERAL_BOOLEAN); }
435
436	{Identifier} { addToken(Token.IDENTIFIER); }
437
438	{Whitespace} { addToken(Token.WHITESPACE); }
439
440	/* String/Character Literals. */
441	{CharacterLiteral} { addToken(Token.LITERAL_CHAR); }
442	{UnclosedCharacterLiteral} { addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
443	{ErrorUnclosedCharacterLiteral} { addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
444	{ErrorCharacterLiteral} { addToken(Token.ERROR_CHAR); }
445	{VerbatimStringLiteralStart} { start = zzMarkedPos-2; yybegin(VERBATIMSTRING); }
446	{RegularStringLiteral} { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
447	{UnclosedRegularStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
448	{ErrorRegularStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); }
449
450	/* Comments. "start" remembers where the comment token begins. */
451	{DelimitedCommentStart} { start = zzMarkedPos-2; yybegin(DELIMITEDCOMMENT); }
452	{DocumentationCommentStart} { start = zzMarkedPos-3; yybegin(DOCUMENTCOMMENT); }
453	{SingleLineComment} { addToken(Token.COMMENT_EOL); addNullToken(); return firstToken; }
454
455	/* Separators. */
456	{Separator} { addToken(Token.SEPARATOR); }
457	{Separator2} { addToken(Token.IDENTIFIER); }
458
459	/* Operators. */
460	{OperatorOrPunctuator} { addToken(Token.OPERATOR); }
461
462	/* Numbers */
463	{DecimalIntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
464	{HexadecimalIntegerLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
465	{RealLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
466	{ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
467
468	/* Preprocessor directives. */
469	{PPDirective} { addToken(Token.PREPROCESSOR); }
470
471	/* Pretty-much anything else. */
472	{ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
473
474	/* Ended with a line not in a string or comment. */
475	<<EOF>> { addNullToken(); return firstToken; }
476
477	/* Catch any other (unhandled) characters and flag them as bad. */
478	. { addToken(Token.ERROR_IDENTIFIER); }
479
480	}
481
482
483	<DELIMITEDCOMMENT> {
484	/* Inside a delimited comment; "start" is where the pending comment token begins. */
485	[^hwf\n\*]+ {}
486	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
487	[hwf] {}
488	\n { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
489	{DelimitedCommentEnd} { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
490	\* {}
491	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
492	/* Newline and EOF emit the pending comment text without a trailing null token. */
493	}
494
495
496	<DOCUMENTCOMMENT> {
497	/* Inside a "///" comment; URLs become hyperlink tokens and "<...>" runs become PREPROCESSOR tokens. */
498	[^hwf\<\n]* {}
499	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_DOCUMENTATION); start = zzMarkedPos; }
500	[hwf] {}
501	\n { addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addNullToken(); return firstToken; }
502	"<"[^\>]*">" { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addToken(temp,zzMarkedPos-1, Token.PREPROCESSOR); start = zzMarkedPos; }
503	"<" { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addToken(temp,zzEndRead, Token.PREPROCESSOR); addNullToken(); return firstToken; }
504	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addNullToken(); return firstToken; }
505	/* A lone "<" with no closing ">" takes everything up to zzEndRead as one PREPROCESSOR token. */
506	}
507
508
509	<VERBATIMSTRING> {
510	/* Inside an @"..." string; a doubled quote is an escape, a single quote ends the string. */
511	[^\"\n]* {}
512	{QuoteEscapeSequence} {}
513	\" { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE); }
514	\n { addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
515	<<EOF>> { addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
516	/* Newline and EOF emit the pending string text without a trailing null token. */
517	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format