source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/CSharpTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut an a text edit area for GLI that supports color syntax highlighting

File size: 17.8 KB
Line 
1/*
2 * 11/13/2004
3 *
4 * CSharpTokenMaker.java - An object that can take a chunk of text and return
5 * a linked list of tokens representing it in the C# programming language.
6 *
7 * This library is distributed under a modified BSD license. See the included
8 * RSyntaxTextArea.License.txt file for details.
9 */
10package org.fife.ui.rsyntaxtextarea.modes;
11
12import java.io.*;
13import javax.swing.text.Segment;
14
15import org.fife.ui.rsyntaxtextarea.*;
16
17
18/**
19 * A lexer for the C# programming language.
20 *
21 * This implementation was created using
22 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
23 * was modified for performance. Memory allocation needs to be almost
24 * completely removed to be competitive with the handwritten lexers (subclasses
25 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
26 * Strings are never allocated (via yytext()), and the scanner never has to
27 * worry about refilling its buffer (needlessly copying chars around).
28 * We can achieve this because RText always scans exactly 1 line of tokens at a
29 * time, and hands the scanner this line as an array of characters (a Segment
30 * really). Since tokens contain pointers to char arrays instead of Strings
31 * holding their contents, there is no need for allocating new memory for
32 * Strings.<p>
33 *
34 * The actual algorithm generated for scanning has, of course, not been
35 * modified.<p>
36 *
37 * If you wish to regenerate this file yourself, keep in mind the following:
38 * <ul>
39 * <li>The generated <code>CSharpTokenMaker.java</code> file will contain two
40 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
41 * You should hand-delete the second of each definition (the ones
42 * generated by the lexer), as these generated methods modify the input
43 * buffer, which we'll never have to do.</li>
44 * <li>You should also change the declaration/definition of zzBuffer to NOT
45 * be initialized. This is a needless memory allocation for us since we
46 * will be pointing the array somewhere else anyway.</li>
47 * <li>You should NOT call <code>yylex()</code> on the generated scanner
48 * directly; rather, you should use <code>getTokenList</code> as you would
49 * with any other <code>TokenMaker</code> instance.</li>
50 * </ul>
51 *
52 * @author Robert Futrell
53 * @version 0.5
54 *
55 */
56%%
57
/* Identity of the generated scanner class. */
%class CSharpTokenMaker
%extends AbstractJFlexCTokenMaker
%public

/* Input is treated as Unicode text. */
%unicode

/* Return type of the generated yylex() method. */
%type org.fife.ui.rsyntaxtextarea.Token
63
64
65%{
66
67
68 /**
69 * Constructor. This must be here because JFlex does not generate a
70 * no-parameter constructor.
71 */
72 public CSharpTokenMaker() {
73 super();
74 }
75
76
77 /**
78 * Adds the token specified to the current linked list of tokens.
79 *
80 * @param tokenType The token's type.
81 * @see #addToken(int, int, int)
82 */
83 private void addHyperlinkToken(int start, int end, int tokenType) {
84 int so = start + offsetShift;
85 addToken(zzBuffer, start,end, tokenType, so, true);
86 }
87
88
89 /**
90 * Adds the token specified to the current linked list of tokens.
91 *
92 * @param tokenType The token's type.
93 */
94 private void addToken(int tokenType) {
95 addToken(zzStartRead, zzMarkedPos-1, tokenType);
96 }
97
98
99 /**
100 * Adds the token specified to the current linked list of tokens.
101 *
102 * @param tokenType The token's type.
103 */
104 private void addToken(int start, int end, int tokenType) {
105 int so = start + offsetShift;
106 addToken(zzBuffer, start,end, tokenType, so);
107 }
108
109
110 /**
111 * Adds the token specified to the current linked list of tokens.
112 *
113 * @param array The character array.
114 * @param start The starting offset in the array.
115 * @param end The ending offset in the array.
116 * @param tokenType The token's type.
117 * @param startOffset The offset in the document at which this token
118 * occurs.
119 */
120 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
121 super.addToken(array, start,end, tokenType, startOffset);
122 zzStartRead = zzMarkedPos;
123 }
124
125
126 /**
127 * Returns the text to place at the beginning and end of a
128 * line to "comment" it in a this programming language.
129 *
130 * @return The start and end strings to add to a line to "comment"
131 * it out.
132 */
133 public String[] getLineCommentStartAndEnd() {
134 return new String[] { "//", null };
135 }
136
137
138 /**
139 * Returns the first token in the linked list of tokens generated
140 * from <code>text</code>. This method must be implemented by
141 * subclasses so they can correctly implement syntax highlighting.
142 *
143 * @param text The text from which to get tokens.
144 * @param initialTokenType The token type we should start with.
145 * @param startOffset The offset into the document at which
146 * <code>text</code> starts.
147 * @return The first <code>Token</code> in a linked list representing
148 * the syntax highlighted text.
149 */
150 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
151
152 resetTokenList();
153 this.offsetShift = -text.offset + startOffset;
154
155 // Start off in the proper state.
156 int state = Token.NULL;
157 switch (initialTokenType) {
158 case Token.LITERAL_STRING_DOUBLE_QUOTE:
159 state = VERBATIMSTRING;
160 start = text.offset;
161 break;
162 case Token.COMMENT_MULTILINE:
163 state = DELIMITEDCOMMENT;
164 start = text.offset;
165 break;
166 default:
167 state = Token.NULL;
168 }
169
170 s = text;
171 try {
172 yyreset(zzReader);
173 yybegin(state);
174 return yylex();
175 } catch (IOException ioe) {
176 ioe.printStackTrace();
177 return new DefaultToken();
178 }
179
180 }
181
182
183 /**
184 * Refills the input buffer.
185 *
186 * @return <code>true</code> if EOF was reached, otherwise
187 * <code>false</code>.
188 * @exception IOException if any I/O-Error occurs.
189 */
190 private boolean zzRefill() throws java.io.IOException {
191 return zzCurrentPos>=s.offset+s.count;
192 }
193
194
195 /**
196 * Resets the scanner to read from a new input stream.
197 * Does not close the old reader.
198 *
199 * All internal variables are reset, the old input stream
200 * <b>cannot</b> be reused (internal buffer is discarded and lost).
201 * Lexical state is set to <tt>YY_INITIAL</tt>.
202 *
203 * @param reader the new input stream
204 */
205 public final void yyreset(java.io.Reader reader) throws java.io.IOException {
206 // 's' has been updated.
207 zzBuffer = s.array;
208 /*
209 * We replaced the line below with the two below it because zzRefill
210 * no longer "refills" the buffer (since the way we do it, it's always
211 * "full" the first time through, since it points to the segment's
212 * array). So, we assign zzEndRead here.
213 */
214 //zzStartRead = zzEndRead = s.offset;
215 zzStartRead = s.offset;
216 zzEndRead = zzStartRead + s.count - 1;
217 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
218 zzLexicalState = YYINITIAL;
219 zzReader = reader;
220 zzAtBOL = true;
221 zzAtEOF = false;
222 }
223
224
225%}
226
/* C.1.1 - Line terminators. */
NewlineCharacter                    = \n

/* C.1.2 - Whitespace (runs of blanks and tabs). */
Whitespace                          = [ \t]+

/* C.1.3 - Comments. */
InputCharacter                      = [^\n]
InputCharacters                     = {InputCharacter}+
DocumentationCommentStart           = "///"
/* Two slashes, optionally followed by a non-slash character and the rest */
/* of the line; a third slash is left for DocumentationCommentStart.      */
SingleLineComment                   = "//" ([^/] {InputCharacters}?)?
DelimitedCommentStart               = "/*"
DelimitedCommentEnd                 = "*/"
240
/* C.1.5 - Unicode character escape sequences. */
/* HexQuad is a local helper: exactly four hex digits. */
HexQuad                             = {HexDigit}{HexDigit}{HexDigit}{HexDigit}
/* Backslash-u followed by four hex digits. */
UnicodeEscape1                      = "\\u" {HexQuad}
/* Backslash-U followed by eight hex digits. */
UnicodeEscape2                      = "\\U" {HexQuad}{HexQuad}
UnicodeEscapeSequence               = {UnicodeEscape1} | {UnicodeEscape2}
245
/* C.1.6 - Identifiers. */
LetterCharacter                     = ([A-Za-z])    /* Not accurate - many more Unicode letters, Unicode escapes */
/*
CombiningCharacter                  = ()
*/
DecimalDigitCharacter               = ([0-9])
/* Only the underscore connects identifier parts.  A hyphen must NOT be   */
/* listed here: it would make "a-b", "i--" or "n-1" scan as one single    */
/* identifier and hide the operator from the operator rule.               */
ConnectingCharacter                 = ([_])
/*
FormattingCharacter                 = ()
*/
/*
IdentifierPartCharacter             = ({LetterCharacter}|{DecimalDigitCharacter}|{ConnectingCharacter}|{CombiningCharacter}|{FormattingCharacter})
*/
IdentifierPartCharacter             = ({LetterCharacter}|{DecimalDigitCharacter}|{ConnectingCharacter})
IdentifierPartCharacters            = ({IdentifierPartCharacter}+)
IdentifierStartCharacter            = ({LetterCharacter}|[_])
IdentifierOrKeyword                 = ({IdentifierStartCharacter}{IdentifierPartCharacters}?)
/* An optional leading "@" allows keywords to be used as identifiers. */
Identifier                          = ("@"?{IdentifierOrKeyword})
/* NOTE:  The two below aren't from the C# spec, but we add them so we can */
/* highlight errors.  A NonSeparator is any character that is not          */
/* whitespace, a separator, an operator character or a quote.              */
NonSeparator                        = (([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\"))
ErrorIdentifier                     = ({NonSeparator}+)
268
/* C.1.8 - Literals. */
BooleanLiteral                      = "true" | "false"

/* Integer literals. */
DecimalDigit                        = [0-9]
DecimalDigits                       = {DecimalDigit}+
IntegerTypeSuffix                   = [uU] [lL]? | [lL] [uU]?
DecimalIntegerLiteral               = {DecimalDigits} {IntegerTypeSuffix}?
HexDigit                            = [0-9a-fA-F]
HexDigits                           = {HexDigit}+
HexadecimalIntegerLiteral           = "0" [xX] {HexDigits} {IntegerTypeSuffix}?

/* Real literals. */
Sign                                = [+\-]
ExponentPart                        = [eE] {Sign}? {DecimalDigits}
RealTypeSuffix                      = [fFdDmM]
RealHelper1                         = {DecimalDigits} "." {DecimalDigits} {ExponentPart}? {RealTypeSuffix}?
RealHelper2                         = "." {DecimalDigits} {ExponentPart}? {RealTypeSuffix}?
RealHelper3                         = {DecimalDigits} {ExponentPart} {RealTypeSuffix}?
RealHelper4                         = {DecimalDigits} {RealTypeSuffix}
RealLiteral                         = {RealHelper1} | {RealHelper2} | {RealHelper3} | {RealHelper4}
/* Any well-formed number immediately followed by non-separator characters. */
ErrorNumberFormat                   = ({DecimalIntegerLiteral} | {HexadecimalIntegerLiteral} | {RealLiteral}) {NonSeparator}+

/* Character literals. */
SingleCharacter                     = [^\'\\\n]
SimpleEscapeSequence                = "\\" [\'\"\\0abfnrtv]
HexadecimalEscapeSequence           = "\\x" {HexDigit} {HexDigit}? {HexDigit}? {HexDigit}?
Character                           = {SingleCharacter} | {SimpleEscapeSequence} | {HexadecimalEscapeSequence} | {UnicodeEscapeSequence}
UnclosedCharacterLiteral            = "'" {Character}
CharacterLiteral                    = {UnclosedCharacterLiteral} "'"
ErrorUnclosedCharacterLiteral       = "'" [^\'\n]*
ErrorCharacterLiteral               = "''" | {ErrorUnclosedCharacterLiteral} [\']

/* Verbatim string literals; a doubled quote is an escaped quote. */
QuoteEscapeSequence                 = "\"\""
SingleVerbatimStringLiteralCharacter = [^\"]
VerbatimStringLiteralStart          = "@\""

/* Regular string literals. */
SingleRegularStringLiteralCharacter = [^\"\\\n]
RegularStringLiteralCharacter       = {SingleRegularStringLiteralCharacter} | {SimpleEscapeSequence} | {HexadecimalEscapeSequence} | {UnicodeEscapeSequence}
RegularStringLiteralCharacters      = {RegularStringLiteralCharacter}+
RegularStringLiteral                = [\"] {RegularStringLiteralCharacters}? [\"]
UnclosedRegularStringLiteral        = [\"] ([\\] . | [^\\\"])* [^\"]?
ErrorRegularStringLiteral           = {UnclosedRegularStringLiteral} [\"]
304
/* C.1.9 - Operators and Punctuators. */
OOPHelper1                          = (":")
OOPHelper2                          = ("+"|"-"|"*"|"/"|"%"|"&"|"|"|"^"|"!"|"~")
OOPHelper3                          = ("="|"<"|">"|"?"|"++"|"--"|"&&"|"||"|"<<"|">>")
OOPHelper4                          = ("=="|"!="|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&=")
OOPHelper5                          = ("|="|"^="|"<<="|">>="|"->")
OperatorOrPunctuator                = ({OOPHelper1}|{OOPHelper2}|{OOPHelper3}|{OOPHelper4}|{OOPHelper5})
/* NOTE:  We distinguish between operators and separators (punctuators), but */
/* the C# spec doesn't, so the stuff below isn't in the spec.                */
Separator                           = ([\{\}\[\]\(\)])
/* The member-access dot belongs here with "," and ";".  Without it a lone  */
/* "." matches no token macro and is reported by the catch-all error rule.  */
/* A dot that starts a real literal (".5") is still scanned as a number     */
/* because that match is longer.                                            */
Separator2                          = ([,;.])
316
/* C.1.10 - Pre-processing Directives. */
/* NOTE:  We don't do ALL of the PP stuff here as it's unnecessary  */
/* for us to know the difference between declarations, diagnostics, */
/* regions, etc.                                                    */
/* Only PPDirective is referenced by the lexical rules of this file; */
/* the expression macros below are kept for reference.               */
ConditionalSymbol                   = ({IdentifierOrKeyword})    /* Not correct - excludes "true" and "false". */
PPNewLine                           = ({Whitespace}?{SingleLineComment}?{NewlineCharacter})
PPPrimaryExpression                 = ({IdentifierOrKeyword}|({Whitespace}?{PPExpression}{Whitespace}?))
PPUnaryExpression                   = ({PPPrimaryExpression}|("!"{Whitespace}?{PPUnaryExpression}))
PPEqualityExpression                = ({PPUnaryExpression}|({Whitespace}?"=="{Whitespace}?{PPUnaryExpression})|({Whitespace}?"!="{Whitespace}?{PPUnaryExpression}))
PPAndExpression                     = ({PPEqualityExpression}|({Whitespace}?"&&"{Whitespace}?{PPEqualityExpression}))
PPOrExpression                      = ({PPAndExpression}|({Whitespace}?"||"{Whitespace}?{PPAndExpression}))
PPExpression                        = ({Whitespace}?{PPOrExpression}{Whitespace}?)
/* Directive names recognised after "#".  "pragma" is included so that   */
/* "#pragma ..." lines are highlighted as directives instead of falling  */
/* through to the error-identifier rule.                                 */
PPWord                              = ("define"|"undef"|"if"|"elif"|"else"|"endif"|"line"|"error"|"warning"|"region"|"endregion"|"pragma")
/* A directive is "#" plus a directive name plus the rest of the line. */
PPDirective                         = ({Whitespace}?"#"{Whitespace}?{PPWord}{InputCharacter}*)
331
/* URL matching, for hyperlinks inside comments (not in C# spec). */
URLGenDelim                         = [:\/\?#\[\]@]
URLSubDelim                         = [\!\$&'\(\)\*\+,;=]
URLUnreserved                       = [A-Za-z_] | {DecimalDigitCharacter} | [\-\.\~]
URLCharacter                        = {URLGenDelim} | {URLSubDelim} | {URLUnreserved} | [%]
URLCharacters                       = {URLCharacter}*
/* A URL may only end in a slash, a dollar sign, a letter or a digit. */
URLEndCharacter                     = [\/\$] | [A-Za-z0-9]
/* Scheme (http, https, ftp or file) plus "://", or a bare "www." prefix, */
/* followed by an optional URL body.                                      */
URL                                 = (("http" "s"? | "ftp" | "file") "://" | "www.") ({URLCharacters} {URLEndCharacter})?
340
341
/* Lexical states besides YYINITIAL: inside a delimited comment, inside a */
/* "///" documentation comment, and inside a verbatim string literal.     */
%state DELIMITEDCOMMENT, DOCUMENTCOMMENT, VERBATIMSTRING
345
346%%
347
<YYINITIAL> {

    /* Keywords.  "object" and "string" are intentionally absent: they are */
    /* listed under data types below, and a second entry here would win    */
    /* (first rule takes priority) and make those entries unreachable.     */
    "abstract" |
    "as" |
    "base" |
    "break" |
    "case" |
    "catch" |
    "checked" |
    "class" |
    "const" |
    "continue" |
    "decimal" |
    "default" |
    "delegate" |
    "do" |
    "else" |
    "enum" |
    "event" |
    "explicit" |
    "extern" |
    "finally" |
    "fixed" |
    "for" |
    "foreach" |
    "goto" |
    "if" |
    "implicit" |
    "in" |
    "interface" |
    "internal" |
    "is" |
    "lock" |
    "namespace" |
    "new" |
    "null" |
    "operator" |
    "out" |
    "override" |
    "params" |
    "private" |
    "protected" |
    "public" |
    "readonly" |
    "ref" |
    "return" |
    "sealed" |
    "sizeof" |
    "stackalloc" |
    "static" |
    "struct" |
    "switch" |
    "this" |
    "throw" |
    "try" |
    "typeof" |
    "unchecked" |
    "unsafe" |
    "using" |
    "virtual" |
    "void" |
    "volatile" |
    "while"                             { addToken(Token.RESERVED_WORD); }

    /* Data types. */
    "bool" |
    "byte" |
    "char" |
    "double" |
    "float" |
    "int" |
    "long" |
    "object" |
    "sbyte" |
    "short" |
    "string" |
    "uint" |
    "ulong" |
    "ushort"                            { addToken(Token.DATA_TYPE); }


    /* End of the line outside any string or comment: finish the list. */
    {NewlineCharacter}                  { addNullToken(); return firstToken; }

    /* Must precede the identifier rule so "true"/"false" win the tie. */
    {BooleanLiteral}                    { addToken(Token.LITERAL_BOOLEAN); }

    {Identifier}                        { addToken(Token.IDENTIFIER); }

    {Whitespace}                        { addToken(Token.WHITESPACE); }

    /* String/Character Literals. */
    {CharacterLiteral}                  { addToken(Token.LITERAL_CHAR); }
    /* Flagged as an error, but scanning of the line continues. */
    {UnclosedCharacterLiteral}          { addToken(Token.ERROR_CHAR); }
    {ErrorUnclosedCharacterLiteral}     { addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
    {ErrorCharacterLiteral}             { addToken(Token.ERROR_CHAR); }
    /* Remember where the two-character opener began; the token is added */
    /* by the VERBATIMSTRING state.                                      */
    {VerbatimStringLiteralStart}        { start = zzMarkedPos-2; yybegin(VERBATIMSTRING); }
    {RegularStringLiteral}              { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
    {UnclosedRegularStringLiteral}      { addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
    {ErrorRegularStringLiteral}         { addToken(Token.ERROR_STRING_DOUBLE); }

    /* Comments.  For the two stateful kinds only the start position is */
    /* recorded here; the tokens are added by the comment states.       */
    {DelimitedCommentStart}             { start = zzMarkedPos-2; yybegin(DELIMITEDCOMMENT); }
    {DocumentationCommentStart}         { start = zzMarkedPos-3; yybegin(DOCUMENTCOMMENT); }
    {SingleLineComment}                 { addToken(Token.COMMENT_EOL); addNullToken(); return firstToken; }

    /* Separators. */
    {Separator}                         { addToken(Token.SEPARATOR); }
    {Separator2}                        { addToken(Token.IDENTIFIER); }

    /* Operators. */
    {OperatorOrPunctuator}              { addToken(Token.OPERATOR); }

    /* Numbers */
    {DecimalIntegerLiteral}             { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
    {HexadecimalIntegerLiteral}         { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
    {RealLiteral}                       { addToken(Token.LITERAL_NUMBER_FLOAT); }
    {ErrorNumberFormat}                 { addToken(Token.ERROR_NUMBER_FORMAT); }

    /* Preprocessor directives. */
    {PPDirective}                       { addToken(Token.PREPROCESSOR); }

    /* Pretty-much anything else. */
    {ErrorIdentifier}                   { addToken(Token.ERROR_IDENTIFIER); }

    /* Ended with a line not in a string or comment. */
    <<EOF>>                             { addNullToken(); return firstToken; }

    /* Catch any other (unhandled) characters and flag them as bad. */
    .                                   { addToken(Token.ERROR_IDENTIFIER); }

}
481
482
<DELIMITEDCOMMENT> {

    /* Ordinary comment text: keep accumulating; "start" still marks the */
    /* beginning of the pending comment token.                           */
    [^hwf\n\*]+                 {}

    /* A URL: flush the comment text before it, then add the URL itself */
    /* as a hyperlink token.  The URL start is saved first because      */
    /* adding a token moves zzStartRead.                                */
    {URL}                       {
                                    int urlStart = zzStartRead;
                                    addToken(start, urlStart - 1, Token.COMMENT_MULTILINE);
                                    addHyperlinkToken(urlStart, zzMarkedPos - 1, Token.COMMENT_MULTILINE);
                                    start = zzMarkedPos;
                                }

    /* An h, w or f that did not begin a URL is plain comment text. */
    [hwf]                       {}

    /* Line ends inside the comment: emit what we have.  No null token  */
    /* is added, so the list ends with a COMMENT_MULTILINE token.       */
    \n                          {
                                    addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
                                    return firstToken;
                                }

    /* Comment terminator: the token includes both closing characters. */
    {DelimitedCommentEnd}       {
                                    yybegin(YYINITIAL);
                                    addToken(start, zzStartRead + 1, Token.COMMENT_MULTILINE);
                                }

    /* A lone asterisk is plain comment text. */
    \*                          {}

    <<EOF>>                     {
                                    addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
                                    return firstToken;
                                }

}
494
495
<DOCUMENTCOMMENT> {

    /* Ordinary documentation text: keep accumulating. */
    [^hwf\<\n]*                 {}

    /* A URL: flush the text before it, then add the URL as a hyperlink. */
    /* The URL start is saved first because adding a token moves         */
    /* zzStartRead.                                                      */
    {URL}                       {
                                    int urlStart = zzStartRead;
                                    addToken(start, urlStart - 1, Token.COMMENT_DOCUMENTATION);
                                    addHyperlinkToken(urlStart, zzMarkedPos - 1, Token.COMMENT_DOCUMENTATION);
                                    start = zzMarkedPos;
                                }

    /* An h, w or f that did not begin a URL is plain text. */
    [hwf]                       {}

    /* Documentation comments end with the line. */
    \n                          {
                                    addToken(start, zzStartRead - 1, Token.COMMENT_DOCUMENTATION);
                                    addNullToken();
                                    return firstToken;
                                }

    /* A complete markup tag: flush the preceding text, then add the tag */
    /* with the PREPROCESSOR token type.                                 */
    "<"[^\>]*">"                {
                                    int tagStart = zzStartRead;
                                    addToken(start, tagStart - 1, Token.COMMENT_DOCUMENTATION);
                                    addToken(tagStart, zzMarkedPos - 1, Token.PREPROCESSOR);
                                    start = zzMarkedPos;
                                }

    /* An unclosed tag: everything up to zzEndRead becomes the tag token */
    /* and the line is finished.                                         */
    "<"                         {
                                    int tagStart = zzStartRead;
                                    addToken(start, tagStart - 1, Token.COMMENT_DOCUMENTATION);
                                    addToken(tagStart, zzEndRead, Token.PREPROCESSOR);
                                    addNullToken();
                                    return firstToken;
                                }

    <<EOF>>                     {
                                    addToken(start, zzStartRead - 1, Token.COMMENT_DOCUMENTATION);
                                    addNullToken();
                                    return firstToken;
                                }

}
507
508
<VERBATIMSTRING> {

    /* Ordinary string content: keep accumulating. */
    [^\"\n]*                    {}

    /* A doubled quote is an escaped quote and stays inside the string. */
    {QuoteEscapeSequence}       {}

    /* A single quote closes the string; the token includes that quote. */
    \"                          {
                                    yybegin(YYINITIAL);
                                    addToken(start, zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE);
                                }

    /* Line ends inside the string: emit what we have.  No null token   */
    /* is added, so the list ends with a string token.                  */
    \n                          {
                                    addToken(start, zzStartRead - 1, Token.LITERAL_STRING_DOUBLE_QUOTE);
                                    return firstToken;
                                }

    <<EOF>>                     {
                                    addToken(start, zzStartRead - 1, Token.LITERAL_STRING_DOUBLE_QUOTE);
                                    return firstToken;
                                }

}
Note: See TracBrowser for help on using the repository browser.