source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/CTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut of a text edit area for GLI that supports color syntax highlighting

File size: 15.4 KB
Line 
1/*
2 * 11/13/2004
3 *
4 * CTokenMaker.java - An object that can take a chunk of text and
5 * return a linked list of tokens representing it in the C programming
6 * language.
7 *
8 * This library is distributed under a modified BSD license. See the included
9 * RSyntaxTextArea.License.txt file for details.
10 */
11package org.fife.ui.rsyntaxtextarea.modes;
12
13import java.io.*;
14import javax.swing.text.Segment;
15
16import org.fife.ui.rsyntaxtextarea.*;
17
18
19/**
20 * Scanner for the C programming language.
21 *
22 * This implementation was created using
23 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
24 * was modified for performance. Memory allocation needs to be almost
25 * completely removed to be competitive with the handwritten lexers (subclasses
26 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
27 * Strings are never allocated (via yytext()), and the scanner never has to
28 * worry about refilling its buffer (needlessly copying chars around).
29 * We can achieve this because RText always scans exactly 1 line of tokens at a
30 * time, and hands the scanner this line as an array of characters (a Segment
31 * really). Since tokens contain pointers to char arrays instead of Strings
32 * holding their contents, there is no need for allocating new memory for
33 * Strings.<p>
34 *
35 * The actual algorithm generated for scanning has, of course, not been
36 * modified.<p>
37 *
38 * If you wish to regenerate this file yourself, keep in mind the following:
39 * <ul>
40 * <li>The generated <code>CTokenMaker.java</code> file will contain two
41 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
42 * You should hand-delete the second of each definition (the ones
43 * generated by the lexer), as these generated methods modify the input
44 * buffer, which we'll never have to do.</li>
45 * <li>You should also change the declaration/definition of zzBuffer to NOT
46 * be initialized. This is a needless memory allocation for us since we
47 * will be pointing the array somewhere else anyway.</li>
48 * <li>You should NOT call <code>yylex()</code> on the generated scanner
49 * directly; rather, you should use <code>getTokenList</code> as you would
50 * with any other <code>TokenMaker</code> instance.</li>
51 * </ul>
52 *
53 * @author Robert Futrell
54 * @version 0.5
55 *
56 */
57%%
58
59%public
60%class CTokenMaker
61%extends AbstractJFlexCTokenMaker
62%unicode
63%type org.fife.ui.rsyntaxtextarea.Token
64
65
66%{
67
68
69 /**
70 * Constructor. This must be here because JFlex does not generate a
71 * no-parameter constructor.
72 */
73 public CTokenMaker() {
74 super();
75 }
76
77
78 /**
79 * Adds the token specified to the current linked list of tokens.
80 *
81 * @param tokenType The token's type.
82 * @see #addToken(int, int, int)
83 */
84 private void addHyperlinkToken(int start, int end, int tokenType) {
85 int so = start + offsetShift;
86 addToken(zzBuffer, start,end, tokenType, so, true);
87 }
88
89
90 /**
91 * Adds the token specified to the current linked list of tokens.
92 *
93 * @param tokenType The token's type.
94 */
95 private void addToken(int tokenType) {
96 addToken(zzStartRead, zzMarkedPos-1, tokenType);
97 }
98
99
100 /**
101 * Adds the token specified to the current linked list of tokens.
102 *
103 * @param tokenType The token's type.
104 */
105 private void addToken(int start, int end, int tokenType) {
106 int so = start + offsetShift;
107 addToken(zzBuffer, start,end, tokenType, so);
108 }
109
110
111 /**
112 * Adds the token specified to the current linked list of tokens.
113 *
114 * @param array The character array.
115 * @param start The starting offset in the array.
116 * @param end The ending offset in the array.
117 * @param tokenType The token's type.
118 * @param startOffset The offset in the document at which this token
119 * occurs.
120 */
121 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
122 super.addToken(array, start,end, tokenType, startOffset);
123 zzStartRead = zzMarkedPos;
124 }
125
126
127 /**
128 * Returns the text to place at the beginning and end of a
129 * line to "comment" it in a this programming language.
130 *
131 * @return The start and end strings to add to a line to "comment"
132 * it out.
133 */
134 public String[] getLineCommentStartAndEnd() {
135 return new String[] { "//", null };
136 }
137
138
139 /**
140 * Returns the first token in the linked list of tokens generated
141 * from <code>text</code>. This method must be implemented by
142 * subclasses so they can correctly implement syntax highlighting.
143 *
144 * @param text The text from which to get tokens.
145 * @param initialTokenType The token type we should start with.
146 * @param startOffset The offset into the document at which
147 * <code>text</code> starts.
148 * @return The first <code>Token</code> in a linked list representing
149 * the syntax highlighted text.
150 */
151 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
152
153 resetTokenList();
154 this.offsetShift = -text.offset + startOffset;
155
156 // Start off in the proper state.
157 int state = Token.NULL;
158 switch (initialTokenType) {
159 case Token.COMMENT_MULTILINE:
160 state = MLC;
161 start = text.offset;
162 break;
163 default:
164 state = Token.NULL;
165 }
166
167 s = text;
168 try {
169 yyreset(zzReader);
170 yybegin(state);
171 return yylex();
172 } catch (IOException ioe) {
173 ioe.printStackTrace();
174 return new DefaultToken();
175 }
176
177 }
178
179
180 /**
181 * Refills the input buffer.
182 *
183 * @return <code>true</code> if EOF was reached, otherwise
184 * <code>false</code>.
185 * @exception IOException if any I/O-Error occurs.
186 */
187 private boolean zzRefill() throws java.io.IOException {
188 return zzCurrentPos>=s.offset+s.count;
189 }
190
191
192 /**
193 * Resets the scanner to read from a new input stream.
194 * Does not close the old reader.
195 *
196 * All internal variables are reset, the old input stream
197 * <b>cannot</b> be reused (internal buffer is discarded and lost).
198 * Lexical state is set to <tt>YY_INITIAL</tt>.
199 *
200 * @param reader the new input stream
201 */
202 public final void yyreset(java.io.Reader reader) throws java.io.IOException {
203 // 's' has been updated.
204 zzBuffer = s.array;
205 /*
206 * We replaced the line below with the two below it because zzRefill
207 * no longer "refills" the buffer (since the way we do it, it's always
208 * "full" the first time through, since it points to the segment's
209 * array). So, we assign zzEndRead here.
210 */
211 //zzStartRead = zzEndRead = s.offset;
212 zzStartRead = s.offset;
213 zzEndRead = zzStartRead + s.count - 1;
214 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
215 zzLexicalState = YYINITIAL;
216 zzReader = reader;
217 zzAtBOL = true;
218 zzAtEOF = false;
219 }
220
221
222%}
223
/* Basic character classes. */
Letter							= [A-Za-z]
LetterOrUnderscore				= ({Letter}|[_])
Digit							= [0-9]
HexDigit						= {Digit}|[A-Fa-f]
OctalDigit						= [0-7]
Exponent						= [eE][+-]?{Digit}+

/* Directive names accepted after a '#'. */
PreprocessorWord				= define|elif|else|endif|error|if|ifdef|ifndef|include|line|pragma|undef

Trigraph						= ("??="|"??("|"??)"|"??/"|"??'"|"??<"|"??>"|"??!"|"??-")

/* Escape sequences usable inside character and string literals. */
OctEscape1						= ([\\]{OctalDigit})
OctEscape2						= ([\\]{OctalDigit}{OctalDigit})
OctEscape3						= ([\\][0-3]{OctalDigit}{OctalDigit})
OctEscape						= ({OctEscape1}|{OctEscape2}|{OctEscape3})
HexEscape						= ([\\][xX]{HexDigit}{HexDigit})

/* Character and string literals, plus their malformed variants. */
AnyChrChr						= ([^\'\n\\])
Escape							= ([\\]([abfnrtv\'\"\?\\0]))
UnclosedCharLiteral				= ([\']({Escape}|{OctEscape}|{HexEscape}|{Trigraph}|{AnyChrChr}))
CharLiteral						= ({UnclosedCharLiteral}[\'])
ErrorUnclosedCharLiteral		= ([\'][^\'\n]*)
ErrorCharLiteral				= (([\'][\'])|{ErrorUnclosedCharLiteral}[\'])
AnyStrChr						= ([^\"\n\\])
FalseTrigraph					= (("?"(("?")*)[^\=\(\)\/\'\<\>\!\-\\\?\"\n])|("?"[\=\(\)\/\'\<\>\!\-]))
StringLiteral					= ([\"]((((("?")*)({Escape}|{OctEscape}|{HexEscape}|{Trigraph}))|{FalseTrigraph}|{AnyStrChr})*)(("?")*)[\"])
UnclosedStringLiteral			= ([\"]([\\].|[^\\\"])*[^\"]?)
ErrorStringLiteral				= ({UnclosedStringLiteral}[\"])


LineTerminator					= \n
WhiteSpace						= [ \t\f]

MLCBegin						= "/*"
MLCEnd							= "*/"
LineCommentBegin				= "//"

/*
 * Numeric literals.  LongSuffix accepts both the "long" suffix (l, L) and
 * the C99 "long long" suffix (ll, LL; mixed case such as "lL" is not valid
 * C), so literals like 42LL or 0xFFULL are recognized as numbers instead
 * of being matched only by ErrorNumberFormat.
 */
LongSuffix						= ([lL]|"ll"|"LL")
NonFloatSuffix					= (([uU]{LongSuffix}?)|({LongSuffix}[uU]?))
IntegerLiteral					= ({Digit}+{Exponent}?{NonFloatSuffix}?)
HexLiteral						= ("0"[xX]{HexDigit}+{NonFloatSuffix}?)
FloatLiteral					= ((({Digit}*[\.]{Digit}+)|({Digit}+[\.]{Digit}*)){Exponent}?[fFlL]?)
ErrorNumberFormat				= (({IntegerLiteral}|{HexLiteral}|{FloatLiteral}){NonSeparator}+)

/* Identifiers; NonSeparator is any character that cannot end a token. */
NonSeparator					= ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")
Identifier						= ({LetterOrUnderscore}({LetterOrUnderscore}|{Digit}|[$])*)
ErrorIdentifier					= ({NonSeparator}+)


/* URLs, which the comment states emit as separate hyperlink tokens. */
URLGenDelim						= ([:\/\?#\[\]@])
URLSubDelim						= ([\!\$&'\(\)\*\+,;=])
URLUnreserved					= ({LetterOrUnderscore}|{Digit}|[\-\.\~])
URLCharacter					= ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters					= ({URLCharacter}*)
URLEndCharacter					= ([\/\$]|{Letter}|{Digit})
URL								= (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)

/* Lexical states: inside a multi-line comment / inside an end-of-line comment. */
%state MLC
%state EOL_COMMENT
282
283%%
284
/*
 * Rules for ordinary C source text (outside of any comment).  When two
 * rules match text of the same length JFlex picks the one listed first,
 * so the fixed-word rules below must stay ahead of {Identifier}.
 */
<YYINITIAL> {

	/* Keywords */
	"auto" |
	"break" |
	"case" |
	"const" |
	"continue" |
	"default" |
	"do" |
	"else" |
	"enum" |
	"extern" |
	"for" |
	"goto" |
	"if" |
	"register" |
	"return" |
	"sizeof" |
	"static" |
	"struct" |
	"switch" |
	"typedef" |
	"union" |
	"volatile" |
	"while"					{ addToken(Token.RESERVED_WORD); }

	/* Data types. */
	"char" |
	"div_t" |
	"double" |
	"float" |
	"int" |
	"ldiv_t" |
	"long" |
	"short" |
	"signed" |
	"size_t" |
	"unsigned" |
	"void" |
	"wchar_t"				{ addToken(Token.DATA_TYPE); }

	/* Standard functions */
	"abort" |
	"abs" |
	"acos" |
	"asctime" |
	"asin" |
	"assert" |
	"atan2" |
	"atan" |
	"atexit" |
	"atof" |
	"atoi" |
	"atol" |
	"bsearch" |
	"btowc" |
	"calloc" |
	"ceil" |
	"clearerr" |
	"clock" |
	"cosh" |
	"cos" |
	"ctime" |
	"difftime" |
	"div" |
	"errno" |
	"exit" |
	"exp" |
	"fabs" |
	"fclose" |
	"feof" |
	"ferror" |
	"fflush" |
	"fgetc" |
	"fgetpos" |
	"fgetwc" |
	"fgets" |
	"fgetws" |
	"floor" |
	"fmod" |
	"fopen" |
	"fprintf" |
	"fputc" |
	"fputs" |
	"fputwc" |
	"fputws" |
	"fread" |
	"free" |
	"freopen" |
	"frexp" |
	"fscanf" |
	"fseek" |
	"fsetpos" |
	"ftell" |
	"fwprintf" |
	"fwrite" |
	"fwscanf" |
	"getchar" |
	"getc" |
	"getenv" |
	"gets" |
	"getwc" |
	"getwchar" |
	"gmtime" |
	"isalnum" |
	"isalpha" |
	"iscntrl" |
	"isdigit" |
	"isgraph" |
	"islower" |
	"isprint" |
	"ispunct" |
	"isspace" |
	"isupper" |
	"isxdigit" |
	"labs" |
	"ldexp" |
	"ldiv" |
	"localeconv" |
	"localtime" |
	"log10" |
	"log" |
	"longjmp" |
	"malloc" |
	"mblen" |
	"mbrlen" |
	"mbrtowc" |
	"mbsinit" |
	"mbsrtowcs" |
	"mbstowcs" |
	"mbtowc" |
	"memchr" |
	"memcmp" |
	"memcpy" |
	"memmove" |
	"memset" |
	"mktime" |
	"modf" |
	"offsetof" |
	"perror" |
	"pow" |
	"printf" |
	"putchar" |
	"putc" |
	"puts" |
	"putwc" |
	"putwchar" |
	"qsort" |
	"raise" |
	"rand" |
	"realloc" |
	"remove" |
	"rename" |
	"rewind" |
	"scanf" |
	"setbuf" |
	"setjmp" |
	"setlocale" |
	"setvbuf" |
	"signal" |
	"sinh" |
	"sin" |
	"sprintf" |
	"sqrt" |
	"srand" |
	"sscanf" |
	"strcat" |
	"strchr" |
	"strcmp" |
	"strcoll" |
	"strcpy" |
	"strcspn" |
	"strerror" |
	"strftime" |
	"strlen" |
	"strncat" |
	"strncmp" |
	"strncpy" |
	"strpbrk" |
	"strrchr" |
	"strspn" |
	"strstr" |
	"strtod" |
	"strtok" |
	"strtol" |
	"strtoul" |
	"strxfrm" |
	"swprintf" |
	"swscanf" |
	"system" |
	"tanh" |
	"tan" |
	"time" |
	"tmpfile" |
	"tmpnam" |
	"tolower" |
	"toupper" |
	"ungetc" |
	"ungetwc" |
	"va_arg" |
	"va_end" |
	"va_start" |
	"vfprintf" |
	"vfwprintf" |
	"vprintf" |
	"vsprintf" |
	"vswprintf" |
	"vwprintf" |
	"wcrtomb" |
	"wcscat" |
	"wcschr" |
	"wcscmp" |
	"wcscoll" |
	"wcscpy" |
	"wcscspn" |
	"wcsftime" |
	"wcslen" |
	"wcsncat" |
	"wcsncmp" |
	"wcsncpy" |
	"wcspbrk" |
	"wcsrchr" |
	"wcsrtombs" |
	"wcsspn" |
	"wcsstr" |
	"wcstod" |
	"wcstok" |
	"wcstol" |
	"wcstombs" |
	"wcstoul" |
	"wcsxfrm" |
	"wctob" |
	"wctomb" |
	"wmemchr" |
	"wmemcmp" |
	"wmemcpy" |
	"wmemmove" |
	"wmemset" |
	"wprintf" |
	"wscanf"				{ addToken(Token.FUNCTION); }

	/* Standard-defined macros. */
	"__DATE__" |
	"__TIME__" |
	"__FILE__" |
	"__LINE__" |
	"__STDC__"				{ addToken(Token.PREPROCESSOR); }

	/* End of the line: terminate the token list and hand it back. */
	{LineTerminator}		{ addNullToken(); return firstToken; }

	{Identifier}			{ addToken(Token.IDENTIFIER); }

	{WhiteSpace}+			{ addToken(Token.WHITESPACE); }

	/* Preprocessor directives */
	"#"{WhiteSpace}*{PreprocessorWord}	{ addToken(Token.PREPROCESSOR); }

	/* String/Character Literals. */
	{CharLiteral}				{ addToken(Token.LITERAL_CHAR); }
	{UnclosedCharLiteral}		{ addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
	{ErrorUnclosedCharLiteral}	{ addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
	{ErrorCharLiteral}			{ addToken(Token.ERROR_CHAR); }
	{StringLiteral}				{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	{UnclosedStringLiteral}		{ addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
	{ErrorStringLiteral}		{ addToken(Token.ERROR_STRING_DOUBLE); }

	/*
	 * Comment Literals.  No token is added yet; 'start' remembers where the
	 * comment opener (two characters long) began, and the comment states
	 * emit the token(s).
	 */
	{MLCBegin}				{ start = zzMarkedPos-2; yybegin(MLC); }
	{LineCommentBegin}		{ start = zzMarkedPos-2; yybegin(EOL_COMMENT); }

	/* Separators. */
	"(" |
	")" |
	"[" |
	"]" |
	"{" |
	"}"						{ addToken(Token.SEPARATOR); }

	/* Operators. */
	{Trigraph} |
	"=" |
	"+" |
	"-" |
	"*" |
	"/" |
	"%" |
	"~" |
	"<" |
	">" |
	"<<" |
	">>" |
	"==" |
	"+=" |
	"-=" |
	"*=" |
	"/=" |
	"%=" |
	">>=" |
	"<<=" |
	"^" |
	"&" |
	"&&" |
	"|" |
	"||" |
	"?" |
	":" |
	"," |
	"!" |
	"++" |
	"--" |
	"."						{ addToken(Token.OPERATOR); }

	/* Numbers */
	{IntegerLiteral}		{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{HexLiteral}			{ addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
	{FloatLiteral}			{ addToken(Token.LITERAL_NUMBER_FLOAT); }
	{ErrorNumberFormat}		{ addToken(Token.ERROR_NUMBER_FORMAT); }

	/* Some lines will end in '\' to wrap an expression. */
	"\\"					{ addToken(Token.IDENTIFIER); }

	{ErrorIdentifier}		{ addToken(Token.ERROR_IDENTIFIER); }

	/* Other punctuation, we'll highlight it as "identifiers." */
	";"						{ addToken(Token.IDENTIFIER); }

	/* Ended with a line not in a string or comment. */
	<<EOF>>					{ addNullToken(); return firstToken; }

	/* Catch any other (unhandled) characters and flag them as bad. */
	.						{ addToken(Token.ERROR_IDENTIFIER); }

}
622
/*
 * Inside a multi-line comment.  'start' holds the buffer index at which the
 * not-yet-emitted comment text begins.
 */
<MLC> {

	/* Plain comment text: keep scanning, nothing to emit yet. */
	[^hwf\n\*]+		{}

	/* A URL: emit the comment text before it, then the URL as a hyperlink token. */
	{URL}			{
						int urlStart = zzStartRead;
						addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
						addHyperlinkToken(urlStart, zzMarkedPos-1, Token.COMMENT_MULTILINE);
						start = zzMarkedPos;
					}

	/* An 'h', 'w' or 'f' that did not begin a URL. */
	[hwf]			{}

	/* The line ended while still inside the comment. */
	\n				{
						addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
						return firstToken;
					}

	/* Comment terminator: emit everything through it and resume normal scanning. */
	{MLCEnd}		{
						yybegin(YYINITIAL);
						addToken(start, zzStartRead+1, Token.COMMENT_MULTILINE);
					}

	/* A '*' that is not part of the terminator. */
	\*				{}

	<<EOF>>			{
						addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
						return firstToken;
					}

}
635
636
/*
 * Inside an end-of-line comment.  'start' holds the buffer index at which the
 * not-yet-emitted comment text begins.
 */
<EOL_COMMENT> {

	/* Plain comment text: keep scanning, nothing to emit yet. */
	[^hwf\n]+		{}

	/* A URL: emit the comment text before it, then the URL as a hyperlink token. */
	{URL}			{
						int urlStart = zzStartRead;
						addToken(start, zzStartRead-1, Token.COMMENT_EOL);
						addHyperlinkToken(urlStart, zzMarkedPos-1, Token.COMMENT_EOL);
						start = zzMarkedPos;
					}

	/* An 'h', 'w' or 'f' that did not begin a URL. */
	[hwf]			{}

	/* End of line or of input: emit the remaining comment text and finish the list. */
	\n				{
						addToken(start, zzStartRead-1, Token.COMMENT_EOL);
						addNullToken();
						return firstToken;
					}
	<<EOF>>			{
						addToken(start, zzStartRead-1, Token.COMMENT_EOL);
						addNullToken();
						return firstToken;
					}
}
Note: See TracBrowser for help on using the repository browser.