source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/CPlusPlusTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 15.7 KB
Line 
1/*
2 * 11/19/2004
3 *
4 * CPlusPlusTokenMaker.java - An object that can take a chunk of text and
5 * return a linked list of tokens representing it in C++.
6 *
7 * This library is distributed under a modified BSD license. See the included
8 * RSyntaxTextArea.License.txt file for details.
9 */
10package org.fife.ui.rsyntaxtextarea.modes;
11
12import java.io.*;
13import javax.swing.text.Segment;
14
15import org.fife.ui.rsyntaxtextarea.*;
16
17
18/**
19 * A parser for the C++ programming language.
20 *
21 * This implementation was created using
22 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
23 * was modified for performance. Memory allocation needs to be almost
24 * completely removed to be competitive with the handwritten lexers (subclasses
25 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
26 * Strings are never allocated (via yytext()), and the scanner never has to
27 * worry about refilling its buffer (needlessly copying chars around).
28 * We can achieve this because RText always scans exactly 1 line of tokens at a
29 * time, and hands the scanner this line as an array of characters (a Segment
30 * really). Since tokens contain pointers to char arrays instead of Strings
31 * holding their contents, there is no need for allocating new memory for
32 * Strings.<p>
33 *
34 * The actual algorithm generated for scanning has, of course, not been
35 * modified.<p>
36 *
37 * If you wish to regenerate this file yourself, keep in mind the following:
38 * <ul>
39 * <li>The generated <code>CPlusPlusTokenMaker.java</code> file will contain two
40 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
41 * You should hand-delete the second of each definition (the ones
42 * generated by the lexer), as these generated methods modify the input
43 * buffer, which we'll never have to do.</li>
44 * <li>You should also change the declaration/definition of zzBuffer to NOT
45 * be initialized. This is a needless memory allocation for us since we
46 * will be pointing the array somewhere else anyway.</li>
47 * <li>You should NOT call <code>yylex()</code> on the generated scanner
48 * directly; rather, you should use <code>getTokenList</code> as you would
49 * with any other <code>TokenMaker</code> instance.</li>
50 * </ul>
51 *
52 * @author Robert Futrell
53 * @version 0.6
54 *
55 */
56%%
57
58%public
59%class CPlusPlusTokenMaker
60%extends AbstractJFlexCTokenMaker
61%unicode
62%type org.fife.ui.rsyntaxtextarea.Token
63
64
65%{
66
67
68 /**
69 * Constructor. This must be here because JFlex does not generate a
70 * no-parameter constructor.
71 */
72 public CPlusPlusTokenMaker() {
73 super();
74 }
75
76
77 /**
78 * Adds the token specified to the current linked list of tokens.
79 *
80 * @param tokenType The token's type.
81 * @see #addToken(int, int, int)
82 */
83 private void addHyperlinkToken(int start, int end, int tokenType) {
84 int so = start + offsetShift;
85 addToken(zzBuffer, start,end, tokenType, so, true);
86 }
87
88
89 /**
90 * Adds the token specified to the current linked list of tokens.
91 *
92 * @param tokenType The token's type.
93 */
94 private void addToken(int tokenType) {
95 addToken(zzStartRead, zzMarkedPos-1, tokenType);
96 }
97
98
99 /**
100 * Adds the token specified to the current linked list of tokens.
101 *
102 * @param tokenType The token's type.
103 */
104 private void addToken(int start, int end, int tokenType) {
105 int so = start + offsetShift;
106 addToken(zzBuffer, start,end, tokenType, so);
107 }
108
109
110 /**
111 * Adds the token specified to the current linked list of tokens.
112 *
113 * @param array The character array.
114 * @param start The starting offset in the array.
115 * @param end The ending offset in the array.
116 * @param tokenType The token's type.
117 * @param startOffset The offset in the document at which this token
118 * occurs.
119 */
120 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
121 super.addToken(array, start,end, tokenType, startOffset);
122 zzStartRead = zzMarkedPos;
123 }
124
125
126 /**
127 * Returns the text to place at the beginning and end of a
128 * line to "comment" it in a this programming language.
129 *
130 * @return The start and end strings to add to a line to "comment"
131 * it out.
132 */
133 public String[] getLineCommentStartAndEnd() {
134 return new String[] { "//", null };
135 }
136
137
138 /**
139 * Returns the first token in the linked list of tokens generated
140 * from <code>text</code>. This method must be implemented by
141 * subclasses so they can correctly implement syntax highlighting.
142 *
143 * @param text The text from which to get tokens.
144 * @param initialTokenType The token type we should start with.
145 * @param startOffset The offset into the document at which
146 * <code>text</code> starts.
147 * @return The first <code>Token</code> in a linked list representing
148 * the syntax highlighted text.
149 */
150 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
151
152 resetTokenList();
153 this.offsetShift = -text.offset + startOffset;
154
155 // Start off in the proper state.
156 int state = Token.NULL;
157 switch (initialTokenType) {
158 case Token.COMMENT_MULTILINE:
159 state = MLC;
160 start = text.offset;
161 break;
162 default:
163 state = Token.NULL;
164 }
165
166 s = text;
167 try {
168 yyreset(zzReader);
169 yybegin(state);
170 return yylex();
171 } catch (IOException ioe) {
172 ioe.printStackTrace();
173 return new DefaultToken();
174 }
175
176 }
177
178
179 /**
180 * Refills the input buffer.
181 *
182 * @return <code>true</code> if EOF was reached, otherwise
183 * <code>false</code>.
184 * @exception IOException if any I/O-Error occurs.
185 */
186 private boolean zzRefill() throws java.io.IOException {
187 return zzCurrentPos>=s.offset+s.count;
188 }
189
190
191 /**
192 * Resets the scanner to read from a new input stream.
193 * Does not close the old reader.
194 *
195 * All internal variables are reset, the old input stream
196 * <b>cannot</b> be reused (internal buffer is discarded and lost).
197 * Lexical state is set to <tt>YY_INITIAL</tt>.
198 *
199 * @param reader the new input stream
200 */
201 public final void yyreset(java.io.Reader reader) throws java.io.IOException {
202 // 's' has been updated.
203 zzBuffer = s.array;
204 /*
205 * We replaced the line below with the two below it because zzRefill
206 * no longer "refills" the buffer (since the way we do it, it's always
207 * "full" the first time through, since it points to the segment's
208 * array). So, we assign zzEndRead here.
209 */
210 //zzStartRead = zzEndRead = s.offset;
211 zzStartRead = s.offset;
212 zzEndRead = zzStartRead + s.count - 1;
213 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
214 zzLexicalState = YYINITIAL;
215 zzReader = reader;
216 zzAtBOL = true;
217 zzAtEOF = false;
218 }
219
220
221%}
222
223Letter = [A-Za-z]
224LetterOrUnderscore = ({Letter}|"_")
225Digit = [0-9]
226HexDigit = {Digit}|[A-Fa-f]
227OctalDigit = [0-7]
228Exponent = [eE][+-]?{Digit}+
229
230PreprocessorWord = define|elif|else|endif|error|if|ifdef|ifndef|include|line|pragma|undef
231
232Trigraph = ("??="|"??("|"??)"|"??/"|"??'"|"??<"|"??>"|"??!"|"??-")
233
234OctEscape1 = ([\\]{OctalDigit})
235OctEscape2 = ([\\]{OctalDigit}{OctalDigit})
236OctEscape3 = ([\\][0-3]{OctalDigit}{OctalDigit})
237OctEscape = ({OctEscape1}|{OctEscape2}|{OctEscape3})
238HexEscape = ([\\][xX]{HexDigit}{HexDigit})
239
240AnyChrChr = ([^\'\n\\])
241Escape = ([\\]([abfnrtv\'\"\?\\0]))
242UnclosedCharLiteral = ([\']({Escape}|{OctEscape}|{HexEscape}|{Trigraph}|{AnyChrChr}))
243CharLiteral = ({UnclosedCharLiteral}[\'])
244ErrorUnclosedCharLiteral = ([\'][^\'\n]*)
245ErrorCharLiteral = (([\'][\'])|{ErrorUnclosedCharLiteral}[\'])
246AnyStrChr = ([^\"\n\\])
247FalseTrigraph = (("?"(("?")*)[^\=\(\)\/\'\<\>\!\-\\\?\"\n])|("?"[\=\(\)\/\'\<\>\!\-]))
248StringLiteral = ([\"]((((("?")*)({Escape}|{OctEscape}|{HexEscape}|{Trigraph}))|{FalseTrigraph}|{AnyStrChr})*)(("?")*)[\"])
249UnclosedStringLiteral = ([\"]([\\].|[^\\\"])*[^\"]?)
250ErrorStringLiteral = ({UnclosedStringLiteral}[\"])
251
252
253LineTerminator = \n
254WhiteSpace = [ \t\f]
255
256MLCBegin = "/*"
257MLCEnd = "*/"
258LineCommentBegin = "//"
259
260NonFloatSuffix = (([uU][lL]?)|([lL][uU]?))
261IntegerLiteral = ({Digit}+{Exponent}?{NonFloatSuffix}?)
262HexLiteral = ("0"[xX]{HexDigit}+{NonFloatSuffix}?)
263FloatLiteral = ((({Digit}*[\.]{Digit}+)|({Digit}+[\.]{Digit}*)){Exponent}?[fFlL]?)
264ErrorNumberFormat = (({IntegerLiteral}|{HexLiteral}|{FloatLiteral}){NonSeparator}+)
265
266NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")
267Identifier = ({LetterOrUnderscore}({LetterOrUnderscore}|{Digit}|[$])*)
268ErrorIdentifier = ({NonSeparator}+)
269
270URLGenDelim = ([:\/\?#\[\]@])
271URLSubDelim = ([\!\$&'\(\)\*\+,;=])
272URLUnreserved = ({LetterOrUnderscore}|{Digit}|[\-\.\~])
273URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
274URLCharacters = ({URLCharacter}*)
275URLEndCharacter = ([\/\$]|{Letter}|{Digit})
276URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
277
278
279%state MLC
280%state EOL_COMMENT
281
282%%
283
284<YYINITIAL> {
285
286 /* Keywords */
287 "auto" |
288 "break" |
289 "case" |
290 "catch" |
291 "class" |
292 "const" |
293 "const_cast" |
294 "continue" |
295 "default" |
296 "delete" |
297 "do" |
298 "dynamic_cast" |
299 "else" |
300 "enum" |
301 "explicit" |
302 "extern" |
303 "for" |
304 "friend" |
305 "goto" |
306 "if" |
307 "inline" |
308 "mutable" |
309 "namespace" |
310 "new" |
311 "operator" |
312 "private" |
313 "protected" |
314 "public" |
315 "register" |
316 "reinterpret_cast" |
317 "return" |
318 "sizeof" |
319 "static" |
320 "static_cast" |
321 "struct" |
322 "switch" |
323 "template" |
324 "this" |
325 "throw" |
326 "try" |
327 "typedef" |
328 "typeid" |
329 "typename" |
330 "union" |
331 "using" |
332 "virtual" |
333 "volatile" |
334 "while" { addToken(Token.RESERVED_WORD); }
335
336 /* Boolean literals. */
337 "true" |
338 "false" |
339
340 /* Data types. */
341 "bool" |
342 "char" |
343 "double" |
344 "float" |
345 "int" |
346 "long" |
347 "short" |
348 "signed" |
349 "unsigned" |
350 "void" |
351 "wchar_t" { addToken(Token.DATA_TYPE); }
352
/*
 * Standard functions. Every alternative below shares the single action at
 * the end of the list. Each name appears exactly once: a repeated literal
 * can never be matched, because the earlier identical rule always wins.
 */
"abort" |
"abs" |
"acos" |
"asctime" |
"asin" |
"assert" |
"atan2" |
"atan" |
"atexit" |
"atof" |
"atoi" |
"atol" |
"bsearch" |
"btowc" |
"calloc" |
"ceil" |
"clearerr" |
"clock" |
"cosh" |
"cos" |
"ctime" |
"difftime" |
"div" |
"errno" |
"exit" |
"exp" |
"fabs" |
"fclose" |
"feof" |
"ferror" |
"fflush" |
"fgetc" |
"fgetpos" |
"fgetwc" |
"fgets" |
"fgetws" |
"floor" |
"fmod" |
"fopen" |
"fprintf" |
"fputc" |
"fputs" |
"fputwc" |
"fputws" |
"fread" |
"free" |
"freopen" |
"frexp" |
"fscanf" |
"fseek" |
"fsetpos" |
"ftell" |
"fwprintf" |
"fwrite" |
"fwscanf" |
"getchar" |
"getc" |
"getenv" |
"gets" |
"getwc" |
"getwchar" |
"gmtime" |
"isalnum" |
"isalpha" |
"iscntrl" |
"isdigit" |
"isgraph" |
"islower" |
"isprint" |
"ispunct" |
"isspace" |
"isupper" |
"isxdigit" |
"labs" |
"ldexp" |
"ldiv" |
"localeconv" |
"localtime" |
"log10" |
"log" |
"longjmp" |
"malloc" |
"mblen" |
"mbrlen" |
"mbrtowc" |
"mbsinit" |
"mbsrtowcs" |
"mbstowcs" |
"mbtowc" |
"memchr" |
"memcmp" |
"memcpy" |
"memmove" |
"memset" |
"mktime" |
"modf" |
"offsetof" |
"perror" |
"pow" |
"printf" |
"putchar" |
"putc" |
"puts" |
"putwc" |
"putwchar" |
"qsort" |
"raise" |
"rand" |
"realloc" |
"remove" |
"rename" |
"rewind" |
"scanf" |
"setbuf" |
"setjmp" |
"setlocale" |
"setvbuf" |
"signal" |
"sinh" |
"sin" |
"sprintf" |
"sqrt" |
"srand" |
"sscanf" |
"strcat" |
"strchr" |
"strcmp" |
"strcoll" |
"strcpy" |
"strcspn" |
"strerror" |
"strftime" |
"strlen" |
"strncat" |
"strncmp" |
"strncpy" |
"strpbrk" |
"strrchr" |
"strspn" |
"strstr" |
"strtod" |
"strtok" |
"strtol" |
"strtoul" |
"strxfrm" |
"swprintf" |
"swscanf" |
"system" |
"tanh" |
"tan" |
"time" |
"tmpfile" |
"tmpnam" |
"tolower" |
"toupper" |
"ungetc" |
"ungetwc" |
"va_arg" |
"va_end" |
"va_start" |
"vfprintf" |
"vfwprintf" |
"vprintf" |
"vsprintf" |
"vswprintf" |
"vwprintf" |
"wcrtomb" |
"wcscat" |
"wcschr" |
"wcscmp" |
"wcscoll" |
"wcscpy" |
"wcscspn" |
"wcsftime" |
"wcslen" |
"wcsncat" |
"wcsncmp" |
"wcsncpy" |
"wcspbrk" |
"wcsrchr" |
"wcsrtombs" |
"wcsspn" |
"wcsstr" |
"wcstod" |
"wcstok" |
"wcstol" |
"wcstombs" |
"wcstoul" |
"wcsxfrm" |
"wctob" |
"wctomb" |
"wmemchr" |
"wmemcmp" |
"wmemcpy" |
"wmemmove" |
"wmemset" |
"wprintf" |
"wscanf" { addToken(Token.FUNCTION); }
554
555 /* Standard-defined macros. */
556 "__DATE__" |
557 "__TIME__" |
558 "__FILE__" |
559 "__LINE__" |
560 "__STDC__" { addToken(Token.PREPROCESSOR); }
561
562 {LineTerminator} { addNullToken(); return firstToken; }
563
564 {Identifier} { addToken(Token.IDENTIFIER); }
565
566 {WhiteSpace}+ { addToken(Token.WHITESPACE); }
567
568 /* Preprocessor directives */
569 "#"{WhiteSpace}*{PreprocessorWord} { addToken(Token.PREPROCESSOR); }
570
571 /* String/Character Literals. */
572 {CharLiteral} { addToken(Token.LITERAL_CHAR); }
573 {UnclosedCharLiteral} { addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
574 {ErrorUnclosedCharLiteral} { addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
575 {ErrorCharLiteral} { addToken(Token.ERROR_CHAR); }
576 {StringLiteral} { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
577 {UnclosedStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
578 {ErrorStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); }
579
580 /* Comment Literals. */
581 {MLCBegin} { start = zzMarkedPos-2; yybegin(MLC); }
582 {LineCommentBegin} { start = zzMarkedPos-2; yybegin(EOL_COMMENT); }
583
584 /* Separators. */
585 "(" |
586 ")" |
587 "[" |
588 "]" |
589 "{" |
590 "}" { addToken(Token.SEPARATOR); }
591
/*
 * Operators. Every alternative below shares the single action at the end
 * of the list. "," is listed once only; a second identical literal could
 * never be matched.
 */
{Trigraph} |
"=" |
"+" |
"-" |
"*" |
"/" |
"%" |
"~" |
"<" |
">" |
"<<" |
">>" |
"==" |
"+=" |
"-=" |
"*=" |
"/=" |
"%=" |
">>=" |
"<<=" |
"^" |
"&" |
"&&" |
"|" |
"||" |
"?" |
":" |
"," |
"!" |
"++" |
"--" |
"." { addToken(Token.OPERATOR); }
626
627 /* Numbers */
628 {IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
629 {HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
630 {FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
631 {ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
632
633 /* Some lines will end in '\' to wrap an expression. */
634 "\\" { addToken(Token.IDENTIFIER); }
635
636 {ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
637
638 /* Other punctuation, we'll highlight it as "identifiers." */
639 ";" { addToken(Token.IDENTIFIER); }
640
641 /* Ended with a line not in a string or comment. */
642 <<EOF>> { addNullToken(); return firstToken; }
643
644 /* Catch any other (unhandled) characters and flag them as bad. */
645 . { addToken(Token.ERROR_IDENTIFIER); }
646
647}
648
<MLC> {

    /* Comment text that cannot begin a URL ('h', 'w', 'f'), end the line, or begin the closing delimiter. */
    [^hwf\n\*]+     {}

    /* A URL inside the comment: emit the comment text before it, then the URL as a hyperlink token. */
    {URL}           {
                        /* addToken() moves zzStartRead to zzMarkedPos, so remember where the URL begins first. */
                        int urlStart = zzStartRead;
                        addToken(start, urlStart-1, Token.COMMENT_MULTILINE);
                        addHyperlinkToken(urlStart, zzMarkedPos-1, Token.COMMENT_MULTILINE);
                        /* Comment text after the URL begins a new token. */
                        start = zzMarkedPos;
                    }

    /* An 'h', 'w' or 'f' that did not begin a URL is ordinary comment text. */
    [hwf]           {}

    /* End of line while still inside the comment: emit what has been collected and stop. */
    \n              {
                        addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
                        return firstToken;
                    }

    /* Closing delimiter: the token ends on the second character of the match. */
    {MLCEnd}        {
                        yybegin(YYINITIAL);
                        addToken(start, zzStartRead+1, Token.COMMENT_MULTILINE);
                    }

    /* A '*' that did not begin the closing delimiter is ordinary comment text. */
    \*              {}

    /* End of input while still inside the comment: emit what has been collected and stop. */
    <<EOF>>         {
                        addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
                        return firstToken;
                    }

}
661
<EOL_COMMENT> {

    /* Comment text that cannot begin a URL ('h', 'w', 'f') or end the line. */
    [^hwf\n]+       {}

    /* A URL inside the comment: emit the comment text before it, then the URL as a hyperlink token. */
    {URL}           {
                        /* addToken() moves zzStartRead to zzMarkedPos, so remember where the URL begins first. */
                        int urlStart = zzStartRead;
                        addToken(start, urlStart-1, Token.COMMENT_EOL);
                        addHyperlinkToken(urlStart, zzMarkedPos-1, Token.COMMENT_EOL);
                        /* Comment text after the URL begins a new token. */
                        start = zzMarkedPos;
                    }

    /* An 'h', 'w' or 'f' that did not begin a URL is ordinary comment text. */
    [hwf]           {}

    /* End of line: emit the comment, terminate the token list and stop. */
    \n              {
                        addToken(start, zzStartRead-1, Token.COMMENT_EOL);
                        addNullToken();
                        return firstToken;
                    }

    /* End of input: same handling as end of line. */
    <<EOF>>         {
                        addToken(start, zzStartRead-1, Token.COMMENT_EOL);
                        addNullToken();
                        return firstToken;
                    }

}
Note: See TracBrowser for help on using the repository browser.