source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/JavaScriptTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 18.4 KB
Line 
1/*
2 * 02/05/2012
3 *
4 * JavaScriptTokenMaker.java - Parses a document into JavaScript tokens.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import java.io.*;
12import javax.swing.text.Segment;
13
14import org.fife.ui.rsyntaxtextarea.*;
15
16
17/**
18 * Scanner for JavaScript files. Its states could be simplified, but are
19 * kept the way they are to keep a degree of similarity (i.e. copy/paste)
20 * between it and HTML/JSP/PHPTokenMaker. This should cause no difference in
21 * performance.<p>
22 *
23 * This implementation was created using
24 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
25 * was modified for performance. Memory allocation needs to be almost
26 * completely removed to be competitive with the handwritten lexers (subclasses
27 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
28 * Strings are never allocated (via yytext()), and the scanner never has to
29 * worry about refilling its buffer (needlessly copying chars around).
30 * We can achieve this because RText always scans exactly 1 line of tokens at a
31 * time, and hands the scanner this line as an array of characters (a Segment
32 * really). Since tokens contain pointers to char arrays instead of Strings
33 * holding their contents, there is no need for allocating new memory for
34 * Strings.<p>
35 *
36 * The actual algorithm generated for scanning has, of course, not been
37 * modified.<p>
38 *
39 * If you wish to regenerate this file yourself, keep in mind the following:
40 * <ul>
41 * <li>The generated <code>JavaScriptTokenMaker.java</code> file will contain two
42 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
43 * You should hand-delete the second of each definition (the ones
44 * generated by the lexer), as these generated methods modify the input
45 * buffer, which we'll never have to do.</li>
46 * <li>You should also change the declaration/definition of zzBuffer to NOT
47 * be initialized. This is a needless memory allocation for us since we
48 * will be pointing the array somewhere else anyway.</li>
49 * <li>You should NOT call <code>yylex()</code> on the generated scanner
50 * directly; rather, you should use <code>getTokenList</code> as you would
51 * with any other <code>TokenMaker</code> instance.</li>
52 * </ul>
53 *
54 * @author Robert Futrell
55 * @version 0.8
56 *
57 */
58%%
59
// Scanner options: the generated scanner is the public class
// JavaScriptTokenMaker, which extends AbstractJFlexCTokenMaker, scans
// Unicode input, and whose scanning method returns a
// org.fife.ui.rsyntaxtextarea.Token (see "return yylex();" in getTokenList).
60%public
61%class JavaScriptTokenMaker
62%extends AbstractJFlexCTokenMaker
63%unicode
64%type org.fife.ui.rsyntaxtextarea.Token
65
66
67%{
68
69 /**
70 * End-of-line token type: the line ended inside a JavaScript multi-line comment; getTokenList resumes in JS_MLC.
71 */
72 public static final int INTERNAL_IN_JS_MLC = -8;
73
74 /**
75 * End-of-line token type: the line ended (via a trailing backslash) inside a double-quoted string already flagged invalid.
76 */
77 public static final int INTERNAL_IN_JS_STRING_INVALID = -9;
78
79 /**
80 * End-of-line token type: the line ended (via a trailing backslash) inside a double-quoted string that is still valid.
81 */
82 public static final int INTERNAL_IN_JS_STRING_VALID = -10;
83
84 /**
85 * End-of-line token type: the line ended (via a trailing backslash) inside a single-quoted string already flagged invalid.
86 */
87 public static final int INTERNAL_IN_JS_CHAR_INVALID = -11;
88
89 /**
90 * End-of-line token type: the line ended (via a trailing backslash) inside a single-quoted string that is still valid.
91 */
92 public static final int INTERNAL_IN_JS_CHAR_VALID = -12;
93
94 /**
95 * While in JS_STRING or JS_CHAR: false once a malformed hex or Unicode escape has been seen in the current literal, true otherwise.
96 */
97 private boolean validJSString;
98
99
100 /**
101 * Constructor. This must be here because JFlex does not generate a
102 * no-parameter constructor.
103 */
104 public JavaScriptTokenMaker() {
105 super();
106 }
107
108
109 /**
110 * Adds the token specified to the current linked list of tokens as an
111 * "end token;" that is, at <code>zzMarkedPos</code>.
112 *
113 * @param tokenType The token's type.
114 */
115 private void addEndToken(int tokenType) {
116 addToken(zzMarkedPos,zzMarkedPos, tokenType);
117 }
118
119
120 /**
121 * Adds the token specified to the current linked list of tokens.
122 *
123 * @param tokenType The token's type.
124 * @see #addToken(int, int, int)
125 */
126 private void addHyperlinkToken(int start, int end, int tokenType) {
127 int so = start + offsetShift;
128 addToken(zzBuffer, start,end, tokenType, so, true);
129 }
130
131
132 /**
133 * Adds the token specified to the current linked list of tokens.
134 *
135 * @param tokenType The token's type.
136 */
137 private void addToken(int tokenType) {
138 addToken(zzStartRead, zzMarkedPos-1, tokenType);
139 }
140
141
142 /**
143 * Adds the token specified to the current linked list of tokens.
144 *
145 * @param tokenType The token's type.
146 */
147 private void addToken(int start, int end, int tokenType) {
148 int so = start + offsetShift;
149 addToken(zzBuffer, start,end, tokenType, so);
150 }
151
152
153 /**
154 * Adds the token specified to the current linked list of tokens.
155 *
156 * @param array The character array.
157 * @param start The starting offset in the array.
158 * @param end The ending offset in the array.
159 * @param tokenType The token's type.
160 * @param startOffset The offset in the document at which this token
161 * occurs.
162 */
163 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
164 super.addToken(array, start,end, tokenType, startOffset);
165 zzStartRead = zzMarkedPos;
166 }
167
168
169 /**
170 * {@inheritDoc}
171 */
172 public String[] getLineCommentStartAndEnd() {
173 return new String[] { "//", null };
174 }
175
176
177 /**
178 * Returns the first token in the linked list of tokens generated
179 * from <code>text</code>. This method must be implemented by
180 * subclasses so they can correctly implement syntax highlighting.
181 *
182 * @param text The text from which to get tokens.
183 * @param initialTokenType The token type we should start with.
184 * @param startOffset The offset into the document at which
185 * <code>text</code> starts.
186 * @return The first <code>Token</code> in a linked list representing
187 * the syntax highlighted text.
188 */
189 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
190
191 resetTokenList();
192 this.offsetShift = -text.offset + startOffset;
193
194 // Start off in the proper state.
195 int state = Token.NULL;
196 switch (initialTokenType) {
197 case INTERNAL_IN_JS_MLC:
198 state = JS_MLC;
199 start = text.offset;
200 break;
201 case INTERNAL_IN_JS_STRING_INVALID:
202 state = JS_STRING;
203 validJSString = false;
204 start = text.offset;
205 break;
206 case INTERNAL_IN_JS_STRING_VALID:
207 state = JS_STRING;
208 validJSString = true;
209 start = text.offset;
210 break;
211 case INTERNAL_IN_JS_CHAR_INVALID:
212 state = JS_CHAR;
213 validJSString = false;
214 start = text.offset;
215 break;
216 case INTERNAL_IN_JS_CHAR_VALID:
217 state = JS_CHAR;
218 validJSString = true;
219 start = text.offset;
220 break;
221 default:
222 state = Token.NULL;
223 }
224
225 s = text;
226 try {
227 yyreset(zzReader);
228 yybegin(state);
229 return yylex();
230 } catch (IOException ioe) {
231 ioe.printStackTrace();
232 return new DefaultToken();
233 }
234
235 }
236
237
238 /**
239 * Refills the input buffer.
240 *
241 * @return <code>true</code> if EOF was reached, otherwise
242 * <code>false</code>.
243 */
244 private boolean zzRefill() {
245 return zzCurrentPos>=s.offset+s.count;
246 }
247
248
249 /**
250 * Resets the scanner to read from a new input stream.
251 * Does not close the old reader.
252 *
253 * All internal variables are reset, the old input stream
254 * <b>cannot</b> be reused (internal buffer is discarded and lost).
255 * Lexical state is set to <tt>YY_INITIAL</tt>.
256 *
257 * @param reader the new input stream
258 */
259 public final void yyreset(java.io.Reader reader) {
260 // 's' has been updated.
261 zzBuffer = s.array;
262 /*
263 * We replaced the line below with the two below it because zzRefill
264 * no longer "refills" the buffer (since the way we do it, it's always
265 * "full" the first time through, since it points to the segment's
266 * array). So, we assign zzEndRead here.
267 */
268 //zzStartRead = zzEndRead = s.offset;
269 zzStartRead = s.offset;
270 zzEndRead = zzStartRead + s.count - 1;
271 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
272 zzLexicalState = YYINITIAL;
273 zzReader = reader;
274 zzAtBOL = true;
275 zzAtEOF = false;
276 }
277
278
279%}
280
// Basic character classes.
Whitespace                  = ([ \t\f]+)
LineTerminator              = ([\n])

Letter                      = [A-Za-z]
NonzeroDigit                = [1-9]
Digit                       = ("0"|{NonzeroDigit})
HexDigit                    = ({Digit}|[A-Fa-f])
OctalDigit                  = ([0-7])
// The letter u followed by exactly four hex digits (the tail of a Unicode escape).
EscapedSourceCharacter      = ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
// Any character that is not whitespace, a separator, an operator or a quote.
NonSeparator                = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
IdentifierStart             = ({Letter}|"_"|"$")
IdentifierPart              = ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))

// Comment delimiters.
JS_MLCBegin                 = "/*"
JS_MLCEnd                   = "*/"
JS_LineCommentBegin         = "//"

// Numeric literals.
JS_IntegerHelper1           = (({NonzeroDigit}{Digit}*)|"0")
JS_IntegerHelper2           = ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
JS_IntegerLiteral           = ({JS_IntegerHelper1}[lL]?)
JS_HexLiteral               = ({JS_IntegerHelper2}[lL]?)
JS_FloatHelper1             = ([fFdD]?)
JS_FloatHelper2             = ([eE][+-]?{Digit}+{JS_FloatHelper1})
JS_FloatLiteral1            = ({Digit}+"."({JS_FloatHelper1}|{JS_FloatHelper2}|{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2})))
JS_FloatLiteral2            = ("."{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2}))
JS_FloatLiteral3            = ({Digit}+{JS_FloatHelper2})
JS_FloatLiteral             = ({JS_FloatLiteral1}|{JS_FloatLiteral2}|{JS_FloatLiteral3}|({Digit}+[fFdD]))
// A well-formed number immediately followed by non-separator characters.
JS_ErrorNumberFormat        = (({JS_IntegerLiteral}|{JS_HexLiteral}|{JS_FloatLiteral}){NonSeparator}+)

// Separators and operators.
// The character class previously listed the closing bracket twice.
JS_Separator                = ([\(\)\{\}\[\]])
JS_Separator2               = ([\;,.])
// ">>" used to be listed twice while "<<" and "||" were missing, although
// "<<=" and "&&" were present; the missing ones were split into two tokens.
JS_NonAssignmentOperator    = ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"?"|">>"|"!"|"&"|"=="|":"|"<<"|"~"|"|"|"&&"|"||"|">>>")
JS_AssignmentOperator       = ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
JS_Operator                 = ({JS_NonAssignmentOperator}|{JS_AssignmentOperator})

// Identifiers and regular-expression literals.
JS_Identifier               = ({IdentifierStart}{IdentifierPart}*)
JS_ErrorIdentifier          = ({NonSeparator}+)
JS_Regex                    = ("/"([^\*\\/]|\\.)([^/\\]|\\.)*"/"[gim]*)

// URLs, recognized inside comments so they can be emitted as hyperlinks.
URLGenDelim                 = ([:\/\?#\[\]@])
URLSubDelim                 = ([\!\$&'\(\)\*\+,;=])
URLUnreserved               = ({Letter}|"_"|{Digit}|[\-\.\~])
URLCharacter                = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters               = ({URLCharacter}*)
URLEndCharacter             = ([\/\$]|{Letter}|{Digit})
URL                         = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)


// Lexical states: inside a double-quoted string, inside a single-quoted
// string, inside a multi-line comment, and inside an end-of-line comment.
%state JS_STRING
%state JS_CHAR
%state JS_MLC
%state JS_EOL_COMMENT
329
330
331%%
332
// Default state: ordinary JavaScript code outside any string or comment.
// Keyword rules are listed before {JS_Identifier}; JFlex takes the longest
// match and, for equal lengths, the earlier rule, so a word is a keyword
// only when it matches one exactly.
333<YYINITIAL> {
334
335 // ECMA keywords.
336 "break" |
337 "continue" |
338 "delete" |
339 "else" |
340 "for" |
341 "function" |
342 "if" |
343 "in" |
344 "new" |
345 "this" |
346 "typeof" |
347 "var" |
348 "void" |
349 "while" |
350 "with" { addToken(Token.RESERVED_WORD); }
351 "return" { addToken(Token.RESERVED_WORD_2); }
352
353 // Reserved (but not yet used) ECMA keywords.
354 "abstract" { addToken(Token.RESERVED_WORD); }
355 "boolean" { addToken(Token.DATA_TYPE); }
356 "byte" { addToken(Token.DATA_TYPE); }
357 "case" { addToken(Token.RESERVED_WORD); }
358 "catch" { addToken(Token.RESERVED_WORD); }
359 "char" { addToken(Token.DATA_TYPE); }
360 "class" { addToken(Token.RESERVED_WORD); }
361 "const" { addToken(Token.RESERVED_WORD); }
362 "debugger" { addToken(Token.RESERVED_WORD); }
363 "default" { addToken(Token.RESERVED_WORD); }
364 "do" { addToken(Token.RESERVED_WORD); }
365 "double" { addToken(Token.DATA_TYPE); }
366 "enum" { addToken(Token.RESERVED_WORD); }
367 "export" { addToken(Token.RESERVED_WORD); }
368 "extends" { addToken(Token.RESERVED_WORD); }
369 "final" { addToken(Token.RESERVED_WORD); }
370 "finally" { addToken(Token.RESERVED_WORD); }
371 "float" { addToken(Token.DATA_TYPE); }
372 "goto" { addToken(Token.RESERVED_WORD); }
373 "implements" { addToken(Token.RESERVED_WORD); }
374 "import" { addToken(Token.RESERVED_WORD); }
375 "instanceof" { addToken(Token.RESERVED_WORD); }
376 "int" { addToken(Token.DATA_TYPE); }
377 "interface" { addToken(Token.RESERVED_WORD); }
378 "long" { addToken(Token.DATA_TYPE); }
379 "native" { addToken(Token.RESERVED_WORD); }
380 "package" { addToken(Token.RESERVED_WORD); }
381 "private" { addToken(Token.RESERVED_WORD); }
382 "protected" { addToken(Token.RESERVED_WORD); }
383 "public" { addToken(Token.RESERVED_WORD); }
384 "short" { addToken(Token.DATA_TYPE); }
385 "static" { addToken(Token.RESERVED_WORD); }
386 "super" { addToken(Token.RESERVED_WORD); }
387 "switch" { addToken(Token.RESERVED_WORD); }
388 "synchronized" { addToken(Token.RESERVED_WORD); }
389 "throw" { addToken(Token.RESERVED_WORD); }
390 "throws" { addToken(Token.RESERVED_WORD); }
391 "transient" { addToken(Token.RESERVED_WORD); }
392 "try" { addToken(Token.RESERVED_WORD); }
393 "volatile" { addToken(Token.RESERVED_WORD); }
394 "null" { addToken(Token.RESERVED_WORD); }
395
396 // Literals.
397 "false" |
398 "true" { addToken(Token.LITERAL_BOOLEAN); }
399 "NaN" { addToken(Token.RESERVED_WORD); }
400 "Infinity" { addToken(Token.RESERVED_WORD); }
401
402 // Functions.
403 "eval" |
404 "parseInt" |
405 "parseFloat" |
406 "escape" |
407 "unescape" |
408 "isNaN" |
409 "isFinite" { addToken(Token.FUNCTION); }
410
411 {LineTerminator} { addNullToken(); return firstToken; }
412 {JS_Identifier} { addToken(Token.IDENTIFIER); }
413 {Whitespace} { addToken(Token.WHITESPACE); }
414
415 /* String/Character literals. */
// 'start' remembers the offset of the opening quote; the literal is
// emitted as one token by the JS_CHAR / JS_STRING state.
416 [\'] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_CHAR); }
417 [\"] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_STRING); }
418
419 /* Comment literals. */
// 'start' remembers the offset of the two-character comment opener.
420 "/**/" { addToken(Token.COMMENT_MULTILINE); }
421 {JS_MLCBegin} { start = zzMarkedPos-2; yybegin(JS_MLC); }
422 {JS_LineCommentBegin} { start = zzMarkedPos-2; yybegin(JS_EOL_COMMENT); }
423
424 /* Attempt to identify regular expressions (not foolproof) - do after comments! */
425 {JS_Regex} {
426 boolean highlightedAsRegex = false;
427 if (firstToken==null) {
// Nothing precedes the match on this line: treat it as a regex.
428 addToken(Token.REGEX);
429 highlightedAsRegex = true;
430 }
431 else {
432 // If this is *likely* to be a regex, based on
433 // the previous token, highlight it as such.
434 Token t = firstToken.getLastNonCommentNonWhitespaceToken();
435 if (RSyntaxUtilities.regexCanFollowInJavaScript(t)) {
436 addToken(Token.REGEX);
437 highlightedAsRegex = true;
438 }
439 }
440 // If it doesn't *appear* to be a regex, highlight it as
441 // individual tokens.
442 if (!highlightedAsRegex) {
// Emit only the leading '/' as an operator, then rewind all scanner
// positions to the character after it so the rest of the match is
// scanned again as ordinary code.
443 int temp = zzStartRead + 1;
444 addToken(zzStartRead, zzStartRead, Token.OPERATOR);
445 zzStartRead = zzCurrentPos = zzMarkedPos = temp;
446 }
447 }
448
449 /* Separators. */
// NOTE(review): {JS_Separator2} (semicolon, comma, period) is emitted as
// IDENTIFIER rather than SEPARATOR; this looks deliberate - confirm.
450 {JS_Separator} { addToken(Token.SEPARATOR); }
451 {JS_Separator2} { addToken(Token.IDENTIFIER); }
452
453 /* Operators. */
454 {JS_Operator} { addToken(Token.OPERATOR); }
455
456 /* Numbers */
457 {JS_IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
458 {JS_HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
459 {JS_FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
460 {JS_ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
461
462 {JS_ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
463
464 /* Ended with a line not in a string or comment. */
465 <<EOF>> { addNullToken(); return firstToken; }
466
467 /* Catch any other (unhandled) characters and flag them as bad. */
468 . { addToken(Token.ERROR_IDENTIFIER); }
469
470}
471
// Inside a double-quoted string. 'start' is the offset of the opening
// quote (set in YYINITIAL) or of the segment start when a string continues
// from the previous line (set in getTokenList). The rules with empty
// actions only consume text; a token is emitted when the string ends.
472<JS_STRING> {
473 [^\n\\\"]+ {}
// A newline or end of input before the closing quote: unterminated string.
474 \n { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
475 \\x{HexDigit}{2} {}
476 \\x { /* Invalid latin-1 character \xXX */ validJSString = false; }
477 \\u{HexDigit}{4} {}
478 \\u { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
479 \\. { /* Skip all escaped chars. */ }
480 \\ { /* Line ending in '\' => continue to next line. */
// The end token records whether the string is still valid, so the next
// line resumes in this state with the same validJSString value.
481 if (validJSString) {
482 addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE);
483 addEndToken(INTERNAL_IN_JS_STRING_VALID);
484 }
485 else {
486 addToken(start,zzStartRead, Token.ERROR_STRING_DOUBLE);
487 addEndToken(INTERNAL_IN_JS_STRING_INVALID);
488 }
489 return firstToken;
490 }
// Closing quote: emit the whole literal and return to ordinary code.
491 \" { int type = validJSString ? Token.LITERAL_STRING_DOUBLE_QUOTE : Token.ERROR_STRING_DOUBLE; addToken(start,zzStartRead, type); yybegin(YYINITIAL); }
492 <<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
493}
494
// Inside a single-quoted string. Mirrors the JS_STRING state rule for
// rule, but emits LITERAL_CHAR / ERROR_CHAR tokens and the
// INTERNAL_IN_JS_CHAR_* end tokens.
495<JS_CHAR> {
496 [^\n\\\']+ {}
// A newline or end of input before the closing quote: unterminated string.
497 \n { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addNullToken(); return firstToken; }
498 \\x{HexDigit}{2} {}
499 \\x { /* Invalid latin-1 character \xXX */ validJSString = false; }
500 \\u{HexDigit}{4} {}
501 \\u { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
502 \\. { /* Skip all escaped chars. */ }
503 \\ { /* Line ending in '\' => continue to next line. */
// The end token records whether the string is still valid, so the next
// line resumes in this state with the same validJSString value.
504 if (validJSString) {
505 addToken(start,zzStartRead, Token.LITERAL_CHAR);
506 addEndToken(INTERNAL_IN_JS_CHAR_VALID);
507 }
508 else {
509 addToken(start,zzStartRead, Token.ERROR_CHAR);
510 addEndToken(INTERNAL_IN_JS_CHAR_INVALID);
511 }
512 return firstToken;
513 }
// Closing quote: emit the whole literal and return to ordinary code.
514 \' { int type = validJSString ? Token.LITERAL_CHAR : Token.ERROR_CHAR; addToken(start,zzStartRead, type); yybegin(YYINITIAL); }
515 <<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addNullToken(); return firstToken; }
516}
517
<JS_MLC> {
    // Inside a JavaScript multi-line comment. 'start' is the offset where
    // the pending comment text begins. Rules with empty actions only
    // consume text. The first rule excludes h, w and f so that {URL}, which
    // begins with one of those letters, gets a chance to match.
    [^hwf\n\*]+     {}
    // Flush the comment text before the URL, then emit the URL as a
    // hyperlink token. Both use COMMENT_MULTILINE, like every other rule in
    // this state (they were mistakenly emitted as COMMENT_EOL before).
    // zzStartRead is saved first because addToken() advances it.
    {URL}           { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
    [hwf]           {}
    // Line ends inside the comment: emit what we have plus an end token so
    // the next line resumes in this state.
    \n              { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
    // Comment terminator: the token includes both terminator characters.
    {JS_MLCEnd}     { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
    \*              {}
    <<EOF>>         { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
}
528
// Inside an end-of-line comment. 'start' is the offset where the pending
// comment text begins. The first rule excludes h, w and f so that {URL},
// which begins with one of those letters, gets a chance to match.
529<JS_EOL_COMMENT> {
530 [^hwf\n]+ {}
// Flush the comment text before the URL, then emit the URL itself as a
// hyperlink token; zzStartRead is saved first because addToken() advances it.
531 {URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
532 [hwf] {}
// The comment ends with the line; no end token is needed.
533 \n |
534 <<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addNullToken(); return firstToken; }
535}
Note: See TracBrowser for help on using the repository browser.