source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/HTMLTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 9 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 30.7 KB
Line 
1/*
2 * 01/24/2005
3 *
4 * HTMLTokenMaker.java - Generates tokens for HTML syntax highlighting.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import java.io.*;
12import javax.swing.text.Segment;
13
14import org.fife.ui.rsyntaxtextarea.*;
15
16
17/**
18 * Scanner for HTML 5 files.
19 *
20 * This implementation was created using
21 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22 * was modified for performance. Memory allocation needs to be almost
23 * completely removed to be competitive with the handwritten lexers (subclasses
24 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
25 * Strings are never allocated (via yytext()), and the scanner never has to
26 * worry about refilling its buffer (needlessly copying chars around).
27 * We can achieve this because RText always scans exactly 1 line of tokens at a
28 * time, and hands the scanner this line as an array of characters (a Segment
29 * really). Since tokens contain pointers to char arrays instead of Strings
30 * holding their contents, there is no need for allocating new memory for
31 * Strings.<p>
32 *
33 * The actual algorithm generated for scanning has, of course, not been
34 * modified.<p>
35 *
36 * If you wish to regenerate this file yourself, keep in mind the following:
37 * <ul>
38 * <li>The generated <code>HTMLTokenMaker.java</code> file will contain two
39 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40 * You should hand-delete the second of each definition (the ones
41 * generated by the lexer), as these generated methods modify the input
42 * buffer, which we'll never have to do.</li>
43 * <li>You should also change the declaration/definition of zzBuffer to NOT
44 * be initialized. This is a needless memory allocation for us since we
45 * will be pointing the array somewhere else anyway.</li>
46 * <li>You should NOT call <code>yylex()</code> on the generated scanner
47 * directly; rather, you should use <code>getTokenList</code> as you would
48 * with any other <code>TokenMaker</code> instance.</li>
49 * </ul>
50 *
51 * @author Robert Futrell
52 * @version 0.8
53 *
54 */
55%%
56
57%public
58%class HTMLTokenMaker
59%extends AbstractMarkupTokenMaker
60%unicode
61%type org.fife.ui.rsyntaxtextarea.Token
62
63
64%{
65
66 /**
67 * Type specific to HTMLTokenMaker denoting a line ending with an unclosed
68 * double-quote attribute.
69 */
70 public static final int INTERNAL_ATTR_DOUBLE = -1;
71
72
73 /**
74 * Type specific to HTMLTokenMaker denoting a line ending with an unclosed
75 * single-quote attribute.
76 */
77 public static final int INTERNAL_ATTR_SINGLE = -2;
78
79
80 /**
81 * Token type specific to HTMLTokenMaker; this signals that the user has
82 * ended a line with an unclosed HTML tag; thus a new line is beginning
83 * still inside of the tag.
84 */
85 public static final int INTERNAL_INTAG = -3;
86
87 /**
88 * Token type specific to HTMLTokenMaker; this signals that the user has
89 * ended a line with an unclosed <code>&lt;script&gt;</code> tag.
90 */
91 public static final int INTERNAL_INTAG_SCRIPT = -4;
92
93 /**
94 * Token type specifying we're in a double-quoted attribute in a
95 * script tag.
96 */
97 public static final int INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT = -5;
98
99 /**
100 * Token type specifying we're in a single-quoted attribute in a
101 * script tag.
102 */
103 public static final int INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT = -6;
104
105 /**
106 * Token type specifying we're in JavaScript.
107 */
108 public static final int INTERNAL_IN_JS = -7;
109
110 /**
111 * Token type specifying we're in a JavaScript multiline comment.
112 */
113 public static final int INTERNAL_IN_JS_MLC = -8;
114
115 /**
116 * Token type specifying we're in an invalid multi-line JS string.
117 */
118 public static final int INTERNAL_IN_JS_STRING_INVALID = -9;
119
120 /**
121 * Token type specifying we're in a valid multi-line JS string.
122 */
123 public static final int INTERNAL_IN_JS_STRING_VALID = -10;
124
125 /**
126 * Token type specifying we're in an invalid multi-line JS single-quoted string.
127 */
128 public static final int INTERNAL_IN_JS_CHAR_INVALID = -11;
129
130 /**
131 * Token type specifying we're in a valid multi-line JS single-quoted string.
132 */
133 public static final int INTERNAL_IN_JS_CHAR_VALID = -12;
134
135 /**
136 * Whether closing markup tags are automatically completed for HTML.
137 * This flag is static, so it is shared by every instance of this class.
138 */
138 private static boolean completeCloseTags;
139
140 /**
141 * When in the JS_STRING or JS_CHAR state, whether the current string is
142 * valid.  Restored by <code>getTokenList</code> when a line resumes inside
143 * a string that was continued from the previous line.
142 */
143 private boolean validJSString;
144
145
146 /**
147 * Constructor. This must be here because JFlex does not generate a
148 * no-parameter constructor.
149 */
150 public HTMLTokenMaker() {
151 super();
152 }
153
154
155 /**
156 * Adds the token specified to the current linked list of tokens as an
157 * "end token;" that is, at <code>zzMarkedPos</code>.
158 *
159 * @param tokenType The token's type.
160 */
161 private void addEndToken(int tokenType) {
162 addToken(zzMarkedPos,zzMarkedPos, tokenType);
163 }
164
165
166 /**
167 * Adds the token specified to the current linked list of tokens.
168 *
169 * @param tokenType The token's type.
170 * @see #addToken(int, int, int)
171 */
172 private void addHyperlinkToken(int start, int end, int tokenType) {
173 int so = start + offsetShift;
174 addToken(zzBuffer, start,end, tokenType, so, true);
175 }
176
177
178 /**
179 * Adds the token specified to the current linked list of tokens.
180 *
181 * @param tokenType The token's type.
182 */
183 private void addToken(int tokenType) {
184 addToken(zzStartRead, zzMarkedPos-1, tokenType);
185 }
186
187
188 /**
189 * Adds the token specified to the current linked list of tokens.
190 *
191 * @param tokenType The token's type.
192 */
193 private void addToken(int start, int end, int tokenType) {
194 int so = start + offsetShift;
195 addToken(zzBuffer, start,end, tokenType, so);
196 }
197
198
199 /**
200 * Adds the token specified to the current linked list of tokens.
201 *
202 * @param array The character array.
203 * @param start The starting offset in the array.
204 * @param end The ending offset in the array.
205 * @param tokenType The token's type.
206 * @param startOffset The offset in the document at which this token
207 * occurs.
208 */
209 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
210 super.addToken(array, start,end, tokenType, startOffset);
211 zzStartRead = zzMarkedPos;
212 }
213
214
215 /**
216 * Sets whether markup close tags should be completed. You might not want
217 * this to be the case, since some tags in standard HTML aren't usually
218 * closed.
219 *
220 * @return Whether closing markup tags are completed.
221 * @see #setCompleteCloseTags(boolean)
222 */
223 public boolean getCompleteCloseTags() {
224 return completeCloseTags;
225 }
226
227
228 /**
229 * Returns the first token in the linked list of tokens generated
230 * from <code>text</code>. This method must be implemented by
231 * subclasses so they can correctly implement syntax highlighting.
232 *
233 * @param text The text from which to get tokens.
234 * @param initialTokenType The token type we should start with.
235 * @param startOffset The offset into the document at which
236 * <code>text</code> starts.
237 * @return The first <code>Token</code> in a linked list representing
238 * the syntax highlighted text.
239 */
240 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
241
242 resetTokenList();
243 this.offsetShift = -text.offset + startOffset;
244
245 // Start off in the proper state.
246 int state = Token.NULL;
247 switch (initialTokenType) {
248 case Token.COMMENT_MULTILINE:
249 state = COMMENT;
250 start = text.offset;
251 break;
252 case Token.PREPROCESSOR:
253 state = PI;
254 start = text.offset;
255 break;
256 case Token.VARIABLE:
257 state = DTD;
258 start = text.offset;
259 break;
260 case INTERNAL_INTAG:
261 state = INTAG;
262 start = text.offset;
263 break;
264 case INTERNAL_INTAG_SCRIPT:
265 state = INTAG_SCRIPT;
266 start = text.offset;
267 break;
268 case INTERNAL_ATTR_DOUBLE:
269 state = INATTR_DOUBLE;
270 start = text.offset;
271 break;
272 case INTERNAL_ATTR_SINGLE:
273 state = INATTR_SINGLE;
274 start = text.offset;
275 break;
276 case INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT:
277 state = INATTR_DOUBLE_SCRIPT;
278 start = text.offset;
279 break;
280 case INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT:
281 state = INATTR_SINGLE_SCRIPT;
282 start = text.offset;
283 break;
284 case INTERNAL_IN_JS:
285 state = JAVASCRIPT;
286 start = text.offset;
287 break;
288 case INTERNAL_IN_JS_MLC:
289 state = JS_MLC;
290 start = text.offset;
291 break;
292 case INTERNAL_IN_JS_STRING_INVALID:
293 state = JS_STRING;
294 validJSString = false;
295 start = text.offset;
296 break;
297 case INTERNAL_IN_JS_STRING_VALID:
298 state = JS_STRING;
299 validJSString = true;
300 start = text.offset;
301 break;
302 case INTERNAL_IN_JS_CHAR_INVALID:
303 state = JS_CHAR;
304 validJSString = false;
305 start = text.offset;
306 break;
307 case INTERNAL_IN_JS_CHAR_VALID:
308 state = JS_CHAR;
309 validJSString = true;
310 start = text.offset;
311 break;
312 default:
313 state = Token.NULL;
314 }
315
316 s = text;
317 try {
318 yyreset(zzReader);
319 yybegin(state);
320 return yylex();
321 } catch (IOException ioe) {
322 ioe.printStackTrace();
323 return new DefaultToken();
324 }
325
326 }
327
328
329 /**
330 * Sets whether markup close tags should be completed. You might not want
331 * this to be the case, since some tags in standard HTML aren't usually
332 * closed.
333 *
334 * @param complete Whether closing markup tags are completed.
335 * @see #getCompleteCloseTags()
336 */
337 public static void setCompleteCloseTags(boolean complete) {
338 completeCloseTags = complete;
339 }
340
341
342 /**
343 * Refills the input buffer.
344 *
345 * @return <code>true</code> if EOF was reached, otherwise
346 * <code>false</code>.
347 */
348
349 private boolean zzRefill() {
350 return zzCurrentPos>=s.offset+s.count;
351 }
352
353
354 /**
355 * Resets the scanner to read from a new input stream.
356 * Does not close the old reader.
357 *
358 * All internal variables are reset, the old input stream
359 * <b>cannot</b> be reused (internal buffer is discarded and lost).
360 * Lexical state is set to <tt>YY_INITIAL</tt>.
361 *
362 * @param reader the new input stream
363 */
364
365 public final void yyreset(java.io.Reader reader) {
366 // 's' has been updated.
367 zzBuffer = s.array;
368
369 /*
370 * We replaced the line below with the two below it because zzRefill
371 * no longer "refills" the buffer (since the way we do it, it's always
372 * "full" the first time through, since it points to the segment's
373 * array). So, we assign zzEndRead here.
374 */
375 //zzStartRead = zzEndRead = s.offset;
376
377 zzStartRead = s.offset;
378 zzEndRead = zzStartRead + s.count - 1;
379 //### zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
380 zzCurrentPos = zzMarkedPos = s.offset;
381 zzLexicalState = YYINITIAL;
382 zzReader = reader;
383 zzAtBOL = true;
384 zzAtEOF = false;
385 }
386
387
388
389%}
390
// HTML-specific stuff.
Whitespace					= ([ \t\f]+)
LineTerminator				= ([\n])
// Plain text between markup: anything up to whitespace, '<' or '&'.
Identifier					= ([^ \t\n<&]+)
// '&' followed by non-space, non-';' characters and an optional ';'.
AmperItem					= ([&][^; \t]*[;]?)
InTagIdentifier				= ([^ \t\n\"\'/=>]+)
// Closing script tag, matched case-insensitively.
EndScriptTag				= ("</" [sS][cC][rR][iI][pP][tT] ">")


// JavaScript stuff.
Letter						= [A-Za-z]
NonzeroDigit				= [1-9]
Digit						= ("0"|{NonzeroDigit})
HexDigit					= ({Digit}|[A-Fa-f])
OctalDigit					= ([0-7])
EscapedSourceCharacter		= ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
NonSeparator				= ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
IdentifierStart				= ({Letter}|"_"|"$")
IdentifierPart				= ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))
JS_MLCBegin					= "/*"
JS_MLCEnd					= "*/"
JS_LineCommentBegin			= "//"
JS_IntegerHelper1			= (({NonzeroDigit}{Digit}*)|"0")
JS_IntegerHelper2			= ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
JS_IntegerLiteral			= ({JS_IntegerHelper1}[lL]?)
JS_HexLiteral				= ({JS_IntegerHelper2}[lL]?)
JS_FloatHelper1				= ([fFdD]?)
JS_FloatHelper2				= ([eE][+-]?{Digit}+{JS_FloatHelper1})
JS_FloatLiteral1			= ({Digit}+"."({JS_FloatHelper1}|{JS_FloatHelper2}|{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2})))
JS_FloatLiteral2			= ("."{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2}))
JS_FloatLiteral3			= ({Digit}+{JS_FloatHelper2})
JS_FloatLiteral				= ({JS_FloatLiteral1}|{JS_FloatLiteral2}|{JS_FloatLiteral3}|({Digit}+[fFdD]))
// A number literal immediately followed by non-separator characters.
JS_ErrorNumberFormat		= (({JS_IntegerLiteral}|{JS_HexLiteral}|{JS_FloatLiteral}){NonSeparator}+)
// Brackets, braces and parentheses (a redundant second "\]" was dropped).
JS_Separator				= ([\(\)\{\}\[\]])
JS_Separator2				= ([\;,.])
// The original list named ">>" twice and had neither "<<" nor "||", so
// those two operators were split into two one-character operator tokens.
// The duplicate is replaced by "<<" and "||" is added next to "|".
JS_NonAssignmentOperator	= ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"?"|">>"|"!"|"&"|"=="|":"|"<<"|"~"|"|"|"||"|"&&"|">>>")
JS_AssignmentOperator		= ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
JS_Operator					= ({JS_NonAssignmentOperator}|{JS_AssignmentOperator})
JS_Identifier				= ({IdentifierStart}{IdentifierPart}*)
JS_ErrorIdentifier			= ({NonSeparator}+)
// "/", a first character that is not '*', '/' or a bare backslash (escapes
// allowed), the body, a closing "/", then any of the flags g, i, m.
JS_Regex					= ("/"([^\*\\/]|\\.)([^/\\]|\\.)*"/"[gim]*)

// URLs highlighted as hyperlinks inside comments.
URLGenDelim					= ([:\/\?#\[\]@])
URLSubDelim					= ([\!\$&'\(\)\*\+,;=])
URLUnreserved				= ({Letter}|"_"|{Digit}|[\-\.\~])
URLCharacter				= ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters				= ({URLCharacter}*)
URLEndCharacter				= ([\/\$]|{Letter}|{Digit})
// Starts with http://, https://, ftp://, file:// or "www."; this is why the
// comment states stop their bulk-skip rules at the letters h, w and f.
URL							= (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
440
441
// Lexical states.  getTokenList() resumes a new line in one of these based
// on the token type the previous line ended with; INTAG_CHECK_TAG_NAME and
// JS_EOL_COMMENT have no case there, so a line never starts in them.
442%state COMMENT
443%state PI
444%state DTD
445%state INTAG
446%state INTAG_CHECK_TAG_NAME
447%state INATTR_DOUBLE
448%state INATTR_SINGLE
449%state INTAG_SCRIPT
450%state INATTR_DOUBLE_SCRIPT
451%state INATTR_SINGLE_SCRIPT
452%state JAVASCRIPT
453%state JS_STRING
454%state JS_CHAR
455%state JS_MLC
456%state JS_EOL_COMMENT
457
458
459%%
460
461<YYINITIAL> {
 // Plain HTML content outside of any tag, comment, PI, DTD or script.
 // For multi-line constructs 'start' remembers where the construct began
 // so the whole run can be emitted as one token later.
462 "<!--" { start = zzMarkedPos-4; yybegin(COMMENT); }
 // Opening script tag: emit "<" and the six-character tag name, then scan
 // the tag's attributes in INTAG_SCRIPT.
463 "<"[sS][cC][rR][iI][pP][tT] {
464 addToken(zzStartRead,zzStartRead, Token.MARKUP_TAG_DELIMITER);
465 addToken(zzMarkedPos-6,zzMarkedPos-1, Token.MARKUP_TAG_NAME);
466 start = zzMarkedPos; yybegin(INTAG_SCRIPT);
467 }
468 "<!" { start = zzMarkedPos-2; yybegin(DTD); }
469 "<?" { start = zzMarkedPos-2; yybegin(PI); }
 // "<name": only the "<" is tokenized here.  zzMarkedPos is moved back by
 // hand (instead of the commented-out yypushback) so the name itself is
 // scanned again by INTAG_CHECK_TAG_NAME.
470 "<"({Letter}|{Digit})+ {
471 int count = yylength();
472 addToken(zzStartRead,zzStartRead, Token.MARKUP_TAG_DELIMITER);
473 zzMarkedPos -= (count-1); //yypushback(count-1);
474 yybegin(INTAG_CHECK_TAG_NAME);
475 }
 // "</name": same as above, but the delimiter token is the two chars "</".
476 "</"({Letter}|{Digit})+ {
477 int count = yylength();
478 addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER);
479 zzMarkedPos -= (count-2); //yypushback(count-2);
480 yybegin(INTAG_CHECK_TAG_NAME);
481 }
 // A "<" or "</" that is not followed by a letter or digit.
482 "<" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(INTAG); }
483 "</" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(INTAG); }
484 {LineTerminator} { addNullToken(); return firstToken; }
485 {Identifier} { addToken(Token.IDENTIFIER); } // Catches everything.
486 {AmperItem} { addToken(Token.DATA_TYPE); }
487 {Whitespace} { addToken(Token.WHITESPACE); }
488 <<EOF>> { addNullToken(); return firstToken; }
489}
490
491<COMMENT> {
 // Inside an HTML comment.  Rules with an empty action just keep consuming;
 // the accumulated text from 'start' is emitted when a URL, the end of the
 // comment or the end of the line is reached.  The bulk rule stops at
 // h, w and f because those are the letters a {URL} can begin with; a
 // letter that does not start a URL is consumed by the [hwf] rule.
492 [^hwf\n\-]+ {}
 // Emit the comment text before the URL, then the URL itself as a
 // hyperlink token of the same type, and restart accumulation after it.
493 {URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
494 [hwf] {}
495 {LineTerminator} { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
 // zzStartRead+2 is the last char of the three-character terminator.
496 "-->" { yybegin(YYINITIAL); addToken(start,zzStartRead+2, Token.COMMENT_MULTILINE); }
497 "-" {}
498 <<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
499}
500
501<PI> {
 // Inside a processing instruction ("<?" ... "?>").  Text accumulates from
 // 'start' and is emitted as one PREPROCESSOR token at "?>" or end of line.
502 [^\n\?]+ {}
503 {LineTerminator} { addToken(start,zzStartRead-1, Token.PREPROCESSOR); return firstToken; }
504 "?>" { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.PREPROCESSOR); }
 // A '?' that is not part of "?>" is ordinary content.
505 "?" {}
506 <<EOF>> { addToken(start,zzStartRead-1, Token.PREPROCESSOR); return firstToken; }
507}
508
509<DTD> {
 // Inside a "<!" declaration.  Everything up to and including the next
 // ">" (or up to the end of the line) is emitted as a VARIABLE token.
510 [^\n>]+ {}
511 {LineTerminator} { addToken(start,zzStartRead-1, Token.VARIABLE); return firstToken; }
512 ">" { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.VARIABLE); }
513 <<EOF>> { addToken(start,zzStartRead-1, Token.VARIABLE); return firstToken; }
514}
515
516<INTAG_CHECK_TAG_NAME> {
 // Entered from YYINITIAL right after the "<" or "</" delimiter token, with
 // the scanner positioned on the tag name.  A name from the list below
 // (matched case-insensitively) is emitted as MARKUP_TAG_NAME.  That action
 // does not change state, so the character after the name is then handed
 // to INTAG by the "." rule at the bottom.
517 [Aa] |
518 [aA][bB][bB][rR] |
519 [aA][cC][rR][oO][nN][yY][mM] |
520 [aA][dD][dD][rR][eE][sS][sS] |
521 [aA][pP][pP][lL][eE][tT] |
522 [aA][rR][eE][aA] |
523 [aA][rR][tT][iI][cC][lL][eE] |
524 [aA][sS][iI][dD][eE] |
525 [aA][uU][dD][iI][oO] |
526 [bB] |
527 [bB][aA][sS][eE] |
528 [bB][aA][sS][eE][fF][oO][nN][tT] |
529 [bB][dD][oO] |
530 [bB][gG][sS][oO][uU][nN][dD] |
531 [bB][iI][gG] |
532 [bB][lL][iI][nN][kK] |
533 [bB][lL][oO][cC][kK][qQ][uU][oO][tT][eE] |
534 [bB][oO][dD][yY] |
535 [bB][rR] |
536 [bB][uU][tT][tT][oO][nN] |
537 [cC][aA][nN][vV][aA][sS] |
538 [cC][aA][pP][tT][iI][oO][nN] |
539 [cC][eE][nN][tT][eE][rR] |
540 [cC][iI][tT][eE] |
541 [cC][oO][dD][eE] |
542 [cC][oO][lL] |
543 [cC][oO][lL][gG][rR][oO][uU][pP] |
544 [cC][oO][mM][mM][aA][nN][dD] |
545 [cC][oO][mM][mM][eE][nN][tT] |
546 [dD][dD] |
547 [dD][aA][tT][aA][gG][rR][iI][dD] |
548 [dD][aA][tT][aA][lL][iI][sS][tT] |
549 [dD][aA][tT][aA][tT][eE][mM][pP][lL][aA][tT][eE] |
550 [dD][eE][lL] |
551 [dD][eE][tT][aA][iI][lL][sS] |
552 [dD][fF][nN] |
553 [dD][iI][aA][lL][oO][gG] |
554 [dD][iI][rR] |
555 [dD][iI][vV] |
556 [dD][lL] |
557 [dD][tT] |
558 [eE][mM] |
559 [eE][mM][bB][eE][dD] |
560 [eE][vV][eE][nN][tT][sS][oO][uU][rR][cC][eE] |
561 [fF][iI][eE][lL][dD][sS][eE][tT] |
562 [fF][iI][gG][uU][rR][eE] |
563 [fF][oO][nN][tT] |
564 [fF][oO][oO][tT][eE][rR] |
565 [fF][oO][rR][mM] |
566 [fF][rR][aA][mM][eE] |
567 [fF][rR][aA][mM][eE][sS][eE][tT] |
568 [hH][123456] |
569 [hH][eE][aA][dD] |
570 [hH][eE][aA][dD][eE][rR] |
571 [hH][rR] |
572 [hH][tT][mM][lL] |
573 [iI] |
574 [iI][fF][rR][aA][mM][eE] |
575 [iI][lL][aA][yY][eE][rR] |
576 [iI][mM][gG] |
577 [iI][nN][pP][uU][tT] |
578 [iI][nN][sS] |
579 [iI][sS][iI][nN][dD][eE][xX] |
580 [kK][bB][dD] |
581 [kK][eE][yY][gG][eE][nN] |
582 [lL][aA][bB][eE][lL] |
583 [lL][aA][yY][eE][rR] |
584 [lL][eE][gG][eE][nN][dD] |
585 [lL][iI] |
586 [lL][iI][nN][kK] |
587 [mM][aA][pP] |
588 [mM][aA][rR][kK] |
589 [mM][aA][rR][qQ][uU][eE][eE] |
590 [mM][eE][nN][uU] |
591 [mM][eE][tT][aA] |
592 [mM][eE][tT][eE][rR] |
593 [mM][uU][lL][tT][iI][cC][oO][lL] |
594 [nN][aA][vV] |
595 [nN][eE][sS][tT] |
596 [nN][oO][bB][rR] |
597 [nN][oO][eE][mM][bB][eE][dD] |
598 [nN][oO][fF][rR][aA][mM][eE][sS] |
599 [nN][oO][lL][aA][yY][eE][rR] |
600 [nN][oO][sS][cC][rR][iI][pP][tT] |
601 [oO][bB][jJ][eE][cC][tT] |
602 [oO][lL] |
603 [oO][pP][tT][gG][rR][oO][uU][pP] |
604 [oO][pP][tT][iI][oO][nN] |
605 [oO][uU][tT][pP][uU][tT] |
606 [pP] |
607 [pP][aA][rR][aA][mM] |
608 [pP][lL][aA][iI][nN][tT][eE][xX][tT] |
609 [pP][rR][eE] |
610 [pP][rR][oO][gG][rR][eE][sS][sS] |
611 [qQ] |
612 [rR][uU][lL][eE] |
613 [sS] |
614 [sS][aA][mM][pP] |
615 [sS][cC][rR][iI][pP][tT] |
616 [sS][eE][cC][tT][iI][oO][nN] |
617 [sS][eE][lL][eE][cC][tT] |
618 [sS][eE][rR][vV][eE][rR] |
619 [sS][mM][aA][lL][lL] |
620 [sS][oO][uU][rR][cC][eE] |
621 [sS][pP][aA][cC][eE][rR] |
622 [sS][pP][aA][nN] |
623 [sS][tT][rR][iI][kK][eE] |
624 [sS][tT][rR][oO][nN][gG] |
625 [sS][tT][yY][lL][eE] |
626 [sS][uU][bB] |
627 [sS][uU][pP] |
628 [tT][aA][bB][lL][eE] |
629 [tT][bB][oO][dD][yY] |
630 [tT][dD] |
631 [tT][eE][xX][tT][aA][rR][eE][aA] |
632 [tT][fF][oO][oO][tT] |
633 [tT][hH] |
634 [tT][hH][eE][aA][dD] |
635 [tT][iI][mM][eE] |
636 [tT][iI][tT][lL][eE] |
637 [tT][rR] |
638 [tT][tT] |
639 [uU] |
640 [uU][lL] |
641 [vV][aA][rR] |
642 [vV][iI][dD][eE][oO] { addToken(Token.MARKUP_TAG_NAME); }
643 {InTagIdentifier} { /* A non-recognized HTML tag name */ yypushback(yylength()); yybegin(INTAG); }
644 . { /* Any other char, e.g. the one following a recognized tag name: let INTAG handle it */ yypushback(1); yybegin(INTAG); }
645 <<EOF>> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG); return firstToken; }
646}
647
648<INTAG> {
 // Inside a tag, after the tag name: attribute names, "=", quoted values
 // and the closing delimiter.  Reaching the end of the line here records
 // INTERNAL_INTAG so the next line resumes inside the tag.
649 "/" { addToken(Token.MARKUP_TAG_DELIMITER); }
650 {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
651 {Whitespace} { addToken(Token.WHITESPACE); }
652 "=" { addToken(Token.OPERATOR); }
653 "/>" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }
654 ">" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }
 // 'start' is set to the opening quote so the value token includes it.
655 [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE); }
656 [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE); }
657 <<EOF>> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG); return firstToken; }
658}
659
660<INATTR_DOUBLE> {
 // Inside a double-quoted attribute value.  The value, quotes included, is
 // emitted from 'start'; an unterminated value ends the line with an
 // INTERNAL_ATTR_DOUBLE end token so the next line continues it.
661 [^\"]* {}
662 [\"] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
663 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE); return firstToken; }
664}
665
666<INATTR_SINGLE> {
 // Inside a single-quoted attribute value; mirrors INATTR_DOUBLE but ends
 // an unterminated line with INTERNAL_ATTR_SINGLE.
667 [^\']* {}
668 [\'] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
669 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE); return firstToken; }
670}
671
672<INTAG_SCRIPT> {
 // Inside an opening script tag.  Same token types as INTAG, except that a
 // plain ">" switches to JAVASCRIPT, while a self-closing "/>" goes back
 // to YYINITIAL.
673 {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
674 "/>" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(YYINITIAL); }
675 "/" { addToken(Token.MARKUP_TAG_DELIMITER); } // Won't appear in valid HTML.
676 {Whitespace} { addToken(Token.WHITESPACE); }
677 "=" { addToken(Token.OPERATOR); }
678 ">" { yybegin(JAVASCRIPT); addToken(Token.MARKUP_TAG_DELIMITER); }
679 [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE_SCRIPT); }
680 [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE_SCRIPT); }
681 <<EOF>> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG_SCRIPT); return firstToken; }
682}
683
684<INATTR_DOUBLE_SCRIPT> {
 // Double-quoted attribute value inside a script tag; returns to
 // INTAG_SCRIPT (not INTAG) when the closing quote is found.
685 [^\"]* {}
686 [\"] { yybegin(INTAG_SCRIPT); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
687 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT); return firstToken; }
688}
689
690<INATTR_SINGLE_SCRIPT> {
 // Single-quoted attribute value inside a script tag; returns to
 // INTAG_SCRIPT (not INTAG) when the closing quote is found.
691 [^\']* {}
692 [\'] { yybegin(INTAG_SCRIPT); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
693 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT); return firstToken; }
694}
695
696<JAVASCRIPT> {
697
 // Closing script tag: leave JavaScript and emit "</", the six-character
 // tag name and ">" as three separate tokens.
698 {EndScriptTag} {
699 yybegin(YYINITIAL);
700 addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER);
701 addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
702 addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
703 }
704
705 // ECMA keywords.
706 "break" |
707 "continue" |
708 "delete" |
709 "else" |
710 "for" |
711 "function" |
712 "if" |
713 "in" |
714 "new" |
715 "return" |
716 "this" |
717 "typeof" |
718 "var" |
719 "void" |
720 "while" |
721 "with" { addToken(Token.RESERVED_WORD); }
722
723 // Reserved (but not yet used) ECMA keywords.
724 "abstract" { addToken(Token.RESERVED_WORD); }
725 "boolean" { addToken(Token.DATA_TYPE); }
726 "byte" { addToken(Token.DATA_TYPE); }
727 "case" { addToken(Token.RESERVED_WORD); }
728 "catch" { addToken(Token.RESERVED_WORD); }
729 "char" { addToken(Token.DATA_TYPE); }
730 "class" { addToken(Token.RESERVED_WORD); }
731 "const" { addToken(Token.RESERVED_WORD); }
732 "debugger" { addToken(Token.RESERVED_WORD); }
733 "default" { addToken(Token.RESERVED_WORD); }
734 "do" { addToken(Token.RESERVED_WORD); }
735 "double" { addToken(Token.DATA_TYPE); }
736 "enum" { addToken(Token.RESERVED_WORD); }
737 "export" { addToken(Token.RESERVED_WORD); }
738 "extends" { addToken(Token.RESERVED_WORD); }
739 "final" { addToken(Token.RESERVED_WORD); }
740 "finally" { addToken(Token.RESERVED_WORD); }
741 "float" { addToken(Token.DATA_TYPE); }
742 "goto" { addToken(Token.RESERVED_WORD); }
743 "implements" { addToken(Token.RESERVED_WORD); }
744 "import" { addToken(Token.RESERVED_WORD); }
745 "instanceof" { addToken(Token.RESERVED_WORD); }
746 "int" { addToken(Token.DATA_TYPE); }
747 "interface" { addToken(Token.RESERVED_WORD); }
748 "long" { addToken(Token.DATA_TYPE); }
749 "native" { addToken(Token.RESERVED_WORD); }
750 "package" { addToken(Token.RESERVED_WORD); }
751 "private" { addToken(Token.RESERVED_WORD); }
752 "protected" { addToken(Token.RESERVED_WORD); }
753 "public" { addToken(Token.RESERVED_WORD); }
754 "short" { addToken(Token.DATA_TYPE); }
755 "static" { addToken(Token.RESERVED_WORD); }
756 "super" { addToken(Token.RESERVED_WORD); }
757 "switch" { addToken(Token.RESERVED_WORD); }
758 "synchronized" { addToken(Token.RESERVED_WORD); }
759 "throw" { addToken(Token.RESERVED_WORD); }
760 "throws" { addToken(Token.RESERVED_WORD); }
761 "transient" { addToken(Token.RESERVED_WORD); }
762 "try" { addToken(Token.RESERVED_WORD); }
763 "volatile" { addToken(Token.RESERVED_WORD); }
764 "null" { addToken(Token.RESERVED_WORD); }
765
766 // Literals.
767 "false" |
768 "true" { addToken(Token.LITERAL_BOOLEAN); }
769 "NaN" { addToken(Token.RESERVED_WORD); }
770 "Infinity" { addToken(Token.RESERVED_WORD); }
771
772 // Functions.
773 "eval" |
774 "parseInt" |
775 "parseFloat" |
776 "escape" |
777 "unescape" |
778 "isNaN" |
779 "isFinite" { addToken(Token.FUNCTION); }
780
 // A line ending in plain JavaScript records INTERNAL_IN_JS so the next
 // line resumes in this state.
781 {LineTerminator} { addEndToken(INTERNAL_IN_JS); return firstToken; }
782 {JS_Identifier} { addToken(Token.IDENTIFIER); }
783 {Whitespace} { addToken(Token.WHITESPACE); }
784
785 /* String/Character literals. */
 // 'start' points at the opening quote; the literal is assumed valid
 // until the string state finds a bad escape.
786 [\'] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_CHAR); }
787 [\"] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_STRING); }
788
789 /* Comment literals. */
790 "/**/" { addToken(Token.COMMENT_MULTILINE); }
791 {JS_MLCBegin} { start = zzMarkedPos-2; yybegin(JS_MLC); }
792 {JS_LineCommentBegin} { start = zzMarkedPos-2; yybegin(JS_EOL_COMMENT); }
793
794 /* Attempt to identify regular expressions (not foolproof) - do after comments! */
795 {JS_Regex} {
796 boolean highlightedAsRegex = false;
797 if (firstToken==null) {
798 addToken(Token.REGEX);
799 highlightedAsRegex = true;
800 }
801 else {
802 // If this is *likely* to be a regex, based on
803 // the previous token, highlight it as such.
804 Token t = firstToken.getLastNonCommentNonWhitespaceToken();
805 if (RSyntaxUtilities.regexCanFollowInJavaScript(t)) {
806 addToken(Token.REGEX);
807 highlightedAsRegex = true;
808 }
809 }
810 // If it doesn't *appear* to be a regex, highlight it as
811 // individual tokens.
812 if (!highlightedAsRegex) {
 // Emit only the leading "/" as an operator, then rewind all scanner
 // positions to just after it so the rest of the match is scanned again.
813 int temp = zzStartRead + 1;
814 addToken(zzStartRead, zzStartRead, Token.OPERATOR);
815 zzStartRead = zzCurrentPos = zzMarkedPos = temp;
816 }
817 }
818
819 /* Separators. */
820 {JS_Separator} { addToken(Token.SEPARATOR); }
821 {JS_Separator2} { addToken(Token.IDENTIFIER); }
822
823 /* Operators. */
824 {JS_Operator} { addToken(Token.OPERATOR); }
825
826 /* Numbers */
827 {JS_IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
828 {JS_HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
829 {JS_FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
830 {JS_ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
831
832 {JS_ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
833
834 /* Ended with a line not in a string or comment. */
835 <<EOF>> { addEndToken(INTERNAL_IN_JS); return firstToken; }
836
837 /* Catch any other (unhandled) characters and flag them as bad. */
838 . { addToken(Token.ERROR_IDENTIFIER); }
839
840}
841
842<JS_STRING> {
 // Inside a double-quoted JavaScript string.  validJSString is cleared by
 // a malformed \x or \u escape and decides whether the finished literal is
 // emitted as a string or as an error token.
843 [^\n\\\"]+ {}
 // A string cut off by the end of the line without a trailing backslash
 // is always an error; the next line resumes in plain JavaScript.
844 \n { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addEndToken(INTERNAL_IN_JS); return firstToken; }
845 \\x{HexDigit}{2} {}
846 \\x { /* Invalid latin-1 character \xXX */ validJSString = false; }
847 \\u{HexDigit}{4} {}
848 \\u { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
849 \\. { /* Skip all escaped chars. */ }
850 \\ { /* Line ending in '\' => continue to next line. */
851 if (validJSString) {
852 addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE);
853 addEndToken(INTERNAL_IN_JS_STRING_VALID);
854 }
855 else {
856 addToken(start,zzStartRead, Token.ERROR_STRING_DOUBLE);
857 addEndToken(INTERNAL_IN_JS_STRING_INVALID);
858 }
859 return firstToken;
860 }
861 \" { int type = validJSString ? Token.LITERAL_STRING_DOUBLE_QUOTE : Token.ERROR_STRING_DOUBLE; addToken(start,zzStartRead, type); yybegin(JAVASCRIPT); }
862 <<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addEndToken(INTERNAL_IN_JS); return firstToken; }
863}
864
865<JS_CHAR> {
 // Inside a single-quoted JavaScript string; mirrors JS_STRING but emits
 // LITERAL_CHAR / ERROR_CHAR and the INTERNAL_IN_JS_CHAR_* end tokens.
866 [^\n\\\']+ {}
867 \n { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addEndToken(INTERNAL_IN_JS); return firstToken; }
868 \\x{HexDigit}{2} {}
869 \\x { /* Invalid latin-1 character \xXX */ validJSString = false; }
870 \\u{HexDigit}{4} {}
871 \\u { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
872 \\. { /* Skip all escaped chars. */ }
873 \\ { /* Line ending in '\' => continue to next line. */
874 if (validJSString) {
875 addToken(start,zzStartRead, Token.LITERAL_CHAR);
876 addEndToken(INTERNAL_IN_JS_CHAR_VALID);
877 }
878 else {
879 addToken(start,zzStartRead, Token.ERROR_CHAR);
880 addEndToken(INTERNAL_IN_JS_CHAR_INVALID);
881 }
882 return firstToken;
883 }
884 \' { int type = validJSString ? Token.LITERAL_CHAR : Token.ERROR_CHAR; addToken(start,zzStartRead, type); yybegin(JAVASCRIPT); }
885 <<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addEndToken(INTERNAL_IN_JS); return firstToken; }
886}
887
<JS_MLC> {
	// JavaScript MLC's.  This state is essentially Java's MLC state.
	// Comment text accumulates from 'start'; the bulk rule stops at h, w and
	// f (possible URL starts), at '<' (possible closing script tag) and at
	// '*' (possible end of comment).
	[^hwf<\n\*]+			{}
	// The text before a URL and the URL itself are both part of a multi-line
	// comment, so both are emitted as COMMENT_MULTILINE.  (They used to be
	// emitted as COMMENT_EOL, unlike every other rule in this state.)
	{URL}					{ int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
	[hwf]					{}
	// A closing script tag ends the script even inside an open comment:
	// flush the comment, then emit "</", the tag name and ">".
	{EndScriptTag}			{
								yybegin(YYINITIAL);
								int temp = zzStartRead;
								addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE);
								addToken(temp,temp+1, Token.MARKUP_TAG_DELIMITER);
								addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
								addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
							}
	"<"						{}
	\n						{ addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
	{JS_MLCEnd}				{ yybegin(JAVASCRIPT); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
	\*						{}
	<<EOF>>					{ addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
}
907
908<JS_EOL_COMMENT> {
 // Inside a JavaScript "//" comment.  Text accumulates from 'start' and is
 // emitted as COMMENT_EOL; the comment ends with the line, so the end
 // token is INTERNAL_IN_JS and the next line resumes in plain JavaScript.
909 [^hwf<\n]+ {}
910 {URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
911 [hwf] {}
 // A closing script tag ends the script even inside the comment.
912 {EndScriptTag} {
913 yybegin(YYINITIAL);
914 int temp = zzStartRead;
915 addToken(start,zzStartRead-1, Token.COMMENT_EOL);
916 addToken(temp,temp+1, Token.MARKUP_TAG_DELIMITER);
917 addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
918 addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
919 }
920 "<" {}
921 \n { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addEndToken(INTERNAL_IN_JS); return firstToken; }
922 <<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addEndToken(INTERNAL_IN_JS); return firstToken; }
923
924}
Note: See TracBrowser for help on using the repository browser.