source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/HTMLTokenMaker.flex.orig@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut an a text edit area for GLI that supports color syntax highlighting

File size: 30.7 KB
Line 
1/*
2 * 01/24/2005
3 *
4 * HTMLTokenMaker.java - Generates tokens for HTML syntax highlighting.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import java.io.*;
12import javax.swing.text.Segment;
13
14import org.fife.ui.rsyntaxtextarea.*;
15
16
17/**
18 * Scanner for HTML 5 files.
19 *
20 * This implementation was created using
21 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22 * was modified for performance. Memory allocation needs to be almost
23 * completely removed to be competitive with the handwritten lexers (subclasses
24 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
25 * Strings are never allocated (via yytext()), and the scanner never has to
26 * worry about refilling its buffer (needlessly copying chars around).
27 * We can achieve this because RText always scans exactly 1 line of tokens at a
28 * time, and hands the scanner this line as an array of characters (a Segment
29 * really). Since tokens contain pointers to char arrays instead of Strings
30 * holding their contents, there is no need for allocating new memory for
31 * Strings.<p>
32 *
33 * The actual algorithm generated for scanning has, of course, not been
34 * modified.<p>
35 *
36 * If you wish to regenerate this file yourself, keep in mind the following:
37 * <ul>
38 * <li>The generated <code>HTMLTokenMaker.java</code> file will contain two
39 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40 * You should hand-delete the second of each definition (the ones
41 * generated by the lexer), as these generated methods modify the input
42 * buffer, which we'll never have to do.</li>
43 * <li>You should also change the declaration/definition of zzBuffer to NOT
44 * be initialized. This is a needless memory allocation for us since we
45 * will be pointing the array somewhere else anyway.</li>
46 * <li>You should NOT call <code>yylex()</code> on the generated scanner
47 * directly; rather, you should use <code>getTokenList</code> as you would
48 * with any other <code>TokenMaker</code> instance.</li>
49 * </ul>
50 *
51 * @author Robert Futrell
52 * @version 0.8
53 *
54 */
55%%
56
57%public
58%class HTMLTokenMaker
59%extends AbstractMarkupTokenMaker
60%unicode
61%type org.fife.ui.rsyntaxtextarea.Token
62
63
64%{
65
66 /**
67 * Type specific to HTMLTokenMaker denoting a line ending with an unclosed
68 * double-quote attribute.
69 */
70 public static final int INTERNAL_ATTR_DOUBLE = -1;
71
72
73 /**
74 * Type specific to HTMLTokenMaker denoting a line ending with an unclosed
75 * single-quote attribute.
76 */
77 public static final int INTERNAL_ATTR_SINGLE = -2;
78
79
80 /**
81 * Token type specific to HTMLTokenMaker; this signals that the user has
82 * ended a line with an unclosed HTML tag; thus a new line is beginning
83 * still inside of the tag.
84 */
85 public static final int INTERNAL_INTAG = -3;
86
87 /**
88 * Token type specific to HTMLTokenMaker; this signals that the user has
89 * ended a line with an unclosed <code>&lt;script&gt;</code> tag.
90 */
91 public static final int INTERNAL_INTAG_SCRIPT = -4;
92
93 /**
94 * Token type specifying we're in a double-quoted attribute in a
95 * script tag.
96 */
97 public static final int INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT = -5;
98
99 /**
100 * Token type specifying we're in a single-quoted attribute in a
101 * script tag.
102 */
103 public static final int INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT = -6;
104
105 /**
106 * Token type specifying we're in JavaScript.
107 */
108 public static final int INTERNAL_IN_JS = -7;
109
110 /**
111 * Token type specifying we're in a JavaScript multiline comment.
112 */
113 public static final int INTERNAL_IN_JS_MLC = -8;
114
115 /**
116 * Token type specifying we're in an invalid multi-line JS string.
117 */
118 public static final int INTERNAL_IN_JS_STRING_INVALID = -9;
119
120 /**
121 * Token type specifying we're in a valid multi-line JS string.
122 */
123 public static final int INTERNAL_IN_JS_STRING_VALID = -10;
124
125 /**
126 * Token type specifying we're in an invalid multi-line JS single-quoted string.
127 */
128 public static final int INTERNAL_IN_JS_CHAR_INVALID = -11;
129
130 /**
131 * Token type specifying we're in a valid multi-line JS single-quoted string.
132 */
133 public static final int INTERNAL_IN_JS_CHAR_VALID = -12;
134
135 /**
136 * Whether closing markup tags are automatically completed for HTML.
137 */
138 private static boolean completeCloseTags;
139
140 /**
141 * When in the JS_STRING state, whether the current string is valid.
142 */
143 private boolean validJSString;
144
145
146 /**
147 * Constructor. This must be here because JFlex does not generate a
148 * no-parameter constructor.
149 */
150 public HTMLTokenMaker() {
151 super();
152 }
153
154
155 /**
156 * Adds the token specified to the current linked list of tokens as an
157 * "end token;" that is, at <code>zzMarkedPos</code>.
158 *
159 * @param tokenType The token's type.
160 */
161 private void addEndToken(int tokenType) {
162 addToken(zzMarkedPos,zzMarkedPos, tokenType);
163 }
164
165
166 /**
167 * Adds the token specified to the current linked list of tokens.
168 *
169 * @param tokenType The token's type.
170 * @see #addToken(int, int, int)
171 */
172 private void addHyperlinkToken(int start, int end, int tokenType) {
173 int so = start + offsetShift;
174 addToken(zzBuffer, start,end, tokenType, so, true);
175 }
176
177
178 /**
179 * Adds the token specified to the current linked list of tokens.
180 *
181 * @param tokenType The token's type.
182 */
183 private void addToken(int tokenType) {
184 addToken(zzStartRead, zzMarkedPos-1, tokenType);
185 }
186
187
188 /**
189 * Adds the token specified to the current linked list of tokens.
190 *
191 * @param tokenType The token's type.
192 */
193 private void addToken(int start, int end, int tokenType) {
194 int so = start + offsetShift;
195 addToken(zzBuffer, start,end, tokenType, so);
196 }
197
198
199 /**
200 * Adds the token specified to the current linked list of tokens.
201 *
202 * @param array The character array.
203 * @param start The starting offset in the array.
204 * @param end The ending offset in the array.
205 * @param tokenType The token's type.
206 * @param startOffset The offset in the document at which this token
207 * occurs.
208 */
209 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
210 super.addToken(array, start,end, tokenType, startOffset);
211 zzStartRead = zzMarkedPos;
212 }
213
214
215 /**
216 * Sets whether markup close tags should be completed. You might not want
217 * this to be the case, since some tags in standard HTML aren't usually
218 * closed.
219 *
220 * @return Whether closing markup tags are completed.
221 * @see #setCompleteCloseTags(boolean)
222 */
223 public boolean getCompleteCloseTags() {
224 return completeCloseTags;
225 }
226
227
228 /**
229 * Returns the first token in the linked list of tokens generated
230 * from <code>text</code>. This method must be implemented by
231 * subclasses so they can correctly implement syntax highlighting.
232 *
233 * @param text The text from which to get tokens.
234 * @param initialTokenType The token type we should start with.
235 * @param startOffset The offset into the document at which
236 * <code>text</code> starts.
237 * @return The first <code>Token</code> in a linked list representing
238 * the syntax highlighted text.
239 */
240 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
241
242 resetTokenList();
243 this.offsetShift = -text.offset + startOffset;
244
245 // Start off in the proper state.
246 int state = Token.NULL;
247 switch (initialTokenType) {
248 case Token.COMMENT_MULTILINE:
249 state = COMMENT;
250 start = text.offset;
251 break;
252 case Token.PREPROCESSOR:
253 state = PI;
254 start = text.offset;
255 break;
256 case Token.VARIABLE:
257 state = DTD;
258 start = text.offset;
259 break;
260 case INTERNAL_INTAG:
261 state = INTAG;
262 start = text.offset;
263 break;
264 case INTERNAL_INTAG_SCRIPT:
265 state = INTAG_SCRIPT;
266 start = text.offset;
267 break;
268 case INTERNAL_ATTR_DOUBLE:
269 state = INATTR_DOUBLE;
270 start = text.offset;
271 break;
272 case INTERNAL_ATTR_SINGLE:
273 state = INATTR_SINGLE;
274 start = text.offset;
275 break;
276 case INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT:
277 state = INATTR_DOUBLE_SCRIPT;
278 start = text.offset;
279 break;
280 case INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT:
281 state = INATTR_SINGLE_SCRIPT;
282 start = text.offset;
283 break;
284 case INTERNAL_IN_JS:
285 state = JAVASCRIPT;
286 start = text.offset;
287 break;
288 case INTERNAL_IN_JS_MLC:
289 state = JS_MLC;
290 start = text.offset;
291 break;
292 case INTERNAL_IN_JS_STRING_INVALID:
293 state = JS_STRING;
294 validJSString = false;
295 start = text.offset;
296 break;
297 case INTERNAL_IN_JS_STRING_VALID:
298 state = JS_STRING;
299 validJSString = true;
300 start = text.offset;
301 break;
302 case INTERNAL_IN_JS_CHAR_INVALID:
303 state = JS_CHAR;
304 validJSString = false;
305 start = text.offset;
306 break;
307 case INTERNAL_IN_JS_CHAR_VALID:
308 state = JS_CHAR;
309 validJSString = true;
310 start = text.offset;
311 break;
312 default:
313 state = Token.NULL;
314 }
315
316 s = text;
317 try {
318 yyreset(zzReader);
319 yybegin(state);
320 return yylex();
321 } catch (IOException ioe) {
322 ioe.printStackTrace();
323 return new DefaultToken();
324 }
325
326 }
327
328
329 /**
330 * Sets whether markup close tags should be completed. You might not want
331 * this to be the case, since some tags in standard HTML aren't usually
332 * closed.
333 *
334 * @param complete Whether closing markup tags are completed.
335 * @see #getCompleteCloseTags()
336 */
337 public static void setCompleteCloseTags(boolean complete) {
338 completeCloseTags = complete;
339 }
340
341
342 /**
343 * Refills the input buffer.
344 *
345 * @return <code>true</code> if EOF was reached, otherwise
346 * <code>false</code>.
347 */
348 private boolean zzRefill() {
349 return zzCurrentPos>=s.offset+s.count;
350 }
351
352
353 /**
354 * Resets the scanner to read from a new input stream.
355 * Does not close the old reader.
356 *
357 * All internal variables are reset, the old input stream
358 * <b>cannot</b> be reused (internal buffer is discarded and lost).
359 * Lexical state is set to <tt>YY_INITIAL</tt>.
360 *
361 * @param reader the new input stream
362 */
363 public final void yyreset(java.io.Reader reader) {
364 // 's' has been updated.
365 zzBuffer = s.array;
366 /*
367 * We replaced the line below with the two below it because zzRefill
368 * no longer "refills" the buffer (since the way we do it, it's always
369 * "full" the first time through, since it points to the segment's
370 * array). So, we assign zzEndRead here.
371 */
372 //zzStartRead = zzEndRead = s.offset;
373 zzStartRead = s.offset;
374 zzEndRead = zzStartRead + s.count - 1;
375 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
376 zzLexicalState = YYINITIAL;
377 zzReader = reader;
378 zzAtBOL = true;
379 zzAtEOF = false;
380 }
381
382
383%}
384
// HTML-specific stuff.
Whitespace              = ([ \t\f]+)
LineTerminator          = ([\n])
// Plain text: anything up to whitespace, a tag start or an entity start.
Identifier              = ([^ \t\n<&]+)
// An entity-like item such as "&amp;" (the trailing ';' is optional).
AmperItem               = ([&][^; \t]*[;]?)
InTagIdentifier         = ([^ \t\n\"\'/=>]+)
// A case-insensitive "</script>" with nothing between the name and '>'.
EndScriptTag            = ("</" [sS][cC][rR][iI][pP][tT] ">")


// JavaScript stuff.
Letter                  = [A-Za-z]
NonzeroDigit            = [1-9]
Digit                   = ("0"|{NonzeroDigit})
HexDigit                = ({Digit}|[A-Fa-f])
OctalDigit              = ([0-7])
EscapedSourceCharacter  = ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
NonSeparator            = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
IdentifierStart         = ({Letter}|"_"|"$")
IdentifierPart          = ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))
JS_MLCBegin             = "/*"
JS_MLCEnd               = "*/"
JS_LineCommentBegin     = "//"
JS_IntegerHelper1       = (({NonzeroDigit}{Digit}*)|"0")
JS_IntegerHelper2       = ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
JS_IntegerLiteral       = ({JS_IntegerHelper1}[lL]?)
JS_HexLiteral           = ({JS_IntegerHelper2}[lL]?)
JS_FloatHelper1         = ([fFdD]?)
JS_FloatHelper2         = ([eE][+-]?{Digit}+{JS_FloatHelper1})
JS_FloatLiteral1        = ({Digit}+"."({JS_FloatHelper1}|{JS_FloatHelper2}|{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2})))
JS_FloatLiteral2        = ("."{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2}))
JS_FloatLiteral3        = ({Digit}+{JS_FloatHelper2})
JS_FloatLiteral         = ({JS_FloatLiteral1}|{JS_FloatLiteral2}|{JS_FloatLiteral3}|({Digit}+[fFdD]))
// A number immediately followed by non-separator characters, e.g. "12abc".
JS_ErrorNumberFormat    = (({JS_IntegerLiteral}|{JS_HexLiteral}|{JS_FloatLiteral}){NonSeparator}+)
JS_Separator            = ([\(\)\{\}\[\]])
JS_Separator2           = ([\;,.])
// The original list named ">>" twice and had no "<<", "||", "===" or "!==",
// so each of those operators was scanned as several shorter operator tokens.
JS_NonAssignmentOperator = ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"!=="|"?"|">>"|"!"|"&"|"=="|"==="|":"|"<<"|"~"|"|"|"||"|"&&"|">>>")
JS_AssignmentOperator   = ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
JS_Operator             = ({JS_NonAssignmentOperator}|{JS_AssignmentOperator})
JS_Identifier           = ({IdentifierStart}{IdentifierPart}*)
JS_ErrorIdentifier      = ({NonSeparator}+)
// Candidate regular-expression literal; the JAVASCRIPT rule decides from the
// preceding token whether it is really highlighted as a regex.
JS_Regex                = ("/"([^\*\\/]|\\.)([^/\\]|\\.)*"/"[gim]*)

// URLs recognised inside comments.
URLGenDelim             = ([:\/\?#\[\]@])
URLSubDelim             = ([\!\$&'\(\)\*\+,;=])
URLUnreserved           = ({Letter}|"_"|{Digit}|[\-\.\~])
URLCharacter            = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters           = ({URLCharacter}*)
// A URL may only end in one of these, so trailing punctuation is excluded.
URLEndCharacter         = ([\/\$]|{Letter}|{Digit})
URL                     = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
434
435
// Markup constructs that can span several lines.
%state COMMENT, PI, DTD
// Inside an ordinary tag and its attribute values.
%state INTAG, INTAG_CHECK_TAG_NAME, INATTR_DOUBLE, INATTR_SINGLE
// Inside a <script> start tag and its attribute values.
%state INTAG_SCRIPT, INATTR_DOUBLE_SCRIPT, INATTR_SINGLE_SCRIPT
// Embedded JavaScript.
%state JAVASCRIPT, JS_STRING, JS_CHAR, JS_MLC, JS_EOL_COMMENT
451
452
453%%
454
<YYINITIAL> {
    // Start of an HTML comment; remember where it begins.
    "<!--" {
        start = zzMarkedPos - 4;
        yybegin(COMMENT);
    }
    // "<script": emit the delimiter and the tag name, then scan the rest of
    // the start tag in the script-specific in-tag state.
    "<"[sS][cC][rR][iI][pP][tT] {
        addToken(zzStartRead, zzStartRead, Token.MARKUP_TAG_DELIMITER);
        addToken(zzMarkedPos - 6, zzMarkedPos - 1, Token.MARKUP_TAG_NAME);
        start = zzMarkedPos;
        yybegin(INTAG_SCRIPT);
    }
    "<!" {
        start = zzMarkedPos - 2;
        yybegin(DTD);
    }
    "<?" {
        start = zzMarkedPos - 2;
        yybegin(PI);
    }
    // Opening tag: emit "<" and push the name back so that it can be
    // checked against the known tag names.
    "<"({Letter}|{Digit})+ {
        int matchLength = yylength();
        addToken(zzStartRead, zzStartRead, Token.MARKUP_TAG_DELIMITER);
        zzMarkedPos -= (matchLength - 1); // i.e. yypushback(matchLength-1)
        yybegin(INTAG_CHECK_TAG_NAME);
    }
    // Closing tag: emit "</" and push the name back in the same way.
    "</"({Letter}|{Digit})+ {
        int matchLength = yylength();
        addToken(zzStartRead, zzStartRead + 1, Token.MARKUP_TAG_DELIMITER);
        zzMarkedPos -= (matchLength - 2); // i.e. yypushback(matchLength-2)
        yybegin(INTAG_CHECK_TAG_NAME);
    }
    "<" {
        addToken(Token.MARKUP_TAG_DELIMITER);
        yybegin(INTAG);
    }
    "</" {
        addToken(Token.MARKUP_TAG_DELIMITER);
        yybegin(INTAG);
    }
    {LineTerminator} {
        addNullToken();
        return firstToken;
    }
    {Identifier} { addToken(Token.IDENTIFIER); } // Catches everything.
    {AmperItem} { addToken(Token.DATA_TYPE); }
    {Whitespace} { addToken(Token.WHITESPACE); }
    <<EOF>> {
        addNullToken();
        return firstToken;
    }
}
484
<COMMENT> {
    [^hwf\n\-]+ {}
    // A URL inside the comment: flush the comment text collected so far,
    // then add the URL itself as a hyperlink token.
    {URL} {
        // addToken() moves zzStartRead, so save the URL's start first.
        int urlStart = zzStartRead;
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        addHyperlinkToken(urlStart, zzMarkedPos - 1, Token.COMMENT_MULTILINE);
        start = zzMarkedPos;
    }
    // 'h', 'w' or 'f' that did not begin a URL.
    [hwf] {}
    {LineTerminator} {
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        return firstToken;
    }
    "-->" {
        yybegin(YYINITIAL);
        addToken(start, zzStartRead + 2, Token.COMMENT_MULTILINE);
    }
    // A '-' that did not begin "-->".
    "-" {}
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        return firstToken;
    }
}
494
<PI> {
    [^\n\?]+ {}
    {LineTerminator} {
        addToken(start, zzStartRead - 1, Token.PREPROCESSOR);
        return firstToken;
    }
    // End of the processing instruction; the token includes the "?>".
    "?>" {
        yybegin(YYINITIAL);
        addToken(start, zzStartRead + 1, Token.PREPROCESSOR);
    }
    // A '?' that did not begin "?>".
    "?" {}
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.PREPROCESSOR);
        return firstToken;
    }
}
502
<DTD> {
    [^\n>]+ {}
    {LineTerminator} {
        addToken(start, zzStartRead - 1, Token.VARIABLE);
        return firstToken;
    }
    // End of the declaration; the token includes the closing '>'.
    ">" {
        yybegin(YYINITIAL);
        addToken(start, zzStartRead, Token.VARIABLE);
    }
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.VARIABLE);
        return firstToken;
    }
}
509
// Entered right after "<" or "</" with the tag name pushed back.  A name from
// the list below is highlighted as a tag name; anything else is handed to the
// INTAG state unchanged.
<INTAG_CHECK_TAG_NAME> {
    [Aa] |
    [aA][bB][bB][rR] |
    [aA][cC][rR][oO][nN][yY][mM] |
    [aA][dD][dD][rR][eE][sS][sS] |
    [aA][pP][pP][lL][eE][tT] |
    [aA][rR][eE][aA] |
    [aA][rR][tT][iI][cC][lL][eE] |
    [aA][sS][iI][dD][eE] |
    [aA][uU][dD][iI][oO] |
    [bB] |
    [bB][aA][sS][eE] |
    [bB][aA][sS][eE][fF][oO][nN][tT] |
    [bB][dD][oO] |
    [bB][gG][sS][oO][uU][nN][dD] |
    [bB][iI][gG] |
    [bB][lL][iI][nN][kK] |
    [bB][lL][oO][cC][kK][qQ][uU][oO][tT][eE] |
    [bB][oO][dD][yY] |
    [bB][rR] |
    [bB][uU][tT][tT][oO][nN] |
    [cC][aA][nN][vV][aA][sS] |
    [cC][aA][pP][tT][iI][oO][nN] |
    [cC][eE][nN][tT][eE][rR] |
    [cC][iI][tT][eE] |
    [cC][oO][dD][eE] |
    [cC][oO][lL] |
    [cC][oO][lL][gG][rR][oO][uU][pP] |
    [cC][oO][mM][mM][aA][nN][dD] |
    [cC][oO][mM][mM][eE][nN][tT] |
    [dD][dD] |
    [dD][aA][tT][aA][gG][rR][iI][dD] |
    [dD][aA][tT][aA][lL][iI][sS][tT] |
    [dD][aA][tT][aA][tT][eE][mM][pP][lL][aA][tT][eE] |
    [dD][eE][lL] |
    [dD][eE][tT][aA][iI][lL][sS] |
    [dD][fF][nN] |
    [dD][iI][aA][lL][oO][gG] |
    [dD][iI][rR] |
    [dD][iI][vV] |
    [dD][lL] |
    [dD][tT] |
    [eE][mM] |
    [eE][mM][bB][eE][dD] |
    [eE][vV][eE][nN][tT][sS][oO][uU][rR][cC][eE] |
    [fF][iI][eE][lL][dD][sS][eE][tT] |
    [fF][iI][gG][uU][rR][eE] |
    [fF][oO][nN][tT] |
    [fF][oO][oO][tT][eE][rR] |
    [fF][oO][rR][mM] |
    [fF][rR][aA][mM][eE] |
    [fF][rR][aA][mM][eE][sS][eE][tT] |
    [hH][123456] |
    [hH][eE][aA][dD] |
    [hH][eE][aA][dD][eE][rR] |
    [hH][rR] |
    [hH][tT][mM][lL] |
    [iI] |
    [iI][fF][rR][aA][mM][eE] |
    [iI][lL][aA][yY][eE][rR] |
    [iI][mM][gG] |
    [iI][nN][pP][uU][tT] |
    [iI][nN][sS] |
    [iI][sS][iI][nN][dD][eE][xX] |
    [kK][bB][dD] |
    [kK][eE][yY][gG][eE][nN] |
    [lL][aA][bB][eE][lL] |
    [lL][aA][yY][eE][rR] |
    [lL][eE][gG][eE][nN][dD] |
    [lL][iI] |
    [lL][iI][nN][kK] |
    [mM][aA][pP] |
    [mM][aA][rR][kK] |
    [mM][aA][rR][qQ][uU][eE][eE] |
    [mM][eE][nN][uU] |
    [mM][eE][tT][aA] |
    [mM][eE][tT][eE][rR] |
    [mM][uU][lL][tT][iI][cC][oO][lL] |
    [nN][aA][vV] |
    [nN][eE][sS][tT] |
    [nN][oO][bB][rR] |
    [nN][oO][eE][mM][bB][eE][dD] |
    [nN][oO][fF][rR][aA][mM][eE][sS] |
    [nN][oO][lL][aA][yY][eE][rR] |
    [nN][oO][sS][cC][rR][iI][pP][tT] |
    [oO][bB][jJ][eE][cC][tT] |
    [oO][lL] |
    [oO][pP][tT][gG][rR][oO][uU][pP] |
    [oO][pP][tT][iI][oO][nN] |
    [oO][uU][tT][pP][uU][tT] |
    [pP] |
    [pP][aA][rR][aA][mM] |
    [pP][lL][aA][iI][nN][tT][eE][xX][tT] |
    [pP][rR][eE] |
    [pP][rR][oO][gG][rR][eE][sS][sS] |
    [qQ] |
    [rR][uU][lL][eE] |
    [sS] |
    [sS][aA][mM][pP] |
    [sS][cC][rR][iI][pP][tT] |
    [sS][eE][cC][tT][iI][oO][nN] |
    [sS][eE][lL][eE][cC][tT] |
    [sS][eE][rR][vV][eE][rR] |
    [sS][mM][aA][lL][lL] |
    [sS][oO][uU][rR][cC][eE] |
    [sS][pP][aA][cC][eE][rR] |
    [sS][pP][aA][nN] |
    [sS][tT][rR][iI][kK][eE] |
    [sS][tT][rR][oO][nN][gG] |
    [sS][tT][yY][lL][eE] |
    [sS][uU][bB] |
    [sS][uU][pP] |
    [tT][aA][bB][lL][eE] |
    [tT][bB][oO][dD][yY] |
    [tT][dD] |
    [tT][eE][xX][tT][aA][rR][eE][aA] |
    [tT][fF][oO][oO][tT] |
    [tT][hH] |
    [tT][hH][eE][aA][dD] |
    [tT][iI][mM][eE] |
    [tT][iI][tT][lL][eE] |
    [tT][rR] |
    [tT][tT] |
    [uU] |
    [uU][lL] |
    [vV][aA][rR] |
    [vV][iI][dD][eE][oO] {
        // A recognised HTML tag name.  Switch to INTAG explicitly; before,
        // the state was only left through the fallback rule below, which
        // therefore ran once after every recognised tag name.
        addToken(Token.MARKUP_TAG_NAME);
        yybegin(INTAG);
    }
    {InTagIdentifier} {
        /* A non-recognized HTML tag name */
        yypushback(yylength());
        yybegin(INTAG);
    }
    . {
        /* Defensive fallback: let INTAG deal with the character. */
        yypushback(1);
        yybegin(INTAG);
    }
    <<EOF>> {
        addToken(zzMarkedPos, zzMarkedPos, INTERNAL_INTAG);
        return firstToken;
    }
}
641
<INTAG> {
    "/" { addToken(Token.MARKUP_TAG_DELIMITER); }
    {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
    {Whitespace} { addToken(Token.WHITESPACE); }
    "=" { addToken(Token.OPERATOR); }
    // Either form of tag end returns to plain HTML.
    "/>" {
        yybegin(YYINITIAL);
        addToken(Token.MARKUP_TAG_DELIMITER);
    }
    ">" {
        yybegin(YYINITIAL);
        addToken(Token.MARKUP_TAG_DELIMITER);
    }
    // Start of a quoted attribute value; remember where the quote is.
    [\"] {
        start = zzMarkedPos - 1;
        yybegin(INATTR_DOUBLE);
    }
    [\'] {
        start = zzMarkedPos - 1;
        yybegin(INATTR_SINGLE);
    }
    // Line ended inside the tag; tell the next line to resume in INTAG.
    <<EOF>> {
        addEndToken(INTERNAL_INTAG);
        return firstToken;
    }
}
653
<INATTR_DOUBLE> {
    [^\"]* {}
    // Closing quote: the value token runs from the opening quote to here.
    [\"] {
        yybegin(INTAG);
        addToken(start, zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
    }
    // Unterminated on this line: emit what we have and flag continuation.
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
        addEndToken(INTERNAL_ATTR_DOUBLE);
        return firstToken;
    }
}
659
<INATTR_SINGLE> {
    [^\']* {}
    // Closing quote: the value token runs from the opening quote to here.
    [\'] {
        yybegin(INTAG);
        addToken(start, zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
    }
    // Unterminated on this line: emit what we have and flag continuation.
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
        addEndToken(INTERNAL_ATTR_SINGLE);
        return firstToken;
    }
}
665
<INTAG_SCRIPT> {
    {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
    // A self-closed script tag has no body, so go back to plain HTML.
    "/>" {
        addToken(Token.MARKUP_TAG_DELIMITER);
        yybegin(YYINITIAL);
    }
    "/" { addToken(Token.MARKUP_TAG_DELIMITER); } // Won't appear in valid HTML.
    {Whitespace} { addToken(Token.WHITESPACE); }
    "=" { addToken(Token.OPERATOR); }
    // End of the start tag: what follows is JavaScript.
    ">" {
        yybegin(JAVASCRIPT);
        addToken(Token.MARKUP_TAG_DELIMITER);
    }
    [\"] {
        start = zzMarkedPos - 1;
        yybegin(INATTR_DOUBLE_SCRIPT);
    }
    [\'] {
        start = zzMarkedPos - 1;
        yybegin(INATTR_SINGLE_SCRIPT);
    }
    // Line ended inside the script start tag.
    <<EOF>> {
        addEndToken(INTERNAL_INTAG_SCRIPT);
        return firstToken;
    }
}
677
<INATTR_DOUBLE_SCRIPT> {
    [^\"]* {}
    // Closing quote: back to the script start tag.
    [\"] {
        yybegin(INTAG_SCRIPT);
        addToken(start, zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
    }
    // Unterminated on this line: emit what we have and flag continuation.
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
        addEndToken(INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT);
        return firstToken;
    }
}
683
<INATTR_SINGLE_SCRIPT> {
    [^\']* {}
    // Closing quote: back to the script start tag.
    [\'] {
        yybegin(INTAG_SCRIPT);
        addToken(start, zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
    }
    // Unterminated on this line: emit what we have and flag continuation.
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
        addEndToken(INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT);
        return firstToken;
    }
}
689
<JAVASCRIPT> {

    // "</script>" ends the embedded JavaScript: "</", the name and ">" are
    // emitted as three separate markup tokens.
    {EndScriptTag} {
        yybegin(YYINITIAL);
        addToken(zzStartRead, zzStartRead + 1, Token.MARKUP_TAG_DELIMITER);
        addToken(zzMarkedPos - 7, zzMarkedPos - 2, Token.MARKUP_TAG_NAME);
        addToken(zzMarkedPos - 1, zzMarkedPos - 1, Token.MARKUP_TAG_DELIMITER);
    }

    // ECMA keywords, followed by the reserved (but not yet used) ECMA
    // keywords that are highlighted the same way.
    "break" |
    "continue" |
    "delete" |
    "else" |
    "for" |
    "function" |
    "if" |
    "in" |
    "new" |
    "return" |
    "this" |
    "typeof" |
    "var" |
    "void" |
    "while" |
    "with" |
    "abstract" |
    "case" |
    "catch" |
    "class" |
    "const" |
    "debugger" |
    "default" |
    "do" |
    "enum" |
    "export" |
    "extends" |
    "final" |
    "finally" |
    "goto" |
    "implements" |
    "import" |
    "instanceof" |
    "interface" |
    "native" |
    "package" |
    "private" |
    "protected" |
    "public" |
    "static" |
    "super" |
    "switch" |
    "synchronized" |
    "throw" |
    "throws" |
    "transient" |
    "try" |
    "volatile" |
    "null" { addToken(Token.RESERVED_WORD); }

    // Reserved words that name primitive types.
    "boolean" |
    "byte" |
    "char" |
    "double" |
    "float" |
    "int" |
    "long" |
    "short" { addToken(Token.DATA_TYPE); }

    // Literals.
    "false" |
    "true" { addToken(Token.LITERAL_BOOLEAN); }
    "NaN" |
    "Infinity" { addToken(Token.RESERVED_WORD); }

    // Functions.
    "eval" |
    "parseInt" |
    "parseFloat" |
    "escape" |
    "unescape" |
    "isNaN" |
    "isFinite" { addToken(Token.FUNCTION); }

    {LineTerminator} {
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }
    {JS_Identifier} { addToken(Token.IDENTIFIER); }
    {Whitespace} { addToken(Token.WHITESPACE); }

    /* String/Character literals. */
    [\'] {
        start = zzMarkedPos - 1;
        validJSString = true;
        yybegin(JS_CHAR);
    }
    [\"] {
        start = zzMarkedPos - 1;
        validJSString = true;
        yybegin(JS_STRING);
    }

    /* Comment literals. */
    "/**/" { addToken(Token.COMMENT_MULTILINE); }
    {JS_MLCBegin} {
        start = zzMarkedPos - 2;
        yybegin(JS_MLC);
    }
    {JS_LineCommentBegin} {
        start = zzMarkedPos - 2;
        yybegin(JS_EOL_COMMENT);
    }

    /* Attempt to identify regular expressions (not foolproof) - do after comments! */
    {JS_Regex} {
        // A regex is accepted when nothing precedes it on this line, or
        // when the previous significant token is one a regex can follow.
        boolean regexAllowed = (firstToken == null);
        if (!regexAllowed) {
            Token t = firstToken.getLastNonCommentNonWhitespaceToken();
            regexAllowed = RSyntaxUtilities.regexCanFollowInJavaScript(t);
        }
        if (regexAllowed) {
            addToken(Token.REGEX);
        }
        else {
            // Not a regex after all: emit only the leading "/" as an
            // operator and rescan everything that follows it.
            int afterSlash = zzStartRead + 1;
            addToken(zzStartRead, zzStartRead, Token.OPERATOR);
            zzStartRead = zzCurrentPos = zzMarkedPos = afterSlash;
        }
    }

    /* Separators. */
    {JS_Separator} { addToken(Token.SEPARATOR); }
    {JS_Separator2} { addToken(Token.IDENTIFIER); }

    /* Operators. */
    {JS_Operator} { addToken(Token.OPERATOR); }

    /* Numbers */
    {JS_IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
    {JS_HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
    {JS_FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
    {JS_ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }

    {JS_ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }

    /* Ended with a line not in a string or comment. */
    <<EOF>> {
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }

    /* Catch any other (unhandled) characters and flag them as bad. */
    . { addToken(Token.ERROR_IDENTIFIER); }

}
835
<JS_STRING> {
    [^\n\\\"]+ {}
    \n {
        addToken(start, zzStartRead - 1, Token.ERROR_STRING_DOUBLE);
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }
    // Well-formed hex escape (backslash, 'x', two hex digits).
    \\x{HexDigit}{2} {}
    \\x {
        /* Invalid latin-1 character escape: the two hex digits are missing. */
        validJSString = false;
    }
    // Well-formed Unicode escape (backslash, 'u', four hex digits).
    \\u{HexDigit}{4} {}
    \\u {
        /* Invalid Unicode character escape: the four hex digits are missing. */
        validJSString = false;
    }
    \\. { /* Skip all escaped chars. */ }
    \\ {
        /* Line ending in '\' => continue to next line. */
        final boolean valid = validJSString;
        addToken(start, zzStartRead,
                valid ? Token.LITERAL_STRING_DOUBLE_QUOTE : Token.ERROR_STRING_DOUBLE);
        addEndToken(valid ? INTERNAL_IN_JS_STRING_VALID : INTERNAL_IN_JS_STRING_INVALID);
        return firstToken;
    }
    \" {
        // Closing quote: the whole string is flagged as an error if any
        // escape inside it was malformed.
        int type;
        if (validJSString) {
            type = Token.LITERAL_STRING_DOUBLE_QUOTE;
        }
        else {
            type = Token.ERROR_STRING_DOUBLE;
        }
        addToken(start, zzStartRead, type);
        yybegin(JAVASCRIPT);
    }
    // Line ended without a closing quote or a trailing backslash.
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.ERROR_STRING_DOUBLE);
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }
}
858
<JS_CHAR> {
    [^\n\\\']+ {}
    \n {
        addToken(start, zzStartRead - 1, Token.ERROR_CHAR);
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }
    // Well-formed hex escape (backslash, 'x', two hex digits).
    \\x{HexDigit}{2} {}
    \\x {
        /* Invalid latin-1 character escape: the two hex digits are missing. */
        validJSString = false;
    }
    // Well-formed Unicode escape (backslash, 'u', four hex digits).
    \\u{HexDigit}{4} {}
    \\u {
        /* Invalid Unicode character escape: the four hex digits are missing. */
        validJSString = false;
    }
    \\. { /* Skip all escaped chars. */ }
    \\ {
        /* Line ending in '\' => continue to next line. */
        final boolean valid = validJSString;
        addToken(start, zzStartRead, valid ? Token.LITERAL_CHAR : Token.ERROR_CHAR);
        addEndToken(valid ? INTERNAL_IN_JS_CHAR_VALID : INTERNAL_IN_JS_CHAR_INVALID);
        return firstToken;
    }
    \' {
        // Closing quote: the whole string is flagged as an error if any
        // escape inside it was malformed.
        int type;
        if (validJSString) {
            type = Token.LITERAL_CHAR;
        }
        else {
            type = Token.ERROR_CHAR;
        }
        addToken(start, zzStartRead, type);
        yybegin(JAVASCRIPT);
    }
    // Line ended without a closing quote or a trailing backslash.
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.ERROR_CHAR);
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }
}
881
<JS_MLC> {
    // JavaScript MLC's. This state is essentially Java's MLC state.
    [^hwf<\n\*]+ {}
    // A URL inside the comment: flush the comment text collected so far,
    // then add the URL as a hyperlink token.  Both tokens use the
    // multi-line comment type, like every other rule in this state (the
    // EOL-comment type was used here before).
    {URL} {
        // addToken() moves zzStartRead, so save the URL's start first.
        int urlStart = zzStartRead;
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        addHyperlinkToken(urlStart, zzMarkedPos - 1, Token.COMMENT_MULTILINE);
        start = zzMarkedPos;
    }
    // 'h', 'w' or 'f' that did not begin a URL.
    [hwf] {}
    // "</script>" ends the script even inside an unterminated comment.
    {EndScriptTag} {
        yybegin(YYINITIAL);
        // addToken() moves zzStartRead, so save the tag's start first.
        int tagStart = zzStartRead;
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        addToken(tagStart, tagStart + 1, Token.MARKUP_TAG_DELIMITER);
        addToken(zzMarkedPos - 7, zzMarkedPos - 2, Token.MARKUP_TAG_NAME);
        addToken(zzMarkedPos - 1, zzMarkedPos - 1, Token.MARKUP_TAG_DELIMITER);
    }
    // A '<' that did not begin "</script>".
    "<" {}
    \n {
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        addEndToken(INTERNAL_IN_JS_MLC);
        return firstToken;
    }
    {JS_MLCEnd} {
        yybegin(JAVASCRIPT);
        addToken(start, zzStartRead + 1, Token.COMMENT_MULTILINE);
    }
    // A '*' that did not begin the comment terminator.
    \* {}
    // Line ended inside the comment; the next line resumes in this state.
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        addEndToken(INTERNAL_IN_JS_MLC);
        return firstToken;
    }
}
901
<JS_EOL_COMMENT> {
    [^hwf<\n]+ {}
    // A URL inside the comment: flush the comment text collected so far,
    // then add the URL itself as a hyperlink token.
    {URL} {
        // addToken() moves zzStartRead, so save the URL's start first.
        int urlStart = zzStartRead;
        addToken(start, zzStartRead - 1, Token.COMMENT_EOL);
        addHyperlinkToken(urlStart, zzMarkedPos - 1, Token.COMMENT_EOL);
        start = zzMarkedPos;
    }
    // 'h', 'w' or 'f' that did not begin a URL.
    [hwf] {}
    // "</script>" ends the script even in the middle of a line comment.
    {EndScriptTag} {
        yybegin(YYINITIAL);
        // addToken() moves zzStartRead, so save the tag's start first.
        int tagStart = zzStartRead;
        addToken(start, zzStartRead - 1, Token.COMMENT_EOL);
        addToken(tagStart, tagStart + 1, Token.MARKUP_TAG_DELIMITER);
        addToken(zzMarkedPos - 7, zzMarkedPos - 2, Token.MARKUP_TAG_NAME);
        addToken(zzMarkedPos - 1, zzMarkedPos - 1, Token.MARKUP_TAG_DELIMITER);
    }
    // A '<' that did not begin "</script>".
    "<" {}
    // The comment ends with the line; the next line resumes in JavaScript.
    \n {
        addToken(start, zzStartRead - 1, Token.COMMENT_EOL);
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.COMMENT_EOL);
        addEndToken(INTERNAL_IN_JS);
        return firstToken;
    }

}
Note: See TracBrowser for help on using the repository browser.