source: other-projects/gli-rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/HTMLTokenMaker.flex@ 25801

Last change on this file since 25801 was 25801, checked in by ak19, 12 years ago

REGENERATE-FLEX did not correctly handle the substitutions for yyreset() and zzRefill(). It also incorrectly changed the functions that called these functions, which we did not want. The revised 'sed' expression now looks for the declaring type information in front of the function names as a way to differentiate the definitions from the other locations, which merely call them.

File size: 30.8 KB
RevLine 
[25584]1/*
2 * 01/24/2005
3 *
4 * HTMLTokenMaker.java - Generates tokens for HTML syntax highlighting.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import java.io.*;
12import javax.swing.text.Segment;
13
14import org.fife.ui.rsyntaxtextarea.*;
15
16
17/**
18 * Scanner for HTML 5 files.
19 *
20 * This implementation was created using
21 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22 * was modified for performance. Memory allocation needs to be almost
23 * completely removed to be competitive with the handwritten lexers (subclasses
24 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
25 * Strings are never allocated (via yytext()), and the scanner never has to
26 * worry about refilling its buffer (needlessly copying chars around).
27 * We can achieve this because RText always scans exactly 1 line of tokens at a
28 * time, and hands the scanner this line as an array of characters (a Segment
29 * really). Since tokens contain pointers to char arrays instead of Strings
30 * holding their contents, there is no need for allocating new memory for
31 * Strings.<p>
32 *
33 * The actual algorithm generated for scanning has, of course, not been
34 * modified.<p>
35 *
36 * If you wish to regenerate this file yourself, keep in mind the following:
37 * <ul>
38 * <li>The generated <code>HTMLTokenMaker.java</code> file will contain two
39 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40 * You should hand-delete the second of each definition (the ones
41 * generated by the lexer), as these generated methods modify the input
42 * buffer, which we'll never have to do.</li>
43 * <li>You should also change the declaration/definition of zzBuffer to NOT
44 * be initialized. This is a needless memory allocation for us since we
45 * will be pointing the array somewhere else anyway.</li>
46 * <li>You should NOT call <code>yylex()</code> on the generated scanner
47 * directly; rather, you should use <code>getTokenList</code> as you would
48 * with any other <code>TokenMaker</code> instance.</li>
49 * </ul>
50 *
51 * @author Robert Futrell
52 * @version 0.8
53 *
54 */
55%%
56
// Scanner options: generate a public class named HTMLTokenMaker that extends
// AbstractMarkupTokenMaker, accepts Unicode input, and whose scanning method
// returns org.fife.ui.rsyntaxtextarea.Token.
57%public
58%class HTMLTokenMaker
59%extends AbstractMarkupTokenMaker
60%unicode
61%type org.fife.ui.rsyntaxtextarea.Token
62
63
64%{
65
66 /**
67 * Token type specific to HTMLTokenMaker denoting a line ending with an
68 * unclosed double-quote attribute. Resumes scanning in INATTR_DOUBLE.
69 */
70 public static final int INTERNAL_ATTR_DOUBLE = -1;
71
72
73 /**
74 * Token type specific to HTMLTokenMaker denoting a line ending with an
75 * unclosed single-quote attribute. Resumes scanning in INATTR_SINGLE.
76 */
77 public static final int INTERNAL_ATTR_SINGLE = -2;
78
79
80 /**
81 * Token type specific to HTMLTokenMaker; this signals that the user has
82 * ended a line with an unclosed HTML tag; thus a new line is beginning
83 * still inside of the tag.
84 */
85 public static final int INTERNAL_INTAG = -3;
86
87 /**
88 * Token type specific to HTMLTokenMaker; this signals that the user has
89 * ended a line with an unclosed <code>&lt;script&gt;</code> tag.
90 */
91 public static final int INTERNAL_INTAG_SCRIPT = -4;
92
93 /**
94 * Token type specifying we're in a double-quoted attribute in a
95 * script tag.
96 */
97 public static final int INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT = -5;
98
99 /**
100 * Token type specifying we're in a single-quoted attribute in a
101 * script tag.
102 */
103 public static final int INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT = -6;
104
105 /**
106 * Token type specifying we're in JavaScript (between the opening and
    * closing script tags, but not inside a string or multi-line comment).
107 */
108 public static final int INTERNAL_IN_JS = -7;
109
110 /**
111 * Token type specifying we're in a JavaScript multiline comment.
112 */
113 public static final int INTERNAL_IN_JS_MLC = -8;
114
115 /**
116 * Token type specifying we're in an invalid multi-line JS string, i.e. a
    * double-quoted string continued with a trailing backslash in which an
    * invalid escape has already been seen.
117 */
118 public static final int INTERNAL_IN_JS_STRING_INVALID = -9;
119
120 /**
121 * Token type specifying we're in a valid multi-line JS string, i.e. a
    * double-quoted string continued with a trailing backslash.
122 */
123 public static final int INTERNAL_IN_JS_STRING_VALID = -10;
124
125 /**
126 * Token type specifying we're in an invalid multi-line JS single-quoted string.
127 */
128 public static final int INTERNAL_IN_JS_CHAR_INVALID = -11;
129
130 /**
131 * Token type specifying we're in a valid multi-line JS single-quoted string.
132 */
133 public static final int INTERNAL_IN_JS_CHAR_VALID = -12;
134
135 /**
136 * Whether closing markup tags are automatically completed for HTML.
    * This is a static flag, so it is shared by every instance of this class.
137 */
138 private static boolean completeCloseTags;
139
140 /**
141 * When in the JS_STRING or JS_CHAR state, whether the current string is
    * valid. Cleared when an incomplete <code>\x</code> or <code>\u</code>
    * escape is scanned.
142 */
143 private boolean validJSString;
144
145
146 /**
147 * Constructor. This must be here because JFlex does not generate a
148 * no-parameter constructor. It only invokes the superclass constructor.
149 */
150 public HTMLTokenMaker() {
151 super();
152 }
153
154
155 /**
156 * Adds the token specified to the current linked list of tokens as an
157 * "end token;" that is, at <code>zzMarkedPos</code>. The lexer rules use
    * this at the end of a line to append one of the <code>INTERNAL_*</code>
    * types, which records the state the next line must resume in.
158 *
159 * @param tokenType The token's type.
160 */
161 private void addEndToken(int tokenType) {
162 addToken(zzMarkedPos,zzMarkedPos, tokenType);
163 }
164
165
166 /**
167 * Adds the token specified to the current linked list of tokens, using
    * the six-argument <code>addToken</code> with <code>true</code> as its last
    * argument (presumably the "is a hyperlink" flag; that overload is not
    * declared in this file, so confirm against the superclass).
168 *
    * @param start The starting offset of the token in <code>zzBuffer</code>.
    * @param end The ending offset of the token in <code>zzBuffer</code>.
169 * @param tokenType The token's type.
170 * @see #addToken(int, int, int)
171 */
172 private void addHyperlinkToken(int start, int end, int tokenType) {
    // Convert the buffer offset into a document offset.
173 int so = start + offsetShift;
174 addToken(zzBuffer, start,end, tokenType, so, true);
175 }
176
177
178 /**
179 * Adds the token specified to the current linked list of tokens. The
    * token covers the text just matched, from <code>zzStartRead</code>
    * through <code>zzMarkedPos-1</code>.
180 *
181 * @param tokenType The token's type.
182 */
183 private void addToken(int tokenType) {
184 addToken(zzStartRead, zzMarkedPos-1, tokenType);
185 }
186
187
188 /**
189 * Adds the token specified to the current linked list of tokens. The
    * token's document offset is computed as <code>start + offsetShift</code>.
190 *
    * @param start The starting offset of the token in <code>zzBuffer</code>.
    * @param end The ending offset of the token in <code>zzBuffer</code>.
191 * @param tokenType The token's type.
192 */
193 private void addToken(int start, int end, int tokenType) {
194 int so = start + offsetShift;
195 addToken(zzBuffer, start,end, tokenType, so);
196 }
197
198
199 /**
200 * Adds the token specified to the current linked list of tokens, then
    * moves <code>zzStartRead</code> up to <code>zzMarkedPos</code> so that the
    * next token added starts after the text matched so far.
201 *
202 * @param array The character array.
203 * @param start The starting offset in the array.
204 * @param end The ending offset in the array.
205 * @param tokenType The token's type.
206 * @param startOffset The offset in the document at which this token
207 * occurs.
208 */
209 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
210 super.addToken(array, start,end, tokenType, startOffset);
211 zzStartRead = zzMarkedPos;
212 }
213
214
215 /**
216 * Returns whether markup close tags should be completed. You might not want
217 * this to be the case, since some tags in standard HTML aren't usually
218 * closed. The value comes from a static field, so it is the same for all
    * instances of this class.
219 *
220 * @return Whether closing markup tags are completed.
221 * @see #setCompleteCloseTags(boolean)
222 */
223 public boolean getCompleteCloseTags() {
224 return completeCloseTags;
225 }
226
227
228 /**
229 * Returns the first token in the linked list of tokens generated
230 * from <code>text</code>. The scanner is started in the lexical state that
231 * corresponds to <code>initialTokenType</code> and then run over the text.
232 *
233 * @param text The text from which to get tokens.
234 * @param initialTokenType The token type we should start with. This is
    *        <code>Token.COMMENT_MULTILINE</code>, <code>Token.PREPROCESSOR</code>,
    *        <code>Token.VARIABLE</code> (DTD) or one of this class's
    *        <code>INTERNAL_*</code> constants; any other value starts the
    *        scanner in its default state.
235 * @param startOffset The offset into the document at which
236 * <code>text</code> starts.
237 * @return The first <code>Token</code> in a linked list representing
238 * the syntax highlighted text, or a new <code>DefaultToken</code> if
    *         scanning throws an <code>IOException</code>.
239 */
240 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
241
242 resetTokenList();
    // Amount to add to a buffer offset to obtain a document offset.
243 this.offsetShift = -text.offset + startOffset;
244
245 // Start off in the proper state.
    // Every resumed state also sets 'start' to the beginning of the line, as
    // the construct carried over from the previous line begins there.
246 int state = Token.NULL;
247 switch (initialTokenType) {
248 case Token.COMMENT_MULTILINE:
249 state = COMMENT;
250 start = text.offset;
251 break;
252 case Token.PREPROCESSOR:
253 state = PI;
254 start = text.offset;
255 break;
256 case Token.VARIABLE:
257 state = DTD;
258 start = text.offset;
259 break;
260 case INTERNAL_INTAG:
261 state = INTAG;
262 start = text.offset;
263 break;
264 case INTERNAL_INTAG_SCRIPT:
265 state = INTAG_SCRIPT;
266 start = text.offset;
267 break;
268 case INTERNAL_ATTR_DOUBLE:
269 state = INATTR_DOUBLE;
270 start = text.offset;
271 break;
272 case INTERNAL_ATTR_SINGLE:
273 state = INATTR_SINGLE;
274 start = text.offset;
275 break;
276 case INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT:
277 state = INATTR_DOUBLE_SCRIPT;
278 start = text.offset;
279 break;
280 case INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT:
281 state = INATTR_SINGLE_SCRIPT;
282 start = text.offset;
283 break;
284 case INTERNAL_IN_JS:
285 state = JAVASCRIPT;
286 start = text.offset;
287 break;
288 case INTERNAL_IN_JS_MLC:
289 state = JS_MLC;
290 start = text.offset;
291 break;
    // The string/char cases also restore the validity flag that was in
    // effect when the previous line ended.
292 case INTERNAL_IN_JS_STRING_INVALID:
293 state = JS_STRING;
294 validJSString = false;
295 start = text.offset;
296 break;
297 case INTERNAL_IN_JS_STRING_VALID:
298 state = JS_STRING;
299 validJSString = true;
300 start = text.offset;
301 break;
302 case INTERNAL_IN_JS_CHAR_INVALID:
303 state = JS_CHAR;
304 validJSString = false;
305 start = text.offset;
306 break;
307 case INTERNAL_IN_JS_CHAR_VALID:
308 state = JS_CHAR;
309 validJSString = true;
310 start = text.offset;
311 break;
312 default:
    // NOTE(review): a token type is used here as a lexical state; this
    // presumably relies on Token.NULL having the same value as YYINITIAL.
    // Confirm.
313 state = Token.NULL;
314 }
315
    // The reset below reads the segment from the field 's', so it must be
    // assigned first.
316 s = text;
317 try {
[25801]318 YYRESETNEW(zzReader);
[25584]319 yybegin(state);
320 return yylex();
321 } catch (IOException ioe) {
322 ioe.printStackTrace();
323 return new DefaultToken();
324 }
325
326 }
327
328
329 /**
330 * Sets whether markup close tags should be completed. You might not want
331 * this to be the case, since some tags in standard HTML aren't usually
332 * closed. The setting is stored in a static field, so it applies to every
    * instance of this class.
333 *
334 * @param complete Whether closing markup tags are completed.
335 * @see #getCompleteCloseTags()
336 */
337 public static void setCompleteCloseTags(boolean complete) {
338 completeCloseTags = complete;
339 }
340
341
342 /**
343 * Stands in for the generated "refill the input buffer" method. Nothing is
    * actually refilled here: this only reports whether the current position
    * has reached the end of the segment <code>s</code>.
    * <p>
    * NOTE(review): the upper-case name looks like a placeholder that the
    * regeneration script substitutes for <code>zzRefill</code>; confirm
    * against that script before renaming.
344 *
345 * @return <code>true</code> if EOF was reached, otherwise
346 * <code>false</code>.
347 */
348
[25801]349 private boolean ZZREFILLNEW() {
[25584]350 return zzCurrentPos>=s.offset+s.count;
351 }
352
353
354 /**
355 * Resets the scanner so that it scans the segment currently held in
356 * <code>s</code>. Does not close the old reader.
357 *
358 * No characters are read from <code>reader</code> here; the scanner's
359 * buffer is simply pointed at the segment's character array.
360 * Lexical state is set to <tt>YYINITIAL</tt>.
    * <p>
    * NOTE(review): the upper-case name looks like a placeholder that the
    * regeneration script substitutes for <code>yyreset</code>; confirm
    * against that script before renaming.
361 *
362 * @param reader the new input stream; only stored in <code>zzReader</code>
363 */
364
[25801]365 public final void YYRESETNEW(java.io.Reader reader) {
[25584]366 // 's' has been updated.
367 zzBuffer = s.array;
368
369 /*
370 * We replaced the line below with the two below it because zzRefill
371 * no longer "refills" the buffer (since the way we do it, it's always
372 * "full" the first time through, since it points to the segment's
373 * array). So, we assign zzEndRead here.
374 */
375 //zzStartRead = zzEndRead = s.offset;
376
377 zzStartRead = s.offset;
    // Index of the last character of the segment.
378 zzEndRead = zzStartRead + s.count - 1;
379 //### zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
380 zzCurrentPos = zzMarkedPos = s.offset;
381 zzLexicalState = YYINITIAL;
382 zzReader = reader;
383 zzAtBOL = true;
384 zzAtEOF = false;
385 }
386
387
388
389%}
390
391// HTML-specific stuff.
// Whitespace:      one or more spaces, tabs or form feeds.
// LineTerminator:  a single newline character.
// Identifier:      a run of text containing no space, tab, newline, '<' or '&'.
// AmperItem:       '&' followed by any characters other than ';', space or
//                  tab, with an optional closing ';' (for example "&amp;").
// InTagIdentifier: a run of characters inside a tag that ends at whitespace,
//                  a quote, '/', '=' or '>'.
// EndScriptTag:    "</script>" in any mix of upper and lower case.
392Whitespace = ([ \t\f]+)
393LineTerminator = ([\n])
394Identifier = ([^ \t\n<&]+)
395AmperItem = ([&][^; \t]*[;]?)
396InTagIdentifier = ([^ \t\n\"\'/=>]+)
397EndScriptTag = ("</" [sS][cC][rR][iI][pP][tT] ">")
398
399
400// JavaScript stuff.
// Character classes and identifier pieces.
401Letter = [A-Za-z]
402NonzeroDigit = [1-9]
403Digit = ("0"|{NonzeroDigit})
404HexDigit = ({Digit}|[A-Fa-f])
405OctalDigit = ([0-7])
406EscapedSourceCharacter = ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
// NonSeparator: any character that cannot end a token (i.e. is not
// whitespace, a bracket, punctuation, an operator character or a quote), plus
// '#' and backslash. It is used to detect malformed numbers and identifiers.
407NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\")
408IdentifierStart = ({Letter}|"_"|"$")
409IdentifierPart = ({IdentifierStart}|{Digit}|("\\"{EscapedSourceCharacter}))
// Comment delimiters.
410JS_MLCBegin = "/*"
411JS_MLCEnd = "*/"
412JS_LineCommentBegin = "//"
// Numeric literals. Helper1 is a decimal integer; Helper2 is "0" followed by
// either x/X and hex digits, or by octal digits.
413JS_IntegerHelper1 = (({NonzeroDigit}{Digit}*)|"0")
414JS_IntegerHelper2 = ("0"(([xX]{HexDigit}+)|({OctalDigit}*)))
415JS_IntegerLiteral = ({JS_IntegerHelper1}[lL]?)
416JS_HexLiteral = ({JS_IntegerHelper2}[lL]?)
417JS_FloatHelper1 = ([fFdD]?)
418JS_FloatHelper2 = ([eE][+-]?{Digit}+{JS_FloatHelper1})
419JS_FloatLiteral1 = ({Digit}+"."({JS_FloatHelper1}|{JS_FloatHelper2}|{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2})))
420JS_FloatLiteral2 = ("."{Digit}+({JS_FloatHelper1}|{JS_FloatHelper2}))
421JS_FloatLiteral3 = ({Digit}+{JS_FloatHelper2})
422JS_FloatLiteral = ({JS_FloatLiteral1}|{JS_FloatLiteral2}|{JS_FloatLiteral3}|({Digit}+[fFdD]))
// A valid number immediately followed by non-separator characters.
423JS_ErrorNumberFormat = (({JS_IntegerLiteral}|{JS_HexLiteral}|{JS_FloatLiteral}){NonSeparator}+)
// Brackets, then the remaining punctuation. (A redundant second "\]" was
// removed from the bracket class; the set of characters is unchanged.)
424JS_Separator = ([\(\)\{\}\[\]])
425JS_Separator2 = ([\;,.])
// Operators. ">>" used to be listed twice while "<<" and "||" were missing,
// so those two were scanned as a pair of one-character operators; they are
// now matched as single operator tokens, like ">>" and "&&".
426JS_NonAssignmentOperator = ("+"|"-"|"<="|"^"|"++"|"<"|"*"|">="|"%"|"--"|">"|"/"|"!="|"?"|">>"|"!"|"&"|"=="|":"|"<<"|"~"|"|"|"||"|"&&"|">>>")
427JS_AssignmentOperator = ("="|"-="|"*="|"/="|"|="|"&="|"^="|"+="|"%="|"<<="|">>="|">>>=")
428JS_Operator = ({JS_NonAssignmentOperator}|{JS_AssignmentOperator})
429JS_Identifier = ({IdentifierStart}{IdentifierPart}*)
430JS_ErrorIdentifier = ({NonSeparator}+)
// A candidate regular expression literal: '/', a first character that is not
// '*', '/' or a bare backslash, a body without unescaped '/', a closing '/'
// and optional g/i/m flags. The JAVASCRIPT rule decides from the previous
// token whether to really treat the match as a regex.
431JS_Regex = ("/"([^\*\\/]|\\.)([^/\\]|\\.)*"/"[gim]*)
432
// URL recognition, used to turn URLs inside comments into hyperlink tokens.
// The class names appear to follow RFC 3986 (gen-delims, sub-delims,
// unreserved); confirm before relying on exact conformance.
// A URL is "http://", "https://", "ftp://", "file://" or "www.", optionally
// followed by URL characters that must end in '/', '$', a letter or a digit
// (so trailing punctuation is not swallowed).
433URLGenDelim = ([:\/\?#\[\]@])
434URLSubDelim = ([\!\$&'\(\)\*\+,;=])
435URLUnreserved = ({Letter}|"_"|{Digit}|[\-\.\~])
436URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
437URLCharacters = ({URLCharacter}*)
438URLEndCharacter = ([\/\$]|{Letter}|{Digit})
439URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
440
441
// Lexical states (in addition to the default YYINITIAL, which is plain HTML):
//   COMMENT               inside <!-- ... -->
//   PI                    inside <? ... ?>
//   DTD                   inside <! ... >
//   INTAG                 inside a tag, after its name
//   INTAG_CHECK_TAG_NAME  just after "<" or "</", deciding whether the name
//                         is a known HTML tag
//   INATTR_DOUBLE/SINGLE  inside a quoted attribute value
//   INTAG_SCRIPT, INATTR_DOUBLE_SCRIPT, INATTR_SINGLE_SCRIPT
//                         the same, but inside an opening <script> tag
//   JAVASCRIPT            script content
//   JS_STRING, JS_CHAR    inside a double- / single-quoted JS string
//   JS_MLC                inside a JS /* ... */ comment
//   JS_EOL_COMMENT        inside a JS // comment
442%state COMMENT
443%state PI
444%state DTD
445%state INTAG
446%state INTAG_CHECK_TAG_NAME
447%state INATTR_DOUBLE
448%state INATTR_SINGLE
449%state INTAG_SCRIPT
450%state INATTR_DOUBLE_SCRIPT
451%state INATTR_SINGLE_SCRIPT
452%state JAVASCRIPT
453%state JS_STRING
454%state JS_CHAR
455%state JS_MLC
456%state JS_EOL_COMMENT
457
458
459%%
460
461<YYINITIAL> {
    // Plain HTML content, outside of any tag, comment, DTD or processing
    // instruction. Constructs that can span several matches (comments, DTDs,
    // PIs) record where they begin in 'start' and switch state; their token
    // is added by the state they switch to.
462 "<!--" { start = zzMarkedPos-4; yybegin(COMMENT); }
    // "<script" is listed before the generic "<name" rule below so that an
    // exact match takes this action; it emits "<" and the 6-character name.
463 "<"[sS][cC][rR][iI][pP][tT] {
464 addToken(zzStartRead,zzStartRead, Token.MARKUP_TAG_DELIMITER);
465 addToken(zzMarkedPos-6,zzMarkedPos-1, Token.MARKUP_TAG_NAME);
466 start = zzMarkedPos; yybegin(INTAG_SCRIPT);
467 }
468 "<!" { start = zzMarkedPos-2; yybegin(DTD); }
469 "<?" { start = zzMarkedPos-2; yybegin(PI); }
    // For "<name" and "</name" only the delimiter is tokenized here; moving
    // zzMarkedPos back makes the name itself get re-scanned in
    // INTAG_CHECK_TAG_NAME.
470 "<"({Letter}|{Digit})+ {
471 int count = yylength();
472 addToken(zzStartRead,zzStartRead, Token.MARKUP_TAG_DELIMITER);
473 zzMarkedPos -= (count-1); //yypushback(count-1);
474 yybegin(INTAG_CHECK_TAG_NAME);
475 }
476 "</"({Letter}|{Digit})+ {
477 int count = yylength();
478 addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER);
479 zzMarkedPos -= (count-2); //yypushback(count-2);
480 yybegin(INTAG_CHECK_TAG_NAME);
481 }
    // A delimiter that is not followed by a letter or digit.
482 "<" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(INTAG); }
483 "</" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(INTAG); }
484 {LineTerminator} { addNullToken(); return firstToken; }
485 {Identifier} { addToken(Token.IDENTIFIER); } // Catches everything.
486 {AmperItem} { addToken(Token.DATA_TYPE); }
487 {Whitespace} { addToken(Token.WHITESPACE); }
488 <<EOF>> { addNullToken(); return firstToken; }
489}
490
491<COMMENT> {
    // Inside <!-- ... -->. The comment text is accumulated from 'start' and
    // emitted as one COMMENT_MULTILINE token when the comment or line ends.
    // 'h', 'w' and 'f' are kept out of the bulk rule so that a URL (http,
    // https, www., ftp, file) can be matched and emitted as a hyperlink; one
    // that does not begin a URL is skipped by the [hwf] rule.
492 [^hwf\n\-]+ {}
493 {URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
494 [hwf] {}
495 {LineTerminator} { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
496 "-->" { yybegin(YYINITIAL); addToken(start,zzStartRead+2, Token.COMMENT_MULTILINE); }
497 "-" {}
498 <<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
499}
500
501<PI> {
    // Inside <? ... ?>. Everything from 'start' up to and including "?>" (or
    // up to the end of the line) becomes one PREPROCESSOR token.
502 [^\n\?]+ {}
503 {LineTerminator} { addToken(start,zzStartRead-1, Token.PREPROCESSOR); return firstToken; }
504 "?>" { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.PREPROCESSOR); }
505 "?" {}
506 <<EOF>> { addToken(start,zzStartRead-1, Token.PREPROCESSOR); return firstToken; }
507}
508
509<DTD> {
    // Inside <! ... >. Everything from 'start' up to and including ">" (or up
    // to the end of the line) becomes one VARIABLE token.
510 [^\n>]+ {}
511 {LineTerminator} { addToken(start,zzStartRead-1, Token.VARIABLE); return firstToken; }
512 ">" { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.VARIABLE); }
513 <<EOF>> { addToken(start,zzStartRead-1, Token.VARIABLE); return firstToken; }
514}
515
516<INTAG_CHECK_TAG_NAME> {
    // Entered right after "<" or "</" with the tag name still unread. A name
    // from the list below (any letter case) is emitted as MARKUP_TAG_NAME and
    // the state is left unchanged; the character that follows it is then
    // pushed back by one of the last two rules, which switch to INTAG. A name
    // that is not in the list is pushed back whole, so INTAG scans it with its
    // own rules instead.
517 [Aa] |
518 [aA][bB][bB][rR] |
519 [aA][cC][rR][oO][nN][yY][mM] |
520 [aA][dD][dD][rR][eE][sS][sS] |
521 [aA][pP][pP][lL][eE][tT] |
522 [aA][rR][eE][aA] |
523 [aA][rR][tT][iI][cC][lL][eE] |
524 [aA][sS][iI][dD][eE] |
525 [aA][uU][dD][iI][oO] |
526 [bB] |
527 [bB][aA][sS][eE] |
528 [bB][aA][sS][eE][fF][oO][nN][tT] |
529 [bB][dD][oO] |
530 [bB][gG][sS][oO][uU][nN][dD] |
531 [bB][iI][gG] |
532 [bB][lL][iI][nN][kK] |
533 [bB][lL][oO][cC][kK][qQ][uU][oO][tT][eE] |
534 [bB][oO][dD][yY] |
535 [bB][rR] |
536 [bB][uU][tT][tT][oO][nN] |
537 [cC][aA][nN][vV][aA][sS] |
538 [cC][aA][pP][tT][iI][oO][nN] |
539 [cC][eE][nN][tT][eE][rR] |
540 [cC][iI][tT][eE] |
541 [cC][oO][dD][eE] |
542 [cC][oO][lL] |
543 [cC][oO][lL][gG][rR][oO][uU][pP] |
544 [cC][oO][mM][mM][aA][nN][dD] |
545 [cC][oO][mM][mM][eE][nN][tT] |
546 [dD][dD] |
547 [dD][aA][tT][aA][gG][rR][iI][dD] |
548 [dD][aA][tT][aA][lL][iI][sS][tT] |
549 [dD][aA][tT][aA][tT][eE][mM][pP][lL][aA][tT][eE] |
550 [dD][eE][lL] |
551 [dD][eE][tT][aA][iI][lL][sS] |
552 [dD][fF][nN] |
553 [dD][iI][aA][lL][oO][gG] |
554 [dD][iI][rR] |
555 [dD][iI][vV] |
556 [dD][lL] |
557 [dD][tT] |
558 [eE][mM] |
559 [eE][mM][bB][eE][dD] |
560 [eE][vV][eE][nN][tT][sS][oO][uU][rR][cC][eE] |
561 [fF][iI][eE][lL][dD][sS][eE][tT] |
562 [fF][iI][gG][uU][rR][eE] |
563 [fF][oO][nN][tT] |
564 [fF][oO][oO][tT][eE][rR] |
565 [fF][oO][rR][mM] |
566 [fF][rR][aA][mM][eE] |
567 [fF][rR][aA][mM][eE][sS][eE][tT] |
568 [hH][123456] |
569 [hH][eE][aA][dD] |
570 [hH][eE][aA][dD][eE][rR] |
571 [hH][rR] |
572 [hH][tT][mM][lL] |
573 [iI] |
574 [iI][fF][rR][aA][mM][eE] |
575 [iI][lL][aA][yY][eE][rR] |
576 [iI][mM][gG] |
577 [iI][nN][pP][uU][tT] |
578 [iI][nN][sS] |
579 [iI][sS][iI][nN][dD][eE][xX] |
580 [kK][bB][dD] |
581 [kK][eE][yY][gG][eE][nN] |
582 [lL][aA][bB][eE][lL] |
583 [lL][aA][yY][eE][rR] |
584 [lL][eE][gG][eE][nN][dD] |
585 [lL][iI] |
586 [lL][iI][nN][kK] |
587 [mM][aA][pP] |
588 [mM][aA][rR][kK] |
589 [mM][aA][rR][qQ][uU][eE][eE] |
590 [mM][eE][nN][uU] |
591 [mM][eE][tT][aA] |
592 [mM][eE][tT][eE][rR] |
593 [mM][uU][lL][tT][iI][cC][oO][lL] |
594 [nN][aA][vV] |
595 [nN][eE][sS][tT] |
596 [nN][oO][bB][rR] |
597 [nN][oO][eE][mM][bB][eE][dD] |
598 [nN][oO][fF][rR][aA][mM][eE][sS] |
599 [nN][oO][lL][aA][yY][eE][rR] |
600 [nN][oO][sS][cC][rR][iI][pP][tT] |
601 [oO][bB][jJ][eE][cC][tT] |
602 [oO][lL] |
603 [oO][pP][tT][gG][rR][oO][uU][pP] |
604 [oO][pP][tT][iI][oO][nN] |
605 [oO][uU][tT][pP][uU][tT] |
606 [pP] |
607 [pP][aA][rR][aA][mM] |
608 [pP][lL][aA][iI][nN][tT][eE][xX][tT] |
609 [pP][rR][eE] |
610 [pP][rR][oO][gG][rR][eE][sS][sS] |
611 [qQ] |
612 [rR][uU][lL][eE] |
613 [sS] |
614 [sS][aA][mM][pP] |
615 [sS][cC][rR][iI][pP][tT] |
616 [sS][eE][cC][tT][iI][oO][nN] |
617 [sS][eE][lL][eE][cC][tT] |
618 [sS][eE][rR][vV][eE][rR] |
619 [sS][mM][aA][lL][lL] |
620 [sS][oO][uU][rR][cC][eE] |
621 [sS][pP][aA][cC][eE][rR] |
622 [sS][pP][aA][nN] |
623 [sS][tT][rR][iI][kK][eE] |
624 [sS][tT][rR][oO][nN][gG] |
625 [sS][tT][yY][lL][eE] |
626 [sS][uU][bB] |
627 [sS][uU][pP] |
628 [tT][aA][bB][lL][eE] |
629 [tT][bB][oO][dD][yY] |
630 [tT][dD] |
631 [tT][eE][xX][tT][aA][rR][eE][aA] |
632 [tT][fF][oO][oO][tT] |
633 [tT][hH] |
634 [tT][hH][eE][aA][dD] |
635 [tT][iI][mM][eE] |
636 [tT][iI][tT][lL][eE] |
637 [tT][rR] |
638 [tT][tT] |
639 [uU] |
640 [uU][lL] |
641 [vV][aA][rR] |
642 [vV][iI][dD][eE][oO] { addToken(Token.MARKUP_TAG_NAME); }
643 {InTagIdentifier} { /* A non-recognized HTML tag name */ yypushback(yylength()); yybegin(INTAG); }
644 . { /* Shouldn't happen */ yypushback(1); yybegin(INTAG); }
645 <<EOF>> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG); return firstToken; }
646}
647
648<INTAG> {
    // Inside a tag, after its name: attribute names, '=', quoted values and
    // the closing delimiter. A line that ends here is marked INTERNAL_INTAG
    // so that the next line resumes in this state.
649 "/" { addToken(Token.MARKUP_TAG_DELIMITER); }
650 {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
651 {Whitespace} { addToken(Token.WHITESPACE); }
652 "=" { addToken(Token.OPERATOR); }
653 "/>" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }
654 ">" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }
655 [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE); }
656 [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE); }
657 <<EOF>> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG); return firstToken; }
658}
659
660<INATTR_DOUBLE> {
    // Inside a double-quoted attribute value. The value, quotes included, is
    // emitted as one token at the closing quote; if the line ends first, the
    // partial value is emitted followed by an INTERNAL_ATTR_DOUBLE end token.
661 [^\"]* {}
662 [\"] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
663 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE); return firstToken; }
664}
665
666<INATTR_SINGLE> {
    // Inside a single-quoted attribute value. The value, quotes included, is
    // emitted as one token at the closing quote; if the line ends first, the
    // partial value is emitted followed by an INTERNAL_ATTR_SINGLE end token.
667 [^\']* {}
668 [\'] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
669 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE); return firstToken; }
670}
671
672<INTAG_SCRIPT> {
    // Inside an opening <script ...> tag. Same as INTAG, except that ">"
    // enters JAVASCRIPT while "/>" (no script body) returns to plain HTML.
673 {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
674 "/>" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(YYINITIAL); }
675 "/" { addToken(Token.MARKUP_TAG_DELIMITER); } // Won't appear in valid HTML.
676 {Whitespace} { addToken(Token.WHITESPACE); }
677 "=" { addToken(Token.OPERATOR); }
678 ">" { yybegin(JAVASCRIPT); addToken(Token.MARKUP_TAG_DELIMITER); }
679 [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE_SCRIPT); }
680 [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE_SCRIPT); }
681 <<EOF>> { addToken(zzMarkedPos,zzMarkedPos, INTERNAL_INTAG_SCRIPT); return firstToken; }
682}
683
684<INATTR_DOUBLE_SCRIPT> {
    // Double-quoted attribute value inside a <script> tag; returns to
    // INTAG_SCRIPT (not INTAG) at the closing quote.
685 [^\"]* {}
686 [\"] { yybegin(INTAG_SCRIPT); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
687 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE_QUOTE_SCRIPT); return firstToken; }
688}
689
690<INATTR_SINGLE_SCRIPT> {
    // Single-quoted attribute value inside a <script> tag; returns to
    // INTAG_SCRIPT (not INTAG) at the closing quote.
691 [^\']* {}
692 [\'] { yybegin(INTAG_SCRIPT); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
693 <<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE_QUOTE_SCRIPT); return firstToken; }
694}
695
696<JAVASCRIPT> {
    // Script content between <script ...> and </script>. A line that ends in
    // this state gets an INTERNAL_IN_JS end token so that the next line
    // resumes here.
697
    // "</script>" is 9 characters: "</" (2), the name (6) and ">" (1), which
    // is where the offsets below come from.
698 {EndScriptTag} {
699 yybegin(YYINITIAL);
700 addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER);
701 addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
702 addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
703 }
704
705 // ECMA keywords.
706 "break" |
707 "continue" |
708 "delete" |
709 "else" |
710 "for" |
711 "function" |
712 "if" |
713 "in" |
714 "new" |
715 "return" |
716 "this" |
717 "typeof" |
718 "var" |
719 "void" |
720 "while" |
721 "with" { addToken(Token.RESERVED_WORD); }
722
723 // Reserved (but not yet used) ECMA keywords.
724 "abstract" { addToken(Token.RESERVED_WORD); }
725 "boolean" { addToken(Token.DATA_TYPE); }
726 "byte" { addToken(Token.DATA_TYPE); }
727 "case" { addToken(Token.RESERVED_WORD); }
728 "catch" { addToken(Token.RESERVED_WORD); }
729 "char" { addToken(Token.DATA_TYPE); }
730 "class" { addToken(Token.RESERVED_WORD); }
731 "const" { addToken(Token.RESERVED_WORD); }
732 "debugger" { addToken(Token.RESERVED_WORD); }
733 "default" { addToken(Token.RESERVED_WORD); }
734 "do" { addToken(Token.RESERVED_WORD); }
735 "double" { addToken(Token.DATA_TYPE); }
736 "enum" { addToken(Token.RESERVED_WORD); }
737 "export" { addToken(Token.RESERVED_WORD); }
738 "extends" { addToken(Token.RESERVED_WORD); }
739 "final" { addToken(Token.RESERVED_WORD); }
740 "finally" { addToken(Token.RESERVED_WORD); }
741 "float" { addToken(Token.DATA_TYPE); }
742 "goto" { addToken(Token.RESERVED_WORD); }
743 "implements" { addToken(Token.RESERVED_WORD); }
744 "import" { addToken(Token.RESERVED_WORD); }
745 "instanceof" { addToken(Token.RESERVED_WORD); }
746 "int" { addToken(Token.DATA_TYPE); }
747 "interface" { addToken(Token.RESERVED_WORD); }
748 "long" { addToken(Token.DATA_TYPE); }
749 "native" { addToken(Token.RESERVED_WORD); }
750 "package" { addToken(Token.RESERVED_WORD); }
751 "private" { addToken(Token.RESERVED_WORD); }
752 "protected" { addToken(Token.RESERVED_WORD); }
753 "public" { addToken(Token.RESERVED_WORD); }
754 "short" { addToken(Token.DATA_TYPE); }
755 "static" { addToken(Token.RESERVED_WORD); }
756 "super" { addToken(Token.RESERVED_WORD); }
757 "switch" { addToken(Token.RESERVED_WORD); }
758 "synchronized" { addToken(Token.RESERVED_WORD); }
759 "throw" { addToken(Token.RESERVED_WORD); }
760 "throws" { addToken(Token.RESERVED_WORD); }
761 "transient" { addToken(Token.RESERVED_WORD); }
762 "try" { addToken(Token.RESERVED_WORD); }
763 "volatile" { addToken(Token.RESERVED_WORD); }
764 "null" { addToken(Token.RESERVED_WORD); }
765
766 // Literals.
767 "false" |
768 "true" { addToken(Token.LITERAL_BOOLEAN); }
769 "NaN" { addToken(Token.RESERVED_WORD); }
770 "Infinity" { addToken(Token.RESERVED_WORD); }
771
772 // Functions.
773 "eval" |
774 "parseInt" |
775 "parseFloat" |
776 "escape" |
777 "unescape" |
778 "isNaN" |
779 "isFinite" { addToken(Token.FUNCTION); }
780
781 {LineTerminator} { addEndToken(INTERNAL_IN_JS); return firstToken; }
782 {JS_Identifier} { addToken(Token.IDENTIFIER); }
783 {Whitespace} { addToken(Token.WHITESPACE); }
784
785 /* String/Character literals. */
786 [\'] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_CHAR); }
787 [\"] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_STRING); }
788
789 /* Comment literals. */
790 "/**/" { addToken(Token.COMMENT_MULTILINE); }
791 {JS_MLCBegin} { start = zzMarkedPos-2; yybegin(JS_MLC); }
792 {JS_LineCommentBegin} { start = zzMarkedPos-2; yybegin(JS_EOL_COMMENT); }
793
794 /* Attempt to identify regular expressions (not foolproof) - do after comments! */
795 {JS_Regex} {
796 boolean highlightedAsRegex = false;
797 if (firstToken==null) {
798 addToken(Token.REGEX);
799 highlightedAsRegex = true;
800 }
801 else {
802 // If this is *likely* to be a regex, based on
803 // the previous token, highlight it as such.
804 Token t = firstToken.getLastNonCommentNonWhitespaceToken();
805 if (RSyntaxUtilities.regexCanFollowInJavaScript(t)) {
806 addToken(Token.REGEX);
807 highlightedAsRegex = true;
808 }
809 }
810 // If it doesn't *appear* to be a regex, highlight it as
811 // individual tokens.
812 if (!highlightedAsRegex) {
    // Emit only the leading '/' as an operator and rewind the scanner to
    // the character after it, so the rest of the match is scanned again.
813 int temp = zzStartRead + 1;
814 addToken(zzStartRead, zzStartRead, Token.OPERATOR);
815 zzStartRead = zzCurrentPos = zzMarkedPos = temp;
816 }
817 }
818
819 /* Separators. */
820 {JS_Separator} { addToken(Token.SEPARATOR); }
821 {JS_Separator2} { addToken(Token.IDENTIFIER); }
822
823 /* Operators. */
824 {JS_Operator} { addToken(Token.OPERATOR); }
825
826 /* Numbers */
827 {JS_IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
828 {JS_HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
829 {JS_FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
830 {JS_ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
831
832 {JS_ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
833
834 /* Ended with a line not in a string or comment. */
835 <<EOF>> { addEndToken(INTERNAL_IN_JS); return firstToken; }
836
837 /* Catch any other (unhandled) characters and flag them as bad. */
838 . { addToken(Token.ERROR_IDENTIFIER); }
839
840}
841
842<JS_STRING> {
    // Inside a double-quoted JavaScript string that began at 'start'.
    // validJSString turns false once a bare \x or \u (one not followed by
    // the required hex digits) is seen, and selects between the literal and
    // the error token type when the string is closed or continued.
843 [^\n\\\"]+ {}
    // A newline or end of line without a trailing backslash: the string is
    // unterminated, so it is flagged as an error and plain JS resumes.
844 \n { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addEndToken(INTERNAL_IN_JS); return firstToken; }
845 \\x{HexDigit}{2} {}
846 \\x { /* Invalid latin-1 character \xXX */ validJSString = false; }
847 \\u{HexDigit}{4} {}
848 \\u { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
849 \\. { /* Skip all escaped chars. */ }
850 \\ { /* Line ending in '\' => continue to next line. */
851 if (validJSString) {
852 addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE);
853 addEndToken(INTERNAL_IN_JS_STRING_VALID);
854 }
855 else {
856 addToken(start,zzStartRead, Token.ERROR_STRING_DOUBLE);
857 addEndToken(INTERNAL_IN_JS_STRING_INVALID);
858 }
859 return firstToken;
860 }
861 \" { int type = validJSString ? Token.LITERAL_STRING_DOUBLE_QUOTE : Token.ERROR_STRING_DOUBLE; addToken(start,zzStartRead, type); yybegin(JAVASCRIPT); }
862 <<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addEndToken(INTERNAL_IN_JS); return firstToken; }
863}
864
865<JS_CHAR> {
    // Inside a single-quoted JavaScript string that began at 'start'. The
    // rules mirror JS_STRING, with the single-quote token types and the
    // INTERNAL_IN_JS_CHAR_* end tokens.
866 [^\n\\\']+ {}
867 \n { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addEndToken(INTERNAL_IN_JS); return firstToken; }
868 \\x{HexDigit}{2} {}
869 \\x { /* Invalid latin-1 character \xXX */ validJSString = false; }
870 \\u{HexDigit}{4} {}
871 \\u { /* Invalid Unicode character \\uXXXX */ validJSString = false; }
872 \\. { /* Skip all escaped chars. */ }
873 \\ { /* Line ending in '\' => continue to next line. */
874 if (validJSString) {
875 addToken(start,zzStartRead, Token.LITERAL_CHAR);
876 addEndToken(INTERNAL_IN_JS_CHAR_VALID);
877 }
878 else {
879 addToken(start,zzStartRead, Token.ERROR_CHAR);
880 addEndToken(INTERNAL_IN_JS_CHAR_INVALID);
881 }
882 return firstToken;
883 }
884 \' { int type = validJSString ? Token.LITERAL_CHAR : Token.ERROR_CHAR; addToken(start,zzStartRead, type); yybegin(JAVASCRIPT); }
885 <<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addEndToken(INTERNAL_IN_JS); return firstToken; }
886}
887
888<JS_MLC> {
889 // JavaScript MLC's. This state is essentially Java's MLC state.
    // The comment text is accumulated from 'start' and emitted as
    // COMMENT_MULTILINE. 'h', 'w' and 'f' are kept out of the bulk rule so
    // that URLs can be matched and emitted as hyperlinks, and '<' so that a
    // closing script tag is noticed even inside an unterminated comment.
890 [^hwf<\n\*]+ {}
    // The text before a URL, and the URL itself, belong to a multi-line
    // comment, so both are emitted as COMMENT_MULTILINE. (They used to be
    // emitted as COMMENT_EOL, unlike every other rule in this state.)
891 {URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
892 [hwf] {}
893 {EndScriptTag} {
894 yybegin(YYINITIAL);
    // addToken() advances zzStartRead, so remember where the tag starts.
895 int temp = zzStartRead;
896 addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE);
897 addToken(temp,temp+1, Token.MARKUP_TAG_DELIMITER);
898 addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
899 addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
900 }
901 "<" {}
902 \n { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
903 {JS_MLCEnd} { yybegin(JAVASCRIPT); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
904 \* {}
905 <<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
906}
907
908<JS_EOL_COMMENT> {
    // Inside a JavaScript "//" comment that began at 'start'. The comment
    // ends with the line (the next line resumes in plain JavaScript) or at a
    // closing script tag. 'h', 'w', 'f' and '<' are kept out of the bulk rule
    // so that URLs and "</script>" can be matched.
909 [^hwf<\n]+ {}
910 {URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
911 [hwf] {}
912 {EndScriptTag} {
913 yybegin(YYINITIAL);
    // addToken() advances zzStartRead, so remember where the tag starts.
914 int temp = zzStartRead;
915 addToken(start,zzStartRead-1, Token.COMMENT_EOL);
916 addToken(temp,temp+1, Token.MARKUP_TAG_DELIMITER);
917 addToken(zzMarkedPos-7,zzMarkedPos-2, Token.MARKUP_TAG_NAME);
918 addToken(zzMarkedPos-1,zzMarkedPos-1, Token.MARKUP_TAG_DELIMITER);
919 }
920 "<" {}
921 \n { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addEndToken(INTERNAL_IN_JS); return firstToken; }
922 <<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addEndToken(INTERNAL_IN_JS); return firstToken; }
923
924}
Note: See TracBrowser for help on using the repository browser.