Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

JavaScriptTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut at a text edit area for GLI that supports color syntax highlighting
File size: 18.4 KB

Line
1	/*
2	* 02/05/2012
3	*
4	* JavaScriptTokenMaker.java - Parses a document into JavaScript tokens.
5	*
6	* This library is distributed under a modified BSD license. See the included
7	* RSyntaxTextArea.License.txt file for details.
8	*/
9	package org.fife.ui.rsyntaxtextarea.modes;
10
11	import java.io.*;
12	import javax.swing.text.Segment;
13
14	import org.fife.ui.rsyntaxtextarea.*;
15
16
17	/**
18	* Scanner for JavaScript files. Its states could be simplified, but are
19	* kept the way they are to keep a degree of similarity (i.e. copy/paste)
20	* between it and HTML/JSP/PHPTokenMaker. This should cause no difference in
21	* performance.<p>
22	*
23	* This implementation was created using
24	* <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
25	* was modified for performance. Memory allocation needs to be almost
26	* completely removed to be competitive with the handwritten lexers (subclasses
27	* of <code>AbstractTokenMaker</code>), so this class has been modified so that
28	* Strings are never allocated (via yytext()), and the scanner never has to
29	* worry about refilling its buffer (needlessly copying chars around).
30	* We can achieve this because RText always scans exactly 1 line of tokens at a
31	* time, and hands the scanner this line as an array of characters (a Segment
32	* really). Since tokens contain pointers to char arrays instead of Strings
33	* holding their contents, there is no need for allocating new memory for
34	* Strings.<p>
35	*
36	* The actual algorithm generated for scanning has, of course, not been
37	* modified.<p>
38	*
39	* If you wish to regenerate this file yourself, keep in mind the following:
40	* <ul>
41	* <li>The generated <code>JavaScriptTokenMaker.java</code> file will contain two
42	* definitions of both <code>zzRefill</code> and <code>yyreset</code>.
43	* You should hand-delete the second of each definition (the ones
44	* generated by the lexer), as these generated methods modify the input
45	* buffer, which we'll never have to do.</li>
46	* <li>You should also change the declaration/definition of zzBuffer to NOT
47	* be initialized. This is a needless memory allocation for us since we
48	* will be pointing the array somewhere else anyway.</li>
49	* <li>You should NOT call <code>yylex()</code> on the generated scanner
50	* directly; rather, you should use <code>getTokenList</code> as you would
51	* with any other <code>TokenMaker</code> instance.</li>
52	* </ul>
53	*
54	* @author Robert Futrell
55	* @version 0.8
56	*
57	*/
58	%%
59
60	%public
61	%class JavaScriptTokenMaker
62	%extends AbstractJFlexCTokenMaker
63	%unicode
64	%type org.fife.ui.rsyntaxtextarea.Token
65
66
67	%{
68
69	/**
70	* Internal end-of-line token type: the line ended inside a JavaScript multi-line comment.
71	*/
72	public static final int INTERNAL_IN_JS_MLC = -8;
73
74	/**
75	* Internal end-of-line token type: the line ended inside a double-quoted JS string already flagged invalid.
76	*/
77	public static final int INTERNAL_IN_JS_STRING_INVALID = -9;
78
79	/**
80	* Internal end-of-line token type: the line ended inside a double-quoted JS string that is valid so far.
81	*/
82	public static final int INTERNAL_IN_JS_STRING_VALID = -10;
83
84	/**
85	* Internal end-of-line token type: the line ended inside a single-quoted JS string already flagged invalid.
86	*/
87	public static final int INTERNAL_IN_JS_CHAR_INVALID = -11;
88
89	/**
90	* Internal end-of-line token type: the line ended inside a single-quoted JS string that is valid so far.
91	*/
92	public static final int INTERNAL_IN_JS_CHAR_VALID = -12;
93
94	/**
95	* While in the JS_STRING or JS_CHAR state, whether the current string is still valid (cleared on a malformed \x or \\u escape).
96	*/
97	private boolean validJSString;
98
99
100	/**
101	* Creates the token maker. Declared explicitly because JFlex does not generate a
102	* no-parameter constructor; it only delegates to the superclass constructor.
103	*/
104	public JavaScriptTokenMaker() {
105	super();
106	}
107
108
109	/**
110	* Adds a token of the given type whose start and end offsets are both
111	* <code>zzMarkedPos</code>, i.e. an "end token" placed at the current match end.
112	* The lexer rules use it to record an <code>INTERNAL_IN_JS_*</code> state as the last token of a line.
113	* @param tokenType The token's type.
114	*/
115	private void addEndToken(int tokenType) {
116	addToken(zzMarkedPos,zzMarkedPos, tokenType);
117	}
118
119
120	/**
121	* Adds a token covering buffer offsets <code>start</code> through <code>end</code>, passing <code>true</code> as the
122	* trailing argument of the six-argument <code>addToken</code> (presumably the "is hyperlink" flag - confirm against the superclass).
123	* @param tokenType The token's type.
124	* @see #addToken(int, int, int)
125	*/
126	private void addHyperlinkToken(int start, int end, int tokenType) {
127	int so = start + offsetShift;
128	addToken(zzBuffer, start,end, tokenType, so, true);
129	}
130
131
132	/**
133	* Adds a token of the given type spanning the current match, from
134	* <code>zzStartRead</code> through <code>zzMarkedPos-1</code>, to the current linked list of tokens.
135	* @param tokenType The token's type.
136	*/
137	private void addToken(int tokenType) {
138	addToken(zzStartRead, zzMarkedPos-1, tokenType);
139	}
140
141
142	/**
143	* Adds a token covering offsets <code>start</code> through <code>end</code> of <code>zzBuffer</code> to the current linked list of tokens.
144	* The token's document offset is computed as <code>start + offsetShift</code>.
145	* @param tokenType The token's type.
146	*/
147	private void addToken(int start, int end, int tokenType) {
148	int so = start + offsetShift;
149	addToken(zzBuffer, start,end, tokenType, so);
150	}
151
152
153	/**
154	* Adds the token specified to the current linked list of tokens via the
155	* superclass, then advances <code>zzStartRead</code> to <code>zzMarkedPos</code>.
156	* @param array The character array.
157	* @param start The starting offset in the array.
158	* @param end The ending offset in the array.
159	* @param tokenType The token's type.
160	* @param startOffset The offset in the document at which this token
161	* occurs.
162	*/
163	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
164	super.addToken(array, start,end, tokenType, startOffset);
165	zzStartRead = zzMarkedPos;
166	}
167
168
169	/**
170	* {@inheritDoc} Here the start is <code>"//"</code> and the end entry is <code>null</code>.
171	*/
172	public String[] getLineCommentStartAndEnd() {
173	return new String[] { "//", null };
174	}
175
176
177	/**
178	* Tokenizes <code>text</code> and returns the first token in the
179	* resulting linked list. Scanning begins in the lexical state selected
180	* by <code>initialTokenType</code>.
181	*
182	* @param text The text from which to get tokens.
183	* @param initialTokenType The token type we should start with; an
184	* <code>INTERNAL_IN_JS_*</code> value resumes inside a comment or string.
185	* @param startOffset The offset into the document at which <code>text</code> starts.
186	* @return The first <code>Token</code> in a linked list representing
187	* the syntax highlighted text, or a new <code>DefaultToken</code> if scanning throws an <code>IOException</code>.
188	*/
189	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
190
191	resetTokenList();
192	this.offsetShift = -text.offset + startOffset;
193
194	// Pick the lexical state to resume in; a continued comment/string token begins at the start of this segment.
195	int state = Token.NULL;
196	switch (initialTokenType) {
197	case INTERNAL_IN_JS_MLC:
198	state = JS_MLC;
199	start = text.offset;
200	break;
201	case INTERNAL_IN_JS_STRING_INVALID:
202	state = JS_STRING;
203	validJSString = false;
204	start = text.offset;
205	break;
206	case INTERNAL_IN_JS_STRING_VALID:
207	state = JS_STRING;
208	validJSString = true;
209	start = text.offset;
210	break;
211	case INTERNAL_IN_JS_CHAR_INVALID:
212	state = JS_CHAR;
213	validJSString = false;
214	start = text.offset;
215	break;
216	case INTERNAL_IN_JS_CHAR_VALID:
217	state = JS_CHAR;
218	validJSString = true;
219	start = text.offset;
220	break;
221	default:
222	state = Token.NULL;
223	}
224
225	s = text;
226	try {
227	yyreset(zzReader);
228	yybegin(state);
229	return yylex();
230	} catch (IOException ioe) {
231	ioe.printStackTrace();
232	return new DefaultToken();
233	}
234
235	}
236
237
238	/**
239	* Stands in for the generated buffer refill; nothing is read or copied here.
240	*
241	* @return <code>true</code> if <code>zzCurrentPos</code> is at or past the end of
242	* the segment <code>s</code> (reported as EOF), otherwise <code>false</code>.
243	*/
244	private boolean zzRefill() {
245	return zzCurrentPos>=s.offset+s.count;
246	}
247
248
249	/**
250	* Resets the scanner to scan the current segment <code>s</code>.
251	* Does not close the old reader.
252	*
253	* The buffer is pointed at <code>s.array</code>, the position fields are set to
254	* <code>s.offset</code>, and <code>zzEndRead</code> to the last index of the segment.
255	* Lexical state is set to <tt>YYINITIAL</tt>.
256	*
257	* @param reader the new input stream; it is only stored in <code>zzReader</code> here
258	*/
259	public final void yyreset(java.io.Reader reader) {
260	// 's' has been updated by the caller before this method runs.
261	zzBuffer = s.array;
262	/*
263	* We replaced the line below with the two below it because zzRefill
264	* no longer "refills" the buffer (since the way we do it, it's always
265	* "full" the first time through, since it points to the segment's
266	* array). So, we assign zzEndRead here.
267	*/
268	//zzStartRead = zzEndRead = s.offset;
269	zzStartRead = s.offset;
270	zzEndRead = zzStartRead + s.count - 1;
271	zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
272	zzLexicalState = YYINITIAL;
273	zzReader = reader;
274	zzAtBOL = true;
275	zzAtEOF = false;
276	}
277
278
279	%}
280
281	Whitespace = ([ \t\f]+)
282	LineTerminator = ([\n])
283
284	Letter = [A-Za-z]
285	NonzeroDigit = [1-9]
286	Digit = ("0"\|{NonzeroDigit})
287	HexDigit = ({Digit}\|[A-Fa-f])
288	OctalDigit = ([0-7])
289	EscapedSourceCharacter = ("u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
	/* Any character that cannot end an identifier or number. "(" and ")" are excluded so that a call such as foo(bar) is not swallowed by JS_ErrorIdentifier. */
290	NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\\|\^\%\"\']\|"#"\|"\\")
291	IdentifierStart = ({Letter}\|"_"\|"$")
292	IdentifierPart = ({IdentifierStart}\|{Digit}\|("\\"{EscapedSourceCharacter}))
293	JS_MLCBegin = "/*"
294	JS_MLCEnd = "*/"
295	JS_LineCommentBegin = "//"
296	JS_IntegerHelper1 = (({NonzeroDigit}{Digit}*)\|"0")
297	JS_IntegerHelper2 = ("0"(([xX]{HexDigit}+)\|({OctalDigit}*)))
298	JS_IntegerLiteral = ({JS_IntegerHelper1}[lL]?)
299	JS_HexLiteral = ({JS_IntegerHelper2}[lL]?)
300	JS_FloatHelper1 = ([fFdD]?)
301	JS_FloatHelper2 = ([eE][+-]?{Digit}+{JS_FloatHelper1})
302	JS_FloatLiteral1 = ({Digit}+"."({JS_FloatHelper1}\|{JS_FloatHelper2}\|{Digit}+({JS_FloatHelper1}\|{JS_FloatHelper2})))
303	JS_FloatLiteral2 = ("."{Digit}+({JS_FloatHelper1}\|{JS_FloatHelper2}))
304	JS_FloatLiteral3 = ({Digit}+{JS_FloatHelper2})
305	JS_FloatLiteral = ({JS_FloatLiteral1}\|{JS_FloatLiteral2}\|{JS_FloatLiteral3}\|({Digit}+[fFdD]))
306	JS_ErrorNumberFormat = (({JS_IntegerLiteral}\|{JS_HexLiteral}\|{JS_FloatLiteral}){NonSeparator}+)
	/* Bracketing separators, including parentheses. */
307	JS_Separator = ([\(\)\{\}\[\]\]])
308	JS_Separator2 = ([\;,.])
	/* "<<" and "||" are listed so each is emitted as a single operator token (the list previously repeated ">>" instead). */
309	JS_NonAssignmentOperator = ("+"\|"-"\|"<="\|"^"\|"++"\|"<"\|"*"\|">="\|"%"\|"--"\|">"\|"/"\|"!="\|"?"\|">>"\|"!"\|"&"\|"=="\|":"\|"<<"\|"~"\|"\|"\|"\|\|"\|"&&"\|">>>")
310	JS_AssignmentOperator = ("="\|"-="\|"*="\|"/="\|"\|="\|"&="\|"^="\|"+="\|"%="\|"<<="\|">>="\|">>>=")
311	JS_Operator = ({JS_NonAssignmentOperator}\|{JS_AssignmentOperator})
312	JS_Identifier = ({IdentifierStart}{IdentifierPart}*)
313	JS_ErrorIdentifier = ({NonSeparator}+)
	/* Regex literal: "/", one body element that is not "*", "\" or "/" (so comment openers never match), any number of further body elements, "/", then optional flags. */
314	JS_Regex = ("/"([^\*\\\/]\|\\.)([^/\\]\|\\.)*"/"[gim]*)
315
316	URLGenDelim = ([:\/\?#\[\]@])
317	URLSubDelim = ([\!\$&'\*\+,;=])
318	URLUnreserved = ({Letter}\|"_"\|{Digit}\|[\-\.\~])
319	URLCharacter = ({URLGenDelim}\|{URLSubDelim}\|{URLUnreserved}\|[%])
320	URLCharacters = ({URLCharacter}*)
321	URLEndCharacter = ([\/\$]\|{Letter}\|{Digit})
322	URL = (((https?\|f(tp\|ile))"://"\|"www.")({URLCharacters}{URLEndCharacter})?)
323
324
325	%state JS_STRING
326	%state JS_CHAR
327	%state JS_MLC
328	%state JS_EOL_COMMENT
329
330
331	%%
332
333	<YYINITIAL> {
334
335	// ECMA keywords ("return" gets its own token type, RESERVED_WORD_2).
336	"break" \|
337	"continue" \|
338	"delete" \|
339	"else" \|
340	"for" \|
341	"function" \|
342	"if" \|
343	"in" \|
344	"new" \|
345	"this" \|
346	"typeof" \|
347	"var" \|
348	"void" \|
349	"while" \|
350	"with" { addToken(Token.RESERVED_WORD); }
351	"return" { addToken(Token.RESERVED_WORD_2); }
352
353	// Reserved (but not yet used) ECMA keywords; primitive type names are tagged DATA_TYPE.
354	"abstract" { addToken(Token.RESERVED_WORD); }
355	"boolean" { addToken(Token.DATA_TYPE); }
356	"byte" { addToken(Token.DATA_TYPE); }
357	"case" { addToken(Token.RESERVED_WORD); }
358	"catch" { addToken(Token.RESERVED_WORD); }
359	"char" { addToken(Token.DATA_TYPE); }
360	"class" { addToken(Token.RESERVED_WORD); }
361	"const" { addToken(Token.RESERVED_WORD); }
362	"debugger" { addToken(Token.RESERVED_WORD); }
363	"default" { addToken(Token.RESERVED_WORD); }
364	"do" { addToken(Token.RESERVED_WORD); }
365	"double" { addToken(Token.DATA_TYPE); }
366	"enum" { addToken(Token.RESERVED_WORD); }
367	"export" { addToken(Token.RESERVED_WORD); }
368	"extends" { addToken(Token.RESERVED_WORD); }
369	"final" { addToken(Token.RESERVED_WORD); }
370	"finally" { addToken(Token.RESERVED_WORD); }
371	"float" { addToken(Token.DATA_TYPE); }
372	"goto" { addToken(Token.RESERVED_WORD); }
373	"implements" { addToken(Token.RESERVED_WORD); }
374	"import" { addToken(Token.RESERVED_WORD); }
375	"instanceof" { addToken(Token.RESERVED_WORD); }
376	"int" { addToken(Token.DATA_TYPE); }
377	"interface" { addToken(Token.RESERVED_WORD); }
378	"long" { addToken(Token.DATA_TYPE); }
379	"native" { addToken(Token.RESERVED_WORD); }
380	"package" { addToken(Token.RESERVED_WORD); }
381	"private" { addToken(Token.RESERVED_WORD); }
382	"protected" { addToken(Token.RESERVED_WORD); }
383	"public" { addToken(Token.RESERVED_WORD); }
384	"short" { addToken(Token.DATA_TYPE); }
385	"static" { addToken(Token.RESERVED_WORD); }
386	"super" { addToken(Token.RESERVED_WORD); }
387	"switch" { addToken(Token.RESERVED_WORD); }
388	"synchronized" { addToken(Token.RESERVED_WORD); }
389	"throw" { addToken(Token.RESERVED_WORD); }
390	"throws" { addToken(Token.RESERVED_WORD); }
391	"transient" { addToken(Token.RESERVED_WORD); }
392	"try" { addToken(Token.RESERVED_WORD); }
393	"volatile" { addToken(Token.RESERVED_WORD); }
394	"null" { addToken(Token.RESERVED_WORD); }
395
396	// Literals ("NaN" and "Infinity" are highlighted as reserved words).
397	"false" \|
398	"true" { addToken(Token.LITERAL_BOOLEAN); }
399	"NaN" { addToken(Token.RESERVED_WORD); }
400	"Infinity" { addToken(Token.RESERVED_WORD); }
401
402	// Built-in global functions.
403	"eval" \|
404	"parseInt" \|
405	"parseFloat" \|
406	"escape" \|
407	"unescape" \|
408	"isNaN" \|
409	"isFinite" { addToken(Token.FUNCTION); }
410
411	{LineTerminator} { addNullToken(); return firstToken; }
412	{JS_Identifier} { addToken(Token.IDENTIFIER); }
413	{Whitespace} { addToken(Token.WHITESPACE); }
414
415	/* String/Character literals: remember where the opening quote is and switch state. */
416	[\'] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_CHAR); }
417	[\"] { start = zzMarkedPos-1; validJSString = true; yybegin(JS_STRING); }
418
419	/* Comment literals: remember where the two-character opener is and switch state. */
420	"/**/" { addToken(Token.COMMENT_MULTILINE); }
421	{JS_MLCBegin} { start = zzMarkedPos-2; yybegin(JS_MLC); }
422	{JS_LineCommentBegin} { start = zzMarkedPos-2; yybegin(JS_EOL_COMMENT); }
423
424	/* Attempt to identify regular expressions (not foolproof) - do after comments! */
425	{JS_Regex} {
426	boolean highlightedAsRegex = false;
427	if (firstToken==null) {
428	addToken(Token.REGEX);
429	highlightedAsRegex = true;
430	}
431	else {
432	// If this is likely to be a regex, based on
433	// the previous token, highlight it as such.
434	Token t = firstToken.getLastNonCommentNonWhitespaceToken();
435	if (RSyntaxUtilities.regexCanFollowInJavaScript(t)) {
436	addToken(Token.REGEX);
437	highlightedAsRegex = true;
438	}
439	}
440	// Otherwise emit only the leading '/' as an operator and rewind the
441	// scanner so the rest of the match is tokenized again as individual tokens.
442	if (!highlightedAsRegex) {
443	int temp = zzStartRead + 1;
444	addToken(zzStartRead, zzStartRead, Token.OPERATOR);
445	zzStartRead = zzCurrentPos = zzMarkedPos = temp;
446	}
447	}
448
449	/* Separators (note: the JS_Separator2 characters are emitted as IDENTIFIER tokens). */
450	{JS_Separator} { addToken(Token.SEPARATOR); }
451	{JS_Separator2} { addToken(Token.IDENTIFIER); }
452
453	/* Operators. */
454	{JS_Operator} { addToken(Token.OPERATOR); }
455
456	/* Numbers */
457	{JS_IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
458	{JS_HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
459	{JS_FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
460	{JS_ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
461
462	{JS_ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
463
464	/* Ended with a line not in a string or comment. */
465	<<EOF>> { addNullToken(); return firstToken; }
466
467	/* Catch any other (unhandled) characters and flag them as bad. */
468	. { addToken(Token.ERROR_IDENTIFIER); }
469
470	}
471
472	<JS_STRING> {
473	[^\n\\\"]+ {}
474	\n { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
475	\\x{HexDigit}{2} {}
476	\\x { /* Malformed latin-1 escape (\x not followed by two hex digits): flag the string invalid. */ validJSString = false; }
477	\\u{HexDigit}{4} {}
478	\\u { /* Malformed Unicode escape (\\uXXXX without four hex digits): flag the string invalid. */ validJSString = false; }
479	\\. { /* Any other escaped character is simply consumed as part of the string. */ }
480	\\ { /* A lone '\' ends the line: emit the string so far, then an end token recording its validity so the next line resumes in this state. */
481	if (validJSString) {
482	addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE);
483	addEndToken(INTERNAL_IN_JS_STRING_VALID);
484	}
485	else {
486	addToken(start,zzStartRead, Token.ERROR_STRING_DOUBLE);
487	addEndToken(INTERNAL_IN_JS_STRING_INVALID);
488	}
489	return firstToken;
490	}
491	\" { int type = validJSString ? Token.LITERAL_STRING_DOUBLE_QUOTE : Token.ERROR_STRING_DOUBLE; addToken(start,zzStartRead, type); yybegin(YYINITIAL); }
492	<<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
493	}
494
495	<JS_CHAR> {
496	[^\n\\\']+ {}
497	\n { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addNullToken(); return firstToken; }
498	\\x{HexDigit}{2} {}
499	\\x { /* Malformed latin-1 escape (\x not followed by two hex digits): flag the string invalid. */ validJSString = false; }
500	\\u{HexDigit}{4} {}
501	\\u { /* Malformed Unicode escape (\\uXXXX without four hex digits): flag the string invalid. */ validJSString = false; }
502	\\. { /* Any other escaped character is simply consumed as part of the string. */ }
503	\\ { /* A lone '\' ends the line: emit the string so far, then an end token recording its validity so the next line resumes in this state. */
504	if (validJSString) {
505	addToken(start,zzStartRead, Token.LITERAL_CHAR);
506	addEndToken(INTERNAL_IN_JS_CHAR_VALID);
507	}
508	else {
509	addToken(start,zzStartRead, Token.ERROR_CHAR);
510	addEndToken(INTERNAL_IN_JS_CHAR_INVALID);
511	}
512	return firstToken;
513	}
514	\' { int type = validJSString ? Token.LITERAL_CHAR : Token.ERROR_CHAR; addToken(start,zzStartRead, type); yybegin(YYINITIAL); }
515	<<EOF>> { addToken(start,zzStartRead-1, Token.ERROR_CHAR); addNullToken(); return firstToken; }
516	}
517
518	<JS_MLC> {
519	// JavaScript MLC's. This state is essentially Java's MLC state. Everything emitted here, including the text before a URL and the URL itself, is typed COMMENT_MULTILINE; a comment still open at end of line is followed by an INTERNAL_IN_JS_MLC end token.
520	[^hwf\n\*]+ {}
521	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
522	[hwf] {}
523	\n { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
524	{JS_MLCEnd} { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
525	\* {}
526	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_JS_MLC); return firstToken; }
527	}
528
529	<JS_EOL_COMMENT> {
	// End-of-line comment. Text is accumulated from 'start'; a URL is split out as its own hyperlink token, and the newline or end of input closes the comment and ends the line's token list.
530	[^hwf\n]+ {}
531	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
532	[hwf] {}
533	\n \|
534	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addNullToken(); return firstToken; }
535	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format