Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

CTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut of a text edit area for GLI that supports color syntax highlighting
File size: 15.4 KB

Line
1	/*
2	* 11/13/2004
3	*
4	* CTokenMaker.java - An object that can take a chunk of text and
5	* return a linked list of tokens representing it in the C programming
6	* language.
7	*
8	* This library is distributed under a modified BSD license. See the included
9	* RSyntaxTextArea.License.txt file for details.
10	*/
11	package org.fife.ui.rsyntaxtextarea.modes;
12
13	import java.io.*;
14	import javax.swing.text.Segment;
15
16	import org.fife.ui.rsyntaxtextarea.*;
17
18
19	/**
20	* Scanner for the C programming language.
21	*
22	* This implementation was created using
23	* <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
24	* was modified for performance. Memory allocation needs to be almost
25	* completely removed to be competitive with the handwritten lexers (subclasses
26	* of <code>AbstractTokenMaker</code>), so this class has been modified so that
27	* Strings are never allocated (via yytext()), and the scanner never has to
28	* worry about refilling its buffer (needlessly copying chars around).
29	* We can achieve this because RText always scans exactly 1 line of tokens at a
30	* time, and hands the scanner this line as an array of characters (a Segment
31	* really). Since tokens contain pointers to char arrays instead of Strings
32	* holding their contents, there is no need for allocating new memory for
33	* Strings.<p>
34	*
35	* The actual algorithm generated for scanning has, of course, not been
36	* modified.<p>
37	*
38	* If you wish to regenerate this file yourself, keep in mind the following:
39	* <ul>
40	* <li>The generated <code>CTokenMaker.java</code> file will contain two
41	* definitions of both <code>zzRefill</code> and <code>yyreset</code>.
42	* You should hand-delete the second of each definition (the ones
43	* generated by the lexer), as these generated methods modify the input
44	* buffer, which we'll never have to do.</li>
45	* <li>You should also change the declaration/definition of zzBuffer to NOT
46	* be initialized. This is a needless memory allocation for us since we
47	* will be pointing the array somewhere else anyway.</li>
48	* <li>You should NOT call <code>yylex()</code> on the generated scanner
49	* directly; rather, you should use <code>getTokenList</code> as you would
50	* with any other <code>TokenMaker</code> instance.</li>
51	* </ul>
52	*
53	* @author Robert Futrell
54	* @version 0.5
55	*
56	*/
57	%%
58	/* JFlex options: generate a public class CTokenMaker extending AbstractJFlexCTokenMaker, scanning Unicode input, whose yylex() returns Token. */
59	%public
60	%class CTokenMaker
61	%extends AbstractJFlexCTokenMaker
62	%unicode
63	%type org.fife.ui.rsyntaxtextarea.Token
64
65
66	%{
67
68
69	/**
70	 * Creates the scanner. Declared explicitly because JFlex does not emit a
71	 * no-argument constructor of its own.
72	 */
73	public CTokenMaker() {
74		// The superclass no-argument constructor runs implicitly.
75	}
76
77
78	/**
79	 * Appends the buffer range [start, end] to the token list via the six-argument
80	 * <code>addToken</code>, passing <code>true</code> (presumably the hyperlink flag).
81	 * @param start Index in the scan buffer of the token's first character.
82	 * @param end Index in the scan buffer of the token's last character.
83	 * @param tokenType The token's type.
84	 */
85	private void addHyperlinkToken(int start, int end, int tokenType) {
86		addToken(zzBuffer, start, end, tokenType, start + offsetShift, true);
87	}
88
89
90	/**
91	* Adds the token specified to the current linked list of tokens.
92	*
93	* @param tokenType The token's type.
94	*/
95	private void addToken(int tokenType) {
96	addToken(zzStartRead, zzMarkedPos-1, tokenType);
97	}
98
99
100	/**
101	 * Adds the buffer range [start, end] as a token, shifting start by offsetShift.
102	 * @param start Index in the scan buffer of the token's first character.
103	 * @param end Index in the scan buffer of the token's last character.
104	 * @param tokenType The token's type.
105	 */
106	private void addToken(int start, int end, int tokenType) {
107		addToken(zzBuffer, start, end, tokenType, start + offsetShift);
108	}
109
110
111	/**
112	* Adds the token specified to the current linked list of tokens.
113	*
114	* @param array The character array.
115	* @param start The starting offset in the array.
116	* @param end The ending offset in the array.
117	* @param tokenType The token's type.
118	* @param startOffset The offset in the document at which this token
119	* occurs.
120	*/
121	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
122	super.addToken(array, start,end, tokenType, startOffset);
123	zzStartRead = zzMarkedPos;
124	}
125
126
127	/**
128	 * Returns the markers used to comment out a single line in this language.
129	 *
130	 * @return A two-element array: the comment start string <code>"//"</code>
131	 *         followed by <code>null</code> (no end string).
132	 */
133	public String[] getLineCommentStartAndEnd() {
134		final String[] startAndEnd = { "//", null };
135		return startAndEnd;
136	}
137
138
139	/**
140	 * Tokenizes the given text and returns the head of the resulting linked
141	 * list of tokens.
142	 *
143	 * @param text The text from which to get tokens.
144	 * @param initialTokenType The token type the text starts in; only
145	 *        <code>Token.COMMENT_MULTILINE</code> selects a non-default state.
146	 * @param startOffset The offset into the document at which
147	 *        <code>text</code> starts.
148	 * @return The first <code>Token</code> in the list, or a new
149	 *         <code>DefaultToken</code> if scanning threw an <code>IOException</code>.
150	 */
151	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
152
153		resetTokenList();
154		this.offsetShift = startOffset - text.offset;
155
156		// Pick the lexical state to resume in.
157		int state;
158		if (initialTokenType == Token.COMMENT_MULTILINE) {
159			// Still inside a block comment carried over from earlier text.
160			state = MLC;
161			start = text.offset;
162		}
163		else {
164			state = Token.NULL;
165		}
166
167		s = text;
168		try {
169			yyreset(zzReader);
170			yybegin(state);
171			return yylex();
172		} catch (IOException ioe) {
173			ioe.printStackTrace();
174			return new DefaultToken();
175		}
176
177	}
178
179
180	/**
181	* Refills the input buffer.
182	*
183	* @return <code>true</code> if EOF was reached, otherwise
184	* <code>false</code>.
185	* @exception IOException if any I/O-Error occurs.
186	*/
187	private boolean zzRefill() throws java.io.IOException {
188	return zzCurrentPos>=s.offset+s.count;
189	}
190
191
192	/**
193	 * Points the scanner at the current segment <code>s</code> and resets its
194	 * position markers. The old reader is not closed.
195	 *
196	 * The buffer is aliased to the segment's array rather than copied, and the
197	 * lexical state is set to <code>YYINITIAL</code>.
198	 *
199	 * @param reader the new input stream; it is only stored, not read, here
200	 */
201	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
202		zzReader = reader;
203		// 's' has already been updated by the caller; scan its array in place.
204		zzBuffer = s.array;
205		/*
206		 * zzRefill no longer "refills" the buffer (it is always "full" the first
207		 * time through, since it points to the segment's array), so zzEndRead is
208		 * assigned here instead of being left equal to zzStartRead.
209		 */
210		final int segmentStart = s.offset;
211		zzStartRead = segmentStart;
212		zzEndRead = segmentStart + s.count - 1;
213		zzCurrentPos = segmentStart;
214		zzMarkedPos = segmentStart;
215		zzPushbackPos = segmentStart;
216		zzLexicalState = YYINITIAL;
217		zzAtBOL = true;
218		zzAtEOF = false;
219	}
220
221
222	%}
223
224	Letter = [A-Za-z]
225	LetterOrUnderscore = ({Letter}|[_])
226	Digit = [0-9]
227	HexDigit = {Digit}|[A-Fa-f]
228	OctalDigit = [0-7]
229	Exponent = [eE][+-]?{Digit}+
230
231	PreprocessorWord = define|elif|else|endif|error|if|ifdef|ifndef|include|line|pragma|undef
232
233	Trigraph = ("??="|"??("|"??)"|"??/"|"??'"|"??<"|"??>"|"??!"|"??-")
234	/* Escape sequences usable inside character and string literals. */
235	OctEscape1 = ([\\]{OctalDigit})
236	OctEscape2 = ([\\]{OctalDigit}{OctalDigit})
237	OctEscape3 = ([\\][0-3]{OctalDigit}{OctalDigit})
238	OctEscape = ({OctEscape1}|{OctEscape2}|{OctEscape3})
239	HexEscape = ([\\][xX]{HexDigit}{HexDigit})
240	/* Character and string literals, including the malformed (error) variants. */
241	AnyChrChr = ([^\'\n\\])
242	Escape = ([\\]([abfnrtv\'\"\?\\0]))
243	UnclosedCharLiteral = ([\']({Escape}|{OctEscape}|{HexEscape}|{Trigraph}|{AnyChrChr}))
244	CharLiteral = ({UnclosedCharLiteral}[\'])
245	ErrorUnclosedCharLiteral = ([\'][^\'\n]*)
246	ErrorCharLiteral = (([\'][\'])|{ErrorUnclosedCharLiteral}[\'])
247	AnyStrChr = ([^\"\n\\])
248	FalseTrigraph = (("?"(("?")*)[^\=\(\)\/\'\<\>\!\-\\\?\"\n])|("?"[\=\(\)\/\'\<\>\!\-]))
249	StringLiteral = ([\"]((((("?")*)({Escape}|{OctEscape}|{HexEscape}|{Trigraph}))|{FalseTrigraph}|{AnyStrChr})*)(("?")*)[\"])
250	UnclosedStringLiteral = ([\"]([\\].|[^\\\"])*[^\"]?)
251	ErrorStringLiteral = ({UnclosedStringLiteral}[\"])
252
253	/* Line structure and comment delimiters. */
254	LineTerminator = \n
255	WhiteSpace = [ \t\f]
256
257	MLCBegin = "/*"
258	MLCEnd = "*/"
259	LineCommentBegin = "//"
260	/* Numeric literals; a valid literal followed by non-separator characters is a format error. */
261	NonFloatSuffix = (([uU][lL]?)|([lL][uU]?))
262	IntegerLiteral = ({Digit}+{Exponent}?{NonFloatSuffix}?)
263	HexLiteral = ("0"[xX]{HexDigit}+{NonFloatSuffix}?)
264	FloatLiteral = ((({Digit}*[\.]{Digit}+)|({Digit}+[\.]{Digit}*)){Exponent}?[fFlL]?)
265	ErrorNumberFormat = (({IntegerLiteral}|{HexLiteral}|{FloatLiteral}){NonSeparator}+)
266	/* NonSeparator excludes whitespace, brackets (parentheses included), operators and quotes, so a run of it never swallows a separator. */
267	NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")
268	Identifier = ({LetterOrUnderscore}({LetterOrUnderscore}|{Digit}|[$])*)
269	ErrorIdentifier = ({NonSeparator}+)
270
271	/* URLs; matched inside comments so they can be emitted as hyperlink tokens. */
272	URLGenDelim = ([:\/\?#\[\]@])
273	URLSubDelim = ([\!\$&'\(\)\*\+,;=])
274	URLUnreserved = ({LetterOrUnderscore}|{Digit}|[\-\.\~])
275	URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
276	URLCharacters = ({URLCharacter}*)
277	URLEndCharacter = ([\/\$]|{Letter}|{Digit})
278	URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
279	/* Lexical states entered after a block-comment or line-comment opener. */
280	%state MLC
281	%state EOL_COMMENT
282
283	%%
284
285	<YYINITIAL> {
286	/* Default state: source text outside of any comment. */
287	/* Keywords */
288	"auto" |
289	"break" |
290	"case" |
291	"const" |
292	"continue" |
293	"default" |
294	"do" |
295	"else" |
296	"enum" |
297	"extern" |
298	"for" |
299	"goto" |
300	"if" |
301	"register" |
302	"return" |
303	"sizeof" |
304	"static" |
305	"struct" |
306	"switch" |
307	"typedef" |
308	"union" |
309	"volatile" |
310	"while" { addToken(Token.RESERVED_WORD); }
311
312	/* Data types. */
313	"char" |
314	"div_t" |
315	"double" |
316	"float" |
317	"int" |
318	"ldiv_t" |
319	"long" |
320	"short" |
321	"signed" |
322	"size_t" |
323	"unsigned" |
324	"void" |
325	"wchar_t" { addToken(Token.DATA_TYPE); }
326
327	/* Standard functions */
328	"abort" |
329	"abs" |
330	"acos" |
331	"asctime" |
332	"asin" |
333	"assert" |
334	"atan2" |
335	"atan" |
336	"atexit" |
337	"atof" |
338	"atoi" |
339	"atol" |
340	"bsearch" |
341	"btowc" |
342	"calloc" |
343	"ceil" |
344	"clearerr" |
345	"clock" |
346	"cosh" |
347	"cos" |
348	"ctime" |
349	"difftime" |
350	"div" |
351	"errno" |
352	"exit" |
353	"exp" |
354	"fabs" |
355	"fclose" |
356	"feof" |
357	"ferror" |
358	"fflush" |
359	"fgetc" |
360	"fgetpos" |
361	"fgetwc" |
362	"fgets" |
363	"fgetws" |
364	"floor" |
365	"fmod" |
366	"fopen" |
367	"fprintf" |
368	"fputc" |
369	"fputs" |
370	"fputwc" |
371	"fputws" |
372	"fread" |
373	"free" |
374	"freopen" |
375	"frexp" |
376	"fscanf" |
377	"fseek" |
378	"fsetpos" |
379	"ftell" |
380	"fwprintf" |
381	"fwrite" |
382	"fwscanf" |
383	"getchar" |
384	"getc" |
385	"getenv" |
386	"gets" |
387	"getwc" |
388	"getwchar" |
389	"gmtime" |
390	"isalnum" |
391	"isalpha" |
392	"iscntrl" |
393	"isdigit" |
394	"isgraph" |
395	"islower" |
396	"isprint" |
397	"ispunct" |
398	"isspace" |
399	"isupper" |
400	"isxdigit" |
401	"labs" |
402	"ldexp" |
403	"ldiv" |
404	"localeconv" |
405	"localtime" |
406	"log10" |
407	"log" |
408	"longjmp" |
409	"malloc" |
410	"mblen" |
411	"mbrlen" |
412	"mbrtowc" |
413	"mbsinit" |
414	"mbsrtowcs" |
415	"mbstowcs" |
416	"mbtowc" |
417	"memchr" |
418	"memcmp" |
419	"memcpy" |
420	"memmove" |
421	"memset" |
422	"mktime" |
423	"modf" |
424	"offsetof" |
425	"perror" |
426	"pow" |
427	"printf" |
428	"putchar" |
429	"putc" |
430	"puts" |
431	"putwc" |
432	"putwchar" |
433	"qsort" |
434	"raise" |
435	"rand" |
436	"realloc" |
437	"remove" |
438	"rename" |
439	"rewind" |
440	"scanf" |
441	"setbuf" |
442	"setjmp" |
443	"setlocale" |
444	"setvbuf" |
446	"signal" |
447	"sinh" |
448	"sin" |
449	"sprintf" |
450	"sqrt" |
451	"srand" |
452	"sscanf" |
453	"strcat" |
454	"strchr" |
455	"strcmp" |
457	"strcoll" |
458	"strcpy" |
459	"strcspn" |
460	"strerror" |
461	"strftime" |
462	"strlen" |
463	"strncat" |
464	"strncmp" |
465	"strncpy" |
466	"strpbrk" |
467	"strrchr" |
468	"strspn" |
469	"strstr" |
470	"strtod" |
471	"strtok" |
472	"strtol" |
473	"strtoul" |
474	"strxfrm" |
475	"swprintf" |
476	"swscanf" |
477	"system" |
478	"tanh" |
479	"tan" |
480	"time" |
481	"tmpfile" |
482	"tmpnam" |
483	"tolower" |
484	"toupper" |
485	"ungetc" |
486	"ungetwc" |
487	"va_arg" |
488	"va_end" |
489	"va_start" |
490	"vfprintf" |
491	"vfwprintf" |
492	"vprintf" |
493	"vsprintf" |
494	"vswprintf" |
495	"vwprintf" |
496	"wcrtomb" |
497	"wcscat" |
498	"wcschr" |
499	"wcscmp" |
500	"wcscoll" |
501	"wcscpy" |
502	"wcscspn" |
503	"wcsftime" |
504	"wcslen" |
505	"wcsncat" |
506	"wcsncmp" |
507	"wcsncpy" |
508	"wcspbrk" |
509	"wcsrchr" |
510	"wcsrtombs" |
511	"wcsspn" |
512	"wcsstr" |
513	"wcstod" |
514	"wcstok" |
515	"wcstol" |
516	"wcstombs" |
517	"wcstoul" |
518	"wcsxfrm" |
519	"wctob" |
520	"wctomb" |
521	"wmemchr" |
522	"wmemcmp" |
523	"wmemcpy" |
524	"wmemmove" |
525	"wmemset" |
526	"wprintf" |
527	"wscanf" { addToken(Token.FUNCTION); }
528
529	/* Standard-defined macros. */
530	"__DATE__" |
531	"__TIME__" |
532	"__FILE__" |
533	"__LINE__" |
534	"__STDC__" { addToken(Token.PREPROCESSOR); }
535
536	{LineTerminator} { addNullToken(); return firstToken; }
537
538	{Identifier} { addToken(Token.IDENTIFIER); }
539
540	{WhiteSpace}+ { addToken(Token.WHITESPACE); }
541
542	/* Preprocessor directives */
543	"#"{WhiteSpace}*{PreprocessorWord} { addToken(Token.PREPROCESSOR); }
544
545	/* String/Character Literals. */
546	{CharLiteral} { addToken(Token.LITERAL_CHAR); }
547	{UnclosedCharLiteral} { addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
548	{ErrorUnclosedCharLiteral} { addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
549	{ErrorCharLiteral} { addToken(Token.ERROR_CHAR); }
550	{StringLiteral} { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
551	{UnclosedStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
552	{ErrorStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); }
553
554	/* Comment Literals. */
555	{MLCBegin} { start = zzMarkedPos-2; yybegin(MLC); }
556	{LineCommentBegin} { start = zzMarkedPos-2; yybegin(EOL_COMMENT); }
557
558	/* Separators. */
559	"(" |
560	")" |
561	"[" |
562	"]" |
563	"{" |
564	"}" { addToken(Token.SEPARATOR); }
565
566	/* Operators. */
567	{Trigraph} |
568	"=" |
569	"+" |
570	"-" |
571	"*" |
572	"/" |
573	"%" |
574	"~" |
575	"<" |
576	">" |
577	"<<" |
578	">>" |
579	"==" |
580	"+=" |
581	"-=" |
582	"*=" |
583	"/=" |
584	"%=" |
585	">>=" |
586	"<<=" |
587	"^" |
588	"&" |
589	"&&" |
590	"|" |
591	"||" |
592	"?" |
593	":" |
595	"!" |
596	"++" |
597	"--" |
598	"." |
599	"," { addToken(Token.OPERATOR); }
600
601	/* Numbers */
602	{IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
603	{HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
604	{FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
605	{ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }
606
607	/* Some lines will end in '\' to wrap an expression. */
608	"\\" { addToken(Token.IDENTIFIER); }
609
610	{ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }
611
612	/* Other punctuation, we'll highlight it as "identifiers." */
613	";" { addToken(Token.IDENTIFIER); }
614
615	/* Ended with a line not in a string or comment. */
616	<<EOF>> { addNullToken(); return firstToken; }
617
618	/* Catch any other (unhandled) characters and flag them as bad. */
619	. { addToken(Token.ERROR_IDENTIFIER); }
620
621	}
622
623	<MLC> {
624	/* Block-comment body. Runs without 'h', 'w', 'f', '*' or newline are consumed silently; the pending comment text is emitted later, starting at 'start'. */
625	[^hwf\n\*]+ {}
626	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
627	[hwf] {}
628	/* A newline or end of input flushes the pending comment text and returns; the closing delimiter flushes it and switches back to YYINITIAL. */
629	\n { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
630	{MLCEnd} { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
631	\* {}
632	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
633
634	}
635	/* Line comment: text accumulates from 'start'; each URL match first emits the text before it, then the URL itself through addHyperlinkToken. */
636	/* At a newline or end of input the remaining comment text is emitted, followed by a null token, and the token list is returned. */
637	<EOL_COMMENT> {
638	[^hwf\n]+ {}
639	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_EOL); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_EOL); start = zzMarkedPos; }
640	[hwf] {}
641	\n { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addNullToken(); return firstToken; }
642	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_EOL); addNullToken(); return firstToken; }
643	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format