Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

CPlusPlusTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut at a text edit area for GLI that supports color syntax highlighting
File size: 15.7 KB

Line
1	/*
2	* 11/19/2004
3	*
4	* CPlusPlusTokenMaker.java - An object that can take a chunk of text and
5	* return a linked list of tokens representing it in C++.
6	*
7	* This library is distributed under a modified BSD license. See the included
8	* RSyntaxTextArea.License.txt file for details.
9	*/
10	package org.fife.ui.rsyntaxtextarea.modes;
11
12	import java.io.*;
13	import javax.swing.text.Segment;
14
15	import org.fife.ui.rsyntaxtextarea.*;
16
17
18	/**
19	* A parser for the C++ programming language.
20	*
21	* This implementation was created using
22	* <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
23	* was modified for performance. Memory allocation needs to be almost
24	* completely removed to be competitive with the handwritten lexers (subclasses
25	* of <code>AbstractTokenMaker</code>), so this class has been modified so that
26	* Strings are never allocated (via yytext()), and the scanner never has to
27	* worry about refilling its buffer (needlessly copying chars around).
28	* We can achieve this because RText always scans exactly 1 line of tokens at a
29	* time, and hands the scanner this line as an array of characters (a Segment
30	* really). Since tokens contain pointers to char arrays instead of Strings
31	* holding their contents, there is no need for allocating new memory for
32	* Strings.<p>
33	*
34	* The actual algorithm generated for scanning has, of course, not been
35	* modified.<p>
36	*
37	* If you wish to regenerate this file yourself, keep in mind the following:
38	* <ul>
39	* <li>The generated <code>CPlusPlusTokenMaker.java</code> file will contain two
40	* definitions of both <code>zzRefill</code> and <code>yyreset</code>.
41	* You should hand-delete the second of each definition (the ones
42	* generated by the lexer), as these generated methods modify the input
43	* buffer, which we'll never have to do.</li>
44	* <li>You should also change the declaration/definition of zzBuffer to NOT
45	* be initialized. This is a needless memory allocation for us since we
46	* will be pointing the array somewhere else anyway.</li>
47	* <li>You should NOT call <code>yylex()</code> on the generated scanner
48	* directly; rather, you should use <code>getTokenList</code> as you would
49	* with any other <code>TokenMaker</code> instance.</li>
50	* </ul>
51	*
52	* @author Robert Futrell
53	* @version 0.6
54	*
55	*/
56	%%
57
58	%public
59	%class CPlusPlusTokenMaker
60	%extends AbstractJFlexCTokenMaker
61	%unicode
62	%type org.fife.ui.rsyntaxtextarea.Token
63
64
65	%{
66
67
68	/**
69	* Constructor. This must be here because JFlex does not generate a
70	* no-parameter constructor.
71	*/
72	public CPlusPlusTokenMaker() {
73	super();
74	}
75
76
77	/**
78	* Adds the token specified to the current linked list of tokens.
79	*
80	* @param tokenType The token's type.
81	* @see #addToken(int, int, int)
82	*/
83	private void addHyperlinkToken(int start, int end, int tokenType) {
84	int so = start + offsetShift;
85	addToken(zzBuffer, start,end, tokenType, so, true);
86	}
87
88
89	/**
90	* Adds the token specified to the current linked list of tokens.
91	*
92	* @param tokenType The token's type.
93	*/
94	private void addToken(int tokenType) {
95	addToken(zzStartRead, zzMarkedPos-1, tokenType);
96	}
97
98
99	/**
100	* Adds the token specified to the current linked list of tokens.
101	*
102	* @param tokenType The token's type.
103	*/
104	private void addToken(int start, int end, int tokenType) {
105	int so = start + offsetShift;
106	addToken(zzBuffer, start,end, tokenType, so);
107	}
108
109
110	/**
111	* Adds the token specified to the current linked list of tokens.
112	*
113	* @param array The character array.
114	* @param start The starting offset in the array.
115	* @param end The ending offset in the array.
116	* @param tokenType The token's type.
117	* @param startOffset The offset in the document at which this token
118	* occurs.
119	*/
120	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
121	super.addToken(array, start,end, tokenType, startOffset);
122	zzStartRead = zzMarkedPos;
123	}
124
125
126	/**
127	* Returns the text to place at the beginning and end of a
128	* line to "comment" it in a this programming language.
129	*
130	* @return The start and end strings to add to a line to "comment"
131	* it out.
132	*/
133	public String[] getLineCommentStartAndEnd() {
134	return new String[] { "//", null };
135	}
136
137
138	/**
139	* Returns the first token in the linked list of tokens generated
140	* from <code>text</code>. This method must be implemented by
141	* subclasses so they can correctly implement syntax highlighting.
142	*
143	* @param text The text from which to get tokens.
144	* @param initialTokenType The token type we should start with.
145	* @param startOffset The offset into the document at which
146	* <code>text</code> starts.
147	* @return The first <code>Token</code> in a linked list representing
148	* the syntax highlighted text.
149	*/
150	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
151
152	resetTokenList();
153	this.offsetShift = -text.offset + startOffset;
154
155	// Start off in the proper state.
156	int state = Token.NULL;
157	switch (initialTokenType) {
158	case Token.COMMENT_MULTILINE:
159	state = MLC;
160	start = text.offset;
161	break;
162	default:
163	state = Token.NULL;
164	}
165
166	s = text;
167	try {
168	yyreset(zzReader);
169	yybegin(state);
170	return yylex();
171	} catch (IOException ioe) {
172	ioe.printStackTrace();
173	return new DefaultToken();
174	}
175
176	}
177
178
179	/**
180	* Refills the input buffer.
181	*
182	* @return <code>true</code> if EOF was reached, otherwise
183	* <code>false</code>.
184	* @exception IOException if any I/O-Error occurs.
185	*/
186	private boolean zzRefill() throws java.io.IOException {
187	return zzCurrentPos>=s.offset+s.count;
188	}
189
190
191	/**
192	* Resets the scanner to read from a new input stream.
193	* Does not close the old reader.
194	*
195	* All internal variables are reset, the old input stream
196	* <b>cannot</b> be reused (internal buffer is discarded and lost).
197	* Lexical state is set to <tt>YY_INITIAL</tt>.
198	*
199	* @param reader the new input stream
200	*/
201	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
202	// 's' has been updated.
203	zzBuffer = s.array;
204	/*
205	* We replaced the line below with the two below it because zzRefill
206	* no longer "refills" the buffer (since the way we do it, it's always
207	* "full" the first time through, since it points to the segment's
208	* array). So, we assign zzEndRead here.
209	*/
210	//zzStartRead = zzEndRead = s.offset;
211	zzStartRead = s.offset;
212	zzEndRead = zzStartRead + s.count - 1;
213	zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
214	zzLexicalState = YYINITIAL;
215	zzReader = reader;
216	zzAtBOL = true;
217	zzAtEOF = false;
218	}
219
220
221	%}
222
/* Basic character classes. */
Letter = [A-Za-z]
LetterOrUnderscore = ({Letter}|"_")
Digit = [0-9]
HexDigit = {Digit}|[A-Fa-f]
OctalDigit = [0-7]
Exponent = [eE][+-]?{Digit}+

/* Words that may follow '#' in a preprocessor directive. */
PreprocessorWord = define|elif|else|endif|error|if|ifdef|ifndef|include|line|pragma|undef

/* The nine trigraph sequences. */
Trigraph = ("??="|"??("|"??)"|"??/"|"??'"|"??<"|"??>"|"??!"|"??-")

/* Escape sequences usable inside character and string literals. */
OctEscape1 = ([\\]{OctalDigit})
OctEscape2 = ([\\]{OctalDigit}{OctalDigit})
OctEscape3 = ([\\][0-3]{OctalDigit}{OctalDigit})
OctEscape = ({OctEscape1}|{OctEscape2}|{OctEscape3})
HexEscape = ([\\][xX]{HexDigit}{HexDigit})

/* Character literals, well-formed and malformed. */
AnyChrChr = ([^\'\n\\])
Escape = ([\\]([abfnrtv\'\"\?\\0]))
UnclosedCharLiteral = ([\']({Escape}|{OctEscape}|{HexEscape}|{Trigraph}|{AnyChrChr}))
CharLiteral = ({UnclosedCharLiteral}[\'])
ErrorUnclosedCharLiteral = ([\'][^\'\n]*)
ErrorCharLiteral = (([\'][\'])|{ErrorUnclosedCharLiteral}[\'])

/*
 * String literals, well-formed and malformed. A '?' needs special care
 * because "??x" may or may not be a trigraph: FalseTrigraph matches runs of
 * '?' that do NOT form one, so it must exclude every trigraph character,
 * including '(' and ')'.
 */
AnyStrChr = ([^\"\n\\])
FalseTrigraph = (("?"(("?")*)[^\=\(\)\/\'\<\>\!\-\\\?\"\n])|("?"[\=\(\)\/\'\<\>\!\-]))
StringLiteral = ([\"]((((("?")*)({Escape}|{OctEscape}|{HexEscape}|{Trigraph}))|{FalseTrigraph}|{AnyStrChr})*)(("?")*)[\"])
UnclosedStringLiteral = ([\"]([\\].|[^\\\"])*[^\"]?)
ErrorStringLiteral = ({UnclosedStringLiteral}[\"])


LineTerminator = \n
WhiteSpace = [ \t\f]

/* Comment delimiters. */
MLCBegin = "/*"
MLCEnd = "*/"
LineCommentBegin = "//"

/* Numeric literals; digits on either side of the '.' are optional, but not both. */
NonFloatSuffix = (([uU][lL]?)|([lL][uU]?))
IntegerLiteral = ({Digit}+{Exponent}?{NonFloatSuffix}?)
HexLiteral = ("0"[xX]{HexDigit}+{NonFloatSuffix}?)
FloatLiteral = ((({Digit}*[\.]{Digit}+)|({Digit}+[\.]{Digit}*)){Exponent}?[fFlL]?)
ErrorNumberFormat = (({IntegerLiteral}|{HexLiteral}|{FloatLiteral}){NonSeparator}+)

/*
 * NonSeparator is any character that cannot end an identifier-like run.
 * Every separator and operator character (including '(' , ')' and '|')
 * must be excluded, otherwise input such as "foo(" is swallowed whole by
 * ErrorIdentifier.
 */
NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")
Identifier = ({LetterOrUnderscore}({LetterOrUnderscore}|{Digit}|[$])*)
ErrorIdentifier = ({NonSeparator}+)

/* URLs recognized inside comments; the character classes follow RFC 3986. */
URLGenDelim = ([:\/\?#\[\]@])
URLSubDelim = ([\!\$&'\(\)\*\+,;=])
URLUnreserved = ({LetterOrUnderscore}|{Digit}|[\-\.\~])
URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters = ({URLCharacter}*)
URLEndCharacter = ([\/\$]|{Letter}|{Digit})
URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)


/* Lexical states: inside a multi-line comment, and inside a "//" comment. */
%state MLC
%state EOL_COMMENT
281
282	%%
283
284	<YYINITIAL> {
285
/* Keywords. Each alternative ends in '|' so that it shares the action of the last one. */
"auto" |
"break" |
"case" |
"catch" |
"class" |
"const" |
"const_cast" |
"continue" |
"default" |
"delete" |
"do" |
"dynamic_cast" |
"else" |
"enum" |
"explicit" |
"extern" |
"for" |
"friend" |
"goto" |
"if" |
"inline" |
"mutable" |
"namespace" |
"new" |
"operator" |
"private" |
"protected" |
"public" |
"register" |
"reinterpret_cast" |
"return" |
"sizeof" |
"static" |
"static_cast" |
"struct" |
"switch" |
"template" |
"this" |
"throw" |
"try" |
"typedef" |
"typeid" |
"typename" |
"union" |
"using" |
"virtual" |
"volatile" |
"while" { addToken(Token.RESERVED_WORD); }
335
/* Boolean literals (highlighted with the same token type as data types). */
"true" |
"false" |

/* Data types. */
"bool" |
"char" |
"double" |
"float" |
"int" |
"long" |
"short" |
"signed" |
"unsigned" |
"void" |
"wchar_t" { addToken(Token.DATA_TYPE); }
352
/* Standard functions. Order is irrelevant: the scanner always takes the longest match. */
"abort" |
"abs" |
"acos" |
"asctime" |
"asin" |
"assert" |
"atan2" |
"atan" |
"atexit" |
"atof" |
"atoi" |
"atol" |
"bsearch" |
"btowc" |
"calloc" |
"ceil" |
"clearerr" |
"clock" |
"cosh" |
"cos" |
"ctime" |
"difftime" |
"div" |
"errno" |
"exit" |
"exp" |
"fabs" |
"fclose" |
"feof" |
"ferror" |
"fflush" |
"fgetc" |
"fgetpos" |
"fgetwc" |
"fgets" |
"fgetws" |
"floor" |
"fmod" |
"fopen" |
"fprintf" |
"fputc" |
"fputs" |
"fputwc" |
"fputws" |
"fread" |
"free" |
"freopen" |
"frexp" |
"fscanf" |
"fseek" |
"fsetpos" |
"ftell" |
"fwprintf" |
"fwrite" |
"fwscanf" |
"getchar" |
"getc" |
"getenv" |
"gets" |
"getwc" |
"getwchar" |
"gmtime" |
"isalnum" |
"isalpha" |
"iscntrl" |
"isdigit" |
"isgraph" |
"islower" |
"isprint" |
"ispunct" |
"isspace" |
"isupper" |
"isxdigit" |
"labs" |
"ldexp" |
"ldiv" |
"localeconv" |
"localtime" |
"log10" |
"log" |
"longjmp" |
"malloc" |
"mblen" |
"mbrlen" |
"mbrtowc" |
"mbsinit" |
"mbsrtowcs" |
"mbstowcs" |
"mbtowc" |
"memchr" |
"memcmp" |
"memcpy" |
"memmove" |
"memset" |
"mktime" |
"modf" |
"offsetof" |
"perror" |
"pow" |
"printf" |
"putchar" |
"putc" |
"puts" |
"putwc" |
"putwchar" |
"qsort" |
"raise" |
"rand" |
"realloc" |
"remove" |
"rename" |
"rewind" |
"scanf" |
"setbuf" |
"setjmp" |
"setlocale" |
"setvbuf" |
"signal" |
"sinh" |
"sin" |
"sprintf" |
"sqrt" |
"srand" |
"sscanf" |
"strcat" |
"strchr" |
"strcmp" |
"strcoll" |
"strcpy" |
"strcspn" |
"strerror" |
"strftime" |
"strlen" |
"strncat" |
"strncmp" |
"strncpy" |
"strpbrk" |
"strrchr" |
"strspn" |
"strstr" |
"strtod" |
"strtok" |
"strtol" |
"strtoul" |
"strxfrm" |
"swprintf" |
"swscanf" |
"system" |
"tanh" |
"tan" |
"time" |
"tmpfile" |
"tmpnam" |
"tolower" |
"toupper" |
"ungetc" |
"ungetwc" |
"va_arg" |
"va_end" |
"va_start" |
"vfprintf" |
"vfwprintf" |
"vprintf" |
"vsprintf" |
"vswprintf" |
"vwprintf" |
"wcrtomb" |
"wcscat" |
"wcschr" |
"wcscmp" |
"wcscoll" |
"wcscpy" |
"wcscspn" |
"wcsftime" |
"wcslen" |
"wcsncat" |
"wcsncmp" |
"wcsncpy" |
"wcspbrk" |
"wcsrchr" |
"wcsrtombs" |
"wcsspn" |
"wcsstr" |
"wcstod" |
"wcstok" |
"wcstol" |
"wcstombs" |
"wcstoul" |
"wcsxfrm" |
"wctob" |
"wctomb" |
"wmemchr" |
"wmemcmp" |
"wmemcpy" |
"wmemmove" |
"wmemset" |
"wprintf" |
"wscanf" { addToken(Token.FUNCTION); }
554
/* Standard-defined macros, highlighted like preprocessor directives. */
"__DATE__" |
"__TIME__" |
"__FILE__" |
"__LINE__" |
"__STDC__" { addToken(Token.PREPROCESSOR); }
561
/* End of the line: terminate the token list and hand it back. */
{LineTerminator} { addNullToken(); return firstToken; }

{Identifier} { addToken(Token.IDENTIFIER); }

{WhiteSpace}+ { addToken(Token.WHITESPACE); }

/* Preprocessor directives */
"#"{WhiteSpace}*{PreprocessorWord} { addToken(Token.PREPROCESSOR); }

/* String/Character Literals. */
{CharLiteral} { addToken(Token.LITERAL_CHAR); }
{UnclosedCharLiteral} { addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
{ErrorUnclosedCharLiteral} { addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
{ErrorCharLiteral} { addToken(Token.ERROR_CHAR); }
{StringLiteral} { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
{UnclosedStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
{ErrorStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); }

/* Comment Literals. Remember where the comment began (two characters back), then switch state. */
{MLCBegin} { start = zzMarkedPos-2; yybegin(MLC); }
{LineCommentBegin} { start = zzMarkedPos-2; yybegin(EOL_COMMENT); }

/* Separators. */
"(" |
")" |
"[" |
"]" |
"{" |
"}" { addToken(Token.SEPARATOR); }

/* Operators. */
{Trigraph} |
"=" |
"+" |
"-" |
"*" |
"/" |
"%" |
"~" |
"<" |
">" |
"<<" |
">>" |
"==" |
"+=" |
"-=" |
"*=" |
"/=" |
"%=" |
">>=" |
"<<=" |
"^" |
"&" |
"&&" |
"|" |
"||" |
"?" |
":" |
"," |
"!" |
"++" |
"--" |
"." { addToken(Token.OPERATOR); }

/* Numbers */
{IntegerLiteral} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
{HexLiteral} { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
{FloatLiteral} { addToken(Token.LITERAL_NUMBER_FLOAT); }
{ErrorNumberFormat} { addToken(Token.ERROR_NUMBER_FORMAT); }

/* Some lines will end in '\' to wrap an expression. */
"\\" { addToken(Token.IDENTIFIER); }

{ErrorIdentifier} { addToken(Token.ERROR_IDENTIFIER); }

/* Other punctuation, we'll highlight it as "identifiers." */
";" { addToken(Token.IDENTIFIER); }

/* Ended with a line not in a string or comment. */
<<EOF>> { addNullToken(); return firstToken; }

/* Catch any other (unhandled) characters and flag them as bad. */
. { addToken(Token.ERROR_IDENTIFIER); }
646
647	}
648
<MLC> {

    /* Ordinary comment text: keep scanning; the token is emitted later. */
    [^hwf\n\*]+     {}

    /*
     * A URL inside the comment. Emit the comment text seen so far, then the
     * URL itself as a hyperlink token. The URL's start is saved first because
     * the five-argument addToken moves zzStartRead up to zzMarkedPos.
     */
    {URL}           {
                        int urlStart = zzStartRead;
                        addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
                        addHyperlinkToken(urlStart, zzMarkedPos-1, Token.COMMENT_MULTILINE);
                        start = zzMarkedPos;
                    }

    /* An 'h', 'w' or 'f' that did not start a URL. */
    [hwf]           {}

    /* End of line while still in the comment: emit what we have, no null token. */
    \n              {
                        addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
                        return firstToken;
                    }

    /* Comment terminator: the token includes both characters of it. */
    {MLCEnd}        {
                        yybegin(YYINITIAL);
                        addToken(start, zzStartRead+1, Token.COMMENT_MULTILINE);
                    }

    /* A '*' that is not part of the terminator. */
    \*              {}

    <<EOF>>         {
                        addToken(start, zzStartRead-1, Token.COMMENT_MULTILINE);
                        return firstToken;
                    }

}
661
<EOL_COMMENT> {

    /* Ordinary comment text: keep scanning; the token is emitted later. */
    [^hwf\n]+       {}

    /*
     * A URL inside the comment. Emit the comment text seen so far, then the
     * URL itself as a hyperlink token. The URL's start is saved first because
     * the five-argument addToken moves zzStartRead up to zzMarkedPos.
     */
    {URL}           {
                        int urlStart = zzStartRead;
                        addToken(start, zzStartRead-1, Token.COMMENT_EOL);
                        addHyperlinkToken(urlStart, zzMarkedPos-1, Token.COMMENT_EOL);
                        start = zzMarkedPos;
                    }

    /* An 'h', 'w' or 'f' that did not start a URL. */
    [hwf]           {}

    /* The comment ends with the line: emit it and terminate the token list. */
    \n              {
                        addToken(start, zzStartRead-1, Token.COMMENT_EOL);
                        addNullToken();
                        return firstToken;
                    }

    <<EOF>>         {
                        addToken(start, zzStartRead-1, Token.COMMENT_EOL);
                        addNullToken();
                        return firstToken;
                    }

}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format