Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

SASTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut at a text edit area for GLI that supports color syntax highlighting
File size: 14.8 KB

Line
1	/*
2	* 02/25/2005
3	*
4	* SASTokenMaker.java - Scanner for SAS files.
5	*
6	* This library is distributed under a modified BSD license. See the included
7	* RSyntaxTextArea.License.txt file for details.
8	*/
9	package org.fife.ui.rsyntaxtextarea.modes;
10
11	import java.io.*;
12	import javax.swing.text.Segment;
13
14	import org.fife.ui.rsyntaxtextarea.*;
15
16
17	/**
18	* This class generates tokens representing a text stream as SAS.<p>
19	*
20	* This implementation was created using
21	* <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22	* was modified for performance. Memory allocation needs to be almost
23	* completely removed to be competitive with the handwritten lexers (subclasses
24	* of <code>AbstractTokenMaker</code>), so this class has been modified so that
25	* Strings are never allocated (via yytext()), and the scanner never has to
26	* worry about refilling its buffer (needlessly copying chars around).
27	* We can achieve this because RText always scans exactly 1 line of tokens at a
28	* time, and hands the scanner this line as an array of characters (a Segment
29	* really). Since tokens contain pointers to char arrays instead of Strings
30	* holding their contents, there is no need for allocating new memory for
31	* Strings.<p>
32	*
33	* The actual algorithm generated for scanning has, of course, not been
34	* modified.<p>
35	*
36	* If you wish to regenerate this file yourself, keep in mind the following:
37	* <ul>
38	* <li>The generated <code>SASTokenMaker.java</code> file will contain two
39	* definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40	* You should hand-delete the second of each definition (the ones
41	* generated by the lexer), as these generated methods modify the input
42	* buffer, which we'll never have to do.</li>
43	* <li>You should also change the declaration/definition of zzBuffer to NOT
44	* be initialized. This is a needless memory allocation for us since we
45	* will be pointing the array somewhere else anyway.</li>
46	* <li>You should NOT call <code>yylex()</code> on the generated scanner
47	* directly; rather, you should use <code>getTokenList</code> as you would
48	* with any other <code>TokenMaker</code> instance.</li>
49	* </ul>
50	*
51	* @author Robert Futrell
52	* @version 0.5
53	*
54	*/
55	%%
56
57	%public
58	%class SASTokenMaker
59	%extends AbstractJFlexTokenMaker
60	%unicode
61	%ignorecase
62	%type org.fife.ui.rsyntaxtextarea.Token
63
64
65	%{
66
67
68	/**
69	* Constructor. This must be declared explicitly because JFlex does not
70	* generate a no-parameter constructor; it only delegates to the superclass.
71	*/
72	public SASTokenMaker() {
73	super();
74	}
75
76
77	/**
78	* Adds a token of the given type to the current linked list of tokens. The
	* token spans <code>zzStartRead</code> through <code>zzMarkedPos-1</code>
	* of the scan buffer.
79	*
80	* @param tokenType The token's type.
81	*/
82	private void addToken(int tokenType) {
83	addToken(zzStartRead, zzMarkedPos-1, tokenType);
84	}
85
86
87	/**
88	* Adds a token covering <code>start</code> through <code>end</code> of
	* <code>zzBuffer</code> to the current linked list of tokens. The token's
	* document offset is computed as <code>start + offsetShift</code>.
89	*
	* @param start Index in <code>zzBuffer</code> of the token's first character.
	* @param end Index in <code>zzBuffer</code> of the token's last character
	* (the one-argument overload passes <code>zzMarkedPos-1</code>).
90	* @param tokenType The token's type.
91	*/
92	private void addToken(int start, int end, int tokenType) {
93	int so = start + offsetShift;
94	addToken(zzBuffer, start,end, tokenType, so);
95	}
96
97
98	/**
99	* Adds the token specified to the current linked list of tokens, then
	* moves <code>zzStartRead</code> up to <code>zzMarkedPos</code>.
100	*
101	* @param array The character array.
102	* @param start The starting offset in the array.
103	* @param end The ending offset in the array.
104	* @param tokenType The token's type.
105	* @param startOffset The offset in the document at which this token
106	* occurs.
107	*/
108	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
109	super.addToken(array, start,end, tokenType, startOffset);
	// The next token added via addToken(int) starts at the current marked position.
110	zzStartRead = zzMarkedPos;
111	}
112
113
114	/**
115	* Returns the text to place at the beginning and end of a
116	* line to "comment" it in this programming language.
117	*
118	* @return The start and end strings to add to a line to "comment"
119	* it out. Here the start string is <code>"*"</code> and the end entry
	* is <code>null</code>.
120	*/
121	public String[] getLineCommentStartAndEnd() {
122	return new String[] { "*", null };
123	}
124
125
126	/**
127	* Returns whether tokens of the specified type should have "mark
128	* occurrences" enabled for the current programming language. For this
	* scanner that is true for identifier and variable tokens only.
129	*
130	* @param type The token type.
131	* @return Whether tokens of this type should have "mark occurrences"
132	* enabled.
133	*/
134	public boolean getMarkOccurrencesOfTokenType(int type) {
135	return type==Token.IDENTIFIER || type==Token.VARIABLE;
136	}
137
138
139	/**
140	* Returns the first token in the linked list of tokens generated
141	* from <code>text</code>. The scanner is started in the lexical state
142	* selected by <code>initialTokenType</code>: STRING, CHAR or MLC for the
	* matching token types, and <code>Token.NULL</code> for anything else.
143	*
144	* @param text The text from which to get tokens.
145	* @param initialTokenType The token type we should start with.
146	* @param startOffset The offset into the document at which
147	* <code>text</code> starts.
148	* @return The first <code>Token</code> in a linked list representing
149	* the syntax highlighted text. If scanning throws an
	* <code>IOException</code>, its stack trace is printed and a new
	* <code>DefaultToken</code> is returned instead.
150	*/
151	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
152
153	resetTokenList();
	// Added to a buffer index to obtain the corresponding document offset.
154	this.offsetShift = -text.offset + startOffset;
155
156	// Start off in the proper state.
	// NOTE(review): Token.NULL is passed to yybegin() as a lexical state below;
	// this presumably relies on it having the same value as YYINITIAL - confirm.
157	int state = Token.NULL;
158	switch (initialTokenType) {
159	case Token.LITERAL_STRING_DOUBLE_QUOTE:
160	state = STRING;
161	start = text.offset;
162	break;
163	case Token.LITERAL_CHAR:
164	state = CHAR;
165	start = text.offset;
166	break;
167	case Token.COMMENT_MULTILINE:
168	state = MLC;
169	start = text.offset;
170	break;
171	default:
172	state = Token.NULL;
173	}
174
	// yyreset() reads the segment from the field 's', so assign it first.
175	s = text;
176	try {
177	yyreset(zzReader);
178	yybegin(state);
179	return yylex();
180	} catch (IOException ioe) {
181	ioe.printStackTrace();
182	return new DefaultToken();
183	}
184
185	}
186
187
188	/**
189	* Stands in for JFlex's buffer refill. Nothing is read or copied here; the
	* method only reports whether the current position has reached the end of
	* the segment <code>s</code>.
190	*
191	* @return <code>true</code> if EOF was reached, otherwise
192	* <code>false</code>.
193	* @exception IOException declared for the generated scanner's signature;
	* this implementation performs no I/O.
194	*/
195	private boolean zzRefill() throws java.io.IOException {
196	return zzCurrentPos>=s.offset+s.count;
197	}
198
199
200	/**
201	* Resets the scanner to read from the current segment <code>s</code>.
202	* Does not close the old reader.
203	*
204	* The scan buffer is pointed directly at the segment's character array
205	* (nothing is copied) and all position variables are set to the segment's
206	* offset. Lexical state is set to <tt>YYINITIAL</tt>.
207	*
208	* @param reader the new input stream; it is only stored in
	* <code>zzReader</code> here.
209	*/
210	public final void yyreset(java.io.Reader reader) throws java.io.IOException {
211	// 's' has been updated.
212	zzBuffer = s.array;
213	/*
214	* We replaced the line below with the two below it because zzRefill
215	* no longer "refills" the buffer (since the way we do it, it's always
216	* "full" the first time through, since it points to the segment's
217	* array). So, we assign zzEndRead here.
218	*/
219	//zzStartRead = zzEndRead = s.offset;
220	zzStartRead = s.offset;
221	zzEndRead = zzStartRead + s.count - 1;
222	zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
223	zzLexicalState = YYINITIAL;
224	zzReader = reader;
225	zzAtBOL = true;
226	zzAtEOF = false;
227	}
228
229
230	%}
231
232	LineTerminator = ([\n])
233	Letter = ([A-Za-z_])
234	Digit = ([0-9])
235	Whitespace = ([ \t]+)
236	Semicolon = ([;])
237
238	Identifier = (({Letter}\|{Digit})+)
239	MacroVariable = (&{Identifier})
240
241	Operators1 = ("+"\|"-"\|"*"\|"/"\|"^"\|"\|")
242	Operators2 = (([\^\~]?=)\|(">"[=]?)\|("<"[=]?))
243	Operators3 = ("eq"\|"ne"\|"gt"\|"lt"\|"ge"\|"le"\|"in")
244	Operator = ({Operators1}\|{Operators2}\|{Operators3})
245	Separator = ([\(\)])
246
247	StringBoundary = (\")
248	CharBoundary = (\')
249
250	LineCommentBegin = ("*")
251	MLCBegin = ("/*")
252	MLCEnd = ("*/")
253
254	%state STRING
255	%state CHAR
256	%state MLC
257
258	%%
259
260	<YYINITIAL> {
261
262	/* Keywords (matched case-insensitively because of %ignorecase). */
263	"_all_" |
264	"_character_" |
265	"_data_" |
266	"_infile_" |
267	"_last_" |
268	"_null_" |
269	"_numeric_" |
270	"_page_" |
271	"_temporary_" |
272	"abend" |
273	"abort" |
274	"all" |
275	"alter" |
276	"and" |
277	"array" |
278	"as" |
279	"ascending" |
280	"attrib" |
281	"axis" |
282	"bell" |
283	"blank" |
284	"border" |
285	"bounds" |
286	"by" |
287	"call" |
288	"cancel" |
289	"cards" |
290	"cards4" |
291	"choro" |
292	"class" |
293	"classes" |
294	"clear" |
295	"close" |
296	"compute" |
297	"contrast" |
298	"coord" |
299	"coordinates" |
300	"cov" |
301	"create" |
302	"data" |
303	"datalines" |
304	"datalines4" |
305	"delete" |
306	"descending" |
307	"describe" |
308	"discrete" |
309	"disk" |
310	"display" |
311	"dm" |
312	"do" |
313	"drop" |
314	"dummy" |
315	"else" |
316	"end" |
317	"endrsubmit" |
318	"endsas" |
319	"error" |
320	"except" |
321	"expandtabs" |
322	"factors" |
323	"file" |
324	"filename" |
325	"flowover" |
326	"footnote" |
327	"frame" |
328	"freq" |
329	"from" |
330	"go" |
331	"goption" |
332	"goptions" |
333	"goto" |
334	"grid" |
335	"group" |
336	"groupby" |
337	"groupformat" |
338	"having" |
339	"haxis" |
340	"hbar" |
341	"heading" |
342	"high" |
343	"html" |
344	"id" |
345	"if" |
346	"infile" |
347	"informat" |
348	"inner" |
349	"input" |
350	"insert" |
351	"intersect" |
352	"keep" |
353	"keylabel" |
354	"label" |
355	"lable" |
356	"legend" |
357	"length" |
358	"libname" |
359	"lineqs" |
360	"link" |
361	"list" |
362	"listing" |
363	"log" |
364	"lostcard" |
365	"low" |
366	"mark" |
367	"matings" |
368	"mean" |
369	"merge" |
370	"missing" |
371	"missover" |
372	"mod" |
373	"model" |
374	"modify" |
375	"n" |
376	"nocell" |
377	"nocharacters" |
378	"nodupkey" |
379	"noexpandtabs" |
380	"noframe" |
381	"noheading" |
382	"noinput" |
383	"nolegend" |
384	"nopad" |
385	"noprint" |
386	"nosharebuffers" |
387	"not" |
388	"note" |
389	"notitle" |
390	"notitles" |
391	"notsorted" |
392	"ods" |
393	"old" |
394	"option" |
395	"or" |
396	"order" |
397	"orderby" |
398	"other" |
399	"otherwise" |
400	"outer" |
401	"output" |
402	"over" |
403	"overlay" |
404	"overprint" |
405	"pad" |
406	"pageby" |
407	"pagesize" |
408	"parmcards" |
409	"parmcards4" |
410	"parms" |
411	"pattern" |
412	"pct" |
413	"pctn" |
414	"pctsum" |
415	"picture" |
416	"pie" |
417	"pie3d" |
418	"plotter" |
419	"predict" |
420	"prefix" |
421	"printer" |
422	"proc" |
423	"ps" |
424	"put" |
425	"quit" |
426	"random" |
427	"range" |
428	"remove" |
429	"rename" |
430	"response" |
431	"replace" |
432	"reset" |
433	"retain" |
434	"return" |
435	"rsubmit" |
436	"run" |
437	"s2" |
438	"select" |
439	"set" |
440	"sharebuffers" |
441	"signoff" |
442	"signon" |
443	"sim" |
444	"skip" |
445	"source2" |
446	"startsas" |
447	"std" |
448	"stop" |
449	"stopover" |
450	"strata" |
451	"sum" |
452	"sumby" |
453	"supvar" |
454	"symbol" |
455	"table" |
456	"tables" |
457	"tape" |
458	"terminal" |
459	"test" |
460	"then" |
461	"time" |
462	"title" |
463	"to" |
464	"transform" |
465	"treatments" |
466	"truncover" |
467	"unbuf" |
468	"unbuffered" |
469	"union" |
470	"until" |
471	"update" |
472	"validate" |
473	"value" |
474	"var" |
475	"variables" |
476	"vaxis" |
477	"vbar" |
478	"weight" |
479	"when" |
480	"where" |
481	"while" |
482	"with" |
483	"window" |
484	"x" { addToken(Token.RESERVED_WORD); }
485
486	/* Base SAS procs, highlighted as DATA_TYPE. "display" is also listed in the keyword rule earlier in this state; that earlier rule takes precedence. */
487	"append" |
488	"calendar" |
489	"catalog" |
490	"chart" |
491	"cimport" |
492	"compare" |
493	"contents" |
494	"copy" |
495	"cpm" |
496	"cport" |
497	"datasets" |
498	"display" |
499	"explode" |
500	"export" |
501	"fontreg" |
502	"format" |
503	"forms" |
504	"fslist" |
505	"import" |
506	"means" |
507	"migrate" |
508	"options" |
509	"optload" |
510	"optsave" |
511	"plot" |
512	"pmenu" |
513	"print" |
514	"printto" |
515	"proto" |
516	"prtdef" |
517	"prtexp" |
518	"pwencode" |
519	"rank" |
520	"registry" |
521	"report" |
522	"sort" |
523	"sql" |
524	"standard" |
525	"summary" |
526	"tabulate" |
527	"template" |
528	"timeplot" |
529	"transpose" { addToken(Token.DATA_TYPE); }
530
531	/* SAS/STAT procs, highlighted as DATA_TYPE. "freq" is also listed in the keyword rule earlier in this state; that earlier rule takes precedence. */
532	"corr" |
533	"freq" |
534	"univariate" { addToken(Token.DATA_TYPE); }
535
536	/* Macro statements and functions (all start with '%'), highlighted as FUNCTION. */
537	"%abort" |
538	"%bquote" |
539	"%by" |
540	"%cms" |
541	"%copy" |
542	"%display" |
543	"%do" |
544	"%else" |
545	"%end" |
546	"%eval" |
547	"%global" |
548	"%go" |
549	"%goto" |
550	"%if" |
551	"%inc" |
552	"%include" |
553	"%index" |
554	"%input" |
555	"%keydef" |
556	"%length" |
557	"%let" |
558	"%local" |
559	"%macro" |
560	"%mend" |
561	"%nrbquote" |
562	"%nrquote" |
563	"%nrstr" |
564	"%put" |
565	"%qscan" |
566	"%qsubstr" |
567	"%qsysfunc" |
568	"%quote" |
569	"%qupcase" |
570	"%scan" |
571	"%str" |
572	"%substr" |
573	"%superq" |
574	"%syscall" |
575	"%sysevalf" |
576	"%sysexec" |
577	"%sysfunc" |
578	"%sysget" |
579	"%sysprod" |
580	"%sysrput" |
581	"%then" |
582	"%to" |
583	"%tso" |
584	"%unquote" |
585	"%until" |
586	"%upcase" |
587	"%while" |
588	"%window" { addToken(Token.FUNCTION); }
589
590	}
591
592	<YYINITIAL> {
593
594	{LineTerminator} { addNullToken(); return firstToken; }
595
596	/* Comments. */
597	/* Do comments before operators as "*" can signify a line comment as */
598	/* well as an operator. */
599	^[ \t]*{LineCommentBegin} {
600	// We must do this because of how we
601	// abuse JFlex; since we return an entire
602	// list of tokens at once instead of a
603	// single token at a time, the "^" regex
604	// character doesn't really work, so we must
605	// check that we're at the beginning of a
606	// line ourselves.
607	start = zzStartRead;
608	// Might not be any whitespace.
609	if (yylength()>1) {
	// Everything before the trailing "*" is whitespace; emit it, then
	// point zzStartRead back at the "*" (addToken moved it past the match).
610	addToken(zzStartRead,zzMarkedPos-2, Token.WHITESPACE);
611	zzStartRead = zzMarkedPos-1;
612	}
613	// Remember: zzStartRead may now be updated,
614	// so we must check against 'start'.
615	if (start==s.offset) {
	// Match began at the start of the line: the rest of the line is a comment.
616	addToken(zzStartRead,zzEndRead, Token.COMMENT_EOL);
617	addNullToken();
618	return firstToken;
619	}
620	else {
	// Not at the start of the line: the "*" is an ordinary operator.
621	addToken(zzStartRead,zzStartRead, Token.OPERATOR);
622	}
623	}
	/* Remember where the comment starts; its token is added in the MLC state. */
624	{MLCBegin} { start = zzMarkedPos-2; yybegin(MLC); }
625
626	/* Do operators before identifiers since some of them are words. */
627	{Operator} { addToken(Token.OPERATOR); }
628	{Separator} { addToken(Token.SEPARATOR); }
629
630	{Identifier} { addToken(Token.IDENTIFIER); }
631	{MacroVariable} { addToken(Token.VARIABLE); }
632	{Semicolon} { addToken(Token.IDENTIFIER); }
633
634	{Whitespace} { addToken(Token.WHITESPACE); }
635
	/* Remember the opening quote's position; the token is added in STRING/CHAR. */
636	{StringBoundary} { start = zzMarkedPos-1; yybegin(STRING); }
637	{CharBoundary} { start = zzMarkedPos-1; yybegin(CHAR); }
638
639	<<EOF>> { addNullToken(); return firstToken; }
640
641	/* Catch any other (unhandled) characters and flag them as OK; */
642	/* This will include "." from statements like "from lib.dataset". */
643	. { addToken(Token.IDENTIFIER); }
644
645	}
646
	/* Inside a double-quoted string; 'start' holds the index where it began. */
647	<STRING> {
648
	/* Consume string content without emitting a token yet. */
649	[^\n\"]+ {}
	/* Line ended inside the string: emit what was read so far as a string token. */
650	{LineTerminator} { addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
651	/* {StringBoundary}{StringBoundary} {} */
	/* Closing quote: emit the whole string, closing quote included. */
652	{StringBoundary} { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE); }
653	<<EOF>> { addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
654
655	}
656
	/* Inside a single-quoted literal; 'start' holds the index where it began. */
657	<CHAR> {
658
	/* Consume literal content without emitting a token yet. */
659	[^\n\']+ {}
	/* NOTE(review): unlike the STRING state, this rule switches back to YYINITIAL before returning - confirm whether that difference is intended. */
660	{LineTerminator} { yybegin(YYINITIAL); addToken(start,zzStartRead-1, Token.LITERAL_CHAR); return firstToken; }
661	/* {CharBoundary}{CharBoundary} {} */
	/* Closing quote: emit the whole literal, closing quote included. */
662	{CharBoundary} { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.LITERAL_CHAR); }
663	<<EOF>> { addToken(start,zzStartRead-1, Token.LITERAL_CHAR); return firstToken; }
664
665	}
666
	/* Inside a multi-line comment; 'start' holds the index where it began. */
667	<MLC> {
668
	/* Consume comment text without emitting a token yet. */
669	[^\n\*]+ {}
	/* Line ended inside the comment: emit what was read so far as a comment token. */
670	{LineTerminator} { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
	/* Comment terminator: emit the comment, both terminator characters included. */
671	{MLCEnd} { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
	/* A lone '*' that does not start the terminator is just comment text. */
672	\* {}
673	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
674
675	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format