source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/SASTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut of a text edit area for GLI that supports color syntax highlighting

File size: 14.8 KB
Line 
1/*
2 * 02/25/2005
3 *
4 * SASTokenMaker.java - Scanner for SAS files.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import java.io.*;
12import javax.swing.text.Segment;
13
14import org.fife.ui.rsyntaxtextarea.*;
15
16
17/**
18 * This class generates tokens representing a text stream as SAS.<p>
19 *
20 * This implementation was created using
21 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22 * was modified for performance. Memory allocation needs to be almost
23 * completely removed to be competitive with the handwritten lexers (subclasses
24 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
25 * Strings are never allocated (via yytext()), and the scanner never has to
26 * worry about refilling its buffer (needlessly copying chars around).
27 * We can achieve this because RText always scans exactly 1 line of tokens at a
28 * time, and hands the scanner this line as an array of characters (a Segment
29 * really). Since tokens contain pointers to char arrays instead of Strings
30 * holding their contents, there is no need for allocating new memory for
31 * Strings.<p>
32 *
33 * The actual algorithm generated for scanning has, of course, not been
34 * modified.<p>
35 *
36 * If you wish to regenerate this file yourself, keep in mind the following:
37 * <ul>
38 * <li>The generated <code>SASTokenMaker.java</code> file will contain two
39 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40 * You should hand-delete the second of each definition (the ones
41 * generated by the lexer), as these generated methods modify the input
42 * buffer, which we'll never have to do.</li>
43 * <li>You should also change the declaration/definition of zzBuffer to NOT
44 * be initialized. This is a needless memory allocation for us since we
45 * will be pointing the array somewhere else anyway.</li>
46 * <li>You should NOT call <code>yylex()</code> on the generated scanner
47 * directly; rather, you should use <code>getTokenList</code> as you would
48 * with any other <code>TokenMaker</code> instance.</li>
49 * </ul>
50 *
51 * @author Robert Futrell
52 * @version 0.5
53 *
54 */
55%%
56
57%public
58%class SASTokenMaker
59%extends AbstractJFlexTokenMaker
60%unicode
61%ignorecase
62%type org.fife.ui.rsyntaxtextarea.Token
63
64
65%{
66
67
68 /**
69 * Constructor. This must be here because JFlex does not generate a
70 * no-parameter constructor.
71 */
72 public SASTokenMaker() {
73 super();
74 }
75
76
77 /**
78 * Adds the token specified to the current linked list of tokens.
79 *
80 * @param tokenType The token's type.
81 */
82 private void addToken(int tokenType) {
83 addToken(zzStartRead, zzMarkedPos-1, tokenType);
84 }
85
86
87 /**
88 * Adds the token specified to the current linked list of tokens.
89 *
90 * @param tokenType The token's type.
91 */
92 private void addToken(int start, int end, int tokenType) {
93 int so = start + offsetShift;
94 addToken(zzBuffer, start,end, tokenType, so);
95 }
96
97
98 /**
99 * Adds the token specified to the current linked list of tokens.
100 *
101 * @param array The character array.
102 * @param start The starting offset in the array.
103 * @param end The ending offset in the array.
104 * @param tokenType The token's type.
105 * @param startOffset The offset in the document at which this token
106 * occurs.
107 */
108 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
109 super.addToken(array, start,end, tokenType, startOffset);
110 zzStartRead = zzMarkedPos;
111 }
112
113
114 /**
115 * Returns the text to place at the beginning and end of a
116 * line to "comment" it in a this programming language.
117 *
118 * @return The start and end strings to add to a line to "comment"
119 * it out.
120 */
121 public String[] getLineCommentStartAndEnd() {
122 return new String[] { "*", null };
123 }
124
125
126 /**
127 * Returns whether tokens of the specified type should have "mark
128 * occurrences" enabled for the current programming language.
129 *
130 * @param type The token type.
131 * @return Whether tokens of this type should have "mark occurrences"
132 * enabled.
133 */
134 public boolean getMarkOccurrencesOfTokenType(int type) {
135 return type==Token.IDENTIFIER || type==Token.VARIABLE;
136 }
137
138
139 /**
140 * Returns the first token in the linked list of tokens generated
141 * from <code>text</code>. This method must be implemented by
142 * subclasses so they can correctly implement syntax highlighting.
143 *
144 * @param text The text from which to get tokens.
145 * @param initialTokenType The token type we should start with.
146 * @param startOffset The offset into the document at which
147 * <code>text</code> starts.
148 * @return The first <code>Token</code> in a linked list representing
149 * the syntax highlighted text.
150 */
151 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
152
153 resetTokenList();
154 this.offsetShift = -text.offset + startOffset;
155
156 // Start off in the proper state.
157 int state = Token.NULL;
158 switch (initialTokenType) {
159 case Token.LITERAL_STRING_DOUBLE_QUOTE:
160 state = STRING;
161 start = text.offset;
162 break;
163 case Token.LITERAL_CHAR:
164 state = CHAR;
165 start = text.offset;
166 break;
167 case Token.COMMENT_MULTILINE:
168 state = MLC;
169 start = text.offset;
170 break;
171 default:
172 state = Token.NULL;
173 }
174
175 s = text;
176 try {
177 yyreset(zzReader);
178 yybegin(state);
179 return yylex();
180 } catch (IOException ioe) {
181 ioe.printStackTrace();
182 return new DefaultToken();
183 }
184
185 }
186
187
188 /**
189 * Refills the input buffer.
190 *
191 * @return <code>true</code> if EOF was reached, otherwise
192 * <code>false</code>.
193 * @exception IOException if any I/O-Error occurs.
194 */
195 private boolean zzRefill() throws java.io.IOException {
196 return zzCurrentPos>=s.offset+s.count;
197 }
198
199
200 /**
201 * Resets the scanner to read from a new input stream.
202 * Does not close the old reader.
203 *
204 * All internal variables are reset, the old input stream
205 * <b>cannot</b> be reused (internal buffer is discarded and lost).
206 * Lexical state is set to <tt>YY_INITIAL</tt>.
207 *
208 * @param reader the new input stream
209 */
210 public final void yyreset(java.io.Reader reader) throws java.io.IOException {
211 // 's' has been updated.
212 zzBuffer = s.array;
213 /*
214 * We replaced the line below with the two below it because zzRefill
215 * no longer "refills" the buffer (since the way we do it, it's always
216 * "full" the first time through, since it points to the segment's
217 * array). So, we assign zzEndRead here.
218 */
219 //zzStartRead = zzEndRead = s.offset;
220 zzStartRead = s.offset;
221 zzEndRead = zzStartRead + s.count - 1;
222 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
223 zzLexicalState = YYINITIAL;
224 zzReader = reader;
225 zzAtBOL = true;
226 zzAtEOF = false;
227 }
228
229
230%}
231
/* Basic character classes. */
LineTerminator			= \n
Letter					= [A-Za-z_]
Digit					= [0-9]
Whitespace				= [ \t]+
Semicolon				= ";"

/* Any run of letters, digits and underscores; a leading digit is allowed. */
Identifier				= ({Letter}|{Digit})+
/* A macro variable reference: "&" followed by an identifier. */
MacroVariable			= "&"{Identifier}

/* Symbolic arithmetic operators. */
Operators1				= "+" | "-" | "*" | "/" | "^" | "|"
/* Comparison operators: =, ^=, ~=, >, >=, <, <=. */
Operators2				= [\^\~]? "=" | ">" "="? | "<" "="?
/* Mnemonic (word) operators. */
Operators3				= "eq" | "ne" | "gt" | "lt" | "ge" | "le" | "in"
Operator				= {Operators1} | {Operators2} | {Operators3}
Separator				= [\(\)]

/* Delimiters of double-quoted and single-quoted literals. */
StringBoundary			= \"
CharBoundary			= \'

/* Comment delimiters. */
LineCommentBegin		= "*"
MLCBegin				= "/*"
MLCEnd					= "*/"

/* Lexical states used while inside a string, a char literal or a multi-line comment. */
%state STRING, CHAR, MLC
257
258%%
259
<YYINITIAL> {

	/*
	 * Word lists.  Each list is a chain of single-word rules that share the
	 * action at the end of the chain.  The scanner is declared %ignorecase,
	 * so the words match in any letter case.
	 *
	 * When two rules match the same text, the one listed first wins, so a
	 * word must appear in one list only.  "display" and "freq" used to be
	 * repeated in the proc lists below; those copies could never match
	 * because the keyword list comes first, and they have been removed.
	 *
	 * NOTE(review): "lable" looks like a misspelling of "label" (which is
	 * also listed).  It is kept so highlighting does not change; confirm
	 * whether it is intentional.
	 */

	/* Keywords */
	"_all_" |
	"_character_" |
	"_data_" |
	"_infile_" |
	"_last_" |
	"_null_" |
	"_numeric_" |
	"_page_" |
	"_temporary_" |
	"abend" |
	"abort" |
	"all" |
	"alter" |
	"and" |
	"array" |
	"as" |
	"ascending" |
	"attrib" |
	"axis" |
	"bell" |
	"blank" |
	"border" |
	"bounds" |
	"by" |
	"call" |
	"cancel" |
	"cards" |
	"cards4" |
	"choro" |
	"class" |
	"classes" |
	"clear" |
	"close" |
	"compute" |
	"contrast" |
	"coord" |
	"coordinates" |
	"cov" |
	"create" |
	"data" |
	"datalines" |
	"datalines4" |
	"delete" |
	"descending" |
	"describe" |
	"discrete" |
	"disk" |
	"display" |
	"dm" |
	"do" |
	"drop" |
	"dummy" |
	"else" |
	"end" |
	"endrsubmit" |
	"endsas" |
	"error" |
	"except" |
	"expandtabs" |
	"factors" |
	"file" |
	"filename" |
	"flowover" |
	"footnote" |
	"frame" |
	"freq" |
	"from" |
	"go" |
	"goption" |
	"goptions" |
	"goto" |
	"grid" |
	"group" |
	"groupby" |
	"groupformat" |
	"having" |
	"haxis" |
	"hbar" |
	"heading" |
	"high" |
	"html" |
	"id" |
	"if" |
	"infile" |
	"informat" |
	"inner" |
	"input" |
	"insert" |
	"intersect" |
	"keep" |
	"keylabel" |
	"label" |
	"lable" |
	"legend" |
	"length" |
	"libname" |
	"lineqs" |
	"link" |
	"list" |
	"listing" |
	"log" |
	"lostcard" |
	"low" |
	"mark" |
	"matings" |
	"mean" |
	"merge" |
	"missing" |
	"missover" |
	"mod" |
	"model" |
	"modify" |
	"n" |
	"nocell" |
	"nocharacters" |
	"nodupkey" |
	"noexpandtabs" |
	"noframe" |
	"noheading" |
	"noinput" |
	"nolegend" |
	"nopad" |
	"noprint" |
	"nosharebuffers" |
	"not" |
	"note" |
	"notitle" |
	"notitles" |
	"notsorted" |
	"ods" |
	"old" |
	"option" |
	"or" |
	"order" |
	"orderby" |
	"other" |
	"otherwise" |
	"outer" |
	"output" |
	"over" |
	"overlay" |
	"overprint" |
	"pad" |
	"pageby" |
	"pagesize" |
	"parmcards" |
	"parmcards4" |
	"parms" |
	"pattern" |
	"pct" |
	"pctn" |
	"pctsum" |
	"picture" |
	"pie" |
	"pie3d" |
	"plotter" |
	"predict" |
	"prefix" |
	"printer" |
	"proc" |
	"ps" |
	"put" |
	"quit" |
	"random" |
	"range" |
	"remove" |
	"rename" |
	"response" |
	"replace" |
	"reset" |
	"retain" |
	"return" |
	"rsubmit" |
	"run" |
	"s2" |
	"select" |
	"set" |
	"sharebuffers" |
	"signoff" |
	"signon" |
	"sim" |
	"skip" |
	"source2" |
	"startsas" |
	"std" |
	"stop" |
	"stopover" |
	"strata" |
	"sum" |
	"sumby" |
	"supvar" |
	"symbol" |
	"table" |
	"tables" |
	"tape" |
	"terminal" |
	"test" |
	"then" |
	"time" |
	"title" |
	"to" |
	"transform" |
	"treatments" |
	"truncover" |
	"unbuf" |
	"unbuffered" |
	"union" |
	"until" |
	"update" |
	"validate" |
	"value" |
	"var" |
	"variables" |
	"vaxis" |
	"vbar" |
	"weight" |
	"when" |
	"where" |
	"while" |
	"with" |
	"window" |
	"x"							{ addToken(Token.RESERVED_WORD); }

	/* Base SAS procs ("display" is already covered by the keyword list). */
	"append" |
	"calendar" |
	"catalog" |
	"chart" |
	"cimport" |
	"compare" |
	"contents" |
	"copy" |
	"cpm" |
	"cport" |
	"datasets" |
	"explode" |
	"export" |
	"fontreg" |
	"format" |
	"forms" |
	"fslist" |
	"import" |
	"means" |
	"migrate" |
	"options" |
	"optload" |
	"optsave" |
	"plot" |
	"pmenu" |
	"print" |
	"printto" |
	"proto" |
	"prtdef" |
	"prtexp" |
	"pwencode" |
	"rank" |
	"registry" |
	"report" |
	"sort" |
	"sql" |
	"standard" |
	"summary" |
	"tabulate" |
	"template" |
	"timeplot" |
	"transpose"					{ addToken(Token.DATA_TYPE); }

	/* SAS/STAT procs ("freq" is already covered by the keyword list). */
	"corr" |
	"univariate"				{ addToken(Token.DATA_TYPE); }

	/* Macros. */
	"%abort" |
	"%bquote" |
	"%by" |
	"%cms" |
	"%copy" |
	"%display" |
	"%do" |
	"%else" |
	"%end" |
	"%eval" |
	"%global" |
	"%go" |
	"%goto" |
	"%if" |
	"%inc" |
	"%include" |
	"%index" |
	"%input" |
	"%keydef" |
	"%length" |
	"%let" |
	"%local" |
	"%macro" |
	"%mend" |
	"%nrbquote" |
	"%nrquote" |
	"%nrstr" |
	"%put" |
	"%qscan" |
	"%qsubstr" |
	"%qsysfunc" |
	"%quote" |
	"%qupcase" |
	"%scan" |
	"%str" |
	"%substr" |
	"%superq" |
	"%syscall" |
	"%sysevalf" |
	"%sysexec" |
	"%sysfunc" |
	"%sysget" |
	"%sysprod" |
	"%sysrput" |
	"%then" |
	"%to" |
	"%tso" |
	"%unquote" |
	"%until" |
	"%upcase" |
	"%while" |
	"%window"					{ addToken(Token.FUNCTION); }

}
591
<YYINITIAL> {

	/* End of the line: close the token list and hand it back. */
	{LineTerminator}				{ addNullToken(); return firstToken; }
	<<EOF>>							{ addNullToken(); return firstToken; }

	/*
	 * Line comments.  This rule has to precede the operator rule because
	 * "*" is both the line-comment marker and the multiplication operator.
	 */
	^[ \t]*{LineCommentBegin}		{
										// The "^" anchor cannot be relied on here because a whole
										// line of tokens is produced per call, so whether the match
										// began at the start of the segment is checked by hand.
										final int starPos = zzMarkedPos - 1;	// index of the "*"
										start = zzStartRead;					// where the match began
										if (yylength() > 1) {
											// Leading blanks/tabs become their own token.
											addToken(start, starPos - 1, Token.WHITESPACE);
											zzStartRead = starPos;
										}
										// zzStartRead may have moved, so compare using 'start'.
										if (start != s.offset) {
											// Not at the start of the segment: "*" is an operator.
											addToken(zzStartRead, zzStartRead, Token.OPERATOR);
										}
										else {
											// The rest of the segment is a comment.
											addToken(zzStartRead, zzEndRead, Token.COMMENT_EOL);
											addNullToken();
											return firstToken;
										}
									}

	/* Openers: remember where the construct starts and switch state. */
	{MLCBegin}						{ start = zzMarkedPos-2; yybegin(MLC); }
	{StringBoundary}				{ start = zzMarkedPos-1; yybegin(STRING); }
	{CharBoundary}					{ start = zzMarkedPos-1; yybegin(CHAR); }

	/* Operators go before identifiers because some operators are words. */
	{Operator}						{ addToken(Token.OPERATOR); }
	{Identifier}					{ addToken(Token.IDENTIFIER); }

	{Separator}						{ addToken(Token.SEPARATOR); }
	{MacroVariable}					{ addToken(Token.VARIABLE); }
	{Semicolon}						{ addToken(Token.IDENTIFIER); }
	{Whitespace}					{ addToken(Token.WHITESPACE); }

	/* Any other single character (e.g. the "." in "from lib.dataset") */
	/* is accepted and typed as an identifier.                         */
	.								{ addToken(Token.IDENTIFIER); }

}
646
<STRING> {

	/* Closing quote: emit the literal, quote included, and resume normal scanning. */
	{StringBoundary}			{ yybegin(YYINITIAL); addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* Ordinary string content: keep scanning, the token is emitted later. */
	[^\n\"]+					{}

	/* The line or the input ends inside the string: emit what has been */
	/* seen so far and return without leaving the STRING state.         */
	{LineTerminator}			{ addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
	<<EOF>>						{ addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }

	/* A doubled quote gets no special rule: the first quote closes this */
	/* literal and the second one opens a new literal.                   */

}
656
<CHAR> {

	/* Ordinary content of the single-quoted literal: keep scanning. */
	[^\n\']+					{}

	/* The line ends inside the literal: emit what has been seen so far. */
	/* No state switch is made here, matching the STRING and MLC states; */
	/* getTokenList() selects the lexical state afresh on every call, so */
	/* the former yybegin(YYINITIAL) in this action had no effect.       */
	{LineTerminator}			{ addToken(start,zzStartRead-1, Token.LITERAL_CHAR); return firstToken; }

	/* Closing quote: emit the literal, quote included, and resume normal scanning. */
	{CharBoundary}				{ yybegin(YYINITIAL); addToken(start,zzStartRead, Token.LITERAL_CHAR); }

	/* The input ends inside the literal: emit what has been seen so far. */
	<<EOF>>						{ addToken(start,zzStartRead-1, Token.LITERAL_CHAR); return firstToken; }

}
666
<MLC> {

	/* End of the comment: emit it, both closing characters included, */
	/* and resume normal scanning.                                    */
	{MLCEnd}					{ yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }

	/* Comment text.  A "*" that does not start the end marker is just */
	/* more comment text.                                              */
	[^\n\*]+					{}
	\*							{}

	/* The line or the input ends inside the comment: emit what has been */
	/* seen so far and return without leaving the MLC state.             */
	{LineTerminator}			{ addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
	<<EOF>>						{ addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }

}
Note: See TracBrowser for help on using the repository browser.