source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/WindowsBatchTokenMaker.java@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut of a text edit area for GLI that supports color syntax highlighting

File size: 19.1 KB
Line 
1/*
2 * 03/07/2004
3 *
4 * WindowsBatchTokenMaker.java - Scanner for Windows batch files.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import javax.swing.text.Segment;
12
13import org.fife.ui.rsyntaxtextarea.*;
14
15
16/**
17 * A token maker that turns text into a linked list of
18 * <code>Token</code>s for syntax highlighting Microsoft
19 * Windows batch files.
20 *
21 * @author Robert Futrell
22 * @version 0.1
23 */
24public class WindowsBatchTokenMaker extends AbstractTokenMaker {
25
26 protected final String operators = "@:*<>=?";
27
28 private int currentTokenStart;
29 private int currentTokenType;
30
31 private boolean bracketVariable; // Whether a variable is of the format %{...}
32
33
34 /**
35 * Constructor.
36 */
37 public WindowsBatchTokenMaker() {
38 super(); // Initializes tokensToHighlight.
39 }
40
41
42 /**
43 * Checks the token to give it the exact ID it deserves before
44 * being passed up to the super method.
45 *
46 * @param segment <code>Segment</code> to get text from.
47 * @param start Start offset in <code>segment</code> of token.
48 * @param end End offset in <code>segment</code> of token.
49 * @param tokenType The token's type.
50 * @param startOffset The offset in the document at which the token occurs.
51 */
52 public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) {
53
54 switch (tokenType) {
55 // Since reserved words, functions, and data types are all passed
56 // into here as "identifiers," we have to see what the token
57 // really is...
58 case Token.IDENTIFIER:
59 int value = wordsToHighlight.get(segment, start,end);
60 if (value!=-1)
61 tokenType = value;
62 break;
63 }
64
65 super.addToken(segment, start, end, tokenType, startOffset);
66
67 }
68
69
70 /**
71 * Returns the text to place at the beginning and end of a
72 * line to "comment" it in a this programming language.
73 *
74 * @return The start and end strings to add to a line to "comment"
75 * it out.
76 */
77 public String[] getLineCommentStartAndEnd() {
78 return new String[] { "rem ", null };
79 }
80
81
82 /**
83 * Returns whether tokens of the specified type should have "mark
84 * occurrences" enabled for the current programming language.
85 *
86 * @param type The token type.
87 * @return Whether tokens of this type should have "mark occurrences"
88 * enabled.
89 */
90 public boolean getMarkOccurrencesOfTokenType(int type) {
91 return type==Token.IDENTIFIER || type==Token.VARIABLE;
92 }
93
94
95 /**
96 * Returns the words to highlight for Windows batch files.
97 *
98 * @return A <code>TokenMap</code> containing the words to highlight for
99 * Windows batch files.
100 * @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight
101 */
102 public TokenMap getWordsToHighlight() {
103
104 TokenMap tokenMap = new TokenMap(true); // Ignore case.
105
106 int reservedWord = Token.RESERVED_WORD;
107 tokenMap.put("call", reservedWord);
108 tokenMap.put("choice", reservedWord);
109 tokenMap.put("cls", reservedWord);
110 tokenMap.put("echo", reservedWord);
111 tokenMap.put("exit", reservedWord);
112 tokenMap.put("goto", reservedWord);
113 tokenMap.put("if", reservedWord);
114 tokenMap.put("pause", reservedWord);
115 tokenMap.put("shift", reservedWord);
116 tokenMap.put("start", reservedWord);
117
118 tokenMap.put("ansi.sys", reservedWord);
119 tokenMap.put("append", reservedWord);
120 tokenMap.put("arp", reservedWord);
121 tokenMap.put("assign", reservedWord);
122 tokenMap.put("assoc", reservedWord);
123 tokenMap.put("at", reservedWord);
124 tokenMap.put("attrib", reservedWord);
125 tokenMap.put("break", reservedWord);
126 tokenMap.put("cacls", reservedWord);
127 tokenMap.put("call", reservedWord);
128 tokenMap.put("cd", reservedWord);
129 tokenMap.put("chcp", reservedWord);
130 tokenMap.put("chdir", reservedWord);
131 tokenMap.put("chkdsk", reservedWord);
132 tokenMap.put("chknfts", reservedWord);
133 tokenMap.put("choice", reservedWord);
134 tokenMap.put("cls", reservedWord);
135 tokenMap.put("cmd", reservedWord);
136 tokenMap.put("color", reservedWord);
137 tokenMap.put("comp", reservedWord);
138 tokenMap.put("compact", reservedWord);
139 tokenMap.put("control", reservedWord);
140 tokenMap.put("convert", reservedWord);
141 tokenMap.put("copy", reservedWord);
142 tokenMap.put("ctty", reservedWord);
143 tokenMap.put("date", reservedWord);
144 tokenMap.put("debug", reservedWord);
145 tokenMap.put("defrag", reservedWord);
146 tokenMap.put("del", reservedWord);
147 tokenMap.put("deltree", reservedWord);
148 tokenMap.put("dir", reservedWord);
149 tokenMap.put("diskcomp", reservedWord);
150 tokenMap.put("diskcopy", reservedWord);
151 tokenMap.put("do", reservedWord);
152 tokenMap.put("doskey", reservedWord);
153 tokenMap.put("dosshell", reservedWord);
154 tokenMap.put("drivparm", reservedWord);
155 tokenMap.put("echo", reservedWord);
156 tokenMap.put("edit", reservedWord);
157 tokenMap.put("edlin", reservedWord);
158 tokenMap.put("emm386", reservedWord);
159 tokenMap.put("erase", reservedWord);
160 tokenMap.put("exist", reservedWord);
161 tokenMap.put("exit", reservedWord);
162 tokenMap.put("expand", reservedWord);
163 tokenMap.put("extract", reservedWord);
164 tokenMap.put("fasthelp", reservedWord);
165 tokenMap.put("fc", reservedWord);
166 tokenMap.put("fdisk", reservedWord);
167 tokenMap.put("find", reservedWord);
168 tokenMap.put("for", reservedWord);
169 tokenMap.put("format", reservedWord);
170 tokenMap.put("ftp", reservedWord);
171 tokenMap.put("graftabl", reservedWord);
172 tokenMap.put("help", reservedWord);
173 tokenMap.put("ifshlp.sys", reservedWord);
174 tokenMap.put("in", reservedWord);
175 tokenMap.put("ipconfig", reservedWord);
176 tokenMap.put("keyb", reservedWord);
177 tokenMap.put("label", reservedWord);
178 tokenMap.put("lh", reservedWord);
179 tokenMap.put("loadfix", reservedWord);
180 tokenMap.put("loadhigh", reservedWord);
181 tokenMap.put("lock", reservedWord);
182 tokenMap.put("md", reservedWord);
183 tokenMap.put("mem", reservedWord);
184 tokenMap.put("mkdir", reservedWord);
185 tokenMap.put("mode", reservedWord);
186 tokenMap.put("more", reservedWord);
187 tokenMap.put("move", reservedWord);
188 tokenMap.put("msav", reservedWord);
189 tokenMap.put("msd", reservedWord);
190 tokenMap.put("mscdex", reservedWord);
191 tokenMap.put("nbtstat", reservedWord);
192 tokenMap.put("net", reservedWord);
193 tokenMap.put("netstat", reservedWord);
194 tokenMap.put("nlsfunc", reservedWord);
195 tokenMap.put("not", reservedWord);
196 tokenMap.put("nslookup", reservedWord);
197 tokenMap.put("path", reservedWord);
198 tokenMap.put("pathping", reservedWord);
199 tokenMap.put("pause", reservedWord);
200 tokenMap.put("ping", reservedWord);
201 tokenMap.put("power", reservedWord);
202 tokenMap.put("print", reservedWord);
203 tokenMap.put("prompt", reservedWord);
204 tokenMap.put("qbasic", reservedWord);
205 tokenMap.put("rd", reservedWord);
206 tokenMap.put("ren", reservedWord);
207 tokenMap.put("rename", reservedWord);
208 tokenMap.put("rmdir", reservedWord);
209 tokenMap.put("route", reservedWord);
210 tokenMap.put("sc", reservedWord);
211 tokenMap.put("scandisk", reservedWord);
212 tokenMap.put("scandreg", reservedWord);
213 tokenMap.put("set", reservedWord);
214 tokenMap.put("setx", reservedWord);
215 tokenMap.put("setver", reservedWord);
216 tokenMap.put("share", reservedWord);
217 tokenMap.put("shutdown", reservedWord);
218 tokenMap.put("smartdrv", reservedWord);
219 tokenMap.put("sort", reservedWord);
220 tokenMap.put("subset", reservedWord);
221 tokenMap.put("switches", reservedWord);
222 tokenMap.put("sys", reservedWord);
223 tokenMap.put("time", reservedWord);
224 tokenMap.put("tracert", reservedWord);
225 tokenMap.put("tree", reservedWord);
226 tokenMap.put("type", reservedWord);
227 tokenMap.put("undelete", reservedWord);
228 tokenMap.put("unformat", reservedWord);
229 tokenMap.put("unlock", reservedWord);
230 tokenMap.put("ver", reservedWord);
231 tokenMap.put("verify", reservedWord);
232 tokenMap.put("vol", reservedWord);
233 tokenMap.put("xcopy", reservedWord);
234
235 return tokenMap;
236
237 }
238
239
240 /**
241 * Returns a list of tokens representing the given text.
242 *
243 * @param text The text to break into tokens.
244 * @param startTokenType The token with which to start tokenizing.
245 * @param startOffset The offset at which the line of tokens begins.
246 * @return A linked list of tokens representing <code>text</code>.
247 */
248 public Token getTokenList(Segment text, int startTokenType, final int startOffset) {
249
250 resetTokenList();
251
252 char[] array = text.array;
253 int offset = text.offset;
254 int count = text.count;
255 int end = offset + count;
256
257 // See, when we find a token, its starting position is always of the form:
258 // 'startOffset + (currentTokenStart-offset)'; but since startOffset and
259 // offset are constant, tokens' starting positions become:
260 // 'newStartOffset+currentTokenStart' for one less subtraction operation.
261 int newStartOffset = startOffset - offset;
262
263 currentTokenStart = offset;
264 currentTokenType = startTokenType;
265
266//beginning:
267 for (int i=offset; i<end; i++) {
268
269 char c = array[i];
270
271 switch (currentTokenType) {
272
273 case Token.NULL:
274
275 currentTokenStart = i; // Starting a new token here.
276
277 switch (c) {
278
279 case ' ':
280 case '\t':
281 currentTokenType = Token.WHITESPACE;
282 break;
283
284 case '"':
285 currentTokenType = Token.ERROR_STRING_DOUBLE;
286 break;
287
288 case '%':
289 currentTokenType = Token.VARIABLE;
290 break;
291
292 // The "separators".
293 case '(':
294 case ')':
295 addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart);
296 currentTokenType = Token.NULL;
297 break;
298
299 // The "separators2".
300 case ',':
301 case ';':
302 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
303 currentTokenType = Token.NULL;
304 break;
305
306 // Newer version of EOL comments, or a label
307 case ':':
308 // If this will be the first token added, it is
309 // a new-style comment or a label
310 if (firstToken==null) {
311 if (i<end-1 && array[i+1]==':') { // new-style comment
312 currentTokenType = Token.COMMENT_EOL;
313 }
314 else { // Label
315 currentTokenType = Token.PREPROCESSOR;
316 }
317 }
318 else { // Just a colon
319 currentTokenType = Token.IDENTIFIER;
320 }
321 break;
322
323 default:
324
325 // Just to speed things up a tad, as this will usually be the case (if spaces above failed).
326 if (RSyntaxUtilities.isLetterOrDigit(c) || c=='\\') {
327 currentTokenType = Token.IDENTIFIER;
328 break;
329 }
330
331 int indexOf = operators.indexOf(c,0);
332 if (indexOf>-1) {
333 addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart);
334 currentTokenType = Token.NULL;
335 break;
336 }
337 else {
338 currentTokenType = Token.IDENTIFIER;
339 break;
340 }
341
342 } // End of switch (c).
343
344 break;
345
346 case Token.WHITESPACE:
347
348 switch (c) {
349
350 case ' ':
351 case '\t':
352 break; // Still whitespace.
353
354 case '"':
355 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
356 currentTokenStart = i;
357 currentTokenType = Token.ERROR_STRING_DOUBLE;
358 break;
359
360 case '%':
361 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
362 currentTokenStart = i;
363 currentTokenType = Token.VARIABLE;
364 break;
365
366 // The "separators".
367 case '(':
368 case ')':
369 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
370 addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
371 currentTokenType = Token.NULL;
372 break;
373
374 // The "separators2".
375 case ',':
376 case ';':
377 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
378 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
379 currentTokenType = Token.NULL;
380 break;
381
382 // Newer version of EOL comments, or a label
383 case ':':
384 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
385 currentTokenStart = i;
386 // If the previous (whitespace) token was the first token
387 // added, this is a new-style comment or a label
388 if (firstToken.getNextToken()==null) {
389 if (i<end-1 && array[i+1]==':') { // new-style comment
390 currentTokenType = Token.COMMENT_EOL;
391 }
392 else { // Label
393 currentTokenType = Token.PREPROCESSOR;
394 }
395 }
396 else { // Just a colon
397 currentTokenType = Token.IDENTIFIER;
398 }
399 break;
400
401 default: // Add the whitespace token and start anew.
402
403 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
404 currentTokenStart = i;
405
406 // Just to speed things up a tad, as this will usually be the case (if spaces above failed).
407 if (RSyntaxUtilities.isLetterOrDigit(c) || c=='\\') {
408 currentTokenType = Token.IDENTIFIER;
409 break;
410 }
411
412 int indexOf = operators.indexOf(c,0);
413 if (indexOf>-1) {
414 addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart);
415 currentTokenType = Token.NULL;
416 break;
417 }
418 else {
419 currentTokenType = Token.IDENTIFIER;
420 }
421
422 } // End of switch (c).
423
424 break;
425
426 default: // Should never happen
427 case Token.IDENTIFIER:
428
429 switch (c) {
430
431 case ' ':
432 case '\t':
433 // Check for REM comments.
434 if (i-currentTokenStart==3 &&
435 (array[i-3]=='r' || array[i-3]=='R') &&
436 (array[i-2]=='e' || array[i-2]=='E') &&
437 (array[i-1]=='m' || array[i-1]=='M')) {
438 currentTokenType = Token.COMMENT_EOL;
439 break;
440 }
441 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
442 currentTokenStart = i;
443 currentTokenType = Token.WHITESPACE;
444 break;
445
446 case '"':
447 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
448 currentTokenStart = i;
449 currentTokenType = Token.ERROR_STRING_DOUBLE;
450 break;
451
452 case '%':
453 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
454 currentTokenStart = i;
455 currentTokenType = Token.VARIABLE;
456 break;
457
458 // Should be part of identifiers, but not at end of "REM".
459 case '\\':
460 // Check for REM comments.
461 if (i-currentTokenStart==3 &&
462 (array[i-3]=='r' || array[i-3]=='R') &&
463 (array[i-2]=='e' || array[i-2]=='E') &&
464 (array[i-1]=='m' || array[i-1]=='M')) {
465 currentTokenType = Token.COMMENT_EOL;
466 }
467 break;
468
469 case '.':
470 case '_':
471 break; // Characters good for identifiers.
472
473 // The "separators".
474 case '(':
475 case ')':
476 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
477 addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
478 currentTokenType = Token.NULL;
479 break;
480
481 // The "separators2".
482 case ',':
483 case ';':
484 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
485 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
486 currentTokenType = Token.NULL;
487 break;
488
489 default:
490
491 // Just to speed things up a tad, as this will usually be the case.
492 if (RSyntaxUtilities.isLetterOrDigit(c) || c=='\\') {
493 break;
494 }
495
496 int indexOf = operators.indexOf(c);
497 if (indexOf>-1) {
498 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
499 addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
500 currentTokenType = Token.NULL;
501 break;
502 }
503
504 // Otherwise, fall through and assume we're still okay as an IDENTIFIER...
505
506 } // End of switch (c).
507
508 break;
509
510 case Token.COMMENT_EOL:
511 i = end - 1;
512 addToken(text, currentTokenStart,i, Token.COMMENT_EOL, newStartOffset+currentTokenStart);
513 // We need to set token type to null so at the bottom we don't add one more token.
514 currentTokenType = Token.NULL;
515 break;
516
517 case Token.PREPROCESSOR: // Used for labels
518 i = end - 1;
519 addToken(text, currentTokenStart,i, Token.PREPROCESSOR, newStartOffset+currentTokenStart);
520 // We need to set token type to null so at the bottom we don't add one more token.
521 currentTokenType = Token.NULL;
522 break;
523
524 case Token.ERROR_STRING_DOUBLE:
525
526 if (c=='"') {
527 addToken(text, currentTokenStart,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart);
528 currentTokenStart = i + 1;
529 currentTokenType = Token.NULL;
530 }
531 // Otherwise, we're still an unclosed string...
532
533 break;
534
535 case Token.VARIABLE:
536
537 if (i==currentTokenStart+1) { // first character after '%'.
538 bracketVariable = false;
539 switch (c) {
540 case '{':
541 bracketVariable = true;
542 break;
543 default:
544 if (RSyntaxUtilities.isLetter(c) || c==' ') { // No tab, just space; spaces are okay in variable names.
545 break;
546 }
547 else if (RSyntaxUtilities.isDigit(c)) { // Single-digit command-line argument ("%1").
548 addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
549 currentTokenType = Token.NULL;
550 break;
551 }
552 else { // Anything else, ???.
553 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); // ???
554 i--;
555 currentTokenType = Token.NULL;
556 break;
557 }
558 } // End of switch (c).
559 }
560 else { // Character other than first after the '%'.
561 if (bracketVariable==true) {
562 if (c=='}') {
563 addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
564 currentTokenType = Token.NULL;
565 }
566 }
567 else {
568 if (c=='%') {
569 addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
570 currentTokenType = Token.NULL;
571 }
572 }
573 break;
574 }
575 break;
576
577 } // End of switch (currentTokenType).
578
579 } // End of for (int i=offset; i<end; i++).
580
581 // Deal with the (possibly there) last token.
582 if (currentTokenType != Token.NULL) {
583
584 // Check for REM comments.
585 if (end-currentTokenStart==3 &&
586 (array[end-3]=='r' || array[end-3]=='R') &&
587 (array[end-2]=='e' || array[end-2]=='E') &&
588 (array[end-1]=='m' || array[end-1]=='M')) {
589 currentTokenType = Token.COMMENT_EOL;
590 }
591
592 addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart);
593 }
594
595 addNullToken();
596
597 // Return the first token in our linked list.
598 return firstToken;
599
600 }
601
602
603}
Note: See TracBrowser for help on using the repository browser.