Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

WindowsBatchTokenMaker.java@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut of a text edit area for GLI that supports color syntax highlighting
File size: 19.1 KB

Line
1	/*
2	* 03/07/2004
3	*
4	* WindowsBatchTokenMaker.java - Scanner for Windows batch files.
5	*
6	* This library is distributed under a modified BSD license. See the included
7	* RSyntaxTextArea.License.txt file for details.
8	*/
9	package org.fife.ui.rsyntaxtextarea.modes;
10
11	import javax.swing.text.Segment;
12
13	import org.fife.ui.rsyntaxtextarea.*;
14
15
16	/**
17	* A token maker that turns text into a linked list of
18	* <code>Token</code>s for syntax highlighting Microsoft
19	* Windows batch files.
20	*
21	* @author Robert Futrell
22	* @version 0.1
23	*/
24	public class WindowsBatchTokenMaker extends AbstractTokenMaker {
25
26	protected final String operators = "@:*<>=?";
27
28	private int currentTokenStart;
29	private int currentTokenType;
30
31	private boolean bracketVariable; // Whether a variable is of the format %{...}
32
33
34	/**
35	* Constructor.
36	*/
37	public WindowsBatchTokenMaker() {
38	super(); // Initializes tokensToHighlight.
39	}
40
41
42	/**
43	* Checks the token to give it the exact ID it deserves before
44	* being passed up to the super method.
45	*
46	* @param segment <code>Segment</code> to get text from.
47	* @param start Start offset in <code>segment</code> of token.
48	* @param end End offset in <code>segment</code> of token.
49	* @param tokenType The token's type.
50	* @param startOffset The offset in the document at which the token occurs.
51	*/
52	public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) {
53
54	switch (tokenType) {
55	// Since reserved words, functions, and data types are all passed
56	// into here as "identifiers," we have to see what the token
57	// really is...
58	case Token.IDENTIFIER:
59	int value = wordsToHighlight.get(segment, start,end);
60	if (value!=-1)
61	tokenType = value;
62	break;
63	}
64
65	super.addToken(segment, start, end, tokenType, startOffset);
66
67	}
68
69
70	/**
71	* Returns the text to place at the beginning and end of a
72	* line to "comment" it in a this programming language.
73	*
74	* @return The start and end strings to add to a line to "comment"
75	* it out.
76	*/
77	public String[] getLineCommentStartAndEnd() {
78	return new String[] { "rem ", null };
79	}
80
81
82	/**
83	* Returns whether tokens of the specified type should have "mark
84	* occurrences" enabled for the current programming language.
85	*
86	* @param type The token type.
87	* @return Whether tokens of this type should have "mark occurrences"
88	* enabled.
89	*/
90	public boolean getMarkOccurrencesOfTokenType(int type) {
91	return type==Token.IDENTIFIER \|\| type==Token.VARIABLE;
92	}
93
94
95	/**
96	* Returns the words to highlight for Windows batch files.
97	*
98	* @return A <code>TokenMap</code> containing the words to highlight for
99	* Windows batch files.
100	* @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight
101	*/
102	public TokenMap getWordsToHighlight() {
103
104	TokenMap tokenMap = new TokenMap(true); // Ignore case.
105
106	int reservedWord = Token.RESERVED_WORD;
107	tokenMap.put("call", reservedWord);
108	tokenMap.put("choice", reservedWord);
109	tokenMap.put("cls", reservedWord);
110	tokenMap.put("echo", reservedWord);
111	tokenMap.put("exit", reservedWord);
112	tokenMap.put("goto", reservedWord);
113	tokenMap.put("if", reservedWord);
114	tokenMap.put("pause", reservedWord);
115	tokenMap.put("shift", reservedWord);
116	tokenMap.put("start", reservedWord);
117
118	tokenMap.put("ansi.sys", reservedWord);
119	tokenMap.put("append", reservedWord);
120	tokenMap.put("arp", reservedWord);
121	tokenMap.put("assign", reservedWord);
122	tokenMap.put("assoc", reservedWord);
123	tokenMap.put("at", reservedWord);
124	tokenMap.put("attrib", reservedWord);
125	tokenMap.put("break", reservedWord);
126	tokenMap.put("cacls", reservedWord);
127	tokenMap.put("call", reservedWord);
128	tokenMap.put("cd", reservedWord);
129	tokenMap.put("chcp", reservedWord);
130	tokenMap.put("chdir", reservedWord);
131	tokenMap.put("chkdsk", reservedWord);
132	tokenMap.put("chknfts", reservedWord);
133	tokenMap.put("choice", reservedWord);
134	tokenMap.put("cls", reservedWord);
135	tokenMap.put("cmd", reservedWord);
136	tokenMap.put("color", reservedWord);
137	tokenMap.put("comp", reservedWord);
138	tokenMap.put("compact", reservedWord);
139	tokenMap.put("control", reservedWord);
140	tokenMap.put("convert", reservedWord);
141	tokenMap.put("copy", reservedWord);
142	tokenMap.put("ctty", reservedWord);
143	tokenMap.put("date", reservedWord);
144	tokenMap.put("debug", reservedWord);
145	tokenMap.put("defrag", reservedWord);
146	tokenMap.put("del", reservedWord);
147	tokenMap.put("deltree", reservedWord);
148	tokenMap.put("dir", reservedWord);
149	tokenMap.put("diskcomp", reservedWord);
150	tokenMap.put("diskcopy", reservedWord);
151	tokenMap.put("do", reservedWord);
152	tokenMap.put("doskey", reservedWord);
153	tokenMap.put("dosshell", reservedWord);
154	tokenMap.put("drivparm", reservedWord);
155	tokenMap.put("echo", reservedWord);
156	tokenMap.put("edit", reservedWord);
157	tokenMap.put("edlin", reservedWord);
158	tokenMap.put("emm386", reservedWord);
159	tokenMap.put("erase", reservedWord);
160	tokenMap.put("exist", reservedWord);
161	tokenMap.put("exit", reservedWord);
162	tokenMap.put("expand", reservedWord);
163	tokenMap.put("extract", reservedWord);
164	tokenMap.put("fasthelp", reservedWord);
165	tokenMap.put("fc", reservedWord);
166	tokenMap.put("fdisk", reservedWord);
167	tokenMap.put("find", reservedWord);
168	tokenMap.put("for", reservedWord);
169	tokenMap.put("format", reservedWord);
170	tokenMap.put("ftp", reservedWord);
171	tokenMap.put("graftabl", reservedWord);
172	tokenMap.put("help", reservedWord);
173	tokenMap.put("ifshlp.sys", reservedWord);
174	tokenMap.put("in", reservedWord);
175	tokenMap.put("ipconfig", reservedWord);
176	tokenMap.put("keyb", reservedWord);
177	tokenMap.put("label", reservedWord);
178	tokenMap.put("lh", reservedWord);
179	tokenMap.put("loadfix", reservedWord);
180	tokenMap.put("loadhigh", reservedWord);
181	tokenMap.put("lock", reservedWord);
182	tokenMap.put("md", reservedWord);
183	tokenMap.put("mem", reservedWord);
184	tokenMap.put("mkdir", reservedWord);
185	tokenMap.put("mode", reservedWord);
186	tokenMap.put("more", reservedWord);
187	tokenMap.put("move", reservedWord);
188	tokenMap.put("msav", reservedWord);
189	tokenMap.put("msd", reservedWord);
190	tokenMap.put("mscdex", reservedWord);
191	tokenMap.put("nbtstat", reservedWord);
192	tokenMap.put("net", reservedWord);
193	tokenMap.put("netstat", reservedWord);
194	tokenMap.put("nlsfunc", reservedWord);
195	tokenMap.put("not", reservedWord);
196	tokenMap.put("nslookup", reservedWord);
197	tokenMap.put("path", reservedWord);
198	tokenMap.put("pathping", reservedWord);
199	tokenMap.put("pause", reservedWord);
200	tokenMap.put("ping", reservedWord);
201	tokenMap.put("power", reservedWord);
202	tokenMap.put("print", reservedWord);
203	tokenMap.put("prompt", reservedWord);
204	tokenMap.put("qbasic", reservedWord);
205	tokenMap.put("rd", reservedWord);
206	tokenMap.put("ren", reservedWord);
207	tokenMap.put("rename", reservedWord);
208	tokenMap.put("rmdir", reservedWord);
209	tokenMap.put("route", reservedWord);
210	tokenMap.put("sc", reservedWord);
211	tokenMap.put("scandisk", reservedWord);
212	tokenMap.put("scandreg", reservedWord);
213	tokenMap.put("set", reservedWord);
214	tokenMap.put("setx", reservedWord);
215	tokenMap.put("setver", reservedWord);
216	tokenMap.put("share", reservedWord);
217	tokenMap.put("shutdown", reservedWord);
218	tokenMap.put("smartdrv", reservedWord);
219	tokenMap.put("sort", reservedWord);
220	tokenMap.put("subset", reservedWord);
221	tokenMap.put("switches", reservedWord);
222	tokenMap.put("sys", reservedWord);
223	tokenMap.put("time", reservedWord);
224	tokenMap.put("tracert", reservedWord);
225	tokenMap.put("tree", reservedWord);
226	tokenMap.put("type", reservedWord);
227	tokenMap.put("undelete", reservedWord);
228	tokenMap.put("unformat", reservedWord);
229	tokenMap.put("unlock", reservedWord);
230	tokenMap.put("ver", reservedWord);
231	tokenMap.put("verify", reservedWord);
232	tokenMap.put("vol", reservedWord);
233	tokenMap.put("xcopy", reservedWord);
234
235	return tokenMap;
236
237	}
238
239
240	/**
241	* Returns a list of tokens representing the given text.
242	*
243	* @param text The text to break into tokens.
244	* @param startTokenType The token with which to start tokenizing.
245	* @param startOffset The offset at which the line of tokens begins.
246	* @return A linked list of tokens representing <code>text</code>.
247	*/
248	public Token getTokenList(Segment text, int startTokenType, final int startOffset) {
249
250	resetTokenList();
251
252	char[] array = text.array;
253	int offset = text.offset;
254	int count = text.count;
255	int end = offset + count;
256
257	// See, when we find a token, its starting position is always of the form:
258	// 'startOffset + (currentTokenStart-offset)'; but since startOffset and
259	// offset are constant, tokens' starting positions become:
260	// 'newStartOffset+currentTokenStart' for one less subtraction operation.
261	int newStartOffset = startOffset - offset;
262
263	currentTokenStart = offset;
264	currentTokenType = startTokenType;
265
266	//beginning:
267	for (int i=offset; i<end; i++) {
268
269	char c = array[i];
270
271	switch (currentTokenType) {
272
273	case Token.NULL:
274
275	currentTokenStart = i; // Starting a new token here.
276
277	switch (c) {
278
279	case ' ':
280	case '\t':
281	currentTokenType = Token.WHITESPACE;
282	break;
283
284	case '"':
285	currentTokenType = Token.ERROR_STRING_DOUBLE;
286	break;
287
288	case '%':
289	currentTokenType = Token.VARIABLE;
290	break;
291
292	// The "separators".
293	case '(':
294	case ')':
295	addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart);
296	currentTokenType = Token.NULL;
297	break;
298
299	// The "separators2".
300	case ',':
301	case ';':
302	addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
303	currentTokenType = Token.NULL;
304	break;
305
306	// Newer version of EOL comments, or a label
307	case ':':
308	// If this will be the first token added, it is
309	// a new-style comment or a label
310	if (firstToken==null) {
311	if (i<end-1 && array[i+1]==':') { // new-style comment
312	currentTokenType = Token.COMMENT_EOL;
313	}
314	else { // Label
315	currentTokenType = Token.PREPROCESSOR;
316	}
317	}
318	else { // Just a colon
319	currentTokenType = Token.IDENTIFIER;
320	}
321	break;
322
323	default:
324
325	// Just to speed things up a tad, as this will usually be the case (if spaces above failed).
326	if (RSyntaxUtilities.isLetterOrDigit(c) \|\| c=='\\') {
327	currentTokenType = Token.IDENTIFIER;
328	break;
329	}
330
331	int indexOf = operators.indexOf(c,0);
332	if (indexOf>-1) {
333	addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart);
334	currentTokenType = Token.NULL;
335	break;
336	}
337	else {
338	currentTokenType = Token.IDENTIFIER;
339	break;
340	}
341
342	} // End of switch (c).
343
344	break;
345
346	case Token.WHITESPACE:
347
348	switch (c) {
349
350	case ' ':
351	case '\t':
352	break; // Still whitespace.
353
354	case '"':
355	addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
356	currentTokenStart = i;
357	currentTokenType = Token.ERROR_STRING_DOUBLE;
358	break;
359
360	case '%':
361	addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
362	currentTokenStart = i;
363	currentTokenType = Token.VARIABLE;
364	break;
365
366	// The "separators".
367	case '(':
368	case ')':
369	addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
370	addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
371	currentTokenType = Token.NULL;
372	break;
373
374	// The "separators2".
375	case ',':
376	case ';':
377	addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
378	addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
379	currentTokenType = Token.NULL;
380	break;
381
382	// Newer version of EOL comments, or a label
383	case ':':
384	addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
385	currentTokenStart = i;
386	// If the previous (whitespace) token was the first token
387	// added, this is a new-style comment or a label
388	if (firstToken.getNextToken()==null) {
389	if (i<end-1 && array[i+1]==':') { // new-style comment
390	currentTokenType = Token.COMMENT_EOL;
391	}
392	else { // Label
393	currentTokenType = Token.PREPROCESSOR;
394	}
395	}
396	else { // Just a colon
397	currentTokenType = Token.IDENTIFIER;
398	}
399	break;
400
401	default: // Add the whitespace token and start anew.
402
403	addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
404	currentTokenStart = i;
405
406	// Just to speed things up a tad, as this will usually be the case (if spaces above failed).
407	if (RSyntaxUtilities.isLetterOrDigit(c) \|\| c=='\\') {
408	currentTokenType = Token.IDENTIFIER;
409	break;
410	}
411
412	int indexOf = operators.indexOf(c,0);
413	if (indexOf>-1) {
414	addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart);
415	currentTokenType = Token.NULL;
416	break;
417	}
418	else {
419	currentTokenType = Token.IDENTIFIER;
420	}
421
422	} // End of switch (c).
423
424	break;
425
426	default: // Should never happen
427	case Token.IDENTIFIER:
428
429	switch (c) {
430
431	case ' ':
432	case '\t':
433	// Check for REM comments.
434	if (i-currentTokenStart==3 &&
435	(array[i-3]=='r' \|\| array[i-3]=='R') &&
436	(array[i-2]=='e' \|\| array[i-2]=='E') &&
437	(array[i-1]=='m' \|\| array[i-1]=='M')) {
438	currentTokenType = Token.COMMENT_EOL;
439	break;
440	}
441	addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
442	currentTokenStart = i;
443	currentTokenType = Token.WHITESPACE;
444	break;
445
446	case '"':
447	addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
448	currentTokenStart = i;
449	currentTokenType = Token.ERROR_STRING_DOUBLE;
450	break;
451
452	case '%':
453	addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
454	currentTokenStart = i;
455	currentTokenType = Token.VARIABLE;
456	break;
457
458	// Should be part of identifiers, but not at end of "REM".
459	case '\\':
460	// Check for REM comments.
461	if (i-currentTokenStart==3 &&
462	(array[i-3]=='r' \|\| array[i-3]=='R') &&
463	(array[i-2]=='e' \|\| array[i-2]=='E') &&
464	(array[i-1]=='m' \|\| array[i-1]=='M')) {
465	currentTokenType = Token.COMMENT_EOL;
466	}
467	break;
468
469	case '.':
470	case '_':
471	break; // Characters good for identifiers.
472
473	// The "separators".
474	case '(':
475	case ')':
476	addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
477	addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
478	currentTokenType = Token.NULL;
479	break;
480
481	// The "separators2".
482	case ',':
483	case ';':
484	addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
485	addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
486	currentTokenType = Token.NULL;
487	break;
488
489	default:
490
491	// Just to speed things up a tad, as this will usually be the case.
492	if (RSyntaxUtilities.isLetterOrDigit(c) \|\| c=='\\') {
493	break;
494	}
495
496	int indexOf = operators.indexOf(c);
497	if (indexOf>-1) {
498	addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
499	addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
500	currentTokenType = Token.NULL;
501	break;
502	}
503
504	// Otherwise, fall through and assume we're still okay as an IDENTIFIER...
505
506	} // End of switch (c).
507
508	break;
509
510	case Token.COMMENT_EOL:
511	i = end - 1;
512	addToken(text, currentTokenStart,i, Token.COMMENT_EOL, newStartOffset+currentTokenStart);
513	// We need to set token type to null so at the bottom we don't add one more token.
514	currentTokenType = Token.NULL;
515	break;
516
517	case Token.PREPROCESSOR: // Used for labels
518	i = end - 1;
519	addToken(text, currentTokenStart,i, Token.PREPROCESSOR, newStartOffset+currentTokenStart);
520	// We need to set token type to null so at the bottom we don't add one more token.
521	currentTokenType = Token.NULL;
522	break;
523
524	case Token.ERROR_STRING_DOUBLE:
525
526	if (c=='"') {
527	addToken(text, currentTokenStart,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart);
528	currentTokenStart = i + 1;
529	currentTokenType = Token.NULL;
530	}
531	// Otherwise, we're still an unclosed string...
532
533	break;
534
535	case Token.VARIABLE:
536
537	if (i==currentTokenStart+1) { // first character after '%'.
538	bracketVariable = false;
539	switch (c) {
540	case '{':
541	bracketVariable = true;
542	break;
543	default:
544	if (RSyntaxUtilities.isLetter(c) \|\| c==' ') { // No tab, just space; spaces are okay in variable names.
545	break;
546	}
547	else if (RSyntaxUtilities.isDigit(c)) { // Single-digit command-line argument ("%1").
548	addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
549	currentTokenType = Token.NULL;
550	break;
551	}
552	else { // Anything else, ???.
553	addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); // ???
554	i--;
555	currentTokenType = Token.NULL;
556	break;
557	}
558	} // End of switch (c).
559	}
560	else { // Character other than first after the '%'.
561	if (bracketVariable==true) {
562	if (c=='}') {
563	addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
564	currentTokenType = Token.NULL;
565	}
566	}
567	else {
568	if (c=='%') {
569	addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
570	currentTokenType = Token.NULL;
571	}
572	}
573	break;
574	}
575	break;
576
577	} // End of switch (currentTokenType).
578
579	} // End of for (int i=offset; i<end; i++).
580
581	// Deal with the (possibly there) last token.
582	if (currentTokenType != Token.NULL) {
583
584	// Check for REM comments.
585	if (end-currentTokenStart==3 &&
586	(array[end-3]=='r' \|\| array[end-3]=='R') &&
587	(array[end-2]=='e' \|\| array[end-2]=='E') &&
588	(array[end-1]=='m' \|\| array[end-1]=='M')) {
589	currentTokenType = Token.COMMENT_EOL;
590	}
591
592	addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart);
593	}
594
595	addNullToken();
596
597	// Return the first token in our linked list.
598	return firstToken;
599
600	}
601
602
603	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/WindowsBatchTokenMaker.java@ 25584

Download in other formats: