source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/UnixShellTokenMaker.java@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 38.5 KB
1/*
2 * 03/16/2004
3 *
4 * UnixShellTokenMaker.java - Scanner for UNIX shell scripts.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import javax.swing.text.Segment;
12
13import org.fife.ui.rsyntaxtextarea.*;
14
15
16/**
17 * A token maker that turns text into a linked list of <code>Token</code>s
18 * for syntax highlighting UNIX shell scripts.
19 *
20 * @author Robert Futrell
21 * @version 0.1
22 */
23public class UnixShellTokenMaker extends AbstractTokenMaker {
24
25 protected final String operators = "=|><&";
26 protected final String separators = "()[]";
27 protected final String separators2 = ".,;"; // Characters you don't want syntax highlighted but separate identifiers.
28 protected final String shellVariables = "#-?$!*@_"; // Characters that are part of "$<char>" shell variables; e.g., "$_".
29
30
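// Scanner state shared with getTokenList() below: the offset at which the token
// currently being built starts, and the token type assigned to it so far.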
31 private int currentTokenStart;
32 private int currentTokenType;
33
34
35 /**
36 * Constructor.
37 */
38 public UnixShellTokenMaker() {
39 super(); // Initializes tokensToHighlight.
40 }
41
42
43 /**
44 * Checks the token to give it the exact ID it deserves before
45 * being passed up to the super method.
46 *
47 * @param segment <code>Segment</code> to get text from.
48 * @param start Start offset in <code>segment</code> of token.
49 * @param end End offset in <code>segment</code> of token.
50 * @param tokenType The token's type.
51 * @param startOffset The offset in the document at which the token occurs.
52 */
53 public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) {
54
55 switch (tokenType) {
56 // Since reserved words, functions, and data types are all passed into here
57 // as "identifiers," we have to see what the token really is...
58 case Token.IDENTIFIER:
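// Keywords and command names from the TokenMap are promoted here; e.g. "while"
// becomes a RESERVED_WORD token and "grep" a FUNCTION token.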
59 int value = wordsToHighlight.get(segment, start,end);
60 if (value!=-1)
61 tokenType = value;
62 break;
63 case Token.WHITESPACE:
64 case Token.SEPARATOR:
65 case Token.OPERATOR:
66 case Token.LITERAL_NUMBER_DECIMAL_INT:
67 case Token.LITERAL_STRING_DOUBLE_QUOTE:
68 case Token.LITERAL_CHAR:
69 case Token.LITERAL_BACKQUOTE:
70 case Token.COMMENT_EOL:
71 case Token.PREPROCESSOR:
72 case Token.VARIABLE:
73 break;
74
75 default:
76 new Exception("Unknown tokenType: '" + tokenType + "'").
77 printStackTrace();
78 tokenType = Token.IDENTIFIER;
79 break;
80
81 }
82
83 super.addToken(segment, start, end, tokenType, startOffset);
84
85 }
86
87
88 /**
89 * Returns the text to place at the beginning and end of a
92 * line to "comment" it out in this programming language.
91 *
92 * @return The start and end strings to add to a line to "comment"
93 * it out.
94 */
95 public String[] getLineCommentStartAndEnd() {
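// Shell comments run to the end of the line, so only a start marker exists; the
// null second element means there is no closing comment string.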
96 return new String[] { "#", null };
97 }
98
99
100 /**
101 * Returns whether tokens of the specified type should have "mark
102 * occurrences" enabled for the current programming language.
103 *
104 * @param type The token type.
105 * @return Whether tokens of this type should have "mark occurrences"
106 * enabled.
107 */
108 public boolean getMarkOccurrencesOfTokenType(int type) {
109 return type==Token.IDENTIFIER || type==Token.VARIABLE;
110 }
111
112
113 /**
114 * Returns the words to highlight for UNIX shell scripts.
115 *
116 * @return A <code>TokenMap</code> containing the words to highlight for
117 * UNIX shell scripts.
118 * @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight
119 */
120 public TokenMap getWordsToHighlight() {
121
122 TokenMap tokenMap = new TokenMap();
123
124 int reservedWord = Token.RESERVED_WORD;
125 tokenMap.put("case", reservedWord);
126 tokenMap.put("do", reservedWord);
127 tokenMap.put("done", reservedWord);
128 tokenMap.put("elif", reservedWord);
129 tokenMap.put("else", reservedWord);
130 tokenMap.put("esac", reservedWord);
131 tokenMap.put("fi", reservedWord);
132 tokenMap.put("for", reservedWord);
133 tokenMap.put("if", reservedWord);
134 tokenMap.put("in", reservedWord);
135 tokenMap.put("select", reservedWord);
136 tokenMap.put("then", reservedWord);
137 tokenMap.put("until", reservedWord);
138 tokenMap.put("while", reservedWord);
139
140 int function = Token.FUNCTION;
141 tokenMap.put("addbib", function);
142 tokenMap.put("admin", function);
143 tokenMap.put("alias", function);
144 tokenMap.put("apropos", function);
145 tokenMap.put("ar", function);
146 tokenMap.put("at", function);
147 tokenMap.put("awk", function);
148 tokenMap.put("banner", function);
149 tokenMap.put("basename", function);
150 tokenMap.put("batch", function);
151 tokenMap.put("bg", function);
152 tokenMap.put("biff", function);
153 tokenMap.put("bin-mail", function);
154 tokenMap.put("binmail", function);
155 tokenMap.put("break", function);
156 tokenMap.put("cal", function);
157 tokenMap.put("calendar", function);
158 tokenMap.put("cancel", function);
159 tokenMap.put("cat", function);
160 tokenMap.put("cb", function);
161 tokenMap.put("cc", function);
162 tokenMap.put("cd", function);
163 tokenMap.put("cdc", function);
164 tokenMap.put("chdir", function);
165 tokenMap.put("checkeq", function);
166 tokenMap.put("checknr", function);
167 tokenMap.put("chfn", function);
168 tokenMap.put("chgrp", function);
169 tokenMap.put("chmod", function);
170 tokenMap.put("chown", function);
171 tokenMap.put("chsh", function);
172 tokenMap.put("clear", function);
173 tokenMap.put("cmp", function);
174 tokenMap.put("colcrt", function);
175 tokenMap.put("comb", function);
176 tokenMap.put("comm", function);
177 tokenMap.put("command", function);
178 tokenMap.put("compress", function);
179 tokenMap.put("continue", function);
180 tokenMap.put("cp", function);
181 tokenMap.put("cpio", function);
182 tokenMap.put("cpp", function);
183 tokenMap.put("crontab", function);
184 tokenMap.put("csh", function);
185 tokenMap.put("ctags", function);
186 tokenMap.put("cut", function);
187 tokenMap.put("cvs", function);
188 tokenMap.put("date", function);
189 tokenMap.put("dbx", function);
190 tokenMap.put("delta", function);
191 tokenMap.put("deroff", function);
192 tokenMap.put("df", function);
193 tokenMap.put("diff", function);
194 tokenMap.put("dtree", function);
195 tokenMap.put("du", function);
196 tokenMap.put("e", function);
197 tokenMap.put("echo", function);
198 tokenMap.put("ed", function);
199 tokenMap.put("edit", function);
200 tokenMap.put("enscript", function);
201 tokenMap.put("eqn", function);
202 tokenMap.put("error", function);
203 tokenMap.put("eval", function);
204 tokenMap.put("ex", function);
205 tokenMap.put("exec", function);
206 tokenMap.put("exit", function);
207 tokenMap.put("expand", function);
208 tokenMap.put("export", function);
209 tokenMap.put("expr", function);
210 tokenMap.put("false", function);
211 tokenMap.put("fc", function);
212 tokenMap.put("fg", function);
213 tokenMap.put("file", function);
214 tokenMap.put("find", function);
215 tokenMap.put("finger", function);
216 tokenMap.put("fmt", function);
217 tokenMap.put("fmt_mail", function);
218 tokenMap.put("fold", function);
219 tokenMap.put("ftp", function);
220 tokenMap.put("function", function);
221 tokenMap.put("gcore", function);
222 tokenMap.put("get", function);
223 tokenMap.put("getopts", function);
224 tokenMap.put("gprof", function);
225 tokenMap.put("grep", function);
226 tokenMap.put("groups", function);
227 tokenMap.put("gunzip", function);
228 tokenMap.put("gzip", function);
229 tokenMap.put("hashcheck", function);
230 tokenMap.put("hashmake", function);
231 tokenMap.put("head", function);
232 tokenMap.put("help", function);
233 tokenMap.put("history", function);
234 tokenMap.put("imake", function);
235 tokenMap.put("indent", function);
236 tokenMap.put("install", function);
237 tokenMap.put("jobs", function);
238 tokenMap.put("join", function);
239 tokenMap.put("kill", function);
240 tokenMap.put("last", function);
241 tokenMap.put("ld", function);
242 tokenMap.put("leave", function);
243 tokenMap.put("less", function);
244 tokenMap.put("let", function);
245 tokenMap.put("lex", function);
246 tokenMap.put("lint", function);
247 tokenMap.put("ln", function);
248 tokenMap.put("login", function);
249 tokenMap.put("look", function);
250 tokenMap.put("lookbib", function);
251 tokenMap.put("lorder", function);
252 tokenMap.put("lp", function);
253 tokenMap.put("lpq", function);
254 tokenMap.put("lpr", function);
255 tokenMap.put("lprm", function);
256 tokenMap.put("ls", function);
257 tokenMap.put("mail", function);
258 tokenMap.put("Mail", function);
259 tokenMap.put("make", function);
260 tokenMap.put("man", function);
261 tokenMap.put("md", function);
262 tokenMap.put("mesg", function);
263 tokenMap.put("mkdir", function);
264 tokenMap.put("mkstr", function);
265 tokenMap.put("more", function);
266 tokenMap.put("mount", function);
267 tokenMap.put("mv", function);
268 tokenMap.put("nawk", function);
269 tokenMap.put("neqn", function);
270 tokenMap.put("nice", function);
271 tokenMap.put("nm", function);
272 tokenMap.put("nroff", function);
273 tokenMap.put("od", function);
274 tokenMap.put("page", function);
275 tokenMap.put("passwd", function);
276 tokenMap.put("paste", function);
277 tokenMap.put("pr", function);
278 tokenMap.put("print", function);
279 tokenMap.put("printf", function);
280 tokenMap.put("printenv", function);
281 tokenMap.put("prof", function);
282 tokenMap.put("prs", function);
283 tokenMap.put("prt", function);
284 tokenMap.put("ps", function);
285 tokenMap.put("ptx", function);
286 tokenMap.put("pwd", function);
287 tokenMap.put("quota", function);
288 tokenMap.put("ranlib", function);
289 tokenMap.put("rcp", function);
290 tokenMap.put("rcs", function);
291 tokenMap.put("rcsdiff", function);
292 tokenMap.put("read", function);
293 tokenMap.put("readonly", function);
294 tokenMap.put("red", function);
295 tokenMap.put("return", function);
296 tokenMap.put("rev", function);
297 tokenMap.put("rlogin", function);
298 tokenMap.put("rm", function);
299 tokenMap.put("rmdel", function);
300 tokenMap.put("rmdir", function);
301 tokenMap.put("roffbib", function);
302 tokenMap.put("rsh", function);
303 tokenMap.put("rup", function);
304 tokenMap.put("ruptime", function);
305 tokenMap.put("rusers", function);
306 tokenMap.put("rwall", function);
307 tokenMap.put("rwho", function);
308 tokenMap.put("sact", function);
309 tokenMap.put("sccs", function);
310 tokenMap.put("sccsdiff", function);
311 tokenMap.put("script", function);
312 tokenMap.put("sed", function);
313 tokenMap.put("set", function);
314 tokenMap.put("setgroups", function);
315 tokenMap.put("setsenv", function);
316 tokenMap.put("sh", function);
317 tokenMap.put("shift", function);
318 tokenMap.put("size", function);
319 tokenMap.put("sleep", function);
320 tokenMap.put("sort", function);
321 tokenMap.put("sortbib", function);
322 tokenMap.put("spell", function);
323 tokenMap.put("split", function);
324 tokenMap.put("ssh", function);
325 tokenMap.put("strings", function);
326 tokenMap.put("strip", function);
327 tokenMap.put("stty", function);
328 tokenMap.put("su", function);
329 tokenMap.put("sudo", function);
330 tokenMap.put("symorder", function);
331 tokenMap.put("tabs", function);
332 tokenMap.put("tail", function);
333 tokenMap.put("talk", function);
334 tokenMap.put("tar", function);
335 tokenMap.put("tbl", function);
336 tokenMap.put("tee", function);
337 tokenMap.put("telnet", function);
338 tokenMap.put("test", function);
339 tokenMap.put("tftp", function);
340 tokenMap.put("time", function);
341 tokenMap.put("times", function);
342 tokenMap.put("touch", function);
343 tokenMap.put("trap", function);
344 tokenMap.put("troff", function);
345 tokenMap.put("true", function);
346 tokenMap.put("tsort", function);
347 tokenMap.put("tty", function);
348 tokenMap.put("type", function);
349 tokenMap.put("typeset", function);
350 tokenMap.put("ue", function);
351 tokenMap.put("ul", function);
352 tokenMap.put("ulimit", function);
353 tokenMap.put("umask", function);
354 tokenMap.put("unalias", function);
355 tokenMap.put("uncompress", function);
356 tokenMap.put("unexpand", function);
357 tokenMap.put("unget", function);
358 tokenMap.put("unifdef", function);
359 tokenMap.put("uniq", function);
360 tokenMap.put("units", function);
361 tokenMap.put("unset", function);
362 tokenMap.put("uptime", function);
363 tokenMap.put("users", function);
364 tokenMap.put("uucp", function);
365 tokenMap.put("uudecode", function);
366 tokenMap.put("uuencode", function);
367 tokenMap.put("uulog", function);
368 tokenMap.put("uuname", function);
369 tokenMap.put("uusend", function);
370 tokenMap.put("uux", function);
371 tokenMap.put("vacation", function);
372 tokenMap.put("val", function);
373 tokenMap.put("vedit", function);
374 tokenMap.put("vgrind", function);
375 tokenMap.put("vi", function);
376 tokenMap.put("view", function);
377 tokenMap.put("vtroff", function);
378 tokenMap.put("w", function);
379 tokenMap.put("wait", function);
380 tokenMap.put("wall", function);
381 tokenMap.put("wc", function);
382 tokenMap.put("wait", function);
383 tokenMap.put("what", function);
384 tokenMap.put("whatis", function);
385 tokenMap.put("whence", function);
386 tokenMap.put("whereis", function);
387 tokenMap.put("which", function);
388 tokenMap.put("who", function);
389 tokenMap.put("whoami", function);
390 tokenMap.put("write", function);
391 tokenMap.put("xargs", function);
392 tokenMap.put("xstr", function);
393 tokenMap.put("yacc", function);
394 tokenMap.put("yes", function);
395 tokenMap.put("zcat", function);
396
397 return tokenMap;
398
399 }
400
401
402 /**
403 * Returns a list of tokens representing the given text.
404 *
405 * @param text The text to break into tokens.
406 * @param startTokenType The token with which to start tokenizing.
407 * @param startOffset The offset at which the line of tokens begins.
408 * @return A linked list of tokens representing <code>text</code>.
409 */
410 public Token getTokenList(Segment text, int startTokenType, final int startOffset) {
411
412 resetTokenList();
413
414 char[] array = text.array;
415 int offset = text.offset;
416 int count = text.count;
417 int end = offset + count;
418
419 // See, when we find a token, its starting position is always of the form:
420 // 'startOffset + (currentTokenStart-offset)'; but since startOffset and
421 // offset are constant, tokens' starting positions become:
422 // 'newStartOffset+currentTokenStart' for one less subtraction operation.
423 int newStartOffset = startOffset - offset;
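// Illustrative numbers: with startOffset==100 and offset==10, newStartOffset==90,
// so a token starting at currentTokenStart==15 lands at document position
// 90+15 == 105, the same as 100+(15-10).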
424
425 currentTokenStart = offset;
426 currentTokenType = startTokenType;
427 boolean backslash = false;
428
429//beginning:
430 for (int i=offset; i<end; i++) {
431
432 char c = array[i];
433
434 switch (currentTokenType) {
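// Hand-written scanner: currentTokenType is the state carried over from the
// previous character (or from the previous line via startTokenType), and each
// case below decides whether this character extends the current token or ends it.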
435
436 case Token.NULL:
437
438 currentTokenStart = i; // Starting a new token here.
439
440 switch (c) {
441
442 case ' ':
443 case '\t':
444 currentTokenType = Token.WHITESPACE;
445 break;
446
447 case '`':
448 if (backslash) { // Escaped back quote => call '`' an identifier.
449 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
450 backslash = false;
451 }
452 else {
453 currentTokenType = Token.LITERAL_BACKQUOTE;
454 }
455 break;
456
457 case '"':
458 if (backslash) { // Escaped double quote => call '"' an identifier.
459 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
460 backslash = false;
461 }
462 else {
463 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
464 }
465 break;
466
467 case '\'':
468 if (backslash) { // Escaped single quote => call '\'' an identifier.
469 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
470 backslash = false;
471 }
472 else {
473 currentTokenType = Token.LITERAL_CHAR;
474 }
475 break;
476
477 case '\\':
478 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
479 currentTokenType = Token.NULL;
480 backslash = !backslash;
481 break;
482
483 case '$':
484 if (backslash) { // Escaped dollar sign => call '$' an identifier.
485 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
486 backslash = false;
487 }
488 else {
489 currentTokenType = Token.VARIABLE;
490 }
491 break;
492
493 case '#':
494 backslash = false;
495 currentTokenType = Token.COMMENT_EOL;
496 break;
497
498 default:
499 if (RSyntaxUtilities.isDigit(c)) {
500 currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
501 break;
502 }
503 else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
504 currentTokenType = Token.IDENTIFIER;
505 break;
506 }
507 int indexOf = operators.indexOf(c,0);
508 if (indexOf>-1) {
509 addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart);
510 currentTokenType = Token.NULL;
511 break;
512 }
513 indexOf = separators.indexOf(c,0);
514 if (indexOf>-1) {
515 addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart);
516 currentTokenType = Token.NULL;
517 break;
518 }
519 indexOf = separators2.indexOf(c,0);
520 if (indexOf>-1) {
521 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
522 currentTokenType = Token.NULL;
523 break;
524 }
525 else {
526 currentTokenType = Token.IDENTIFIER;
527 break;
528 }
529
530 } // End of switch (c).
531
532 break;
533
534 case Token.WHITESPACE:
535
536 switch (c) {
537
538 case ' ':
539 case '\t':
540 break; // Still whitespace.
541
542 case '\\':
543 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
544 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
545 currentTokenType = Token.NULL;
546 backslash = true; // Previous char whitespace => this must be first backslash.
547 break;
548
549 case '`': // Don't need to worry about backslashes as previous char is space.
550 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
551 currentTokenStart = i;
552 currentTokenType = Token.LITERAL_BACKQUOTE;
553 backslash = false;
554 break;
555
556 case '"': // Don't need to worry about backslashes as previous char is space.
557 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
558 currentTokenStart = i;
559 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
560 backslash = false;
561 break;
562
563 case '\'': // Don't need to worry about backslashes as previous char is space.
564 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
565 currentTokenStart = i;
566 currentTokenType = Token.LITERAL_CHAR;
567 backslash = false;
568 break;
569
570 case '$': // Don't need to worry about backslashes as previous char is space.
571 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
572 currentTokenStart = i;
573 currentTokenType = Token.VARIABLE;
574 backslash = false;
575 break;
576
577 case '#':
578 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
579 currentTokenStart = i;
580 currentTokenType = Token.COMMENT_EOL;
581 break;
582
583 default: // Add the whitespace token and start anew.
584
585 addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
586 currentTokenStart = i;
587
588 if (RSyntaxUtilities.isDigit(c)) {
589 currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
590 break;
591 }
592 else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
593 currentTokenType = Token.IDENTIFIER;
594 break;
595 }
596 int indexOf = operators.indexOf(c,0);
597 if (indexOf>-1) {
598 addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
599 currentTokenType = Token.NULL;
600 break;
601 }
602 indexOf = separators.indexOf(c,0);
603 if (indexOf>-1) {
604 addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
605 currentTokenType = Token.NULL;
606 break;
607 }
608 indexOf = separators2.indexOf(c,0);
609 if (indexOf>-1) {
610 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
611 currentTokenType = Token.NULL;
612 break;
613 }
614 else {
615 currentTokenType = Token.IDENTIFIER;
616 }
617
618 } // End of switch (c).
619
620 break;
621
622 default: // Should never happen
623 case Token.IDENTIFIER:
624
625 switch (c) {
626
627 case ' ':
628 case '\t':
629 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
630 currentTokenStart = i;
631 currentTokenType = Token.WHITESPACE;
632 break;
633
634 case '/': // Special-case to colorize commands like "echo" in "/bin/echo"
635 addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
636 currentTokenStart = i+1;
637 currentTokenType = Token.NULL;
638 break;
639
640 case '`': // Don't need to worry about backslashes as previous char is space.
641 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
642 currentTokenStart = i;
643 currentTokenType = Token.LITERAL_BACKQUOTE;
644 backslash = false;
645 break;
646
647 case '"': // Don't need to worry about backslashes as previous char is non-backslash.
648 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
649 currentTokenStart = i;
650 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
651 backslash = false;
652 break;
653
654 case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
655 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
656 currentTokenStart = i;
657 currentTokenType = Token.LITERAL_CHAR;
658 backslash = false;
659 break;
660
661 case '\\':
662 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
663 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
664 currentTokenType = Token.NULL;
665 backslash = true;
666 break;
667
668 case '$': // Don't need to worry about backslashes as previous char is non-backslash.
669 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
670 currentTokenStart = i;
671 currentTokenType = Token.VARIABLE;
672 backslash = false;
673 break;
674
675 case '=': // Special case here; when you have "identifier=<value>" in shell, "identifier" is a variable.
676 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
677 addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
678 currentTokenType = Token.NULL;
679 break;
680
681 default:
682 if (RSyntaxUtilities.isLetterOrDigit(c) || c=='/' || c=='_') {
683 break; // Still an identifier of some type.
684 }
685 int indexOf = operators.indexOf(c);
686 if (indexOf>-1) {
687 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
688 addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
689 currentTokenType = Token.NULL;
690 break;
691 }
692 indexOf = separators.indexOf(c,0);
693 if (indexOf>-1) {
694 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
695 addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
696 currentTokenType = Token.NULL;
697 break;
698 }
699 indexOf = separators2.indexOf(c,0);
700 if (indexOf>-1) {
701 addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
702 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
703 currentTokenType = Token.NULL;
704 break;
705 }
706 // Otherwise, we're still an identifier (?).
707
708 } // End of switch (c).
709
710 break;
711
712 case Token.LITERAL_NUMBER_DECIMAL_INT:
713
714 switch (c) {
715
716 case ' ':
717 case '\t':
718 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
719 currentTokenStart = i;
720 currentTokenType = Token.WHITESPACE;
721 break;
722
723 case '`': // Don't need to worry about backslashes as previous char is space.
724 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
725 currentTokenStart = i;
726 currentTokenType = Token.LITERAL_BACKQUOTE;
727 backslash = false;
728 break;
729
730 case '"': // Don't need to worry about backslashes as previous char is non-backslash.
731 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
732 currentTokenStart = i;
733 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
734 backslash = false;
735 break;
736
737 case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
738 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
739 currentTokenStart = i;
740 currentTokenType = Token.LITERAL_CHAR;
741 backslash = false;
742 break;
743
744 case '$': // Don't need to worry about backslashes as previous char is non-backslash.
745 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
746 currentTokenStart = i;
747 currentTokenType = Token.VARIABLE;
748 backslash = false;
749 break;
750
751 case '\\':
752 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
753 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
754 currentTokenType = Token.NULL;
755 backslash = true;
756 break;
757
758 default:
759
760 if (RSyntaxUtilities.isDigit(c)) {
761 break; // Still a literal number.
762 }
763 int indexOf = operators.indexOf(c);
764 if (indexOf>-1) {
765 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
766 addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
767 currentTokenType = Token.NULL;
768 break;
769 }
770 indexOf = separators.indexOf(c);
771 if (indexOf>-1) {
772 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
773 addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
774 currentTokenType = Token.NULL;
775 break;
776 }
777 indexOf = separators2.indexOf(c);
778 if (indexOf>-1) {
779 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
780 addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
781 currentTokenType = Token.NULL;
782 break;
783 }
784
785 // Otherwise, remember this was a number and start over.
786 addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
787 i--;
788 currentTokenType = Token.NULL;
789
790 } // End of switch (c).
791
792 break;
793
794 case Token.VARIABLE:
795
796 // Note that we first arrive here AFTER the '$' character.
797 // First check if the variable name is enclosed in '{' and '}' characters.
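// e.g. "${HOME}" becomes a single VARIABLE token here; bare forms such as
// "$HOME", "$1" or "$?" are handled by the loop further below.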
798 if (c=='{') {
799 while (++i<end) {
800 if (array[i]=='}') {
801 addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
802 currentTokenType = Token.NULL;
803 break;
804 }
805 } // End of while (++i<end).
806 if (i==end) { // Happens when '}' wasn't found...
807 addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart);
808 currentTokenType = Token.NULL;
809 }
810 break;
811 } // End of if (c=='{').
812
813 // If we didn't find the '{' character, find the end of the variable...
814 while (i<end) {
815 c = array[i]; // Not needed the first iteration, but can't think of a better way to do it...
816 if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') {
817 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
818 i--;
819 currentTokenType = Token.NULL;
820 break;
821 }
822 i++;
823 }
824
825 // This only happens if we never found the end of the variable in the loop above.
826 if (i==end) {
827 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
828 currentTokenType = Token.NULL;
829 }
830
831 break;
832
833 case Token.COMMENT_EOL:
834 // If we got here, the line contains more than just "#", so check for "#!".
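// e.g. a "#!/bin/sh" line is then highlighted as a preprocessor directive
// rather than as an ordinary comment.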
835 if (c=='!')
836 currentTokenType = Token.PREPROCESSOR;
837 i = end - 1;
838 addToken(text, currentTokenStart,i, currentTokenType, newStartOffset+currentTokenStart);
839 // We need to set token type to null so at the bottom we don't add one more token.
840 currentTokenType = Token.NULL;
841
842 break;
843
844 case Token.LITERAL_CHAR:
845
846 if (c=='\\') {
847 backslash = !backslash; // Okay because if we got in here, backslash was initially false.
848 }
849 else {
850 if (c=='\'' && !backslash) {
851 addToken(text, currentTokenStart,i, Token.LITERAL_CHAR, newStartOffset+currentTokenStart);
852 currentTokenStart = i + 1;
853 currentTokenType = Token.NULL;
854 // backslash is definitely false when we leave.
855 }
856
857 backslash = false; // Need to set backslash to false here as a character was typed.
858
859 }
860 // Otherwise, we're still an unclosed char literal...
861
862 break;
863
864 case Token.LITERAL_BACKQUOTE:
865
866 switch (c) {
867
868 case '\\':
869 backslash = !backslash;
870 break;
871
872 case '`':
873 if (!backslash) {
874 addToken(text, currentTokenStart,i, Token.LITERAL_BACKQUOTE, newStartOffset+currentTokenStart);
875 currentTokenType = Token.NULL;
876 // backslash is definitely false when we leave.
877 break;
878 }
879 backslash = false;
880 break;
881
882 // Variable in the backquote string...
883 case '$':
884
885 if (backslash) {
886 backslash = false;
887 break;
888 }
889
890 // Add the string up-to the variable.
891 addToken(text, currentTokenStart,i-1, Token.LITERAL_BACKQUOTE, newStartOffset+currentTokenStart);
892 currentTokenType = Token.VARIABLE;
893 currentTokenStart = i;
894
895 // First check if the variable name is enclosed in '{' and '}' characters.
896 if (i<end-1 && array[i+1]=='{') {
897 i++; // Now we're on the '{' char.
898 while (++i<end) {
899 if (array[i]=='}') {
900 addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
901 i++;
902 if (i<end) {
903 c = array[i];
904 if (c=='`') { // The only rub - back quote right after variable.
905 addToken(text, i,i, Token.LITERAL_BACKQUOTE, newStartOffset+i);
906 currentTokenType = Token.NULL;
907 break;
908 }
909 else { // Continue on with the string.
910 currentTokenStart = i;
911 currentTokenType = Token.LITERAL_BACKQUOTE;
912 i--;
913 break;
914 }
915 }
916 else { // i==end; "trick" this method so that the string is continued to the next line.
917 currentTokenStart = i;
918 currentTokenType = Token.LITERAL_BACKQUOTE;
919 break; // So we don't hit the condition below.
920 }
921 } // End of if (array[i]=='}').
922 } // End of while (++i<end).
923 if (i==end) { // Happens when '}' wasn't found...
924 addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart);
925 currentTokenStart = end; // ???
926 currentTokenType = Token.LITERAL_BACKQUOTE;
927 break;
928 }
929 } // End of if (i<end-1 && array[i+1]=='{').
930
931 // If we reached the end of the variable, get out.
932 if (currentTokenType==Token.NULL || currentTokenType==Token.LITERAL_BACKQUOTE)
933 break;
934
935 // If we didn't find the '{' character, find the end of the variable...
936 // Increment first to skip the '$'.
937 while (++i<end) {
938 c = array[i];
939 if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') {
940 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
941 if (c=='`') { // The only rub.
942 addToken(text, i,i, Token.LITERAL_BACKQUOTE, newStartOffset+i);
943 currentTokenType = Token.NULL;
944 break;
945 }
946 else {
947 currentTokenStart = i;
948 currentTokenType = Token.LITERAL_BACKQUOTE;
949 i--;
950 break;
951 }
952 }
953 }
954
955 // This only happens if we never found the end of the variable in the loop above.
956 // We "trick" this method so that the backquote string token is at the end.
957 if (i==end) {
958 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
959 currentTokenStart = i;
960 currentTokenType = Token.LITERAL_BACKQUOTE;
961 }
962
963 break;
964
965 // Otherwise, we're still in an unclosed string...
966 default:
967 backslash = false; // Need to set backslash to false here as a character was typed.
968
969 } // End of switch (c).
970
971 break;
972
973 case Token.LITERAL_STRING_DOUBLE_QUOTE:
974
975 switch (c) {
976
977 case '\\':
978 backslash = !backslash;
979 break;
980
981 case '"':
982 if (!backslash) {
983 addToken(text, currentTokenStart,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart);
984 currentTokenType = Token.NULL;
985 // backslash is definitely false when we leave.
986 break;
987 }
988 backslash = false;
989 break;
990
991 // Variable in the double-quoted string...
992 case '$':
993
994 if (backslash) {
995 backslash = false;
996 break;
997 }
998
999 // Add the string up-to the variable.
1000 addToken(text, currentTokenStart,i-1, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart);
1001 currentTokenType = Token.VARIABLE;
1002 currentTokenStart = i;
1003
1004 // First check if the variable name is enclosed in '{' and '}' characters.
1005 if (i<end-1 && array[i+1]=='{') {
1006 i++; // Now we're on the '{' char.
1007 while (++i<end) {
1008 if (array[i]=='}') {
1009 addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
1010 i++;
1011 if (i<end) {
1012 c = array[i];
1013 if (c=='"') { // The only rub - double-quote right after variable.
1014 addToken(text, i,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+i);
1015 currentTokenType = Token.NULL;
1016 break;
1017 }
1018 else { // Continue on with the string.
1019 currentTokenStart = i;
1020 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
1021 i--;
1022 break;
1023 }
1024 }
1025 else { // i==end; "trick" this method so that the string is continued to the next line.
1026 currentTokenStart = i;
1027 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
1028 break; // So we don't hit the condition below.
1029 }
1030 } // End of if (array[i]=='}').
1031 } // End of while (++i<end).
1032 if (i==end) { // Happens when '}' wasn't found...
1033 addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart);
1034 currentTokenStart = end; // ???
1035 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
1036 break;
1037 }
1038 } // End of if (i<end-1 && array[i+1]=='{').
1039
1040 // If we reached the end of the variable, get out.
1041 if (currentTokenType==Token.NULL || currentTokenType==Token.LITERAL_STRING_DOUBLE_QUOTE)
1042 break;
1043
1044 // If we didn't find the '{' character, find the end of the variable...
1045 // Increment first to skip the '$'.
1046 while (++i<end) {
1047 c = array[i];
1048 if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') {
1049 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
1050 if (c=='"') { // The only rub.
1051 addToken(text, i,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+i);
1052 currentTokenType = Token.NULL;
1053 break;
1054 }
1055 else {
1056 currentTokenStart = i;
1057 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
1058 i--;
1059 break;
1060 }
1061 }
1062 }
1063
1064 // This only happens if we never found the end of the variable in the loop above.
1065 // We "trick" this method so that the double-quote string token is at the end.
1066 if (i==end) {
1067 addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
1068 currentTokenStart = i;
1069 currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
1070 }
1071
1072 break;
1073
1074 // Otherwise, we're still in an unclosed string...
1075 default:
1076 backslash = false; // Need to set backslash to false here as a character was typed.
1077
1078 } // End of switch (c).
1079
1080 break;
1081
1082 } // End of switch (currentTokenType).
1083
1084 } // End of for (int i=offset; i<end; i++).
1085
1086 switch (currentTokenType) {
1087
1088 // Remember what token type to begin the next line with.
1089 case Token.LITERAL_BACKQUOTE:
1090 case Token.LITERAL_STRING_DOUBLE_QUOTE:
1091 case Token.LITERAL_CHAR:
1092 addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart);
1093 break;
1094
1095 // Do nothing if everything was okay.
1096 case Token.NULL:
1097 addNullToken();
1098 break;
1099
1100 // All other token types don't continue to the next line...
1101 default:
1102 addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart);
1103 addNullToken();
1104
1105 }
1106
1107 // Return the first token in our linked list.
1108 return firstToken;
1109
1110 }
1111
1112
1113}
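// Usage sketch, assuming the standard RSyntaxTextArea registration mechanism
// (token makers are normally selected through a syntax style key rather than
// instantiated directly):
//
//   RSyntaxTextArea textArea = new RSyntaxTextArea();
//   textArea.setSyntaxEditingStyle(SyntaxConstants.SYNTAX_STYLE_UNIX_SHELL);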