source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/DtdTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut an a text edit area for GLI that supports color syntax highlighting

File size: 11.0 KB
Line 
1/*
2 * 04/12/2012
3 *
4 * DtdTokenMaker.java - Generates tokens for DTD syntax highlighting.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import java.io.*;
12import javax.swing.text.Segment;
13
14import org.fife.ui.rsyntaxtextarea.*;
15
16
17/**
18 * Scanner for DTD files.
19 *
20 * This implementation was created using
21 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22 * was modified for performance. Memory allocation needs to be almost
23 * completely removed to be competitive with the handwritten lexers (subclasses
24 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
25 * Strings are never allocated (via yytext()), and the scanner never has to
26 * worry about refilling its buffer (needlessly copying chars around).
27 * We can achieve this because RText always scans exactly 1 line of tokens at a
28 * time, and hands the scanner this line as an array of characters (a Segment
29 * really). Since tokens contain pointers to char arrays instead of Strings
30 * holding their contents, there is no need for allocating new memory for
31 * Strings.<p>
32 *
33 * The actual algorithm generated for scanning has, of course, not been
34 * modified.<p>
35 *
36 * If you wish to regenerate this file yourself, keep in mind the following:
37 * <ul>
38 * <li>The generated <code>DtdTokenMaker.java</code> file will contain two
39 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40 * You should hand-delete the second of each definition (the ones
41 * generated by the lexer), as these generated methods modify the input
42 * buffer, which we'll never have to do.</li>
43 * <li>You should also change the declaration/definition of zzBuffer to NOT
44 * be initialized. This is a needless memory allocation for us since we
45 * will be pointing the array somewhere else anyway.</li>
46 * <li>You should NOT call <code>yylex()</code> on the generated scanner
47 * directly; rather, you should use <code>getTokenList</code> as you would
48 * with any other <code>TokenMaker</code> instance.</li>
49 * </ul>
50 *
51 * @author Robert Futrell
52 * @version 1.0
53 */
54%%
55
56%public
57%class DtdTokenMaker
58%extends AbstractJFlexTokenMaker
59%unicode
60%type org.fife.ui.rsyntaxtextarea.Token
61
62
63%{
64
65 /**
66 * Token type specific to XMLTokenMaker denoting a line ending with an
67 * unclosed XML tag; thus a new line is beginning still inside of the tag.
68 */
69 public static final int INTERNAL_INTAG_START = -1;
70
71 /**
72 * Token type specific to XMLTokenMaker denoting a line ending with an
73 * unclosed DOCTYPE element.
74 */
75 public static final int INTERNAL_INTAG_ELEMENT = -2;
76
77 /**
78 * Token type specific to XMLTokenMaker denoting a line ending with an
79 * unclosed, locally-defined DTD in a DOCTYPE element.
80 */
81 public static final int INTERNAL_INTAG_ATTLIST = -3;
82
83 /**
84 * Token type specific to XMLTokenMaker denoting a line ending with an
85 * unclosed comment. The state to return to when this comment ends is
86 * embedded in the token type as well.
87 */
88 public static final int INTERNAL_IN_COMMENT = -(1<<11);
89
90 /**
91 * The state we were in prior to the current one. This is used to know
92 * what state to resume after an MLC ends.
93 */
94 private int prevState;
95
96
/**
 * Creates a new DTD token maker. This constructor is declared explicitly
 * because JFlex does not generate a no-argument constructor.
 */
public DtdTokenMaker() {
    super();
}
103
104
105 /**
106 * Adds the token specified to the current linked list of tokens as an
107 * "end token;" that is, at <code>zzMarkedPos</code>.
108 *
109 * @param tokenType The token's type.
110 */
111 private void addEndToken(int tokenType) {
112 addToken(zzMarkedPos,zzMarkedPos, tokenType);
113 }
114
115
116 /**
117 * Adds the token specified to the current linked list of tokens.
118 *
119 * @param tokenType The token's type.
120 * @see #addToken(int, int, int)
121 */
122 private void addHyperlinkToken(int start, int end, int tokenType) {
123 int so = start + offsetShift;
124 addToken(zzBuffer, start,end, tokenType, so, true);
125 }
126
127
128 /**
129 * Adds the token specified to the current linked list of tokens.
130 *
131 * @param tokenType The token's type.
132 */
133 private void addToken(int tokenType) {
134 addToken(zzStartRead, zzMarkedPos-1, tokenType);
135 }
136
137
138 /**
139 * Adds the token specified to the current linked list of tokens.
140 *
141 * @param tokenType The token's type.
142 */
143 private void addToken(int start, int end, int tokenType) {
144 int so = start + offsetShift;
145 addToken(zzBuffer, start,end, tokenType, so);
146 }
147
148
149 /**
150 * Adds the token specified to the current linked list of tokens.
151 *
152 * @param array The character array.
153 * @param start The starting offset in the array.
154 * @param end The ending offset in the array.
155 * @param tokenType The token's type.
156 * @param startOffset The offset in the document at which this token
157 * occurs.
158 */
159 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
160 super.addToken(array, start,end, tokenType, startOffset);
161 zzStartRead = zzMarkedPos;
162 }
163
164
165 /**
166 * Always returns <tt>false</tt>, as you never want "mark occurrences"
167 * working in XML files.
168 *
169 * @param type The token type.
170 * @return Whether tokens of this type should have "mark occurrences"
171 * enabled.
172 */
173 public boolean getMarkOccurrencesOfTokenType(int type) {
174 return false;
175 }
176
177
178 /**
179 * Returns the first token in the linked list of tokens generated
180 * from <code>text</code>. This method must be implemented by
181 * subclasses so they can correctly implement syntax highlighting.
182 *
183 * @param text The text from which to get tokens.
184 * @param initialTokenType The token type we should start with.
185 * @param startOffset The offset into the document at which
186 * <code>text</code> starts.
187 * @return The first <code>Token</code> in a linked list representing
188 * the syntax highlighted text.
189 */
190 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
191
192 resetTokenList();
193 this.offsetShift = -text.offset + startOffset;
194 prevState = YYINITIAL;
195
196 // Start off in the proper state.
197 int state = YYINITIAL;
198 switch (initialTokenType) {
199 case INTERNAL_INTAG_START:
200 state = INTAG_START;
201 break;
202 case INTERNAL_INTAG_ELEMENT:
203 state = INTAG_ELEMENT;
204 break;
205 case INTERNAL_INTAG_ATTLIST:
206 state = INTAG_ATTLIST;
207 break;
208 default:
209 if (initialTokenType<-1024) { // INTERNAL_IN_COMMENT - prevState
210 int main = -(-initialTokenType & 0xffffff00);
211 switch (main) {
212 default: // Should never happen
213 case INTERNAL_IN_COMMENT:
214 state = COMMENT;
215 break;
216 }
217 prevState = -initialTokenType&0xff;
218 }
219 else { // Shouldn't happen
220 state = YYINITIAL;
221 }
222 }
223
224 start = text.offset;
225 s = text;
226 try {
227 yyreset(zzReader);
228 yybegin(state);
229 return yylex();
230 } catch (IOException ioe) {
231 ioe.printStackTrace();
232 return new DefaultToken();
233 }
234
235 }
236
237
238 /**
239 * Refills the input buffer.
240 *
241 * @return <code>true</code> if EOF was reached, otherwise
242 * <code>false</code>.
243 */
244 private boolean zzRefill() {
245 return zzCurrentPos>=s.offset+s.count;
246 }
247
248
249 /**
250 * Resets the scanner to read from a new input stream.
251 * Does not close the old reader.
252 *
253 * All internal variables are reset, the old input stream
254 * <b>cannot</b> be reused (internal buffer is discarded and lost).
255 * Lexical state is set to <tt>YY_INITIAL</tt>.
256 *
257 * @param reader the new input stream
258 */
259 public final void yyreset(java.io.Reader reader) {
260 // 's' has been updated.
261 zzBuffer = s.array;
262 /*
263 * We replaced the line below with the two below it because zzRefill
264 * no longer "refills" the buffer (since the way we do it, it's always
265 * "full" the first time through, since it points to the segment's
266 * array). So, we assign zzEndRead here.
267 */
268 //zzStartRead = zzEndRead = s.offset;
269 zzStartRead = s.offset;
270 zzEndRead = zzStartRead + s.count - 1;
271 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
272 zzLexicalState = YYINITIAL;
273 zzReader = reader;
274 zzAtBOL = true;
275 zzAtEOF = false;
276 }
277
278
279%}
280
/* Basic character classes. */
Whitespace          = ([ \t\f])
LineTerminator      = ([\n])

/* A quoted value up to, but not including, its closing quote. */
UnclosedString      = ([\"][^\"]*)
UnclosedChar        = ([\'][^\']*)

/* Building blocks for recognizing URLs (used inside comments). */
URLGenDelim         = ([:\/\?#\[\]@])
URLSubDelim         = ([\!\$&'\(\)\*\+,;=])
URLUnreserved       = ([A-Za-z_0-9\-\.\~])
URLCharacter        = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters       = ({URLCharacter}*)
URLEndCharacter     = ([\/\$A-Za-z0-9])
URL                 = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)

/* Lexical states used in addition to YYINITIAL. */
%state COMMENT
%state INTAG_START
%state INTAG_ELEMENT
%state INTAG_ATTLIST
298
299%%
300
<YYINITIAL> {
    /* Any run of text outside a declaration (not really valid). */
    ([^ \t\f<]+)        { addToken(Token.IDENTIFIER); }
    /* Start of a comment: remember where it began and which state to return to. */
    "<!--"              {
                            start = zzStartRead;
                            prevState = zzLexicalState;
                            yybegin(COMMENT);
                        }
    /* Start of a declaration. */
    "<!"                {
                            addToken(Token.MARKUP_TAG_DELIMITER);
                            yybegin(INTAG_START);
                        }
    /* A '<' that starts neither a comment nor a declaration. */
    "<"                 { addToken(Token.IDENTIFIER); }
    {Whitespace}+       { addToken(Token.WHITESPACE); }
    {LineTerminator} |
    <<EOF>>             {
                            addNullToken();
                            return firstToken;
                        }
}
310
<COMMENT> {
    /* Plain comment text; 'h', 'w' and 'f' are left out so that a URL can begin a fresh match. */
    [^hwf\n\-]+         {}
    /* A URL: emit the comment text gathered so far, then the URL as a hyperlink token. */
    {URL}               {
                            // Saved first because addToken() advances zzStartRead.
                            int urlStart = zzStartRead;
                            addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE);
                            addHyperlinkToken(urlStart,zzMarkedPos-1, Token.COMMENT_MULTILINE);
                            start = zzMarkedPos;
                        }
    /* An 'h', 'w' or 'f' that did not start a URL. */
    [hwf]               {}
    /* End of the comment: emit it, including the closing "-->", and resume the previous state. */
    "-->"               {
                            int afterComment = zzMarkedPos;
                            addToken(start,zzStartRead+2, Token.COMMENT_MULTILINE);
                            start = afterComment;
                            yybegin(prevState);
                        }
    /* A '-' that is not part of "-->". */
    "-"                 {}
    {LineTerminator} |
    <<EOF>>             {
                            addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE);
                            addEndToken(INTERNAL_IN_COMMENT - prevState);
                            return firstToken;
                        }
}
320
<INTAG_START> {
    /* The two declaration keywords that get their own state; listed first so they win ties. */
    ("ELEMENT")         {
                            addToken(Token.MARKUP_TAG_NAME);
                            yybegin(INTAG_ELEMENT);
                        }
    ("ATTLIST")         {
                            addToken(Token.MARKUP_TAG_NAME);
                            yybegin(INTAG_ATTLIST);
                        }
    /* Any other run of non-whitespace, non-'>' characters. */
    ([^ \t\f>]+)        { addToken(Token.IDENTIFIER); }
    {Whitespace}+       { addToken(Token.WHITESPACE); }
    /* End of the declaration. */
    (">")               {
                            addToken(Token.MARKUP_TAG_DELIMITER);
                            yybegin(YYINITIAL);
                        }
    /* Line ended inside the declaration: record the state for the next line. */
    <<EOF>>             {
                            addEndToken(INTERNAL_INTAG_START);
                            return firstToken;
                        }
}
329
<INTAG_ELEMENT> {
    /* Everything inside the declaration other than whitespace and '>'. */
    ([^ \t\f>]+)        { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
    {Whitespace}+       { addToken(Token.WHITESPACE); }
    /* End of the declaration. */
    (">")               {
                            addToken(Token.MARKUP_TAG_DELIMITER);
                            yybegin(YYINITIAL);
                        }
    /* Line ended inside the declaration: record the state for the next line. */
    <<EOF>>             {
                            addEndToken(INTERNAL_INTAG_ELEMENT);
                            return firstToken;
                        }
}
336
<INTAG_ATTLIST> {
    /* Highlighted keywords; listed first so they win ties against the generic rule below. */
    ("CDATA"|"#IMPLIED"|"#REQUIRED")    { addToken(Token.MARKUP_PROCESSING_INSTRUCTION); }
    /* Any other unquoted run of characters. */
    ([^ \t\f>\"\']+)                    { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
    /* Double- or single-quoted values; the closing quote is optional. */
    ({UnclosedString}[\"]?)             { addToken(Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
    ({UnclosedChar}[\']?)               { addToken(Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
    {Whitespace}+                       { addToken(Token.WHITESPACE); }
    /* End of the declaration. */
    (">")                               {
                                            addToken(Token.MARKUP_TAG_DELIMITER);
                                            yybegin(YYINITIAL);
                                        }
    /* Line ended inside the declaration: record the state for the next line. */
    <<EOF>>                             {
                                            addEndToken(INTERNAL_INTAG_ATTLIST);
                                            return firstToken;
                                        }
}
Note: See TracBrowser for help on using the repository browser.