source: other-projects/rsyntax-textarea/src/java/org/fife/ui/rsyntaxtextarea/modes/XMLTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago

Initial cut at a text edit area for GLI that supports color syntax highlighting

File size: 15.1 KB
Line 
1/*
2 * 01/24/2005
3 *
4 * XMLTokenMaker.java - Generates tokens for XML syntax highlighting.
5 *
6 * This library is distributed under a modified BSD license. See the included
7 * RSyntaxTextArea.License.txt file for details.
8 */
9package org.fife.ui.rsyntaxtextarea.modes;
10
11import java.io.*;
12import javax.swing.text.Segment;
13
14import org.fife.ui.rsyntaxtextarea.*;
15
16
17/**
18 * Scanner for XML.
19 *
20 * This implementation was created using
21 * <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22 * was modified for performance. Memory allocation needs to be almost
23 * completely removed to be competitive with the handwritten lexers (subclasses
24 * of <code>AbstractTokenMaker</code>), so this class has been modified so that
25 * Strings are never allocated (via yytext()), and the scanner never has to
26 * worry about refilling its buffer (needlessly copying chars around).
27 * We can achieve this because RText always scans exactly 1 line of tokens at a
28 * time, and hands the scanner this line as an array of characters (a Segment
29 * really). Since tokens contain pointers to char arrays instead of Strings
30 * holding their contents, there is no need for allocating new memory for
31 * Strings.<p>
32 *
33 * The actual algorithm generated for scanning has, of course, not been
34 * modified.<p>
35 *
36 * If you wish to regenerate this file yourself, keep in mind the following:
37 * <ul>
38 * <li>The generated <code>XMLTokenMaker.java</code> file will contain two
39 * definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40 * You should hand-delete the second of each definition (the ones
41 * generated by the lexer), as these generated methods modify the input
42 * buffer, which we'll never have to do.</li>
43 * <li>You should also change the declaration/definition of zzBuffer to NOT
44 * be initialized. This is a needless memory allocation for us since we
45 * will be pointing the array somewhere else anyway.</li>
46 * <li>You should NOT call <code>yylex()</code> on the generated scanner
47 * directly; rather, you should use <code>getTokenList</code> as you would
48 * with any other <code>TokenMaker</code> instance.</li>
49 * </ul>
50 *
51 * @author Robert Futrell
52 * @version 0.5
53 *
54 */
55%%
56
57%public
58%class XMLTokenMaker
59%extends AbstractMarkupTokenMaker
60%unicode
61%type org.fife.ui.rsyntaxtextarea.Token
62
63
64%{
65
/**
 * Type specific to XMLTokenMaker denoting a line ending with an unclosed
 * double-quote attribute. It is added as the end token of such a line and
 * makes the next line resume in the <code>INATTR_DOUBLE</code> state.
 */
public static final int INTERNAL_ATTR_DOUBLE = -1;


/**
 * Type specific to XMLTokenMaker denoting a line ending with an unclosed
 * single-quote attribute. It is added as the end token of such a line and
 * makes the next line resume in the <code>INATTR_SINGLE</code> state.
 */
public static final int INTERNAL_ATTR_SINGLE = -2;


/**
 * Token type specific to XMLTokenMaker denoting a line ending with an
 * unclosed XML tag; thus a new line is beginning still inside of the tag.
 */
public static final int INTERNAL_INTAG = -3;

/**
 * Token type specific to XMLTokenMaker denoting a line ending with an
 * unclosed DOCTYPE element.
 */
public static final int INTERNAL_DTD = -4;

/**
 * Token type specific to XMLTokenMaker denoting a line ending with an
 * unclosed, locally-defined DTD in a DOCTYPE element.
 */
public static final int INTERNAL_DTD_INTERNAL = -5;

/**
 * Token type specific to XMLTokenMaker denoting a line ending with an
 * unclosed comment. The state to return to when this comment ends is
 * embedded in the token type as well: the end token is
 * <code>INTERNAL_IN_XML_COMMENT - prevState</code>, and
 * <code>getTokenList</code> reads the state back from the low 8 bits of
 * the negated value.
 */
public static final int INTERNAL_IN_XML_COMMENT = -(1<<11);

/**
 * Whether closing markup tags are automatically completed. This is a
 * static field, so the setting is shared by every instance of this class.
 */
private static boolean completeCloseTags;

/**
 * Whether the DTD we're currently in is a locally-defined one. This
 * field is only valid when in a DOCTYPE element (the <DTD> state).
 */
private boolean inInternalDtd;

/**
 * The state we were in prior to the current one. This is used to know
 * what state to resume after a multi-line comment ends.
 */
private int prevState;
121
122
/**
 * Creates a new XML token maker. The constructor is declared explicitly
 * because JFlex does not generate a no-parameter constructor.
 */
public XMLTokenMaker() {
    super();
}
129
130
// Close-tag completion starts out enabled.
static {
    XMLTokenMaker.completeCloseTags = true;
}
134
135
136 /**
137 * Adds the token specified to the current linked list of tokens as an
138 * "end token;" that is, at <code>zzMarkedPos</code>.
139 *
140 * @param tokenType The token's type.
141 */
142 private void addEndToken(int tokenType) {
143 addToken(zzMarkedPos,zzMarkedPos, tokenType);
144 }
145
146
147 /**
148 * Adds the token specified to the current linked list of tokens.
149 *
150 * @param tokenType The token's type.
151 * @see #addToken(int, int, int)
152 */
153 private void addHyperlinkToken(int start, int end, int tokenType) {
154 int so = start + offsetShift;
155 addToken(zzBuffer, start,end, tokenType, so, true);
156 }
157
158
159 /**
160 * Adds the token specified to the current linked list of tokens.
161 *
162 * @param tokenType The token's type.
163 */
164 private void addToken(int tokenType) {
165 addToken(zzStartRead, zzMarkedPos-1, tokenType);
166 }
167
168
169 /**
170 * Adds the token specified to the current linked list of tokens.
171 *
172 * @param tokenType The token's type.
173 */
174 private void addToken(int start, int end, int tokenType) {
175 int so = start + offsetShift;
176 addToken(zzBuffer, start,end, tokenType, so);
177 }
178
179
180 /**
181 * Adds the token specified to the current linked list of tokens.
182 *
183 * @param array The character array.
184 * @param start The starting offset in the array.
185 * @param end The ending offset in the array.
186 * @param tokenType The token's type.
187 * @param startOffset The offset in the document at which this token
188 * occurs.
189 */
190 public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
191 super.addToken(array, start,end, tokenType, startOffset);
192 zzStartRead = zzMarkedPos;
193 }
194
195
196 /**
197 * Returns whether markup close tags should be completed. For XML, the
198 * default value is <code>true</code>.
199 *
200 * @return Whether closing markup tags are completed.
201 * @see #setCompleteCloseTags(boolean)
202 */
203 public boolean getCompleteCloseTags() {
204 return completeCloseTags;
205 }
206
207
208 /**
209 * Static version of {@link #getCompleteCloseTags()}. This hack is
210 * unfortunately needed for applications to be able to query this value
211 * without instantiating this class.
212 *
213 * @return Whether closing markup tags are completed.
214 * @see #setCompleteCloseTags(boolean)
215 */
216 public static boolean getCompleteCloseMarkupTags() {
217 return completeCloseTags;
218 }
219
220
221 /**
222 * Always returns <tt>false</tt>, as you never want "mark occurrences"
223 * working in XML files.
224 *
225 * @param type The token type.
226 * @return Whether tokens of this type should have "mark occurrences"
227 * enabled.
228 */
229 public boolean getMarkOccurrencesOfTokenType(int type) {
230 return false;
231 }
232
233
234 /**
235 * Returns the first token in the linked list of tokens generated
236 * from <code>text</code>. This method must be implemented by
237 * subclasses so they can correctly implement syntax highlighting.
238 *
239 * @param text The text from which to get tokens.
240 * @param initialTokenType The token type we should start with.
241 * @param startOffset The offset into the document at which
242 * <code>text</code> starts.
243 * @return The first <code>Token</code> in a linked list representing
244 * the syntax highlighted text.
245 */
246 public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
247
248 resetTokenList();
249 this.offsetShift = -text.offset + startOffset;
250 prevState = YYINITIAL;
251 inInternalDtd = false;
252
253 // Start off in the proper state.
254 int state = Token.NULL;
255 switch (initialTokenType) {
256 case Token.COMMENT_MULTILINE:
257 state = COMMENT;
258 break;
259 case INTERNAL_DTD:
260 state = DTD;
261 break;
262 case INTERNAL_DTD_INTERNAL:
263 state = DTD;
264 inInternalDtd = true;
265 break;
266 case INTERNAL_ATTR_DOUBLE:
267 state = INATTR_DOUBLE;
268 break;
269 case INTERNAL_ATTR_SINGLE:
270 state = INATTR_SINGLE;
271 break;
272 case Token.MARKUP_PROCESSING_INSTRUCTION:
273 state = PI;
274 break;
275 case INTERNAL_INTAG:
276 state = INTAG;
277 break;
278 case Token.MARKUP_CDATA:
279 state = CDATA;
280 break;
281 default:
282 if (initialTokenType<-1024) { // INTERNAL_IN_XML_COMMENT - prevState
283 int main = -(-initialTokenType & 0xffffff00);
284 switch (main) {
285 default: // Should never happen
286 case INTERNAL_IN_XML_COMMENT:
287 state = COMMENT;
288 break;
289 }
290 prevState = -initialTokenType&0xff;
291 }
292 else { // Shouldn't happen
293 state = Token.NULL;
294 }
295 }
296
297 start = text.offset;
298 s = text;
299 try {
300 yyreset(zzReader);
301 yybegin(state);
302 return yylex();
303 } catch (IOException ioe) {
304 ioe.printStackTrace();
305 return new DefaultToken();
306 }
307
308 }
309
310
311 /**
312 * Sets whether markup close tags should be completed.
313 *
314 * @param complete Whether closing markup tags are completed.
315 * @see #getCompleteCloseTags()
316 */
317 public static void setCompleteCloseTags(boolean complete) {
318 completeCloseTags = complete;
319 }
320
321
322 /**
323 * Refills the input buffer.
324 *
325 * @return <code>true</code> if EOF was reached, otherwise
326 * <code>false</code>.
327 */
328 private boolean zzRefill() {
329 return zzCurrentPos>=s.offset+s.count;
330 }
331
332
333 /**
334 * Resets the scanner to read from a new input stream.
335 * Does not close the old reader.
336 *
337 * All internal variables are reset, the old input stream
338 * <b>cannot</b> be reused (internal buffer is discarded and lost).
339 * Lexical state is set to <tt>YY_INITIAL</tt>.
340 *
341 * @param reader the new input stream
342 */
343 public final void yyreset(java.io.Reader reader) {
344 // 's' has been updated.
345 zzBuffer = s.array;
346 /*
347 * We replaced the line below with the two below it because zzRefill
348 * no longer "refills" the buffer (since the way we do it, it's always
349 * "full" the first time through, since it points to the segment's
350 * array). So, we assign zzEndRead here.
351 */
352 //zzStartRead = zzEndRead = s.offset;
353 zzStartRead = s.offset;
354 zzEndRead = zzStartRead + s.count - 1;
355 zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
356 zzLexicalState = YYINITIAL;
357 zzReader = reader;
358 zzAtBOL = true;
359 zzAtEOF = false;
360 }
361
362
363%}
364
/* Tag names: a start character followed by any number of name characters. */
NameStartChar = ([\:A-Z_a-z])
NameChar = ({NameStartChar}|[\-\.0-9])
TagName = ({NameStartChar}{NameChar}*)
/* Horizontal whitespace only; the newline is handled separately. */
Whitespace = ([ \t\f])
LineTerminator = ([\n])
/* A run of anything except space, tab, newline, opening angle bracket and ampersand. */
Identifier = ([^ \t\n<&]+)
/* An ampersand, then any run without semicolon, space or tab, then an optional semicolon. */
AmperItem = ([&][^; \t]*[;]?)
/* A run inside a tag that contains no whitespace, quote, equals sign, slash or closing angle bracket. */
InTagIdentifier = ([^ \t\n\"\'=\/>]+)
/* Delimiters of a CDATA section. */
CDataBegin = ("<![CDATA[")
CDataEnd = ("]]>")

/* URL pieces; the URL macro is used by the COMMENT state to emit hyperlink tokens. */
URLGenDelim = ([:\/\?#\[\]@])
URLSubDelim = ([\!\$&'\(\)\*\+,;=])
URLUnreserved = ([A-Za-z_0-9\-\.\~])
URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters = ({URLCharacter}*)
/* A URL must end with one of these, so trailing punctuation is left out of the match. */
URLEndCharacter = ([\/\$A-Za-z0-9])
URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)

/* Lexical states in addition to the default YYINITIAL state. */
%state COMMENT
%state PI
%state DTD
%state INTAG
%state INATTR_DOUBLE
%state INATTR_SINGLE
%state CDATA
391
392%%
393
/* Default state: character data between tags. */
<YYINITIAL> {

    /* Constructs that switch to a dedicated state. */
    "<!--" {
        start = zzStartRead;
        prevState = zzLexicalState;
        yybegin(COMMENT);
    }
    {CDataBegin} {
        addToken(Token.DATA_TYPE);
        start = zzMarkedPos;
        yybegin(CDATA);
    }
    "<!" {
        start = zzMarkedPos - 2;
        inInternalDtd = false;
        yybegin(DTD);
    }
    "<?" {
        start = zzMarkedPos - 2;
        yybegin(PI);
    }

    /* Opening tag: a one-character delimiter token, then the tag name. */
    "<"{TagName} {
        // Read the match length before addToken moves zzStartRead.
        final int matched = yylength();
        addToken(zzStartRead, zzStartRead, Token.MARKUP_TAG_DELIMITER);
        addToken(zzMarkedPos - (matched - 1), zzMarkedPos - 1, Token.MARKUP_TAG_NAME);
        yybegin(INTAG);
    }

    /* Closing tag: a two-character delimiter token, then the tag name. */
    "</"{TagName} {
        // Read the match length before addToken moves zzStartRead.
        final int matched = yylength();
        addToken(zzStartRead, zzStartRead + 1, Token.MARKUP_TAG_DELIMITER);
        addToken(zzMarkedPos - (matched - 2), zzMarkedPos - 1, Token.MARKUP_TAG_NAME);
        yybegin(INTAG);
    }

    /* Tag delimiters that are not followed by a tag name. */
    "<" {
        addToken(Token.MARKUP_TAG_DELIMITER);
        yybegin(INTAG);
    }
    "</" {
        addToken(Token.MARKUP_TAG_DELIMITER);
        yybegin(INTAG);
    }

    {LineTerminator} {
        addNullToken();
        return firstToken;
    }

    /* Plain content. */
    {Identifier} {
        addToken(Token.IDENTIFIER);
    }
    {AmperItem} {
        addToken(Token.DATA_TYPE);
    }
    {Whitespace}+ {
        addToken(Token.WHITESPACE);
    }

    <<EOF>> {
        addNullToken();
        return firstToken;
    }
}
419
/*
 * Inside an XML comment. Comment text accumulates from the start field
 * and is emitted when a URL, the end of the comment or the end of the
 * line is reached.
 */
<COMMENT> {
    [^hwf\n\-]+ {}

    /* Emit the comment text before the URL, then the URL as a hyperlink token. */
    {URL} {
        final int urlStart = zzStartRead;
        addToken(start, urlStart - 1, Token.COMMENT_MULTILINE);
        addHyperlinkToken(urlStart, zzMarkedPos - 1, Token.COMMENT_MULTILINE);
        start = zzMarkedPos;
    }

    /* A letter that could have begun a URL but did not. */
    [hwf] {}

    /* End of the comment: emit it and return to the state it began in. */
    "-->" {
        final int afterComment = zzMarkedPos;
        addToken(start, zzStartRead + 2, Token.COMMENT_MULTILINE);
        start = afterComment;
        yybegin(prevState);
    }

    /* A dash that is not part of the comment terminator. */
    "-" {}

    /* Line ends inside the comment; the end token also records prevState. */
    {LineTerminator} |
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.COMMENT_MULTILINE);
        addEndToken(INTERNAL_IN_XML_COMMENT - prevState);
        return firstToken;
    }
}
429
/* Inside a processing instruction; text accumulates from the start field. */
<PI> {
    [^\n\?]+ {}

    {LineTerminator} {
        final int last = zzStartRead - 1;
        addToken(start, last, Token.MARKUP_PROCESSING_INSTRUCTION);
        return firstToken;
    }

    /* End of the instruction; the token includes both closing characters. */
    "?>" {
        yybegin(YYINITIAL);
        final int last = zzStartRead + 1;
        addToken(start, last, Token.MARKUP_PROCESSING_INSTRUCTION);
    }

    /* A question mark that does not close the instruction. */
    "?" {}

    <<EOF>> {
        final int last = zzStartRead - 1;
        addToken(start, last, Token.MARKUP_PROCESSING_INSTRUCTION);
        return firstToken;
    }
}
437
/*
 * Inside a markup declaration that was opened in YYINITIAL. Square
 * brackets toggle inInternalDtd, and a closing angle bracket only ends
 * the declaration while that flag is false.
 */
<DTD> {
    [^\n\[\]<>]+ {}

    /* A comment inside the declaration: emit what precedes it, then switch. */
    "<!--" {
        final int commentStart = zzStartRead;
        addToken(start, commentStart - 1, Token.FUNCTION);
        start = commentStart;
        prevState = zzLexicalState;
        yybegin(COMMENT);
    }

    "<" {}

    "[" {
        inInternalDtd = true;
    }
    "]" {
        inInternalDtd = false;
    }

    ">" {
        if (!inInternalDtd) {
            yybegin(YYINITIAL);
            addToken(start, zzStartRead, Token.FUNCTION);
        }
    }

    /* Line ends inside the declaration; the end token records inInternalDtd. */
    {LineTerminator} |
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.FUNCTION);
        if (inInternalDtd) {
            addEndToken(INTERNAL_DTD_INTERNAL);
        }
        else {
            addEndToken(INTERNAL_DTD);
        }
        return firstToken;
    }
}
448
/*
 * Inside a tag, after its name: attribute names, the equals sign, quoted
 * attribute values and the closing delimiter.
 */
<INTAG> {
    {InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
    {Whitespace}+ { addToken(Token.WHITESPACE); }
    "=" { addToken(Token.OPERATOR); }
    "/" { addToken(Token.MARKUP_TAG_DELIMITER); /* Not valid but we'll still accept it */ }
    "/>" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }
    ">" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }

    /* A quote starts an attribute value; remember where it begins. */
    [\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE); }
    [\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE); }

    /*
     * Line ends inside the tag. Everything on this line has already been
     * emitted, so only a marker is added to make the next line resume in
     * this state. The marker is placed at zzMarkedPos with addEndToken, as
     * the other states do for their internal end markers. The previous
     * range, from the start field to zzStartRead-1, could cover text that
     * was already tokenized, because the start field is not updated as
     * the tokens of this state are added.
     */
    <<EOF>> { addEndToken(INTERNAL_INTAG); return firstToken; }
}
460
/* Inside a double-quoted attribute value; the start field marks the opening quote. */
<INATTR_DOUBLE> {
    [^\"]* {}

    /* Closing quote: emit the whole value, quotes included, and go back to the tag. */
    [\"] {
        yybegin(INTAG);
        final int closingQuote = zzStartRead;
        addToken(start, closingQuote, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
    }

    /* Line ends inside the value; the end token makes the next line resume here. */
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
        addEndToken(INTERNAL_ATTR_DOUBLE);
        return firstToken;
    }
}
466
/* Inside a single-quoted attribute value; the start field marks the opening quote. */
<INATTR_SINGLE> {
    [^\']* {}

    /* Closing quote: emit the whole value, quotes included, and go back to the tag. */
    [\'] {
        yybegin(INTAG);
        final int closingQuote = zzStartRead;
        addToken(start, closingQuote, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
    }

    /* Line ends inside the value; the end token makes the next line resume here. */
    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.MARKUP_TAG_ATTRIBUTE_VALUE);
        addEndToken(INTERNAL_ATTR_SINGLE);
        return firstToken;
    }
}
472
/* Inside a CDATA section; the content accumulates from the start field. */
<CDATA> {
    [^\]]+ {}

    /* End of the section: emit the content, then the terminator as its own token. */
    {CDataEnd} {
        final int terminatorStart = zzStartRead;
        yybegin(YYINITIAL);
        addToken(start, terminatorStart - 1, Token.MARKUP_CDATA);
        addToken(terminatorStart, zzMarkedPos - 1, Token.DATA_TYPE);
    }

    /* A closing square bracket that is not part of the terminator. */
    "]" {}

    <<EOF>> {
        addToken(start, zzStartRead - 1, Token.MARKUP_CDATA);
        return firstToken;
    }
}
Note: See TracBrowser for help on using the repository browser.