Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

XMLTokenMaker.flex@ 25584

Last change on this file since 25584 was 25584, checked in by davidb, 12 years ago
Initial cut at a text edit area for GLI that supports color syntax highlighting
File size: 15.1 KB

Line
1	/*
2	* 01/24/2005
3	*
4	* XMLTokenMaker.java - Generates tokens for XML syntax highlighting.
5	*
6	* This library is distributed under a modified BSD license. See the included
7	* RSyntaxTextArea.License.txt file for details.
8	*/
9	package org.fife.ui.rsyntaxtextarea.modes;
10
11	import java.io.*;
12	import javax.swing.text.Segment;
13
14	import org.fife.ui.rsyntaxtextarea.*;
15
16
17	/**
18	* Scanner for XML.
19	*
20	* This implementation was created using
21	* <a href="http://www.jflex.de/">JFlex</a> 1.4.1; however, the generated file
22	* was modified for performance. Memory allocation needs to be almost
23	* completely removed to be competitive with the handwritten lexers (subclasses
24	* of <code>AbstractTokenMaker</code>), so this class has been modified so that
25	* Strings are never allocated (via yytext()), and the scanner never has to
26	* worry about refilling its buffer (needlessly copying chars around).
27	* We can achieve this because RText always scans exactly 1 line of tokens at a
28	* time, and hands the scanner this line as an array of characters (a Segment
29	* really). Since tokens contain pointers to char arrays instead of Strings
30	* holding their contents, there is no need for allocating new memory for
31	* Strings.<p>
32	*
33	* The actual algorithm generated for scanning has, of course, not been
34	* modified.<p>
35	*
36	* If you wish to regenerate this file yourself, keep in mind the following:
37	* <ul>
38	* <li>The generated <code>XMLTokenMaker.java</code> file will contain two
39	* definitions of both <code>zzRefill</code> and <code>yyreset</code>.
40	* You should hand-delete the second of each definition (the ones
41	* generated by the lexer), as these generated methods modify the input
42	* buffer, which we'll never have to do.</li>
43	* <li>You should also change the declaration/definition of zzBuffer to NOT
44	* be initialized. This is a needless memory allocation for us since we
45	* will be pointing the array somewhere else anyway.</li>
46	* <li>You should NOT call <code>yylex()</code> on the generated scanner
47	* directly; rather, you should use <code>getTokenList</code> as you would
48	* with any other <code>TokenMaker</code> instance.</li>
49	* </ul>
50	*
51	* @author Robert Futrell
52	* @version 0.5
53	*
54	*/
55	%%
56
57	%public
58	%class XMLTokenMaker
59	%extends AbstractMarkupTokenMaker
60	%unicode
61	%type org.fife.ui.rsyntaxtextarea.Token
62
63
64	%{
65
66	/**
67	* Token type specific to XMLTokenMaker denoting a line ending with an unclosed
68	* double-quote attribute value; getTokenList resumes in INATTR_DOUBLE for it.
69	*/
70	public static final int INTERNAL_ATTR_DOUBLE = -1;
71
72
73	/**
74	* Token type specific to XMLTokenMaker denoting a line ending with an unclosed
75	* single-quote attribute value; getTokenList resumes in INATTR_SINGLE for it.
76	*/
77	public static final int INTERNAL_ATTR_SINGLE = -2;
78
79
80	/**
81	* Token type specific to XMLTokenMaker denoting a line ending with an
82	* unclosed XML tag; thus a new line is beginning still inside of the tag.
83	*/
84	public static final int INTERNAL_INTAG = -3;
85
86	/**
87	* Token type specific to XMLTokenMaker denoting a line ending with an
88	* unclosed DOCTYPE element while <code>inInternalDtd</code> is false.
89	*/
90	public static final int INTERNAL_DTD = -4;
91
92	/**
93	* Token type specific to XMLTokenMaker denoting a line ending with an
94	* unclosed, locally-defined DTD (a "[" seen without its "]") in a DOCTYPE element.
95	*/
96	public static final int INTERNAL_DTD_INTERNAL = -5;
97
98	/**
99	* Token type specific to XMLTokenMaker denoting a line ending with an
100	* unclosed comment. The state to return to when this comment ends is
101	* embedded in the token type as well: the value emitted is
	* <code>INTERNAL_IN_XML_COMMENT - prevState</code>, and getTokenList reads
	* the previous state back from the low 8 bits of the negated type.
102	*/
103	public static final int INTERNAL_IN_XML_COMMENT = -(1<<11);
104
105	/**
106	* Whether closing markup tags are automatically completed for XML. Static, so one value is shared by all instances.
107	*/
108	private static boolean completeCloseTags;
109
110	/**
111	* Whether the DTD we're currently in is a locally-defined one. This
112	* field is only valid when in a DOCTYPE element (the <code>DTD</code> state).
113	*/
114	private boolean inInternalDtd;
115
116	/**
117	* The state we were in prior to the current one. This is used to know
118	* what state to resume after a multi-line comment ends.
119	*/
120	private int prevState;
121
122
123	/**
124	* Constructor; does nothing itself. This must be here because JFlex does not
125	* generate a no-parameter constructor. Input is supplied later, per call, through getTokenList.
126	*/
127	public XMLTokenMaker() {
128	}
129
130
131	static {
132	completeCloseTags = true; // close-tag completion starts out enabled for XML
133	}
134
135
136	/**
137	* Adds the token specified to the current linked list of tokens as an
138	* "end token;" that is, with both its start and end at <code>zzMarkedPos</code>.
	* The end-of-line rules use it to record the internal type the next line resumes with.
139	*
140	* @param tokenType The token's type.
141	*/
142	private void addEndToken(int tokenType) {
143	addToken(zzMarkedPos,zzMarkedPos, tokenType);
144	}
145
146
147	/**
148	* Adds the token specified to the current linked list of tokens, using the
	* six-argument <code>addToken</code> overload with <code>true</code> as its last
	* argument (presumably the hyperlink flag; confirm against the superclass).
149	*
	* @param start Index in <code>zzBuffer</code> of the token's first character.
	* @param end Index in <code>zzBuffer</code> of the token's last character (inclusive).
150	* @param tokenType The token's type.
151	* @see #addToken(int, int, int)
152	*/
153	private void addHyperlinkToken(int start, int end, int tokenType) {
154	int so = start + offsetShift; // buffer index -> document offset
155	addToken(zzBuffer, start,end, tokenType, so, true);
156	}
157
158
159	/**
160	* Adds a token of the given type covering buffer indices
	* <code>zzStartRead</code> through <code>zzMarkedPos-1</code> to the current linked list of tokens.
161	*
162	* @param tokenType The token's type.
163	*/
164	private void addToken(int tokenType) {
165	addToken(zzStartRead, zzMarkedPos-1, tokenType);
166	}
167
168
169	/**
170	* Adds the token specified to the current linked list of tokens, deriving its
	* document offset from <code>start</code> and <code>offsetShift</code>.
171	*
	* @param start Index in <code>zzBuffer</code> of the token's first character.
	* @param end Index in <code>zzBuffer</code> of the token's last character (inclusive).
172	* @param tokenType The token's type.
173	*/
174	private void addToken(int start, int end, int tokenType) {
175	int so = start + offsetShift; // buffer index -> document offset
176	addToken(zzBuffer, start,end, tokenType, so);
177	}
178
179
180	/**
181	* Adds the token specified to the current linked list of tokens, then
	* advances <code>zzStartRead</code> to <code>zzMarkedPos</code>.
182	*
183	* @param array The character array.
184	* @param start The starting offset in the array.
185	* @param end The ending offset in the array.
186	* @param tokenType The token's type.
187	* @param startOffset The offset in the document at which this token
188	* occurs.
189	*/
190	public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
191	super.addToken(array, start,end, tokenType, startOffset);
192	zzStartRead = zzMarkedPos; // rules that still need the old zzStartRead save it before calling
193	}
194
195
196	/**
197	* Returns whether markup close tags should be completed. For XML, the
198	* default value is <code>true</code>. The value comes from a static field, so it is the same for every instance.
199	*
200	* @return Whether closing markup tags are completed.
201	* @see #setCompleteCloseTags(boolean)
202	*/
203	public boolean getCompleteCloseTags() {
204	return completeCloseTags;
205	}
206
207
208	/**
209	* Static version of {@link #getCompleteCloseTags()}; returns the same static field. This hack is
210	* unfortunately needed for applications to be able to query this value
211	* without instantiating this class.
212	*
213	* @return Whether closing markup tags are completed.
214	* @see #setCompleteCloseTags(boolean)
215	*/
216	public static boolean getCompleteCloseMarkupTags() {
217	return completeCloseTags;
218	}
219
220
221	/**
222	* Always returns <tt>false</tt>, as you never want "mark occurrences"
223	* working in XML files.
224	*
225	* @param type The token type; ignored.
226	* @return Whether tokens of this type should have "mark occurrences"
227	* enabled; always <tt>false</tt> here.
228	*/
229	public boolean getMarkOccurrencesOfTokenType(int type) {
230	return false;
231	}
232
233
234	/**
235	* Returns the first token in the linked list of tokens generated
236	* from <code>text</code>. The scanner is reset onto <code>text</code>, put into
237	* the lexical state selected by <code>initialTokenType</code>, and run.
238	*
239	* @param text The text from which to get tokens.
240	* @param initialTokenType The token type we should start with. The internal
	* (negative) types declared by this class select the state to resume in;
	* any type without its own case starts in <code>YYINITIAL</code>.
241	* @param startOffset The offset into the document at which
242	* <code>text</code> starts.
243	* @return The first <code>Token</code> in a linked list representing
244	* the syntax highlighted text, or a new <code>DefaultToken</code> if
	* scanning throws an <code>IOException</code>.
245	*/
246	public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
247
248	resetTokenList();
249	this.offsetShift = -text.offset + startOffset; // buffer index + offsetShift = document offset
250	prevState = YYINITIAL;
251	inInternalDtd = false;
252
253	// Start off in the proper state. 'state' is a lexical state, so default to one (not a token type).
254	int state = YYINITIAL;
255	switch (initialTokenType) {
256	case Token.COMMENT_MULTILINE:
257	state = COMMENT;
258	break;
259	case INTERNAL_DTD:
260	state = DTD;
261	break;
262	case INTERNAL_DTD_INTERNAL:
263	state = DTD;
264	inInternalDtd = true;
265	break;
266	case INTERNAL_ATTR_DOUBLE:
267	state = INATTR_DOUBLE;
268	break;
269	case INTERNAL_ATTR_SINGLE:
270	state = INATTR_SINGLE;
271	break;
272	case Token.MARKUP_PROCESSING_INSTRUCTION:
273	state = PI;
274	break;
275	case INTERNAL_INTAG:
276	state = INTAG;
277	break;
278	case Token.MARKUP_CDATA:
279	state = CDATA;
280	break;
281	default:
282	if (initialTokenType<-1024) { // INTERNAL_IN_XML_COMMENT - prevState
283	int main = -(-initialTokenType & 0xffffff00); // drop the low 8 bits that carry prevState
284	switch (main) {
285	default: // Should never happen
286	case INTERNAL_IN_XML_COMMENT:
287	state = COMMENT;
288	break;
289	}
290	prevState = -initialTokenType&0xff; // state to return to once the comment closes
291	}
292	else { // Every type without a case above: start in ordinary content
293	state = YYINITIAL;
294	}
295	}
296
297	start = text.offset;
298	s = text;
299	try {
300	yyreset(zzReader);
301	yybegin(state);
302	return yylex();
303	} catch (IOException ioe) {
304	ioe.printStackTrace();
305	return new DefaultToken();
306	}
307
308	}
309
310
311	/**
312	* Sets whether markup close tags should be completed. The setting is static, so it applies to every instance.
313	*
314	* @param complete Whether closing markup tags are completed.
315	* @see #getCompleteCloseTags()
316	*/
317	public static void setCompleteCloseTags(boolean complete) {
318	completeCloseTags = complete;
319	}
320
321
322	/**
323	* Stand-in for the generated buffer refill. Nothing is read or copied here; the method only reports whether <code>zzCurrentPos</code> has reached the end of the segment <code>s</code>.
324	*
325	* @return <code>true</code> if EOF was reached, otherwise
326	* <code>false</code>.
327	*/
328	private boolean zzRefill() {
329	return zzCurrentPos>=s.offset+s.count;
330	}
331
332
333	/**
334	* Resets the scanner onto the segment currently held in <code>s</code>.
335	* The buffer is pointed directly at the segment's array; nothing is read from a reader here.
336	*
337	* All position variables are reset to the segment's offset, and the
338	* end-of-read position is set from the segment's count.
339	* Lexical state is set to <tt>YYINITIAL</tt>.
340	*
341	* @param reader stored in <code>zzReader</code>; this method does not read from it
342	*/
343	public final void yyreset(java.io.Reader reader) {
344	// 's' has been updated.
345	zzBuffer = s.array;
346	/*
347	* We replaced the line below with the two below it because zzRefill
348	* no longer "refills" the buffer (since the way we do it, it's always
349	* "full" the first time through, since it points to the segment's
350	* array). So, we assign zzEndRead here.
351	*/
352	//zzStartRead = zzEndRead = s.offset;
353	zzStartRead = s.offset;
354	zzEndRead = zzStartRead + s.count - 1;
355	zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
356	zzLexicalState = YYINITIAL;
357	zzReader = reader;
358	zzAtBOL = true;
359	zzAtEOF = false;
360	}
361
362
363	%}
364	/* Macros for XML names, entities and CDATA delimiters. */
365	NameStartChar = ([\:A-Z_a-z])
366	NameChar = ({NameStartChar}|[\-\.0-9])
367	TagName = ({NameStartChar}{NameChar}*)
368	Whitespace = ([ \t\f])
369	LineTerminator = ([\n])
370	Identifier = ([^ \t\n<&]+)
371	AmperItem = ([&][^; \t]*[;]?)
372	InTagIdentifier = ([^ \t\n\"\'=\/>]+)
373	CDataBegin = ("<![CDATA[")
374	CDataEnd = ("]]>")
375	/* Macros for URLs; the COMMENT state uses URL to emit hyperlink tokens. */
376	URLGenDelim = ([:\/\?#\[\]@])
377	URLSubDelim = ([\!\$&'\*\+,;=])
378	URLUnreserved = ([A-Za-z_0-9\-\.\~])
379	URLCharacter = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
380	URLCharacters = ({URLCharacter}*)
381	URLEndCharacter = ([\/\$A-Za-z0-9])
382	URL = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)
383	/* Lexical states besides YYINITIAL; getTokenList picks the one to resume in from the previous line's last token type. */
384	%state COMMENT
385	%state PI
386	%state DTD
387	%state INTAG
388	%state INATTR_DOUBLE
389	%state INATTR_SINGLE
390	%state CDATA
391
392	%%
393	/* Ordinary content: text, entities and whitespace become tokens; the openers below switch to COMMENT, CDATA, DTD, PI or INTAG. */
394	<YYINITIAL> {
395	"<!--" { start = zzStartRead; prevState = zzLexicalState; yybegin(COMMENT); }
396	{CDataBegin} { addToken(Token.DATA_TYPE); start = zzMarkedPos; yybegin(CDATA); }
397	"<!" { start = zzMarkedPos-2; inInternalDtd = false; yybegin(DTD); }
398	"<?" { start = zzMarkedPos-2; yybegin(PI); }
399	"<"{TagName} {
400	int count = yylength();
401	addToken(zzStartRead,zzStartRead, Token.MARKUP_TAG_DELIMITER); // the "<"
402	addToken(zzMarkedPos-(count-1), zzMarkedPos-1, Token.MARKUP_TAG_NAME); // everything after the "<"
403	yybegin(INTAG);
404	}
405	"</"{TagName} {
406	int count = yylength();
407	addToken(zzStartRead,zzStartRead+1, Token.MARKUP_TAG_DELIMITER); // the "</"
408	addToken(zzMarkedPos-(count-2), zzMarkedPos-1, Token.MARKUP_TAG_NAME); // everything after the "</"
409	yybegin(INTAG);
410	}
411	"<" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(INTAG); }
412	"</" { addToken(Token.MARKUP_TAG_DELIMITER); yybegin(INTAG); }
413	{LineTerminator} { addNullToken(); return firstToken; }
414	{Identifier} { addToken(Token.IDENTIFIER); }
415	{AmperItem} { addToken(Token.DATA_TYPE); }
416	{Whitespace}+ { addToken(Token.WHITESPACE); }
417	<<EOF>> { addNullToken(); return firstToken; }
418	}
419	/* Inside a comment: text accumulates from 'start', URLs are split out as hyperlink tokens, and an unfinished comment ends the line with INTERNAL_IN_XML_COMMENT - prevState. */
420	<COMMENT> {
421	[^hwf\n\-]+ {}
422	{URL} { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
423	[hwf] {}
424	"-->" { int temp = zzMarkedPos; addToken(start,zzStartRead+2, Token.COMMENT_MULTILINE); start = temp; yybegin(prevState); }
425	"-" {}
426	{LineTerminator} |
427	<<EOF>> { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addEndToken(INTERNAL_IN_XML_COMMENT - prevState); return firstToken; }
428	}
429	/* Inside a processing instruction: text accumulates from 'start' and is emitted as one MARKUP_PROCESSING_INSTRUCTION token at "?>" or at the end of the line. */
430	<PI> {
431	[^\n\?]+ {}
432	{LineTerminator} { addToken(start,zzStartRead-1, Token.MARKUP_PROCESSING_INSTRUCTION); return firstToken; }
433	"?>" { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.MARKUP_PROCESSING_INSTRUCTION); }
434	"?" {}
435	<<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_PROCESSING_INSTRUCTION); return firstToken; }
436	}
437	/* Inside a DOCTYPE element: "[" and "]" toggle inInternalDtd, and ">" closes the element only while inInternalDtd is false. */
438	<DTD> {
439	[^\n\[\]<>]+ {}
440	"<!--" { int temp = zzStartRead; addToken(start,zzStartRead-1, Token.FUNCTION); start = temp; prevState = zzLexicalState; yybegin(COMMENT); }
441	"<" {}
442	"[" { inInternalDtd = true; }
443	"]" { inInternalDtd = false; }
444	">" { if (!inInternalDtd) { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.FUNCTION); } }
445	{LineTerminator} |
446	<<EOF>> { addToken(start,zzStartRead-1, Token.FUNCTION); addEndToken(inInternalDtd ? INTERNAL_DTD_INTERNAL : INTERNAL_DTD); return firstToken; }
447	}
448	/* Inside a tag, after its name: attribute names, "=", and the quotes that open attribute values; ">" or "/>" returns to YYINITIAL. */
449	<INTAG> {
450	{InTagIdentifier} { addToken(Token.MARKUP_TAG_ATTRIBUTE); }
451	{Whitespace}+ { addToken(Token.WHITESPACE); }
452	"=" { addToken(Token.OPERATOR); }
453	"/" { addToken(Token.MARKUP_TAG_DELIMITER); /* Not valid but we'll still accept it */ }
454	"/>" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }
455	">" { yybegin(YYINITIAL); addToken(Token.MARKUP_TAG_DELIMITER); }
456	[\"] { start = zzMarkedPos-1; yybegin(INATTR_DOUBLE); }
457	[\'] { start = zzMarkedPos-1; yybegin(INATTR_SINGLE); }
458	<<EOF>> { addEndToken(INTERNAL_INTAG); /* marker only; 'start' is not maintained in this state */ return firstToken; }
459	}
460	/* Inside a double-quoted attribute value that began at 'start'; an unclosed value ends the line with INTERNAL_ATTR_DOUBLE. */
461	<INATTR_DOUBLE> {
462	[^\"]* {}
463	[\"] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
464	<<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_DOUBLE); return firstToken; }
465	}
466	/* Inside a single-quoted attribute value that began at 'start'; an unclosed value ends the line with INTERNAL_ATTR_SINGLE. */
467	<INATTR_SINGLE> {
468	[^\']* {}
469	[\'] { yybegin(INTAG); addToken(start,zzStartRead, Token.MARKUP_TAG_ATTRIBUTE_VALUE); }
470	<<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_TAG_ATTRIBUTE_VALUE); addEndToken(INTERNAL_ATTR_SINGLE); return firstToken; }
471	}
472	/* Inside a CDATA section: content from 'start' becomes a MARKUP_CDATA token, and the closing "]]>" a DATA_TYPE token. */
473	<CDATA> {
474	[^\]]+ {}
475	{CDataEnd} { int temp=zzStartRead; yybegin(YYINITIAL); addToken(start,zzStartRead-1, Token.MARKUP_CDATA); addToken(temp,zzMarkedPos-1, Token.DATA_TYPE); }
476	"]" {}
477	<<EOF>> { addToken(start,zzStartRead-1, Token.MARKUP_CDATA); return firstToken; }
478	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format