Context Navigation

source: main/trunk/gli/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java@ 31711

Last change on this file since 31711 was 9313, checked in by mdewsnip, 19 years ago
Removed main function, and changed to have Unix line endings.
Property svn:keywords set to `Author Date Id Revision`
File size: 3.9 KB

Line
1	/**
2	*#########################################################################
3	*
4	* A component of the Gatherer application, part of the Greenstone digital
5	* library suite from the New Zealand Digital Library Project at the
6	* University of Waikato, New Zealand.
7	*
8	* <BR><BR>
9	*
10	* Author: John Thompson, Greenstone Digital Library, University of Waikato
11	*
12	* <BR><BR>
13	*
14	* Copyright (C) 1999 New Zealand Digital Library Project
15	*
16	* <BR><BR>
17	*
18	* This program is free software; you can redistribute it and/or modify
19	* it under the terms of the GNU General Public License as published by
20	* the Free Software Foundation; either version 2 of the License, or
21	* (at your option) any later version.
22	*
23	* <BR><BR>
24	*
25	* This program is distributed in the hope that it will be useful,
26	* but WITHOUT ANY WARRANTY; without even the implied warranty of
27	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28	* GNU General Public License for more details.
29	*
30	* <BR><BR>
31	*
32	* You should have received a copy of the GNU General Public License
33	* along with this program; if not, write to the Free Software
34	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35	*########################################################################
36	*/
37	package org.greenstone.gatherer.util;
38
39
/**
 * This class functions much like a <strong>StringTokenizer</strong> in that it splits a long
 * string into tokens, but it also recognises HTML formatting tags.
 *
 * <p>Two kinds of token are produced:
 * <ul>
 * <li>a <i>tag</i>: everything from a '&lt;' up to and including the next '&gt;' (or up to the end
 *     of the source if the tag is never closed), returned verbatim including any spaces inside;</li>
 * <li>a <i>text</i> token: a run of characters terminated by a space, by the start of a tag, or by
 *     the end of the source.</li>
 * </ul>
 * Only the space character (' ') acts as a separator; tabs and line breaks are kept as part of
 * text tokens.
 *
 * <p>The tokenizer always parses one token ahead, which is how {@link #hasMoreTokens()} can answer
 * without consuming anything.
 */
public class HTMLStringTokenizer {
    /** The current position in the source string (index of the next unread character). */
    private int pos = 0;
    /** The look-ahead token, i.e. the one the next nextToken call will return. Empty when exhausted. */
    private String current = null;
    /** The token most recently returned by nextToken, or null if nextToken has not been called yet. */
    private String previous = null;
    /** The string to be tokenized, including any HTML markup. Never null. */
    private final String source;

    /**
     * Constructor.
     *
     * @param source The source <strong>String</strong> to be tokenized. A null source is treated
     *               as an empty string, so the tokenizer simply reports no tokens.
     */
    public HTMLStringTokenizer(String source) {
        this.source = (source == null) ? "" : source;
        // Parse the first token so that hasMoreTokens() is meaningful straight away.
        parseToken();
    }

    /**
     * Determines if there are still tokens remaining unparsed in the source.
     *
     * @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens.
     */
    public boolean hasMoreTokens() {
        return current != null && current.length() > 0;
    }

    /**
     * Determines if the token most recently returned by nextToken is a tag, that is, it starts
     * with '&lt;' and ends with '&gt;'.
     *
     * @return A <strong>boolean</strong> indicating if the token is a tag. Returns <i>false</i>
     *         if nextToken has not been called yet.
     */
    public boolean isTag() {
        // previous is null until the first nextToken() call; report "not a tag" instead of
        // failing with a NullPointerException.
        return previous != null && previous.startsWith("<") && previous.endsWith(">");
    }

    /**
     * Retrieves the next token and advances the tokenizer.
     *
     * @return A <strong>String</strong> representing the token, or the empty string when there
     *         are no tokens left.
     */
    public String nextToken() {
        previous = current;
        // Read ahead so that hasMoreTokens() reflects what is left after this call.
        parseToken();
        return previous;
    }

    /**
     * Parses the next token starting at pos and stores it in current. When the source is
     * exhausted, current becomes the empty string.
     */
    private void parseToken() {
        dumpWhiteSpace();
        final int length = source.length();
        if (pos >= length) {
            current = "";
            return;
        }
        final int start = pos;
        if (source.charAt(start) == '<') {
            // Tag: take everything up to and including the closing '>'. An unterminated tag
            // swallows the remainder of the source.
            final int close = source.indexOf('>', start);
            pos = (close < 0) ? length : close + 1;
            current = source.substring(start, pos);
        }
        else {
            // Text: the first character always belongs to the token; after that, stop at the
            // first space or at the start of a tag.
            int end = start + 1;
            while (end < length && source.charAt(end) != ' ' && source.charAt(end) != '<') {
                end++;
            }
            current = source.substring(start, end);
            // A terminating space is consumed; a terminating '<' is left in place so that it
            // starts the next token.
            pos = (end < length && source.charAt(end) == ' ') ? end + 1 : end;
        }
    }

    /**
     * Method to skip over consecutive space characters at the current position. Only ' ' is
     * skipped; other whitespace characters are left in place.
     */
    private void dumpWhiteSpace() {
        while (pos < source.length() && source.charAt(pos) == ' ') {
            pos++;
        }
    }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: