Context Navigation

source: trunk/gli/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java@ 5581

Last change on this file since 5581 was 5581, checked in by mdewsnip, 21 years ago
Many formatting, structural and code improvements.
Property svn:keywords set to `Author Date Id Revision`
File size: 4.8 KB

Line
1	/**
2	*#########################################################################
3	*
4	* A component of the Gatherer application, part of the Greenstone digital
5	* library suite from the New Zealand Digital Library Project at the
6	* University of Waikato, New Zealand.
7	*
8	* <BR><BR>
9	*
10	* Author: John Thompson, Greenstone Digital Library, University of Waikato
11	*
12	* <BR><BR>
13	*
14	* Copyright (C) 1999 New Zealand Digital Library Project
15	*
16	* <BR><BR>
17	*
18	* This program is free software; you can redistribute it and/or modify
19	* it under the terms of the GNU General Public License as published by
20	* the Free Software Foundation; either version 2 of the License, or
21	* (at your option) any later version.
22	*
23	* <BR><BR>
24	*
25	* This program is distributed in the hope that it will be useful,
26	* but WITHOUT ANY WARRANTY; without even the implied warranty of
27	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28	* GNU General Public License for more details.
29	*
30	* <BR><BR>
31	*
32	* You should have received a copy of the GNU General Public License
33	* along with this program; if not, write to the Free Software
34	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35	*########################################################################
36	*/
37	package org.greenstone.gatherer.util;
38
39	/**
40	* Title: The Gatherer<br>
41	* Description: The Gatherer: a tool for gathering and enriching digital collections.<br>
42	* Copyright: Copyright (c) 2001<br>
43	* Company: The University of Waikato<br>
44	* @author John Thompson, Greenstone Digital Libraries
45	* @version 2.1
46	*/
47	import org.greenstone.gatherer.util.Utility;
48
49	/** This class functions much like a <strong>StringTokenizer</strong> in that it tokenizes a long string into tokens, however this tokenizer cleverly notices HTML formatting tags. */
50	public class HTMLStringTokenizer {
51	/** The current position in the source string. */
52	private int pos = 0;
53	/** The current token, usually created by the last nextToken call. */
54	private String current = null;
55	/** The previous token. */
56	private String previous = null;
57	/** The string to be tokenized, including any HTML markup. */
58	private String source = null;
59	/** Constructor.
60	* @param source The source <strong>String</strong> to be tokenized.
61	*/
62	public HTMLStringTokenizer(String source) {
63	this.source = source;
64	// Parse the first token.
65	parseToken();
66	}
67	/** Determines if there are still tokens remaining unparsed in the source.
68	* @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens.
69	*/
70	public boolean hasMoreTokens() {
71	if(current != null && current.length() > 0) {
72	return true;
73	}
74	return false;
75	}
76	/** Determines if the tag currently being returned by sameToken is a tag.
77	* @return A <strong>boolean</strong> indicating if the token is a tag.
78	*/
79	public boolean isTag() {
80	if(previous.startsWith("<") && previous.endsWith(">")) {
81	return true;
82	}
83	return false;
84	}
85	/** Retrieves the next token.
86	* @return A <strong>String</strong> representing the token.
87	*/
88	public String nextToken() {
89	previous = current;
90	// Get the next token.
91	parseToken();
92	// Return previous.
93	return previous;
94	}
95	/** Repeats the result of the last <i>nextToken()</i>.
96	* @return A <strong>String</strong> representing the token.
97	*/
98	public String sameToken() {
99	return previous;
100	}
101	/** Parses the next token and stores it in current.
102	*/
103	private void parseToken() {
104	boolean found = false;
105	boolean tag = false;
106	boolean text = false;
107	// Reset current
108	current = "";
109	// Parse away
110	dumpWhiteSpace();
111	while(pos < source.length() && !found) {
112	char c = (char)source.charAt(pos);
113	if(!tag && !text) {
114	if(c == '<') {
115	tag = true;
116	}
117	else {
118	text = true;
119	}
120	current = current + c;
121	}
122	// Reading a tag. Watch only for '>'.
123	else if(tag) {
124	if(c == '>') {
125	found = true;
126	}
127	current = current + c;
128	}
129	// Reading text. Watch for ' ' and '<'. Rollback '<'.
130	else if(text) {
131	if(c == ' ') {
132	found = true;
133	}
134	else if(c == '<') {
135	found = true;
136	pos--;
137	}
138	else {
139	current = current + c;
140	}
141	}
142	pos++;
143	}
144	}
145	/** Method to ignore whitespace in the source.
146	*/
147	private void dumpWhiteSpace() {
148	while(pos < source.length() && source.charAt(pos) == ' ') {
149	pos++;
150	}
151	}
152
153	static public void main(String args[]) {
154	String init = "<HTML>Where material to be imported is found. Defaults to <i>GSDLHOME/collection/col_name/gimport</i></HTML>";
155	///ystem.err.println("Before: " + init);
156	String result = Utility.formatHTMLWidth(init, 40);
157	///ystem.err.println("After: " + result);
158	}
159	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: