Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/gli/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java@ 4293

Last change on this file since 4293 was 4293, checked in by jmt12, 21 years ago
Initial revision
Property svn:keywords set to `Author Date Id Revision`
File size: 4.8 KB

Line
1	/**
2	*#########################################################################
3	*
4	* A component of the Gatherer application, part of the Greenstone digital
5	* library suite from the New Zealand Digital Library Project at the
6	* University of Waikato, New Zealand.
7	*
8	* <BR><BR>
9	*
10	* Author: John Thompson, Greenstone Digital Library, University of Waikato
11	*
12	* <BR><BR>
13	*
14	* Copyright (C) 1999 New Zealand Digital Library Project
15	*
16	* <BR><BR>
17	*
18	* This program is free software; you can redistribute it and/or modify
19	* it under the terms of the GNU General Public License as published by
20	* the Free Software Foundation; either version 2 of the License, or
21	* (at your option) any later version.
22	*
23	* <BR><BR>
24	*
25	* This program is distributed in the hope that it will be useful,
26	* but WITHOUT ANY WARRANTY; without even the implied warranty of
27	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28	* GNU General Public License for more details.
29	*
30	* <BR><BR>
31	*
32	* You should have received a copy of the GNU General Public License
33	* along with this program; if not, write to the Free Software
34	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35	*########################################################################
36	*/
37
38
39
40
41
42
43	package org.greenstone.gatherer.util;
44	/**
45	* Title: The Gatherer<br>
46	* Description: The Gatherer: a tool for gathering and enriching digital collections.<br>
47	* Copyright: Copyright (c) 2001<br>
48	* Company: The University of Waikato<br>
49	* @author John Thompson, Greenstone Digital Libraries
50	* @version 2.1
51	*/
52	import java.util.Stack;
53	import org.greenstone.gatherer.util.Utility;
54	/** This class functions much like a <strong>StringTokenizer</strong> in that it tokenizes a long string into tokens, however this tokenizer cleverly notices HTML formatting tags. */
55	public class HTMLStringTokenizer {
56	/** The current position in the source string. */
57	private int pos = 0;
58	/** The current token, usually created by the last nextToken call. */
59	private String current = null;
60	/** The previous token. */
61	private String previous = null;
62	/** The string to be tokenized, including any HTML markup. */
63	private String source = null;
64	/** Constructor.
65	* @param source The source <strong>String</strong> to be tokenized.
66	*/
67	public HTMLStringTokenizer(String source) {
68	this.source = source;
69	// Parse the first token.
70	parseToken();
71	}
72	/** Determines if there are still tokens remaining unparsed in the source.
73	* @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens.
74	*/
75	public boolean hasMoreTokens() {
76	if(current != null && current.length() > 0) {
77	return true;
78	}
79	return false;
80	}
81	/** Determines if the tag currently being returned by sameToken is a tag.
82	* @return A <strong>boolean</strong> indicating if the token is a tag.
83	*/
84	public boolean isTag() {
85	if(previous.startsWith("<") && previous.endsWith(">")) {
86	return true;
87	}
88	return false;
89	}
90	/** Retrieves the next token.
91	* @return A <strong>String</strong> representing the token.
92	*/
93	public String nextToken() {
94	previous = current;
95	// Get the next token.
96	parseToken();
97	// Return previous.
98	return previous;
99	}
100	/** Repeats the result of the last <i>nextToken()</i>.
101	* @return A <strong>String</strong> representing the token.
102	*/
103	public String sameToken() {
104	return previous;
105	}
106	/** Parses the next token and stores it in current.
107	*/
108	private void parseToken() {
109	boolean found = false;
110	boolean tag = false;
111	boolean text = false;
112	// Reset current
113	current = "";
114	// Parse away
115	dumpWhiteSpace();
116	while(pos < source.length() && !found) {
117	char c = (char)source.charAt(pos);
118	if(!tag && !text) {
119	if(c == '<') {
120	tag = true;
121	}
122	else {
123	text = true;
124	}
125	current = current + c;
126	}
127	// Reading a tag. Watch only for '>'.
128	else if(tag) {
129	if(c == '>') {
130	found = true;
131	}
132	current = current + c;
133	}
134	// Reading text. Watch for ' ' and '<'. Rollback '<'.
135	else if(text) {
136	if(c == ' ') {
137	found = true;
138	}
139	else if(c == '<') {
140	found = true;
141	pos--;
142	}
143	else {
144	current = current + c;
145	}
146	}
147	pos++;
148	}
149	}
150	/** Method to ignore whitespace in the source.
151	*/
152	private void dumpWhiteSpace() {
153	while(pos < source.length() && source.charAt(pos) == ' ') {
154	pos++;
155	}
156	}
157
158	static public void main(String args[]) {
159	String init = "<HTML>Where material to be imported is found. Defaults to <i>GSDLHOME/collection/col_name/gimport</i></HTML>";
160	///ystem.err.println("Before: " + init);
161	String result = Utility.formatHTMLWidth(init, 40);
162	///ystem.err.println("After: " + result);
163	}
164	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: