source: trunk/gli/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java@ 4293

Last change on this file since 4293 was 4293, checked in by jmt12, 21 years ago

Initial revision

  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37
38
39
40
41
42
43package org.greenstone.gatherer.util;
44/**
45 * Title: The Gatherer<br>
46 * Description: The Gatherer: a tool for gathering and enriching digital collections.<br>
47 * Copyright: Copyright (c) 2001<br>
48 * Company: The University of Waikato<br>
49 * @author John Thompson, Greenstone Digital Libraries
50 * @version 2.1
51 */
52import java.util.Stack;
53import org.greenstone.gatherer.util.Utility;
54/** This class functions much like a <strong>StringTokenizer</strong> in that it tokenizes a long string into tokens, however this tokenizer cleverly notices HTML formatting tags. */
55public class HTMLStringTokenizer {
56 /** The current position in the source string. */
57 private int pos = 0;
58 /** The current token, usually created by the last nextToken call. */
59 private String current = null;
60 /** The previous token. */
61 private String previous = null;
62 /** The string to be tokenized, including any HTML markup. */
63 private String source = null;
64 /** Constructor.
65 * @param source The source <strong>String</strong> to be tokenized.
66 */
67 public HTMLStringTokenizer(String source) {
68 this.source = source;
69 // Parse the first token.
70 parseToken();
71 }
72 /** Determines if there are still tokens remaining unparsed in the source.
73 * @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens.
74 */
75 public boolean hasMoreTokens() {
76 if(current != null && current.length() > 0) {
77 return true;
78 }
79 return false;
80 }
81 /** Determines if the tag currently being returned by sameToken is a tag.
82 * @return A <strong>boolean</strong> indicating if the token is a tag.
83 */
84 public boolean isTag() {
85 if(previous.startsWith("<") && previous.endsWith(">")) {
86 return true;
87 }
88 return false;
89 }
90 /** Retrieves the next token.
91 * @return A <strong>String</strong> representing the token.
92 */
93 public String nextToken() {
94 previous = current;
95 // Get the next token.
96 parseToken();
97 // Return previous.
98 return previous;
99 }
100 /** Repeats the result of the last <i>nextToken()</i>.
101 * @return A <strong>String</strong> representing the token.
102 */
103 public String sameToken() {
104 return previous;
105 }
106 /** Parses the next token and stores it in current.
107 */
108 private void parseToken() {
109 boolean found = false;
110 boolean tag = false;
111 boolean text = false;
112 // Reset current
113 current = "";
114 // Parse away
115 dumpWhiteSpace();
116 while(pos < source.length() && !found) {
117 char c = (char)source.charAt(pos);
118 if(!tag && !text) {
119 if(c == '<') {
120 tag = true;
121 }
122 else {
123 text = true;
124 }
125 current = current + c;
126 }
127 // Reading a tag. Watch only for '>'.
128 else if(tag) {
129 if(c == '>') {
130 found = true;
131 }
132 current = current + c;
133 }
134 // Reading text. Watch for ' ' and '<'. Rollback '<'.
135 else if(text) {
136 if(c == ' ') {
137 found = true;
138 }
139 else if(c == '<') {
140 found = true;
141 pos--;
142 }
143 else {
144 current = current + c;
145 }
146 }
147 pos++;
148 }
149 }
150 /** Method to ignore whitespace in the source.
151 */
152 private void dumpWhiteSpace() {
153 while(pos < source.length() && source.charAt(pos) == ' ') {
154 pos++;
155 }
156 }
157
158 static public void main(String args[]) {
159 String init = "<HTML>Where material to be imported is found. Defaults to <i>GSDLHOME/collection/col_name/gimport</i></HTML>";
160 ///ystem.err.println("Before: " + init);
161 String result = Utility.formatHTMLWidth(init, 40);
162 ///ystem.err.println("After: " + result);
163 }
164}
Note: See TracBrowser for help on using the repository browser.