source: trunk/gli/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java@ 5581

Last change on this file since 5581 was 5581, checked in by mdewsnip, 21 years ago

Many formatting, structural and code improvements.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.util;
38
39/**
40 * Title: The Gatherer<br>
41 * Description: The Gatherer: a tool for gathering and enriching digital collections.<br>
42 * Copyright: Copyright (c) 2001<br>
43 * Company: The University of Waikato<br>
44 * @author John Thompson, Greenstone Digital Libraries
45 * @version 2.1
46 */
47import org.greenstone.gatherer.util.Utility;
48
49/** This class functions much like a <strong>StringTokenizer</strong> in that it tokenizes a long string into tokens, however this tokenizer cleverly notices HTML formatting tags. */
50public class HTMLStringTokenizer {
51 /** The current position in the source string. */
52 private int pos = 0;
53 /** The current token, usually created by the last nextToken call. */
54 private String current = null;
55 /** The previous token. */
56 private String previous = null;
57 /** The string to be tokenized, including any HTML markup. */
58 private String source = null;
59 /** Constructor.
60 * @param source The source <strong>String</strong> to be tokenized.
61 */
62 public HTMLStringTokenizer(String source) {
63 this.source = source;
64 // Parse the first token.
65 parseToken();
66 }
67 /** Determines if there are still tokens remaining unparsed in the source.
68 * @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens.
69 */
70 public boolean hasMoreTokens() {
71 if(current != null && current.length() > 0) {
72 return true;
73 }
74 return false;
75 }
76 /** Determines if the tag currently being returned by sameToken is a tag.
77 * @return A <strong>boolean</strong> indicating if the token is a tag.
78 */
79 public boolean isTag() {
80 if(previous.startsWith("<") && previous.endsWith(">")) {
81 return true;
82 }
83 return false;
84 }
85 /** Retrieves the next token.
86 * @return A <strong>String</strong> representing the token.
87 */
88 public String nextToken() {
89 previous = current;
90 // Get the next token.
91 parseToken();
92 // Return previous.
93 return previous;
94 }
95 /** Repeats the result of the last <i>nextToken()</i>.
96 * @return A <strong>String</strong> representing the token.
97 */
98 public String sameToken() {
99 return previous;
100 }
101 /** Parses the next token and stores it in current.
102 */
103 private void parseToken() {
104 boolean found = false;
105 boolean tag = false;
106 boolean text = false;
107 // Reset current
108 current = "";
109 // Parse away
110 dumpWhiteSpace();
111 while(pos < source.length() && !found) {
112 char c = (char)source.charAt(pos);
113 if(!tag && !text) {
114 if(c == '<') {
115 tag = true;
116 }
117 else {
118 text = true;
119 }
120 current = current + c;
121 }
122 // Reading a tag. Watch only for '>'.
123 else if(tag) {
124 if(c == '>') {
125 found = true;
126 }
127 current = current + c;
128 }
129 // Reading text. Watch for ' ' and '<'. Rollback '<'.
130 else if(text) {
131 if(c == ' ') {
132 found = true;
133 }
134 else if(c == '<') {
135 found = true;
136 pos--;
137 }
138 else {
139 current = current + c;
140 }
141 }
142 pos++;
143 }
144 }
145 /** Method to ignore whitespace in the source.
146 */
147 private void dumpWhiteSpace() {
148 while(pos < source.length() && source.charAt(pos) == ' ') {
149 pos++;
150 }
151 }
152
153 static public void main(String args[]) {
154 String init = "<HTML>Where material to be imported is found. Defaults to <i>GSDLHOME/collection/col_name/gimport</i></HTML>";
155 ///ystem.err.println("Before: " + init);
156 String result = Utility.formatHTMLWidth(init, 40);
157 ///ystem.err.println("After: " + result);
158 }
159}
Note: See TracBrowser for help on using the repository browser.