source: trunk/gli/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java@ 9187

Last change on this file since 9187 was 8243, checked in by mdewsnip, 20 years ago

Removed all occurrences of classes explicitly importing other classes in the same package.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.5 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.util;
38
39
/** This class functions much like a <strong>StringTokenizer</strong> in that it splits a long string into tokens, however it keeps HTML formatting tags intact. A token is either a complete tag (everything from a '&lt;' up to and including the next '&gt;', spaces included) or a run of text delimited by space characters or by the start of a tag. Only the space character (U+0020) is treated as a separator; tabs and newlines remain part of text tokens. */
public class HTMLStringTokenizer {
    /** The current position in the source string. */
    private int pos = 0;
    /** The look-ahead token, i.e. the one the next call to nextToken() will return. Empty once the source is exhausted. */
    private String current = null;
    /** The token most recently returned by nextToken(), or null if nextToken() has not been called yet. */
    private String previous = null;
    /** The string to be tokenized, including any HTML markup. Never null. */
    private final String source;

    /** Constructor.
     * @param source The source <strong>String</strong> to be tokenized. A <i>null</i> source is treated as an empty string.
     */
    public HTMLStringTokenizer(String source) {
        this.source = (source == null) ? "" : source;
        // Parse the first token so hasMoreTokens() can answer immediately.
        parseToken();
    }

    /** Determines if there are still tokens remaining unparsed in the source.
     * @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens.
     */
    public boolean hasMoreTokens() {
        return current != null && current.length() > 0;
    }

    /** Determines if the token most recently returned by <i>nextToken()</i> is a tag, i.e. starts with '&lt;' and ends with '&gt;'.
     * @return A <strong>boolean</strong> indicating if the token is a tag. <i>false</i> if <i>nextToken()</i> has not been called yet.
     */
    public boolean isTag() {
        // previous is null until the first nextToken() call; report "not a tag" rather than failing.
        if (previous == null) {
            return false;
        }
        return previous.startsWith("<") && previous.endsWith(">");
    }

    /** Retrieves the next token.
     * @return A <strong>String</strong> representing the token, or the empty string once all tokens have been consumed.
     */
    public String nextToken() {
        previous = current;
        // Read ahead so hasMoreTokens() reflects what is left.
        parseToken();
        return previous;
    }

    /** Parses the next token and stores it in current. Stores the empty string when the source is exhausted.
     */
    private void parseToken() {
        dumpWhiteSpace();
        int length = source.length();
        if (pos >= length) {
            current = "";
            return;
        }
        int start = pos;
        if (source.charAt(start) == '<') {
            // A tag runs up to and including the next '>'. An unterminated tag swallows the rest of the source.
            int close = source.indexOf('>', start + 1);
            pos = (close < 0) ? length : close + 1;
            current = source.substring(start, pos);
        }
        else {
            // Text always keeps its first character, then runs until a space or the start of a tag.
            int end = start + 1;
            while (end < length) {
                char c = source.charAt(end);
                if (c == ' ' || c == '<') {
                    break;
                }
                end++;
            }
            current = source.substring(start, end);
            // Consume a terminating space, but leave a '<' in place so it starts the next token.
            pos = (end < length && source.charAt(end) == ' ') ? end + 1 : end;
        }
    }

    /** Method to skip over space characters in the source.
     */
    private void dumpWhiteSpace() {
        while (pos < source.length() && source.charAt(pos) == ' ') {
            pos++;
        }
    }

    /** Small manual harness: tokenizes a sample HTML string and prints each token, marked as tag or text, to standard error. It uses only this class, so it can be run without the rest of the Gatherer.
     * @param args Ignored.
     */
    static public void main(String args[]) {
        String init = "<HTML>Where material to be imported is found. Defaults to <i>GSDLHOME/collection/col_name/gimport</i></HTML>";
        HTMLStringTokenizer tokenizer = new HTMLStringTokenizer(init);
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            System.err.println((tokenizer.isTag() ? "tag:  " : "text: ") + token);
        }
    }
}
Note: See TracBrowser for help on using the repository browser.