source: other-projects/FileTransfer-WebSocketPair/testGXTWithGreenstone/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java@ 33053

Last change on this file since 33053 was 33053, checked in by ak19, 5 years ago

I still had some of Nathan Kelly's work (FileTransfer-WebSocketPair) sitting on my USB. I had already committed the Themes folder at the time, 2 years back. I am not sure whether he wanted this additional folder committed, but I didn't want to delete it and decided it would be better off in SVN. When we use his project, if we find we don't need this test folder, we can remove it from SVN then.

File size: 3.9 KB
Line 
1/**
2 *#########################################################################
3 *
4 * A component of the Gatherer application, part of the Greenstone digital
5 * library suite from the New Zealand Digital Library Project at the
6 * University of Waikato, New Zealand.
7 *
8 * <BR><BR>
9 *
10 * Author: John Thompson, Greenstone Digital Library, University of Waikato
11 *
12 * <BR><BR>
13 *
14 * Copyright (C) 1999 New Zealand Digital Library Project
15 *
16 * <BR><BR>
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * <BR><BR>
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * <BR><BR>
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with this program; if not, write to the Free Software
34 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
35 *########################################################################
36 */
37package org.greenstone.gatherer.util;
38
39
40/** This class functions much like a <strong>StringTokenizer</strong> in that it tokenizes a long string into tokens, however this tokenizer cleverly notices HTML formatting tags. */
41public class HTMLStringTokenizer {
42 /** The current position in the source string. */
43 private int pos = 0;
44 /** The current token, usually created by the last nextToken call. */
45 private String current = null;
46 /** The previous token. */
47 private String previous = null;
48 /** The string to be tokenized, including any HTML markup. */
49 private String source = null;
50 /** Constructor.
51 * @param source The source <strong>String</strong> to be tokenized.
52 */
53 public HTMLStringTokenizer(String source) {
54 this.source = source;
55 // Parse the first token.
56 parseToken();
57 }
58
59 /** Determines if there are still tokens remaining unparsed in the source.
60 * @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens.
61 */
62 public boolean hasMoreTokens() {
63 if(current != null && current.length() > 0) {
64 return true;
65 }
66 return false;
67 }
68
69 /** Determines if the tag currently being returned by sameToken is a tag.
70 * @return A <strong>boolean</strong> indicating if the token is a tag.
71 */
72 public boolean isTag() {
73 if(previous.startsWith("<") && previous.endsWith(">")) {
74 return true;
75 }
76 return false;
77 }
78
79 /** Retrieves the next token.
80 * @return A <strong>String</strong> representing the token.
81 */
82 public String nextToken() {
83 previous = current;
84 // Get the next token.
85 parseToken();
86 // Return previous.
87 return previous;
88 }
89
90 /** Parses the next token and stores it in current.
91 */
92 private void parseToken() {
93 boolean found = false;
94 boolean tag = false;
95 boolean text = false;
96 // Reset current
97 current = "";
98 // Parse away
99 dumpWhiteSpace();
100 while(pos < source.length() && !found) {
101 char c = (char)source.charAt(pos);
102 if(!tag && !text) {
103 if(c == '<') {
104 tag = true;
105 }
106 else {
107 text = true;
108 }
109 current = current + c;
110 }
111 // Reading a tag. Watch only for '>'.
112 else if(tag) {
113 if(c == '>') {
114 found = true;
115 }
116 current = current + c;
117 }
118 // Reading text. Watch for ' ' and '<'. Rollback '<'.
119 else if(text) {
120 if(c == ' ') {
121 found = true;
122 }
123 else if(c == '<') {
124 found = true;
125 pos--;
126 }
127 else {
128 current = current + c;
129 }
130 }
131 pos++;
132 }
133 }
134
135 /** Method to ignore whitespace in the source.
136 */
137 private void dumpWhiteSpace() {
138 while(pos < source.length() && source.charAt(pos) == ' ') {
139 pos++;
140 }
141 }
142}
Note: See TracBrowser for help on using the repository browser.