- Timestamp:
- 2003-05-27T15:40:47+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/util/HTMLStringTokenizer.java
r4293 r4364 54 54 /** This class functions much like a <strong>StringTokenizer</strong> in that it tokenizes a long string into tokens, however this tokenizer cleverly notices HTML formatting tags. */ 55 55 public class HTMLStringTokenizer { 56 57 58 59 60 61 62 63 64 65 56 /** The current position in the source string. */ 57 private int pos = 0; 58 /** The current token, usually created by the last nextToken call. */ 59 private String current = null; 60 /** The previous token. */ 61 private String previous = null; 62 /** The string to be tokenized, including any HTML markup. */ 63 private String source = null; 64 /** Constructor. 65 * @param source The source <strong>String</strong> to be tokenized. 66 66 */ 67 68 69 70 71 72 67 public HTMLStringTokenizer(String source) { 68 this.source = source; 69 // Parse the first token. 70 parseToken(); 71 } 72 /** Determines if there are still tokens remaining unparsed in the source. 73 73 * @return A <strong>boolean</strong> which is <i>true</i> if there are more tokens. 74 74 */ 75 76 77 78 79 80 81 75 public boolean hasMoreTokens() { 76 if(current != null && current.length() > 0) { 77 return true; 78 } 79 return false; 80 } 81 /** Determines if the tag currently being returned by sameToken is a tag. 82 82 * @return A <strong>boolean</strong> indicating if the token is a tag. 83 83 */ 84 85 86 87 88 89 90 84 public boolean isTag() { 85 if(previous.startsWith("<") && previous.endsWith(">")) { 86 return true; 87 } 88 return false; 89 } 90 /** Retrieves the next token. 91 91 * @return A <strong>String</strong> representing the token. 92 92 */ 93 94 95 96 97 98 99 100 93 public String nextToken() { 94 previous = current; 95 // Get the next token. 96 parseToken(); 97 // Return previous. 98 return previous; 99 } 100 /** Repeats the result of the last <i>nextToken()</i>. 101 101 * @return A <strong>String</strong> representing the token. 102 102 */ 103 104 105 106 103 public String sameToken() { 104 return previous; 105 } 106 /** Parses the next token and stores it in current. 107 107 */ 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 108 private void parseToken() { 109 boolean found = false; 110 boolean tag = false; 111 boolean text = false; 112 // Reset current 113 current = ""; 114 // Parse away 115 dumpWhiteSpace(); 116 while(pos < source.length() && !found) { 117 char c = (char)source.charAt(pos); 118 if(!tag && !text) { 119 if(c == '<') { 120 tag = true; 121 } 122 else { 123 text = true; 124 } 125 current = current + c; 126 } 127 127 // Reading a tag. Watch only for '>'. 128 129 130 131 132 133 128 else if(tag) { 129 if(c == '>') { 130 found = true; 131 } 132 current = current + c; 133 } 134 134 // Reading text. Watch for ' ' and '<'. Rollback '<'. 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 135 else if(text) { 136 if(c == ' ') { 137 found = true; 138 } 139 else if(c == '<') { 140 found = true; 141 pos--; 142 } 143 else { 144 current = current + c; 145 } 146 } 147 pos++; 148 } 149 } 150 /** Method to ignore whitespace in the source. 151 151 */ 152 153 154 155 156 152 private void dumpWhiteSpace() { 153 while(pos < source.length() && source.charAt(pos) == ' ') { 154 pos++; 155 } 156 } 157 157 158 159 160 161 162 163 158 static public void main(String args[]) { 159 String init = "<HTML>Where material to be imported is found. Defaults to <i>GSDLHOME/collection/col_name/gimport</i></HTML>"; 160 ///ystem.err.println("Before: " + init); 161 String result = Utility.formatHTMLWidth(init, 40); 162 ///ystem.err.println("After: " + result); 163 } 164 164 }
Note:
See TracChangeset
for help on using the changeset viewer.