Changeset 6285
- Timestamp:
- 2003-12-17T13:09:14+13:00 (20 years ago)
- Location:
- trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util
- Files:
-
- 2 added
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/HTMLBlock.java
r5800 r6285 4 4 5 5 public class HTMLBlock 6 { Vector content; 7 int tagpos; 8 int endpos; 9 10 public HTMLBlock(int docpos) 11 { this.content = new Vector(1); 12 this.tagpos = docpos; 13 } 14 15 public HTMLBlock(int docpos, int endpos) 16 { this.content = new Vector(1); 17 this.tagpos = docpos; 18 this.endpos = endpos; 19 } 20 21 public void addTag(HTMLTag tag) 22 { if (tag.endPos() > this.endpos) 23 { this.endpos = tag.endPos(); 24 } 25 content.addElement(tag); 26 } 27 28 public void addText(String text) 29 { content.addElement(text); 30 } 31 32 public void addText(HTMLCText text) 33 { if (text.endPos() > this.endpos) 34 { this.endpos = text.endPos(); 35 } 36 content.addElement(text); 37 } 38 39 /** 40 * @return HTML encoded <code>String</code> of the document 41 */ 42 public String contentString() 43 { int member; 44 StringBuffer reply; 45 String type; 46 47 reply = new StringBuffer(); 48 for (member = 1; member < content.size() - 1; member ++) 49 { if (content.elementAt(member) instanceof String) 50 { reply.append((String) content.elementAt(member)); 51 } 52 else if (content.elementAt(member) instanceof HTMLCText) 53 { reply.append(((HTMLCText) content.elementAt(member)).toString()); 54 } 55 } 56 return reply.toString(); 57 } 58 59 /** 60 * Returns document position of start of the block 61 */ 62 public int startPos() 63 { return this.tagpos; 64 } 65 66 /** 67 * Returns document position of end of the block 68 */ 69 public int endPos() 70 { return this.tagpos; 71 } 72 73 /** 74 * @return head (first) tag of the block 75 */ 76 public HTMLTag headTag() 77 { return ((HTMLTag) content.elementAt(0)); 78 } 79 80 /** 81 * @return name (as a <code>String</code>) of the tag at the head of the block 82 */ 83 public String headTagName() 84 { return ((HTMLTag) content.elementAt(0)).tagName(); 85 } 86 87 /** 88 * @return name of the tail (last) tag of the block as a <code>String</code> 89 */ 90 public String tailTagName() 91 { return (((HTMLTag) content.elementAt(content.size() - 1)).tagName()); 92 } 6 { Vector content; 7 int tagpos; 8 int endpos; 9 10 public HTMLBlock(int docpos) 11 { this.content = new Vector(1); 12 this.tagpos = docpos; 13 } 14 15 public HTMLBlock(int docpos, int endpos) 16 { this.content = new Vector(1); 17 this.tagpos = docpos; 18 this.endpos = endpos; 19 } 20 21 public void addTag(HTMLTag tag) 22 { if (tag.endPos() > this.endpos) 23 { this.endpos = tag.endPos(); 24 } 25 content.addElement(tag); 26 } 27 28 /** 29 * @deprecccate 30 public void addText(String text) 31 { content.addElement(text); 32 } 33 */ 34 35 public void addText(HTMLCText text) 36 { if (text.endPos() > this.endpos) 37 { this.endpos = text.endPos(); 38 } 39 content.addElement(text); 40 } 41 42 /** 43 * @return HTML encoded <code>String</code> of the document 44 */ 45 public String contentString() 46 { int member; 47 StringBuffer reply; 48 String type; 49 50 reply = new StringBuffer(); 51 for (member = 1; member < content.size() - 1; member ++) 52 { /*if (content.elementAt(member) instanceof String) 53 { reply.append((String) content.elementAt(member)); 54 } 55 else 56 */ 57 if (content.elementAt(member) instanceof HTMLCText) 58 { reply.append(((HTMLCText) content.elementAt(member)).toString()); 59 } 60 } 61 return reply.toString(); 62 } 63 64 /** 65 * Returns document position of start of the block 66 */ 67 public int startPos() 68 { return this.tagpos; 69 } 70 71 /** 72 * Returns document position of end of the block 73 */ 74 public int endPos() 75 { return this.tagpos; 76 } 77 78 /** 79 * @return head (first) tag of the block 80 */ 81 public HTMLTag headTag() 82 { return ((HTMLTag) content.elementAt(0)); 83 } 84 85 /** 86 * @return name (as a <code>String</code>) of the tag at the head of the block 87 */ 88 public String headTagName() 89 { return ((HTMLTag) content.elementAt(0)).tagName(); 90 } 91 92 /** 93 * @return name of the tail (last) tag of the block as a <code>String</code> 94 */ 95 public String tailTagName() 96 { return (((HTMLTag) content.elementAt(content.size() - 1)).tagName()); 97 } 93 98 94 99 95 96 *@return: starting element character position of the item-th item97 98 99 { Objectelement;100 100 /** 101 * @return: starting element character position of the item-th item 102 */ 103 private int elementStartPos(int item) 104 { Object element; 105 int pos = -1; 101 106 102 103 104 105 106 107 108 109 110 107 element = this.content.elementAt(item); 108 if (element instanceof HTMLTag) 109 { pos = ((HTMLTag) element).startPos(); 110 } 111 else if (element instanceof HTMLCText) 112 { pos = ((HTMLCText) element).startPos(); 113 } 114 return pos; 115 } 111 116 112 113 *@return: ending element character position of the item-th item114 115 116 {Object element;117 117 /** 118 * @return: ending element character position of the item-th item 119 */ 120 private int elementEndPos(int item) 121 { Object element; 122 int pos = -1; 118 123 119 element = this.content.elementAt(item); 120 if (element instanceof HTMLTag) 121 { pos = ((HTMLTag) element).endPos(); 122 } 123 else if (element instanceof HTMLCText) 124 { pos = ((HTMLCText) element).endPos(); 125 } 126 return pos; 127 } 128 129 /** 130 * return the HTMLBlock of the indicated subitems 131 */ 132 public HTMLBlock subBlock(int startitem, int enditem) 133 { int i; 134 Object element; 135 HTMLBlock reply; 136 reply = new HTMLBlock( this.elementStartPos(startitem), 137 this.elementEndPos(enditem-1)); 138 // copy all the bits 139 for (i = startitem; i < enditem; i++) 140 { element = this.content.elementAt(i); 141 if (element instanceof HTMLTag) 142 { reply.addTag((HTMLTag) element); 143 } 144 else if (element instanceof HTMLCText) 145 { reply.addText((HTMLCText) element); 146 } 147 else if (element instanceof String) 148 { reply.addText((String) element); 149 } 150 } 151 return reply; 152 } 153 154 /** 155 * Return the position of the given <code>HTMLBlock</code> within this block 156 * @return the tag index of the child block; or < 0 if not a chile 157 */ 158 public int find(HTMLBlock subblock) 159 { int start, end, at, attag; 160 HTMLTag tag; 161 162 start = 0; 163 end = this.content.size(); 164 while (start != end) 165 { /* compare positions */ 166 at = (start + end) / 2; 167 attag = at; 168 while (attag < end && 169 (content.elementAt(attag) instanceof HTMLTag) == false) 170 { attag ++; 171 } 172 173 if (attag == end) 174 { end = at; 175 continue; 176 } 177 tag = (HTMLTag) content.elementAt(attag); 178 if (tag.startPos() > subblock.startPos()) 179 { end = at; 180 } 181 else if (tag.startPos() < subblock.startPos()) 182 { start = attag + 1; 183 } 184 else 185 { return attag; 186 } 187 } 188 return -1; 189 } 190 191 /** 192 * @return The raw HTML of the block 193 */ 194 public String HTMLString() 195 { int loop; 196 StringBuffer reply; 197 198 reply = new StringBuffer(); 199 for (loop = 0; loop < this.content.size(); loop ++) 200 { if (content.elementAt(loop) instanceof String) 201 { reply.append((String) content.elementAt(loop)); 202 } 203 else if (content.elementAt(loop) instanceof HTMLCText) 204 { reply.append(((HTMLCText) content.elementAt(loop)).toString()); 205 } 206 else if (content.elementAt(loop) instanceof HTMLTag) 207 { reply.append(((HTMLTag) content.elementAt(loop)).toString()); 208 } 209 } 210 return reply.toString(); 211 } 212 213 /** 214 * @return The text of the block - no HTML tags. If no text is present, any 215 * <code>alt</code> information for <code>img</code> tags will be given 216 * instead 217 */ 218 public String toString() 219 { int member; 220 StringBuffer reply; 221 StringBuffer ireply; 222 223 reply = new StringBuffer(""); 224 ireply = new StringBuffer(""); 225 for (member = 0; member < content.size(); member ++) 226 { if (content.elementAt(member) instanceof String) 227 { reply.append((String) content.elementAt(member)); 228 } 229 if (content.elementAt(member) instanceof HTMLCText) 230 { reply.append(((HTMLCText) content.elementAt(member)).toString()); 231 } 232 else if (content.elementAt(member) instanceof HTMLTag) 233 { if (((HTMLTag) content.elementAt(member)).tagName().equals("img")) 234 { ireply.append(((HTMLTag) content.elementAt(member)).idValue("alt")); 235 } 236 } 237 } 238 reply = HTMLCText.cleanString(reply); 239 ireply = HTMLCText.cleanString(ireply); 124 element = this.content.elementAt(item); 125 if (element instanceof HTMLTag) 126 { pos = ((HTMLTag) element).endPos(); 127 } 128 else if (element instanceof HTMLCText) 129 { pos = ((HTMLCText) element).endPos(); 130 } 131 return pos; 132 } 133 134 /** 135 * @return the HTMLBlock of the indicated subitems 136 */ 137 public HTMLBlock subBlock(int startitem, int enditem) 138 { int i; 139 Object element; 140 HTMLBlock reply; 141 reply = new HTMLBlock(this.elementStartPos(startitem), 142 this.elementEndPos(enditem-1)); 143 // copy all the bits 144 for (i = startitem; i < enditem; i++) 145 { element = this.content.elementAt(i); 146 if (element instanceof HTMLTag) 147 { reply.addTag((HTMLTag) element); 148 } 149 else if (element instanceof HTMLCText) 150 { reply.addText((HTMLCText) element); 151 } 152 /* 153 else if (element instanceof String) 154 { reply.addText((String) element); 155 } 156 */ 157 } 158 return reply; 159 } 160 161 /** 162 * Return the position of the given <code>HTMLBlock</code> within this block 163 * @return the tag index of the child block; or < 0 if not a chile 164 */ 165 public int find(HTMLBlock subblock) 166 { int start, end, at, attag; 167 HTMLTag tag; 168 169 start = 0; 170 end = this.content.size(); 171 while (start != end) 172 { /* compare positions */ 173 at = (start + end) / 2; 174 attag = at; 175 while (attag < end && 176 (content.elementAt(attag) instanceof HTMLTag) == false) 177 { attag ++; 178 } 179 180 if (attag == end) 181 { end = at; 182 continue; 183 } 184 tag = (HTMLTag) content.elementAt(attag); 185 if (tag.startPos() > subblock.startPos()) 186 { end = at; 187 } 188 else if (tag.startPos() < subblock.startPos()) 189 { start = attag + 1; 190 } 191 else 192 { return attag; 193 } 194 } 195 return -1; 196 } 197 198 /** 199 * @return The raw HTML of the block 200 */ 201 public String HTMLString() 202 { int loop; 203 StringBuffer reply; 204 205 reply = new StringBuffer(); 206 for (loop = 0; loop < this.content.size(); loop ++) 207 { /* 208 if (content.elementAt(loop) instanceof String) 209 { reply.append((String) content.elementAt(loop)); 210 } 211 else 212 */ 213 if (content.elementAt(loop) instanceof HTMLCText) 214 { reply.append(((HTMLCText) content.elementAt(loop)).toString()); 215 } 216 else if (content.elementAt(loop) instanceof HTMLTag) 217 { reply.append(((HTMLTag) content.elementAt(loop)).toString()); 218 } 219 } 220 return reply.toString(); 221 } 222 223 /** 224 * @return The text of the block - no HTML tags. If no text is present, any 225 * <code>alt</code> information for <code>img</code> tags will be given 226 * instead 227 */ 228 public String toString() 229 { int member; 230 StringBuffer reply; 231 StringBuffer ireply; 232 233 reply = new StringBuffer(""); 234 ireply = new StringBuffer(""); 235 for (member = 0; member < content.size(); member ++) 236 { /*if (content.elementAt(member) instanceof String) 237 { reply.append((String) content.elementAt(member)); 238 } 239 */ 240 if (content.elementAt(member) instanceof HTMLCText) 241 { reply.append(((HTMLCText) content.elementAt(member)).toString()); 242 } 243 else if (content.elementAt(member) instanceof HTMLTag) 244 { if (((HTMLTag) content.elementAt(member)).tagName().equals("img")) 245 { ireply.append(((HTMLTag) content.elementAt(member)).idValue("alt")); 246 } 247 } 248 } 249 reply = HTMLCText.cleanString(reply); 250 ireply = HTMLCText.cleanString(ireply); 240 251 // reply = reply.trim(); 241 242 243 244 245 246 247 publicObject elementAt(int at)248 { returnthis.content.elementAt(at);249 250 251 252 253 254 255 {return this.content.size();256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 {this.content = null;283 284 252 if (reply == null || reply.length() == 0) 253 { return ireply.toString(); 254 } 255 return reply.toString(); 256 } 257 258 public HTMLObject elementAt(int at) 259 { return (HTMLObject) this.content.elementAt(at); 260 } 261 262 /** 263 * @return the number of elements in the block 264 */ 265 public int size() 266 { return this.content.size(); 267 } 268 269 /** 270 * @return an enumeration of all elements in the block 271 */ 272 public Enumeration elements() 273 { Enumeration enum; 274 275 enum = new HTMLBlockEnumerator(this); 276 return enum; 277 } 278 279 /** 280 * @return an enumeration of all tags in the block 281 */ 282 public Enumeration tagElements() 283 { Enumeration enum; 284 285 enum = new HTMLBlockTagEnumerator(this); 286 return enum; 287 } 288 289 /** 290 * Finalization method 291 */ 292 protected void finalize() throws Throwable 293 { this.content = null; 294 super.finalize(); 295 } 285 296 } 286 297 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/HTMLCText.java
r5800 r6285 1 1 package org.greenstone.gsdl3.gs3build.util; 2 2 3 public class HTMLCText implements HTMLObject 4 { String text; 5 int start; 6 int end; 3 7 8 public static final String HTML_TEXT_TYPE = "Text"; 4 9 5 public class HTMLCText 10 public HTMLCText(String text, int start, int end) 11 { this.text = text; 12 this.start = start; 13 this.end = end; 14 } 6 15 7 { String text; 16 public String getHTMLType() 17 { return HTML_TEXT_TYPE; 18 } 8 19 9 int start; 20 public int startPos() 21 { return this.start; 22 } 10 23 11 int end; 24 public int endPos() 25 { return this.end; 26 } 12 27 28 public boolean isEmpty() 29 { return emptyString(this.text); 30 } 13 31 32 public static boolean emptyString(String s) 33 { int c; 14 34 15 public HTMLCText(String text, int start, int end) 35 for (c = 0; c < s.length(); c ++) 36 { if (s.charAt(c) > 32) 37 { return false; 38 } 39 } 40 return true; 41 } 16 42 17 { this.text = text; 43 public static StringBuffer cleanString(StringBuffer buffer) 44 { int c; 45 int w; 18 46 19 this.start = start; 47 w = 0; 48 for (c = 0; c < buffer.length(); c ++) 49 { if (buffer.charAt(c) >= 32) 50 { if (buffer.charAt(c) == 32) 51 { if (w > 0 && buffer.charAt(w-1) > 32) 52 { buffer.setCharAt(w, buffer.charAt(c)); 53 w ++; 54 } 55 } 56 else 57 { if (c != w) 58 { buffer.setCharAt(w, buffer.charAt(c)); 59 } 60 w ++; 61 } 62 } 63 else 64 { if (w > 0 && buffer.charAt(w-1) > 32) 65 { buffer.setCharAt(w,' '); 66 w ++; 67 } 68 } 69 } 70 buffer.setLength(w); 71 return buffer; 72 } 20 73 21 this.end = end; 74 /** 75 * nullString; used by initialisation for checking for 76 * non-blank content (ie. text other than just spaces/returns) 77 * see constructor (ie HTMLBlockList) methods below for role of 78 * this in the larger plan of things 79 */ 80 public boolean nullString() 81 { int ch; 22 82 83 if (text != null) 84 { for (ch = 0; ch < text.length(); ch ++) 85 { if (text.charAt(ch) > ' ') 86 { return false; 23 87 } 88 } 89 } 90 return true; 91 } 24 92 93 public String toString() 94 { return this.text; 95 } 25 96 26 27 public int startPos() 28 29 { return this.start; 30 31 } 32 33 34 35 public int endPos() 36 37 { return this.end; 38 39 } 40 41 42 43 public boolean isEmpty() 44 45 { return emptyString(this.text); 46 47 } 48 49 50 51 public static boolean emptyString(String s) 52 53 { int c; 54 55 56 57 for (c = 0; c < s.length(); c ++) 58 59 { if (s.charAt(c) > 32) 60 61 { return false; 62 63 } 64 65 } 66 67 return true; 68 69 } 70 71 72 73 public static StringBuffer cleanString(StringBuffer buffer) 74 75 { int c; 76 77 int w; 78 79 80 81 w = 0; 82 83 for (c = 0; c < buffer.length(); c ++) 84 85 { if (buffer.charAt(c) >= 32) 86 87 { if (buffer.charAt(c) == 32) 88 89 { if (w > 0 && buffer.charAt(w-1) > 32) 90 91 { buffer.setCharAt(w, buffer.charAt(c)); 92 93 w ++; 94 95 } 96 97 } 98 99 else 100 101 { if (c != w) 102 103 { buffer.setCharAt(w, buffer.charAt(c)); 104 105 } 106 107 w ++; 108 109 } 110 111 } 112 113 else 114 115 { if (w > 0 && buffer.charAt(w-1) > 32) 116 117 { buffer.setCharAt(w,' '); 118 119 w ++; 120 121 } 122 123 } 124 125 } 126 127 buffer.setLength(w); 128 129 return buffer; 130 131 } 132 133 134 135 /** 136 137 * nullString; used by initialisation for checking for 138 139 * non-blank content (ie. text other than just spaces/returns) 140 141 * see constructor (ie HTMLBlockList) methods below for role of 142 143 * this in the larger plan of things 144 145 */ 146 147 public boolean nullString() 148 149 { int ch; 150 151 152 153 if (text != null) 154 155 { for (ch = 0; ch < text.length(); ch ++) 156 157 { if (text.charAt(ch) > ' ') 158 159 { return false; 160 161 } 162 163 } 164 165 } 166 167 return true; 168 169 } 170 171 172 173 public String toString() 174 175 { return this.text; 176 177 } 178 179 public String toString(boolean cleaned) 180 { if (cleaned == false) 181 { return toString(); 182 } 183 else 184 { return HTMLEntity.decodeText(this.text); 185 } 186 } 187 97 public String toString(boolean cleaned) 98 { if (cleaned == false) 99 { return toString(); 100 } 101 else 102 { return HTMLEntity.decodeText(this.text); 103 } 104 } 188 105 } -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/HTMLContext.java
r5800 r6285 46 46 length = 0; 47 47 while ( start > 0) 48 { if (parent.elementAt(start) instanceof String) 48 { 49 /* 50 if (parent.elementAt(start) instanceof String) 49 51 { string = (String) parent.elementAt(start); 50 52 … … 59 61 { break; 60 62 } 61 } 62 else if (parent.elementAt(start) instanceof HTMLCText) 63 } 64 else 65 */ 66 if (parent.elementAt(start) instanceof HTMLCText) 63 67 { string = ((HTMLCText) parent.elementAt(start)).toString(); 64 68 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/HTMLDoc.java
r5800 r6285 11 11 12 12 public class HTMLDoc 13 { String 14 Stringtitle;15 URL url; 16 intpos;17 intstatus;18 19 20 21 Objectnote;22 HTMLLoaderloader;23 HTMLBlockcoded;13 { String content; 14 String title; 15 URL url; 16 int pos; 17 int status; 18 HTMLBlockList blocklist; 19 List urls_out; 20 List urls_in; 21 Object note; 22 HTMLLoader loader; 23 HTMLBlock coded; 24 24 25 25 /** -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/HTMLEntity.java
r5800 r6285 7 7 8 8 public class HTMLEntity 9 { String 10 11 static Vectorentities = null;12 staticVector nmapping = null;9 { String name; 10 int code; 11 static Vector entities = null; 12 static Vector nmapping = null; 13 13 14 14 public HTMLEntity(String name, int code) 15 15 { int start, end, at; 16 17 18 19 20 } 21 22 16 HTMLEntity compareEntity; 17 18 if (HTMLEntity.entities == null) 19 { HTMLEntity.entities = new Vector(); 20 } 21 if (HTMLEntity.nmapping == null) 22 { HTMLEntity.nmapping = new Vector(); 23 23 } 24 24 … … 27 27 28 28 entities.addElement(this); 29 30 end= nmapping.size();31 32 { at= (start + end) / 2;29 start = 0; 30 end = nmapping.size(); 31 while (start != end) 32 { at = (start + end) / 2; 33 33 34 35 36 37 {end = at;38 39 40 {start = at + 1;41 42 43 44 45 46 47 {nmapping.insertElementAt(new Integer(nmapping.size()), start);48 34 compareEntity = 35 (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue()); 36 if (compareEntity.code > code) 37 { end = at; 38 } 39 else 40 { start = at + 1; 41 } 42 } 43 if (end == nmapping.size()) 44 { nmapping.addElement(new Integer(nmapping.size())); 45 } 46 else 47 { nmapping.insertElementAt(new Integer(nmapping.size()), start); 48 } 49 49 } 50 50 … … 52 52 { Character c; 53 53 54 54 c = new Character((char) this.code); 55 55 return c.toString(); 56 56 } … … 58 58 public static String codeString(char c) 59 59 { StringBuffer reply; 60 61 62 63 64 } 65 66 reply 60 String codestring; 61 62 if (c < 128 || c == '&' || c == '<' || c == '>') 63 { return null; 64 } 65 codestring = Integer.toString((int) c); 66 reply = new StringBuffer("&#"); 67 67 reply.append(codestring); 68 68 reply.append(";"); … … 71 71 } 72 72 73 74 {int start, end, at, compare;75 76 77 78 79 80 {at = (start + end) / 2;81 82 83 84 85 {StringBuffer reply;86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 73 public static String nameCodeString(char c) 74 { int start, end, at, compare; 75 HTMLEntity compareEntity; 76 77 start = 0; 78 end = nmapping.size(); 79 while (start != end) 80 { at = (start + end) / 2; 81 82 compareEntity = 83 (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue()); 84 if (compareEntity.code == (int) c) 85 { StringBuffer reply; 86 87 reply = new StringBuffer("&"); 88 reply.append(compareEntity.name); 89 reply.append(";"); 90 return reply.toString(); 91 } 92 else if (compareEntity.code < c) 93 { end = at; 94 } 95 else 96 { start = at + 1; 97 } 98 } 99 return codeString(c); 100 } 101 101 102 102 public static void getEntities(String source) 103 103 { HTMLEntity newentity; 104 104 int pos; 105 105 int start; 106 106 String name; 107 107 int code; 108 109 110 {return;111 108 109 if (source == null) 110 { return; 111 } 112 112 113 113 start = 0; 114 114 while (start < source.length()) 115 115 { pos = start; 116 while (source.charAt(pos) != ':') 117 { pos ++; 118 } 119 name = source.substring(start, pos); 120 121 // skip colon and reset 122 pos ++; 123 start = pos; 124 125 while(pos < source.length() && source.charAt(pos) > ' ') 126 { pos ++; 127 } 128 code = Integer.parseInt(source.substring(start, pos)); 129 130 newentity = new HTMLEntity(name, code); 131 132 while ( pos < source.length() && source.charAt(pos) < ' ') 116 while (source.charAt(pos) != ':') 133 117 { pos ++; 134 118 } 119 name = source.substring(start, pos); 120 121 // skip colon and reset 122 pos ++; 135 123 start = pos; 136 } 137 } 138 139 public static void getEntities(InputStream in) 140 { getEntities(DocumentLoader.getAsString(in)); 124 125 while(pos < source.length() && source.charAt(pos) > ' ') 126 { pos ++; 127 } 128 code = Integer.parseInt(source.substring(start, pos)); 129 130 newentity = new HTMLEntity(name, code); 131 132 while ( pos < source.length() && source.charAt(pos) < ' ') 133 { pos ++; 134 } 135 start = pos; 136 } 137 } 138 139 public static void getEntities(InputStream in) 140 { getEntities(DocumentLoader.getAsString(in)); 141 } 142 143 public static void getEntities(File file) 144 { FileInputStream in; 145 146 try 147 { in = new FileInputStream(file); 148 getEntities(in); 149 in.close(); 150 } 151 catch (IOException io) 152 { System.out.println("No entities"); 153 } 154 } 155 156 public static void getEntities() 157 { ClassLoader loader; 158 InputStream in; 159 160 in = ClassLoader.getSystemResourceAsStream("org\\greenstone\\gsdl3\\gs3build\\util\\Entities.map"); 161 if (in == null) 162 { System.out.println("Unable to load Entities.map from org.greenstone.gsdl3.gs3build.util"); 163 } 164 getEntities(in); 165 } 166 167 public static String encodeText(String source, boolean named) 168 { StringBuffer reply; 169 int start,at; 170 171 if (source == null) 172 { return null; 173 } 174 175 if (HTMLEntity.entities == null) 176 { getEntities(); 177 } 178 179 reply = new StringBuffer(); 180 at = 0; 181 start = 0; 182 while (at < source.length()) 183 { if (source.charAt(at) > 128) 184 { if (at != start) 185 { reply.append(source.substring(start, at)); 141 186 } 142 187 143 public static void getEntities(File file) 144 { FileInputStream in; 145 146 try 147 { in = new FileInputStream(file); 148 getEntities(in); 149 in.close(); 150 } 151 catch (IOException io) 152 { System.out.println("No entities"); 153 } 188 if (named) 189 { reply.append(nameCodeString(source.charAt(at))); 154 190 } 155 156 public static void getEntities() 157 { ClassLoader loader; 158 InputStream in; 159 160 in = ClassLoader.getSystemResourceAsStream("org\\greenstone\\gsdl3\\gs3build\\util\\Entities.map"); 161 if (in == null) 162 { System.out.println("Unable to load Entities.map from org.greenstone.gsdl3.gs3build.util"); 163 } 164 getEntities(in); 191 else 192 { reply.append(codeString(source.charAt(at))); 165 193 } 166 167 public static String encodeText(String source, boolean named) 168 { StringBuffer reply; 169 int start,at; 170 171 if (source == null) 172 { return null; 173 } 174 175 if (HTMLEntity.entities == null) 176 { getEntities(); 177 } 178 179 reply = new StringBuffer(); 180 at = 0; 181 start = 0; 182 while (at < source.length()) 183 { if (source.charAt(at) > 128) 184 { if (at != start) 185 { reply.append(source.substring(start, at)); 186 } 187 188 if (named) 189 { reply.append(nameCodeString(source.charAt(at))); 190 } 191 else 192 { reply.append(codeString(source.charAt(at))); 193 } 194 start = at + 1; 195 } 196 at ++; 197 } 194 start = at + 1; 195 } 196 at ++; 197 } 198 198 199 199 return reply.toString(); 200 200 } 201 201 202 202 public static String decodeText(String source) 203 203 { int start, end, loff, roff; 204 204 int sstart, send, test; 205 205 int startoff, endoff, baseoff; 206 206 boolean matched; 207 207 String tail; 208 209 210 211 212 213 214 215 216 217 218 reply 219 baseoff 208 StringBuffer reply; 209 210 if (source == null) 211 { return null; 212 } 213 214 if (HTMLEntity.entities == null) 215 { getEntities(); 216 } 217 218 reply = new StringBuffer(); 219 baseoff = 0; 220 220 221 221 tail = source; … … 254 254 } 255 255 } 256 256 else 257 257 { start = 0; 258 end= HTMLEntity.entities.size();259 loff 260 261 262 { sstart= start;263 send= end;264 test 265 266 267 268 269 270 271 {test = (sstart + send) >> 1;272 273 274 275 276 277 278 279 280 281 282 283 284 // System.out.println(sstart+">"+send+":"+loff);285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 258 end = HTMLEntity.entities.size(); 259 loff = 0; 260 261 while (startoff+1+loff < tail.length() && start != end) 262 { sstart = start; 263 send = end; 264 test = (sstart + send) >> 1; 265 266 matched = false; 267 268 roff = startoff + 1 + loff; 269 270 while (sstart != send) 271 { test = (sstart + send) >> 1; 272 273 if (tail.charAt(roff) < ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff)) 274 { send = test; 275 } 276 else if (tail.charAt(roff) > ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff)) 277 { sstart = test + 1; 278 } 279 else 280 { break; 281 } 282 } 283 284 // System.out.println(sstart+">"+send+":"+loff); 285 286 if (sstart != send) // found a match 287 { sstart = test; 288 while (sstart >= start && 289 tail.charAt(roff) == 290 ((HTMLEntity) HTMLEntity.entities.elementAt(sstart)).name.charAt(loff)) 291 { sstart --; 292 } 293 sstart ++; 294 295 send = test + 1; 296 while (send < end && 297 tail.charAt(roff) == 298 ((HTMLEntity) HTMLEntity.entities.elementAt(send)).name.charAt(loff)) 299 { send ++; 300 } 301 301 302 302 // System.out.println(sstart+"!"+send+":"+loff); 303 303 304 305 306 307 308 309 310 311 304 start = sstart; 305 end = send; 306 } 307 else 308 { loff = 0; 309 reply.append(tail.substring(0, startoff+1)); 310 break; 311 } 312 312 313 313 // System.out.println(start+">"+end+":"+loff); 314 315 316 317 318 319 320 321 322 323 324 325 314 315 loff ++; 316 if (end == start + 1 && 317 ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.length() == loff) 318 { reply.append(tail.substring(0, startoff)); 319 reply.append(((HTMLEntity) HTMLEntity.entities.elementAt(test)).toString()); 320 if (roff < tail.length() - 1 && tail.charAt(roff+1) == ';') 321 { loff ++; 322 } 323 break; 324 } 325 } 326 326 } 327 327 328 328 tail = tail.substring(startoff + 1 + loff); 329 329 } while (tail != null && tail.length() > 0); 330 330 331 331 if (tail != null) 332 332 { reply.append(tail); … … 335 335 } 336 336 337 338 339 340 341 342 343 344 337 public static void main(String args[]) 338 { String reply; 339 340 getEntities(); 341 342 reply = decodeText(args[0]); 343 System.out.println(reply); 344 } 345 345 } 346 346 -
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/HTMLTag.java
r5800 r6285 1 1 package org.greenstone.gsdl3.gs3build.util; 2 2 3 public class HTMLTag 4 { String tagtext; 5 String name; 6 int docpos; 7 int endpos; 8 9 public HTMLTag(String tagtext, int docpos, int endpos) 10 { this.tagtext = tagtext; 11 this.name = null; 12 this.docpos = docpos; 13 if (this.docpos < 0) 14 { this.endpos = -1; 15 } 16 else 17 { this.endpos = endpos; 18 } 19 } 20 21 public HTMLTag(String tagtext) 22 { this.tagtext = tagtext; 23 this.docpos = -1; 24 this.name = null; 25 } 3 public class HTMLTag implements HTMLObject 4 { String tagtext; 5 String name; 6 int docpos; 7 int endpos; 8 9 public static final String HTML_ELEMENT_TYPE = "Element"; 10 11 public HTMLTag(String tagtext, int docpos, int endpos) 12 { this.tagtext = tagtext; 13 this.name = null; 14 this.docpos = docpos; 15 if (this.docpos < 0) 16 { this.endpos = -1; 17 } 18 else 19 { this.endpos = endpos; 20 } 21 } 22 23 public HTMLTag(String tagtext) 24 { this.tagtext = tagtext; 25 this.docpos = -1; 26 this.name = null; 27 } 28 29 public String getHTMLType() 30 { return HTML_ELEMENT_TYPE; 31 } 26 32 27 33 public HTMLTag endTag()
Note:
See TracChangeset
for help on using the changeset viewer.