Changeset 8705 for trunk/gsdl3/src
- Timestamp:
- 2004-11-30T11:46:24+13:00 (20 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/util/HTMLTidy.java
r6344 r8705 23 23 public class HTMLTidy extends DefaultHandler 24 24 { 25 String label; 26 File file; 27 StringBuffer value; 28 boolean inElement; 29 boolean accumulate; 30 Document document; 31 Element currentElement; 32 Element root; 33 34 class HTMLCommentHandler implements LexicalHandler 35 { 36 HTMLTidy parent; 37 38 HTMLCommentHandler(HTMLTidy tidier) 39 { this.parent = tidier; 40 } 41 42 public void comment(char ch[], int start, int length) 43 { String comment = new String(ch, start, length); 44 45 parent.comment(comment); 46 } 47 48 public void endCDATA() 25 String label; 26 File file; 27 StringBuffer value; 28 boolean inElement; 29 boolean accumulate; 30 Document document; 31 Element currentElement; 32 Element root; 33 34 class HTMLCommentHandler implements LexicalHandler 49 35 { 50 } 51 52 public void endDTD() 36 HTMLTidy parent; 37 38 HTMLCommentHandler(HTMLTidy tidier) 39 { 40 this.parent = tidier; 41 } 42 43 public void comment(char ch[], int start, int length) 44 { 45 String comment = new String(ch, start, length); 46 47 parent.comment(comment); 48 } 49 50 public void endCDATA() 51 { 52 } 53 54 public void endDTD() 55 { 56 } 57 58 public void endEntity(String name) 59 { 60 } 61 62 public void startCDATA() 63 { 64 } 65 66 public void startDTD(String name, String publicId, String systemId) 67 { 68 } 69 70 public void startEntity(String name) 71 { 72 } 73 } 74 75 public HTMLTidy(URL url) 76 { 77 // TODO: complete 78 } 79 80 public HTMLTidy(File file) 81 { 82 this.file = null; 83 this.label = null; 84 this.value = null; 85 86 this.document = new org.apache.html.dom.HTMLDocumentImpl(); 87 88 this.currentElement = this.document.getDocumentElement(); 89 this.root = this.currentElement; 90 91 try { 92 XMLReader handler = new org.ccil.cowan.tagsoup.Parser(); 93 HTMLCommentHandler commentHandler = new HTMLCommentHandler(this); 94 handler.setContentHandler(this); 95 handler.setProperty("http://xml.org/sax/properties/lexical-handler", commentHandler); 96 handler.parse(file.toString()); 97 } 98 catch (java.io.IOException io) 99 { 100 } 101 catch (org.xml.sax.SAXException saxEx) 102 { 103 System.out.println(saxEx); 104 } 105 } 106 107 public void startElement(String URI, String localName, String qName, Attributes attributes) 108 { 109 //System.out.println("<"+localName+">"); 110 111 if (localName.equals("html") == false) { 112 Element childElement = this.document.createElement(localName); 113 114 this.currentElement.appendChild(childElement); 115 this.currentElement = childElement; 116 117 for (int a = 0; a < attributes.getLength(); a ++){ 118 String name = attributes.getLocalName(a); 119 String value = attributes.getValue(a); 120 121 childElement.setAttribute(name, value); 122 } 123 } 124 } 125 126 public void comment(String text) 53 127 { 54 } 55 56 public void endEntity(String name) 128 Comment comment = document.createComment(text); 129 this.currentElement.appendChild(comment); 130 //System.out.println("<!-- "+text+" -->"); 131 } 132 133 public void endElement(String URI, String localName, String qName) 134 { 135 // System.out.println("Ending " + localName); 136 // System.out.println("</"+localName+">"); 137 138 if (localName.equals("html")) { 139 return; 140 } 141 142 // TODO: check is the same name as current node 143 if (!this.currentElement.getTagName().equals(localName.toUpperCase())) { 144 // TODO: error handling 145 System.out.println("Bad end at " + localName + " " + this.currentElement.getTagName()); 146 return; 147 } 148 149 // Optimisation: is this an empty tag? 150 boolean isEmpty = false; 151 152 if (this.currentElement.getChildNodes().getLength() == 1){ 153 Node child = this.currentElement.getFirstChild(); 154 155 if (child.getNodeType() == Node.TEXT_NODE) { 156 String text = child.getNodeValue(); 157 158 int i = 0; 159 while (i < text.length()){ 160 if (text.charAt(i) > 32 && text.charAt(i) != 160) { 161 break; 162 } 163 i++; 164 } 165 if (text.length() == i) { 166 isEmpty = true; 167 } 168 } 169 } 170 else if (this.currentElement.getChildNodes().getLength() == 0){ 171 isEmpty = true; 172 } 173 174 boolean isSingleton = false; 175 String tagName = this.currentElement.getTagName().toLowerCase(); 176 177 if (tagName.equals("img") || 178 tagName.equals("br") || 179 tagName.equals("meta") || 180 tagName.equals("input") || 181 tagName.equals("area") || 182 tagName.equals("link") || 183 tagName.equals("base") || 184 tagName.equals("img") || 185 tagName.equals("hr")) { 186 isSingleton = true; 187 } 188 189 Element parent = (Element) this.currentElement.getParentNode(); 190 191 if (isEmpty == true && !isSingleton) { 192 // trim the current child 193 parent.removeChild(this.currentElement); 194 // System.out.println("***Removing child***"); 195 } 196 197 this.currentElement = parent; 198 } 199 200 public void characters(char c[], int start, int length) 201 { 202 String string = new String(c, start, length); 203 Node text_node = this.document.createTextNode(string); 204 this.currentElement.appendChild(text_node); 205 // System.out.println(string); 206 } 207 208 public void recursePrint(Element element, int indent) 57 209 { 58 } 59 60 public void startCDATA() 61 { 62 } 63 64 public void startDTD(String name, String publicId, String systemId) 65 { 66 } 67 68 public void startEntity(String name) 69 { 70 } 71 } 72 73 public HTMLTidy(URL url) 74 { // TODO: complete 75 } 76 77 public HTMLTidy(File file) 78 { this.file = null; 79 this.label = null; 80 this.value = null; 81 82 this.document = new org.apache.html.dom.HTMLDocumentImpl(); 83 84 this.currentElement = this.document.getDocumentElement(); 85 this.root = this.currentElement; 86 87 try { 88 XMLReader handler = new org.ccil.cowan.tagsoup.Parser(); 89 HTMLCommentHandler commentHandler = new HTMLCommentHandler(this); 90 handler.setContentHandler(this); 91 handler.setProperty("http://xml.org/sax/properties/lexical-handler", commentHandler); 92 handler.parse(file.toString()); 93 } 94 catch (java.io.IOException io) 95 { 96 } 97 catch (org.xml.sax.SAXException saxEx) 98 { 99 System.out.println(saxEx); 100 } 101 } 102 103 public void startElement(String URI, String localName, String qName, Attributes attributes) 104 { //System.out.println("<"+localName+">"); 105 106 if (localName.equals("html") == false) { 107 Element childElement = this.document.createElement(localName); 108 109 this.currentElement.appendChild(childElement); 110 this.currentElement = childElement; 111 112 for (int a = 0; a < attributes.getLength(); a ++) 113 { String name = attributes.getLocalName(a); 114 String value = attributes.getValue(a); 115 116 childElement.setAttribute(name, value); 117 } 118 } 119 } 120 121 public void comment(String text) 122 { 123 Comment comment = document.createComment(text); 124 this.currentElement.appendChild(comment); 125 //System.out.println("<!-- "+text+" -->"); 126 } 127 128 public void endElement(String URI, String localName, String qName) 129 { // System.out.println("Ending " + localName); 130 // System.out.println("</"+localName+">"); 131 132 if (localName.equals("html")) { 133 return; 134 } 135 136 // TODO: check is the same name as current node 137 if (!this.currentElement.getTagName().equals(localName.toUpperCase())) { 138 // TODO: error handling 139 System.out.println("Bad end at " + localName + " " + this.currentElement.getTagName()); 140 return; 141 } 142 143 // Optimisation: is this an empty tag? 144 boolean isEmpty = false; 145 146 if (this.currentElement.getChildNodes().getLength() == 1) 147 { Node child = this.currentElement.getFirstChild(); 148 149 if (child.getNodeType() == Node.TEXT_NODE) { 150 String text = child.getNodeValue(); 151 152 int i = 0; 153 while (i < text.length()) 154 { if (text.charAt(i) > 32 && text.charAt(i) != 160) { 155 break; 156 } 157 i++; 158 } 159 if (text.length() == i) { 160 isEmpty = true; 161 } 162 } 163 } 164 else if (this.currentElement.getChildNodes().getLength() == 0) 165 { isEmpty = true; 166 } 167 168 boolean isSingleton = false; 169 String tagName = this.currentElement.getTagName().toLowerCase(); 170 171 if (tagName.equals("img") || 172 tagName.equals("br") || 173 tagName.equals("meta") || 174 tagName.equals("input") || 175 tagName.equals("area") || 176 tagName.equals("link") || 177 tagName.equals("base") || 178 tagName.equals("img") || 179 tagName.equals("hr")) { 180 isSingleton = true; 181 } 182 183 Element parent = (Element) this.currentElement.getParentNode(); 184 185 if (isEmpty == true && !isSingleton) { 186 // trim the current child 187 parent.removeChild(this.currentElement); 188 // System.out.println("***Removing child***"); 189 } 190 191 this.currentElement = parent; 192 } 193 194 public void characters(char c[], int start, int length) 195 { String string = new String(c, start, length); 196 Node text_node = this.document.createTextNode(string); 197 this.currentElement.appendChild(text_node); 198 // System.out.println(string); 199 } 200 201 public void recursePrint(Element element, int indent) 202 { 203 for (int i = 0; i < indent; i++) { 204 System.out.print(" "); 205 } 206 System.out.println(element.toString()); 207 for (int c = 0; c < element.getChildNodes().getLength(); c ++) 208 { Node n = element.getChildNodes().item(c); 209 210 if (n.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { 211 Element e = (Element) n; 212 213 this.recursePrint(e, indent+2); 214 } 215 } 216 } 217 218 public Document getDocument() 219 { 220 // this.recursePrint(this.root, 0); 221 // System.out.println(this.document.toString()); 222 // System.out.println(this.root); 223 return this.document; 224 } 210 for (int i = 0; i < indent; i++) { 211 System.out.print(" "); 212 } 213 System.out.println(element.toString()); 214 for (int c = 0; c < element.getChildNodes().getLength(); c ++){ 215 Node n = element.getChildNodes().item(c); 216 217 if (n.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { 218 Element e = (Element) n; 219 220 this.recursePrint(e, indent+2); 221 } 222 } 223 } 224 225 public Document getDocument() 226 { 227 // this.recursePrint(this.root, 0); 228 // System.out.println(this.document.toString()); 229 // System.out.println(this.root); 230 return this.document; 231 } 225 232 }
Note:
See TracChangeset
for help on using the changeset viewer.