source: greenstone3/trunk/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 16999

Last change on this file since 16999 was 16999, checked in by ak19, 16 years ago

When testing GathererApplet4gs3 with all the updated core Java code, the Greenstone Gateway page failed with an exception. The NullPointerException in XMLConverter.nodeToElement() is now fixed.

  • Property svn:keywords set to Author Date Id Revision
File size: 11.1 KB
Line 
1/*
2 * XMLConverter.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21// XML classes
22import org.w3c.dom.Document;
23import org.w3c.dom.DocumentType;
24import org.w3c.dom.Element;
25import org.w3c.dom.Node;
26import org.w3c.dom.NodeList;
27import org.w3c.dom.NamedNodeMap;
28import org.xml.sax.InputSource;
29import org.xml.sax.EntityResolver;
30import org.apache.xerces.parsers.DOMParser;
31import org.apache.xerces.dom.DocumentImpl; // for new Documents
32
33// other java classes
34import java.io.Reader;
35import java.io.InputStreamReader;
36import java.io.StringReader;
37import java.io.File;
38import java.io.FileInputStream;
39import java.io.FileReader;
40
41import org.apache.log4j.*;
42
43/** XMLConverter - utility class for greenstone
44 *
45 * parses XML Strings into Documents, converts Nodes to Strings
46 * different parsers have different behaviour - can experiment in here
47 * now we only use xerces
48 *
49 * @author <a href="mailto:[email protected]">Katherine Don</a>
50 * @version $Revision: 16999 $
51 *
52 */
53public class XMLConverter {
54
55 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.XMLConverter.class.getName());
56
57 /** xerces parser */
58 protected DOMParser parser = null;
59
60 private boolean outputEscaping = true;
61
62
/**
 * Creates a converter backed by a non-validating Xerces {@code DOMParser}.
 *
 * <p>Validation, loading of external DTDs and deferred node expansion are all
 * switched off. If the parser cannot be created or configured, the failure is
 * logged (with its stack trace) and construction still completes; a parser
 * that was created but only partly configured stays in use.</p>
 */
public XMLConverter() {
    try {
        this.parser = new DOMParser();
        this.parser.setFeature("http://xml.org/sax/features/validation", false);
        // Don't try to load an external DTD - there is no need if we are not
        // validating, and it may cause connection errors if a proxy is not set up.
        this.parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // A performance test showed that having deferred node expansion on led
        // to increased memory use for small-medium docs, and not much gain for
        // large docs.
        // http://www.sosnoski.com/opensrc/xmlbench/conclusions.html
        this.parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
    } catch (Exception e) {
        // Pass the exception itself so the stack trace reaches the log, the
        // same way getDOM(File) reports its failures.
        logger.error(e.getMessage(), e);
    }
}
79
80 /** sets the entity resolver. pass in null to unset it */
81 public void setEntityResolver(EntityResolver er) {
82 this.parser.setEntityResolver(er);
83 }
84
85 /** Given a Node representing an Element or Document, will return the
86 * Element/docroot Element. Returns null if the Node was not an element. */
87 public static Element nodeToElement(Node node)
88 {
89 if(node == null) {
90 return null;
91 }
92 short nodeType = node.getNodeType();
93
94 if (nodeType == Node.DOCUMENT_NODE) {
95 Document docNode = (Document)node;
96 return docNode.getDocumentElement() ;
97 }
98 else if (nodeType == Node.ELEMENT_NODE) {
99 return (Element)node;
100 }
101 else {
102 String message = "Expecting Document or Element node type but got "
103 + node.getNodeName() + "\nReturning null";
104 System.err.println(message);
105 logger.warn(message);
106 return null;
107 }
108 }
109
110 /** returns a DOM Document */
111 public Document getDOM(String in) {
112
113 try {
114 Reader reader = new StringReader(in);
115 InputSource xml_source = new InputSource(reader);
116
117 this.parser.parse(xml_source);
118 Document doc = this.parser.getDocument();
119
120 return doc;
121
122 } catch (Exception e) {
123 logger.error(e.getMessage());
124 }
125 return null;
126 }
127
128 /** returns a DOM Document */
129 public Document getDOM(File in) {
130 try {
131 FileReader reader = new FileReader(in);
132 InputSource xml_source = new InputSource(reader);
133 this.parser.parse(xml_source);
134 Document doc = this.parser.getDocument();
135 return doc;
136
137 } catch (Exception e) {
138 logger.error(e.getMessage(), e);
139
140 }
141 return null;
142 }
143
144 /** returns a DOM document */
145 public Document getDOM(File in, String encoding) {
146 try {
147
148 InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
149 InputSource xml_source = new InputSource(isr);
150
151 this.parser.parse(xml_source);
152 Document doc = this.parser.getDocument();
153
154 return doc;
155
156 } catch (Exception e) {
157 logger.error(e.getMessage());
158 }
159 return null;
160 }
161
162
163 /** creates a new empty DOM Document */
164 public Document newDOM() {
165 Document doc = new DocumentImpl();
166 return doc;
167 }
168
169 /** returns the Node as a String */
170 public String getString(Node xmlNode)
171 {
172 outputEscaping = true;
173 StringBuffer xmlRepresentation = new StringBuffer();
174 getString(xmlNode, xmlRepresentation, 0, false);
175 return xmlRepresentation.toString();
176 }
177
178 /** returns the node as a nicely formatted String - this introduces extra
179 * text nodes if the String is read back in as a DOM, so should only be
180 * used for printing */
181 public String getPrettyString(Node xmlNode) {
182
183 outputEscaping = true;
184 StringBuffer xmlRepresentation = new StringBuffer();
185 getString(xmlNode, xmlRepresentation, 0, true);
186 return xmlRepresentation.toString();
187 }
188
189 private void getString(Node xmlNode, StringBuffer xmlRepresentation,
190 int depth, boolean pretty)
191 {
192
193 if (xmlNode == null) {
194 xmlRepresentation.append("<null>");
195 return;
196 }
197
198 short nodeType = xmlNode.getNodeType();
199 String nodeName = xmlNode.getNodeName();
200
201 if (nodeType == Node.DOCUMENT_NODE) {
202 Document xmlDocNode = (Document)xmlNode;
203
204 if (xmlDocNode.getDoctype() == null) {
205 System.err.println("Doctype is null.");
206 }
207 else {
208
209 DocumentType dt = xmlDocNode.getDoctype();
210
211 String name = dt.getName();
212 String pid = dt.getPublicId();
213 String sid = dt.getSystemId();
214
215 String doctype_str = "<!DOCTYPE " + dt.getName() + " PUBLIC \"" + pid + "\" \"" + sid + "\">\n";
216
217 xmlRepresentation.append(doctype_str);
218 }
219 getString(xmlDocNode.getDocumentElement(), xmlRepresentation, depth, pretty);
220 return;
221 }
222 // Handle Element nodes
223 if (nodeType == Node.ELEMENT_NODE) {
224 if (pretty) {
225 xmlRepresentation.append("\n");
226 for (int i = 0; i < depth; i++) {
227 xmlRepresentation.append(" ");
228 }
229 }
230
231 // Write opening tag
232 xmlRepresentation.append("<");
233 xmlRepresentation.append(nodeName);
234
235 // Write the node attributes
236 NamedNodeMap nodeAttributes = xmlNode.getAttributes();
237 for (int i = 0; i < nodeAttributes.getLength(); i++) {
238 Node attribute = nodeAttributes.item(i);
239 xmlRepresentation.append(" ");
240 xmlRepresentation.append(attribute.getNodeName());
241 xmlRepresentation.append("=\"");
242 xmlRepresentation.append(attribute.getNodeValue());
243 xmlRepresentation.append("\"");
244 }
245
246 // If the node has no children, close the opening tag and return
247 if (xmlNode.hasChildNodes() == false) {
248 // This produces somewhat ugly output, but it is necessary to compensate
249 // for display bugs in Netscape. Firstly, the space is needed before the
250 // closing bracket otherwise Netscape will ignore some tags (<br/>, for
251 // example). Also, a newline character would be expected after the tag,
252 // but this causes problems with the display of links (the link text
253 // will contain a newline character, which is displayed badly).
254 xmlRepresentation.append(" />");
255 return;
256 }
257
258 // Close the opening tag
259 xmlRepresentation.append(">");
260
261 // Apply recursively to the children of this node
262 // hack for nodes next to text nodes - dont make them pretty
263 // this is needed for text inside a <pre> element - any new lines
264 // or spaces around the span elements show up in the text
265 NodeList children = xmlNode.getChildNodes();
266 boolean do_pretty = pretty;
267 for (int i = 0; i < children.getLength(); i++) {
268 if (children.item(i).getNodeType()==Node.TEXT_NODE) {
269 do_pretty=false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
270 }
271 getString(children.item(i), xmlRepresentation, depth + 1, do_pretty);
272 }
273
274 // Write closing tag
275 if (pretty) {
276 if (xmlRepresentation.charAt(xmlRepresentation.length()-1) == '\n') {
277 for (int i = 0; i < depth; i++)
278 xmlRepresentation.append(" ");
279 }
280 }
281 xmlRepresentation.append("</");
282 xmlRepresentation.append(nodeName);
283 xmlRepresentation.append(">");
284 if (pretty) {
285 xmlRepresentation.append("\n");
286 }
287 }
288
289 // Handle Text nodes
290 else if (nodeType == Node.TEXT_NODE) {
291 String text = xmlNode.getNodeValue();
292
293 // Perform output escaping, if required
294 if (outputEscaping) {
295 text = text.replaceAll("&", "&amp;"); // Must be done first!!
296 text = text.replaceAll("<", "&lt;");
297 text = text.replaceAll(">", "&gt;");
298 text = text.replaceAll("\"", "&quot;");
299 text = text.replaceAll("\'", "&apos;");
300 }
301
302 // Remove any control-C characters
303 text = text.replaceAll("" + (char) 3, "");
304 xmlRepresentation.append(text);
305 }
306
307 // Handle Processing Instruction nodes
308 else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
309 if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
310 outputEscaping = false;
311 }
312 else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
313 outputEscaping = true;
314 }
315 else {
316 logger.warn("Unhandled processing instruction " + nodeName);
317 }
318 }
319
320 else if (nodeType == Node.COMMENT_NODE) {
321 String text = xmlNode.getNodeValue();
322 xmlRepresentation.append("<!-- ");
323 xmlRepresentation.append(text);
324 xmlRepresentation.append(" -->");
325 }
326
327
328
329 // A type of node that is not handled yet
330 else {
331 logger.warn("Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
332 }
333
334 return;
335 }
336
337 protected static String getNodeTypeString(short node_type) {
338
339 String type = "";
340 switch(node_type) {
341 case Node.ATTRIBUTE_NODE:
342 type="ATTRIBUTE_NODE";
343 break;
344 case Node.CDATA_SECTION_NODE:
345 type="CDATA_SECTION_NODE";
346 break;
347 case Node.COMMENT_NODE:
348 type="COMMENT_NODE";
349 break;
350 case Node.DOCUMENT_FRAGMENT_NODE:
351 type="DOCUMENT_FRAGMENT_NODE";
352 break;
353 case Node.DOCUMENT_NODE:
354 type="DOCUMENT_NODE";
355 break;
356 case Node.DOCUMENT_TYPE_NODE:
357 type="DOCUMENT_TYPE_NODE";
358 break;
359 case Node.ELEMENT_NODE:
360 type="ELEMENT_NODE";
361 break;
362 case Node.ENTITY_NODE:
363 type="ENTITY_NODE";
364 break;
365 case Node.ENTITY_REFERENCE_NODE:
366 type="ENTITY_REFERENCE_NODE";
367 break;
368 case Node.NOTATION_NODE:
369 type="NOTATION_NODE";
370 break;
371 case Node.PROCESSING_INSTRUCTION_NODE:
372 type="PROCESSING_INSTRUCTION_NODE";
373 break;
374 case Node.TEXT_NODE:
375 type="TEXT_NODE";
376 break;
377 default:
378 type="UNKNOWN";
379 }
380
381 return type;
382 }
383}
Note: See TracBrowser for help on using the repository browser.