Context Navigation

source: greenstone3/trunk/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 16999

Last change on this file since 16999 was 16999, checked in by ak19, 16 years ago
When testing GathererApplet4gs3 with all the updated core Java code, the Greenstone Gateway page failed with an exception. The NullPointerException in XMLConverter.nodeToElement() is now fixed.
Property svn:keywords set to `Author Date Id Revision`
File size: 11.1 KB

Line
1	/*
2	* XMLConverter.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.util;
20
21	// XML classes
22	import org.w3c.dom.Document;
23	import org.w3c.dom.DocumentType;
24	import org.w3c.dom.Element;
25	import org.w3c.dom.Node;
26	import org.w3c.dom.NodeList;
27	import org.w3c.dom.NamedNodeMap;
28	import org.xml.sax.InputSource;
29	import org.xml.sax.EntityResolver;
30	import org.apache.xerces.parsers.DOMParser;
31	import org.apache.xerces.dom.DocumentImpl; // for new Documents
32
33	// other java classes
34	import java.io.Reader;
35	import java.io.InputStreamReader;
36	import java.io.StringReader;
37	import java.io.File;
38	import java.io.FileInputStream;
39	import java.io.FileReader;
40
41	import org.apache.log4j.*;
42
43	/** XMLConverter - utility class for greenstone
44	*
45	* parses XML Strings into Documents, converts Nodes to Strings
46	* different parsers have different behaviour - can experiment in here
47	* now we only use xerces
48	*
49	* @author <a href="mailto:[email protected]">Katherine Don</a>
50	* @version $Revision: 16999 $
51	*
52	*/
53	public class XMLConverter {
54
55	static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.XMLConverter.class.getName());
56
57	/** xerces parser */
58	protected DOMParser parser = null;
59
60	private boolean outputEscaping = true;
61
62
63	/** the no-args constructor */
64	public XMLConverter() {
65	try {
66	this.parser = new DOMParser();
67	this.parser.setFeature("http://xml.org/sax/features/validation", false);
68	// don't try and load external DTD - no need if we are not validating, and may cause connection errors if a proxy is not set up.
69	this.parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
70	// a performance test showed that having this on lead to increased
71	// memory use for small-medium docs, and not much gain for large
72	// docs.
73	// http://www.sosnoski.com/opensrc/xmlbench/conclusions.html
74	this.parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
75	} catch (Exception e) {
76	logger.error(e.getMessage());
77	}
78	}
79
80	/** sets the entity resolver. pass in null to unset it */
81	public void setEntityResolver(EntityResolver er) {
82	this.parser.setEntityResolver(er);
83	}
84
85	/** Given a Node representing an Element or Document, will return the
86	* Element/docroot Element. Returns null if the Node was not an element. */
87	public static Element nodeToElement(Node node)
88	{
89	if(node == null) {
90	return null;
91	}
92	short nodeType = node.getNodeType();
93
94	if (nodeType == Node.DOCUMENT_NODE) {
95	Document docNode = (Document)node;
96	return docNode.getDocumentElement() ;
97	}
98	else if (nodeType == Node.ELEMENT_NODE) {
99	return (Element)node;
100	}
101	else {
102	String message = "Expecting Document or Element node type but got "
103	+ node.getNodeName() + "\nReturning null";
104	System.err.println(message);
105	logger.warn(message);
106	return null;
107	}
108	}
109
110	/** returns a DOM Document */
111	public Document getDOM(String in) {
112
113	try {
114	Reader reader = new StringReader(in);
115	InputSource xml_source = new InputSource(reader);
116
117	this.parser.parse(xml_source);
118	Document doc = this.parser.getDocument();
119
120	return doc;
121
122	} catch (Exception e) {
123	logger.error(e.getMessage());
124	}
125	return null;
126	}
127
128	/** returns a DOM Document */
129	public Document getDOM(File in) {
130	try {
131	FileReader reader = new FileReader(in);
132	InputSource xml_source = new InputSource(reader);
133	this.parser.parse(xml_source);
134	Document doc = this.parser.getDocument();
135	return doc;
136
137	} catch (Exception e) {
138	logger.error(e.getMessage(), e);
139
140	}
141	return null;
142	}
143
144	/** returns a DOM document */
145	public Document getDOM(File in, String encoding) {
146	try {
147
148	InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
149	InputSource xml_source = new InputSource(isr);
150
151	this.parser.parse(xml_source);
152	Document doc = this.parser.getDocument();
153
154	return doc;
155
156	} catch (Exception e) {
157	logger.error(e.getMessage());
158	}
159	return null;
160	}
161
162
163	/** creates a new empty DOM Document */
164	public Document newDOM() {
165	Document doc = new DocumentImpl();
166	return doc;
167	}
168
169	/** returns the Node as a String */
170	public String getString(Node xmlNode)
171	{
172	outputEscaping = true;
173	StringBuffer xmlRepresentation = new StringBuffer();
174	getString(xmlNode, xmlRepresentation, 0, false);
175	return xmlRepresentation.toString();
176	}
177
178	/** returns the node as a nicely formatted String - this introduces extra
179	* text nodes if the String is read back in as a DOM, so should only be
180	* used for printing */
181	public String getPrettyString(Node xmlNode) {
182
183	outputEscaping = true;
184	StringBuffer xmlRepresentation = new StringBuffer();
185	getString(xmlNode, xmlRepresentation, 0, true);
186	return xmlRepresentation.toString();
187	}
188
189	private void getString(Node xmlNode, StringBuffer xmlRepresentation,
190	int depth, boolean pretty)
191	{
192
193	if (xmlNode == null) {
194	xmlRepresentation.append("<null>");
195	return;
196	}
197
198	short nodeType = xmlNode.getNodeType();
199	String nodeName = xmlNode.getNodeName();
200
201	if (nodeType == Node.DOCUMENT_NODE) {
202	Document xmlDocNode = (Document)xmlNode;
203
204	if (xmlDocNode.getDoctype() == null) {
205	System.err.println("Doctype is null.");
206	}
207	else {
208
209	DocumentType dt = xmlDocNode.getDoctype();
210
211	String name = dt.getName();
212	String pid = dt.getPublicId();
213	String sid = dt.getSystemId();
214
215	String doctype_str = "<!DOCTYPE " + dt.getName() + " PUBLIC \"" + pid + "\" \"" + sid + "\">\n";
216
217	xmlRepresentation.append(doctype_str);
218	}
219	getString(xmlDocNode.getDocumentElement(), xmlRepresentation, depth, pretty);
220	return;
221	}
222	// Handle Element nodes
223	if (nodeType == Node.ELEMENT_NODE) {
224	if (pretty) {
225	xmlRepresentation.append("\n");
226	for (int i = 0; i < depth; i++) {
227	xmlRepresentation.append(" ");
228	}
229	}
230
231	// Write opening tag
232	xmlRepresentation.append("<");
233	xmlRepresentation.append(nodeName);
234
235	// Write the node attributes
236	NamedNodeMap nodeAttributes = xmlNode.getAttributes();
237	for (int i = 0; i < nodeAttributes.getLength(); i++) {
238	Node attribute = nodeAttributes.item(i);
239	xmlRepresentation.append(" ");
240	xmlRepresentation.append(attribute.getNodeName());
241	xmlRepresentation.append("=\"");
242	xmlRepresentation.append(attribute.getNodeValue());
243	xmlRepresentation.append("\"");
244	}
245
246	// If the node has no children, close the opening tag and return
247	if (xmlNode.hasChildNodes() == false) {
248	// This produces somewhat ugly output, but it is necessary to compensate
249	// for display bugs in Netscape. Firstly, the space is needed before the
250	// closing bracket otherwise Netscape will ignore some tags (<br/>, for
251	// example). Also, a newline character would be expected after the tag,
252	// but this causes problems with the display of links (the link text
253	// will contain a newline character, which is displayed badly).
254	xmlRepresentation.append(" />");
255	return;
256	}
257
258	// Close the opening tag
259	xmlRepresentation.append(">");
260
261	// Apply recursively to the children of this node
262	// hack for nodes next to text nodes - dont make them pretty
263	// this is needed for text inside a <pre> element - any new lines
264	// or spaces around the span elements show up in the text
265	NodeList children = xmlNode.getChildNodes();
266	boolean do_pretty = pretty;
267	for (int i = 0; i < children.getLength(); i++) {
268	if (children.item(i).getNodeType()==Node.TEXT_NODE) {
269	do_pretty=false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
270	}
271	getString(children.item(i), xmlRepresentation, depth + 1, do_pretty);
272	}
273
274	// Write closing tag
275	if (pretty) {
276	if (xmlRepresentation.charAt(xmlRepresentation.length()-1) == '\n') {
277	for (int i = 0; i < depth; i++)
278	xmlRepresentation.append(" ");
279	}
280	}
281	xmlRepresentation.append("</");
282	xmlRepresentation.append(nodeName);
283	xmlRepresentation.append(">");
284	if (pretty) {
285	xmlRepresentation.append("\n");
286	}
287	}
288
289	// Handle Text nodes
290	else if (nodeType == Node.TEXT_NODE) {
291	String text = xmlNode.getNodeValue();
292
293	// Perform output escaping, if required
294	if (outputEscaping) {
295	text = text.replaceAll("&", "&"); // Must be done first!!
296	text = text.replaceAll("<", "<");
297	text = text.replaceAll(">", ">");
298	text = text.replaceAll("\"", """);
299	text = text.replaceAll("\'", "'");
300	}
301
302	// Remove any control-C characters
303	text = text.replaceAll("" + (char) 3, "");
304	xmlRepresentation.append(text);
305	}
306
307	// Handle Processing Instruction nodes
308	else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
309	if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
310	outputEscaping = false;
311	}
312	else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
313	outputEscaping = true;
314	}
315	else {
316	logger.warn("Unhandled processing instruction " + nodeName);
317	}
318	}
319
320	else if (nodeType == Node.COMMENT_NODE) {
321	String text = xmlNode.getNodeValue();
322	xmlRepresentation.append("<!-- ");
323	xmlRepresentation.append(text);
324	xmlRepresentation.append(" -->");
325	}
326
327
328
329	// A type of node that is not handled yet
330	else {
331	logger.warn("Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
332	}
333
334	return;
335	}
336
337	protected static String getNodeTypeString(short node_type) {
338
339	String type = "";
340	switch(node_type) {
341	case Node.ATTRIBUTE_NODE:
342	type="ATTRIBUTE_NODE";
343	break;
344	case Node.CDATA_SECTION_NODE:
345	type="CDATA_SECTION_NODE";
346	break;
347	case Node.COMMENT_NODE:
348	type="COMMENT_NODE";
349	break;
350	case Node.DOCUMENT_FRAGMENT_NODE:
351	type="DOCUMENT_FRAGMENT_NODE";
352	break;
353	case Node.DOCUMENT_NODE:
354	type="DOCUMENT_NODE";
355	break;
356	case Node.DOCUMENT_TYPE_NODE:
357	type="DOCUMENT_TYPE_NODE";
358	break;
359	case Node.ELEMENT_NODE:
360	type="ELEMENT_NODE";
361	break;
362	case Node.ENTITY_NODE:
363	type="ENTITY_NODE";
364	break;
365	case Node.ENTITY_REFERENCE_NODE:
366	type="ENTITY_REFERENCE_NODE";
367	break;
368	case Node.NOTATION_NODE:
369	type="NOTATION_NODE";
370	break;
371	case Node.PROCESSING_INSTRUCTION_NODE:
372	type="PROCESSING_INSTRUCTION_NODE";
373	break;
374	case Node.TEXT_NODE:
375	type="TEXT_NODE";
376	break;
377	default:
378	type="UNKNOWN";
379	}
380
381	return type;
382	}
383	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: