Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 5187

Last change on this file since 5187 was 5187, checked in by kjdon, 21 years ago
code tidy, added method to set the entity resolver for the parser. not actually used at the moment
Property svn:keywords set to `Author Date Id Revision`
File size: 8.7 KB

Line
1	/*
2	* XMLConverter.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.util;
20
21	// XML classes
22	import org.w3c.dom.Document;
23	import org.w3c.dom.Node;
24	import org.w3c.dom.NodeList;
25	import org.w3c.dom.NamedNodeMap;
26	import org.xml.sax.InputSource;
27	import org.xml.sax.EntityResolver;
28	import org.apache.xerces.parsers.DOMParser;
29	import org.apache.xerces.dom.DocumentImpl; // for new Documents
30
31	// other java classes
32	import java.io.Reader;
33	import java.io.InputStreamReader;
34	import java.io.StringReader;
35	import java.io.File;
36	import java.io.FileInputStream;
37	import java.io.FileReader;
38
39	/** XMLConverter - utility class for greenstone
40	*
41	* parses XML Strings into Documents, converts Nodes to Strings
42	* different parsers have different behaviour - can experiment in here
43	* now we only use xerces
44	*
45	* @author <a href="mailto:[email protected]">Katherine Don</a>
46	* @version $Revision: 5187 $
47	*
48	*/
49	public class XMLConverter {
50
51	/** xerces parser */
52	protected DOMParser parser = null;
53
54	private boolean outputEscaping = true;
55
56
57	/** the no-args constructor */
58	public XMLConverter() {
59	try {
60	this.parser = new DOMParser();
61	this.parser.setFeature("http://xml.org/sax/features/validation", false);
62
63	} catch (Exception e) {
64	System.err.println("XMLConverter:exception "+e.getMessage());
65	}
66	}
67
68	/** sets the entity resolver. pass in null to unset it */
69	public void setEntityResolver(EntityResolver er) {
70	this.parser.setEntityResolver(er);
71	}
72
73	/** returns a DOM Document */
74	public Document getDOM(String in) {
75
76	try {
77	Reader reader = new StringReader(in);
78	InputSource xml_source = new InputSource(reader);
79
80	this.parser.parse(xml_source);
81	Document doc = this.parser.getDocument();
82	return doc;
83
84	} catch (Exception e) {
85	System.err.println("XMLConverter.getDOM(String): exception "+e.getMessage());
86	}
87	return null;
88	}
89
90	/** returns a DOM Document */
91	public Document getDOM(File in) {
92	try {
93
94	FileReader reader = new FileReader(in);
95	InputSource xml_source = new InputSource(reader);
96
97	this.parser.parse(xml_source);
98	Document doc = this.parser.getDocument();
99
100	return doc;
101
102	} catch (Exception e) {
103	System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
104	e.printStackTrace();
105	}
106	return null;
107	}
108
109	/** returns a DOM document */
110	public Document getDOM(File in, String encoding) {
111	try {
112
113	InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
114	InputSource xml_source = new InputSource(isr);
115
116	this.parser.parse(xml_source);
117	Document doc = this.parser.getDocument();
118
119	return doc;
120
121	} catch (Exception e) {
122	System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
123	}
124	return null;
125	}
126
127
128	/** creates a new empty DOM Document */
129	public Document newDOM() {
130	Document doc = new DocumentImpl();
131	return doc;
132	}
133
134	/** returns the Node as a String */
135	public String getString(Node xmlNode)
136	{
137	outputEscaping = true;
138	return getString(xmlNode, 0, false);
139	}
140
141	/** returns the node as a nicely formatted String - this introduces extra
142	* text nodes if the String is read back in as a DOM, so should only be
143	* used for printing */
144	public String getPrettyString(Node xmlNode) {
145
146	outputEscaping = true;
147	return getString(xmlNode, 0, true);
148	}
149
150	private String getString(Node xmlNode, int depth, boolean pretty)
151	{
152	String xmlRepresentation = "";
153
154	if (xmlNode == null)
155	return "<null>";
156
157	short nodeType = xmlNode.getNodeType();
158	String nodeName = xmlNode.getNodeName();
159
160	// Handle Element nodes
161	if (nodeType == Node.ELEMENT_NODE) {
162	if (pretty) {
163	xmlRepresentation += "\n";
164	for (int i = 0; i < depth; i++) {
165	xmlRepresentation += " ";
166	}
167	}
168
169	// Write opening tag
170	xmlRepresentation += "<" + nodeName;
171
172	// Write the node attributes
173	NamedNodeMap nodeAttributes = xmlNode.getAttributes();
174	for (int i = 0; i < nodeAttributes.getLength(); i++) {
175	Node attribute = nodeAttributes.item(i);
176	xmlRepresentation += " " + attribute.getNodeName() + "=";
177	xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
178	}
179
180	// If the node has no children, close the opening tag and return
181	if (xmlNode.hasChildNodes() == false) {
182	// This produces somewhat ugly output, but it is necessary to compensate
183	// for display bugs in Netscape. Firstly, the space is needed before the
184	// closing bracket otherwise Netscape will ignore some tags (<br/>, for
185	// example). Also, a newline character would be expected after the tag,
186	// but this causes problems with the display of links (the link text
187	// will contain a newline character, which is displayed badly).
188	xmlRepresentation += " />";
189	return xmlRepresentation;
190	}
191
192	// Close the opening tag
193	xmlRepresentation += ">";
194
195	// Apply recursively to the children of this node
196	// hack for nodes next to text nodes - dont make them pretty
197	// this is needed for text inside a <pre> element - any new lines
198	// or spaces around the span elements show up in the text
199	NodeList children = xmlNode.getChildNodes();
200	boolean do_pretty = pretty;
201	for (int i = 0; i < children.getLength(); i++) {
202	if (children.item(i).getNodeType()==Node.TEXT_NODE) {
203	do_pretty=false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
204	}
205	xmlRepresentation += getString(children.item(i), depth + 1, do_pretty);
206	}
207
208	// Write closing tag
209	if (pretty) {
210	if (xmlRepresentation.endsWith("\n")) {
211	for (int i = 0; i < depth; i++)
212	xmlRepresentation += " ";
213	}
214	}
215	xmlRepresentation += "</" + nodeName + ">";
216	if (pretty) {
217	xmlRepresentation += "\n";
218	}
219	}
220
221	// Handle Text nodes
222	else if (nodeType == Node.TEXT_NODE) {
223	String text = xmlNode.getNodeValue();
224
225	// Perform output escaping, if required
226	if (outputEscaping) {
227	text = text.replaceAll("&", "&"); // Must be done first!!
228	text = text.replaceAll("<", "<");
229	text = text.replaceAll(">", ">");
230	text = text.replaceAll("\"", """);
231	text = text.replaceAll("\'", "'");
232	}
233
234	// Remove any control-C characters
235	text = text.replaceAll("" + (char) 3, "");
236	xmlRepresentation += text;
237	}
238
239	// Handle Processing Instruction nodes
240	else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
241	if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
242	outputEscaping = false;
243	}
244	else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
245	outputEscaping = true;
246	}
247	else {
248	System.err.println("XMLConverter.getString(): Warning: Unhandled processing instruction " + nodeName);
249	}
250	}
251
252	// A type of node that is not handled yet
253	else {
254	System.err.println("XMLConverter.getString(): Warning: Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
255	}
256
257	return xmlRepresentation;
258	}
259
260	protected static String getNodeTypeString(short node_type) {
261
262	String type = "";
263	switch(node_type) {
264	case Node.ATTRIBUTE_NODE:
265	type="ATTRIBUTE_NODE";
266	break;
267	case Node.CDATA_SECTION_NODE:
268	type="CDATA_SECTION_NODE";
269	break;
270	case Node.COMMENT_NODE:
271	type="COMMENT_NODE";
272	break;
273	case Node.DOCUMENT_FRAGMENT_NODE:
274	type="DOCUMENT_FRAGMENT_NODE";
275	break;
276	case Node.DOCUMENT_NODE:
277	type="DOCUMENT_NODE";
278	break;
279	case Node.DOCUMENT_TYPE_NODE:
280	type="DOCUMENT_TYPE_NODE";
281	break;
282	case Node.ELEMENT_NODE:
283	type="ELEMENT_NODE";
284	break;
285	case Node.ENTITY_NODE:
286	type="ENTITY_NODE";
287	break;
288	case Node.ENTITY_REFERENCE_NODE:
289	type="ENTITY_REFERENCE_NODE";
290	break;
291	case Node.NOTATION_NODE:
292	type="NOTATION_NODE";
293	break;
294	case Node.PROCESSING_INSTRUCTION_NODE:
295	type="PROCESSING_INSTRUCTION_NODE";
296	break;
297	case Node.TEXT_NODE:
298	type="TEXT_NODE";
299	break;
300	default:
301	type="UNKNOWN";
302	}
303
304	return type;
305	}
306	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: