Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 4013

Last change on this file since 4013 was 4013, checked in by mdewsnip, 21 years ago
Changed string comparisons to use .equals().
Property svn:keywords set to `Author Date Id Revision`
File size: 7.4 KB

Line
1	/*
2	* XMLConverter.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.util;
20
21	// XML classes
22	import org.w3c.dom.Document;
23	import org.w3c.dom.Node;
24	import org.w3c.dom.NodeList;
25	import org.w3c.dom.NamedNodeMap;
26	import org.xml.sax.InputSource;
27	import org.apache.xerces.parsers.DOMParser;
28	import org.apache.xerces.dom.DocumentImpl; // for new Documents
29
30	// other java classes
31	import java.io.Reader;
32	import java.io.StringReader;
33	import java.io.File;
34	import java.io.FileReader;
35
36	/** XMLConverter - utitlity class for greenstone
37	*
38	* parses XML Strings into Documents, converts Nodes to Strings
39	* different parsers have different behaviour - can experiment in here
40	* now we only use xerces
41	*
42	* @author <a href="mailto:[email protected]">Katherine Don</a>
43	* @version $Revision: 4013 $
44	*
45	*/
46	public class XMLConverter {
47
48	/** xerces parser */
49	protected DOMParser parser_ = null;
50
51	private boolean outputEscaping = true;
52
53
54	/** the no-args constructor */
55	public XMLConverter() {
56	try {
57	parser_ = new DOMParser();
58	} catch (Exception e) {
59	System.err.println("XMLConverter:exception "+e.getMessage());
60	}
61	}
62
63	/** returns a DOM Document */
64	public Document getDOM(String in) {
65
66	try {
67	Reader reader = new StringReader(in);
68	InputSource xml_source = new InputSource(reader);
69
70	parser_.parse(xml_source);
71	Document doc = parser_.getDocument();
72	return doc;
73
74	} catch (Exception e) {
75	System.err.println("XMLConverter.getDOM(String): exception "+e.getMessage());
76	}
77	return null;
78	}
79
80	/** returns a DOM Document */
81	public Document getDOM(File in) {
82	try {
83
84	Reader reader = new FileReader(in);
85	InputSource xml_source = new InputSource(reader);
86
87	parser_.parse(xml_source);
88	Document doc = parser_.getDocument();
89
90	return doc;
91
92	} catch (Exception e) {
93	System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
94	}
95	return null;
96	}
97
98	/** creates a new empty DOM Document */
99	public Document newDOM() {
100	Document doc = new DocumentImpl();
101	return doc;
102	}
103
104	/** returns the Node as a String */
105	public String getString(Node xmlNode)
106	{
107	outputEscaping = true;
108	return getString(xmlNode, 0, false);
109	}
110
111	/** returns the node as a nicely formatted String - this introduces extra
112	* text nodes if the String is read back in as a DOM, so should only be
113	* used for printing */
114	public String getPrettyString(Node xmlNode) {
115
116	outputEscaping = true;
117	return getString(xmlNode, 0, true);
118	}
119
120	private String getString(Node xmlNode, int depth, boolean pretty)
121	{
122	String xmlRepresentation = "";
123
124	if (xmlNode == null)
125	return "<null>";
126
127	short nodeType = xmlNode.getNodeType();
128	String nodeName = xmlNode.getNodeName();
129
130	// Handle Element nodes
131	if (nodeType == Node.ELEMENT_NODE) {
132	if (pretty) {
133	xmlRepresentation += "\n";
134	for (int i = 0; i < depth; i++) {
135	xmlRepresentation += " ";
136	}
137	}
138
139	// Write opening tag
140	xmlRepresentation += "<" + nodeName;
141
142	// Write the node attributes
143	NamedNodeMap nodeAttributes = xmlNode.getAttributes();
144	for (int i = 0; i < nodeAttributes.getLength(); i++) {
145	Node attribute = nodeAttributes.item(i);
146	xmlRepresentation += " " + attribute.getNodeName() + "=";
147	xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
148	}
149
150	// If the node has no children, close the opening tag and return
151	if (xmlNode.hasChildNodes() == false) {
152	// This produces somewhat ugly output, but it is necessary to compensate
153	// for display bugs in Netscape. Firstly, the space is needed before the
154	// closing bracket otherwise Netscape will ignore some tags (<br/>, for
155	// example). Also, a newline character would be expected after the tag,
156	// but this causes problems with the display of links (the link text
157	// will contain a newline character, which is displayed badly).
158	xmlRepresentation += " />";
159	return xmlRepresentation;
160	}
161
162	// Close the opening tag
163	xmlRepresentation += ">";
164
165	// Apply recursively to the children of this node
166	NodeList children = xmlNode.getChildNodes();
167	for (int i = 0; i < children.getLength(); i++) {
168	xmlRepresentation += getString(children.item(i), depth + 1, pretty);
169	}
170
171	// Write closing tag
172	if (pretty) {
173	if (xmlRepresentation.endsWith("\n")) {
174	for (int i = 0; i < depth; i++)
175	xmlRepresentation += " ";
176	}
177	}
178	xmlRepresentation += "</" + nodeName + ">";
179	if (pretty) {
180	xmlRepresentation += "\n";
181	}
182	}
183
184	// Handle Text nodes
185	else if (nodeType == Node.TEXT_NODE) {
186	String text = xmlNode.getNodeValue();
187
188	// Perform output escaping, if required
189	if (outputEscaping) {
190	text = text.replaceAll("&", "&"); // Must be done first!!
191	text = text.replaceAll("<", "<");
192	text = text.replaceAll(">", ">");
193	text = text.replaceAll("\"", """);
194	text = text.replaceAll("\'", "'");
195	}
196
197	// Remove any control-C characters
198	text = text.replaceAll("" + (char) 3, "");
199	xmlRepresentation += text;
200	}
201
202	// Handle Processing Instruction nodes
203	else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
204	if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
205	outputEscaping = false;
206	}
207	else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
208	outputEscaping = true;
209	}
210	else {
211	System.err.println("XMLConverter.getString(): Warning: Unhandled processing instruction " + nodeName);
212	}
213	}
214
215	// A type of node that is not handled yet
216	else {
217	System.err.println("XMLConverter.getString(): Warning: Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
218	}
219
220	return xmlRepresentation;
221	}
222
223	protected static String getNodeTypeString(short node_type) {
224
225	String type = "";
226	switch(node_type) {
227	case Node.ATTRIBUTE_NODE:
228	type="ATTRIBUTE_NODE";
229	break;
230	case Node.CDATA_SECTION_NODE:
231	type="CDATA_SECTION_NODE";
232	break;
233	case Node.COMMENT_NODE:
234	type="COMMENT_NODE";
235	break;
236	case Node.DOCUMENT_FRAGMENT_NODE:
237	type="DOCUMENT_FRAGMENT_NODE";
238	break;
239	case Node.DOCUMENT_NODE:
240	type="DOCUMENT_NODE";
241	break;
242	case Node.DOCUMENT_TYPE_NODE:
243	type="DOCUMENT_TYPE_NODE";
244	break;
245	case Node.ELEMENT_NODE:
246	type="ELEMENT_NODE";
247	break;
248	case Node.ENTITY_NODE:
249	type="ENTITY_NODE";
250	break;
251	case Node.ENTITY_REFERENCE_NODE:
252	type="ENTITY_REFERENCE_NODE";
253	break;
254	case Node.NOTATION_NODE:
255	type="NOTATION_NODE";
256	break;
257	case Node.PROCESSING_INSTRUCTION_NODE:
258	type="PROCESSING_INSTRUCTION_NODE";
259	break;
260	case Node.TEXT_NODE:
261	type="TEXT_NODE";
262	break;
263	default:
264	type="UNKNOWN";
265	}
266
267	return type;
268	}
269	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: