Context Navigation

source: trunk/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 4285

Last change on this file since 4285 was 4285, checked in by kjdon, 21 years ago
changed slightly the pretty printing stuff - any nodes that are following siblings to a text node are not printed pretty - hack for printing span elements - if inside a pre element, the indentation appears in the page
Property svn:keywords set to `Author Date Id Revision`
File size: 8.4 KB

Line
1	/*
2	* XMLConverter.java
3	* Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4	*
5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program; if not, write to the Free Software
17	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18	*/
19	package org.greenstone.gsdl3.util;
20
21	// XML classes
22	import org.w3c.dom.Document;
23	import org.w3c.dom.Node;
24	import org.w3c.dom.NodeList;
25	import org.w3c.dom.NamedNodeMap;
26	import org.xml.sax.InputSource;
27	import org.apache.xerces.parsers.DOMParser;
28	import org.apache.xerces.dom.DocumentImpl; // for new Documents
29
30	// other java classes
31	import java.io.Reader;
32	import java.io.InputStreamReader;
33	import java.io.StringReader;
34	import java.io.File;
35	import java.io.FileInputStream;
36	import java.io.FileReader;
37
38	/** XMLConverter - utitlity class for greenstone
39	*
40	* parses XML Strings into Documents, converts Nodes to Strings
41	* different parsers have different behaviour - can experiment in here
42	* now we only use xerces
43	*
44	* @author <a href="mailto:[email protected]">Katherine Don</a>
45	* @version $Revision: 4285 $
46	*
47	*/
48	public class XMLConverter {
49
50	/** xerces parser */
51	protected DOMParser parser_ = null;
52
53	private boolean outputEscaping = true;
54
55
56	/** the no-args constructor */
57	public XMLConverter() {
58	try {
59	parser_ = new DOMParser();
60	} catch (Exception e) {
61	System.err.println("XMLConverter:exception "+e.getMessage());
62	}
63	}
64
65	/** returns a DOM Document */
66	public Document getDOM(String in) {
67
68	try {
69	Reader reader = new StringReader(in);
70	InputSource xml_source = new InputSource(reader);
71
72	parser_.parse(xml_source);
73	Document doc = parser_.getDocument();
74	return doc;
75
76	} catch (Exception e) {
77	System.err.println("XMLConverter.getDOM(String): exception "+e.getMessage());
78	}
79	return null;
80	}
81
82	/** returns a DOM Document */
83	public Document getDOM(File in) {
84	try {
85
86	FileReader reader = new FileReader(in);
87	InputSource xml_source = new InputSource(reader);
88
89	parser_.parse(xml_source);
90	Document doc = parser_.getDocument();
91
92	return doc;
93
94	} catch (Exception e) {
95	System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
96	}
97	return null;
98	}
99
100	/** returns a DOM document */
101	public Document getDOM(File in, String encoding) {
102	try {
103
104	InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
105	InputSource xml_source = new InputSource(isr);
106
107	parser_.parse(xml_source);
108	Document doc = parser_.getDocument();
109
110	return doc;
111
112	} catch (Exception e) {
113	System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
114	}
115	return null;
116	}
117
118
119	/** creates a new empty DOM Document */
120	public Document newDOM() {
121	Document doc = new DocumentImpl();
122	return doc;
123	}
124
125	/** returns the Node as a String */
126	public String getString(Node xmlNode)
127	{
128	outputEscaping = true;
129	return getString(xmlNode, 0, false);
130	}
131
132	/** returns the node as a nicely formatted String - this introduces extra
133	* text nodes if the String is read back in as a DOM, so should only be
134	* used for printing */
135	public String getPrettyString(Node xmlNode) {
136
137	outputEscaping = true;
138	return getString(xmlNode, 0, true);
139	}
140
141	private String getString(Node xmlNode, int depth, boolean pretty)
142	{
143	String xmlRepresentation = "";
144
145	if (xmlNode == null)
146	return "<null>";
147
148	short nodeType = xmlNode.getNodeType();
149	String nodeName = xmlNode.getNodeName();
150
151	// Handle Element nodes
152	if (nodeType == Node.ELEMENT_NODE) {
153	if (pretty) {
154	xmlRepresentation += "\n";
155	for (int i = 0; i < depth; i++) {
156	xmlRepresentation += " ";
157	}
158	}
159
160	// Write opening tag
161	xmlRepresentation += "<" + nodeName;
162
163	// Write the node attributes
164	NamedNodeMap nodeAttributes = xmlNode.getAttributes();
165	for (int i = 0; i < nodeAttributes.getLength(); i++) {
166	Node attribute = nodeAttributes.item(i);
167	xmlRepresentation += " " + attribute.getNodeName() + "=";
168	xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
169	}
170
171	// If the node has no children, close the opening tag and return
172	if (xmlNode.hasChildNodes() == false) {
173	// This produces somewhat ugly output, but it is necessary to compensate
174	// for display bugs in Netscape. Firstly, the space is needed before the
175	// closing bracket otherwise Netscape will ignore some tags (<br/>, for
176	// example). Also, a newline character would be expected after the tag,
177	// but this causes problems with the display of links (the link text
178	// will contain a newline character, which is displayed badly).
179	xmlRepresentation += " />";
180	return xmlRepresentation;
181	}
182
183	// Close the opening tag
184	xmlRepresentation += ">";
185
186	// Apply recursively to the children of this node
187	// hack for nodes next to text nodes - dont make them pretty
188	// this is needed for text inside a <pre> element - any new lines
189	// or spaces around the span elements show up in the text
190	NodeList children = xmlNode.getChildNodes();
191	boolean do_pretty = pretty;
192	for (int i = 0; i < children.getLength(); i++) {
193	if (children.item(i).getNodeType()==Node.TEXT_NODE) {
194	do_pretty=false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
195	}
196	xmlRepresentation += getString(children.item(i), depth + 1, do_pretty);
197	}
198
199	// Write closing tag
200	if (pretty) {
201	if (xmlRepresentation.endsWith("\n")) {
202	for (int i = 0; i < depth; i++)
203	xmlRepresentation += " ";
204	}
205	}
206	xmlRepresentation += "</" + nodeName + ">";
207	if (pretty) {
208	xmlRepresentation += "\n";
209	}
210	}
211
212	// Handle Text nodes
213	else if (nodeType == Node.TEXT_NODE) {
214	String text = xmlNode.getNodeValue();
215
216	// Perform output escaping, if required
217	if (outputEscaping) {
218	text = text.replaceAll("&", "&"); // Must be done first!!
219	text = text.replaceAll("<", "<");
220	text = text.replaceAll(">", ">");
221	text = text.replaceAll("\"", """);
222	text = text.replaceAll("\'", "'");
223	}
224
225	// Remove any control-C characters
226	text = text.replaceAll("" + (char) 3, "");
227	xmlRepresentation += text;
228	}
229
230	// Handle Processing Instruction nodes
231	else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
232	if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
233	outputEscaping = false;
234	}
235	else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
236	outputEscaping = true;
237	}
238	else {
239	System.err.println("XMLConverter.getString(): Warning: Unhandled processing instruction " + nodeName);
240	}
241	}
242
243	// A type of node that is not handled yet
244	else {
245	System.err.println("XMLConverter.getString(): Warning: Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
246	}
247
248	return xmlRepresentation;
249	}
250
251	protected static String getNodeTypeString(short node_type) {
252
253	String type = "";
254	switch(node_type) {
255	case Node.ATTRIBUTE_NODE:
256	type="ATTRIBUTE_NODE";
257	break;
258	case Node.CDATA_SECTION_NODE:
259	type="CDATA_SECTION_NODE";
260	break;
261	case Node.COMMENT_NODE:
262	type="COMMENT_NODE";
263	break;
264	case Node.DOCUMENT_FRAGMENT_NODE:
265	type="DOCUMENT_FRAGMENT_NODE";
266	break;
267	case Node.DOCUMENT_NODE:
268	type="DOCUMENT_NODE";
269	break;
270	case Node.DOCUMENT_TYPE_NODE:
271	type="DOCUMENT_TYPE_NODE";
272	break;
273	case Node.ELEMENT_NODE:
274	type="ELEMENT_NODE";
275	break;
276	case Node.ENTITY_NODE:
277	type="ENTITY_NODE";
278	break;
279	case Node.ENTITY_REFERENCE_NODE:
280	type="ENTITY_REFERENCE_NODE";
281	break;
282	case Node.NOTATION_NODE:
283	type="NOTATION_NODE";
284	break;
285	case Node.PROCESSING_INSTRUCTION_NODE:
286	type="PROCESSING_INSTRUCTION_NODE";
287	break;
288	case Node.TEXT_NODE:
289	type="TEXT_NODE";
290	break;
291	default:
292	type="UNKNOWN";
293	}
294
295	return type;
296	}
297	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: