source: tags/ant-install-branch-merged-1/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 9873

Last change on this file since 9873 was 9873, checked in by (none), 19 years ago

This commit was manufactured by cvs2svn to create tag
'ant-install-branch-merged-1'.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.0 KB
Line 
1/*
2 * XMLConverter.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21// XML classes
22import org.w3c.dom.Document;
23import org.w3c.dom.Node;
24import org.w3c.dom.NodeList;
25import org.w3c.dom.NamedNodeMap;
26import org.xml.sax.InputSource;
27import org.xml.sax.EntityResolver;
28import org.apache.xerces.parsers.DOMParser;
29import org.apache.xerces.dom.DocumentImpl; // for new Documents
30
31// other java classes
32import java.io.Reader;
33import java.io.InputStreamReader;
34import java.io.StringReader;
35import java.io.File;
36import java.io.FileInputStream;
37import java.io.FileReader;
38
39/** XMLConverter - utility class for greenstone
40 *
41 * parses XML Strings into Documents, converts Nodes to Strings
42 * different parsers have different behaviour - can experiment in here
43 * now we only use xerces
44 *
45 * @author <a href="mailto:[email protected]">Katherine Don</a>
46 * @version $Revision: 9873 $
47 *
48 */
49public class XMLConverter {
50
51 /** xerces parser */
52 protected DOMParser parser = null;
53
54 private boolean outputEscaping = true;
55
56
57 /** the no-args constructor */
58 public XMLConverter() {
59 try {
60 this.parser = new DOMParser();
61 this.parser.setFeature("http://xml.org/sax/features/validation", false);
62
63 } catch (Exception e) {
64 System.err.println("XMLConverter:exception "+e.getMessage());
65 }
66 }
67
68 /** sets the entity resolver. pass in null to unset it */
69 public void setEntityResolver(EntityResolver er) {
70 this.parser.setEntityResolver(er);
71 }
72
73 /** returns a DOM Document */
74 public Document getDOM(String in) {
75
76 try {
77 Reader reader = new StringReader(in);
78 InputSource xml_source = new InputSource(reader);
79
80 this.parser.parse(xml_source);
81 Document doc = this.parser.getDocument();
82 return doc;
83
84 } catch (Exception e) {
85 System.err.println("XMLConverter.getDOM(String): exception "+e.getMessage());
86 }
87 return null;
88 }
89
90 /** returns a DOM Document */
91 public Document getDOM(File in) {
92 try {
93
94 FileReader reader = new FileReader(in);
95 InputSource xml_source = new InputSource(reader);
96
97 this.parser.parse(xml_source);
98 Document doc = this.parser.getDocument();
99
100 return doc;
101
102 } catch (Exception e) {
103 System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
104 e.printStackTrace();
105 }
106 return null;
107 }
108
109 /** returns a DOM document */
110 public Document getDOM(File in, String encoding) {
111 try {
112
113 InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
114 InputSource xml_source = new InputSource(isr);
115
116 this.parser.parse(xml_source);
117 Document doc = this.parser.getDocument();
118
119 return doc;
120
121 } catch (Exception e) {
122 System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
123 }
124 return null;
125 }
126
127
128 /** creates a new empty DOM Document */
129 public Document newDOM() {
130 Document doc = new DocumentImpl();
131 return doc;
132 }
133
134 /** returns the Node as a String */
135 public String getString(Node xmlNode)
136 {
137 outputEscaping = true;
138 return getString(xmlNode, 0, false);
139 }
140
141 /** returns the node as a nicely formatted String - this introduces extra
142 * text nodes if the String is read back in as a DOM, so should only be
143 * used for printing */
144 public String getPrettyString(Node xmlNode) {
145
146 outputEscaping = true;
147 return getString(xmlNode, 0, true);
148 }
149
150 private String getString(Node xmlNode, int depth, boolean pretty)
151 {
152 String xmlRepresentation = "";
153
154 if (xmlNode == null)
155 return "<null>";
156
157 short nodeType = xmlNode.getNodeType();
158 String nodeName = xmlNode.getNodeName();
159
160 if (nodeType == Node.DOCUMENT_NODE) {
161 return getString(((Document)xmlNode).getDocumentElement(), depth, pretty);
162 }
163 // Handle Element nodes
164 if (nodeType == Node.ELEMENT_NODE) {
165 if (pretty) {
166 xmlRepresentation += "\n";
167 for (int i = 0; i < depth; i++) {
168 xmlRepresentation += " ";
169 }
170 }
171
172 // Write opening tag
173 xmlRepresentation += "<" + nodeName;
174
175 // Write the node attributes
176 NamedNodeMap nodeAttributes = xmlNode.getAttributes();
177 for (int i = 0; i < nodeAttributes.getLength(); i++) {
178 Node attribute = nodeAttributes.item(i);
179 xmlRepresentation += " " + attribute.getNodeName() + "=";
180 xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
181 }
182
183 // If the node has no children, close the opening tag and return
184 if (xmlNode.hasChildNodes() == false) {
185 // This produces somewhat ugly output, but it is necessary to compensate
186 // for display bugs in Netscape. Firstly, the space is needed before the
187 // closing bracket otherwise Netscape will ignore some tags (<br/>, for
188 // example). Also, a newline character would be expected after the tag,
189 // but this causes problems with the display of links (the link text
190 // will contain a newline character, which is displayed badly).
191 xmlRepresentation += " />";
192 return xmlRepresentation;
193 }
194
195 // Close the opening tag
196 xmlRepresentation += ">";
197
198 // Apply recursively to the children of this node
199 // hack for nodes next to text nodes - dont make them pretty
200 // this is needed for text inside a <pre> element - any new lines
201 // or spaces around the span elements show up in the text
202 NodeList children = xmlNode.getChildNodes();
203 boolean do_pretty = pretty;
204 for (int i = 0; i < children.getLength(); i++) {
205 if (children.item(i).getNodeType()==Node.TEXT_NODE) {
206 do_pretty=false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
207 }
208 xmlRepresentation += getString(children.item(i), depth + 1, do_pretty);
209 }
210
211 // Write closing tag
212 if (pretty) {
213 if (xmlRepresentation.endsWith("\n")) {
214 for (int i = 0; i < depth; i++)
215 xmlRepresentation += " ";
216 }
217 }
218 xmlRepresentation += "</" + nodeName + ">";
219 if (pretty) {
220 xmlRepresentation += "\n";
221 }
222 }
223
224 // Handle Text nodes
225 else if (nodeType == Node.TEXT_NODE) {
226 String text = xmlNode.getNodeValue();
227
228 // Perform output escaping, if required
229 if (outputEscaping) {
230 text = text.replaceAll("&", "&amp;"); // Must be done first!!
231 text = text.replaceAll("<", "&lt;");
232 text = text.replaceAll(">", "&gt;");
233 text = text.replaceAll("\"", "&quot;");
234 text = text.replaceAll("\'", "&apos;");
235 }
236
237 // Remove any control-C characters
238 text = text.replaceAll("" + (char) 3, "");
239 xmlRepresentation += text;
240 }
241
242 // Handle Processing Instruction nodes
243 else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
244 if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
245 outputEscaping = false;
246 }
247 else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
248 outputEscaping = true;
249 }
250 else {
251 System.err.println("XMLConverter.getString(): Warning: Unhandled processing instruction " + nodeName);
252 }
253 }
254
255 else if (nodeType == Node.COMMENT_NODE) {
256 String text = xmlNode.getNodeValue();
257 xmlRepresentation += "<!-- "+text+" -->";
258 }
259
260
261
262 // A type of node that is not handled yet
263 else {
264 System.err.println("XMLConverter.getString(): Warning: Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
265 }
266
267 return xmlRepresentation;
268 }
269
270 protected static String getNodeTypeString(short node_type) {
271
272 String type = "";
273 switch(node_type) {
274 case Node.ATTRIBUTE_NODE:
275 type="ATTRIBUTE_NODE";
276 break;
277 case Node.CDATA_SECTION_NODE:
278 type="CDATA_SECTION_NODE";
279 break;
280 case Node.COMMENT_NODE:
281 type="COMMENT_NODE";
282 break;
283 case Node.DOCUMENT_FRAGMENT_NODE:
284 type="DOCUMENT_FRAGMENT_NODE";
285 break;
286 case Node.DOCUMENT_NODE:
287 type="DOCUMENT_NODE";
288 break;
289 case Node.DOCUMENT_TYPE_NODE:
290 type="DOCUMENT_TYPE_NODE";
291 break;
292 case Node.ELEMENT_NODE:
293 type="ELEMENT_NODE";
294 break;
295 case Node.ENTITY_NODE:
296 type="ENTITY_NODE";
297 break;
298 case Node.ENTITY_REFERENCE_NODE:
299 type="ENTITY_REFERENCE_NODE";
300 break;
301 case Node.NOTATION_NODE:
302 type="NOTATION_NODE";
303 break;
304 case Node.PROCESSING_INSTRUCTION_NODE:
305 type="PROCESSING_INSTRUCTION_NODE";
306 break;
307 case Node.TEXT_NODE:
308 type="TEXT_NODE";
309 break;
310 default:
311 type="UNKNOWN";
312 }
313
314 return type;
315 }
316}
Note: See TracBrowser for help on using the repository browser.