source: trunk/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 5187

Last change on this file since 5187 was 5187, checked in by kjdon, 21 years ago

code tidy, added method to set the entity resolver for the parser. not actually used at the moment

  • Property svn:keywords set to Author Date Id Revision
File size: 8.7 KB
Line 
1/*
2 * XMLConverter.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21// XML classes
22import org.w3c.dom.Document;
23import org.w3c.dom.Node;
24import org.w3c.dom.NodeList;
25import org.w3c.dom.NamedNodeMap;
26import org.xml.sax.InputSource;
27import org.xml.sax.EntityResolver;
28import org.apache.xerces.parsers.DOMParser;
29import org.apache.xerces.dom.DocumentImpl; // for new Documents
30
31// other java classes
32import java.io.Reader;
33import java.io.InputStreamReader;
34import java.io.StringReader;
35import java.io.File;
36import java.io.FileInputStream;
37import java.io.FileReader;
38
39/** XMLConverter - utility class for greenstone
40 *
41 * parses XML Strings into Documents, converts Nodes to Strings
42 * different parsers have different behaviour - can experiment in here
43 * now we only use xerces
44 *
45 * @author <a href="mailto:[email protected]">Katherine Don</a>
46 * @version $Revision: 5187 $
47 *
48 */
49public class XMLConverter {
50
51 /** xerces parser */
52 protected DOMParser parser = null;
53
54 private boolean outputEscaping = true;
55
56
57 /** the no-args constructor */
58 public XMLConverter() {
59 try {
60 this.parser = new DOMParser();
61 this.parser.setFeature("http://xml.org/sax/features/validation", false);
62
63 } catch (Exception e) {
64 System.err.println("XMLConverter:exception "+e.getMessage());
65 }
66 }
67
68 /** sets the entity resolver. pass in null to unset it */
69 public void setEntityResolver(EntityResolver er) {
70 this.parser.setEntityResolver(er);
71 }
72
73 /** returns a DOM Document */
74 public Document getDOM(String in) {
75
76 try {
77 Reader reader = new StringReader(in);
78 InputSource xml_source = new InputSource(reader);
79
80 this.parser.parse(xml_source);
81 Document doc = this.parser.getDocument();
82 return doc;
83
84 } catch (Exception e) {
85 System.err.println("XMLConverter.getDOM(String): exception "+e.getMessage());
86 }
87 return null;
88 }
89
90 /** returns a DOM Document */
91 public Document getDOM(File in) {
92 try {
93
94 FileReader reader = new FileReader(in);
95 InputSource xml_source = new InputSource(reader);
96
97 this.parser.parse(xml_source);
98 Document doc = this.parser.getDocument();
99
100 return doc;
101
102 } catch (Exception e) {
103 System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
104 e.printStackTrace();
105 }
106 return null;
107 }
108
109 /** returns a DOM document */
110 public Document getDOM(File in, String encoding) {
111 try {
112
113 InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
114 InputSource xml_source = new InputSource(isr);
115
116 this.parser.parse(xml_source);
117 Document doc = this.parser.getDocument();
118
119 return doc;
120
121 } catch (Exception e) {
122 System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
123 }
124 return null;
125 }
126
127
128 /** creates a new empty DOM Document */
129 public Document newDOM() {
130 Document doc = new DocumentImpl();
131 return doc;
132 }
133
134 /** returns the Node as a String */
135 public String getString(Node xmlNode)
136 {
137 outputEscaping = true;
138 return getString(xmlNode, 0, false);
139 }
140
141 /** returns the node as a nicely formatted String - this introduces extra
142 * text nodes if the String is read back in as a DOM, so should only be
143 * used for printing */
144 public String getPrettyString(Node xmlNode) {
145
146 outputEscaping = true;
147 return getString(xmlNode, 0, true);
148 }
149
150 private String getString(Node xmlNode, int depth, boolean pretty)
151 {
152 String xmlRepresentation = "";
153
154 if (xmlNode == null)
155 return "<null>";
156
157 short nodeType = xmlNode.getNodeType();
158 String nodeName = xmlNode.getNodeName();
159
160 // Handle Element nodes
161 if (nodeType == Node.ELEMENT_NODE) {
162 if (pretty) {
163 xmlRepresentation += "\n";
164 for (int i = 0; i < depth; i++) {
165 xmlRepresentation += " ";
166 }
167 }
168
169 // Write opening tag
170 xmlRepresentation += "<" + nodeName;
171
172 // Write the node attributes
173 NamedNodeMap nodeAttributes = xmlNode.getAttributes();
174 for (int i = 0; i < nodeAttributes.getLength(); i++) {
175 Node attribute = nodeAttributes.item(i);
176 xmlRepresentation += " " + attribute.getNodeName() + "=";
177 xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
178 }
179
180 // If the node has no children, close the opening tag and return
181 if (xmlNode.hasChildNodes() == false) {
182 // This produces somewhat ugly output, but it is necessary to compensate
183 // for display bugs in Netscape. Firstly, the space is needed before the
184 // closing bracket otherwise Netscape will ignore some tags (<br/>, for
185 // example). Also, a newline character would be expected after the tag,
186 // but this causes problems with the display of links (the link text
187 // will contain a newline character, which is displayed badly).
188 xmlRepresentation += " />";
189 return xmlRepresentation;
190 }
191
192 // Close the opening tag
193 xmlRepresentation += ">";
194
195 // Apply recursively to the children of this node
196 // hack for nodes next to text nodes - dont make them pretty
197 // this is needed for text inside a <pre> element - any new lines
198 // or spaces around the span elements show up in the text
199 NodeList children = xmlNode.getChildNodes();
200 boolean do_pretty = pretty;
201 for (int i = 0; i < children.getLength(); i++) {
202 if (children.item(i).getNodeType()==Node.TEXT_NODE) {
203 do_pretty=false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
204 }
205 xmlRepresentation += getString(children.item(i), depth + 1, do_pretty);
206 }
207
208 // Write closing tag
209 if (pretty) {
210 if (xmlRepresentation.endsWith("\n")) {
211 for (int i = 0; i < depth; i++)
212 xmlRepresentation += " ";
213 }
214 }
215 xmlRepresentation += "</" + nodeName + ">";
216 if (pretty) {
217 xmlRepresentation += "\n";
218 }
219 }
220
221 // Handle Text nodes
222 else if (nodeType == Node.TEXT_NODE) {
223 String text = xmlNode.getNodeValue();
224
225 // Perform output escaping, if required
226 if (outputEscaping) {
227 text = text.replaceAll("&", "&amp;"); // Must be done first!!
228 text = text.replaceAll("<", "&lt;");
229 text = text.replaceAll(">", "&gt;");
230 text = text.replaceAll("\"", "&quot;");
231 text = text.replaceAll("\'", "&apos;");
232 }
233
234 // Remove any control-C characters
235 text = text.replaceAll("" + (char) 3, "");
236 xmlRepresentation += text;
237 }
238
239 // Handle Processing Instruction nodes
240 else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
241 if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
242 outputEscaping = false;
243 }
244 else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
245 outputEscaping = true;
246 }
247 else {
248 System.err.println("XMLConverter.getString(): Warning: Unhandled processing instruction " + nodeName);
249 }
250 }
251
252 // A type of node that is not handled yet
253 else {
254 System.err.println("XMLConverter.getString(): Warning: Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
255 }
256
257 return xmlRepresentation;
258 }
259
260 protected static String getNodeTypeString(short node_type) {
261
262 String type = "";
263 switch(node_type) {
264 case Node.ATTRIBUTE_NODE:
265 type="ATTRIBUTE_NODE";
266 break;
267 case Node.CDATA_SECTION_NODE:
268 type="CDATA_SECTION_NODE";
269 break;
270 case Node.COMMENT_NODE:
271 type="COMMENT_NODE";
272 break;
273 case Node.DOCUMENT_FRAGMENT_NODE:
274 type="DOCUMENT_FRAGMENT_NODE";
275 break;
276 case Node.DOCUMENT_NODE:
277 type="DOCUMENT_NODE";
278 break;
279 case Node.DOCUMENT_TYPE_NODE:
280 type="DOCUMENT_TYPE_NODE";
281 break;
282 case Node.ELEMENT_NODE:
283 type="ELEMENT_NODE";
284 break;
285 case Node.ENTITY_NODE:
286 type="ENTITY_NODE";
287 break;
288 case Node.ENTITY_REFERENCE_NODE:
289 type="ENTITY_REFERENCE_NODE";
290 break;
291 case Node.NOTATION_NODE:
292 type="NOTATION_NODE";
293 break;
294 case Node.PROCESSING_INSTRUCTION_NODE:
295 type="PROCESSING_INSTRUCTION_NODE";
296 break;
297 case Node.TEXT_NODE:
298 type="TEXT_NODE";
299 break;
300 default:
301 type="UNKNOWN";
302 }
303
304 return type;
305 }
306}
Note: See TracBrowser for help on using the repository browser.