source: trunk/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 4285

Last change on this file since 4285 was 4285, checked in by kjdon, 21 years ago

Slightly changed the pretty printing: any nodes that are following siblings of a text node are no longer pretty-printed. This is a hack for printing span elements - if they are inside a pre element, the added indentation would otherwise appear in the page.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
/*
 *    XMLConverter.java
 *    Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
19package org.greenstone.gsdl3.util;
20
21// XML classes
22import org.w3c.dom.Document;
23import org.w3c.dom.Node;
24import org.w3c.dom.NodeList;
25import org.w3c.dom.NamedNodeMap;
26import org.xml.sax.InputSource;
27import org.apache.xerces.parsers.DOMParser;
28import org.apache.xerces.dom.DocumentImpl; // for new Documents
29
30// other java classes
31import java.io.Reader;
32import java.io.InputStreamReader;
33import java.io.StringReader;
34import java.io.File;
35import java.io.FileInputStream;
36import java.io.FileReader;
37
38/** XMLConverter - utitlity class for greenstone
39 *
40 * parses XML Strings into Documents, converts Nodes to Strings
41 * different parsers have different behaviour - can experiment in here
42 * now we only use xerces
43 *
44 * @author <a href="mailto:[email protected]">Katherine Don</a>
45 * @version $Revision: 4285 $
46 *
47 */
48public class XMLConverter {
49
50 /** xerces parser */
51 protected DOMParser parser_ = null;
52
53 private boolean outputEscaping = true;
54
55
56 /** the no-args constructor */
57 public XMLConverter() {
58 try {
59 parser_ = new DOMParser();
60 } catch (Exception e) {
61 System.err.println("XMLConverter:exception "+e.getMessage());
62 }
63 }
64
65 /** returns a DOM Document */
66 public Document getDOM(String in) {
67
68 try {
69 Reader reader = new StringReader(in);
70 InputSource xml_source = new InputSource(reader);
71
72 parser_.parse(xml_source);
73 Document doc = parser_.getDocument();
74 return doc;
75
76 } catch (Exception e) {
77 System.err.println("XMLConverter.getDOM(String): exception "+e.getMessage());
78 }
79 return null;
80 }
81
82 /** returns a DOM Document */
83 public Document getDOM(File in) {
84 try {
85
86 FileReader reader = new FileReader(in);
87 InputSource xml_source = new InputSource(reader);
88
89 parser_.parse(xml_source);
90 Document doc = parser_.getDocument();
91
92 return doc;
93
94 } catch (Exception e) {
95 System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
96 }
97 return null;
98 }
99
100 /** returns a DOM document */
101 public Document getDOM(File in, String encoding) {
102 try {
103
104 InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
105 InputSource xml_source = new InputSource(isr);
106
107 parser_.parse(xml_source);
108 Document doc = parser_.getDocument();
109
110 return doc;
111
112 } catch (Exception e) {
113 System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
114 }
115 return null;
116 }
117
118
119 /** creates a new empty DOM Document */
120 public Document newDOM() {
121 Document doc = new DocumentImpl();
122 return doc;
123 }
124
125 /** returns the Node as a String */
126 public String getString(Node xmlNode)
127 {
128 outputEscaping = true;
129 return getString(xmlNode, 0, false);
130 }
131
132 /** returns the node as a nicely formatted String - this introduces extra
133 * text nodes if the String is read back in as a DOM, so should only be
134 * used for printing */
135 public String getPrettyString(Node xmlNode) {
136
137 outputEscaping = true;
138 return getString(xmlNode, 0, true);
139 }
140
141 private String getString(Node xmlNode, int depth, boolean pretty)
142 {
143 String xmlRepresentation = "";
144
145 if (xmlNode == null)
146 return "<null>";
147
148 short nodeType = xmlNode.getNodeType();
149 String nodeName = xmlNode.getNodeName();
150
151 // Handle Element nodes
152 if (nodeType == Node.ELEMENT_NODE) {
153 if (pretty) {
154 xmlRepresentation += "\n";
155 for (int i = 0; i < depth; i++) {
156 xmlRepresentation += " ";
157 }
158 }
159
160 // Write opening tag
161 xmlRepresentation += "<" + nodeName;
162
163 // Write the node attributes
164 NamedNodeMap nodeAttributes = xmlNode.getAttributes();
165 for (int i = 0; i < nodeAttributes.getLength(); i++) {
166 Node attribute = nodeAttributes.item(i);
167 xmlRepresentation += " " + attribute.getNodeName() + "=";
168 xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
169 }
170
171 // If the node has no children, close the opening tag and return
172 if (xmlNode.hasChildNodes() == false) {
173 // This produces somewhat ugly output, but it is necessary to compensate
174 // for display bugs in Netscape. Firstly, the space is needed before the
175 // closing bracket otherwise Netscape will ignore some tags (<br/>, for
176 // example). Also, a newline character would be expected after the tag,
177 // but this causes problems with the display of links (the link text
178 // will contain a newline character, which is displayed badly).
179 xmlRepresentation += " />";
180 return xmlRepresentation;
181 }
182
183 // Close the opening tag
184 xmlRepresentation += ">";
185
186 // Apply recursively to the children of this node
187 // hack for nodes next to text nodes - dont make them pretty
188 // this is needed for text inside a <pre> element - any new lines
189 // or spaces around the span elements show up in the text
190 NodeList children = xmlNode.getChildNodes();
191 boolean do_pretty = pretty;
192 for (int i = 0; i < children.getLength(); i++) {
193 if (children.item(i).getNodeType()==Node.TEXT_NODE) {
194 do_pretty=false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
195 }
196 xmlRepresentation += getString(children.item(i), depth + 1, do_pretty);
197 }
198
199 // Write closing tag
200 if (pretty) {
201 if (xmlRepresentation.endsWith("\n")) {
202 for (int i = 0; i < depth; i++)
203 xmlRepresentation += " ";
204 }
205 }
206 xmlRepresentation += "</" + nodeName + ">";
207 if (pretty) {
208 xmlRepresentation += "\n";
209 }
210 }
211
212 // Handle Text nodes
213 else if (nodeType == Node.TEXT_NODE) {
214 String text = xmlNode.getNodeValue();
215
216 // Perform output escaping, if required
217 if (outputEscaping) {
218 text = text.replaceAll("&", "&amp;"); // Must be done first!!
219 text = text.replaceAll("<", "&lt;");
220 text = text.replaceAll(">", "&gt;");
221 text = text.replaceAll("\"", "&quot;");
222 text = text.replaceAll("\'", "&apos;");
223 }
224
225 // Remove any control-C characters
226 text = text.replaceAll("" + (char) 3, "");
227 xmlRepresentation += text;
228 }
229
230 // Handle Processing Instruction nodes
231 else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
232 if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
233 outputEscaping = false;
234 }
235 else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
236 outputEscaping = true;
237 }
238 else {
239 System.err.println("XMLConverter.getString(): Warning: Unhandled processing instruction " + nodeName);
240 }
241 }
242
243 // A type of node that is not handled yet
244 else {
245 System.err.println("XMLConverter.getString(): Warning: Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
246 }
247
248 return xmlRepresentation;
249 }
250
251 protected static String getNodeTypeString(short node_type) {
252
253 String type = "";
254 switch(node_type) {
255 case Node.ATTRIBUTE_NODE:
256 type="ATTRIBUTE_NODE";
257 break;
258 case Node.CDATA_SECTION_NODE:
259 type="CDATA_SECTION_NODE";
260 break;
261 case Node.COMMENT_NODE:
262 type="COMMENT_NODE";
263 break;
264 case Node.DOCUMENT_FRAGMENT_NODE:
265 type="DOCUMENT_FRAGMENT_NODE";
266 break;
267 case Node.DOCUMENT_NODE:
268 type="DOCUMENT_NODE";
269 break;
270 case Node.DOCUMENT_TYPE_NODE:
271 type="DOCUMENT_TYPE_NODE";
272 break;
273 case Node.ELEMENT_NODE:
274 type="ELEMENT_NODE";
275 break;
276 case Node.ENTITY_NODE:
277 type="ENTITY_NODE";
278 break;
279 case Node.ENTITY_REFERENCE_NODE:
280 type="ENTITY_REFERENCE_NODE";
281 break;
282 case Node.NOTATION_NODE:
283 type="NOTATION_NODE";
284 break;
285 case Node.PROCESSING_INSTRUCTION_NODE:
286 type="PROCESSING_INSTRUCTION_NODE";
287 break;
288 case Node.TEXT_NODE:
289 type="TEXT_NODE";
290 break;
291 default:
292 type="UNKNOWN";
293 }
294
295 return type;
296 }
297}
Note: See TracBrowser for help on using the repository browser.