source: trunk/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 4013

Last change on this file since 4013 was 4013, checked in by mdewsnip, 21 years ago

Changed string comparisons to use .equals().

  • Property svn:keywords set to Author Date Id Revision
File size: 7.4 KB
Line 
1/*
2 * XMLConverter.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21// XML classes
22import org.w3c.dom.Document;
23import org.w3c.dom.Node;
24import org.w3c.dom.NodeList;
25import org.w3c.dom.NamedNodeMap;
26import org.xml.sax.InputSource;
27import org.apache.xerces.parsers.DOMParser;
28import org.apache.xerces.dom.DocumentImpl; // for new Documents
29
30// other java classes
31import java.io.Reader;
32import java.io.StringReader;
33import java.io.File;
34import java.io.FileReader;
35
36/** XMLConverter - utility class for greenstone
37 *
38 * parses XML Strings into Documents, converts Nodes to Strings
39 * different parsers have different behaviour - can experiment in here
40 * now we only use xerces
41 *
42 * @author <a href="mailto:[email protected]">Katherine Don</a>
43 * @version $Revision: 4013 $
44 *
45 */
46public class XMLConverter {
47
48 /** xerces parser */
49 protected DOMParser parser_ = null;
50
51 private boolean outputEscaping = true;
52
53
54 /** the no-args constructor */
55 public XMLConverter() {
56 try {
57 parser_ = new DOMParser();
58 } catch (Exception e) {
59 System.err.println("XMLConverter:exception "+e.getMessage());
60 }
61 }
62
63 /** returns a DOM Document */
64 public Document getDOM(String in) {
65
66 try {
67 Reader reader = new StringReader(in);
68 InputSource xml_source = new InputSource(reader);
69
70 parser_.parse(xml_source);
71 Document doc = parser_.getDocument();
72 return doc;
73
74 } catch (Exception e) {
75 System.err.println("XMLConverter.getDOM(String): exception "+e.getMessage());
76 }
77 return null;
78 }
79
80 /** returns a DOM Document */
81 public Document getDOM(File in) {
82 try {
83
84 Reader reader = new FileReader(in);
85 InputSource xml_source = new InputSource(reader);
86
87 parser_.parse(xml_source);
88 Document doc = parser_.getDocument();
89
90 return doc;
91
92 } catch (Exception e) {
93 System.err.println("XMLConverter.getDOM(File): exception "+e.getMessage());
94 }
95 return null;
96 }
97
98 /** creates a new empty DOM Document */
99 public Document newDOM() {
100 Document doc = new DocumentImpl();
101 return doc;
102 }
103
104 /** returns the Node as a String */
105 public String getString(Node xmlNode)
106 {
107 outputEscaping = true;
108 return getString(xmlNode, 0, false);
109 }
110
111 /** returns the node as a nicely formatted String - this introduces extra
112 * text nodes if the String is read back in as a DOM, so should only be
113 * used for printing */
114 public String getPrettyString(Node xmlNode) {
115
116 outputEscaping = true;
117 return getString(xmlNode, 0, true);
118 }
119
120 private String getString(Node xmlNode, int depth, boolean pretty)
121 {
122 String xmlRepresentation = "";
123
124 if (xmlNode == null)
125 return "<null>";
126
127 short nodeType = xmlNode.getNodeType();
128 String nodeName = xmlNode.getNodeName();
129
130 // Handle Element nodes
131 if (nodeType == Node.ELEMENT_NODE) {
132 if (pretty) {
133 xmlRepresentation += "\n";
134 for (int i = 0; i < depth; i++) {
135 xmlRepresentation += " ";
136 }
137 }
138
139 // Write opening tag
140 xmlRepresentation += "<" + nodeName;
141
142 // Write the node attributes
143 NamedNodeMap nodeAttributes = xmlNode.getAttributes();
144 for (int i = 0; i < nodeAttributes.getLength(); i++) {
145 Node attribute = nodeAttributes.item(i);
146 xmlRepresentation += " " + attribute.getNodeName() + "=";
147 xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
148 }
149
150 // If the node has no children, close the opening tag and return
151 if (xmlNode.hasChildNodes() == false) {
152 // This produces somewhat ugly output, but it is necessary to compensate
153 // for display bugs in Netscape. Firstly, the space is needed before the
154 // closing bracket otherwise Netscape will ignore some tags (<br/>, for
155 // example). Also, a newline character would be expected after the tag,
156 // but this causes problems with the display of links (the link text
157 // will contain a newline character, which is displayed badly).
158 xmlRepresentation += " />";
159 return xmlRepresentation;
160 }
161
162 // Close the opening tag
163 xmlRepresentation += ">";
164
165 // Apply recursively to the children of this node
166 NodeList children = xmlNode.getChildNodes();
167 for (int i = 0; i < children.getLength(); i++) {
168 xmlRepresentation += getString(children.item(i), depth + 1, pretty);
169 }
170
171 // Write closing tag
172 if (pretty) {
173 if (xmlRepresentation.endsWith("\n")) {
174 for (int i = 0; i < depth; i++)
175 xmlRepresentation += " ";
176 }
177 }
178 xmlRepresentation += "</" + nodeName + ">";
179 if (pretty) {
180 xmlRepresentation += "\n";
181 }
182 }
183
184 // Handle Text nodes
185 else if (nodeType == Node.TEXT_NODE) {
186 String text = xmlNode.getNodeValue();
187
188 // Perform output escaping, if required
189 if (outputEscaping) {
190 text = text.replaceAll("&", "&amp;"); // Must be done first!!
191 text = text.replaceAll("<", "&lt;");
192 text = text.replaceAll(">", "&gt;");
193 text = text.replaceAll("\"", "&quot;");
194 text = text.replaceAll("\'", "&apos;");
195 }
196
197 // Remove any control-C characters
198 text = text.replaceAll("" + (char) 3, "");
199 xmlRepresentation += text;
200 }
201
202 // Handle Processing Instruction nodes
203 else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
204 if (nodeName.equals("javax.xml.transform.disable-output-escaping")) {
205 outputEscaping = false;
206 }
207 else if (nodeName.equals("javax.xml.transform.enable-output-escaping")) {
208 outputEscaping = true;
209 }
210 else {
211 System.err.println("XMLConverter.getString(): Warning: Unhandled processing instruction " + nodeName);
212 }
213 }
214
215 // A type of node that is not handled yet
216 else {
217 System.err.println("XMLConverter.getString(): Warning: Unknown node type: " + nodeType+" "+getNodeTypeString(nodeType));
218 }
219
220 return xmlRepresentation;
221 }
222
223 protected static String getNodeTypeString(short node_type) {
224
225 String type = "";
226 switch(node_type) {
227 case Node.ATTRIBUTE_NODE:
228 type="ATTRIBUTE_NODE";
229 break;
230 case Node.CDATA_SECTION_NODE:
231 type="CDATA_SECTION_NODE";
232 break;
233 case Node.COMMENT_NODE:
234 type="COMMENT_NODE";
235 break;
236 case Node.DOCUMENT_FRAGMENT_NODE:
237 type="DOCUMENT_FRAGMENT_NODE";
238 break;
239 case Node.DOCUMENT_NODE:
240 type="DOCUMENT_NODE";
241 break;
242 case Node.DOCUMENT_TYPE_NODE:
243 type="DOCUMENT_TYPE_NODE";
244 break;
245 case Node.ELEMENT_NODE:
246 type="ELEMENT_NODE";
247 break;
248 case Node.ENTITY_NODE:
249 type="ENTITY_NODE";
250 break;
251 case Node.ENTITY_REFERENCE_NODE:
252 type="ENTITY_REFERENCE_NODE";
253 break;
254 case Node.NOTATION_NODE:
255 type="NOTATION_NODE";
256 break;
257 case Node.PROCESSING_INSTRUCTION_NODE:
258 type="PROCESSING_INSTRUCTION_NODE";
259 break;
260 case Node.TEXT_NODE:
261 type="TEXT_NODE";
262 break;
263 default:
264 type="UNKNOWN";
265 }
266
267 return type;
268 }
269}
Note: See TracBrowser for help on using the repository browser.