source: trunk/gsdl3/src/java/org/greenstone/gsdl3/util/XMLConverter.java@ 3849

Last change on this file since 3849 was 3849, checked in by kjdon, 21 years ago

now uses only xerces, not generic jaxp stuff - this can cause problems cos xml parsers are not interchangeable

  • Property svn:keywords set to Author Date Id Revision
File size: 6.4 KB
Line 
1/*
2 * XMLConverter.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21// XML classes
22import org.w3c.dom.Document;
23import org.w3c.dom.Node;
24import org.w3c.dom.NodeList;
25import org.w3c.dom.NamedNodeMap;
26import org.xml.sax.InputSource;
27//import javax.xml.parsers.*;
28//import org.apache.xerces.dom.TextImpl;
29import org.apache.xerces.parsers.DOMParser;
30import org.apache.xerces.dom.DocumentImpl; // for new Documents
31
32// other java classes
33import java.io.Reader;
34import java.io.StringReader;
35import java.io.File;
36import java.io.FileReader;
37
38/** XMLConverter - utitlity class for greenstone
39 *
40 * parses XML Strings into Documents, converts Nodes to Strings
41 * different parsers have different behaviour - can experiment in here
42 *
43 * @author <a href="mailto:[email protected]">Katherine Don</a>
44 * @version $Revision: 3849 $
45 *
46 */
47public class XMLConverter {
48
49 /** JAXP parser factory */
50 //protected DocumentBuilderFactory doc_build_fact_=null;
51 /** JAXP parser */
52 // protected DocumentBuilder doc_builder_=null;
53
54 /** xerces parser */
55 protected DOMParser parser_ = null;
56
57 private boolean outputEscaping = true;
58
59
60 /** the no-args constructor */
61 public XMLConverter() {
62 try {
63 //doc_build_fact_ = DocumentBuilderFactory.newInstance();
64
65 //doc_builder_ = doc_build_fact_.newDocumentBuilder();
66 parser_ = new DOMParser();
67 } catch (Exception e) {
68 System.out.println("XMLConverter:exception "+e.getMessage());
69 }
70 }
71
72 /** returns a DOM Document */
73 public Document getDOM(String in) {
74
75 try {
76 Reader reader = new StringReader(in);
77 InputSource xml_source = new InputSource(reader);
78
79 // Document doc = doc_builder_.parse(xml_source);
80 parser_.parse(xml_source);
81 Document doc = parser_.getDocument();
82 return doc;
83
84 } catch (Exception e) {
85 System.out.println("XMLConverter.getDOM(String): exception "+e.getMessage());
86 }
87 return null;
88 }
89
90 /** returns a DOM Document */
91 public Document getDOM(File in) {
92 try {
93
94 Reader reader = new FileReader(in);
95 InputSource xml_source = new InputSource(reader);
96
97 //Document doc = doc_builder_.parse(xml_source);
98 parser_.parse(xml_source);
99 Document doc = parser_.getDocument();
100
101 return doc;
102
103 } catch (Exception e) {
104 System.out.println("XMLConverter.getDOM(File): exception "+e.getMessage());
105 }
106 return null;
107 }
108
109 /** creates a new empty DOM Document */
110 public Document newDOM() {
111 // try with xerces
112 Document doc = new DocumentImpl(); //doc_builder_.newDocument();
113 return doc;
114 }
115
116 /** returns the Node as a String */
117 public String getString(Node xmlNode)
118 {
119 outputEscaping = true;
120 return getString(xmlNode, 0);
121 }
122
123
124 private String getString(Node xmlNode, int depth)
125 {
126 String xmlRepresentation = "";
127
128 if (xmlNode == null)
129 return "<null>";
130
131 short nodeType = xmlNode.getNodeType();
132 String nodeName = xmlNode.getNodeName();
133
134 // Handle Element nodes
135 if (nodeType == Node.ELEMENT_NODE) {
136 xmlRepresentation += "\n";
137 for (int i = 0; i < depth; i++)
138 xmlRepresentation += " ";
139
140 // Write opening tag
141 xmlRepresentation += "<" + nodeName;
142
143 // Write the node attributes
144 NamedNodeMap nodeAttributes = xmlNode.getAttributes();
145 for (int i = 0; i < nodeAttributes.getLength(); i++) {
146 Node attribute = nodeAttributes.item(i);
147 xmlRepresentation += " " + attribute.getNodeName() + "=";
148 xmlRepresentation += "\"" + attribute.getNodeValue() + "\"";
149 }
150
151 // If the node has no children, close the opening tag and return
152 if (xmlNode.hasChildNodes() == false) {
153 // This produces somewhat ugly output, but it is necessary to compensate
154 // for display bugs in Netscape. Firstly, the space is needed before the
155 // closing bracket otherwise Netscape will ignore some tags (<br/>, for
156 // example). Also, a newline character would be expected after the tag,
157 // but this causes problems with the display of links (the link text
158 // will contain a newline character, which is displayed badly).
159 xmlRepresentation += " />";
160 return xmlRepresentation;
161 }
162
163 // Close the opening tag
164 xmlRepresentation += ">";
165
166 // Apply recursively to the children of this node
167 NodeList children = xmlNode.getChildNodes();
168 for (int i = 0; i < children.getLength(); i++) {
169 xmlRepresentation += getString(children.item(i), depth + 1);
170 }
171
172 // Write closing tag
173 if (xmlRepresentation.endsWith("\n")) {
174 for (int i = 0; i < depth; i++)
175 xmlRepresentation += " ";
176 }
177 xmlRepresentation += "</" + nodeName + ">\n";
178 }
179
180 // Handle Text nodes
181 else if (nodeType == Node.TEXT_NODE) {
182 String text = xmlNode.getNodeValue();
183
184 // Perform output escaping, if required
185 if (outputEscaping) {
186 text = text.replaceAll("&", "&amp;"); // Must be done first!!
187 text = text.replaceAll("<", "&lt;");
188 text = text.replaceAll(">", "&gt;");
189 text = text.replaceAll("\"", "&quot;");
190 text = text.replaceAll("\'", "&apos;");
191 }
192
193 // Remove any control-C characters
194 text = text.replaceAll("" + (char) 3, "");
195 xmlRepresentation += text;
196 }
197
198 // Handle Processing Instruction nodes
199 else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
200 if (nodeName == "javax.xml.transform.disable-output-escaping") {
201 outputEscaping = false;
202 }
203 else if (nodeName == "javax.xml.transform.enable-output-escaping") {
204 outputEscaping = true;
205 }
206 else {
207 System.err.println("Warning: Unhandled processing instruction " + nodeName);
208 }
209 }
210
211 // A type of node that is not handled yet
212 else {
213 System.err.println("Warning: Unknown node type: " + nodeType);
214 }
215
216 return xmlRepresentation;
217 }
218}
Note: See TracBrowser for help on using the repository browser.