source: trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 13325

Last change on this file since 13325 was 13325, checked in by mdewsnip, 17 years ago

Added another exception check to hopefully get more information about what has gone wrong when plugin/classifier argument fails.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.9 KB
Line 
1package org.greenstone.gatherer.util;
2
3
4import java.io.*;
5import java.net.*;
6import java.util.*;
7import org.apache.xerces.parsers.*;
8import org.apache.xml.serialize.*;
9import org.greenstone.gatherer.DebugStream;
10import org.w3c.dom.*;
11import org.xml.sax.*;
12
13
14/** This class is a static class containing useful XML functions */
15public class XMLTools
16{
17 /** Remove all of the child nodes from a certain node. */
18 static final public void clear(Node node)
19 {
20 while (node.hasChildNodes()) {
21 node.removeChild(node.getFirstChild());
22 }
23 }
24
25
26 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
27 {
28 ArrayList child_elements = new ArrayList();
29
30 NodeList children_nodelist = parent_element.getChildNodes();
31 for (int i = 0; i < children_nodelist.getLength(); i++) {
32 Node child_node = children_nodelist.item(i);
33 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name)) {
34 child_elements.add(child_node);
35 }
36 }
37
38 return child_elements;
39 }
40
41
42 static public String getElementTextValue(Element element)
43 {
44 // Find the first text node child
45 NodeList children_nodelist = element.getChildNodes();
46 for (int i = 0; i < children_nodelist.getLength(); i++) {
47 Node child_node = children_nodelist.item(i);
48 if (child_node.getNodeType() == Node.TEXT_NODE) {
49 return child_node.getNodeValue();
50 }
51 }
52
53 // None found
54 return "";
55 }
56
57
58 /** Method to retrieve the value of a given node.
59 * @param element The <strong>Element</strong> whose value we wish to find.
60 * Soon to be deprecated!
61 */
62 static final public String getValue(Node element) {
63 if (element == null) {
64 return "";
65 }
66 // If we've been given a subject node first retrieve its value node.
67 if(element.getNodeName().equals("Subject")) {
68 element = getNodeFromNamed(element, "Value");
69 }
70 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
71 if(element != null && element.hasChildNodes()) {
72 StringBuffer text_buffer = new StringBuffer();
73 NodeList text_nodes = element.getChildNodes();
74 for(int i = 0; i < text_nodes.getLength(); i++) {
75 Node possible_text = text_nodes.item(i);
76 if(possible_text.getNodeName().equals(StaticStrings.TEXT_NODE)) {
77 text_buffer.append(possible_text.getNodeValue());
78 }
79 }
80 return text_buffer.toString();
81 }
82 return "";
83 }
84
85
86 /** Method to retrieve from the node given, a certain child node with the specified name.
87 * @param parent The <strong>Node</strong> whose children should be searched.
88 * @param name The required nodes name as a <strong>String</strong>.
89 * @return The requested <strong>Node</strong> if it is found, <i>null</i> otherwise.
90 * Soon to be deprecated!
91 */
92 static final public Node getNodeFromNamed(Node parent, String name) {
93 Node child = null;
94 for(Node i = parent.getFirstChild(); i != null && child == null;
95 i = i.getNextSibling()) {
96 if(i.getNodeName().equals(name)) {
97 child = i;
98 }
99 }
100 return child;
101 }
102
103
104 /** Parse an XML document from a given file path */
105 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
106 {
107 if (use_class_loader == true) {
108 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
109 if (is != null) {
110 return parseXML(is);
111 }
112 }
113
114 // Try the file outside the classes directory
115 return parseXMLFile(new File(xml_file_path));
116 }
117
118
119 /** Parse an XML document from a given file */
120 static public Document parseXMLFile(File xml_file)
121 {
122 // No file? No point trying!
123 if (xml_file.exists() == false) {
124 return null;
125 }
126
127 try {
128 return parseXML(new FileInputStream(xml_file));
129 }
130 catch (Exception exception) {
131 DebugStream.printStackTrace(exception);
132 return null;
133 }
134 }
135
136
137 /** Parse an XML document from a given input stream */
138 static public Document parseXML(InputStream xml_input_stream)
139 {
140 Document document = null;
141
142 try {
143 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
144 Reader xml_reader = new BufferedReader(isr);
145 document = parseXML(xml_reader);
146 isr.close();
147 xml_input_stream.close();
148 }
149 catch (Exception exception) {
150 DebugStream.printStackTrace(exception);
151 }
152
153 return document;
154 }
155
156
157 /** Parse an XML document from a given reader */
158 static public Document parseXML(Reader xml_reader)
159 {
160 Document document = null;
161
162 try {
163 InputSource isc = new InputSource(xml_reader);
164 DOMParser parser = new DOMParser();
165 parser.setFeature("http://xml.org/sax/features/validation", false);
166 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
167 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
168 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
169 parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
170 parser.parse(isc);
171 document = parser.getDocument();
172 }
173 catch (SAXException exception) {
174 System.err.println("SAX exception: " + exception.getMessage());
175 DebugStream.printStackTrace(exception);
176 }
177 catch (Exception exception) {
178 DebugStream.printStackTrace(exception);
179 }
180
181 return document;
182 }
183
184
185 static public StringBuffer readXMLStream(InputStream input_stream)
186 {
187 StringBuffer xml = new StringBuffer("");
188
189 try {
190 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
191 BufferedReader buffered_in = new BufferedReader(isr);
192
193 String line = "";
194 boolean xml_content = false;
195 while((line = buffered_in.readLine()) != null) {
196 if(xml_content) {
197 xml.append(line);
198 xml.append("\n");
199 }
200 else if(line.trim().startsWith("<?xml")) {
201 xml_content = true;
202 xml.append(line);
203 xml.append("\n");
204 }
205 }
206 buffered_in = null;
207 }
208 catch (Exception error) {
209 System.err.println("Failed when trying to parse XML stream");
210 error.printStackTrace();
211 }
212
213 return xml;
214 }
215
216
217 /** Removes characters that are invalid in XML (see http://www.w3.org/TR/2000/REC-xml-20001006#charsets) */
218 static public String removeInvalidCharacters(String text)
219 {
220 char[] safe_characters = new char[text.length()];
221 int j = 0;
222
223 char[] raw_characters = new char[text.length()];
224 text.getChars(0, text.length(), raw_characters, 0);
225 for (int i = 0; i < raw_characters.length; i++) {
226 char character = raw_characters[i];
227 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF)) {
228 safe_characters[j] = character;
229 j++;
230 }
231 }
232
233 return new String(safe_characters, 0, j);
234 }
235
236
237 static public void setElementTextValue(Element element, String text)
238 {
239 // Remove all text node children
240 NodeList children_nodelist = element.getChildNodes();
241 for (int i = children_nodelist.getLength() - 1; i >= 0; i--) {
242 Node child_node = children_nodelist.item(i);
243 if (child_node.getNodeType() == Node.TEXT_NODE) {
244 element.removeChild(child_node);
245 }
246 }
247
248 // Add a new text node
249 if (text != null) {
250 element.appendChild(element.getOwnerDocument().createTextNode(text));
251 }
252 }
253
254
255 /** Set the #text node value of some element.
256 * @param element the Element whose value we wish to set
257 * @param value the new value for the element as a String
258 * Soon to be deprecated!
259 */
260 static final public void setValue(Element element, String value) {
261 // Remove any existing child node(s)
262 clear(element);
263 // Add new text node.
264 if (value != null) {
265 element.appendChild(element.getOwnerDocument().createTextNode(value));
266 }
267 }
268
269
270 /** Write an XML document to a given file */
271 static public void writeXMLFile(File xml_file, Document document)
272 {
273 try {
274 OutputStream os = new FileOutputStream(xml_file);
275 // Create an output format for our document.
276 OutputFormat f = new OutputFormat(document);
277 f.setEncoding("UTF-8");
278 f.setIndenting(true);
279 f.setLineWidth(0); // Why isn't this working!
280 f.setPreserveSpace(false);
281 // Create the necessary writer stream for serialization.
282 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
283 Writer w = new BufferedWriter(osw);
284 // Generate a new serializer from the above.
285 XMLSerializer s = new XMLSerializer(w, f);
286 s.asDOMSerializer();
287 // Finally serialize the document to file.
288 s.serialize(document);
289 // And close.
290 os.close();
291 }
292 catch (Exception exception) {
293 DebugStream.printStackTrace(exception);
294 }
295 }
296
297 public static void printXMLNode(Node e) {
298 printXMLNode(e, 0) ;
299 }
300
301 public static void printXMLNode(Node e, int depth) { //recursive method call using DOM API...
302
303 for (int i=0 ; i<depth ; i++)
304 System.out.print(' ') ;
305
306 if (e.getNodeType() == Node.TEXT_NODE){
307 System.out.println("text") ;
308 //System.out.println(e.getNodeValue()) ;
309 return ;
310 }
311
312 System.out.print('<');
313 System.out.print(e.getNodeName());
314 NamedNodeMap attrs = e.getAttributes();
315 for (int i = 0; i < attrs.getLength(); i++) {
316 Node attr = attrs.item(i);
317 System.out.print(' ');
318 System.out.print(attr.getNodeName());
319 System.out.print("=\"");
320 System.out.print(attr.getNodeValue());
321 System.out.print('"');
322 }
323
324 NodeList children = e.getChildNodes();
325
326 if (children == null || children.getLength() == 0)
327 System.out.println("/>") ;
328 else {
329
330 System.out.println('>') ;
331
332 int len = children.getLength();
333 for (int i = 0; i < len; i++) {
334 printXMLNode(children.item(i), depth + 1);
335 }
336
337 for (int i=0 ; i<depth ; i++)
338 System.out.print(' ') ;
339
340 System.out.println("</" + e.getNodeName() + ">");
341 }
342
343 }
344}
Note: See TracBrowser for help on using the repository browser.