source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 26018

Last change on this file since 26018 was 26018, checked in by sjm84, 12 years ago

Changing the default format statement to include some templates for document display

  • Property svn:keywords set to Author Date Id Revision
File size: 30.8 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15
16// SAX
17import org.xml.sax.XMLReader;
18import org.xml.sax.SAXException;
19import org.xml.sax.SAXParseException;
20import org.xml.sax.helpers.DefaultHandler;
21import org.xml.sax.InputSource;
22
23// JAXP
24import javax.xml.parsers.FactoryConfigurationError;
25import javax.xml.parsers.ParserConfigurationException;
26import javax.xml.parsers.SAXParser;
27import javax.xml.parsers.SAXParserFactory;
28
29/** This class is a static class containing useful XML functions */
30public class XMLTools
31{
32 /** extracts the text out of a node */
33 public static Node getNodeTextNode(Element param)
34 {
35 param.normalize();
36 Node n = param.getFirstChild();
37 while (n != null && n.getNodeType() != Node.TEXT_NODE)
38 {
39 n = n.getNextSibling();
40 }
41 return n;
42 }
43
44 /** extracts the text out of a node */
45 public static String getNodeText(Element param)
46 {
47 Node text_node = getNodeTextNode(param);
48 if (text_node == null)
49 {
50 return "";
51 }
52 return text_node.getNodeValue();
53 }
54
55 public static void setNodeText(Element elem, String text)
56 {
57 Node old_text_node = getNodeTextNode(elem);
58 if (old_text_node != null)
59 {
60 elem.removeChild(old_text_node);
61 }
62 Text t = elem.getOwnerDocument().createTextNode(text);
63 elem.appendChild(t);
64 }
65
66 /** returns the (first) child element with the given name */
67 public static Node getChildByTagName(Node n, String name)
68 {
69
70 Node child = n.getFirstChild();
71 while (child != null)
72 {
73 if (child.getNodeName().equals(name))
74 {
75 return child;
76 }
77 child = child.getNextSibling();
78 }
79 return null; //not found
80 }
81
82 /**
83 * returns the (nth) child element with the given name index numbers start
84 * at 0
85 */
86 public static Node getChildByTagNameIndexed(Node n, String name, int index)
87 {
88 if (index == -1)
89 {
90 return getChildByTagName(n, name);
91 }
92 int count = 0;
93 Node child = n.getFirstChild();
94 while (child != null)
95 {
96 if (child.getNodeName().equals(name))
97 {
98 if (count == index)
99 {
100 return child;
101 }
102 else
103 {
104 count++;
105 }
106 }
107 child = child.getNextSibling();
108 }
109 return null; //not found
110 }
111
112 /**
113 * returns the element parent/node_name[@attribute_name='attribute_value']
114 */
115 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
116 {
117
118 NodeList children = parent.getChildNodes();
119 for (int i = 0; i < children.getLength(); i++)
120 {
121 Node child = children.item(i);
122 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
123 if (child.getNodeName().equals(node_name))
124 {
125 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
126 return (Element) child;
127 }
128 }
129 // not found
130 return null;
131 }
132
133 /**
134 * returns a list of elements
135 * parent/node_name[@attribute_name='attribute_value']
136 */
137 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
138 {
139 ArrayList elements = new ArrayList();
140 NodeList children = parent.getChildNodes();
141 for (int i = 0; i < children.getLength(); i++)
142 {
143 //System.out.println("getNamedElementList");
144 Node child = children.item(i);
145 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
146 if (child.getNodeName().equals(node_name))
147 {
148 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
149 elements.add((Element) child);
150 }
151 }
152 // not found
153 if (elements.size() == 0)
154 {
155 elements = null;
156 }
157 return elements;
158 }
159
160 public static void copyAllChildren(Element to, Element from)
161 {
162
163 Document to_doc = to.getOwnerDocument();
164 Node child = from.getFirstChild();
165 while (child != null)
166 {
167 to.appendChild(to_doc.importNode(child, true));
168 child = child.getNextSibling();
169 }
170 }
171
172 /** Duplicates an element */
173 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
174 {
175 return duplicateElementNS(owner, element, null, with_attributes);
176 }
177
178 /** Duplicates an element */
179 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
180 {
181 Element duplicate;
182 if (namespace_uri == null)
183 {
184 duplicate = owner.createElement(element.getTagName());
185 }
186 else
187 {
188 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
189 }
190 // Copy element attributes
191 if (with_attributes)
192 {
193 NamedNodeMap attributes = element.getAttributes();
194 for (int i = 0; i < attributes.getLength(); i++)
195 {
196 Node attribute = attributes.item(i);
197 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
198 }
199 }
200
201 // Copy element children
202 NodeList children = element.getChildNodes();
203 for (int i = 0; i < children.getLength(); i++)
204 {
205 Node child = children.item(i);
206 duplicate.appendChild(owner.importNode(child, true));
207 }
208
209 return duplicate;
210 }
211
212 /** Remove all of the child nodes from a certain node. */
213 static final public void clear(Node node)
214 {
215 while (node.hasChildNodes())
216 {
217 node.removeChild(node.getFirstChild());
218 }
219 }
220
221 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
222 {
223 ArrayList child_elements = new ArrayList();
224
225 NodeList children_nodelist = parent_element.getChildNodes();
226 for (int i = 0; i < children_nodelist.getLength(); i++)
227 {
228 Node child_node = children_nodelist.item(i);
229 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
230 {
231 child_elements.add(child_node);
232 }
233 }
234
235 return child_elements;
236 }
237
238 static public String getElementTextValue(Element element)
239 {
240 // Find the first text node child
241 NodeList children_nodelist = element.getChildNodes();
242 for (int i = 0; i < children_nodelist.getLength(); i++)
243 {
244 Node child_node = children_nodelist.item(i);
245 if (child_node.getNodeType() == Node.TEXT_NODE)
246 {
247 return child_node.getNodeValue();
248 }
249 }
250
251 // None found
252 return "";
253 }
254
255 /**
256 * Method to retrieve the value of a given node.
257 *
258 * @param element
259 * The <strong>Element</strong> whose value we wish to find. Soon
260 * to be deprecated!
261 */
262 static final public String getValue(Node element)
263 {
264 if (element == null)
265 {
266 return "";
267 }
268 // If we've been given a subject node first retrieve its value node.
269 if (element.getNodeName().equals("Subject"))
270 {
271 element = getNodeFromNamed(element, "Value");
272 }
273 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
274 if (element != null && element.hasChildNodes())
275 {
276 StringBuffer text_buffer = new StringBuffer();
277 NodeList text_nodes = element.getChildNodes();
278 for (int i = 0; i < text_nodes.getLength(); i++)
279 {
280 Node possible_text = text_nodes.item(i);
281 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
282 {
283 text_buffer.append(possible_text.getNodeValue());
284 }
285 }
286 return text_buffer.toString();
287 }
288 return "";
289 }
290
291 /**
292 * Method to retrieve from the node given, a certain child node with the
293 * specified name.
294 *
295 * @param parent
296 * The <strong>Node</strong> whose children should be searched.
297 * @param name
298 * The required nodes name as a <strong>String</strong>.
299 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
300 * otherwise. Soon to be deprecated!
301 */
302 static final public Node getNodeFromNamed(Node parent, String name)
303 {
304 Node child = null;
305 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
306 {
307 if (i.getNodeName().equals(name))
308 {
309 child = i;
310 }
311 }
312 return child;
313 }
314
/** Message returned by the parse methods when no problem was detected. */
static final public String WELLFORMED = "well-formed !";
/** Prefix of the message the parse methods return when a SAXParseException is caught. */
static final public String NOTWELLFORMED = "not well-formed";
/**
 * Text prepended to the fragment checked by parse(String): an XML declaration
 * followed by the opening tag of a collectionConfig root element that
 * declares the gsf and xsl namespace prefixes.
 */
static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
/** Text appended to the fragment checked by parse(String): closes the root element opened by HEADER. */
static final private String FOOTER = "</collectionConfig>";
319
320 public static String parse(String xml_str)
321 {
322 String validation_msg = WELLFORMED;
323 xml_str = HEADER + xml_str + FOOTER;
324 try
325 {
326 SAXParserFactory factory = SAXParserFactory.newInstance();
327 factory.setNamespaceAware(true);
328 //factory.setValidating (true);
329 SAXParser parser = factory.newSAXParser();
330 InputSource iSource = new InputSource(new StringReader(xml_str));
331 // parser.parse (iSource, new DefaultHandler ());
332
333 org.xml.sax.XMLReader reader = parser.getXMLReader();
334 reader.setContentHandler(new DefaultHandler());
335 reader.setErrorHandler(new DefaultHandler());
336 reader.parse(iSource);
337 }
338 catch (FactoryConfigurationError e)
339 {
340 validation_msg = "unable to get a document builder factory";
341 }
342 catch (ParserConfigurationException e)
343 {
344 validation_msg = "unable to configure parser";
345 }
346 catch (SAXParseException e)
347 {
348 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
349 }
350 catch (SAXException e)
351 {
352 validation_msg += " Fatal error: " + e.toString();
353 }
354 catch (IOException e)
355 {
356 validation_msg = "Unable to read the input, i/o error";
357 }
358
359 return validation_msg;
360 }
361
362 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
363 public static String parseDOM(String xml_str)
364 {
365 String validation_msg = WELLFORMED;
366
367 try
368 {
369 SAXParserFactory factory = SAXParserFactory.newInstance();
370 factory.setNamespaceAware(true);
371 //factory.setValidating (true);
372 SAXParser parser = factory.newSAXParser();
373 InputSource iSource = new InputSource(new StringReader(xml_str));
374 // parser.parse (iSource, new DefaultHandler ());
375
376 org.xml.sax.XMLReader reader = parser.getXMLReader();
377 reader.setContentHandler(new DefaultHandler());
378 reader.setErrorHandler(new DefaultHandler());
379 reader.parse(iSource);
380 }
381 catch (FactoryConfigurationError e)
382 {
383 validation_msg = "unable to get a document builder factory";
384 }
385 catch (ParserConfigurationException e)
386 {
387 validation_msg = "unable to configure parser";
388 }
389 catch (SAXParseException e)
390 {
391 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
392 }
393 catch (SAXException e)
394 {
395 validation_msg += " " + e.toString();
396 }
397 catch (IOException e)
398 {
399 validation_msg = "Unable to read the input, i/o error";
400 }
401
402 return validation_msg;
403 }
404
405 public static String parse(File xml_file)
406 {
407 String validation_msg = WELLFORMED;
408
409 try
410 {
411 SAXParserFactory factory = SAXParserFactory.newInstance();
412 factory.setNamespaceAware(true);
413 //factory.setValidating (true);
414 SAXParser parser = factory.newSAXParser();
415 FileReader r = new FileReader(xml_file);
416 InputSource iSource = new InputSource(r);
417 XMLReader reader = parser.getXMLReader();
418 reader.setContentHandler(new DefaultHandler());
419 reader.setErrorHandler(new DefaultHandler());
420 reader.parse(iSource);
421 }
422 catch (FactoryConfigurationError e)
423 {
424 validation_msg = "unable to get a document builder factory";
425 }
426 catch (ParserConfigurationException e)
427 {
428 validation_msg = "unable to configure parser";
429 }
430 catch (SAXParseException e)
431 {
432 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433 }
434 catch (SAXException e)
435 {
436 validation_msg += " Fatal error: " + e.toString();
437 }
438 catch (IOException e)
439 {
440 validation_msg = "Unable to read the input, i/o error";
441 }
442
443 return validation_msg;
444 }
445
446 /** Returns a string of the location. */
447 private static String getLocationString(SAXParseException ex)
448 {
449 StringBuffer str = new StringBuffer();
450
451 String systemId = ex.getSystemId();
452 if (systemId != null)
453 {
454 int index = systemId.lastIndexOf('/');
455 if (index != -1)
456 systemId = systemId.substring(index + 1);
457 str.append(systemId);
458 }
459 str.append("(line ");
460 str.append(ex.getLineNumber() - 1);
461 str.append(", column ");
462 str.append(ex.getColumnNumber());
463 str.append("): ");
464
465 return str.toString();
466
467 } // getLocationString(SAXParseException):String
468
469 /** Parse an XML document from a given file path */
470 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
471 {
472 if (use_class_loader == true)
473 {
474 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
475 if (is != null)
476 {
477 return parseXML(is);
478 }
479 }
480
481 // Try the file outside the classes directory
482 return parseXMLFile(new File(xml_file_path));
483 }
484
485 /** Parse an XML document from a given file */
486 static public Document parseXMLFile(File xml_file)
487 {
488 // No file? No point trying!
489 if (xml_file.exists() == false)
490 {
491 return null;
492 }
493
494 try
495 {
496 return parseXML(new FileInputStream(xml_file));
497 }
498 catch (Exception exception)
499 {
500 DebugStream.printStackTrace(exception);
501 return null;
502 }
503 }
504
505 /** Parse an XML document from a given input stream */
506 static public Document parseXML(InputStream xml_input_stream)
507 {
508 Document document = null;
509
510 try
511 {
512 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
513 document = parseXML(isr);
514 isr.close();
515 xml_input_stream.close();
516 }
517 catch (Exception exception)
518 {
519 DebugStream.printStackTrace(exception);
520 }
521
522 return document;
523 }
524
/**
 * Parse an XML document from a given reader using the Xerces DOMParser.
 *
 * When debugging is off, the reader is wrapped in a
 * RemoveContentBeforeRootElementXMLReader before parsing. When debugging is
 * on, the whole input is first read into a string (so it can be shown if
 * parsing fails), the supplied reader is closed, and the string is parsed.
 * The supplied reader is closed by this method only in the debugging branch.
 *
 * Failures never propagate: a SAXParseException is handed to
 * showXMLParseFailureLine, other exceptions are reported, and in debug mode
 * a SAX failure terminates the program via System.exit(-1).
 *
 * @param xml_reader
 *            the reader supplying the XML text
 * @return the parsed document, or null if parsing failed
 */
static public Document parseXML(Reader xml_reader)
{
    Document document = null;

    // If debugging, the following will store the XML contents to be parsed,
    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
    String xmlContents = "";

    try
    {
        Reader reader = null;

        // (1) By default, GLI will remove any contents preceding (and invalidating)
        // the XML and present these lines separately to the user
        if (!DebugStream.isDebuggingEnabled())
        {
            try
            {
                reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
            }
            catch (Exception e)
            {
                // NOTE(review): reader stays null here, so the InputSource below is
                // built around a null reader -- confirm that is acceptable.
                System.err.println("Exception while wrapping the reader in parseXML(Reader)");
                e.printStackTrace();
            }
        }

        // (2) If we are running GLI in debug mode:
        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
        // idea of where things went wrong. The "XML" contents are kept in xmlContents
        // so the exception handlers below can print them to the DebugStream.
        // We need to read the XML twice to know the line where things went wrong, so
        // do the additional reading only if we're debugging
        else
        {
            StringBuffer buf = new StringBuffer();
            char[] buffer = new char[500];
            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
            while (numCharsRead != -1)
            {
                buf.append(buffer, 0, numCharsRead);
                numCharsRead = xml_reader.read(buffer, 0, buffer.length);
            }
            xmlContents = buf.toString();
            xml_reader.close(); // closing the old Reader
            xml_reader = null;
            buffer = null;
            buf = null;
            // we need a Reader to parse the same contents as the Reader that was just closed
            reader = new BufferedReader(new StringReader(xmlContents));
            //System.err.println("xmlContents:\n" + xmlContents);
        }

        // (3) The actual XML parsing: non-validating, no external DTD loading
        InputSource isc = new InputSource(reader);
        DOMParser parser = new DOMParser();
        parser.setFeature("http://xml.org/sax/features/validation", false);
        parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
        parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
        parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
        parser.parse(isc);
        document = parser.getDocument();

    }
    catch (SAXParseException e)
    {
        // Has a line number: let the helper show the offending line.
        // xmlContents is only populated in debug mode; otherwise it is "".
        showXMLParseFailureLine(e, xmlContents);
    }
    catch (SAXException exception)
    {
        System.err.println("SAX exception: " + exception.getMessage());
        if (DebugStream.isDebuggingEnabled())
        {
            DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
            DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
            System.exit(-1);
        }
        // else, not running in debug mode, so don't exit after exception
        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
        DebugStream.printStackTrace(exception);
    }
    catch (Exception exception)
    {
        DebugStream.printStackTrace(exception);
    }

    // Still null if any of the handlers above ran
    return document;
}
616
617 /**
618 * Displays the line (string) where the SAXParseException occurred, given a
619 * String of the entire xml that was being parsed and the SAXParseException
620 * object that was caught. The messages are printed to DebugStream, so run
621 * GLI/FLI with -debug to view this output.
622 *
623 * @param xmlContents
624 * is the entire xml that was being parsed when the exception
625 * occurred
626 * @param e
627 * is the SAXParseException object that was thrown upon parsing
628 * the xmlContents.
629 */
630 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
631 {
632
633 // There should be no characters at all that preceed the <?xml>... bit.
634 // The first check is for starting spaces:
635 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
636 {
637 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
638 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
639 return; // nothing more to do, first error identified
640 }
641
642 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
643 String line = "";
644 int linenumber = e.getLineNumber();
645 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
646 if (DebugStream.isDebuggingEnabled())
647 {
648 if (linenumber != -1)
649 {
650 String[] lines = xmlContents.split("\n");
651 if (lines.length > 0)
652 {
653 DebugStream.println(" (number of lines: " + lines.length + ")");
654 if (lines.length >= linenumber)
655 {
656 line = lines[linenumber - 1];
657 }
658 else
659 { // error is past the last line
660 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
661 }
662 }
663 else
664 {
665 DebugStream.print("\n");
666 }
667 lines = null;
668
669 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
670 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
671
672 // Uncomment if you want to print out the entire contents of the XML doc:
673 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
674 // + xmlContents + "\n************END\n");
675 }
676 else
677 { // no particular line number, print out all the xml so debugger can inspect it
678 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
679 }
680 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
681 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
682 System.exit(-1);
683 }
684 else
685 { // not running in debug mode
686 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
687 }
688 }
689
690 static public StringBuffer readXMLStream(InputStream input_stream)
691 {
692 StringBuffer xml = new StringBuffer("");
693
694 try
695 {
696 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
697 BufferedReader buffered_in = new BufferedReader(isr);
698
699 String line = "";
700 boolean xml_content = false;
701 while ((line = buffered_in.readLine()) != null)
702 {
703 if (xml_content)
704 {
705 xml.append(line);
706 xml.append("\n");
707 }
708 else if (line.trim().startsWith("<?xml"))
709 {
710 xml_content = true;
711 xml.append(line);
712 xml.append("\n");
713 }
714 else
715 {
716 System.err.println(line);
717 }
718 }
719 buffered_in = null;
720 }
721 catch (Exception error)
722 {
723 System.err.println("Failed when trying to parse XML stream");
724 error.printStackTrace();
725 }
726
727 return xml;
728 }
729
730 /**
731 * Removes characters that are invalid in XML (see
732 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
733 */
734 static public String removeInvalidCharacters(String text)
735 {
736 char[] safe_characters = new char[text.length()];
737 int j = 0;
738
739 char[] raw_characters = new char[text.length()];
740 text.getChars(0, text.length(), raw_characters, 0);
741 for (int i = 0; i < raw_characters.length; i++)
742 {
743 char character = raw_characters[i];
744 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
745 {
746 safe_characters[j] = character;
747 j++;
748 }
749 }
750
751 return new String(safe_characters, 0, j);
752 }
753
754 static public void setElementTextValue(Element element, String text)
755 {
756 // Remove all text node children
757 NodeList children_nodelist = element.getChildNodes();
758 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
759 {
760 Node child_node = children_nodelist.item(i);
761 if (child_node.getNodeType() == Node.TEXT_NODE)
762 {
763 element.removeChild(child_node);
764 }
765 }
766
767 // Add a new text node
768 if (text != null)
769 {
770 element.appendChild(element.getOwnerDocument().createTextNode(text));
771 }
772 }
773
774 /**
775 * Set the #text node value of some element.
776 *
777 * @param element
778 * the Element whose value we wish to set
779 * @param value
780 * the new value for the element as a String Soon to be
781 * deprecated!
782 */
783 static final public void setValue(Element element, String value)
784 {
785 // Remove any existing child node(s)
786 clear(element);
787 // Add new text node.
788 if (value != null)
789 {
790 element.appendChild(element.getOwnerDocument().createTextNode(value));
791 }
792 }
793
/**
 * Re-indents the element children of an element in place, recursively.
 *
 * Whitespace-only text children are removed, a text node holding a newline
 * followed by depth tabs is inserted before every child element, and, if
 * the element has at least one child element, a text node holding a newline
 * followed by depth - 1 tabs is appended as the last child. Each child
 * element is then processed the same way with depth + 1.
 *
 * @param elem
 *            the element whose children are re-indented; modified in place
 * @param depth
 *            the number of tabs to put in front of each child element
 */
static public void indentXML(Element elem, int depth)
{
    Document doc = elem.getOwnerDocument();

    // Indentation placed before each child element: newline + depth tabs
    String startIndentString = "\n";
    for (int i = 0; i < depth; i++)
    {
        startIndentString += "\t";
    }
    // Used only as a template: clones of it are inserted further down
    Node startTextNode = doc.createTextNode(startIndentString);

    // Indentation placed after the last child: newline + (depth - 1) tabs
    String endIndentString = "\n";
    for (int i = 0; i < depth - 1; i++)
    {
        endIndentString += "\t";
    }
    Node endTextNode = doc.createTextNode(endIndentString);

    // First pass: find out whether there is any child element, removing the
    // whitespace-only text nodes met along the way. The pass stops at the
    // first element found.
    boolean found = false;
    Node child = elem.getFirstChild();
    while (child != null)
    {
        // first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
        if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
        {
            Node spaceTextNode = child;
            // step past the node before removing it
            child = child.getNextSibling();
            elem.removeChild(spaceTextNode);

            if(child == null) break;
        }

        // now process normal element nodes as intended
        // NOTE(review): if the node stepped onto above is itself whitespace-only
        // text, it is not removed in this pass; the loop just moves past it.
        if (child.getNodeType() == Node.ELEMENT_NODE)
        {
            found = true;
            break;
        }
        child = child.getNextSibling();
    }

    // Only elements that contain child elements get the trailing indentation
    if (found)
    {
        elem.appendChild(endTextNode);
    }

    // Second pass: remove the remaining whitespace-only text nodes, indent
    // each child element and recurse into it.
    child = elem.getFirstChild();
    while (child != null)
    {
        // Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
        // because the first while loop above would break out when it found an element node and wouldn't have got rid
        // of all the empty text nodes yet.
        // This time, beware not to delete the special end and start empty textnodes just added, since
        // they've been created and inserted specifically.
        if(child != endTextNode && child != startTextNode
                && child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
        {
            Node spaceTextNode = child;
            child = child.getNextSibling();
            elem.removeChild(spaceTextNode);

            if(child == null) break;
        }

        // go back to processing normal element nodes as intended
        if (child.getNodeType() == Node.ELEMENT_NODE)
        {
            // each element gets its own copy of the indentation text
            elem.insertBefore(startTextNode.cloneNode(false), child);
            indentXML((Element) child, depth + 1);
        }
        child = child.getNextSibling();
    }
}
867
868 /**
869 * Write an XML document to a given file with the text node of the specified
870 * element unescaped
871 */
872 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
873 {
874 indentXML(document.getDocumentElement(), 1);
875 try
876 {
877 OutputStream os = new FileOutputStream(xml_file);
878 // Create an output format for our document.
879 OutputFormat f = new OutputFormat(document);
880 f.setEncoding("UTF-8");
881 f.setIndenting(true);
882 f.setLineWidth(0); // Why isn't this working!
883 f.setPreserveSpace(true);
884 if (nonEscapingTagNames != null)
885 {
886 f.setNonEscapingElements(nonEscapingTagNames);
887 }
888 // Create the necessary writer stream for serialization.
889 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
890 Writer w = new BufferedWriter(osw);
891 // Generate a new serializer from the above.
892 XMLSerializer s = new XMLSerializer(w, f);
893 s.asDOMSerializer();
894 // Finally serialize the document to file.
895 s.serialize(document);
896 // And close.
897 os.close();
898 }
899 catch (Exception exception)
900 {
901 DebugStream.printStackTrace(exception);
902 }
903 }
904
905 /** Write an XML document to a given file */
906 static public void writeXMLFile(File xml_file, Document document)
907 {
908 writeXMLFile(xml_file, document, null);
909 }
910
911 public static void printXMLNode(Node e)
912 {
913 printXMLNode(e, 0);
914 }
915
916 public static void printXMLNode(Node e, int depth)
917 { //recursive method call using DOM API...
918
919 for (int i = 0; i < depth; i++)
920 System.out.print(' ');
921
922 if (e.getNodeType() == Node.TEXT_NODE)
923 {
924 //System.out.println("text") ;
925 if (e.getNodeValue() != "")
926 {
927 System.out.println(e.getNodeValue());
928 }
929 return;
930 }
931
932 System.out.print('<');
933 System.out.print(e.getNodeName());
934 NamedNodeMap attrs = e.getAttributes();
935 if (attrs != null)
936 {
937 for (int i = 0; i < attrs.getLength(); i++)
938 {
939 Node attr = attrs.item(i);
940 System.out.print(' ');
941 System.out.print(attr.getNodeName());
942 System.out.print("=\"");
943 System.out.print(attr.getNodeValue());
944 System.out.print('"');
945 }
946 }
947 NodeList children = e.getChildNodes();
948
949 if (children == null || children.getLength() == 0)
950 System.out.println("/>");
951 else
952 {
953
954 System.out.println('>');
955
956 int len = children.getLength();
957 for (int i = 0; i < len; i++)
958 {
959 printXMLNode(children.item(i), depth + 1);
960 }
961
962 for (int i = 0; i < depth; i++)
963 System.out.print(' ');
964
965 System.out.println("</" + e.getNodeName() + ">");
966 }
967
968 }
969
970 public static String xmlNodeToString(Node e)
971 {
972 StringBuffer sb = new StringBuffer("");
973 xmlNodeToString(sb, e, true, "\t", 2);
974 return sb.toString();
975 }
976
977 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
978 {
979 if (e.getNodeType() == Node.TEXT_NODE)
980 {
981 if (e.getNodeValue() != "")
982 {
983 String text = e.getNodeValue();
984 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("[\\n\\r\\t\\s]*$", "");
985 for (Character c : text.toCharArray())
986 {
987 if (c.equals('\n'))
988 {
989 text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
990 break;
991 }
992
993 if (!Character.isWhitespace(c))
994 {
995 break;
996 }
997 }
998 sb.append(text);
999 }
1000 return;
1001 }
1002
1003 if (e.getNodeType() == Node.COMMENT_NODE)
1004 {
1005 if (e.getNodeValue() != "")
1006 {
1007 sb.append("<!--\n" + e.getNodeValue() + "\n-->\n");
1008 }
1009 return;
1010 }
1011
1012 if (indent)
1013 {
1014 for (int i = 0; i < depth; i++)
1015 {
1016 sb.append(indentString);
1017 }
1018 }
1019
1020 sb.append('<');
1021 sb.append(e.getNodeName());
1022 NamedNodeMap attrs = e.getAttributes();
1023 if (attrs != null)
1024 {
1025 for (int i = 0; i < attrs.getLength(); i++)
1026 {
1027 Node attr = attrs.item(i);
1028 sb.append(' ');
1029 sb.append(attr.getNodeName());
1030 sb.append("=\"");
1031 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
1032 sb.append('"');
1033 }
1034 }
1035 NodeList children = e.getChildNodes();
1036
1037 boolean hasElements = false;
1038 boolean indentSwapped = false;
1039 for (int i = 0; i < children.getLength(); i++)
1040 {
1041 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1042 {
1043 hasElements = true;
1044 }
1045 if (children.item(i).getNodeType() == Node.TEXT_NODE && indent)
1046 {
1047 if (children.item(i).getNodeValue().trim().length() > 0)
1048 {
1049 indentSwapped = true;
1050 indent = false;
1051 }
1052 }
1053 }
1054
1055 if (children == null || children.getLength() == 0)
1056 {
1057 sb.append("/>");
1058
1059 if (indent)
1060 {
1061 sb.append("\n");
1062 }
1063 }
1064 else
1065 {
1066 sb.append(">");
1067 if (hasElements && indent)
1068 {
1069 sb.append("\n");
1070 }
1071
1072 int len = children.getLength();
1073 for (int i = 0; i < len; i++)
1074 {
1075 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1076 }
1077
1078 if (indent)
1079 {
1080 for (int i = 0; i < depth; i++)
1081 {
1082 sb.append(indentString);
1083 }
1084 }
1085
1086 sb.append("</" + e.getNodeName() + ">");
1087
1088 if ((hasElements && indent) || indentSwapped)
1089 {
1090 sb.append("\n");
1091 }
1092 }
1093 }
1094
1095 public static String xmlNodeToStringWithoutIndenting(Node e)
1096 {
1097 StringBuffer sb = new StringBuffer("");
1098 xmlNodeToStringWithoutNewline(sb, e, -1);
1099 return sb.toString();
1100 }
1101
1102 public static String xmlNodeToStringWithoutNewline(Node e)
1103 {
1104 StringBuffer sb = new StringBuffer("");
1105 xmlNodeToStringWithoutNewline(sb, e, 0);
1106 return sb.toString();
1107 }
1108
1109 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1110 {
1111
1112 for (int i = 0; i < depth; i++)
1113 {
1114 sb.append(' ');
1115 }
1116
1117 if (e.getNodeType() == Node.TEXT_NODE)
1118 {
1119 if (e.getNodeValue() != "")
1120 {
1121 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1122 }
1123 return;
1124 }
1125
1126 if (e.getNodeType() == Node.COMMENT_NODE)
1127 {
1128 if (e.getNodeValue() != "")
1129 {
1130 sb.append("<!--" + e.getNodeValue() + "-->");
1131 }
1132 return;
1133 }
1134
1135 sb.append('<');
1136 sb.append(e.getNodeName());
1137 NamedNodeMap attrs = e.getAttributes();
1138 if (attrs != null)
1139 {
1140 for (int i = 0; i < attrs.getLength(); i++)
1141 {
1142 Node attr = attrs.item(i);
1143 sb.append(' ');
1144 sb.append(attr.getNodeName());
1145 sb.append("=\"");
1146 sb.append(attr.getNodeValue());
1147 sb.append('"');
1148 }
1149 }
1150 NodeList children = e.getChildNodes();
1151
1152 if (children == null || children.getLength() == 0)
1153 sb.append("/>");
1154 else
1155 {
1156
1157 sb.append(">");
1158
1159 int len = children.getLength();
1160 for (int i = 0; i < len; i++)
1161 {
1162 if (depth >= 0)
1163 {
1164 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1165 }
1166 else
1167 {
1168 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1169 }
1170 }
1171
1172 for (int i = 0; i < depth; i++)
1173 sb.append(' ');
1174
1175 sb.append("</" + e.getNodeName() + ">");
1176 }
1177 }
1178}
Note: See TracBrowser for help on using the repository browser.