source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 25610

Last change on this file since 25610 was 25610, checked in by sjm84, 12 years ago

First phase of making GLI format editor properly indented

  • Property svn:keywords set to Author Date Id Revision
File size: 29.4 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15
16// SAX
17import org.xml.sax.XMLReader;
18import org.xml.sax.SAXException;
19import org.xml.sax.SAXParseException;
20import org.xml.sax.helpers.DefaultHandler;
21import org.xml.sax.InputSource;
22
23// JAXP
24import javax.xml.parsers.FactoryConfigurationError;
25import javax.xml.parsers.ParserConfigurationException;
26import javax.xml.parsers.SAXParser;
27import javax.xml.parsers.SAXParserFactory;
28
29/** This class is a static class containing useful XML functions */
30public class XMLTools
31{
32 /** extracts the text out of a node */
33 public static Node getNodeTextNode(Element param)
34 {
35 param.normalize();
36 Node n = param.getFirstChild();
37 while (n != null && n.getNodeType() != Node.TEXT_NODE)
38 {
39 n = n.getNextSibling();
40 }
41 return n;
42 }
43
44 /** extracts the text out of a node */
45 public static String getNodeText(Element param)
46 {
47 Node text_node = getNodeTextNode(param);
48 if (text_node == null)
49 {
50 return "";
51 }
52 return text_node.getNodeValue();
53 }
54
55 public static void setNodeText(Element elem, String text)
56 {
57 Node old_text_node = getNodeTextNode(elem);
58 if (old_text_node != null)
59 {
60 elem.removeChild(old_text_node);
61 }
62 Text t = elem.getOwnerDocument().createTextNode(text);
63 elem.appendChild(t);
64 }
65
66 /** returns the (first) child element with the given name */
67 public static Node getChildByTagName(Node n, String name)
68 {
69
70 Node child = n.getFirstChild();
71 while (child != null)
72 {
73 if (child.getNodeName().equals(name))
74 {
75 return child;
76 }
77 child = child.getNextSibling();
78 }
79 return null; //not found
80 }
81
82 /**
83 * returns the (nth) child element with the given name index numbers start
84 * at 0
85 */
86 public static Node getChildByTagNameIndexed(Node n, String name, int index)
87 {
88 if (index == -1)
89 {
90 return getChildByTagName(n, name);
91 }
92 int count = 0;
93 Node child = n.getFirstChild();
94 while (child != null)
95 {
96 if (child.getNodeName().equals(name))
97 {
98 if (count == index)
99 {
100 return child;
101 }
102 else
103 {
104 count++;
105 }
106 }
107 child = child.getNextSibling();
108 }
109 return null; //not found
110 }
111
112 /**
113 * returns the element parent/node_name[@attribute_name='attribute_value']
114 */
115 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
116 {
117
118 NodeList children = parent.getChildNodes();
119 for (int i = 0; i < children.getLength(); i++)
120 {
121 Node child = children.item(i);
122 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
123 if (child.getNodeName().equals(node_name))
124 {
125 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
126 return (Element) child;
127 }
128 }
129 // not found
130 return null;
131 }
132
133 /**
134 * returns a list of elements
135 * parent/node_name[@attribute_name='attribute_value']
136 */
137 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
138 {
139 ArrayList elements = new ArrayList();
140 NodeList children = parent.getChildNodes();
141 for (int i = 0; i < children.getLength(); i++)
142 {
143 //System.out.println("getNamedElementList");
144 Node child = children.item(i);
145 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
146 if (child.getNodeName().equals(node_name))
147 {
148 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
149 elements.add((Element) child);
150 }
151 }
152 // not found
153 if (elements.size() == 0)
154 {
155 elements = null;
156 }
157 return elements;
158 }
159
160 public static void copyAllChildren(Element to, Element from)
161 {
162
163 Document to_doc = to.getOwnerDocument();
164 Node child = from.getFirstChild();
165 while (child != null)
166 {
167 to.appendChild(to_doc.importNode(child, true));
168 child = child.getNextSibling();
169 }
170 }
171
172 /** Duplicates an element */
173 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
174 {
175 return duplicateElementNS(owner, element, null, with_attributes);
176 }
177
178 /** Duplicates an element */
179 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
180 {
181 Element duplicate;
182 if (namespace_uri == null)
183 {
184 duplicate = owner.createElement(element.getTagName());
185 }
186 else
187 {
188 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
189 }
190 // Copy element attributes
191 if (with_attributes)
192 {
193 NamedNodeMap attributes = element.getAttributes();
194 for (int i = 0; i < attributes.getLength(); i++)
195 {
196 Node attribute = attributes.item(i);
197 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
198 }
199 }
200
201 // Copy element children
202 NodeList children = element.getChildNodes();
203 for (int i = 0; i < children.getLength(); i++)
204 {
205 Node child = children.item(i);
206 duplicate.appendChild(owner.importNode(child, true));
207 }
208
209 return duplicate;
210 }
211
212 /** Remove all of the child nodes from a certain node. */
213 static final public void clear(Node node)
214 {
215 while (node.hasChildNodes())
216 {
217 node.removeChild(node.getFirstChild());
218 }
219 }
220
221 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
222 {
223 ArrayList child_elements = new ArrayList();
224
225 NodeList children_nodelist = parent_element.getChildNodes();
226 for (int i = 0; i < children_nodelist.getLength(); i++)
227 {
228 Node child_node = children_nodelist.item(i);
229 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
230 {
231 child_elements.add(child_node);
232 }
233 }
234
235 return child_elements;
236 }
237
238 static public String getElementTextValue(Element element)
239 {
240 // Find the first text node child
241 NodeList children_nodelist = element.getChildNodes();
242 for (int i = 0; i < children_nodelist.getLength(); i++)
243 {
244 Node child_node = children_nodelist.item(i);
245 if (child_node.getNodeType() == Node.TEXT_NODE)
246 {
247 return child_node.getNodeValue();
248 }
249 }
250
251 // None found
252 return "";
253 }
254
255 /**
256 * Method to retrieve the value of a given node.
257 *
258 * @param element
259 * The <strong>Element</strong> whose value we wish to find. Soon
260 * to be deprecated!
261 */
262 static final public String getValue(Node element)
263 {
264 if (element == null)
265 {
266 return "";
267 }
268 // If we've been given a subject node first retrieve its value node.
269 if (element.getNodeName().equals("Subject"))
270 {
271 element = getNodeFromNamed(element, "Value");
272 }
273 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
274 if (element != null && element.hasChildNodes())
275 {
276 StringBuffer text_buffer = new StringBuffer();
277 NodeList text_nodes = element.getChildNodes();
278 for (int i = 0; i < text_nodes.getLength(); i++)
279 {
280 Node possible_text = text_nodes.item(i);
281 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
282 {
283 text_buffer.append(possible_text.getNodeValue());
284 }
285 }
286 return text_buffer.toString();
287 }
288 return "";
289 }
290
291 /**
292 * Method to retrieve from the node given, a certain child node with the
293 * specified name.
294 *
295 * @param parent
296 * The <strong>Node</strong> whose children should be searched.
297 * @param name
298 * The required nodes name as a <strong>String</strong>.
299 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
300 * otherwise. Soon to be deprecated!
301 */
302 static final public Node getNodeFromNamed(Node parent, String name)
303 {
304 Node child = null;
305 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
306 {
307 if (i.getNodeName().equals(name))
308 {
309 child = i;
310 }
311 }
312 return child;
313 }
314
315 static final public String WELLFORMED = "well-formed !";
316 static final public String NOTWELLFORMED = "not well-formed";
317 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
318 static final private String FOOTER = "</collectionConfig>";
319
320 public static String parse(String xml_str)
321 {
322 String validation_msg = WELLFORMED;
323 xml_str = HEADER + xml_str + FOOTER;
324 try
325 {
326 SAXParserFactory factory = SAXParserFactory.newInstance();
327 factory.setNamespaceAware(true);
328 //factory.setValidating (true);
329 SAXParser parser = factory.newSAXParser();
330 InputSource iSource = new InputSource(new StringReader(xml_str));
331 // parser.parse (iSource, new DefaultHandler ());
332
333 org.xml.sax.XMLReader reader = parser.getXMLReader();
334 reader.setContentHandler(new DefaultHandler());
335 reader.setErrorHandler(new DefaultHandler());
336 reader.parse(iSource);
337 }
338 catch (FactoryConfigurationError e)
339 {
340 validation_msg = "unable to get a document builder factory";
341 }
342 catch (ParserConfigurationException e)
343 {
344 validation_msg = "unable to configure parser";
345 }
346 catch (SAXParseException e)
347 {
348 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
349 }
350 catch (SAXException e)
351 {
352 validation_msg += " Fatal error: " + e.toString();
353 }
354 catch (IOException e)
355 {
356 validation_msg = "Unable to read the input, i/o error";
357 }
358
359 return validation_msg;
360 }
361
362 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
363 public static String parseDOM(String xml_str)
364 {
365 String validation_msg = WELLFORMED;
366
367 try
368 {
369 SAXParserFactory factory = SAXParserFactory.newInstance();
370 factory.setNamespaceAware(true);
371 //factory.setValidating (true);
372 SAXParser parser = factory.newSAXParser();
373 InputSource iSource = new InputSource(new StringReader(xml_str));
374 // parser.parse (iSource, new DefaultHandler ());
375
376 org.xml.sax.XMLReader reader = parser.getXMLReader();
377 reader.setContentHandler(new DefaultHandler());
378 reader.setErrorHandler(new DefaultHandler());
379 reader.parse(iSource);
380 }
381 catch (FactoryConfigurationError e)
382 {
383 validation_msg = "unable to get a document builder factory";
384 }
385 catch (ParserConfigurationException e)
386 {
387 validation_msg = "unable to configure parser";
388 }
389 catch (SAXParseException e)
390 {
391 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
392 }
393 catch (SAXException e)
394 {
395 validation_msg += " " + e.toString();
396 }
397 catch (IOException e)
398 {
399 validation_msg = "Unable to read the input, i/o error";
400 }
401
402 return validation_msg;
403 }
404
405 public static String parse(File xml_file)
406 {
407 String validation_msg = WELLFORMED;
408
409 try
410 {
411 SAXParserFactory factory = SAXParserFactory.newInstance();
412 factory.setNamespaceAware(true);
413 //factory.setValidating (true);
414 SAXParser parser = factory.newSAXParser();
415 FileReader r = new FileReader(xml_file);
416 InputSource iSource = new InputSource(r);
417 XMLReader reader = parser.getXMLReader();
418 reader.setContentHandler(new DefaultHandler());
419 reader.setErrorHandler(new DefaultHandler());
420 reader.parse(iSource);
421 }
422 catch (FactoryConfigurationError e)
423 {
424 validation_msg = "unable to get a document builder factory";
425 }
426 catch (ParserConfigurationException e)
427 {
428 validation_msg = "unable to configure parser";
429 }
430 catch (SAXParseException e)
431 {
432 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433 }
434 catch (SAXException e)
435 {
436 validation_msg += " Fatal error: " + e.toString();
437 }
438 catch (IOException e)
439 {
440 validation_msg = "Unable to read the input, i/o error";
441 }
442
443 return validation_msg;
444 }
445
446 /** Returns a string of the location. */
447 private static String getLocationString(SAXParseException ex)
448 {
449 StringBuffer str = new StringBuffer();
450
451 String systemId = ex.getSystemId();
452 if (systemId != null)
453 {
454 int index = systemId.lastIndexOf('/');
455 if (index != -1)
456 systemId = systemId.substring(index + 1);
457 str.append(systemId);
458 }
459 str.append("(line ");
460 str.append(ex.getLineNumber() - 1);
461 str.append(", column ");
462 str.append(ex.getColumnNumber());
463 str.append("): ");
464
465 return str.toString();
466
467 } // getLocationString(SAXParseException):String
468
469 /** Parse an XML document from a given file path */
470 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
471 {
472 if (use_class_loader == true)
473 {
474 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
475 if (is != null)
476 {
477 return parseXML(is);
478 }
479 }
480
481 // Try the file outside the classes directory
482 return parseXMLFile(new File(xml_file_path));
483 }
484
485 /** Parse an XML document from a given file */
486 static public Document parseXMLFile(File xml_file)
487 {
488 // No file? No point trying!
489 if (xml_file.exists() == false)
490 {
491 return null;
492 }
493
494 try
495 {
496 return parseXML(new FileInputStream(xml_file));
497 }
498 catch (Exception exception)
499 {
500 DebugStream.printStackTrace(exception);
501 return null;
502 }
503 }
504
505 /** Parse an XML document from a given input stream */
506 static public Document parseXML(InputStream xml_input_stream)
507 {
508 Document document = null;
509
510 try
511 {
512 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
513 document = parseXML(isr);
514 isr.close();
515 xml_input_stream.close();
516 }
517 catch (Exception exception)
518 {
519 DebugStream.printStackTrace(exception);
520 }
521
522 return document;
523 }
524
525 /** Parse an XML document from a given reader */
526 static public Document parseXML(Reader xml_reader)
527 {
528 Document document = null;
529
530 // If debugging, the following will store the XML contents to be parsed,
531 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
532 String xmlContents = "";
533
534 try
535 {
536 Reader reader = null;
537
538 // (1) By default, GLI will remove any contents preceeding (and invalidating)
539 // the XML and present these lines separately to the user
540 if (!DebugStream.isDebuggingEnabled())
541 {
542 try
543 {
544 reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
545 }
546 catch (Exception e)
547 {
548 System.err.println("Exception while wrapping the reader in parseXML(Reader)");
549 e.printStackTrace();
550 }
551 }
552
553 // (2) If we are running GLI in debug mode:
554 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
555 // idea of where things went wrong. This will print the "XML" contents to either
556 // system.out (if debugging is off) or to the DebugStream otherwise.
557 // We need to read the XML twice to know the line where things went wrong, so
558 // do the additional reading only if we're debugging
559 else
560 {
561 StringBuffer buf = new StringBuffer();
562 char[] buffer = new char[500];
563 int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
564 while (numCharsRead != -1)
565 {
566 buf.append(buffer, 0, numCharsRead);
567 numCharsRead = xml_reader.read(buffer, 0, buffer.length);
568 }
569 xmlContents = buf.toString();
570 xml_reader.close(); // closing the old Reader
571 xml_reader = null;
572 buffer = null;
573 buf = null;
574 // we need a Reader to parse the same contents as the Reader that was just closed
575 reader = new BufferedReader(new StringReader(xmlContents));
576 //System.err.println("xmlContents:\n" + xmlContents);
577 }
578
579 // (2) The actual XML parsing
580 InputSource isc = new InputSource(reader);
581 DOMParser parser = new DOMParser();
582 parser.setFeature("http://xml.org/sax/features/validation", false);
583 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
584 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
585 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
586 parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
587 parser.parse(isc);
588 document = parser.getDocument();
589
590 }
591 catch (SAXParseException e)
592 {
593 showXMLParseFailureLine(e, xmlContents);
594 }
595 catch (SAXException exception)
596 {
597 System.err.println("SAX exception: " + exception.getMessage());
598 if (DebugStream.isDebuggingEnabled())
599 {
600 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
601 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
602 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
603 System.exit(-1);
604 }
605 // else, not running in debug mode, so don't exit after exception
606 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
607 DebugStream.printStackTrace(exception);
608 }
609 catch (Exception exception)
610 {
611 DebugStream.printStackTrace(exception);
612 }
613
614 return document;
615 }
616
617 /**
618 * Displays the line (string) where the SAXParseException occurred, given a
619 * String of the entire xml that was being parsed and the SAXParseException
620 * object that was caught. The messages are printed to DebugStream, so run
621 * GLI/FLI with -debug to view this output.
622 *
623 * @param xmlContents
624 * is the entire xml that was being parsed when the exception
625 * occurred
626 * @param e
627 * is the SAXParseException object that was thrown upon parsing
628 * the xmlContents.
629 */
630 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
631 {
632
633 // There should be no characters at all that preceed the <?xml>... bit.
634 // The first check is for starting spaces:
635 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
636 {
637 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
638 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
639 return; // nothing more to do, first error identified
640 }
641
642 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
643 String line = "";
644 int linenumber = e.getLineNumber();
645 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
646 if (DebugStream.isDebuggingEnabled())
647 {
648 if (linenumber != -1)
649 {
650 String[] lines = xmlContents.split("\n");
651 if (lines.length > 0)
652 {
653 DebugStream.println(" (number of lines: " + lines.length + ")");
654 if (lines.length >= linenumber)
655 {
656 line = lines[linenumber - 1];
657 }
658 else
659 { // error is past the last line
660 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
661 }
662 }
663 else
664 {
665 DebugStream.print("\n");
666 }
667 lines = null;
668
669 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
670 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
671
672 // Uncomment if you want to print out the entire contents of the XML doc:
673 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
674 // + xmlContents + "\n************END\n");
675 }
676 else
677 { // no particular line number, print out all the xml so debugger can inspect it
678 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
679 }
680 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
681 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
682 System.exit(-1);
683 }
684 else
685 { // not running in debug mode
686 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
687 }
688 }
689
690 static public StringBuffer readXMLStream(InputStream input_stream)
691 {
692 StringBuffer xml = new StringBuffer("");
693
694 try
695 {
696 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
697 BufferedReader buffered_in = new BufferedReader(isr);
698
699 String line = "";
700 boolean xml_content = false;
701 while ((line = buffered_in.readLine()) != null)
702 {
703 if (xml_content)
704 {
705 xml.append(line);
706 xml.append("\n");
707 }
708 else if (line.trim().startsWith("<?xml"))
709 {
710 xml_content = true;
711 xml.append(line);
712 xml.append("\n");
713 }
714 else
715 {
716 System.err.println(line);
717 }
718 }
719 buffered_in = null;
720 }
721 catch (Exception error)
722 {
723 System.err.println("Failed when trying to parse XML stream");
724 error.printStackTrace();
725 }
726
727 return xml;
728 }
729
730 /**
731 * Removes characters that are invalid in XML (see
732 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
733 */
734 static public String removeInvalidCharacters(String text)
735 {
736 char[] safe_characters = new char[text.length()];
737 int j = 0;
738
739 char[] raw_characters = new char[text.length()];
740 text.getChars(0, text.length(), raw_characters, 0);
741 for (int i = 0; i < raw_characters.length; i++)
742 {
743 char character = raw_characters[i];
744 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
745 {
746 safe_characters[j] = character;
747 j++;
748 }
749 }
750
751 return new String(safe_characters, 0, j);
752 }
753
754 static public void setElementTextValue(Element element, String text)
755 {
756 // Remove all text node children
757 NodeList children_nodelist = element.getChildNodes();
758 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
759 {
760 Node child_node = children_nodelist.item(i);
761 if (child_node.getNodeType() == Node.TEXT_NODE)
762 {
763 element.removeChild(child_node);
764 }
765 }
766
767 // Add a new text node
768 if (text != null)
769 {
770 element.appendChild(element.getOwnerDocument().createTextNode(text));
771 }
772 }
773
774 /**
775 * Set the #text node value of some element.
776 *
777 * @param element
778 * the Element whose value we wish to set
779 * @param value
780 * the new value for the element as a String Soon to be
781 * deprecated!
782 */
783 static final public void setValue(Element element, String value)
784 {
785 // Remove any existing child node(s)
786 clear(element);
787 // Add new text node.
788 if (value != null)
789 {
790 element.appendChild(element.getOwnerDocument().createTextNode(value));
791 }
792 }
793
794 static public void indentXML(Element elem, int depth)
795 {
796 Document doc = elem.getOwnerDocument();
797
798 String startIndentString = "\n";
799 for (int i = 0; i < depth; i++)
800 {
801 startIndentString += "\t";
802 }
803 Node startTextNode = doc.createTextNode(startIndentString);
804
805 String endIndentString = "\n";
806 for (int i = 0; i < depth - 1; i++)
807 {
808 endIndentString += "\t";
809 }
810 Node endTextNode = doc.createTextNode(endIndentString);
811
812 boolean found = false;
813 Node child = elem.getFirstChild();
814 while (child != null)
815 {
816 if (child.getNodeType() == Node.ELEMENT_NODE)
817 {
818 found = true;
819 break;
820 }
821 child = child.getNextSibling();
822 }
823
824 if (found)
825 {
826 elem.appendChild(endTextNode);
827 }
828
829 child = elem.getFirstChild();
830 while (child != null)
831 {
832 if (child.getNodeType() == Node.ELEMENT_NODE)
833 {
834 elem.insertBefore(startTextNode.cloneNode(false), child);
835 indentXML((Element) child, depth + 1);
836 }
837 child = child.getNextSibling();
838 }
839 }
840
841 /**
842 * Write an XML document to a given file with the text node of the specified
843 * element unescaped
844 */
845 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
846 {
847 indentXML(document.getDocumentElement(), 1);
848 try
849 {
850 OutputStream os = new FileOutputStream(xml_file);
851 // Create an output format for our document.
852 OutputFormat f = new OutputFormat(document);
853 f.setEncoding("UTF-8");
854 f.setIndenting(true);
855 f.setLineWidth(0); // Why isn't this working!
856 f.setPreserveSpace(true);
857 if (nonEscapingTagNames != null)
858 {
859 f.setNonEscapingElements(nonEscapingTagNames);
860 }
861 // Create the necessary writer stream for serialization.
862 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
863 Writer w = new BufferedWriter(osw);
864 // Generate a new serializer from the above.
865 XMLSerializer s = new XMLSerializer(w, f);
866 s.asDOMSerializer();
867 // Finally serialize the document to file.
868 s.serialize(document);
869 // And close.
870 os.close();
871 }
872 catch (Exception exception)
873 {
874 DebugStream.printStackTrace(exception);
875 }
876 }
877
878 /** Write an XML document to a given file */
879 static public void writeXMLFile(File xml_file, Document document)
880 {
881 writeXMLFile(xml_file, document, null);
882 }
883
884 public static void printXMLNode(Node e)
885 {
886 printXMLNode(e, 0);
887 }
888
889 public static void printXMLNode(Node e, int depth)
890 { //recursive method call using DOM API...
891
892 for (int i = 0; i < depth; i++)
893 System.out.print(' ');
894
895 if (e.getNodeType() == Node.TEXT_NODE)
896 {
897 //System.out.println("text") ;
898 if (e.getNodeValue() != "")
899 {
900 System.out.println(e.getNodeValue());
901 }
902 return;
903 }
904
905 System.out.print('<');
906 System.out.print(e.getNodeName());
907 NamedNodeMap attrs = e.getAttributes();
908 if (attrs != null)
909 {
910 for (int i = 0; i < attrs.getLength(); i++)
911 {
912 Node attr = attrs.item(i);
913 System.out.print(' ');
914 System.out.print(attr.getNodeName());
915 System.out.print("=\"");
916 System.out.print(attr.getNodeValue());
917 System.out.print('"');
918 }
919 }
920 NodeList children = e.getChildNodes();
921
922 if (children == null || children.getLength() == 0)
923 System.out.println("/>");
924 else
925 {
926
927 System.out.println('>');
928
929 int len = children.getLength();
930 for (int i = 0; i < len; i++)
931 {
932 printXMLNode(children.item(i), depth + 1);
933 }
934
935 for (int i = 0; i < depth; i++)
936 System.out.print(' ');
937
938 System.out.println("</" + e.getNodeName() + ">");
939 }
940
941 }
942
943 public static String xmlNodeToString(Node e)
944 {
945 StringBuffer sb = new StringBuffer("");
946 xmlNodeToString(sb, e, true, "\t", 2);
947 return sb.toString();
948 }
949
950 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
951 {
952 if (e.getNodeType() == Node.TEXT_NODE)
953 {
954 if (e.getNodeValue() != "")
955 {
956 String text = e.getNodeValue();
957 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("^[\\n\\r\\t\\s]*", "");
958 sb.append(text);
959 }
960 return;
961 }
962
963 if (e.getNodeType() == Node.COMMENT_NODE)
964 {
965 if (e.getNodeValue() != "")
966 {
967 sb.append("<!--" + e.getNodeValue() + "-->");
968 }
969 return;
970 }
971
972 if (indent)
973 {
974 for (int i = 0; i < depth; i++)
975 {
976 sb.append(indentString);
977 }
978 }
979
980 sb.append('<');
981 sb.append(e.getNodeName());
982 NamedNodeMap attrs = e.getAttributes();
983 if (attrs != null)
984 {
985 for (int i = 0; i < attrs.getLength(); i++)
986 {
987 Node attr = attrs.item(i);
988 sb.append(' ');
989 sb.append(attr.getNodeName());
990 sb.append("=\"");
991 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
992 sb.append('"');
993 }
994 }
995 NodeList children = e.getChildNodes();
996
997 boolean hasElements = false;
998 boolean indentSwapped = false;
999 for (int i = 0; i < children.getLength(); i++)
1000 {
1001 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1002 {
1003 hasElements = true;
1004 }
1005 if (children.item(i).getNodeType() == Node.TEXT_NODE && indent)
1006 {
1007 if (children.item(i).getNodeValue().matches("[^\\s]*"))
1008 {
1009 indentSwapped = true;
1010 indent = false;
1011 }
1012 }
1013 }
1014
1015 if (children == null || children.getLength() == 0)
1016 {
1017 sb.append("/>");
1018
1019 if (indent)
1020 {
1021 sb.append("\n");
1022 }
1023 }
1024 else
1025 {
1026 sb.append(">");
1027 if (hasElements && indent)
1028 {
1029 sb.append("\n");
1030 }
1031
1032 int len = children.getLength();
1033 for (int i = 0; i < len; i++)
1034 {
1035 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1036 }
1037
1038 if (indent)
1039 {
1040 for (int i = 0; i < depth; i++)
1041 {
1042 sb.append(indentString);
1043 }
1044 }
1045
1046 sb.append("</" + e.getNodeName() + ">");
1047
1048 if ((hasElements && indent) || indentSwapped)
1049 {
1050 sb.append("\n");
1051 }
1052 }
1053 }
1054
1055 public static String xmlNodeToStringWithoutIndenting(Node e)
1056 {
1057 StringBuffer sb = new StringBuffer("");
1058 xmlNodeToStringWithoutNewline(sb, e, -1);
1059 return sb.toString();
1060 }
1061
1062 public static String xmlNodeToStringWithoutNewline(Node e)
1063 {
1064 StringBuffer sb = new StringBuffer("");
1065 xmlNodeToStringWithoutNewline(sb, e, 0);
1066 return sb.toString();
1067 }
1068
1069 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1070 {
1071
1072 for (int i = 0; i < depth; i++)
1073 {
1074 sb.append(' ');
1075 }
1076
1077 if (e.getNodeType() == Node.TEXT_NODE)
1078 {
1079 if (e.getNodeValue() != "")
1080 {
1081 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1082 }
1083 return;
1084 }
1085
1086 if (e.getNodeType() == Node.COMMENT_NODE)
1087 {
1088 if (e.getNodeValue() != "")
1089 {
1090 sb.append("<!--" + e.getNodeValue() + "-->");
1091 }
1092 return;
1093 }
1094
1095 sb.append('<');
1096 sb.append(e.getNodeName());
1097 NamedNodeMap attrs = e.getAttributes();
1098 if (attrs != null)
1099 {
1100 for (int i = 0; i < attrs.getLength(); i++)
1101 {
1102 Node attr = attrs.item(i);
1103 sb.append(' ');
1104 sb.append(attr.getNodeName());
1105 sb.append("=\"");
1106 sb.append(attr.getNodeValue());
1107 sb.append('"');
1108 }
1109 }
1110 NodeList children = e.getChildNodes();
1111
1112 if (children == null || children.getLength() == 0)
1113 sb.append("/>");
1114 else
1115 {
1116
1117 sb.append(">");
1118
1119 int len = children.getLength();
1120 for (int i = 0; i < len; i++)
1121 {
1122 if (depth >= 0)
1123 {
1124 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1125 }
1126 else
1127 {
1128 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1129 }
1130 }
1131
1132 for (int i = 0; i < depth; i++)
1133 sb.append(' ');
1134
1135 sb.append("</" + e.getNodeName() + ">");
1136 }
1137 }
1138}
Note: See TracBrowser for help on using the repository browser.