source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 29001

Last change on this file since 29001 was 29001, checked in by ak19, 10 years ago

Correction to recent commit in OpenCollectionDialog. And cosmetic changes to FormatConversionDialog and XMLTools

  • Property svn:keywords set to Author Date Id Revision
File size: 32.0 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15
16// SAX
17import org.xml.sax.XMLReader;
18import org.xml.sax.SAXException;
19import org.xml.sax.SAXParseException;
20import org.xml.sax.helpers.DefaultHandler;
21import org.xml.sax.InputSource;
22
23// JAXP
24import javax.xml.parsers.DocumentBuilder;
25import javax.xml.parsers.DocumentBuilderFactory;
26import javax.xml.parsers.FactoryConfigurationError;
27import javax.xml.parsers.ParserConfigurationException;
28import javax.xml.parsers.SAXParser;
29import javax.xml.parsers.SAXParserFactory;
30
31
32/** This class is a static class containing useful XML functions */
33public class XMLTools
34{
35 /** extracts the text out of a node */
36 public static Node getNodeTextNode(Element param)
37 {
38 param.normalize();
39 Node n = param.getFirstChild();
40 while (n != null && n.getNodeType() != Node.TEXT_NODE)
41 {
42 n = n.getNextSibling();
43 }
44 return n;
45 }
46
47 /** extracts the text out of a node */
48 public static String getNodeText(Element param)
49 {
50 Node text_node = getNodeTextNode(param);
51 if (text_node == null)
52 {
53 return "";
54 }
55 return text_node.getNodeValue();
56 }
57
58 public static void setNodeText(Element elem, String text)
59 {
60 Node old_text_node = getNodeTextNode(elem);
61 if (old_text_node != null)
62 {
63 elem.removeChild(old_text_node);
64 }
65 Text t = elem.getOwnerDocument().createTextNode(text);
66 elem.appendChild(t);
67 }
68
69 /** returns the (first) child element with the given name */
70 public static Node getChildByTagName(Node n, String name)
71 {
72
73 Node child = n.getFirstChild();
74 while (child != null)
75 {
76 if (child.getNodeName().equals(name))
77 {
78 return child;
79 }
80 child = child.getNextSibling();
81 }
82 return null; //not found
83 }
84
85 /**
86 * returns the (nth) child element with the given name index numbers start
87 * at 0
88 */
89 public static Node getChildByTagNameIndexed(Node n, String name, int index)
90 {
91 if (index == -1)
92 {
93 return getChildByTagName(n, name);
94 }
95 int count = 0;
96 Node child = n.getFirstChild();
97 while (child != null)
98 {
99 if (child.getNodeName().equals(name))
100 {
101 if (count == index)
102 {
103 return child;
104 }
105 else
106 {
107 count++;
108 }
109 }
110 child = child.getNextSibling();
111 }
112 return null; //not found
113 }
114
115 /**
116 * returns the element parent/node_name[@attribute_name='attribute_value']
117 */
118 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
119 {
120
121 NodeList children = parent.getChildNodes();
122 for (int i = 0; i < children.getLength(); i++)
123 {
124 Node child = children.item(i);
125 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
126 if (child.getNodeName().equals(node_name))
127 {
128 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
129 return (Element) child;
130 }
131 }
132 // not found
133 return null;
134 }
135
136 /**
137 * returns a list of elements
138 * parent/node_name[@attribute_name='attribute_value']
139 */
140 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
141 {
142 ArrayList elements = new ArrayList();
143 NodeList children = parent.getChildNodes();
144 for (int i = 0; i < children.getLength(); i++)
145 {
146 //System.out.println("getNamedElementList");
147 Node child = children.item(i);
148 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
149 if (child.getNodeName().equals(node_name))
150 {
151 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
152 elements.add((Element) child);
153 }
154 }
155 // not found
156 if (elements.size() == 0)
157 {
158 elements = null;
159 }
160 return elements;
161 }
162
163 public static void copyAllChildren(Element to, Element from)
164 {
165
166 Document to_doc = to.getOwnerDocument();
167 Node child = from.getFirstChild();
168 while (child != null)
169 {
170 to.appendChild(to_doc.importNode(child, true));
171 child = child.getNextSibling();
172 }
173 }
174
175 /** duplicates all elements in list elements and appends to toElement */
176 public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
177 int num_elems = elements.getLength();
178 if (num_elems < 1)
179 {
180 return;
181 }
182 for (int i = 0; i < num_elems; i++)
183 {
184 Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
185 toElement.appendChild(to_element);
186 }
187
188 }
189 /** Duplicates an element */
190 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
191 {
192 return duplicateElementNS(owner, element, null, with_attributes);
193 }
194
195 /** Duplicates an element */
196 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
197 {
198 Element duplicate;
199 if (namespace_uri == null)
200 {
201 duplicate = owner.createElement(element.getTagName());
202 }
203 else
204 {
205 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
206 }
207 // Copy element attributes
208 if (with_attributes)
209 {
210 NamedNodeMap attributes = element.getAttributes();
211 for (int i = 0; i < attributes.getLength(); i++)
212 {
213 Node attribute = attributes.item(i);
214 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
215 }
216 }
217
218 // Copy element children
219 NodeList children = element.getChildNodes();
220 for (int i = 0; i < children.getLength(); i++)
221 {
222 Node child = children.item(i);
223 duplicate.appendChild(owner.importNode(child, true));
224 }
225
226 return duplicate;
227 }
228
229 /** Remove all of the child nodes from a certain node. */
230 static final public void clear(Node node)
231 {
232 while (node.hasChildNodes())
233 {
234 node.removeChild(node.getFirstChild());
235 }
236 }
237
238 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
239 {
240 ArrayList child_elements = new ArrayList();
241
242 NodeList children_nodelist = parent_element.getChildNodes();
243 for (int i = 0; i < children_nodelist.getLength(); i++)
244 {
245 Node child_node = children_nodelist.item(i);
246 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
247 {
248 child_elements.add(child_node);
249 }
250 }
251
252 return child_elements;
253 }
254
255 static public String getElementTextValue(Element element)
256 {
257 // Find the first text node child
258 NodeList children_nodelist = element.getChildNodes();
259 for (int i = 0; i < children_nodelist.getLength(); i++)
260 {
261 Node child_node = children_nodelist.item(i);
262 if (child_node.getNodeType() == Node.TEXT_NODE)
263 {
264 return child_node.getNodeValue();
265 }
266 }
267
268 // None found
269 return "";
270 }
271
272 /**
273 * Method to retrieve the value of a given node.
274 *
275 * @param element
276 * The <strong>Element</strong> whose value we wish to find. Soon
277 * to be deprecated!
278 */
279 static final public String getValue(Node element)
280 {
281 if (element == null)
282 {
283 return "";
284 }
285 // If we've been given a subject node first retrieve its value node.
286 if (element.getNodeName().equals("Subject"))
287 {
288 element = getNodeFromNamed(element, "Value");
289 }
290 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
291 if (element != null && element.hasChildNodes())
292 {
293 StringBuffer text_buffer = new StringBuffer();
294 NodeList text_nodes = element.getChildNodes();
295 for (int i = 0; i < text_nodes.getLength(); i++)
296 {
297 Node possible_text = text_nodes.item(i);
298 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
299 {
300 text_buffer.append(possible_text.getNodeValue());
301 }
302 }
303 return text_buffer.toString();
304 }
305 return "";
306 }
307
308 /**
309 * Method to retrieve from the node given, a certain child node with the
310 * specified name.
311 *
312 * @param parent
313 * The <strong>Node</strong> whose children should be searched.
314 * @param name
315 * The required nodes name as a <strong>String</strong>.
316 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
317 * otherwise. Soon to be deprecated!
318 */
319 static final public Node getNodeFromNamed(Node parent, String name)
320 {
321 Node child = null;
322 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
323 {
324 if (i.getNodeName().equals(name))
325 {
326 child = i;
327 }
328 }
329 return child;
330 }
331
332 static final public String WELLFORMED = "well-formed !";
333 static final public String NOTWELLFORMED = "not well-formed";
334 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
335 static final private String FOOTER = "</collectionConfig>";
336
337
338 public static Document getDOM(String xml_str)
339 {
340 Document doc = null;
341 try {
342
343 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
344 InputSource is = new InputSource();
345 is.setCharacterStream(new StringReader(xml_str));
346 doc = db.parse(is);
347
348 } catch (Exception e) {
349 e.printStackTrace();
350 }
351 return doc;
352 }
353
354 public static String parse(String xml_str)
355 {
356 String validation_msg = WELLFORMED;
357 xml_str = HEADER + xml_str + FOOTER;
358 try
359 {
360 SAXParserFactory factory = SAXParserFactory.newInstance();
361 factory.setNamespaceAware(true);
362 //factory.setValidating (true);
363 SAXParser parser = factory.newSAXParser();
364 InputSource iSource = new InputSource(new StringReader(xml_str));
365 // parser.parse (iSource, new DefaultHandler ());
366
367 org.xml.sax.XMLReader reader = parser.getXMLReader();
368 reader.setContentHandler(new DefaultHandler());
369 reader.setErrorHandler(new DefaultHandler());
370 reader.parse(iSource);
371 }
372 catch (FactoryConfigurationError e)
373 {
374 validation_msg = "unable to get a document builder factory";
375 }
376 catch (ParserConfigurationException e)
377 {
378 validation_msg = "unable to configure parser";
379 }
380 catch (SAXParseException e)
381 {
382 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
383 }
384 catch (SAXException e)
385 {
386 validation_msg += " Fatal error: " + e.toString();
387 }
388 catch (IOException e)
389 {
390 validation_msg = "Unable to read the input, i/o error";
391 }
392
393 return validation_msg;
394 }
395
396 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
397 public static String parseDOM(String xml_str)
398 {
399 String validation_msg = WELLFORMED;
400
401 try
402 {
403 SAXParserFactory factory = SAXParserFactory.newInstance();
404 factory.setNamespaceAware(true);
405 //factory.setValidating (true);
406 SAXParser parser = factory.newSAXParser();
407 InputSource iSource = new InputSource(new StringReader(xml_str));
408 // parser.parse (iSource, new DefaultHandler ());
409
410 org.xml.sax.XMLReader reader = parser.getXMLReader();
411 reader.setContentHandler(new DefaultHandler());
412 reader.setErrorHandler(new DefaultHandler());
413 reader.parse(iSource);
414 }
415 catch (FactoryConfigurationError e)
416 {
417 validation_msg = "unable to get a document builder factory";
418 }
419 catch (ParserConfigurationException e)
420 {
421 validation_msg = "unable to configure parser";
422 }
423 catch (SAXParseException e)
424 {
425 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
426 }
427 catch (SAXException e)
428 {
429 validation_msg += " " + e.toString();
430 }
431 catch (IOException e)
432 {
433 validation_msg = "Unable to read the input, i/o error";
434 }
435
436 return validation_msg;
437 }
438
439 public static String parse(File xml_file)
440 {
441 String validation_msg = WELLFORMED;
442
443 try
444 {
445 SAXParserFactory factory = SAXParserFactory.newInstance();
446 factory.setNamespaceAware(true);
447 //factory.setValidating (true);
448 SAXParser parser = factory.newSAXParser();
449 FileReader r = new FileReader(xml_file);
450 InputSource iSource = new InputSource(r);
451 XMLReader reader = parser.getXMLReader();
452 reader.setContentHandler(new DefaultHandler());
453 reader.setErrorHandler(new DefaultHandler());
454 reader.parse(iSource);
455 }
456 catch (FactoryConfigurationError e)
457 {
458 validation_msg = "unable to get a document builder factory";
459 }
460 catch (ParserConfigurationException e)
461 {
462 validation_msg = "unable to configure parser";
463 }
464 catch (SAXParseException e)
465 {
466 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
467 }
468 catch (SAXException e)
469 {
470 validation_msg += " Fatal error: " + e.toString();
471 }
472 catch (IOException e)
473 {
474 validation_msg = "Unable to read the input, i/o error";
475 }
476
477 return validation_msg;
478 }
479
480 /** Returns a string of the location. */
481 private static String getLocationString(SAXParseException ex)
482 {
483 StringBuffer str = new StringBuffer();
484
485 String systemId = ex.getSystemId();
486 if (systemId != null)
487 {
488 int index = systemId.lastIndexOf('/');
489 if (index != -1)
490 systemId = systemId.substring(index + 1);
491 str.append(systemId);
492 }
493 str.append("(line ");
494 str.append(ex.getLineNumber() - 1);
495 str.append(", column ");
496 str.append(ex.getColumnNumber());
497 str.append("): ");
498
499 return str.toString();
500
501 } // getLocationString(SAXParseException):String
502
503 /** Parse an XML document from a given file path */
504 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
505 {
506 if (use_class_loader == true)
507 {
508 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
509 if (is != null)
510 {
511 return parseXML(is);
512 }
513 }
514
515 // Try the file outside the classes directory
516 return parseXMLFile(new File(xml_file_path));
517 }
518
519 /** Parse an XML document from a given file */
520 static public Document parseXMLFile(File xml_file)
521 {
522 // No file? No point trying!
523 if (xml_file.exists() == false)
524 {
525 return null;
526 }
527
528 try
529 {
530 return parseXML(new FileInputStream(xml_file));
531 }
532 catch (Exception exception)
533 {
534 DebugStream.printStackTrace(exception);
535 return null;
536 }
537 }
538
539 /** Parse an XML document from a given input stream */
540 static public Document parseXML(InputStream xml_input_stream)
541 {
542 Document document = null;
543
544 try
545 {
546 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
547 document = parseXML(isr);
548 isr.close();
549 xml_input_stream.close();
550 }
551 catch (Exception exception)
552 {
553 DebugStream.printStackTrace(exception);
554 }
555
556 return document;
557 }
558
559 /** Parse an XML document from a given reader */
560 static public Document parseXML(Reader xml_reader)
561 {
562 Document document = null;
563
564 // If debugging, the following will store the XML contents to be parsed,
565 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
566 String xmlContents = "";
567
568 try
569 {
570 Reader reader = null;
571
572 // (1) By default, GLI will remove any contents preceeding (and invalidating)
573 // the XML and present these lines separately to the user
574 if (!DebugStream.isDebuggingEnabled())
575 {
576 try
577 {
578 reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
579 }
580 catch (Exception e)
581 {
582 System.err.println("Exception while wrapping the reader in parseXML(Reader)");
583 e.printStackTrace();
584 }
585 }
586
587 // (2) If we are running GLI in debug mode:
588 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
589 // idea of where things went wrong. This will print the "XML" contents to either
590 // system.out (if debugging is off) or to the DebugStream otherwise.
591 // We need to read the XML twice to know the line where things went wrong, so
592 // do the additional reading only if we're debugging
593 else
594 {
595 StringBuffer buf = new StringBuffer();
596 char[] buffer = new char[500];
597 int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
598 while (numCharsRead != -1)
599 {
600 buf.append(buffer, 0, numCharsRead);
601 numCharsRead = xml_reader.read(buffer, 0, buffer.length);
602 }
603 xmlContents = buf.toString();
604 xml_reader.close(); // closing the old Reader
605 xml_reader = null;
606 buffer = null;
607 buf = null;
608 // we need a Reader to parse the same contents as the Reader that was just closed
609 reader = new BufferedReader(new StringReader(xmlContents));
610 //System.err.println("xmlContents:\n" + xmlContents);
611 }
612
613 // (2) The actual XML parsing
614 InputSource isc = new InputSource(reader);
615 DOMParser parser = new DOMParser();
616 parser.setFeature("http://xml.org/sax/features/validation", false);
617 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
618 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
619 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
620 parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
621 parser.parse(isc);
622 document = parser.getDocument();
623
624 }
625 catch (SAXParseException e)
626 {
627 showXMLParseFailureLine(e, xmlContents);
628 }
629 catch (SAXException exception)
630 {
631 System.err.println("SAX exception: " + exception.getMessage());
632 if (DebugStream.isDebuggingEnabled())
633 {
634 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
635 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
636 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
637 System.exit(-1);
638 }
639 // else, not running in debug mode, so don't exit after exception
640 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
641 DebugStream.printStackTrace(exception);
642 }
643 catch (Exception exception)
644 {
645 DebugStream.printStackTrace(exception);
646 }
647
648 return document;
649 }
650
651 /**
652 * Displays the line (string) where the SAXParseException occurred, given a
653 * String of the entire xml that was being parsed and the SAXParseException
654 * object that was caught. The messages are printed to DebugStream, so run
655 * GLI/FLI with -debug to view this output.
656 *
657 * @param xmlContents
658 * is the entire xml that was being parsed when the exception
659 * occurred
660 * @param e
661 * is the SAXParseException object that was thrown upon parsing
662 * the xmlContents.
663 */
664 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
665 {
666
667 // There should be no characters at all that preceed the <?xml>... bit.
668 // The first check is for starting spaces:
669 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
670 {
671 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
672 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
673 return; // nothing more to do, first error identified
674 }
675
676 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
677 String line = "";
678 int linenumber = e.getLineNumber();
679 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
680 if (DebugStream.isDebuggingEnabled())
681 {
682 if (linenumber != -1)
683 {
684 String[] lines = xmlContents.split("\n");
685 if (lines.length > 0)
686 {
687 DebugStream.println(" (number of lines: " + lines.length + ")");
688 if (lines.length >= linenumber)
689 {
690 line = lines[linenumber - 1];
691 }
692 else
693 { // error is past the last line
694 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
695 }
696 }
697 else
698 {
699 DebugStream.print("\n");
700 }
701 lines = null;
702
703 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
704 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
705
706 // Uncomment if you want to print out the entire contents of the XML doc:
707 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
708 // + xmlContents + "\n************END\n");
709 }
710 else
711 { // no particular line number, print out all the xml so debugger can inspect it
712 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
713 }
714 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
715 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
716 System.exit(-1);
717 }
718 else
719 { // not running in debug mode
720 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
721 }
722 }
723
724 static public StringBuffer readXMLStream(InputStream input_stream)
725 {
726 StringBuffer xml = new StringBuffer("");
727
728 try
729 {
730 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
731 BufferedReader buffered_in = new BufferedReader(isr);
732
733 String line = "";
734 boolean xml_content = false;
735 while ((line = buffered_in.readLine()) != null)
736 {
737 if (xml_content)
738 {
739 xml.append(line);
740 xml.append("\n");
741 }
742 else if (line.trim().startsWith("<?xml"))
743 {
744 xml_content = true;
745 xml.append(line);
746 xml.append("\n");
747 }
748 else
749 {
750 System.err.println(line);
751 }
752 }
753 buffered_in = null;
754 }
755 catch (Exception error)
756 {
757 System.err.println("Failed when trying to parse XML stream");
758 error.printStackTrace();
759 }
760
761 return xml;
762 }
763
764 /**
765 * Removes characters that are invalid in XML (see
766 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
767 */
768 static public String removeInvalidCharacters(String text)
769 {
770 char[] safe_characters = new char[text.length()];
771 int j = 0;
772
773 char[] raw_characters = new char[text.length()];
774 text.getChars(0, text.length(), raw_characters, 0);
775 for (int i = 0; i < raw_characters.length; i++)
776 {
777 char character = raw_characters[i];
778 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
779 {
780 safe_characters[j] = character;
781 j++;
782 }
783 }
784
785 return new String(safe_characters, 0, j);
786 }
787
788 static public void setElementTextValue(Element element, String text)
789 {
790 // Remove all text node children
791 NodeList children_nodelist = element.getChildNodes();
792 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
793 {
794 Node child_node = children_nodelist.item(i);
795 if (child_node.getNodeType() == Node.TEXT_NODE)
796 {
797 element.removeChild(child_node);
798 }
799 }
800
801 // Add a new text node
802 if (text != null)
803 {
804 element.appendChild(element.getOwnerDocument().createTextNode(text));
805 }
806 }
807
808 /**
809 * Set the #text node value of some element.
810 *
811 * @param element
812 * the Element whose value we wish to set
813 * @param value
814 * the new value for the element as a String Soon to be
815 * deprecated!
816 */
817 static final public void setValue(Element element, String value)
818 {
819 // Remove any existing child node(s)
820 clear(element);
821 // Add new text node.
822 if (value != null)
823 {
824 element.appendChild(element.getOwnerDocument().createTextNode(value));
825 }
826 }
827
828 static public void indentXML(Element elem, int depth)
829 {
830 Document doc = elem.getOwnerDocument();
831
832 String startIndentString = "\n";
833 for (int i = 0; i < depth; i++)
834 {
835 startIndentString += "\t";
836 }
837 Node startTextNode = doc.createTextNode(startIndentString);
838
839 String endIndentString = "\n";
840 for (int i = 0; i < depth - 1; i++)
841 {
842 endIndentString += "\t";
843 }
844 Node endTextNode = doc.createTextNode(endIndentString);
845
846 boolean found = false;
847 Node child = elem.getFirstChild();
848 while (child != null)
849 {
850 // first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
851 if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
852 {
853 Node spaceTextNode = child;
854 child = child.getNextSibling();
855 elem.removeChild(spaceTextNode);
856
857 if(child == null) break;
858 }
859
860 // now process normal element nodes as intended
861 if (child.getNodeType() == Node.ELEMENT_NODE)
862 {
863 found = true;
864 break;
865 }
866 child = child.getNextSibling();
867 }
868
869 if (found)
870 {
871 elem.appendChild(endTextNode);
872 }
873
874 child = elem.getFirstChild();
875 while (child != null)
876 {
877 // Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
878 // because the first while loop above would break out when it found an element node and wouldn't have got rid
879 // of all the empty text nodes yet.
880 // This time, beware not to delete the special end and start empty textnodes just added, since
881 // they've been created and inserted specifically.
882 if(child != endTextNode && child != startTextNode
883 && child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
884 {
885 Node spaceTextNode = child;
886 child = child.getNextSibling();
887 elem.removeChild(spaceTextNode);
888
889 if(child == null) break;
890 }
891
892 // go back to processing normal element nodes as intended
893 if (child.getNodeType() == Node.ELEMENT_NODE)
894 {
895 elem.insertBefore(startTextNode.cloneNode(false), child);
896 indentXML((Element) child, depth + 1);
897 }
898 child = child.getNextSibling();
899 }
900 }
901
902 /**
903 * Write an XML document to a given file with the text node of the specified
904 * element unescaped
905 */
906 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
907 {
908 indentXML(document.getDocumentElement(), 1);
909 try
910 {
911 OutputStream os = new FileOutputStream(xml_file);
912 // Create an output format for our document.
913 OutputFormat f = new OutputFormat(document);
914 f.setEncoding("UTF-8");
915 f.setIndenting(true);
916 f.setLineWidth(0); // Why isn't this working!
917 f.setPreserveSpace(true);
918 if (nonEscapingTagNames != null)
919 {
920 f.setNonEscapingElements(nonEscapingTagNames);
921 }
922 // Create the necessary writer stream for serialization.
923 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
924 Writer w = new BufferedWriter(osw);
925 // Generate a new serializer from the above.
926 XMLSerializer s = new XMLSerializer(w, f);
927 s.asDOMSerializer();
928 // Finally serialize the document to file.
929 s.serialize(document);
930 // And close.
931 os.close();
932 }
933 catch (Exception exception)
934 {
935 DebugStream.printStackTrace(exception);
936 }
937 }
938
939 /** Write an XML document to a given file */
940 static public void writeXMLFile(File xml_file, Document document)
941 {
942 writeXMLFile(xml_file, document, null);
943 }
944
945 public static void printXMLNode(Node e)
946 {
947 printXMLNode(e, 0);
948 }
949
950 public static void printXMLNode(Node e, int depth)
951 { //recursive method call using DOM API...
952
953 for (int i = 0; i < depth; i++)
954 System.out.print(' ');
955
956 if (e.getNodeType() == Node.TEXT_NODE)
957 {
958 //System.out.println("text") ;
959 if (e.getNodeValue() != "")
960 {
961 System.out.println(e.getNodeValue());
962 }
963 return;
964 }
965
966 System.out.print('<');
967 System.out.print(e.getNodeName());
968 NamedNodeMap attrs = e.getAttributes();
969 if (attrs != null)
970 {
971 for (int i = 0; i < attrs.getLength(); i++)
972 {
973 Node attr = attrs.item(i);
974 System.out.print(' ');
975 System.out.print(attr.getNodeName());
976 System.out.print("=\"");
977 System.out.print(attr.getNodeValue());
978 System.out.print('"');
979 }
980 }
981 NodeList children = e.getChildNodes();
982
983 if (children == null || children.getLength() == 0)
984 System.out.println("/>");
985 else
986 {
987
988 System.out.println('>');
989
990 int len = children.getLength();
991 for (int i = 0; i < len; i++)
992 {
993 printXMLNode(children.item(i), depth + 1);
994 }
995
996 for (int i = 0; i < depth; i++)
997 System.out.print(' ');
998
999 System.out.println("</" + e.getNodeName() + ">");
1000 }
1001
1002 }
1003
1004 public static String xmlNodeToString(Node e)
1005 {
1006 StringBuffer sb = new StringBuffer("");
1007 xmlNodeToString(sb, e, true, "\t", 2);
1008 return sb.toString();
1009 }
1010
1011 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1012 {
1013
1014 if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1015 {
1016 if (e.getNodeValue() != "")
1017 {
1018 String text = e.getNodeValue();
1019 sb.append("<![CDATA[");
1020 sb.append(text);
1021 sb.append("]]>");
1022 }
1023 return;
1024 }
1025
1026 if (e.getNodeType() == Node.TEXT_NODE)
1027 {
1028 if (e.getNodeValue() != "")
1029 {
1030 String text = e.getNodeValue();
1031 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("[\\n\\r\\t\\s]*$", "");
1032 for (Character c : text.toCharArray())
1033 {
1034 if (c.equals('\n'))
1035 {
1036 text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1037 break;
1038 }
1039
1040 if (!Character.isWhitespace(c))
1041 {
1042 break;
1043 }
1044 }
1045 sb.append(text);
1046 }
1047 return;
1048 }
1049
1050 if (e.getNodeType() == Node.COMMENT_NODE)
1051 {
1052 if (e.getNodeValue() != "")
1053 {
1054 sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1055 }
1056 return;
1057 }
1058
1059 if (indent)
1060 {
1061 for (int i = 0; i < depth; i++)
1062 {
1063 sb.append(indentString);
1064 }
1065 }
1066
1067 sb.append('<');
1068 sb.append(e.getNodeName());
1069 NamedNodeMap attrs = e.getAttributes();
1070 if (attrs != null)
1071 {
1072 for (int i = 0; i < attrs.getLength(); i++)
1073 {
1074 Node attr = attrs.item(i);
1075 sb.append(' ');
1076 sb.append(attr.getNodeName());
1077 sb.append("=\"");
1078 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
1079 sb.append('"');
1080 }
1081 }
1082 NodeList children = e.getChildNodes();
1083
1084 boolean hasElements = false;
1085 boolean indentSwapped = false;
1086 for (int i = 0; i < children.getLength(); i++)
1087 {
1088 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1089 {
1090 hasElements = true;
1091 }
1092 if ((children.item(i).getNodeType() == Node.TEXT_NODE || children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1093 {
1094 if (children.item(i).getNodeValue().trim().length() > 0)
1095 {
1096 indentSwapped = true;
1097 indent = false;
1098 }
1099 }
1100 }
1101
1102 if (children == null || children.getLength() == 0)
1103 {
1104 sb.append("/>");
1105
1106 if (indent)
1107 {
1108 sb.append("\n");
1109 }
1110 }
1111 else
1112 {
1113 sb.append(">");
1114 if (hasElements && indent)
1115 {
1116 sb.append("\n");
1117 }
1118
1119 int len = children.getLength();
1120 for (int i = 0; i < len; i++)
1121 {
1122 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1123 }
1124
1125 if (indent)
1126 {
1127 for (int i = 0; i < depth; i++)
1128 {
1129 sb.append(indentString);
1130 }
1131 }
1132
1133 sb.append("</" + e.getNodeName() + ">");
1134
1135 if ((hasElements && indent) || indentSwapped)
1136 {
1137 sb.append("\n");
1138 }
1139 }
1140 }
1141
1142 public static String xmlNodeToStringWithoutIndenting(Node e)
1143 {
1144 StringBuffer sb = new StringBuffer("");
1145 xmlNodeToStringWithoutNewline(sb, e, -1);
1146 return sb.toString();
1147 }
1148
1149 public static String xmlNodeToStringWithoutNewline(Node e)
1150 {
1151 StringBuffer sb = new StringBuffer("");
1152 xmlNodeToStringWithoutNewline(sb, e, 0);
1153 return sb.toString();
1154 }
1155
1156 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1157 {
1158
1159 for (int i = 0; i < depth; i++)
1160 {
1161 sb.append(' ');
1162 }
1163
1164 if (e.getNodeType() == Node.TEXT_NODE)
1165 {
1166 if (e.getNodeValue() != "")
1167 {
1168 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1169 }
1170 return;
1171 }
1172
1173 if (e.getNodeType() == Node.COMMENT_NODE)
1174 {
1175 if (e.getNodeValue() != "")
1176 {
1177 sb.append("<!--" + e.getNodeValue() + "-->");
1178 }
1179 return;
1180 }
1181
1182 sb.append('<');
1183 sb.append(e.getNodeName());
1184 NamedNodeMap attrs = e.getAttributes();
1185 if (attrs != null)
1186 {
1187 for (int i = 0; i < attrs.getLength(); i++)
1188 {
1189 Node attr = attrs.item(i);
1190 sb.append(' ');
1191 sb.append(attr.getNodeName());
1192 sb.append("=\"");
1193 sb.append(attr.getNodeValue());
1194 sb.append('"');
1195 }
1196 }
1197 NodeList children = e.getChildNodes();
1198
1199 if (children == null || children.getLength() == 0)
1200 sb.append("/>");
1201 else
1202 {
1203
1204 sb.append(">");
1205
1206 int len = children.getLength();
1207 for (int i = 0; i < len; i++)
1208 {
1209 if (depth >= 0)
1210 {
1211 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1212 }
1213 else
1214 {
1215 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1216 }
1217 }
1218
1219 for (int i = 0; i < depth; i++)
1220 sb.append(' ');
1221
1222 sb.append("</" + e.getNodeName() + ">");
1223 }
1224 }
1225}
Note: See TracBrowser for help on using the repository browser.