source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 30678

Last change on this file since 30678 was 29730, checked in by ak19, 9 years ago

The second and final part of the commits to getting GLI running again and parsing web.xml, after the changes to commit r29687, where web.xml was split into two and included server.xml. In this commit: 1. GLI uses an EntityResolver to resolve entities in web.xml that are defined in the included servlets.xml file. In order to keep XMLTools.java tidy and hopefully make the GLI entity resolver more reusable, the new GLIEntityResolver.java class checks default search paths first when asked to resolve entities. web/WEB-INF, where web.xml and servlets.xml live, has been added to the default search paths, as also the gli user dir where the web.xml and server.xml will be in a client-gli situation. 2. Small tidy up to Greenstone runtime's GSEntityResolver. 3. Remote Greenstone gliserver.pl needs to also transfer the new server.xml file when zipping up web.xml. 4. Minor touchups to the new README on apache.jar.

  • Property svn:keywords set to Author Date Id Revision
File size: 32.0 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15
16// SAX
17import org.xml.sax.XMLReader;
18import org.xml.sax.SAXException;
19import org.xml.sax.SAXParseException;
20import org.xml.sax.helpers.DefaultHandler;
21import org.xml.sax.InputSource;
22
23// JAXP
24import javax.xml.parsers.DocumentBuilder;
25import javax.xml.parsers.DocumentBuilderFactory;
26import javax.xml.parsers.FactoryConfigurationError;
27import javax.xml.parsers.ParserConfigurationException;
28import javax.xml.parsers.SAXParser;
29import javax.xml.parsers.SAXParserFactory;
30
31
32/** This class is a static class containing useful XML functions */
33public class XMLTools
34{
35 /** extracts the text out of a node */
36 public static Node getNodeTextNode(Element param)
37 {
38 param.normalize();
39 Node n = param.getFirstChild();
40 while (n != null && n.getNodeType() != Node.TEXT_NODE)
41 {
42 n = n.getNextSibling();
43 }
44 return n;
45 }
46
47 /** extracts the text out of a node */
48 public static String getNodeText(Element param)
49 {
50 Node text_node = getNodeTextNode(param);
51 if (text_node == null)
52 {
53 return "";
54 }
55 return text_node.getNodeValue();
56 }
57
58 public static void setNodeText(Element elem, String text)
59 {
60 Node old_text_node = getNodeTextNode(elem);
61 if (old_text_node != null)
62 {
63 elem.removeChild(old_text_node);
64 }
65 Text t = elem.getOwnerDocument().createTextNode(text);
66 elem.appendChild(t);
67 }
68
69 /** returns the (first) child element with the given name */
70 public static Node getChildByTagName(Node n, String name)
71 {
72
73 Node child = n.getFirstChild();
74 while (child != null)
75 {
76 if (child.getNodeName().equals(name))
77 {
78 return child;
79 }
80 child = child.getNextSibling();
81 }
82 return null; //not found
83 }
84
85 /**
86 * returns the (nth) child element with the given name index numbers start
87 * at 0
88 */
89 public static Node getChildByTagNameIndexed(Node n, String name, int index)
90 {
91 if (index == -1)
92 {
93 return getChildByTagName(n, name);
94 }
95 int count = 0;
96 Node child = n.getFirstChild();
97 while (child != null)
98 {
99 if (child.getNodeName().equals(name))
100 {
101 if (count == index)
102 {
103 return child;
104 }
105 else
106 {
107 count++;
108 }
109 }
110 child = child.getNextSibling();
111 }
112 return null; //not found
113 }
114
115 /**
116 * returns the element parent/node_name[@attribute_name='attribute_value']
117 */
118 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
119 {
120
121 NodeList children = parent.getChildNodes();
122 for (int i = 0; i < children.getLength(); i++)
123 {
124 Node child = children.item(i);
125 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
126 if (child.getNodeName().equals(node_name))
127 {
128 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
129 return (Element) child;
130 }
131 }
132 // not found
133 return null;
134 }
135
136 /**
137 * returns a list of elements
138 * parent/node_name[@attribute_name='attribute_value']
139 */
140 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
141 {
142 ArrayList elements = new ArrayList();
143 NodeList children = parent.getChildNodes();
144 for (int i = 0; i < children.getLength(); i++)
145 {
146 //System.out.println("getNamedElementList");
147 Node child = children.item(i);
148 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
149 if (child.getNodeName().equals(node_name))
150 {
151 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
152 elements.add((Element) child);
153 }
154 }
155 // not found
156 if (elements.size() == 0)
157 {
158 elements = null;
159 }
160 return elements;
161 }
162
163 public static void copyAllChildren(Element to, Element from)
164 {
165
166 Document to_doc = to.getOwnerDocument();
167 Node child = from.getFirstChild();
168 while (child != null)
169 {
170 to.appendChild(to_doc.importNode(child, true));
171 child = child.getNextSibling();
172 }
173 }
174
175 /** duplicates all elements in list elements and appends to toElement */
176 public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
177 int num_elems = elements.getLength();
178 if (num_elems < 1)
179 {
180 return;
181 }
182 for (int i = 0; i < num_elems; i++)
183 {
184 Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
185 toElement.appendChild(to_element);
186 }
187
188 }
189 /** Duplicates an element */
190 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
191 {
192 return duplicateElementNS(owner, element, null, with_attributes);
193 }
194
195 /** Duplicates an element */
196 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
197 {
198 Element duplicate;
199 if (namespace_uri == null)
200 {
201 duplicate = owner.createElement(element.getTagName());
202 }
203 else
204 {
205 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
206 }
207 // Copy element attributes
208 if (with_attributes)
209 {
210 NamedNodeMap attributes = element.getAttributes();
211 for (int i = 0; i < attributes.getLength(); i++)
212 {
213 Node attribute = attributes.item(i);
214 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
215 }
216 }
217
218 // Copy element children
219 NodeList children = element.getChildNodes();
220 for (int i = 0; i < children.getLength(); i++)
221 {
222 Node child = children.item(i);
223 duplicate.appendChild(owner.importNode(child, true));
224 }
225
226 return duplicate;
227 }
228
229 /** Remove all of the child nodes from a certain node. */
230 static final public void clear(Node node)
231 {
232 while (node.hasChildNodes())
233 {
234 node.removeChild(node.getFirstChild());
235 }
236 }
237
238 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
239 {
240 ArrayList child_elements = new ArrayList();
241
242 NodeList children_nodelist = parent_element.getChildNodes();
243 for (int i = 0; i < children_nodelist.getLength(); i++)
244 {
245 Node child_node = children_nodelist.item(i);
246 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
247 {
248 child_elements.add(child_node);
249 }
250 }
251
252 return child_elements;
253 }
254
255 static public String getElementTextValue(Element element)
256 {
257 // Find the first text node child
258 NodeList children_nodelist = element.getChildNodes();
259 for (int i = 0; i < children_nodelist.getLength(); i++)
260 {
261 Node child_node = children_nodelist.item(i);
262 if (child_node.getNodeType() == Node.TEXT_NODE)
263 {
264 return child_node.getNodeValue();
265 }
266 }
267
268 // None found
269 return "";
270 }
271
272 /**
273 * Method to retrieve the value of a given node.
274 *
275 * @param element
276 * The <strong>Element</strong> whose value we wish to find. Soon
277 * to be deprecated!
278 */
279 static final public String getValue(Node element)
280 {
281 if (element == null)
282 {
283 return "";
284 }
285 // If we've been given a subject node first retrieve its value node.
286 if (element.getNodeName().equals("Subject"))
287 {
288 element = getNodeFromNamed(element, "Value");
289 }
290 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
291 if (element != null && element.hasChildNodes())
292 {
293 StringBuffer text_buffer = new StringBuffer();
294 NodeList text_nodes = element.getChildNodes();
295 for (int i = 0; i < text_nodes.getLength(); i++)
296 {
297 Node possible_text = text_nodes.item(i);
298 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
299 {
300 text_buffer.append(possible_text.getNodeValue());
301 }
302 }
303 return text_buffer.toString();
304 }
305 return "";
306 }
307
308 /**
309 * Method to retrieve from the node given, a certain child node with the
310 * specified name.
311 *
312 * @param parent
313 * The <strong>Node</strong> whose children should be searched.
314 * @param name
315 * The required nodes name as a <strong>String</strong>.
316 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
317 * otherwise. Soon to be deprecated!
318 */
319 static final public Node getNodeFromNamed(Node parent, String name)
320 {
321 Node child = null;
322 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
323 {
324 if (i.getNodeName().equals(name))
325 {
326 child = i;
327 }
328 }
329 return child;
330 }
331
332 static final public String WELLFORMED = "well-formed !";
333 static final public String NOTWELLFORMED = "not well-formed";
334 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
335 static final private String FOOTER = "</collectionConfig>";
336
337
338 public static Document getDOM(String xml_str)
339 {
340 Document doc = null;
341 try {
342
343 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
344 InputSource is = new InputSource();
345 is.setCharacterStream(new StringReader(xml_str));
346 doc = db.parse(is);
347
348 } catch (Exception e) {
349 e.printStackTrace();
350 }
351 return doc;
352 }
353
354 public static String parse(String xml_str)
355 {
356 String validation_msg = WELLFORMED;
357 xml_str = HEADER + xml_str + FOOTER;
358 try
359 {
360 SAXParserFactory factory = SAXParserFactory.newInstance();
361 factory.setNamespaceAware(true);
362 //factory.setValidating (true);
363 SAXParser parser = factory.newSAXParser();
364 InputSource iSource = new InputSource(new StringReader(xml_str));
365 // parser.parse (iSource, new DefaultHandler ());
366
367 org.xml.sax.XMLReader reader = parser.getXMLReader();
368 reader.setContentHandler(new DefaultHandler());
369 reader.setErrorHandler(new DefaultHandler());
370 reader.parse(iSource);
371 }
372 catch (FactoryConfigurationError e)
373 {
374 validation_msg = "unable to get a document builder factory";
375 }
376 catch (ParserConfigurationException e)
377 {
378 validation_msg = "unable to configure parser";
379 }
380 catch (SAXParseException e)
381 {
382 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
383 }
384 catch (SAXException e)
385 {
386 validation_msg += " Fatal error: " + e.toString();
387 }
388 catch (IOException e)
389 {
390 validation_msg = "Unable to read the input, i/o error";
391 }
392
393 return validation_msg;
394 }
395
396 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
397 public static String parseDOM(String xml_str)
398 {
399 String validation_msg = WELLFORMED;
400
401 try
402 {
403 SAXParserFactory factory = SAXParserFactory.newInstance();
404 factory.setNamespaceAware(true);
405 //factory.setValidating (true);
406 SAXParser parser = factory.newSAXParser();
407 InputSource iSource = new InputSource(new StringReader(xml_str));
408 // parser.parse (iSource, new DefaultHandler ());
409
410 org.xml.sax.XMLReader reader = parser.getXMLReader();
411 reader.setContentHandler(new DefaultHandler());
412 reader.setErrorHandler(new DefaultHandler());
413 reader.parse(iSource);
414 }
415 catch (FactoryConfigurationError e)
416 {
417 validation_msg = "unable to get a document builder factory";
418 }
419 catch (ParserConfigurationException e)
420 {
421 validation_msg = "unable to configure parser";
422 }
423 catch (SAXParseException e)
424 {
425 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
426 }
427 catch (SAXException e)
428 {
429 validation_msg += " " + e.toString();
430 }
431 catch (IOException e)
432 {
433 validation_msg = "Unable to read the input, i/o error";
434 }
435
436 return validation_msg;
437 }
438
439 public static String parse(File xml_file)
440 {
441 String validation_msg = WELLFORMED;
442
443 try
444 {
445 SAXParserFactory factory = SAXParserFactory.newInstance();
446 factory.setNamespaceAware(true);
447 //factory.setValidating (true);
448 SAXParser parser = factory.newSAXParser();
449 FileReader r = new FileReader(xml_file);
450 InputSource iSource = new InputSource(r);
451 XMLReader reader = parser.getXMLReader();
452 reader.setContentHandler(new DefaultHandler());
453 reader.setErrorHandler(new DefaultHandler());
454 reader.parse(iSource);
455 }
456 catch (FactoryConfigurationError e)
457 {
458 validation_msg = "unable to get a document builder factory";
459 }
460 catch (ParserConfigurationException e)
461 {
462 validation_msg = "unable to configure parser";
463 }
464 catch (SAXParseException e)
465 {
466 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
467 }
468 catch (SAXException e)
469 {
470 validation_msg += " Fatal error: " + e.toString();
471 }
472 catch (IOException e)
473 {
474 validation_msg = "Unable to read the input, i/o error";
475 }
476
477 return validation_msg;
478 }
479
480 /** Returns a string of the location. */
481 private static String getLocationString(SAXParseException ex)
482 {
483 StringBuffer str = new StringBuffer();
484
485 String systemId = ex.getSystemId();
486 if (systemId != null)
487 {
488 int index = systemId.lastIndexOf('/');
489 if (index != -1)
490 systemId = systemId.substring(index + 1);
491 str.append(systemId);
492 }
493 str.append("(line ");
494 str.append(ex.getLineNumber() - 1);
495 str.append(", column ");
496 str.append(ex.getColumnNumber());
497 str.append("): ");
498
499 return str.toString();
500
501 } // getLocationString(SAXParseException):String
502
503 /** Parse an XML document from a given file path */
504 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
505 {
506 if (use_class_loader == true)
507 {
508 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
509 if (is != null)
510 {
511 return parseXML(is);
512 }
513 }
514
515 // Try the file outside the classes directory
516 return parseXMLFile(new File(xml_file_path));
517 }
518
519 /** Parse an XML document from a given file */
520 static public Document parseXMLFile(File xml_file)
521 {
522 // No file? No point trying!
523 if (xml_file.exists() == false)
524 {
525 return null;
526 }
527
528 try
529 {
530 return parseXML(new FileInputStream(xml_file));
531 }
532 catch (Exception exception)
533 {
534 DebugStream.printStackTrace(exception);
535 return null;
536 }
537 }
538
539 /** Parse an XML document from a given input stream */
540 static public Document parseXML(InputStream xml_input_stream)
541 {
542 Document document = null;
543
544 try
545 {
546 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
547 document = parseXML(isr);
548 isr.close();
549 xml_input_stream.close();
550 }
551 catch (Exception exception)
552 {
553 DebugStream.printStackTrace(exception);
554 }
555
556 return document;
557 }
558
559 /** Parse an XML document from a given reader */
560 static public Document parseXML(Reader xml_reader)
561 {
562 Document document = null;
563
564 // If debugging, the following will store the XML contents to be parsed,
565 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
566 String xmlContents = "";
567
568 try
569 {
570 Reader reader = null;
571
572 // (1) By default, GLI will remove any contents preceeding (and invalidating)
573 // the XML and present these lines separately to the user
574 if (!DebugStream.isDebuggingEnabled())
575 {
576 try
577 {
578 reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
579 }
580 catch (Exception e)
581 {
582 System.err.println("Exception while wrapping the reader in parseXML(Reader)");
583 e.printStackTrace();
584 }
585 }
586
587 // (2) If we are running GLI in debug mode:
588 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
589 // idea of where things went wrong. This will print the "XML" contents to either
590 // system.out (if debugging is off) or to the DebugStream otherwise.
591 // We need to read the XML twice to know the line where things went wrong, so
592 // do the additional reading only if we're debugging
593 else
594 {
595 StringBuffer buf = new StringBuffer();
596 char[] buffer = new char[500];
597 int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
598 while (numCharsRead != -1)
599 {
600 buf.append(buffer, 0, numCharsRead);
601 numCharsRead = xml_reader.read(buffer, 0, buffer.length);
602 }
603 xmlContents = buf.toString();
604 xml_reader.close(); // closing the old Reader
605 xml_reader = null;
606 buffer = null;
607 buf = null;
608 // we need a Reader to parse the same contents as the Reader that was just closed
609 reader = new BufferedReader(new StringReader(xmlContents));
610 //System.err.println("xmlContents:\n" + xmlContents);
611 }
612
613 // (2) The actual XML parsing
614 InputSource isc = new InputSource(reader);
615 DOMParser parser = new DOMParser();
616 parser.setFeature("http://xml.org/sax/features/validation", false);
617 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
618 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
619 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
620 parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
621 parser.setEntityResolver(new GLIEntityResolver());
622 parser.parse(isc);
623 document = parser.getDocument();
624
625 }
626 catch (SAXParseException e)
627 {
628 showXMLParseFailureLine(e, xmlContents);
629 }
630 catch (SAXException exception)
631 {
632 System.err.println("SAX exception: " + exception.getMessage());
633 if (DebugStream.isDebuggingEnabled())
634 {
635 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
636 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
637 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
638 System.exit(-1);
639 }
640 // else, not running in debug mode, so don't exit after exception
641 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
642 DebugStream.printStackTrace(exception);
643 }
644 catch (Exception exception)
645 {
646 DebugStream.printStackTrace(exception);
647 }
648
649 return document;
650 }
651
652 /**
653 * Displays the line (string) where the SAXParseException occurred, given a
654 * String of the entire xml that was being parsed and the SAXParseException
655 * object that was caught. The messages are printed to DebugStream, so run
656 * GLI/FLI with -debug to view this output.
657 *
658 * @param xmlContents
659 * is the entire xml that was being parsed when the exception
660 * occurred
661 * @param e
662 * is the SAXParseException object that was thrown upon parsing
663 * the xmlContents.
664 */
665 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
666 {
667
668 // There should be no characters at all that preceed the <?xml>... bit.
669 // The first check is for starting spaces:
670 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
671 {
672 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
673 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
674 return; // nothing more to do, first error identified
675 }
676
677 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
678 String line = "";
679 int linenumber = e.getLineNumber();
680 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
681 if (DebugStream.isDebuggingEnabled())
682 {
683 if (linenumber != -1)
684 {
685 String[] lines = xmlContents.split("\n");
686 if (lines.length > 0)
687 {
688 DebugStream.println(" (number of lines: " + lines.length + ")");
689 if (lines.length >= linenumber)
690 {
691 line = lines[linenumber - 1];
692 }
693 else
694 { // error is past the last line
695 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
696 }
697 }
698 else
699 {
700 DebugStream.print("\n");
701 }
702 lines = null;
703
704 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
705 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
706
707 // Uncomment if you want to print out the entire contents of the XML doc:
708 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
709 // + xmlContents + "\n************END\n");
710 }
711 else
712 { // no particular line number, print out all the xml so debugger can inspect it
713 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
714 }
715 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
716 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
717 System.exit(-1);
718 }
719 else
720 { // not running in debug mode
721 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
722 }
723 }
724
725 static public StringBuffer readXMLStream(InputStream input_stream)
726 {
727 StringBuffer xml = new StringBuffer("");
728
729 try
730 {
731 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
732 BufferedReader buffered_in = new BufferedReader(isr);
733
734 String line = "";
735 boolean xml_content = false;
736 while ((line = buffered_in.readLine()) != null)
737 {
738 if (xml_content)
739 {
740 xml.append(line);
741 xml.append("\n");
742 }
743 else if (line.trim().startsWith("<?xml"))
744 {
745 xml_content = true;
746 xml.append(line);
747 xml.append("\n");
748 }
749 else
750 {
751 System.err.println(line);
752 }
753 }
754 buffered_in = null;
755 }
756 catch (Exception error)
757 {
758 System.err.println("Failed when trying to parse XML stream");
759 error.printStackTrace();
760 }
761
762 return xml;
763 }
764
765 /**
766 * Removes characters that are invalid in XML (see
767 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
768 */
769 static public String removeInvalidCharacters(String text)
770 {
771 char[] safe_characters = new char[text.length()];
772 int j = 0;
773
774 char[] raw_characters = new char[text.length()];
775 text.getChars(0, text.length(), raw_characters, 0);
776 for (int i = 0; i < raw_characters.length; i++)
777 {
778 char character = raw_characters[i];
779 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
780 {
781 safe_characters[j] = character;
782 j++;
783 }
784 }
785
786 return new String(safe_characters, 0, j);
787 }
788
789 static public void setElementTextValue(Element element, String text)
790 {
791 // Remove all text node children
792 NodeList children_nodelist = element.getChildNodes();
793 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
794 {
795 Node child_node = children_nodelist.item(i);
796 if (child_node.getNodeType() == Node.TEXT_NODE)
797 {
798 element.removeChild(child_node);
799 }
800 }
801
802 // Add a new text node
803 if (text != null)
804 {
805 element.appendChild(element.getOwnerDocument().createTextNode(text));
806 }
807 }
808
809 /**
810 * Set the #text node value of some element.
811 *
812 * @param element
813 * the Element whose value we wish to set
814 * @param value
815 * the new value for the element as a String Soon to be
816 * deprecated!
817 */
818 static final public void setValue(Element element, String value)
819 {
820 // Remove any existing child node(s)
821 clear(element);
822 // Add new text node.
823 if (value != null)
824 {
825 element.appendChild(element.getOwnerDocument().createTextNode(value));
826 }
827 }
828
829 static public void indentXML(Element elem, int depth)
830 {
831 Document doc = elem.getOwnerDocument();
832
833 String startIndentString = "\n";
834 for (int i = 0; i < depth; i++)
835 {
836 startIndentString += "\t";
837 }
838 Node startTextNode = doc.createTextNode(startIndentString);
839
840 String endIndentString = "\n";
841 for (int i = 0; i < depth - 1; i++)
842 {
843 endIndentString += "\t";
844 }
845 Node endTextNode = doc.createTextNode(endIndentString);
846
847 boolean found = false;
848 Node child = elem.getFirstChild();
849 while (child != null)
850 {
851 // first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
852 if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
853 {
854 Node spaceTextNode = child;
855 child = child.getNextSibling();
856 elem.removeChild(spaceTextNode);
857
858 if(child == null) break;
859 }
860
861 // now process normal element nodes as intended
862 if (child.getNodeType() == Node.ELEMENT_NODE)
863 {
864 found = true;
865 break;
866 }
867 child = child.getNextSibling();
868 }
869
870 if (found)
871 {
872 elem.appendChild(endTextNode);
873 }
874
875 child = elem.getFirstChild();
876 while (child != null)
877 {
878 // Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
879 // because the first while loop above would break out when it found an element node and wouldn't have got rid
880 // of all the empty text nodes yet.
881 // This time, beware not to delete the special end and start empty textnodes just added, since
882 // they've been created and inserted specifically.
883 if(child != endTextNode && child != startTextNode
884 && child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
885 {
886 Node spaceTextNode = child;
887 child = child.getNextSibling();
888 elem.removeChild(spaceTextNode);
889
890 if(child == null) break;
891 }
892
893 // go back to processing normal element nodes as intended
894 if (child.getNodeType() == Node.ELEMENT_NODE)
895 {
896 elem.insertBefore(startTextNode.cloneNode(false), child);
897 indentXML((Element) child, depth + 1);
898 }
899 child = child.getNextSibling();
900 }
901 }
902
903 /**
904 * Write an XML document to a given file with the text node of the specified
905 * element unescaped
906 */
907 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
908 {
909 indentXML(document.getDocumentElement(), 1);
910 try
911 {
912 OutputStream os = new FileOutputStream(xml_file);
913 // Create an output format for our document.
914 OutputFormat f = new OutputFormat(document);
915 f.setEncoding("UTF-8");
916 f.setIndenting(true);
917 f.setLineWidth(0); // Why isn't this working!
918 f.setPreserveSpace(true);
919 if (nonEscapingTagNames != null)
920 {
921 f.setNonEscapingElements(nonEscapingTagNames);
922 }
923 // Create the necessary writer stream for serialization.
924 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
925 Writer w = new BufferedWriter(osw);
926 // Generate a new serializer from the above.
927 XMLSerializer s = new XMLSerializer(w, f);
928 s.asDOMSerializer();
929 // Finally serialize the document to file.
930 s.serialize(document);
931 // And close.
932 os.close();
933 }
934 catch (Exception exception)
935 {
936 DebugStream.printStackTrace(exception);
937 }
938 }
939
940 /** Write an XML document to a given file */
941 static public void writeXMLFile(File xml_file, Document document)
942 {
943 writeXMLFile(xml_file, document, null);
944 }
945
946 public static void printXMLNode(Node e)
947 {
948 printXMLNode(e, 0);
949 }
950
951 public static void printXMLNode(Node e, int depth)
952 { //recursive method call using DOM API...
953
954 for (int i = 0; i < depth; i++)
955 System.out.print(' ');
956
957 if (e.getNodeType() == Node.TEXT_NODE)
958 {
959 //System.out.println("text") ;
960 if (e.getNodeValue() != "")
961 {
962 System.out.println(e.getNodeValue());
963 }
964 return;
965 }
966
967 System.out.print('<');
968 System.out.print(e.getNodeName());
969 NamedNodeMap attrs = e.getAttributes();
970 if (attrs != null)
971 {
972 for (int i = 0; i < attrs.getLength(); i++)
973 {
974 Node attr = attrs.item(i);
975 System.out.print(' ');
976 System.out.print(attr.getNodeName());
977 System.out.print("=\"");
978 System.out.print(attr.getNodeValue());
979 System.out.print('"');
980 }
981 }
982 NodeList children = e.getChildNodes();
983
984 if (children == null || children.getLength() == 0)
985 System.out.println("/>");
986 else
987 {
988
989 System.out.println('>');
990
991 int len = children.getLength();
992 for (int i = 0; i < len; i++)
993 {
994 printXMLNode(children.item(i), depth + 1);
995 }
996
997 for (int i = 0; i < depth; i++)
998 System.out.print(' ');
999
1000 System.out.println("</" + e.getNodeName() + ">");
1001 }
1002
1003 }
1004
1005 public static String xmlNodeToString(Node e)
1006 {
1007 StringBuffer sb = new StringBuffer("");
1008 xmlNodeToString(sb, e, true, "\t", 2);
1009 return sb.toString();
1010 }
1011
1012 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1013 {
1014
1015 if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1016 {
1017 if (e.getNodeValue() != "")
1018 {
1019 String text = e.getNodeValue();
1020 sb.append("<![CDATA[");
1021 sb.append(text);
1022 sb.append("]]>");
1023 }
1024 return;
1025 }
1026
1027 if (e.getNodeType() == Node.TEXT_NODE)
1028 {
1029 if (e.getNodeValue() != "")
1030 {
1031 String text = e.getNodeValue();
1032 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("[\\n\\r\\t\\s]*$", "");
1033 for (Character c : text.toCharArray())
1034 {
1035 if (c.equals('\n'))
1036 {
1037 text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1038 break;
1039 }
1040
1041 if (!Character.isWhitespace(c))
1042 {
1043 break;
1044 }
1045 }
1046 sb.append(text);
1047 }
1048 return;
1049 }
1050
1051 if (e.getNodeType() == Node.COMMENT_NODE)
1052 {
1053 if (e.getNodeValue() != "")
1054 {
1055 sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1056 }
1057 return;
1058 }
1059
1060 if (indent)
1061 {
1062 for (int i = 0; i < depth; i++)
1063 {
1064 sb.append(indentString);
1065 }
1066 }
1067
1068 sb.append('<');
1069 sb.append(e.getNodeName());
1070 NamedNodeMap attrs = e.getAttributes();
1071 if (attrs != null)
1072 {
1073 for (int i = 0; i < attrs.getLength(); i++)
1074 {
1075 Node attr = attrs.item(i);
1076 sb.append(' ');
1077 sb.append(attr.getNodeName());
1078 sb.append("=\"");
1079 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
1080 sb.append('"');
1081 }
1082 }
1083 NodeList children = e.getChildNodes();
1084
1085 boolean hasElements = false;
1086 boolean indentSwapped = false;
1087 for (int i = 0; i < children.getLength(); i++)
1088 {
1089 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1090 {
1091 hasElements = true;
1092 }
1093 if ((children.item(i).getNodeType() == Node.TEXT_NODE || children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1094 {
1095 if (children.item(i).getNodeValue().trim().length() > 0)
1096 {
1097 indentSwapped = true;
1098 indent = false;
1099 }
1100 }
1101 }
1102
1103 if (children == null || children.getLength() == 0)
1104 {
1105 sb.append("/>");
1106
1107 if (indent)
1108 {
1109 sb.append("\n");
1110 }
1111 }
1112 else
1113 {
1114 sb.append(">");
1115 if (hasElements && indent)
1116 {
1117 sb.append("\n");
1118 }
1119
1120 int len = children.getLength();
1121 for (int i = 0; i < len; i++)
1122 {
1123 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1124 }
1125
1126 if (indent)
1127 {
1128 for (int i = 0; i < depth; i++)
1129 {
1130 sb.append(indentString);
1131 }
1132 }
1133
1134 sb.append("</" + e.getNodeName() + ">");
1135
1136 if ((hasElements && indent) || indentSwapped)
1137 {
1138 sb.append("\n");
1139 }
1140 }
1141 }
1142
1143 public static String xmlNodeToStringWithoutIndenting(Node e)
1144 {
1145 StringBuffer sb = new StringBuffer("");
1146 xmlNodeToStringWithoutNewline(sb, e, -1);
1147 return sb.toString();
1148 }
1149
1150 public static String xmlNodeToStringWithoutNewline(Node e)
1151 {
1152 StringBuffer sb = new StringBuffer("");
1153 xmlNodeToStringWithoutNewline(sb, e, 0);
1154 return sb.toString();
1155 }
1156
1157 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1158 {
1159
1160 for (int i = 0; i < depth; i++)
1161 {
1162 sb.append(' ');
1163 }
1164
1165 if (e.getNodeType() == Node.TEXT_NODE)
1166 {
1167 if (e.getNodeValue() != "")
1168 {
1169 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1170 }
1171 return;
1172 }
1173
1174 if (e.getNodeType() == Node.COMMENT_NODE)
1175 {
1176 if (e.getNodeValue() != "")
1177 {
1178 sb.append("<!--" + e.getNodeValue() + "-->");
1179 }
1180 return;
1181 }
1182
1183 sb.append('<');
1184 sb.append(e.getNodeName());
1185 NamedNodeMap attrs = e.getAttributes();
1186 if (attrs != null)
1187 {
1188 for (int i = 0; i < attrs.getLength(); i++)
1189 {
1190 Node attr = attrs.item(i);
1191 sb.append(' ');
1192 sb.append(attr.getNodeName());
1193 sb.append("=\"");
1194 sb.append(attr.getNodeValue());
1195 sb.append('"');
1196 }
1197 }
1198 NodeList children = e.getChildNodes();
1199
1200 if (children == null || children.getLength() == 0)
1201 sb.append("/>");
1202 else
1203 {
1204
1205 sb.append(">");
1206
1207 int len = children.getLength();
1208 for (int i = 0; i < len; i++)
1209 {
1210 if (depth >= 0)
1211 {
1212 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1213 }
1214 else
1215 {
1216 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1217 }
1218 }
1219
1220 for (int i = 0; i < depth; i++)
1221 sb.append(' ');
1222
1223 sb.append("</" + e.getNodeName() + ">");
1224 }
1225 }
1226}
Note: See TracBrowser for help on using the repository browser.