source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 35667

Last change on this file since 35667 was 35667, checked in by davidb, 3 years ago

Some notes on how to address deprecated warnings added

  • Property svn:keywords set to Author Date Id Revision
File size: 34.6 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15import java.io.StringWriter; // for elementToString()
16
17// SAX
18import org.xml.sax.XMLReader;
19import org.xml.sax.SAXException;
20import org.xml.sax.SAXParseException;
21import org.xml.sax.helpers.DefaultHandler;
22import org.xml.sax.InputSource;
23
24// JAXP
25import javax.xml.parsers.DocumentBuilder;
26import javax.xml.parsers.DocumentBuilderFactory;
27import javax.xml.parsers.FactoryConfigurationError;
28import javax.xml.parsers.ParserConfigurationException;
29import javax.xml.parsers.SAXParser;
30import javax.xml.parsers.SAXParserFactory;
31// for elementToString():
32import javax.xml.transform.OutputKeys;
33import javax.xml.transform.Transformer;
34import javax.xml.transform.TransformerFactory;
35import javax.xml.transform.dom.DOMSource;
36import javax.xml.transform.stream.StreamResult;
37
38
39/** This class is a static class containing useful XML functions */
40public class XMLTools
41{
42 /** extracts the text out of a node */
43 public static Node getNodeTextNode(Element param)
44 {
45 param.normalize();
46 Node n = param.getFirstChild();
47 while (n != null && n.getNodeType() != Node.TEXT_NODE)
48 {
49 n = n.getNextSibling();
50 }
51 return n;
52 }
53
54 /** extracts the text out of a node */
55 public static String getNodeText(Element param)
56 {
57 Node text_node = getNodeTextNode(param);
58 if (text_node == null)
59 {
60 return "";
61 }
62 return text_node.getNodeValue();
63 }
64
65 public static void setNodeText(Element elem, String text)
66 {
67 Node old_text_node = getNodeTextNode(elem);
68 if (old_text_node != null)
69 {
70 elem.removeChild(old_text_node);
71 }
72 Text t = elem.getOwnerDocument().createTextNode(text);
73 elem.appendChild(t);
74 }
75
76 /** returns the (first) child element with the given name */
77 public static Node getChildByTagName(Node n, String name)
78 {
79
80 Node child = n.getFirstChild();
81 while (child != null)
82 {
83 if (child.getNodeName().equals(name))
84 {
85 return child;
86 }
87 child = child.getNextSibling();
88 }
89 return null; //not found
90 }
91
92 /**
93 * returns the (nth) child element with the given name index numbers start
94 * at 0
95 */
96 public static Node getChildByTagNameIndexed(Node n, String name, int index)
97 {
98 if (index == -1)
99 {
100 return getChildByTagName(n, name);
101 }
102 int count = 0;
103 Node child = n.getFirstChild();
104 while (child != null)
105 {
106 if (child.getNodeName().equals(name))
107 {
108 if (count == index)
109 {
110 return child;
111 }
112 else
113 {
114 count++;
115 }
116 }
117 child = child.getNextSibling();
118 }
119 return null; //not found
120 }
121
122 /**
123 * returns the element parent/node_name[@attribute_name='attribute_value']
124 */
125 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
126 {
127
128 NodeList children = parent.getChildNodes();
129 for (int i = 0; i < children.getLength(); i++)
130 {
131 Node child = children.item(i);
132 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
133 if (child.getNodeName().equals(node_name))
134 {
135 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
136 return (Element) child;
137 }
138 }
139 // not found
140 return null;
141 }
142
143 /**
144 * returns a list of elements
145 * parent/node_name[@attribute_name='attribute_value']
146 */
147 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
148 {
149 ArrayList elements = new ArrayList();
150 NodeList children = parent.getChildNodes();
151 for (int i = 0; i < children.getLength(); i++)
152 {
153 //System.out.println("getNamedElementList");
154 Node child = children.item(i);
155 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
156 if (child.getNodeName().equals(node_name))
157 {
158 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
159 elements.add((Element) child);
160 }
161 }
162 // not found
163 if (elements.size() == 0)
164 {
165 elements = null;
166 }
167 return elements;
168 }
169
170 public static void copyAllChildren(Element to, Element from)
171 {
172
173 Document to_doc = to.getOwnerDocument();
174 Node child = from.getFirstChild();
175 while (child != null)
176 {
177 to.appendChild(to_doc.importNode(child, true));
178 child = child.getNextSibling();
179 }
180 }
181
182 /** duplicates all elements in list elements and appends to toElement */
183 public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
184 int num_elems = elements.getLength();
185 if (num_elems < 1)
186 {
187 return;
188 }
189 for (int i = 0; i < num_elems; i++)
190 {
191 Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
192 toElement.appendChild(to_element);
193 }
194
195 }
196 /** Duplicates an element */
197 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
198 {
199 return duplicateElementNS(owner, element, null, with_attributes);
200 }
201
202 /** Duplicates an element */
203 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
204 {
205 Element duplicate;
206 if (namespace_uri == null)
207 {
208 duplicate = owner.createElement(element.getTagName());
209 }
210 else
211 {
212 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
213 }
214 // Copy element attributes
215 if (with_attributes)
216 {
217 NamedNodeMap attributes = element.getAttributes();
218 for (int i = 0; i < attributes.getLength(); i++)
219 {
220 Node attribute = attributes.item(i);
221 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
222 }
223 }
224
225 // Copy element children
226 NodeList children = element.getChildNodes();
227 for (int i = 0; i < children.getLength(); i++)
228 {
229 Node child = children.item(i);
230 duplicate.appendChild(owner.importNode(child, true));
231 }
232
233 return duplicate;
234 }
235
236 /** Remove all of the child nodes from a certain node. */
237 static final public void clear(Node node)
238 {
239 while (node.hasChildNodes())
240 {
241 node.removeChild(node.getFirstChild());
242 }
243 }
244
245 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
246 {
247 ArrayList child_elements = new ArrayList();
248
249 NodeList children_nodelist = parent_element.getChildNodes();
250 for (int i = 0; i < children_nodelist.getLength(); i++)
251 {
252 Node child_node = children_nodelist.item(i);
253 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
254 {
255 child_elements.add(child_node);
256 }
257 }
258
259 return child_elements;
260 }
261
262 static public String getElementTextValue(Element element)
263 {
264 // Find the first text node child
265 NodeList children_nodelist = element.getChildNodes();
266 for (int i = 0; i < children_nodelist.getLength(); i++)
267 {
268 Node child_node = children_nodelist.item(i);
269 if (child_node.getNodeType() == Node.TEXT_NODE)
270 {
271 return child_node.getNodeValue();
272 }
273 }
274
275 // None found
276 return "";
277 }
278
279 /**
280 * Method to retrieve the value of a given node.
281 *
282 * @param element
283 * The <strong>Element</strong> whose value we wish to find. Soon
284 * to be deprecated!
285 */
286 static final public String getValue(Node element)
287 {
288 if (element == null)
289 {
290 return "";
291 }
292 // If we've been given a subject node first retrieve its value node.
293 if (element.getNodeName().equals("Subject"))
294 {
295 element = getNodeFromNamed(element, "Value");
296 }
297 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
298 if (element != null && element.hasChildNodes())
299 {
300 StringBuffer text_buffer = new StringBuffer();
301 NodeList text_nodes = element.getChildNodes();
302 for (int i = 0; i < text_nodes.getLength(); i++)
303 {
304 Node possible_text = text_nodes.item(i);
305 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
306 {
307 text_buffer.append(possible_text.getNodeValue());
308 }
309 }
310 return text_buffer.toString();
311 }
312 return "";
313 }
314
315 /**
316 * Method to retrieve from the node given, a certain child node with the
317 * specified name.
318 *
319 * @param parent
320 * The <strong>Node</strong> whose children should be searched.
321 * @param name
322 * The required nodes name as a <strong>String</strong>.
323 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
324 * otherwise. Soon to be deprecated!
325 */
326 static final public Node getNodeFromNamed(Node parent, String name)
327 {
328 Node child = null;
329 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
330 {
331 if (i.getNodeName().equals(name))
332 {
333 child = i;
334 }
335 }
336 return child;
337 }
338
339 static final public String WELLFORMED = "well-formed !";
340 static final public String NOTWELLFORMED = "not well-formed";
341 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:gslib='http://www.greenstone.org/skinning' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
342 static final private String FOOTER = "</collectionConfig>";
343
344
345 public static Document getDOM(String xml_str)
346 {
347 Document doc = null;
348 try {
349
350 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
351 InputSource is = new InputSource();
352 is.setCharacterStream(new StringReader(xml_str));
353 doc = db.parse(is);
354
355 } catch (Exception e) {
356 e.printStackTrace();
357 }
358 return doc;
359 }
360
361 public static String parse(String xml_str)
362 {
363 String validation_msg = WELLFORMED;
364 xml_str = HEADER + xml_str + FOOTER;
365 try
366 {
367 SAXParserFactory factory = SAXParserFactory.newInstance();
368 factory.setNamespaceAware(true);
369 //factory.setValidating (true);
370 SAXParser parser = factory.newSAXParser();
371 InputSource iSource = new InputSource(new StringReader(xml_str));
372 // parser.parse (iSource, new DefaultHandler ());
373
374 org.xml.sax.XMLReader reader = parser.getXMLReader();
375 reader.setContentHandler(new DefaultHandler());
376 reader.setErrorHandler(new DefaultHandler());
377 reader.parse(iSource);
378 }
379 catch (FactoryConfigurationError e)
380 {
381 validation_msg = "unable to get a document builder factory";
382 }
383 catch (ParserConfigurationException e)
384 {
385 validation_msg = "unable to configure parser";
386 }
387 catch (SAXParseException e)
388 {
389 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
390 }
391 catch (SAXException e)
392 {
393 validation_msg += " Fatal error: " + e.toString();
394 }
395 catch (IOException e)
396 {
397 validation_msg = "Unable to read the input, i/o error";
398 }
399
400 return validation_msg;
401 }
402
403 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
404 public static String parseDOM(String xml_str)
405 {
406 String validation_msg = WELLFORMED;
407
408 try
409 {
410 SAXParserFactory factory = SAXParserFactory.newInstance();
411 factory.setNamespaceAware(true);
412 //factory.setValidating (true);
413 SAXParser parser = factory.newSAXParser();
414 InputSource iSource = new InputSource(new StringReader(xml_str));
415 // parser.parse (iSource, new DefaultHandler ());
416
417 org.xml.sax.XMLReader reader = parser.getXMLReader();
418 reader.setContentHandler(new DefaultHandler());
419 reader.setErrorHandler(new DefaultHandler());
420 reader.parse(iSource);
421 }
422 catch (FactoryConfigurationError e)
423 {
424 validation_msg = "unable to get a document builder factory";
425 }
426 catch (ParserConfigurationException e)
427 {
428 validation_msg = "unable to configure parser";
429 }
430 catch (SAXParseException e)
431 {
432 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433 }
434 catch (SAXException e)
435 {
436 validation_msg += " " + e.toString();
437 }
438 catch (IOException e)
439 {
440 validation_msg = "Unable to read the input, i/o error";
441 }
442
443 return validation_msg;
444 }
445
446 public static String parse(File xml_file)
447 {
448 String validation_msg = WELLFORMED;
449
450 try
451 {
452 SAXParserFactory factory = SAXParserFactory.newInstance();
453 factory.setNamespaceAware(true);
454 //factory.setValidating (true);
455 SAXParser parser = factory.newSAXParser();
456 FileReader r = new FileReader(xml_file);
457 InputSource iSource = new InputSource(r);
458 XMLReader reader = parser.getXMLReader();
459 reader.setContentHandler(new DefaultHandler());
460 reader.setErrorHandler(new DefaultHandler());
461 reader.parse(iSource);
462 }
463 catch (FactoryConfigurationError e)
464 {
465 validation_msg = "unable to get a document builder factory";
466 }
467 catch (ParserConfigurationException e)
468 {
469 validation_msg = "unable to configure parser";
470 }
471 catch (SAXParseException e)
472 {
473 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
474 }
475 catch (SAXException e)
476 {
477 validation_msg += " Fatal error: " + e.toString();
478 }
479 catch (IOException e)
480 {
481 validation_msg = "Unable to read the input, i/o error";
482 }
483
484 return validation_msg;
485 }
486
487 /** Returns a string of the location. */
488 private static String getLocationString(SAXParseException ex)
489 {
490 StringBuffer str = new StringBuffer();
491
492 String systemId = ex.getSystemId();
493 if (systemId != null)
494 {
495 int index = systemId.lastIndexOf('/');
496 if (index != -1)
497 systemId = systemId.substring(index + 1);
498 str.append(systemId);
499 }
500 str.append("(line ");
501 str.append(ex.getLineNumber() - 1);
502 str.append(", column ");
503 str.append(ex.getColumnNumber());
504 str.append("): ");
505
506 return str.toString();
507
508 } // getLocationString(SAXParseException):String
509
510 /** Parse an XML document from a given file path */
511 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
512 {
513 if (use_class_loader == true)
514 {
515 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
516 if (is != null)
517 {
518 return parseXML(is);
519 }
520 }
521
522 // Try the file outside the classes directory
523 return parseXMLFile(new File(xml_file_path));
524 }
525
526 /** Parse an XML document from a given file */
527 static public Document parseXMLFile(File xml_file)
528 {
529 // No file? No point trying!
530 if (xml_file.exists() == false)
531 {
532 // System.err.println("@@@ file " + xml_file + " does not exist.");
533 return null;
534 }
535
536 try
537 {
538 return parseXML(new FileInputStream(xml_file));
539 }
540 catch (Exception exception)
541 {
542 DebugStream.printStackTrace(exception);
543 return null;
544 }
545 }
546
547 /** Parse an XML document from a given input stream */
548 static public Document parseXML(InputStream xml_input_stream)
549 {
550 Document document = null;
551
552 try
553 {
554 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
555 document = parseXML(isr);
556 isr.close();
557 xml_input_stream.close();
558 }
559 catch (Exception exception)
560 {
561 DebugStream.printStackTrace(exception);
562 }
563
564 return document;
565 }
566
/**
 * Parse an XML document from a given reader, using a Xerces DOMParser
 * configured as non-validating and with GLIEntityResolver as entity
 * resolver.
 *
 * Behaviour depends on whether debugging is enabled (DebugStream):
 * when it is off, the reader is wrapped in a
 * RemoveContentBeforeRootElementXMLReader before parsing; when it is on,
 * the whole input is first read into a string (and the given reader is
 * closed) so the text can be shown if parsing fails.
 *
 * NOTE: in debug mode a SAX exception terminates the JVM via
 * System.exit(-1), either here or inside showXMLParseFailureLine().
 *
 * @param xml_reader
 *            the source of the XML text; only closed by this method when
 *            debugging is enabled
 * @return the parsed document, or <i>null</i> if parsing failed
 */
static public Document parseXML(Reader xml_reader)
{
    Document document = null;

    // If debugging, the following will store the XML contents to be parsed,
    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on).
    // When not debugging it stays empty.
    String xmlContents = "";

    try
    {
        Reader reader = null;

        // (1) By default, GLI will remove any contents preceding (and invalidating)
        // the XML and present these lines separately to the user
        if (!DebugStream.isDebuggingEnabled())
        {
            try
            {
                reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
            }
            catch (Exception e)
            {
                // NOTE(review): after a failure here 'reader' is still null and is
                // nevertheless handed to the InputSource below.
                System.err.println("Exception while wrapping the reader in parseXML(Reader)");
                e.printStackTrace();
            }
        }

        // (2) If we are running GLI in debug mode:
        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
        // idea of where things went wrong, so the XML contents are kept in xmlContents
        // for the exception handlers below to print.
        // We need to read the XML twice to know the line where things went wrong, so
        // do the additional reading only if we're debugging
        else
        {
            // Slurp the entire input, 500 characters at a time
            StringBuffer buf = new StringBuffer();
            char[] buffer = new char[500];
            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
            while (numCharsRead != -1)
            {
                buf.append(buffer, 0, numCharsRead);
                numCharsRead = xml_reader.read(buffer, 0, buffer.length);
            }
            xmlContents = buf.toString();
            xml_reader.close(); // closing the old Reader
            xml_reader = null;
            buffer = null;
            buf = null;
            // we need a Reader to parse the same contents as the Reader that was just closed
            reader = new BufferedReader(new StringReader(xmlContents));
            //System.err.println("xmlContents:\n" + xmlContents);
        }

        // (3) The actual XML parsing
        InputSource isc = new InputSource(reader);
        DOMParser parser = new DOMParser();
        parser.setFeature("http://xml.org/sax/features/validation", false);
        parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
        parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
        parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
        parser.setEntityResolver(new GLIEntityResolver());
        parser.parse(isc);
        document = parser.getDocument();

    }
    catch (SAXParseException e)
    {
        // Reports the failing line; in debug mode this call exits the program
        showXMLParseFailureLine(e, xmlContents);
    }
    catch (SAXException exception)
    {
        System.err.println("SAX exception: " + exception.getMessage());
        if (DebugStream.isDebuggingEnabled())
        {
            DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
            DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
            System.exit(-1);
        }
        // else, not running in debug mode, so don't exit after exception
        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
        DebugStream.printStackTrace(exception);
    }
    catch (Exception exception)
    {
        DebugStream.printStackTrace(exception);
    }

    // still null if any of the handlers above was entered
    return document;
}
659
660 /**
661 * Displays the line (string) where the SAXParseException occurred, given a
662 * String of the entire xml that was being parsed and the SAXParseException
663 * object that was caught. The messages are printed to DebugStream, so run
664 * GLI/FLI with -debug to view this output.
665 *
666 * @param xmlContents
667 * is the entire xml that was being parsed when the exception
668 * occurred
669 * @param e
670 * is the SAXParseException object that was thrown upon parsing
671 * the xmlContents.
672 */
673 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
674 {
675
676 // There should be no characters at all that preceed the <?xml>... bit.
677 // The first check is for starting spaces:
678 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
679 {
680 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
681 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
682 return; // nothing more to do, first error identified
683 }
684
685 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
686 String line = "";
687 int linenumber = e.getLineNumber();
688 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
689 if (DebugStream.isDebuggingEnabled())
690 {
691 if (linenumber != -1)
692 {
693 String[] lines = xmlContents.split("\n");
694 if (lines.length > 0)
695 {
696 DebugStream.println(" (number of lines: " + lines.length + ")");
697 if (lines.length >= linenumber)
698 {
699 line = lines[linenumber - 1];
700 }
701 else
702 { // error is past the last line
703 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
704 }
705 }
706 else
707 {
708 DebugStream.print("\n");
709 }
710 lines = null;
711
712 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
713 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
714
715 // Uncomment if you want to print out the entire contents of the XML doc:
716 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
717 // + xmlContents + "\n************END\n");
718 }
719 else
720 { // no particular line number, print out all the xml so debugger can inspect it
721 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
722 }
723 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
724 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
725 System.exit(-1);
726 }
727 else
728 { // not running in debug mode
729 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
730 }
731 }
732
733 static public StringBuffer readXMLStream(InputStream input_stream)
734 {
735 StringBuffer xml = new StringBuffer("");
736 try {
737 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
738 xml = XMLTools.readXMLStream(new InputStreamReader(input_stream, "UTF-8"));
739 } catch (UnsupportedEncodingException error) {
740 System.err.println("Failed when trying to parse XML stream");
741 error.printStackTrace();
742 }
743
744 return xml;
745 }
746
747 static public StringBuffer readXMLStream(String s) {
748 return XMLTools.readXMLStream(new StringReader(s));
749 }
750
751
752 static public StringBuffer readXMLStream(Reader reader)
753 {
754 StringBuffer xml = new StringBuffer("");
755
756 try
757 {
758 BufferedReader buffered_in = new BufferedReader(reader);
759
760 String line = "";
761 boolean xml_content = false;
762 while ((line = buffered_in.readLine()) != null)
763 {
764 if (xml_content)
765 {
766 xml.append(line);
767 xml.append("\n");
768 }
769 else if (line.trim().startsWith("<?xml"))
770 {
771 xml_content = true;
772 xml.append(line);
773 xml.append("\n");
774 }
775 else
776 {
777 System.err.println(line);
778 }
779 }
780 buffered_in = null;
781 }
782 catch (Exception error)
783 {
784 System.err.println("Failed when trying to parse XML stream");
785 error.printStackTrace();
786 }
787
788 return xml;
789 }
790
791 /**
792 * Removes characters that are invalid in XML (see
793 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
794 */
795 static public String removeInvalidCharacters(String text)
796 {
797 char[] safe_characters = new char[text.length()];
798 int j = 0;
799
800 char[] raw_characters = new char[text.length()];
801 text.getChars(0, text.length(), raw_characters, 0);
802 for (int i = 0; i < raw_characters.length; i++)
803 {
804 char character = raw_characters[i];
805 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
806 {
807 safe_characters[j] = character;
808 j++;
809 }
810 }
811
812 return new String(safe_characters, 0, j);
813 }
814
815 static public void setElementTextValue(Element element, String text)
816 {
817 // Remove all text node children
818 NodeList children_nodelist = element.getChildNodes();
819 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
820 {
821 Node child_node = children_nodelist.item(i);
822 if (child_node.getNodeType() == Node.TEXT_NODE)
823 {
824 element.removeChild(child_node);
825 }
826 }
827
828 // Add a new text node
829 if (text != null)
830 {
831 element.appendChild(element.getOwnerDocument().createTextNode(text));
832 }
833 }
834
835 /**
836 * Set the #text node value of some element.
837 *
838 * @param element
839 * the Element whose value we wish to set
840 * @param value
841 * the new value for the element as a String Soon to be
842 * deprecated!
843 */
844 static final public void setValue(Element element, String value)
845 {
846 // Remove any existing child node(s)
847 clear(element);
848 // Add new text node.
849 if (value != null)
850 {
851 element.appendChild(element.getOwnerDocument().createTextNode(value));
852 }
853 }
854
855 static public void indentXML(Element elem, int depth)
856 {
857 Document doc = elem.getOwnerDocument();
858
859 String startIndentString = "\n";
860 for (int i = 0; i < depth; i++)
861 {
862 startIndentString += "\t";
863 }
864 Node startTextNode = doc.createTextNode(startIndentString);
865
866 String endIndentString = "\n";
867 for (int i = 0; i < depth - 1; i++)
868 {
869 endIndentString += "\t";
870 }
871 Node endTextNode = doc.createTextNode(endIndentString);
872
873 boolean found = false;
874 Node child = elem.getFirstChild();
875 while (child != null)
876 {
877 // first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
878 if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
879 {
880 Node spaceTextNode = child;
881 child = child.getNextSibling();
882 elem.removeChild(spaceTextNode);
883
884 if(child == null) break;
885 }
886
887 // now process normal element nodes as intended
888 if (child.getNodeType() == Node.ELEMENT_NODE)
889 {
890 found = true;
891 break;
892 }
893 child = child.getNextSibling();
894 }
895
896 if (found)
897 {
898 elem.appendChild(endTextNode);
899 }
900
901 child = elem.getFirstChild();
902 while (child != null)
903 {
904 // Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
905 // because the first while loop above would break out when it found an element node and wouldn't have got rid
906 // of all the empty text nodes yet.
907 // This time, beware not to delete the special end and start empty textnodes just added, since
908 // they've been created and inserted specifically.
909 if(child != endTextNode && child != startTextNode
910 && child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
911 {
912 Node spaceTextNode = child;
913 child = child.getNextSibling();
914 elem.removeChild(spaceTextNode);
915
916 if(child == null) break;
917 }
918
919 // go back to processing normal element nodes as intended
920 if (child.getNodeType() == Node.ELEMENT_NODE)
921 {
922 elem.insertBefore(startTextNode.cloneNode(false), child);
923 indentXML((Element) child, depth + 1);
924 }
925 child = child.getNextSibling();
926 }
927 }
928
929 /**
930 * Write an XML document to a given file with the text node of the specified
931 * element unescaped
932 */
933 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
934 {
935 indentXML(document.getDocumentElement(), 1);
936 try
937 {
938 // OutputFormat() and XMLSerlalizer() have been deprecated
939 // StackOverflow suggested alternative is to use LSSerializer class from the package org.w3c.dom.ls
940 // Some example code doing this at:
941 // https://stackoverflow.com/questions/55729019/xmlserializer-outputformat-deprecated
942
943 OutputStream os = new FileOutputStream(xml_file);
944 // Create an output format for our document.
945 OutputFormat f = new OutputFormat(document);
946 f.setEncoding("UTF-8");
947 f.setIndenting(true);
948 f.setLineWidth(0); // Why isn't this working!
949 f.setPreserveSpace(true);
950 if (nonEscapingTagNames != null)
951 {
952 f.setNonEscapingElements(nonEscapingTagNames);
953 }
954 // Create the necessary writer stream for serialization.
955 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
956 Writer w = new BufferedWriter(osw);
957 // Generate a new serializer from the above.
958 XMLSerializer s = new XMLSerializer(w, f);
959 s.asDOMSerializer();
960 // Finally serialize the document to file.
961 s.serialize(document);
962 // And close.
963 os.close();
964
965 // Useful for debugging unescaped chars that may otherwise break stuff
966 // w = new StringWriter();
967 // // Generate a new serializer from the above.
968 // s = new XMLSerializer(w, f);
969 // s.asDOMSerializer();
970 // // Finally serialize the document to file.
971 // s.serialize(document);
972
973 // System.err.println("@@@@ XMLTools: wrote out to doc:\n" + w.toString());
974
975 }
976 catch (Exception exception)
977 {
978 DebugStream.printStackTrace(exception);
979 }
980 }
981
982 /** Write an XML document to a given file */
983 static public void writeXMLFile(File xml_file, Document document)
984 {
985 writeXMLFile(xml_file, document, null);
986 }
987
988 public static void printXMLNode(Node e)
989 {
990 printXMLNode(e, 0);
991 }
992
993 public static void printXMLNode(Node e, int depth)
994 { //recursive method call using DOM API...
995
996 for (int i = 0; i < depth; i++)
997 System.out.print(' ');
998
999 if (e.getNodeType() == Node.TEXT_NODE)
1000 {
1001 //System.out.println("text") ;
1002 if (e.getNodeValue() != "")
1003 {
1004 System.out.println(e.getNodeValue());
1005 }
1006 return;
1007 }
1008
1009 System.out.print('<');
1010 System.out.print(e.getNodeName());
1011 NamedNodeMap attrs = e.getAttributes();
1012 if (attrs != null)
1013 {
1014 for (int i = 0; i < attrs.getLength(); i++)
1015 {
1016 Node attr = attrs.item(i);
1017 System.out.print(' ');
1018 System.out.print(attr.getNodeName());
1019 System.out.print("=\"");
1020 System.out.print(attr.getNodeValue());
1021 System.out.print('"');
1022 }
1023 }
1024 NodeList children = e.getChildNodes();
1025
1026 if (children == null || children.getLength() == 0)
1027 System.out.println("/>");
1028 else
1029 {
1030
1031 System.out.println('>');
1032
1033 int len = children.getLength();
1034 for (int i = 0; i < len; i++)
1035 {
1036 printXMLNode(children.item(i), depth + 1);
1037 }
1038
1039 for (int i = 0; i < depth; i++)
1040 System.out.print(' ');
1041
1042 System.out.println("</" + e.getNodeName() + ">");
1043 }
1044
1045 }
1046
1047 public static String xmlNodeToString(Node e)
1048 {
1049 StringBuffer sb = new StringBuffer("");
1050 xmlNodeToString(sb, e, true, "\t", 2);
1051 return sb.toString();
1052 }
1053
1054 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1055 {
1056
1057 if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1058 {
1059 if (e.getNodeValue() != "")
1060 {
1061 String text = e.getNodeValue();
1062 sb.append("<![CDATA[");
1063 sb.append(text);
1064 sb.append("]]>");
1065 }
1066 return;
1067 }
1068
1069 if (e.getNodeType() == Node.TEXT_NODE)
1070 {
1071 if (e.getNodeValue() != "")
1072 {
1073 String text = e.getNodeValue();
1074 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("[\\n\\r\\t\\s]*$", "");
1075 for (Character c : text.toCharArray())
1076 {
1077 if (c.equals('\n'))
1078 {
1079 text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1080 break;
1081 }
1082
1083 if (!Character.isWhitespace(c))
1084 {
1085 break;
1086 }
1087 }
1088 sb.append(text);
1089 }
1090 return;
1091 }
1092
1093 if (e.getNodeType() == Node.COMMENT_NODE)
1094 {
1095 if (e.getNodeValue() != "")
1096 {
1097 sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1098 }
1099 return;
1100 }
1101
1102 if (indent)
1103 {
1104 for (int i = 0; i < depth; i++)
1105 {
1106 sb.append(indentString);
1107 }
1108 }
1109
1110 sb.append('<');
1111 sb.append(e.getNodeName());
1112 NamedNodeMap attrs = e.getAttributes();
1113 if (attrs != null)
1114 {
1115 for (int i = 0; i < attrs.getLength(); i++)
1116 {
1117 Node attr = attrs.item(i);
1118 sb.append(' ');
1119 sb.append(attr.getNodeName());
1120 sb.append("=\"");
1121 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
1122 sb.append('"');
1123 }
1124 }
1125 NodeList children = e.getChildNodes();
1126
1127 boolean hasElements = false;
1128 boolean indentSwapped = false;
1129 for (int i = 0; i < children.getLength(); i++)
1130 {
1131 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1132 {
1133 hasElements = true;
1134 }
1135 if ((children.item(i).getNodeType() == Node.TEXT_NODE || children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1136 {
1137 if (children.item(i).getNodeValue().trim().length() > 0)
1138 {
1139 indentSwapped = true;
1140 indent = false;
1141 }
1142 }
1143 }
1144
1145 if (children == null || children.getLength() == 0)
1146 {
1147 sb.append("/>");
1148
1149 if (indent)
1150 {
1151 sb.append("\n");
1152 }
1153 }
1154 else
1155 {
1156 sb.append(">");
1157 if (hasElements && indent)
1158 {
1159 sb.append("\n");
1160 }
1161
1162 int len = children.getLength();
1163 for (int i = 0; i < len; i++)
1164 {
1165 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1166 }
1167
1168 if (indent)
1169 {
1170 for (int i = 0; i < depth; i++)
1171 {
1172 sb.append(indentString);
1173 }
1174 }
1175
1176 sb.append("</" + e.getNodeName() + ">");
1177
1178 if ((hasElements && indent) || indentSwapped)
1179 {
1180 sb.append("\n");
1181 }
1182 }
1183 }
1184
1185 public static String xmlNodeToStringWithoutIndenting(Node e)
1186 {
1187 StringBuffer sb = new StringBuffer("");
1188 xmlNodeToStringWithoutNewline(sb, e, -1);
1189 return sb.toString();
1190 }
1191
1192 public static String xmlNodeToStringWithoutNewline(Node e)
1193 {
1194 StringBuffer sb = new StringBuffer("");
1195 xmlNodeToStringWithoutNewline(sb, e, 0);
1196 return sb.toString();
1197 }
1198
1199 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1200 {
1201
1202 for (int i = 0; i < depth; i++)
1203 {
1204 sb.append(' ');
1205 }
1206
1207 if (e.getNodeType() == Node.TEXT_NODE)
1208 {
1209 if (e.getNodeValue() != "")
1210 {
1211 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1212 }
1213 return;
1214 }
1215
1216 if (e.getNodeType() == Node.COMMENT_NODE)
1217 {
1218 if (e.getNodeValue() != "")
1219 {
1220 sb.append("<!--" + e.getNodeValue() + "-->");
1221 }
1222 return;
1223 }
1224
1225 sb.append('<');
1226 sb.append(e.getNodeName());
1227 NamedNodeMap attrs = e.getAttributes();
1228 if (attrs != null)
1229 {
1230 for (int i = 0; i < attrs.getLength(); i++)
1231 {
1232 Node attr = attrs.item(i);
1233 sb.append(' ');
1234 sb.append(attr.getNodeName());
1235 sb.append("=\"");
1236 sb.append(attr.getNodeValue());
1237 sb.append('"');
1238 }
1239 }
1240 NodeList children = e.getChildNodes();
1241
1242 if (children == null || children.getLength() == 0)
1243 sb.append("/>");
1244 else
1245 {
1246
1247 sb.append(">");
1248
1249 int len = children.getLength();
1250 for (int i = 0; i < len; i++)
1251 {
1252 if (depth >= 0)
1253 {
1254 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1255 }
1256 else
1257 {
1258 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1259 }
1260 }
1261
1262 for (int i = 0; i < depth; i++)
1263 sb.append(' ');
1264
1265 sb.append("</" + e.getNodeName() + ">");
1266 }
1267 }
1268
1269
1270
1271 // This method will convert an Element to a String too, like xmlNodeToString() above.
1272 // But for a document root element (doc.getDocumentElement()), this method will additionally
1273 // return its processing instruction line at the start (<?xml ... ?>).
1274 // This method copied into GLI from src/java/org/greenstone/gsdl3/util/GSXML.java
1275 public static String elementToString(Element e, boolean indent)
1276 {
1277 String str = "";
1278 try
1279 {
1280 TransformerFactory tf = TransformerFactory.newInstance();
1281 Transformer trans = tf.newTransformer();
1282 StringWriter sw = new StringWriter();
1283 if (indent)
1284 {
1285 trans.setOutputProperty(OutputKeys.INDENT, "yes");
1286 }
1287 else
1288 {
1289 trans.setOutputProperty(OutputKeys.INDENT, "no");
1290 }
1291 trans.transform(new DOMSource(e), new StreamResult(sw));
1292 str = sw.toString();
1293 }
1294 catch (Exception ex)
1295 {
1296 str += "Exception: couldn't write " + e + " to log";
1297 }
1298 finally
1299 {
1300 return str;
1301 }
1302 }
1303}
Note: See TracBrowser for help on using the repository browser.