source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 30705

Last change on this file since 30705 was 30705, checked in by ak19, 8 years ago

Merely tidying up.

  • Property svn:keywords set to Author Date Id Revision
File size: 33.4 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15import java.io.StringWriter; // for elementToString()
16
17// SAX
18import org.xml.sax.XMLReader;
19import org.xml.sax.SAXException;
20import org.xml.sax.SAXParseException;
21import org.xml.sax.helpers.DefaultHandler;
22import org.xml.sax.InputSource;
23
24// JAXP
25import javax.xml.parsers.DocumentBuilder;
26import javax.xml.parsers.DocumentBuilderFactory;
27import javax.xml.parsers.FactoryConfigurationError;
28import javax.xml.parsers.ParserConfigurationException;
29import javax.xml.parsers.SAXParser;
30import javax.xml.parsers.SAXParserFactory;
31// for elementToString():
32import javax.xml.transform.OutputKeys;
33import javax.xml.transform.Transformer;
34import javax.xml.transform.TransformerFactory;
35import javax.xml.transform.dom.DOMSource;
36import javax.xml.transform.stream.StreamResult;
37
38
39/** This class is a static class containing useful XML functions */
40public class XMLTools
41{
42 /** extracts the text out of a node */
43 public static Node getNodeTextNode(Element param)
44 {
45 param.normalize();
46 Node n = param.getFirstChild();
47 while (n != null && n.getNodeType() != Node.TEXT_NODE)
48 {
49 n = n.getNextSibling();
50 }
51 return n;
52 }
53
54 /** extracts the text out of a node */
55 public static String getNodeText(Element param)
56 {
57 Node text_node = getNodeTextNode(param);
58 if (text_node == null)
59 {
60 return "";
61 }
62 return text_node.getNodeValue();
63 }
64
65 public static void setNodeText(Element elem, String text)
66 {
67 Node old_text_node = getNodeTextNode(elem);
68 if (old_text_node != null)
69 {
70 elem.removeChild(old_text_node);
71 }
72 Text t = elem.getOwnerDocument().createTextNode(text);
73 elem.appendChild(t);
74 }
75
76 /** returns the (first) child element with the given name */
77 public static Node getChildByTagName(Node n, String name)
78 {
79
80 Node child = n.getFirstChild();
81 while (child != null)
82 {
83 if (child.getNodeName().equals(name))
84 {
85 return child;
86 }
87 child = child.getNextSibling();
88 }
89 return null; //not found
90 }
91
92 /**
93 * returns the (nth) child element with the given name index numbers start
94 * at 0
95 */
96 public static Node getChildByTagNameIndexed(Node n, String name, int index)
97 {
98 if (index == -1)
99 {
100 return getChildByTagName(n, name);
101 }
102 int count = 0;
103 Node child = n.getFirstChild();
104 while (child != null)
105 {
106 if (child.getNodeName().equals(name))
107 {
108 if (count == index)
109 {
110 return child;
111 }
112 else
113 {
114 count++;
115 }
116 }
117 child = child.getNextSibling();
118 }
119 return null; //not found
120 }
121
122 /**
123 * returns the element parent/node_name[@attribute_name='attribute_value']
124 */
125 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
126 {
127
128 NodeList children = parent.getChildNodes();
129 for (int i = 0; i < children.getLength(); i++)
130 {
131 Node child = children.item(i);
132 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
133 if (child.getNodeName().equals(node_name))
134 {
135 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
136 return (Element) child;
137 }
138 }
139 // not found
140 return null;
141 }
142
143 /**
144 * returns a list of elements
145 * parent/node_name[@attribute_name='attribute_value']
146 */
147 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
148 {
149 ArrayList elements = new ArrayList();
150 NodeList children = parent.getChildNodes();
151 for (int i = 0; i < children.getLength(); i++)
152 {
153 //System.out.println("getNamedElementList");
154 Node child = children.item(i);
155 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
156 if (child.getNodeName().equals(node_name))
157 {
158 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
159 elements.add((Element) child);
160 }
161 }
162 // not found
163 if (elements.size() == 0)
164 {
165 elements = null;
166 }
167 return elements;
168 }
169
170 public static void copyAllChildren(Element to, Element from)
171 {
172
173 Document to_doc = to.getOwnerDocument();
174 Node child = from.getFirstChild();
175 while (child != null)
176 {
177 to.appendChild(to_doc.importNode(child, true));
178 child = child.getNextSibling();
179 }
180 }
181
182 /** duplicates all elements in list elements and appends to toElement */
183 public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
184 int num_elems = elements.getLength();
185 if (num_elems < 1)
186 {
187 return;
188 }
189 for (int i = 0; i < num_elems; i++)
190 {
191 Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
192 toElement.appendChild(to_element);
193 }
194
195 }
196 /** Duplicates an element */
197 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
198 {
199 return duplicateElementNS(owner, element, null, with_attributes);
200 }
201
202 /** Duplicates an element */
203 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
204 {
205 Element duplicate;
206 if (namespace_uri == null)
207 {
208 duplicate = owner.createElement(element.getTagName());
209 }
210 else
211 {
212 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
213 }
214 // Copy element attributes
215 if (with_attributes)
216 {
217 NamedNodeMap attributes = element.getAttributes();
218 for (int i = 0; i < attributes.getLength(); i++)
219 {
220 Node attribute = attributes.item(i);
221 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
222 }
223 }
224
225 // Copy element children
226 NodeList children = element.getChildNodes();
227 for (int i = 0; i < children.getLength(); i++)
228 {
229 Node child = children.item(i);
230 duplicate.appendChild(owner.importNode(child, true));
231 }
232
233 return duplicate;
234 }
235
236 /** Remove all of the child nodes from a certain node. */
237 static final public void clear(Node node)
238 {
239 while (node.hasChildNodes())
240 {
241 node.removeChild(node.getFirstChild());
242 }
243 }
244
245 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
246 {
247 ArrayList child_elements = new ArrayList();
248
249 NodeList children_nodelist = parent_element.getChildNodes();
250 for (int i = 0; i < children_nodelist.getLength(); i++)
251 {
252 Node child_node = children_nodelist.item(i);
253 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
254 {
255 child_elements.add(child_node);
256 }
257 }
258
259 return child_elements;
260 }
261
262 static public String getElementTextValue(Element element)
263 {
264 // Find the first text node child
265 NodeList children_nodelist = element.getChildNodes();
266 for (int i = 0; i < children_nodelist.getLength(); i++)
267 {
268 Node child_node = children_nodelist.item(i);
269 if (child_node.getNodeType() == Node.TEXT_NODE)
270 {
271 return child_node.getNodeValue();
272 }
273 }
274
275 // None found
276 return "";
277 }
278
279 /**
280 * Method to retrieve the value of a given node.
281 *
282 * @param element
283 * The <strong>Element</strong> whose value we wish to find. Soon
284 * to be deprecated!
285 */
286 static final public String getValue(Node element)
287 {
288 if (element == null)
289 {
290 return "";
291 }
292 // If we've been given a subject node first retrieve its value node.
293 if (element.getNodeName().equals("Subject"))
294 {
295 element = getNodeFromNamed(element, "Value");
296 }
297 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
298 if (element != null && element.hasChildNodes())
299 {
300 StringBuffer text_buffer = new StringBuffer();
301 NodeList text_nodes = element.getChildNodes();
302 for (int i = 0; i < text_nodes.getLength(); i++)
303 {
304 Node possible_text = text_nodes.item(i);
305 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
306 {
307 text_buffer.append(possible_text.getNodeValue());
308 }
309 }
310 return text_buffer.toString();
311 }
312 return "";
313 }
314
315 /**
316 * Method to retrieve from the node given, a certain child node with the
317 * specified name.
318 *
319 * @param parent
320 * The <strong>Node</strong> whose children should be searched.
321 * @param name
322 * The required nodes name as a <strong>String</strong>.
323 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
324 * otherwise. Soon to be deprecated!
325 */
326 static final public Node getNodeFromNamed(Node parent, String name)
327 {
328 Node child = null;
329 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
330 {
331 if (i.getNodeName().equals(name))
332 {
333 child = i;
334 }
335 }
336 return child;
337 }
338
339 static final public String WELLFORMED = "well-formed !";
340 static final public String NOTWELLFORMED = "not well-formed";
341 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
342 static final private String FOOTER = "</collectionConfig>";
343
344
345 public static Document getDOM(String xml_str)
346 {
347 Document doc = null;
348 try {
349
350 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
351 InputSource is = new InputSource();
352 is.setCharacterStream(new StringReader(xml_str));
353 doc = db.parse(is);
354
355 } catch (Exception e) {
356 e.printStackTrace();
357 }
358 return doc;
359 }
360
361 public static String parse(String xml_str)
362 {
363 String validation_msg = WELLFORMED;
364 xml_str = HEADER + xml_str + FOOTER;
365 try
366 {
367 SAXParserFactory factory = SAXParserFactory.newInstance();
368 factory.setNamespaceAware(true);
369 //factory.setValidating (true);
370 SAXParser parser = factory.newSAXParser();
371 InputSource iSource = new InputSource(new StringReader(xml_str));
372 // parser.parse (iSource, new DefaultHandler ());
373
374 org.xml.sax.XMLReader reader = parser.getXMLReader();
375 reader.setContentHandler(new DefaultHandler());
376 reader.setErrorHandler(new DefaultHandler());
377 reader.parse(iSource);
378 }
379 catch (FactoryConfigurationError e)
380 {
381 validation_msg = "unable to get a document builder factory";
382 }
383 catch (ParserConfigurationException e)
384 {
385 validation_msg = "unable to configure parser";
386 }
387 catch (SAXParseException e)
388 {
389 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
390 }
391 catch (SAXException e)
392 {
393 validation_msg += " Fatal error: " + e.toString();
394 }
395 catch (IOException e)
396 {
397 validation_msg = "Unable to read the input, i/o error";
398 }
399
400 return validation_msg;
401 }
402
403 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
404 public static String parseDOM(String xml_str)
405 {
406 String validation_msg = WELLFORMED;
407
408 try
409 {
410 SAXParserFactory factory = SAXParserFactory.newInstance();
411 factory.setNamespaceAware(true);
412 //factory.setValidating (true);
413 SAXParser parser = factory.newSAXParser();
414 InputSource iSource = new InputSource(new StringReader(xml_str));
415 // parser.parse (iSource, new DefaultHandler ());
416
417 org.xml.sax.XMLReader reader = parser.getXMLReader();
418 reader.setContentHandler(new DefaultHandler());
419 reader.setErrorHandler(new DefaultHandler());
420 reader.parse(iSource);
421 }
422 catch (FactoryConfigurationError e)
423 {
424 validation_msg = "unable to get a document builder factory";
425 }
426 catch (ParserConfigurationException e)
427 {
428 validation_msg = "unable to configure parser";
429 }
430 catch (SAXParseException e)
431 {
432 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433 }
434 catch (SAXException e)
435 {
436 validation_msg += " " + e.toString();
437 }
438 catch (IOException e)
439 {
440 validation_msg = "Unable to read the input, i/o error";
441 }
442
443 return validation_msg;
444 }
445
446 public static String parse(File xml_file)
447 {
448 String validation_msg = WELLFORMED;
449
450 try
451 {
452 SAXParserFactory factory = SAXParserFactory.newInstance();
453 factory.setNamespaceAware(true);
454 //factory.setValidating (true);
455 SAXParser parser = factory.newSAXParser();
456 FileReader r = new FileReader(xml_file);
457 InputSource iSource = new InputSource(r);
458 XMLReader reader = parser.getXMLReader();
459 reader.setContentHandler(new DefaultHandler());
460 reader.setErrorHandler(new DefaultHandler());
461 reader.parse(iSource);
462 }
463 catch (FactoryConfigurationError e)
464 {
465 validation_msg = "unable to get a document builder factory";
466 }
467 catch (ParserConfigurationException e)
468 {
469 validation_msg = "unable to configure parser";
470 }
471 catch (SAXParseException e)
472 {
473 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
474 }
475 catch (SAXException e)
476 {
477 validation_msg += " Fatal error: " + e.toString();
478 }
479 catch (IOException e)
480 {
481 validation_msg = "Unable to read the input, i/o error";
482 }
483
484 return validation_msg;
485 }
486
487 /** Returns a string of the location. */
488 private static String getLocationString(SAXParseException ex)
489 {
490 StringBuffer str = new StringBuffer();
491
492 String systemId = ex.getSystemId();
493 if (systemId != null)
494 {
495 int index = systemId.lastIndexOf('/');
496 if (index != -1)
497 systemId = systemId.substring(index + 1);
498 str.append(systemId);
499 }
500 str.append("(line ");
501 str.append(ex.getLineNumber() - 1);
502 str.append(", column ");
503 str.append(ex.getColumnNumber());
504 str.append("): ");
505
506 return str.toString();
507
508 } // getLocationString(SAXParseException):String
509
510 /** Parse an XML document from a given file path */
511 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
512 {
513 if (use_class_loader == true)
514 {
515 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
516 if (is != null)
517 {
518 return parseXML(is);
519 }
520 }
521
522 // Try the file outside the classes directory
523 return parseXMLFile(new File(xml_file_path));
524 }
525
526 /** Parse an XML document from a given file */
527 static public Document parseXMLFile(File xml_file)
528 {
529 // No file? No point trying!
530 if (xml_file.exists() == false)
531 {
532 System.err.println("@@@ file " + xml_file + " does not exist.");
533 return null;
534 }
535
536 try
537 {
538 return parseXML(new FileInputStream(xml_file));
539 }
540 catch (Exception exception)
541 {
542 DebugStream.printStackTrace(exception);
543 return null;
544 }
545 }
546
547 /** Parse an XML document from a given input stream */
548 static public Document parseXML(InputStream xml_input_stream)
549 {
550 Document document = null;
551
552 try
553 {
554 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
555 document = parseXML(isr);
556 isr.close();
557 xml_input_stream.close();
558 }
559 catch (Exception exception)
560 {
561 DebugStream.printStackTrace(exception);
562 }
563
564 return document;
565 }
566
567 /** Parse an XML document from a given reader */
568 static public Document parseXML(Reader xml_reader)
569 {
570 Document document = null;
571
572 // If debugging, the following will store the XML contents to be parsed,
573 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
574 String xmlContents = "";
575
576 try
577 {
578 Reader reader = null;
579
580 // (1) By default, GLI will remove any contents preceeding (and invalidating)
581 // the XML and present these lines separately to the user
582 if (!DebugStream.isDebuggingEnabled())
583 {
584 try
585 {
586 reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
587 }
588 catch (Exception e)
589 {
590 System.err.println("Exception while wrapping the reader in parseXML(Reader)");
591 e.printStackTrace();
592 }
593 }
594
595 // (2) If we are running GLI in debug mode:
596 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
597 // idea of where things went wrong. This will print the "XML" contents to either
598 // system.out (if debugging is off) or to the DebugStream otherwise.
599 // We need to read the XML twice to know the line where things went wrong, so
600 // do the additional reading only if we're debugging
601 else
602 {
603 StringBuffer buf = new StringBuffer();
604 char[] buffer = new char[500];
605 int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
606 while (numCharsRead != -1)
607 {
608 buf.append(buffer, 0, numCharsRead);
609 numCharsRead = xml_reader.read(buffer, 0, buffer.length);
610 }
611 xmlContents = buf.toString();
612 xml_reader.close(); // closing the old Reader
613 xml_reader = null;
614 buffer = null;
615 buf = null;
616 // we need a Reader to parse the same contents as the Reader that was just closed
617 reader = new BufferedReader(new StringReader(xmlContents));
618 //System.err.println("xmlContents:\n" + xmlContents);
619 }
620
621 // (2) The actual XML parsing
622 InputSource isc = new InputSource(reader);
623 DOMParser parser = new DOMParser();
624 parser.setFeature("http://xml.org/sax/features/validation", false);
625 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
626 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
627 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
628 parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
629 parser.setEntityResolver(new GLIEntityResolver());
630 parser.parse(isc);
631 document = parser.getDocument();
632
633 }
634 catch (SAXParseException e)
635 {
636 showXMLParseFailureLine(e, xmlContents);
637 }
638 catch (SAXException exception)
639 {
640 System.err.println("SAX exception: " + exception.getMessage());
641 if (DebugStream.isDebuggingEnabled())
642 {
643 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
644 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
645 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
646 System.exit(-1);
647 }
648 // else, not running in debug mode, so don't exit after exception
649 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
650 DebugStream.printStackTrace(exception);
651 }
652 catch (Exception exception)
653 {
654 DebugStream.printStackTrace(exception);
655 }
656
657 return document;
658 }
659
660 /**
661 * Displays the line (string) where the SAXParseException occurred, given a
662 * String of the entire xml that was being parsed and the SAXParseException
663 * object that was caught. The messages are printed to DebugStream, so run
664 * GLI/FLI with -debug to view this output.
665 *
666 * @param xmlContents
667 * is the entire xml that was being parsed when the exception
668 * occurred
669 * @param e
670 * is the SAXParseException object that was thrown upon parsing
671 * the xmlContents.
672 */
673 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
674 {
675
676 // There should be no characters at all that preceed the <?xml>... bit.
677 // The first check is for starting spaces:
678 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
679 {
680 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
681 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
682 return; // nothing more to do, first error identified
683 }
684
685 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
686 String line = "";
687 int linenumber = e.getLineNumber();
688 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
689 if (DebugStream.isDebuggingEnabled())
690 {
691 if (linenumber != -1)
692 {
693 String[] lines = xmlContents.split("\n");
694 if (lines.length > 0)
695 {
696 DebugStream.println(" (number of lines: " + lines.length + ")");
697 if (lines.length >= linenumber)
698 {
699 line = lines[linenumber - 1];
700 }
701 else
702 { // error is past the last line
703 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
704 }
705 }
706 else
707 {
708 DebugStream.print("\n");
709 }
710 lines = null;
711
712 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
713 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
714
715 // Uncomment if you want to print out the entire contents of the XML doc:
716 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
717 // + xmlContents + "\n************END\n");
718 }
719 else
720 { // no particular line number, print out all the xml so debugger can inspect it
721 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
722 }
723 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
724 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
725 System.exit(-1);
726 }
727 else
728 { // not running in debug mode
729 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
730 }
731 }
732
733 static public StringBuffer readXMLStream(InputStream input_stream)
734 {
735 StringBuffer xml = new StringBuffer("");
736
737 try
738 {
739 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
740 BufferedReader buffered_in = new BufferedReader(isr);
741
742 String line = "";
743 boolean xml_content = false;
744 while ((line = buffered_in.readLine()) != null)
745 {
746 if (xml_content)
747 {
748 xml.append(line);
749 xml.append("\n");
750 }
751 else if (line.trim().startsWith("<?xml"))
752 {
753 xml_content = true;
754 xml.append(line);
755 xml.append("\n");
756 }
757 else
758 {
759 System.err.println(line);
760 }
761 }
762 buffered_in = null;
763 }
764 catch (Exception error)
765 {
766 System.err.println("Failed when trying to parse XML stream");
767 error.printStackTrace();
768 }
769
770 return xml;
771 }
772
773 /**
774 * Removes characters that are invalid in XML (see
775 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
776 */
777 static public String removeInvalidCharacters(String text)
778 {
779 char[] safe_characters = new char[text.length()];
780 int j = 0;
781
782 char[] raw_characters = new char[text.length()];
783 text.getChars(0, text.length(), raw_characters, 0);
784 for (int i = 0; i < raw_characters.length; i++)
785 {
786 char character = raw_characters[i];
787 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
788 {
789 safe_characters[j] = character;
790 j++;
791 }
792 }
793
794 return new String(safe_characters, 0, j);
795 }
796
797 static public void setElementTextValue(Element element, String text)
798 {
799 // Remove all text node children
800 NodeList children_nodelist = element.getChildNodes();
801 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
802 {
803 Node child_node = children_nodelist.item(i);
804 if (child_node.getNodeType() == Node.TEXT_NODE)
805 {
806 element.removeChild(child_node);
807 }
808 }
809
810 // Add a new text node
811 if (text != null)
812 {
813 element.appendChild(element.getOwnerDocument().createTextNode(text));
814 }
815 }
816
817 /**
818 * Set the #text node value of some element.
819 *
820 * @param element
821 * the Element whose value we wish to set
822 * @param value
823 * the new value for the element as a String Soon to be
824 * deprecated!
825 */
826 static final public void setValue(Element element, String value)
827 {
828 // Remove any existing child node(s)
829 clear(element);
830 // Add new text node.
831 if (value != null)
832 {
833 element.appendChild(element.getOwnerDocument().createTextNode(value));
834 }
835 }
836
837 static public void indentXML(Element elem, int depth)
838 {
839 Document doc = elem.getOwnerDocument();
840
841 String startIndentString = "\n";
842 for (int i = 0; i < depth; i++)
843 {
844 startIndentString += "\t";
845 }
846 Node startTextNode = doc.createTextNode(startIndentString);
847
848 String endIndentString = "\n";
849 for (int i = 0; i < depth - 1; i++)
850 {
851 endIndentString += "\t";
852 }
853 Node endTextNode = doc.createTextNode(endIndentString);
854
855 boolean found = false;
856 Node child = elem.getFirstChild();
857 while (child != null)
858 {
859 // first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
860 if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
861 {
862 Node spaceTextNode = child;
863 child = child.getNextSibling();
864 elem.removeChild(spaceTextNode);
865
866 if(child == null) break;
867 }
868
869 // now process normal element nodes as intended
870 if (child.getNodeType() == Node.ELEMENT_NODE)
871 {
872 found = true;
873 break;
874 }
875 child = child.getNextSibling();
876 }
877
878 if (found)
879 {
880 elem.appendChild(endTextNode);
881 }
882
883 child = elem.getFirstChild();
884 while (child != null)
885 {
886 // Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
887 // because the first while loop above would break out when it found an element node and wouldn't have got rid
888 // of all the empty text nodes yet.
889 // This time, beware not to delete the special end and start empty textnodes just added, since
890 // they've been created and inserted specifically.
891 if(child != endTextNode && child != startTextNode
892 && child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
893 {
894 Node spaceTextNode = child;
895 child = child.getNextSibling();
896 elem.removeChild(spaceTextNode);
897
898 if(child == null) break;
899 }
900
901 // go back to processing normal element nodes as intended
902 if (child.getNodeType() == Node.ELEMENT_NODE)
903 {
904 elem.insertBefore(startTextNode.cloneNode(false), child);
905 indentXML((Element) child, depth + 1);
906 }
907 child = child.getNextSibling();
908 }
909 }
910
911 /**
912 * Write an XML document to a given file with the text node of the specified
913 * element unescaped
914 */
915 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
916 {
917 indentXML(document.getDocumentElement(), 1);
918 try
919 {
920 OutputStream os = new FileOutputStream(xml_file);
921 // Create an output format for our document.
922 OutputFormat f = new OutputFormat(document);
923 f.setEncoding("UTF-8");
924 f.setIndenting(true);
925 f.setLineWidth(0); // Why isn't this working!
926 f.setPreserveSpace(true);
927 if (nonEscapingTagNames != null)
928 {
929 f.setNonEscapingElements(nonEscapingTagNames);
930 }
931 // Create the necessary writer stream for serialization.
932 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
933 Writer w = new BufferedWriter(osw);
934 // Generate a new serializer from the above.
935 XMLSerializer s = new XMLSerializer(w, f);
936 s.asDOMSerializer();
937 // Finally serialize the document to file.
938 s.serialize(document);
939 // And close.
940 os.close();
941 }
942 catch (Exception exception)
943 {
944 DebugStream.printStackTrace(exception);
945 }
946 }
947
948 /** Write an XML document to a given file */
949 static public void writeXMLFile(File xml_file, Document document)
950 {
951 writeXMLFile(xml_file, document, null);
952 }
953
954 public static void printXMLNode(Node e)
955 {
956 printXMLNode(e, 0);
957 }
958
959 public static void printXMLNode(Node e, int depth)
960 { //recursive method call using DOM API...
961
962 for (int i = 0; i < depth; i++)
963 System.out.print(' ');
964
965 if (e.getNodeType() == Node.TEXT_NODE)
966 {
967 //System.out.println("text") ;
968 if (e.getNodeValue() != "")
969 {
970 System.out.println(e.getNodeValue());
971 }
972 return;
973 }
974
975 System.out.print('<');
976 System.out.print(e.getNodeName());
977 NamedNodeMap attrs = e.getAttributes();
978 if (attrs != null)
979 {
980 for (int i = 0; i < attrs.getLength(); i++)
981 {
982 Node attr = attrs.item(i);
983 System.out.print(' ');
984 System.out.print(attr.getNodeName());
985 System.out.print("=\"");
986 System.out.print(attr.getNodeValue());
987 System.out.print('"');
988 }
989 }
990 NodeList children = e.getChildNodes();
991
992 if (children == null || children.getLength() == 0)
993 System.out.println("/>");
994 else
995 {
996
997 System.out.println('>');
998
999 int len = children.getLength();
1000 for (int i = 0; i < len; i++)
1001 {
1002 printXMLNode(children.item(i), depth + 1);
1003 }
1004
1005 for (int i = 0; i < depth; i++)
1006 System.out.print(' ');
1007
1008 System.out.println("</" + e.getNodeName() + ">");
1009 }
1010
1011 }
1012
1013 public static String xmlNodeToString(Node e)
1014 {
1015 StringBuffer sb = new StringBuffer("");
1016 xmlNodeToString(sb, e, true, "\t", 2);
1017 return sb.toString();
1018 }
1019
1020 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1021 {
1022
1023 if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1024 {
1025 if (e.getNodeValue() != "")
1026 {
1027 String text = e.getNodeValue();
1028 sb.append("<![CDATA[");
1029 sb.append(text);
1030 sb.append("]]>");
1031 }
1032 return;
1033 }
1034
1035 if (e.getNodeType() == Node.TEXT_NODE)
1036 {
1037 if (e.getNodeValue() != "")
1038 {
1039 String text = e.getNodeValue();
1040 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("[\\n\\r\\t\\s]*$", "");
1041 for (Character c : text.toCharArray())
1042 {
1043 if (c.equals('\n'))
1044 {
1045 text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1046 break;
1047 }
1048
1049 if (!Character.isWhitespace(c))
1050 {
1051 break;
1052 }
1053 }
1054 sb.append(text);
1055 }
1056 return;
1057 }
1058
1059 if (e.getNodeType() == Node.COMMENT_NODE)
1060 {
1061 if (e.getNodeValue() != "")
1062 {
1063 sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1064 }
1065 return;
1066 }
1067
1068 if (indent)
1069 {
1070 for (int i = 0; i < depth; i++)
1071 {
1072 sb.append(indentString);
1073 }
1074 }
1075
1076 sb.append('<');
1077 sb.append(e.getNodeName());
1078 NamedNodeMap attrs = e.getAttributes();
1079 if (attrs != null)
1080 {
1081 for (int i = 0; i < attrs.getLength(); i++)
1082 {
1083 Node attr = attrs.item(i);
1084 sb.append(' ');
1085 sb.append(attr.getNodeName());
1086 sb.append("=\"");
1087 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
1088 sb.append('"');
1089 }
1090 }
1091 NodeList children = e.getChildNodes();
1092
1093 boolean hasElements = false;
1094 boolean indentSwapped = false;
1095 for (int i = 0; i < children.getLength(); i++)
1096 {
1097 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1098 {
1099 hasElements = true;
1100 }
1101 if ((children.item(i).getNodeType() == Node.TEXT_NODE || children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1102 {
1103 if (children.item(i).getNodeValue().trim().length() > 0)
1104 {
1105 indentSwapped = true;
1106 indent = false;
1107 }
1108 }
1109 }
1110
1111 if (children == null || children.getLength() == 0)
1112 {
1113 sb.append("/>");
1114
1115 if (indent)
1116 {
1117 sb.append("\n");
1118 }
1119 }
1120 else
1121 {
1122 sb.append(">");
1123 if (hasElements && indent)
1124 {
1125 sb.append("\n");
1126 }
1127
1128 int len = children.getLength();
1129 for (int i = 0; i < len; i++)
1130 {
1131 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1132 }
1133
1134 if (indent)
1135 {
1136 for (int i = 0; i < depth; i++)
1137 {
1138 sb.append(indentString);
1139 }
1140 }
1141
1142 sb.append("</" + e.getNodeName() + ">");
1143
1144 if ((hasElements && indent) || indentSwapped)
1145 {
1146 sb.append("\n");
1147 }
1148 }
1149 }
1150
1151 public static String xmlNodeToStringWithoutIndenting(Node e)
1152 {
1153 StringBuffer sb = new StringBuffer("");
1154 xmlNodeToStringWithoutNewline(sb, e, -1);
1155 return sb.toString();
1156 }
1157
1158 public static String xmlNodeToStringWithoutNewline(Node e)
1159 {
1160 StringBuffer sb = new StringBuffer("");
1161 xmlNodeToStringWithoutNewline(sb, e, 0);
1162 return sb.toString();
1163 }
1164
1165 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1166 {
1167
1168 for (int i = 0; i < depth; i++)
1169 {
1170 sb.append(' ');
1171 }
1172
1173 if (e.getNodeType() == Node.TEXT_NODE)
1174 {
1175 if (e.getNodeValue() != "")
1176 {
1177 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1178 }
1179 return;
1180 }
1181
1182 if (e.getNodeType() == Node.COMMENT_NODE)
1183 {
1184 if (e.getNodeValue() != "")
1185 {
1186 sb.append("<!--" + e.getNodeValue() + "-->");
1187 }
1188 return;
1189 }
1190
1191 sb.append('<');
1192 sb.append(e.getNodeName());
1193 NamedNodeMap attrs = e.getAttributes();
1194 if (attrs != null)
1195 {
1196 for (int i = 0; i < attrs.getLength(); i++)
1197 {
1198 Node attr = attrs.item(i);
1199 sb.append(' ');
1200 sb.append(attr.getNodeName());
1201 sb.append("=\"");
1202 sb.append(attr.getNodeValue());
1203 sb.append('"');
1204 }
1205 }
1206 NodeList children = e.getChildNodes();
1207
1208 if (children == null || children.getLength() == 0)
1209 sb.append("/>");
1210 else
1211 {
1212
1213 sb.append(">");
1214
1215 int len = children.getLength();
1216 for (int i = 0; i < len; i++)
1217 {
1218 if (depth >= 0)
1219 {
1220 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1221 }
1222 else
1223 {
1224 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1225 }
1226 }
1227
1228 for (int i = 0; i < depth; i++)
1229 sb.append(' ');
1230
1231 sb.append("</" + e.getNodeName() + ">");
1232 }
1233 }
1234
1235
1236
1237 // This method will convert an Element to a String too, like xmlNodeToString() above.
1238 // But for a document root element (doc.getDocumentElement()), this method will additionally
1239 // return its processing instruction line at the start (<?xml ... ?>).
1240 // This method copied into GLI from src/java/org/greenstone/gsdl3/util/GSXML.java
1241 public static String elementToString(Element e, boolean indent)
1242 {
1243 String str = "";
1244 try
1245 {
1246 TransformerFactory tf = TransformerFactory.newInstance();
1247 Transformer trans = tf.newTransformer();
1248 StringWriter sw = new StringWriter();
1249 if (indent)
1250 {
1251 trans.setOutputProperty(OutputKeys.INDENT, "yes");
1252 }
1253 else
1254 {
1255 trans.setOutputProperty(OutputKeys.INDENT, "no");
1256 }
1257 trans.transform(new DOMSource(e), new StreamResult(sw));
1258 str = sw.toString();
1259 }
1260 catch (Exception ex)
1261 {
1262 str += "Exception: couldn't write " + e + " to log";
1263 }
1264 finally
1265 {
1266 return str;
1267 }
1268 }
1269}
Note: See TracBrowser for help on using the repository browser.