source: main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java@ 31776

Last change on this file since 31776 was 31776, checked in by ak19, 7 years ago

Kathy described a problem on the mailing list about the AutoLoadConverters msg appearing before XML content when connecting with a client-GLI to a remote GS3, which breaks parsing of the XML. (I found it was present in my GS3 installation from 4 May 2016.) I've narrowed it down to running client-gli with the debug flag turned on, which seemed related to the same problem I'd seen when running gli -debug after the change to SafeProcess in Plugins.java, which I fixed by removing lines preceding XML content before parsing the XML. For client-gli, SafeProcess isn't used which is also why the problem with client-gli is much older, but I'm now using a common and existing solution for both: doing what Plugins.java used to do before the change to SafeProcess, which is call XMLTools.readXMLStream(), which would parse out content before XML. The RemoteGreenstoneServer should only call this method if it actually has some XML content it's dealing with. Could have solved this in RemoteGreenstoneServerAction.java's GetScriptOptions, but am solving it in RemoteGreenstoneServer's sendCommandToServerInternal, since there may be many Actions returning XML, not just GetScriptOptions.

  • Property svn:keywords set to Author Date Id Revision
File size: 33.9 KB
Line 
1package org.greenstone.gatherer.util;
2
3import java.io.*;
4import java.net.*;
5import java.util.*;
6import org.apache.xerces.parsers.*;
7import org.apache.xml.serialize.*;
8import org.greenstone.gatherer.DebugStream;
9import org.w3c.dom.*;
10import org.xml.sax.*;
11
12import java.io.FileReader;
13import java.io.IOException;
14import java.io.StringReader;
15import java.io.StringWriter; // for elementToString()
16
17// SAX
18import org.xml.sax.XMLReader;
19import org.xml.sax.SAXException;
20import org.xml.sax.SAXParseException;
21import org.xml.sax.helpers.DefaultHandler;
22import org.xml.sax.InputSource;
23
24// JAXP
25import javax.xml.parsers.DocumentBuilder;
26import javax.xml.parsers.DocumentBuilderFactory;
27import javax.xml.parsers.FactoryConfigurationError;
28import javax.xml.parsers.ParserConfigurationException;
29import javax.xml.parsers.SAXParser;
30import javax.xml.parsers.SAXParserFactory;
31// for elementToString():
32import javax.xml.transform.OutputKeys;
33import javax.xml.transform.Transformer;
34import javax.xml.transform.TransformerFactory;
35import javax.xml.transform.dom.DOMSource;
36import javax.xml.transform.stream.StreamResult;
37
38
39/** This class is a static class containing useful XML functions */
40public class XMLTools
41{
42 /** extracts the text out of a node */
43 public static Node getNodeTextNode(Element param)
44 {
45 param.normalize();
46 Node n = param.getFirstChild();
47 while (n != null && n.getNodeType() != Node.TEXT_NODE)
48 {
49 n = n.getNextSibling();
50 }
51 return n;
52 }
53
54 /** extracts the text out of a node */
55 public static String getNodeText(Element param)
56 {
57 Node text_node = getNodeTextNode(param);
58 if (text_node == null)
59 {
60 return "";
61 }
62 return text_node.getNodeValue();
63 }
64
65 public static void setNodeText(Element elem, String text)
66 {
67 Node old_text_node = getNodeTextNode(elem);
68 if (old_text_node != null)
69 {
70 elem.removeChild(old_text_node);
71 }
72 Text t = elem.getOwnerDocument().createTextNode(text);
73 elem.appendChild(t);
74 }
75
76 /** returns the (first) child element with the given name */
77 public static Node getChildByTagName(Node n, String name)
78 {
79
80 Node child = n.getFirstChild();
81 while (child != null)
82 {
83 if (child.getNodeName().equals(name))
84 {
85 return child;
86 }
87 child = child.getNextSibling();
88 }
89 return null; //not found
90 }
91
92 /**
93 * returns the (nth) child element with the given name index numbers start
94 * at 0
95 */
96 public static Node getChildByTagNameIndexed(Node n, String name, int index)
97 {
98 if (index == -1)
99 {
100 return getChildByTagName(n, name);
101 }
102 int count = 0;
103 Node child = n.getFirstChild();
104 while (child != null)
105 {
106 if (child.getNodeName().equals(name))
107 {
108 if (count == index)
109 {
110 return child;
111 }
112 else
113 {
114 count++;
115 }
116 }
117 child = child.getNextSibling();
118 }
119 return null; //not found
120 }
121
122 /**
123 * returns the element parent/node_name[@attribute_name='attribute_value']
124 */
125 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
126 {
127
128 NodeList children = parent.getChildNodes();
129 for (int i = 0; i < children.getLength(); i++)
130 {
131 Node child = children.item(i);
132 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
133 if (child.getNodeName().equals(node_name))
134 {
135 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
136 return (Element) child;
137 }
138 }
139 // not found
140 return null;
141 }
142
143 /**
144 * returns a list of elements
145 * parent/node_name[@attribute_name='attribute_value']
146 */
147 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
148 {
149 ArrayList elements = new ArrayList();
150 NodeList children = parent.getChildNodes();
151 for (int i = 0; i < children.getLength(); i++)
152 {
153 //System.out.println("getNamedElementList");
154 Node child = children.item(i);
155 //logger.debug("getnamed elem, node nmae="+child.getNodeName());
156 if (child.getNodeName().equals(node_name))
157 {
158 if (((Element) child).getAttribute(attribute_name).equals(attribute_value))
159 elements.add((Element) child);
160 }
161 }
162 // not found
163 if (elements.size() == 0)
164 {
165 elements = null;
166 }
167 return elements;
168 }
169
170 public static void copyAllChildren(Element to, Element from)
171 {
172
173 Document to_doc = to.getOwnerDocument();
174 Node child = from.getFirstChild();
175 while (child != null)
176 {
177 to.appendChild(to_doc.importNode(child, true));
178 child = child.getNextSibling();
179 }
180 }
181
182 /** duplicates all elements in list elements and appends to toElement */
183 public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
184 int num_elems = elements.getLength();
185 if (num_elems < 1)
186 {
187 return;
188 }
189 for (int i = 0; i < num_elems; i++)
190 {
191 Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
192 toElement.appendChild(to_element);
193 }
194
195 }
196 /** Duplicates an element */
197 public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
198 {
199 return duplicateElementNS(owner, element, null, with_attributes);
200 }
201
202 /** Duplicates an element */
203 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
204 {
205 Element duplicate;
206 if (namespace_uri == null)
207 {
208 duplicate = owner.createElement(element.getTagName());
209 }
210 else
211 {
212 duplicate = owner.createElementNS(namespace_uri, element.getTagName());
213 }
214 // Copy element attributes
215 if (with_attributes)
216 {
217 NamedNodeMap attributes = element.getAttributes();
218 for (int i = 0; i < attributes.getLength(); i++)
219 {
220 Node attribute = attributes.item(i);
221 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
222 }
223 }
224
225 // Copy element children
226 NodeList children = element.getChildNodes();
227 for (int i = 0; i < children.getLength(); i++)
228 {
229 Node child = children.item(i);
230 duplicate.appendChild(owner.importNode(child, true));
231 }
232
233 return duplicate;
234 }
235
236 /** Remove all of the child nodes from a certain node. */
237 static final public void clear(Node node)
238 {
239 while (node.hasChildNodes())
240 {
241 node.removeChild(node.getFirstChild());
242 }
243 }
244
245 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
246 {
247 ArrayList child_elements = new ArrayList();
248
249 NodeList children_nodelist = parent_element.getChildNodes();
250 for (int i = 0; i < children_nodelist.getLength(); i++)
251 {
252 Node child_node = children_nodelist.item(i);
253 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
254 {
255 child_elements.add(child_node);
256 }
257 }
258
259 return child_elements;
260 }
261
262 static public String getElementTextValue(Element element)
263 {
264 // Find the first text node child
265 NodeList children_nodelist = element.getChildNodes();
266 for (int i = 0; i < children_nodelist.getLength(); i++)
267 {
268 Node child_node = children_nodelist.item(i);
269 if (child_node.getNodeType() == Node.TEXT_NODE)
270 {
271 return child_node.getNodeValue();
272 }
273 }
274
275 // None found
276 return "";
277 }
278
279 /**
280 * Method to retrieve the value of a given node.
281 *
282 * @param element
283 * The <strong>Element</strong> whose value we wish to find. Soon
284 * to be deprecated!
285 */
286 static final public String getValue(Node element)
287 {
288 if (element == null)
289 {
290 return "";
291 }
292 // If we've been given a subject node first retrieve its value node.
293 if (element.getNodeName().equals("Subject"))
294 {
295 element = getNodeFromNamed(element, "Value");
296 }
297 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes
298 if (element != null && element.hasChildNodes())
299 {
300 StringBuffer text_buffer = new StringBuffer();
301 NodeList text_nodes = element.getChildNodes();
302 for (int i = 0; i < text_nodes.getLength(); i++)
303 {
304 Node possible_text = text_nodes.item(i);
305 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
306 {
307 text_buffer.append(possible_text.getNodeValue());
308 }
309 }
310 return text_buffer.toString();
311 }
312 return "";
313 }
314
315 /**
316 * Method to retrieve from the node given, a certain child node with the
317 * specified name.
318 *
319 * @param parent
320 * The <strong>Node</strong> whose children should be searched.
321 * @param name
322 * The required nodes name as a <strong>String</strong>.
323 * @return The requested <strong>Node</strong> if it is found, <i>null</i>
324 * otherwise. Soon to be deprecated!
325 */
326 static final public Node getNodeFromNamed(Node parent, String name)
327 {
328 Node child = null;
329 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling())
330 {
331 if (i.getNodeName().equals(name))
332 {
333 child = i;
334 }
335 }
336 return child;
337 }
338
339 static final public String WELLFORMED = "well-formed !";
340 static final public String NOTWELLFORMED = "not well-formed";
341 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:gslib='http://www.greenstone.org/skinning' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
342 static final private String FOOTER = "</collectionConfig>";
343
344
345 public static Document getDOM(String xml_str)
346 {
347 Document doc = null;
348 try {
349
350 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
351 InputSource is = new InputSource();
352 is.setCharacterStream(new StringReader(xml_str));
353 doc = db.parse(is);
354
355 } catch (Exception e) {
356 e.printStackTrace();
357 }
358 return doc;
359 }
360
361 public static String parse(String xml_str)
362 {
363 String validation_msg = WELLFORMED;
364 xml_str = HEADER + xml_str + FOOTER;
365 try
366 {
367 SAXParserFactory factory = SAXParserFactory.newInstance();
368 factory.setNamespaceAware(true);
369 //factory.setValidating (true);
370 SAXParser parser = factory.newSAXParser();
371 InputSource iSource = new InputSource(new StringReader(xml_str));
372 // parser.parse (iSource, new DefaultHandler ());
373
374 org.xml.sax.XMLReader reader = parser.getXMLReader();
375 reader.setContentHandler(new DefaultHandler());
376 reader.setErrorHandler(new DefaultHandler());
377 reader.parse(iSource);
378 }
379 catch (FactoryConfigurationError e)
380 {
381 validation_msg = "unable to get a document builder factory";
382 }
383 catch (ParserConfigurationException e)
384 {
385 validation_msg = "unable to configure parser";
386 }
387 catch (SAXParseException e)
388 {
389 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
390 }
391 catch (SAXException e)
392 {
393 validation_msg += " Fatal error: " + e.toString();
394 }
395 catch (IOException e)
396 {
397 validation_msg = "Unable to read the input, i/o error";
398 }
399
400 return validation_msg;
401 }
402
403 //In this method, the parsed string xml_str is not wrapped by the header and footer strings.
404 public static String parseDOM(String xml_str)
405 {
406 String validation_msg = WELLFORMED;
407
408 try
409 {
410 SAXParserFactory factory = SAXParserFactory.newInstance();
411 factory.setNamespaceAware(true);
412 //factory.setValidating (true);
413 SAXParser parser = factory.newSAXParser();
414 InputSource iSource = new InputSource(new StringReader(xml_str));
415 // parser.parse (iSource, new DefaultHandler ());
416
417 org.xml.sax.XMLReader reader = parser.getXMLReader();
418 reader.setContentHandler(new DefaultHandler());
419 reader.setErrorHandler(new DefaultHandler());
420 reader.parse(iSource);
421 }
422 catch (FactoryConfigurationError e)
423 {
424 validation_msg = "unable to get a document builder factory";
425 }
426 catch (ParserConfigurationException e)
427 {
428 validation_msg = "unable to configure parser";
429 }
430 catch (SAXParseException e)
431 {
432 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
433 }
434 catch (SAXException e)
435 {
436 validation_msg += " " + e.toString();
437 }
438 catch (IOException e)
439 {
440 validation_msg = "Unable to read the input, i/o error";
441 }
442
443 return validation_msg;
444 }
445
446 public static String parse(File xml_file)
447 {
448 String validation_msg = WELLFORMED;
449
450 try
451 {
452 SAXParserFactory factory = SAXParserFactory.newInstance();
453 factory.setNamespaceAware(true);
454 //factory.setValidating (true);
455 SAXParser parser = factory.newSAXParser();
456 FileReader r = new FileReader(xml_file);
457 InputSource iSource = new InputSource(r);
458 XMLReader reader = parser.getXMLReader();
459 reader.setContentHandler(new DefaultHandler());
460 reader.setErrorHandler(new DefaultHandler());
461 reader.parse(iSource);
462 }
463 catch (FactoryConfigurationError e)
464 {
465 validation_msg = "unable to get a document builder factory";
466 }
467 catch (ParserConfigurationException e)
468 {
469 validation_msg = "unable to configure parser";
470 }
471 catch (SAXParseException e)
472 {
473 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
474 }
475 catch (SAXException e)
476 {
477 validation_msg += " Fatal error: " + e.toString();
478 }
479 catch (IOException e)
480 {
481 validation_msg = "Unable to read the input, i/o error";
482 }
483
484 return validation_msg;
485 }
486
487 /** Returns a string of the location. */
488 private static String getLocationString(SAXParseException ex)
489 {
490 StringBuffer str = new StringBuffer();
491
492 String systemId = ex.getSystemId();
493 if (systemId != null)
494 {
495 int index = systemId.lastIndexOf('/');
496 if (index != -1)
497 systemId = systemId.substring(index + 1);
498 str.append(systemId);
499 }
500 str.append("(line ");
501 str.append(ex.getLineNumber() - 1);
502 str.append(", column ");
503 str.append(ex.getColumnNumber());
504 str.append("): ");
505
506 return str.toString();
507
508 } // getLocationString(SAXParseException):String
509
510 /** Parse an XML document from a given file path */
511 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
512 {
513 if (use_class_loader == true)
514 {
515 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
516 if (is != null)
517 {
518 return parseXML(is);
519 }
520 }
521
522 // Try the file outside the classes directory
523 return parseXMLFile(new File(xml_file_path));
524 }
525
526 /** Parse an XML document from a given file */
527 static public Document parseXMLFile(File xml_file)
528 {
529 // No file? No point trying!
530 if (xml_file.exists() == false)
531 {
532 // System.err.println("@@@ file " + xml_file + " does not exist.");
533 return null;
534 }
535
536 try
537 {
538 return parseXML(new FileInputStream(xml_file));
539 }
540 catch (Exception exception)
541 {
542 DebugStream.printStackTrace(exception);
543 return null;
544 }
545 }
546
547 /** Parse an XML document from a given input stream */
548 static public Document parseXML(InputStream xml_input_stream)
549 {
550 Document document = null;
551
552 try
553 {
554 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8");
555 document = parseXML(isr);
556 isr.close();
557 xml_input_stream.close();
558 }
559 catch (Exception exception)
560 {
561 DebugStream.printStackTrace(exception);
562 }
563
564 return document;
565 }
566
567 /** Parse an XML document from a given reader */
568 static public Document parseXML(Reader xml_reader)
569 {
570 Document document = null;
571
572 // If debugging, the following will store the XML contents to be parsed,
573 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
574 String xmlContents = "";
575
576 try
577 {
578 Reader reader = null;
579
580 // (1) By default, GLI will remove any contents preceeding (and invalidating)
581 // the XML and present these lines separately to the user
582 if (!DebugStream.isDebuggingEnabled())
583 {
584 try
585 {
586 reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
587 }
588 catch (Exception e)
589 {
590 System.err.println("Exception while wrapping the reader in parseXML(Reader)");
591 e.printStackTrace();
592 }
593 }
594
595 // (2) If we are running GLI in debug mode:
596 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
597 // idea of where things went wrong. This will print the "XML" contents to either
598 // system.out (if debugging is off) or to the DebugStream otherwise.
599 // We need to read the XML twice to know the line where things went wrong, so
600 // do the additional reading only if we're debugging
601 else
602 {
603 StringBuffer buf = new StringBuffer();
604 char[] buffer = new char[500];
605 int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
606 while (numCharsRead != -1)
607 {
608 buf.append(buffer, 0, numCharsRead);
609 numCharsRead = xml_reader.read(buffer, 0, buffer.length);
610 }
611 xmlContents = buf.toString();
612 xml_reader.close(); // closing the old Reader
613 xml_reader = null;
614 buffer = null;
615 buf = null;
616 // we need a Reader to parse the same contents as the Reader that was just closed
617 reader = new BufferedReader(new StringReader(xmlContents));
618 //System.err.println("xmlContents:\n" + xmlContents);
619 }
620
621 // (2) The actual XML parsing
622 InputSource isc = new InputSource(reader);
623 DOMParser parser = new DOMParser();
624 parser.setFeature("http://xml.org/sax/features/validation", false);
625 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
626 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
627 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
628 parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
629 parser.setEntityResolver(new GLIEntityResolver());
630 parser.parse(isc);
631 document = parser.getDocument();
632
633 }
634 catch (SAXParseException e)
635 {
636 showXMLParseFailureLine(e, xmlContents);
637 }
638 catch (SAXException exception)
639 {
640 System.err.println("SAX exception: " + exception.getMessage());
641 if (DebugStream.isDebuggingEnabled())
642 {
643 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
644 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
645 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
646 System.exit(-1);
647 }
648 // else, not running in debug mode, so don't exit after exception
649 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
650 DebugStream.printStackTrace(exception);
651 }
652 catch (Exception exception)
653 {
654 DebugStream.printStackTrace(exception);
655 }
656
657 return document;
658 }
659
660 /**
661 * Displays the line (string) where the SAXParseException occurred, given a
662 * String of the entire xml that was being parsed and the SAXParseException
663 * object that was caught. The messages are printed to DebugStream, so run
664 * GLI/FLI with -debug to view this output.
665 *
666 * @param xmlContents
667 * is the entire xml that was being parsed when the exception
668 * occurred
669 * @param e
670 * is the SAXParseException object that was thrown upon parsing
671 * the xmlContents.
672 */
673 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
674 {
675
676 // There should be no characters at all that preceed the <?xml>... bit.
677 // The first check is for starting spaces:
678 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
679 {
680 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
681 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
682 return; // nothing more to do, first error identified
683 }
684
685 // the actual line (String literal) where parsing failed and the SAXParseException occurred.
686 String line = "";
687 int linenumber = e.getLineNumber();
688 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);
689 if (DebugStream.isDebuggingEnabled())
690 {
691 if (linenumber != -1)
692 {
693 String[] lines = xmlContents.split("\n");
694 if (lines.length > 0)
695 {
696 DebugStream.println(" (number of lines: " + lines.length + ")");
697 if (lines.length >= linenumber)
698 {
699 line = lines[linenumber - 1];
700 }
701 else
702 { // error is past the last line
703 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
704 }
705 }
706 else
707 {
708 DebugStream.print("\n");
709 }
710 lines = null;
711
712 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
713 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
714
715 // Uncomment if you want to print out the entire contents of the XML doc:
716 //DebugStream.println("\n\nThis was the XML:\n*********START\n"
717 // + xmlContents + "\n************END\n");
718 }
719 else
720 { // no particular line number, print out all the xml so debugger can inspect it
721 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
722 }
723 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
724 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
725 System.exit(-1);
726 }
727 else
728 { // not running in debug mode
729 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
730 }
731 }
732
733 static public StringBuffer readXMLStream(InputStream input_stream)
734 {
735 StringBuffer xml = new StringBuffer("");
736 try {
737 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
738 xml = XMLTools.readXMLStream(new InputStreamReader(input_stream, "UTF-8"));
739 } catch (UnsupportedEncodingException error) {
740 System.err.println("Failed when trying to parse XML stream");
741 error.printStackTrace();
742 }
743
744 return xml;
745 }
746
747 static public StringBuffer readXMLStream(String s) {
748 return XMLTools.readXMLStream(new StringReader(s));
749 }
750
751
752 static public StringBuffer readXMLStream(Reader reader)
753 {
754 StringBuffer xml = new StringBuffer("");
755
756 try
757 {
758 BufferedReader buffered_in = new BufferedReader(reader);
759
760 String line = "";
761 boolean xml_content = false;
762 while ((line = buffered_in.readLine()) != null)
763 {
764 if (xml_content)
765 {
766 xml.append(line);
767 xml.append("\n");
768 }
769 else if (line.trim().startsWith("<?xml"))
770 {
771 xml_content = true;
772 xml.append(line);
773 xml.append("\n");
774 }
775 else
776 {
777 System.err.println(line);
778 }
779 }
780 buffered_in = null;
781 }
782 catch (Exception error)
783 {
784 System.err.println("Failed when trying to parse XML stream");
785 error.printStackTrace();
786 }
787
788 return xml;
789 }
790
791 /**
792 * Removes characters that are invalid in XML (see
793 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets)
794 */
795 static public String removeInvalidCharacters(String text)
796 {
797 char[] safe_characters = new char[text.length()];
798 int j = 0;
799
800 char[] raw_characters = new char[text.length()];
801 text.getChars(0, text.length(), raw_characters, 0);
802 for (int i = 0; i < raw_characters.length; i++)
803 {
804 char character = raw_characters[i];
805 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
806 {
807 safe_characters[j] = character;
808 j++;
809 }
810 }
811
812 return new String(safe_characters, 0, j);
813 }
814
815 static public void setElementTextValue(Element element, String text)
816 {
817 // Remove all text node children
818 NodeList children_nodelist = element.getChildNodes();
819 for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
820 {
821 Node child_node = children_nodelist.item(i);
822 if (child_node.getNodeType() == Node.TEXT_NODE)
823 {
824 element.removeChild(child_node);
825 }
826 }
827
828 // Add a new text node
829 if (text != null)
830 {
831 element.appendChild(element.getOwnerDocument().createTextNode(text));
832 }
833 }
834
835 /**
836 * Set the #text node value of some element.
837 *
838 * @param element
839 * the Element whose value we wish to set
840 * @param value
841 * the new value for the element as a String Soon to be
842 * deprecated!
843 */
844 static final public void setValue(Element element, String value)
845 {
846 // Remove any existing child node(s)
847 clear(element);
848 // Add new text node.
849 if (value != null)
850 {
851 element.appendChild(element.getOwnerDocument().createTextNode(value));
852 }
853 }
854
855 static public void indentXML(Element elem, int depth)
856 {
857 Document doc = elem.getOwnerDocument();
858
859 String startIndentString = "\n";
860 for (int i = 0; i < depth; i++)
861 {
862 startIndentString += "\t";
863 }
864 Node startTextNode = doc.createTextNode(startIndentString);
865
866 String endIndentString = "\n";
867 for (int i = 0; i < depth - 1; i++)
868 {
869 endIndentString += "\t";
870 }
871 Node endTextNode = doc.createTextNode(endIndentString);
872
873 boolean found = false;
874 Node child = elem.getFirstChild();
875 while (child != null)
876 {
877 // first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
878 if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
879 {
880 Node spaceTextNode = child;
881 child = child.getNextSibling();
882 elem.removeChild(spaceTextNode);
883
884 if(child == null) break;
885 }
886
887 // now process normal element nodes as intended
888 if (child.getNodeType() == Node.ELEMENT_NODE)
889 {
890 found = true;
891 break;
892 }
893 child = child.getNextSibling();
894 }
895
896 if (found)
897 {
898 elem.appendChild(endTextNode);
899 }
900
901 child = elem.getFirstChild();
902 while (child != null)
903 {
904 // Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
905 // because the first while loop above would break out when it found an element node and wouldn't have got rid
906 // of all the empty text nodes yet.
907 // This time, beware not to delete the special end and start empty textnodes just added, since
908 // they've been created and inserted specifically.
909 if(child != endTextNode && child != startTextNode
910 && child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
911 {
912 Node spaceTextNode = child;
913 child = child.getNextSibling();
914 elem.removeChild(spaceTextNode);
915
916 if(child == null) break;
917 }
918
919 // go back to processing normal element nodes as intended
920 if (child.getNodeType() == Node.ELEMENT_NODE)
921 {
922 elem.insertBefore(startTextNode.cloneNode(false), child);
923 indentXML((Element) child, depth + 1);
924 }
925 child = child.getNextSibling();
926 }
927 }
928
929 /**
930 * Write an XML document to a given file with the text node of the specified
931 * element unescaped
932 */
933 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
934 {
935 indentXML(document.getDocumentElement(), 1);
936 try
937 {
938 OutputStream os = new FileOutputStream(xml_file);
939 // Create an output format for our document.
940 OutputFormat f = new OutputFormat(document);
941 f.setEncoding("UTF-8");
942 f.setIndenting(true);
943 f.setLineWidth(0); // Why isn't this working!
944 f.setPreserveSpace(true);
945 if (nonEscapingTagNames != null)
946 {
947 f.setNonEscapingElements(nonEscapingTagNames);
948 }
949 // Create the necessary writer stream for serialization.
950 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
951 Writer w = new BufferedWriter(osw);
952 // Generate a new serializer from the above.
953 XMLSerializer s = new XMLSerializer(w, f);
954 s.asDOMSerializer();
955 // Finally serialize the document to file.
956 s.serialize(document);
957 // And close.
958 os.close();
959 }
960 catch (Exception exception)
961 {
962 DebugStream.printStackTrace(exception);
963 }
964 }
965
966 /** Write an XML document to a given file */
967 static public void writeXMLFile(File xml_file, Document document)
968 {
969 writeXMLFile(xml_file, document, null);
970 }
971
972 public static void printXMLNode(Node e)
973 {
974 printXMLNode(e, 0);
975 }
976
977 public static void printXMLNode(Node e, int depth)
978 { //recursive method call using DOM API...
979
980 for (int i = 0; i < depth; i++)
981 System.out.print(' ');
982
983 if (e.getNodeType() == Node.TEXT_NODE)
984 {
985 //System.out.println("text") ;
986 if (e.getNodeValue() != "")
987 {
988 System.out.println(e.getNodeValue());
989 }
990 return;
991 }
992
993 System.out.print('<');
994 System.out.print(e.getNodeName());
995 NamedNodeMap attrs = e.getAttributes();
996 if (attrs != null)
997 {
998 for (int i = 0; i < attrs.getLength(); i++)
999 {
1000 Node attr = attrs.item(i);
1001 System.out.print(' ');
1002 System.out.print(attr.getNodeName());
1003 System.out.print("=\"");
1004 System.out.print(attr.getNodeValue());
1005 System.out.print('"');
1006 }
1007 }
1008 NodeList children = e.getChildNodes();
1009
1010 if (children == null || children.getLength() == 0)
1011 System.out.println("/>");
1012 else
1013 {
1014
1015 System.out.println('>');
1016
1017 int len = children.getLength();
1018 for (int i = 0; i < len; i++)
1019 {
1020 printXMLNode(children.item(i), depth + 1);
1021 }
1022
1023 for (int i = 0; i < depth; i++)
1024 System.out.print(' ');
1025
1026 System.out.println("</" + e.getNodeName() + ">");
1027 }
1028
1029 }
1030
1031 public static String xmlNodeToString(Node e)
1032 {
1033 StringBuffer sb = new StringBuffer("");
1034 xmlNodeToString(sb, e, true, "\t", 2);
1035 return sb.toString();
1036 }
1037
1038 public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
1039 {
1040
1041 if (e.getNodeType() == Node.CDATA_SECTION_NODE)
1042 {
1043 if (e.getNodeValue() != "")
1044 {
1045 String text = e.getNodeValue();
1046 sb.append("<![CDATA[");
1047 sb.append(text);
1048 sb.append("]]>");
1049 }
1050 return;
1051 }
1052
1053 if (e.getNodeType() == Node.TEXT_NODE)
1054 {
1055 if (e.getNodeValue() != "")
1056 {
1057 String text = e.getNodeValue();
1058 text = text.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("[\\n\\r\\t\\s]*$", "");
1059 for (Character c : text.toCharArray())
1060 {
1061 if (c.equals('\n'))
1062 {
1063 text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
1064 break;
1065 }
1066
1067 if (!Character.isWhitespace(c))
1068 {
1069 break;
1070 }
1071 }
1072 sb.append(text);
1073 }
1074 return;
1075 }
1076
1077 if (e.getNodeType() == Node.COMMENT_NODE)
1078 {
1079 if (e.getNodeValue() != "")
1080 {
1081 sb.append("<!--\n" + e.getNodeValue().trim() + "\n-->\n");
1082 }
1083 return;
1084 }
1085
1086 if (indent)
1087 {
1088 for (int i = 0; i < depth; i++)
1089 {
1090 sb.append(indentString);
1091 }
1092 }
1093
1094 sb.append('<');
1095 sb.append(e.getNodeName());
1096 NamedNodeMap attrs = e.getAttributes();
1097 if (attrs != null)
1098 {
1099 for (int i = 0; i < attrs.getLength(); i++)
1100 {
1101 Node attr = attrs.item(i);
1102 sb.append(' ');
1103 sb.append(attr.getNodeName());
1104 sb.append("=\"");
1105 sb.append(attr.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
1106 sb.append('"');
1107 }
1108 }
1109 NodeList children = e.getChildNodes();
1110
1111 boolean hasElements = false;
1112 boolean indentSwapped = false;
1113 for (int i = 0; i < children.getLength(); i++)
1114 {
1115 if (children.item(i).getNodeType() == Node.ELEMENT_NODE)
1116 {
1117 hasElements = true;
1118 }
1119 if ((children.item(i).getNodeType() == Node.TEXT_NODE || children.item(i).getNodeType() == Node.CDATA_SECTION_NODE) && indent)
1120 {
1121 if (children.item(i).getNodeValue().trim().length() > 0)
1122 {
1123 indentSwapped = true;
1124 indent = false;
1125 }
1126 }
1127 }
1128
1129 if (children == null || children.getLength() == 0)
1130 {
1131 sb.append("/>");
1132
1133 if (indent)
1134 {
1135 sb.append("\n");
1136 }
1137 }
1138 else
1139 {
1140 sb.append(">");
1141 if (hasElements && indent)
1142 {
1143 sb.append("\n");
1144 }
1145
1146 int len = children.getLength();
1147 for (int i = 0; i < len; i++)
1148 {
1149 xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
1150 }
1151
1152 if (indent)
1153 {
1154 for (int i = 0; i < depth; i++)
1155 {
1156 sb.append(indentString);
1157 }
1158 }
1159
1160 sb.append("</" + e.getNodeName() + ">");
1161
1162 if ((hasElements && indent) || indentSwapped)
1163 {
1164 sb.append("\n");
1165 }
1166 }
1167 }
1168
1169 public static String xmlNodeToStringWithoutIndenting(Node e)
1170 {
1171 StringBuffer sb = new StringBuffer("");
1172 xmlNodeToStringWithoutNewline(sb, e, -1);
1173 return sb.toString();
1174 }
1175
1176 public static String xmlNodeToStringWithoutNewline(Node e)
1177 {
1178 StringBuffer sb = new StringBuffer("");
1179 xmlNodeToStringWithoutNewline(sb, e, 0);
1180 return sb.toString();
1181 }
1182
1183 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
1184 {
1185
1186 for (int i = 0; i < depth; i++)
1187 {
1188 sb.append(' ');
1189 }
1190
1191 if (e.getNodeType() == Node.TEXT_NODE)
1192 {
1193 if (e.getNodeValue() != "")
1194 {
1195 sb.append(e.getNodeValue().replaceAll("&", "&amp;").replaceAll("<", "&lt;").replace(">", "&gt;"));
1196 }
1197 return;
1198 }
1199
1200 if (e.getNodeType() == Node.COMMENT_NODE)
1201 {
1202 if (e.getNodeValue() != "")
1203 {
1204 sb.append("<!--" + e.getNodeValue() + "-->");
1205 }
1206 return;
1207 }
1208
1209 sb.append('<');
1210 sb.append(e.getNodeName());
1211 NamedNodeMap attrs = e.getAttributes();
1212 if (attrs != null)
1213 {
1214 for (int i = 0; i < attrs.getLength(); i++)
1215 {
1216 Node attr = attrs.item(i);
1217 sb.append(' ');
1218 sb.append(attr.getNodeName());
1219 sb.append("=\"");
1220 sb.append(attr.getNodeValue());
1221 sb.append('"');
1222 }
1223 }
1224 NodeList children = e.getChildNodes();
1225
1226 if (children == null || children.getLength() == 0)
1227 sb.append("/>");
1228 else
1229 {
1230
1231 sb.append(">");
1232
1233 int len = children.getLength();
1234 for (int i = 0; i < len; i++)
1235 {
1236 if (depth >= 0)
1237 {
1238 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1);
1239 }
1240 else
1241 {
1242 xmlNodeToStringWithoutNewline(sb, children.item(i), depth);
1243 }
1244 }
1245
1246 for (int i = 0; i < depth; i++)
1247 sb.append(' ');
1248
1249 sb.append("</" + e.getNodeName() + ">");
1250 }
1251 }
1252
1253
1254
1255 // This method will convert an Element to a String too, like xmlNodeToString() above.
1256 // But for a document root element (doc.getDocumentElement()), this method will additionally
1257 // return its processing instruction line at the start (<?xml ... ?>).
1258 // This method copied into GLI from src/java/org/greenstone/gsdl3/util/GSXML.java
1259 public static String elementToString(Element e, boolean indent)
1260 {
1261 String str = "";
1262 try
1263 {
1264 TransformerFactory tf = TransformerFactory.newInstance();
1265 Transformer trans = tf.newTransformer();
1266 StringWriter sw = new StringWriter();
1267 if (indent)
1268 {
1269 trans.setOutputProperty(OutputKeys.INDENT, "yes");
1270 }
1271 else
1272 {
1273 trans.setOutputProperty(OutputKeys.INDENT, "no");
1274 }
1275 trans.transform(new DOMSource(e), new StreamResult(sw));
1276 str = sw.toString();
1277 }
1278 catch (Exception ex)
1279 {
1280 str += "Exception: couldn't write " + e + " to log";
1281 }
1282 finally
1283 {
1284 return str;
1285 }
1286 }
1287}
Note: See TracBrowser for help on using the repository browser.