package org.greenstone.gatherer.util;

import java.io.*;
import java.net.*;
import java.util.*;
import org.apache.xerces.parsers.*;
import org.apache.xml.serialize.*;
import org.greenstone.gatherer.DebugStream;
import org.w3c.dom.*;
import org.xml.sax.*;

import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter; // for elementToString()

// SAX
import org.xml.sax.XMLReader;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.InputSource;

// JAXP
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

// for elementToString():
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

/**
 * Static helper methods for reading, querying, modifying and serialising DOM
 * trees.
 *
 * NOTE(review): the copy of this file that was reviewed had lost its line
 * structure and had markup-like text stripped out of several string literals.
 * Places where content had to be reconstructed are marked NOTE(review) below
 * and should be confirmed against the upstream source before merging.
 */
public class XMLTools
{
    // ------------------------------------------------------------------
    // Simple text / child lookups
    // ------------------------------------------------------------------

    /**
     * Returns the first text-node child of the given element, or null if it
     * has none. The element is normalized first, so adjacent text nodes are
     * merged before the search.
     */
    public static Node getNodeTextNode(Element param)
    {
        param.normalize();
        Node n = param.getFirstChild();
        while (n != null && n.getNodeType() != Node.TEXT_NODE)
        {
            n = n.getNextSibling();
        }
        return n;
    }

    /**
     * Returns the value of the first text-node child of the given element, or
     * the empty string if there is no text child.
     */
    public static String getNodeText(Element param)
    {
        Node text_node = getNodeTextNode(param);
        if (text_node == null)
        {
            return "";
        }
        return text_node.getNodeValue();
    }

    /**
     * Replaces the first text-node child of elem (if any) with a new text node
     * holding text. The new node is appended as the last child.
     */
    public static void setNodeText(Element elem, String text)
    {
        Node old_text_node = getNodeTextNode(elem);
        if (old_text_node != null)
        {
            elem.removeChild(old_text_node);
        }
        Text t = elem.getOwnerDocument().createTextNode(text);
        elem.appendChild(t);
    }

    /** Returns the first child node with the given node name, or null if not found. */
    public static Node getChildByTagName(Node n, String name)
    {
        for (Node child = n.getFirstChild(); child != null; child = child.getNextSibling())
        {
            if (child.getNodeName().equals(name))
            {
                return child;
            }
        }
        return null; // not found
    }

    /**
     * Returns the index-th child node with the given node name (indices start
     * at 0), or null if there is no such child. An index of -1 returns the
     * first match.
     */
    public static Node getChildByTagNameIndexed(Node n, String name, int index)
    {
        if (index == -1)
        {
            return getChildByTagName(n, name);
        }
        int count = 0;
        for (Node child = n.getFirstChild(); child != null; child = child.getNextSibling())
        {
            if (child.getNodeName().equals(name))
            {
                if (count == index)
                {
                    return child;
                }
                count++;
            }
        }
        return null; // not found
    }

    /**
     * Returns the first element matching
     * parent/node_name[@attribute_name='attribute_value'], or null if none.
     */
    public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
    {
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++)
        {
            Node child = children.item(i);
            // Check the node type before casting so a non-element node whose
            // name happens to match cannot cause a ClassCastException.
            if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(node_name)
                    && ((Element) child).getAttribute(attribute_name).equals(attribute_value))
            {
                return (Element) child;
            }
        }
        return null; // not found
    }

    /**
     * Returns all elements matching
     * parent/node_name[@attribute_name='attribute_value'].
     *
     * @return the matching elements, or null (not an empty list) when there
     *         are none
     */
    public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
    {
        ArrayList elements = new ArrayList();
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++)
        {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(node_name)
                    && ((Element) child).getAttribute(attribute_name).equals(attribute_value))
            {
                elements.add((Element) child);
            }
        }
        // Existing contract: callers receive null rather than an empty list.
        if (elements.size() == 0)
        {
            return null;
        }
        return elements;
    }

    // ------------------------------------------------------------------
    // Copying / duplicating
    // ------------------------------------------------------------------

    /** Deep-copies every child of from into to, importing into to's document. */
    public static void copyAllChildren(Element to, Element from)
    {
        Document to_doc = to.getOwnerDocument();
        for (Node child = from.getFirstChild(); child != null; child = child.getNextSibling())
        {
            to.appendChild(to_doc.importNode(child, true));
        }
    }

    /**
     * Duplicates every element in the list and appends the copies to
     * toElement. Every item of the list is cast to Element.
     */
    public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes)
    {
        int num_elems = elements.getLength();
        for (int i = 0; i < num_elems; i++)
        {
            Element to_element = XMLTools.duplicateElement(owner, (Element) elements.item(i), with_attributes);
            toElement.appendChild(to_element);
        }
    }

    /** Duplicates an element (without a namespace) into the owner document. */
    public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
    {
        return duplicateElementNS(owner, element, null, with_attributes);
    }

    /**
     * Duplicates an element into the owner document.
     *
     * @param namespace_uri
     *            namespace for the new element, or null to create it without one
     * @param with_attributes
     *            whether the element's own attributes are copied; children are
     *            always deep-copied
     */
    public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
    {
        Element duplicate;
        if (namespace_uri == null)
        {
            duplicate = owner.createElement(element.getTagName());
        }
        else
        {
            duplicate = owner.createElementNS(namespace_uri, element.getTagName());
        }

        // Copy element attributes
        if (with_attributes)
        {
            NamedNodeMap attributes = element.getAttributes();
            for (int i = 0; i < attributes.getLength(); i++)
            {
                Node attribute = attributes.item(i);
                duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue());
            }
        }

        // Copy element children
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++)
        {
            duplicate.appendChild(owner.importNode(children.item(i), true));
        }
        return duplicate;
    }

    /** Removes all of the child nodes from the given node. */
    static final public void clear(Node node)
    {
        while (node.hasChildNodes())
        {
            node.removeChild(node.getFirstChild());
        }
    }

    /** Returns the direct child elements of parent_element with the given name (possibly empty). */
    static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
    {
        ArrayList child_elements = new ArrayList();
        NodeList children_nodelist = parent_element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++)
        {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name))
            {
                child_elements.add(child_node);
            }
        }
        return child_elements;
    }

    /** Returns the value of the first text-node child, or "" if there is none. Does not normalize. */
    static public String getElementTextValue(Element element)
    {
        NodeList children_nodelist = element.getChildNodes();
        for (int i = 0; i < children_nodelist.getLength(); i++)
        {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE)
            {
                return child_node.getNodeValue();
            }
        }
        return ""; // none found
    }

    /**
     * Retrieves the concatenated text of a node. Soon to be deprecated!
     *
     * @param element
     *            the node whose value we wish to find; a node named "Subject"
     *            is first replaced by its "Value" child
     * @return the concatenation of all text children, or "" when element is
     *         null or has no children
     */
    static final public String getValue(Node element)
    {
        if (element == null)
        {
            return "";
        }
        // If we've been given a subject node first retrieve its value node.
        if (element.getNodeName().equals("Subject"))
        {
            element = getNodeFromNamed(element, "Value");
        }
        // Text may be spread over several text nodes, so join all of them.
        if (element != null && element.hasChildNodes())
        {
            StringBuffer text_buffer = new StringBuffer();
            NodeList text_nodes = element.getChildNodes();
            for (int i = 0; i < text_nodes.getLength(); i++)
            {
                Node possible_text = text_nodes.item(i);
                if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE))
                {
                    text_buffer.append(possible_text.getNodeValue());
                }
            }
            return text_buffer.toString();
        }
        return "";
    }

    /**
     * Retrieves the first child of parent with the given node name. Soon to be
     * deprecated!
     *
     * @param parent
     *            the node whose children should be searched
     * @param name
     *            the required node's name
     * @return the requested node if found, null otherwise
     */
    static final public Node getNodeFromNamed(Node parent, String name)
    {
        for (Node i = parent.getFirstChild(); i != null; i = i.getNextSibling())
        {
            if (i.getNodeName().equals(name))
            {
                return i;
            }
        }
        return null;
    }

    // ------------------------------------------------------------------
    // Well-formedness checks
    // ------------------------------------------------------------------

    static final public String WELLFORMED = "well-formed !";
    static final public String NOTWELLFORMED = "not well-formed";
    // NOTE(review): both wrappers are empty in the reviewed copy, which makes
    // parse(String) wrap nothing. getLocationString() subtracts one from the
    // reported line, which looks like compensation for a header line, so the
    // original values were presumably lost - confirm against upstream.
    static final private String HEADER = "";
    static final private String FOOTER = "";

    /**
     * Parses an XML string into a DOM document with the default JAXP builder.
     *
     * @return the document, or null if parsing failed (the stack trace is
     *         printed)
     */
    public static Document getDOM(String xml_str)
    {
        Document doc = null;
        try
        {
            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            InputSource is = new InputSource();
            is.setCharacterStream(new StringReader(xml_str));
            doc = db.parse(is);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        return doc;
    }

    /**
     * Checks whether HEADER + xml_str + FOOTER is well-formed.
     *
     * @return WELLFORMED on success, otherwise a message describing the problem
     */
    public static String parse(String xml_str)
    {
        InputSource source = new InputSource(new StringReader(HEADER + xml_str + FOOTER));
        return checkWellFormed(source, " Fatal error: ");
    }

    /**
     * Checks whether xml_str is well-formed. Unlike parse(String), the string
     * is not wrapped in the header and footer strings.
     *
     * @return WELLFORMED on success, otherwise a message describing the problem
     */
    public static String parseDOM(String xml_str)
    {
        return checkWellFormed(new InputSource(new StringReader(xml_str)), " ");
    }

    /**
     * Checks whether the given file is well-formed XML.
     *
     * The file is handed to the parser as a byte stream so that the encoding
     * declared in the document (or a byte order mark) is honoured instead of
     * the platform default charset, and the stream is always closed.
     *
     * @return WELLFORMED on success, otherwise a message describing the problem
     */
    public static String parse(File xml_file)
    {
        InputStream in = null;
        try
        {
            in = new FileInputStream(xml_file);
            return checkWellFormed(new InputSource(in), " Fatal error: ");
        }
        catch (IOException e)
        {
            return "Unable to read the input, i/o error";
        }
        finally
        {
            closeQuietly(in);
        }
    }

    /**
     * Runs a namespace-aware, non-validating SAX parse over source and turns
     * the outcome into one of the status messages used by the parse methods.
     *
     * @param saxErrorSeparator
     *            text placed between WELLFORMED and the exception description
     *            when a non-parse SAXException is raised
     */
    private static String checkWellFormed(InputSource source, String saxErrorSeparator)
    {
        try
        {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(true);
            SAXParser parser = factory.newSAXParser();
            XMLReader reader = parser.getXMLReader();
            reader.setContentHandler(new DefaultHandler());
            reader.setErrorHandler(new DefaultHandler());
            reader.parse(source);
            return WELLFORMED;
        }
        catch (FactoryConfigurationError e)
        {
            return "unable to get a document builder factory";
        }
        catch (ParserConfigurationException e)
        {
            return "unable to configure parser";
        }
        catch (SAXParseException e)
        {
            return NOTWELLFORMED + getLocationString(e) + e.getMessage();
        }
        catch (SAXException e)
        {
            // NOTE(review): this message begins with the WELLFORMED text even
            // though parsing failed; kept unchanged because callers may match
            // on the existing strings.
            return WELLFORMED + saxErrorSeparator + e.toString();
        }
        catch (IOException e)
        {
            return "Unable to read the input, i/o error";
        }
    }

    /**
     * Formats the location of a parse error as "[file](line L, column C): ".
     * The reported line is the parser's line number minus one.
     */
    private static String getLocationString(SAXParseException ex)
    {
        StringBuilder str = new StringBuilder();
        String systemId = ex.getSystemId();
        if (systemId != null)
        {
            // Keep only the last path component of the system id.
            int index = systemId.lastIndexOf('/');
            if (index != -1)
            {
                systemId = systemId.substring(index + 1);
            }
            str.append(systemId);
        }
        str.append("(line ");
        str.append(ex.getLineNumber() - 1);
        str.append(", column ");
        str.append(ex.getColumnNumber());
        str.append("): ");
        return str.toString();
    }

    // ------------------------------------------------------------------
    // Parsing into DOM documents
    // ------------------------------------------------------------------

    /**
     * Parses an XML document from a file path.
     *
     * @param use_class_loader
     *            when true the path is first looked up as a resource through
     *            JarTools; the file system is used if that yields nothing
     * @return the document, or null if it could not be parsed
     */
    static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
    {
        if (use_class_loader)
        {
            InputStream is = JarTools.getResourceAsStream("/" + xml_file_path);
            if (is != null)
            {
                return parseXML(is);
            }
        }
        // Try the file outside the classes directory
        return parseXMLFile(new File(xml_file_path));
    }

    /** Parses an XML document from a file; returns null if the file is missing or unparsable. */
    static public Document parseXMLFile(File xml_file)
    {
        // No file? No point trying!
        if (!xml_file.exists())
        {
            return null;
        }
        try
        {
            return parseXML(new FileInputStream(xml_file));
        }
        catch (Exception exception)
        {
            DebugStream.printStackTrace(exception);
            return null;
        }
    }

    /**
     * Parses an XML document from an input stream, decoding it as UTF-8. The
     * stream is closed before returning, whether or not parsing succeeded.
     */
    static public Document parseXML(InputStream xml_input_stream)
    {
        Document document = null;
        InputStreamReader isr = null;
        try
        {
            isr = new InputStreamReader(xml_input_stream, "UTF-8");
            document = parseXML(isr);
        }
        catch (Exception exception)
        {
            DebugStream.printStackTrace(exception);
        }
        finally
        {
            closeQuietly(isr);
            closeQuietly(xml_input_stream);
        }
        return document;
    }

    /**
     * Parses an XML document from a reader using the Xerces DOM parser.
     *
     * When debugging is disabled the reader is wrapped in a
     * RemoveContentBeforeRootElementXMLReader. When debugging is enabled the
     * whole input is read into memory first so that the offending line can be
     * shown if parsing fails; in that mode a SAX failure terminates the JVM.
     *
     * @return the parsed document, or null if parsing failed
     */
    static public Document parseXML(Reader xml_reader)
    {
        Document document = null;
        // Only filled in when debugging; shown to the user on a SAX failure.
        String xmlContents = "";

        try
        {
            Reader reader = null;

            if (!DebugStream.isDebuggingEnabled())
            {
                // (1) By default, wrap the reader so that content preceding
                // (and invalidating) the XML is dealt with by the wrapper.
                try
                {
                    reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
                }
                catch (Exception e)
                {
                    System.err.println("Exception while wrapping the reader in parseXML(Reader)");
                    e.printStackTrace();
                }
            }
            else
            {
                // (2) Debug mode: keep a copy of the contents so the failing
                // line can be reported. This needs the XML read twice, so it
                // is only done when debugging.
                xmlContents = readAll(xml_reader);
                xml_reader.close(); // closing the old Reader
                reader = new BufferedReader(new StringReader(xmlContents));
            }

            // (3) The actual XML parsing
            InputSource isc = new InputSource(reader);
            DOMParser parser = new DOMParser();
            parser.setFeature("http://xml.org/sax/features/validation", false);
            parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            // May or may not be ignored, the documentation for Xerces is
            // contradictory. If it works then parsing -should- be faster.
            parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
            parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
            parser.setEntityResolver(new GLIEntityResolver());
            parser.parse(isc);
            document = parser.getDocument();
        }
        catch (SAXParseException e)
        {
            showXMLParseFailureLine(e, xmlContents);
        }
        catch (SAXException exception)
        {
            System.err.println("SAX exception: " + exception.getMessage());
            if (DebugStream.isDebuggingEnabled())
            {
                DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
                // Exit to let the user view the erroneous line/xml before it
                // goes past the screen buffer.
                DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
                System.exit(-1);
            }
            // else, not running in debug mode, so don't exit after exception
            System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
            DebugStream.printStackTrace(exception);
        }
        catch (Exception exception)
        {
            DebugStream.printStackTrace(exception);
        }
        return document;
    }

    /** Reads everything remaining in the reader into a string. Does not close the reader. */
    private static String readAll(Reader in) throws IOException
    {
        StringBuilder buf = new StringBuilder();
        char[] buffer = new char[500];
        int numCharsRead = in.read(buffer, 0, buffer.length);
        while (numCharsRead != -1)
        {
            buf.append(buffer, 0, numCharsRead);
            numCharsRead = in.read(buffer, 0, buffer.length);
        }
        return buf.toString();
    }

    /** Closes a resource if it is non-null, reporting (not propagating) any failure. */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null)
        {
            return;
        }
        try
        {
            resource.close();
        }
        catch (IOException e)
        {
            DebugStream.printStackTrace(e);
        }
    }

    /**
     * Displays the line where a SAXParseException occurred. The messages are
     * printed to DebugStream, so run GLI/FLI with -debug to view this output.
     * In debug mode this method terminates the JVM after reporting.
     *
     * @param e
     *            the SAXParseException that was thrown while parsing
     * @param xmlContents
     *            the entire xml that was being parsed when the exception
     *            occurred (null is treated as empty)
     */
    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
    {
        if (xmlContents == null)
        {
            xmlContents = "";
        }

        // Nothing at all may precede the XML declaration, so first check for
        // leading whitespace.
        if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t"))
        {
            DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
            DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
            return; // nothing more to do, first error identified
        }

        int linenumber = e.getLineNumber();
        DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);

        if (!DebugStream.isDebuggingEnabled())
        {
            System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
            return;
        }

        // Line numbers are 1-based and -1 means "unknown"; anything below 1
        // cannot be used as an index, so fall back to dumping the whole input.
        if (linenumber >= 1)
        {
            // The actual line where parsing failed.
            String line = "";
            String[] lines = xmlContents.split("\n");
            if (lines.length > 0)
            {
                DebugStream.println(" (number of lines: " + lines.length + ")");
                if (lines.length >= linenumber)
                {
                    line = lines[linenumber - 1];
                }
                else
                {
                    // error is past the last line
                    line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1];
                }
            }
            else
            {
                DebugStream.print("\n");
            }
            DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
            DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
        }
        else
        {
            // No particular line number, print out all the xml so it can be inspected.
            DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
        }

        // Exit to let the user view the erroneous line/xml before it goes
        // past the screen buffer.
        DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
        System.exit(-1);
    }

    /**
     * Reads a UTF-8 stream line by line and returns the text from the first
     * line that starts the XML onwards; anything before it is discarded.
     *
     * NOTE(review): in the reviewed copy the start-marker literal and the rest
     * of this method after it were missing. The "<?xml" marker and the error
     * handling below are reconstructed - confirm against upstream.
     */
    static public StringBuffer readXMLStream(InputStream input_stream)
    {
        StringBuffer xml = new StringBuffer("");
        try
        {
            InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
            BufferedReader buffered_in = new BufferedReader(isr);
            String line;
            boolean xml_content = false;
            while ((line = buffered_in.readLine()) != null)
            {
                if (!xml_content && line.trim().startsWith("<?xml"))
                {
                    xml_content = true;
                }
                if (xml_content)
                {
                    xml.append(line);
                    xml.append("\n");
                }
            }
        }
        catch (Exception error)
        {
            DebugStream.printStackTrace(error);
        }
        return xml;
    }

    /**
     * Returns text with every character that is not allowed in XML 1.0
     * removed.
     *
     * The text is walked by code point, so supplementary characters
     * (U+10000..U+10FFFF, stored as surrogate pairs) are kept; testing
     * individual chars against 0x10000 can never succeed and would strip them.
     * Unpaired surrogates are removed.
     *
     * NOTE(review): this method's header was missing from the reviewed copy;
     * the name and signature are reconstructed - confirm against callers.
     */
    static public String removeInvalidCharacters(String text)
    {
        StringBuilder safe_characters = new StringBuilder(text.length());
        int i = 0;
        while (i < text.length())
        {
            int character = text.codePointAt(i);
            i += Character.charCount(character);
            if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D
                    || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
            {
                safe_characters.appendCodePoint(character);
            }
        }
        return safe_characters.toString();
    }

    // ------------------------------------------------------------------
    // Setting text values
    // ------------------------------------------------------------------

    /**
     * Removes every text-node child of element and, if text is non-null,
     * appends a new text node holding it. Non-text children are kept.
     */
    static public void setElementTextValue(Element element, String text)
    {
        // Walk backwards so removals do not shift the indices still to visit.
        NodeList children_nodelist = element.getChildNodes();
        for (int i = children_nodelist.getLength() - 1; i >= 0; i--)
        {
            Node child_node = children_nodelist.item(i);
            if (child_node.getNodeType() == Node.TEXT_NODE)
            {
                element.removeChild(child_node);
            }
        }
        if (text != null)
        {
            element.appendChild(element.getOwnerDocument().createTextNode(text));
        }
    }

    /**
     * Replaces all children of element with a single text node. Soon to be
     * deprecated!
     *
     * @param element
     *            the Element whose value we wish to set
     * @param value
     *            the new value; when null the element is simply emptied
     */
    static final public void setValue(Element element, String value)
    {
        clear(element);
        if (value != null)
        {
            element.appendChild(element.getOwnerDocument().createTextNode(value));
        }
    }

    // ------------------------------------------------------------------
    // Indenting and writing
    // ------------------------------------------------------------------

    /** Returns true for a text node whose value consists only of whitespace. */
    private static boolean isWhitespaceOnlyText(Node node)
    {
        return node.getNodeType() == Node.TEXT_NODE && node.getNodeValue().matches("^\\s*$");
    }

    /**
     * Re-indents the subtree below elem in place. Whitespace-only text
     * children are removed, a newline plus depth tabs is inserted before each
     * child element, and a newline plus depth-1 tabs is appended when elem has
     * at least one child element. Child elements are processed recursively.
     */
    static public void indentXML(Element elem, int depth)
    {
        Document doc = elem.getOwnerDocument();

        StringBuilder indent = new StringBuilder("\n");
        for (int i = 0; i < depth - 1; i++)
        {
            indent.append("\t");
        }
        String endIndentString = indent.toString();
        String startIndentString = (depth > 0) ? endIndentString + "\t" : endIndentString;

        Node startTextNode = doc.createTextNode(startIndentString);
        Node endTextNode = doc.createTextNode(endIndentString);

        // Pass 1: drop blank text nodes up to the first child element, just to
        // find out whether there is a child element at all.
        boolean found = false;
        Node child = elem.getFirstChild();
        while (child != null)
        {
            if (isWhitespaceOnlyText(child))
            {
                Node spaceTextNode = child;
                child = child.getNextSibling();
                elem.removeChild(spaceTextNode);
                continue; // re-examine the new current node
            }
            if (child.getNodeType() == Node.ELEMENT_NODE)
            {
                found = true;
                break;
            }
            child = child.getNextSibling();
        }

        if (found)
        {
            elem.appendChild(endTextNode);
        }

        // Pass 2: drop the remaining blank text nodes (pass 1 stopped at the
        // first element) but never the end node just appended, and indent
        // each child element.
        child = elem.getFirstChild();
        while (child != null)
        {
            if (child != endTextNode && isWhitespaceOnlyText(child))
            {
                Node spaceTextNode = child;
                child = child.getNextSibling();
                elem.removeChild(spaceTextNode);
                continue;
            }
            if (child.getNodeType() == Node.ELEMENT_NODE)
            {
                elem.insertBefore(startTextNode.cloneNode(false), child);
                indentXML((Element) child, depth + 1);
            }
            child = child.getNextSibling();
        }
    }

    /**
     * Writes an XML document to a file as UTF-8. The document is re-indented
     * in place first (see indentXML).
     *
     * @param nonEscapingTagNames
     *            names of elements whose text is written unescaped, or null
     */
    static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
    {
        indentXML(document.getDocumentElement(), 1);

        OutputStream os = null;
        try
        {
            os = new FileOutputStream(xml_file);

            // Create an output format for our document.
            OutputFormat f = new OutputFormat(document);
            f.setEncoding("UTF-8");
            f.setIndenting(true);
            f.setLineWidth(0); // Why isn't this working!
            f.setPreserveSpace(true);
            if (nonEscapingTagNames != null)
            {
                f.setNonEscapingElements(nonEscapingTagNames);
            }

            Writer w = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
            XMLSerializer s = new XMLSerializer(w, f);
            s.asDOMSerializer();
            s.serialize(document);

            // Push anything still buffered through before the stream closes.
            w.flush();
        }
        catch (Exception exception)
        {
            DebugStream.printStackTrace(exception);
        }
        finally
        {
            // Release the file handle even when serialisation failed.
            closeQuietly(os);
        }
    }

    /** Writes an XML document to a file, escaping all element text. */
    static public void writeXMLFile(File xml_file, Document document)
    {
        writeXMLFile(xml_file, document, null);
    }

    // ------------------------------------------------------------------
    // Serialising to strings / stdout
    // ------------------------------------------------------------------

    /** Escapes the characters that may not appear literally in XML text content. */
    private static String escapeXMLText(String text)
    {
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    }

    /** Escapes a value for use inside a double-quoted XML attribute. */
    private static String escapeXMLAttribute(String value)
    {
        return escapeXMLText(value).replace("\"", "&quot;");
    }

    /** True when a node value is present and not the empty string. */
    private static boolean hasValue(Node node)
    {
        String value = node.getNodeValue();
        return value != null && value.length() > 0;
    }

    /** Prints a node and its subtree to System.out, one space of indent per level. */
    public static void printXMLNode(Node e)
    {
        printXMLNode(e, 0);
    }

    /**
     * Prints a node and its subtree to System.out, indented by depth spaces.
     * Intended for debugging: values are printed as-is, without escaping.
     */
    public static void printXMLNode(Node e, int depth)
    {
        if (e.getNodeType() == Node.TEXT_NODE)
        {
            // Skip empty text entirely so no dangling indentation is printed.
            if (hasValue(e))
            {
                printSpaces(depth);
                System.out.println(e.getNodeValue());
            }
            return;
        }

        printSpaces(depth);
        System.out.print('<');
        System.out.print(e.getNodeName());
        NamedNodeMap attrs = e.getAttributes();
        if (attrs != null)
        {
            for (int i = 0; i < attrs.getLength(); i++)
            {
                Node attr = attrs.item(i);
                System.out.print(' ');
                System.out.print(attr.getNodeName());
                System.out.print("=\"");
                System.out.print(attr.getNodeValue());
                System.out.print('"');
            }
        }

        NodeList children = e.getChildNodes();
        int len = (children == null) ? 0 : children.getLength();
        if (len == 0)
        {
            System.out.println("/>");
            return;
        }

        System.out.println('>');
        for (int i = 0; i < len; i++)
        {
            printXMLNode(children.item(i), depth + 1);
        }
        printSpaces(depth);
        // Close the element so the printed output is balanced.
        System.out.println("</" + e.getNodeName() + ">");
    }

    /** Prints count spaces to System.out. */
    private static void printSpaces(int count)
    {
        for (int i = 0; i < count; i++)
        {
            System.out.print(' ');
        }
    }

    /** Serialises a node and its subtree, indented with tabs starting at depth 2. */
    public static String xmlNodeToString(Node e)
    {
        StringBuffer sb = new StringBuffer("");
        xmlNodeToString(sb, e, true, "\t", 2);
        return sb.toString();
    }

    /**
     * Appends the serialised form of a node and its subtree to sb.
     *
     * Text, attribute values, CDATA sections and comments are written with
     * the required XML escaping/markup and every element is closed. Indenting
     * is switched off below any element that has non-blank text or CDATA
     * children so that mixed content is not altered.
     *
     * @param indent
     *            whether to indent and add newlines
     * @param indentString
     *            the string repeated depth times as indentation
     */
    public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
    {
        short type = e.getNodeType();

        if (type == Node.CDATA_SECTION_NODE)
        {
            if (hasValue(e))
            {
                sb.append("<![CDATA[").append(e.getNodeValue()).append("]]>");
            }
            return;
        }

        if (type == Node.TEXT_NODE)
        {
            if (hasValue(e))
            {
                // Escape, then drop trailing whitespace.
                String text = escapeXMLText(e.getNodeValue()).replaceAll("[\\n\\r\\t\\s]*$", "");
                // Leading whitespace is dropped only when the leading
                // whitespace run contains a newline.
                for (int i = 0; i < text.length(); i++)
                {
                    char c = text.charAt(i);
                    if (c == '\n')
                    {
                        text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
                        break;
                    }
                    if (!Character.isWhitespace(c))
                    {
                        break;
                    }
                }
                sb.append(text);
            }
            return;
        }

        if (type == Node.COMMENT_NODE)
        {
            if (hasValue(e))
            {
                sb.append("<!--").append(e.getNodeValue()).append("-->\n");
            }
            return;
        }

        if (indent)
        {
            appendRepeated(sb, indentString, depth);
        }

        sb.append('<');
        sb.append(e.getNodeName());
        appendAttributes(sb, e);

        NodeList children = e.getChildNodes();
        int len = (children == null) ? 0 : children.getLength();

        boolean hasElements = false;
        boolean indentSwapped = false;
        for (int i = 0; i < len; i++)
        {
            Node child = children.item(i);
            short childType = child.getNodeType();
            if (childType == Node.ELEMENT_NODE)
            {
                hasElements = true;
            }
            if (indent && (childType == Node.TEXT_NODE || childType == Node.CDATA_SECTION_NODE)
                    && child.getNodeValue().trim().length() > 0)
            {
                // Real text content: stop indenting inside this element.
                indentSwapped = true;
                indent = false;
            }
        }

        if (len == 0)
        {
            sb.append("/>");
            if (indent)
            {
                sb.append("\n");
            }
            return;
        }

        sb.append(">");
        if (hasElements && indent)
        {
            sb.append("\n");
        }
        for (int i = 0; i < len; i++)
        {
            xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
        }
        if (indent)
        {
            appendRepeated(sb, indentString, depth);
        }
        sb.append("</").append(e.getNodeName()).append(">");
        if ((hasElements && indent) || indentSwapped)
        {
            sb.append("\n");
        }
    }

    /** Appends unit to sb count times. */
    private static void appendRepeated(StringBuffer sb, String unit, int count)
    {
        for (int i = 0; i < count; i++)
        {
            sb.append(unit);
        }
    }

    /** Appends every attribute of node as ' name="escaped value"'. */
    private static void appendAttributes(StringBuffer sb, Node node)
    {
        NamedNodeMap attrs = node.getAttributes();
        if (attrs == null)
        {
            return;
        }
        for (int i = 0; i < attrs.getLength(); i++)
        {
            Node attr = attrs.item(i);
            sb.append(' ');
            sb.append(attr.getNodeName());
            sb.append("=\"");
            sb.append(escapeXMLAttribute(attr.getNodeValue()));
            sb.append('"');
        }
    }

    /** Serialises a node and its subtree on one line with no indentation at all. */
    public static String xmlNodeToStringWithoutIndenting(Node e)
    {
        StringBuffer sb = new StringBuffer("");
        xmlNodeToStringWithoutNewline(sb, e, -1);
        return sb.toString();
    }

    /** Serialises a node and its subtree on one line, with one space of indent per level. */
    public static String xmlNodeToStringWithoutNewline(Node e)
    {
        StringBuffer sb = new StringBuffer("");
        xmlNodeToStringWithoutNewline(sb, e, 0);
        return sb.toString();
    }

    /**
     * Appends a node and its subtree to sb without newlines.
     *
     * @param depth
     *            number of spaces written before the node; a negative value
     *            disables indentation for the whole subtree
     */
    private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
    {
        for (int i = 0; i < depth; i++)
        {
            sb.append(' ');
        }

        short type = e.getNodeType();
        if (type == Node.TEXT_NODE)
        {
            if (hasValue(e))
            {
                sb.append(escapeXMLText(e.getNodeValue()));
            }
            return;
        }
        if (type == Node.CDATA_SECTION_NODE)
        {
            // Handled explicitly so a CDATA child is not written as an element.
            if (hasValue(e))
            {
                sb.append("<![CDATA[").append(e.getNodeValue()).append("]]>");
            }
            return;
        }
        if (type == Node.COMMENT_NODE)
        {
            if (hasValue(e))
            {
                sb.append("<!--").append(e.getNodeValue()).append("-->");
            }
            return;
        }

        sb.append('<');
        sb.append(e.getNodeName());
        appendAttributes(sb, e);

        NodeList children = e.getChildNodes();
        int len = (children == null) ? 0 : children.getLength();
        if (len == 0)
        {
            sb.append("/>");
            return;
        }

        sb.append(">");
        // A negative depth stays negative so nothing below is indented.
        int childDepth = (depth >= 0) ? depth + 1 : depth;
        for (int i = 0; i < len; i++)
        {
            xmlNodeToStringWithoutNewline(sb, children.item(i), childDepth);
        }
        for (int i = 0; i < depth; i++)
        {
            sb.append(' ');
        }
        sb.append("</").append(e.getNodeName()).append(">");
    }

    /**
     * Converts an element to a string with a JAXP identity transform. Unlike
     * xmlNodeToString(), the output produced by the transformer may start with
     * an XML declaration.
     *
     * @param indent
     *            whether the transformer is asked to indent its output
     * @return the serialised element, or a message describing the failure
     */
    public static String elementToString(Element e, boolean indent)
    {
        String str = "";
        try
        {
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer trans = tf.newTransformer();
            StringWriter sw = new StringWriter();
            trans.setOutputProperty(OutputKeys.INDENT, indent ? "yes" : "no");
            trans.transform(new DOMSource(e), new StreamResult(sw));
            str = sw.toString();
        }
        catch (Exception ex)
        {
            str += "Exception: couldn't write " + e + " to log";
        }
        // A plain return (not one inside finally) lets Errors propagate.
        return str;
    }
}