package org.greenstone.gatherer.util;
import java.io.*;
import java.net.*;
import java.util.*;
import org.apache.xerces.parsers.*;
import org.apache.xml.serialize.*;
import org.greenstone.gatherer.DebugStream;
import org.w3c.dom.*;
import org.xml.sax.*;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter; // for elementToString()
// SAX
import org.xml.sax.XMLReader;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.InputSource;
// JAXP
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
// for elementToString():
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
/** This class is a static class containing useful XML functions */
public class XMLTools
{
/**
 * Finds the first text node that is a direct child of the given element.
 * The element is normalized before the search.
 *
 * @param param the element whose direct children are inspected
 * @return the first child of type TEXT_NODE, or null if there is none
 */
public static Node getNodeTextNode(Element param)
{
    param.normalize();
    for (Node candidate = param.getFirstChild(); candidate != null; candidate = candidate.getNextSibling())
    {
        if (candidate.getNodeType() == Node.TEXT_NODE)
        {
            return candidate;
        }
    }
    return null;
}
/**
 * Returns the value of the first text node directly under the given element.
 *
 * @param param the element to read from
 * @return the text node's value, or the empty string when the element has no text child
 */
public static String getNodeText(Element param)
{
    Node first_text = getNodeTextNode(param);
    return (first_text != null) ? first_text.getNodeValue() : "";
}
/**
 * Replaces the first text child of an element with new text. Only the first
 * text node is removed; the new text node is appended as the last child.
 *
 * @param elem the element to modify
 * @param text the new text content
 */
public static void setNodeText(Element elem, String text)
{
    Node existing = getNodeTextNode(elem);
    if (existing != null)
    {
        elem.removeChild(existing);
    }
    elem.appendChild(elem.getOwnerDocument().createTextNode(text));
}
/**
 * Returns the first direct child whose node name equals the given name.
 * The node type is not checked, so any kind of child node can match.
 *
 * @param n the parent node
 * @param name the node name to look for
 * @return the first matching child, or null if none matches
 */
public static Node getChildByTagName(Node n, String name)
{
    for (Node candidate = n.getFirstChild(); candidate != null; candidate = candidate.getNextSibling())
    {
        if (candidate.getNodeName().equals(name))
        {
            return candidate;
        }
    }
    return null; // not found
}
/**
 * Returns the nth direct child with the given node name. Index numbers
 * start at 0; an index of -1 also selects the first match.
 *
 * @param n the parent node
 * @param name the node name to look for
 * @param index zero-based position among the matching children, or -1 for the first
 * @return the selected child, or null if there are not enough matches
 */
public static Node getChildByTagNameIndexed(Node n, String name, int index)
{
    // -1 means "first match", which is the same child as index 0.
    int still_to_skip = (index == -1) ? 0 : index;
    for (Node candidate = n.getFirstChild(); candidate != null; candidate = candidate.getNextSibling())
    {
        if (!candidate.getNodeName().equals(name))
        {
            continue;
        }
        if (still_to_skip == 0)
        {
            return candidate;
        }
        still_to_skip--;
    }
    return null; // not found
}
/**
 * Returns the first direct child matching
 * parent/node_name[@attribute_name='attribute_value'].
 *
 * @param parent the element whose children are searched
 * @param node_name the required node name
 * @param attribute_name the attribute to compare
 * @param attribute_value the value the attribute must have
 * @return the first matching child element, or null if none matches
 */
public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value)
{
    for (Node candidate = parent.getFirstChild(); candidate != null; candidate = candidate.getNextSibling())
    {
        // The cast happens only for children whose node name already matched.
        if (candidate.getNodeName().equals(node_name)
            && ((Element) candidate).getAttribute(attribute_name).equals(attribute_value))
        {
            return (Element) candidate;
        }
    }
    // not found
    return null;
}
/**
 * Returns every direct child matching
 * parent/node_name[@attribute_name='attribute_value'], in document order.
 *
 * @param parent the element whose children are searched
 * @param node_name the required node name
 * @param attribute_name the attribute to compare
 * @param attribute_value the value the attribute must have
 * @return the list of matching elements, or null (not an empty list) when nothing matches
 */
public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value)
{
    ArrayList matches = new ArrayList();
    for (Node candidate = parent.getFirstChild(); candidate != null; candidate = candidate.getNextSibling())
    {
        if (candidate.getNodeName().equals(node_name)
            && ((Element) candidate).getAttribute(attribute_name).equals(attribute_value))
        {
            matches.add((Element) candidate);
        }
    }
    // Callers receive null rather than an empty list when nothing was found.
    return matches.isEmpty() ? null : matches;
}
/**
 * Appends deep copies of all children of one element to another element.
 * Each child is imported into the target's owner document, so the two
 * elements may belong to different documents; the source is left unchanged.
 *
 * @param to the element receiving the copies
 * @param from the element whose children are copied
 */
public static void copyAllChildren(Element to, Element from)
{
    Document target_doc = to.getOwnerDocument();
    for (Node source_child = from.getFirstChild(); source_child != null; source_child = source_child.getNextSibling())
    {
        Node imported = target_doc.importNode(source_child, true);
        to.appendChild(imported);
    }
}
/**
 * Duplicates every element in a node list and appends the duplicates to
 * toElement. Every item of the list is cast to Element.
 *
 * @param owner the document that will own the duplicates
 * @param toElement the element the duplicates are appended to
 * @param elements the elements to duplicate
 * @param with_attributes whether attributes are copied to the duplicates
 */
public static void duplicateElementList(Document owner, Element toElement, NodeList elements, boolean with_attributes) {
    // The length is read once up front, before any duplicate is appended.
    final int total = elements.getLength();
    for (int index = 0; index < total; index++)
    {
        Element source = (Element) elements.item(index);
        toElement.appendChild(XMLTools.duplicateElement(owner, source, with_attributes));
    }
}
/**
 * Duplicates an element without a namespace by delegating to
 * duplicateElementNS with a null namespace URI.
 *
 * @param owner the document that will own the duplicate
 * @param element the element to duplicate
 * @param with_attributes whether attributes are copied to the duplicate
 * @return the new element
 */
public static Element duplicateElement(Document owner, Element element, boolean with_attributes)
{
    final String no_namespace = null;
    return duplicateElementNS(owner, element, no_namespace, with_attributes);
}
/**
 * Duplicates an element into the given document, optionally in a namespace.
 * Children are always deep-copied; attributes only on request.
 *
 * @param owner the document that will own the duplicate
 * @param element the element to duplicate
 * @param namespace_uri namespace for the new element, or null for none
 * @param with_attributes whether attributes are copied to the duplicate
 * @return the new element (not attached to any parent)
 */
public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes)
{
    final String tag_name = element.getTagName();
    Element copy = (namespace_uri == null)
        ? owner.createElement(tag_name)
        : owner.createElementNS(namespace_uri, tag_name);

    // Attributes are copied by qualified name and value.
    if (with_attributes)
    {
        NamedNodeMap source_attributes = element.getAttributes();
        int attribute_count = source_attributes.getLength();
        for (int a = 0; a < attribute_count; a++)
        {
            Node source_attribute = source_attributes.item(a);
            copy.setAttribute(source_attribute.getNodeName(), source_attribute.getNodeValue());
        }
    }

    // Children are imported deeply so they belong to the owner document.
    for (Node source_child = element.getFirstChild(); source_child != null; source_child = source_child.getNextSibling())
    {
        copy.appendChild(owner.importNode(source_child, true));
    }
    return copy;
}
/**
 * Removes every child node from the given node.
 *
 * @param node the node to empty
 */
static final public void clear(Node node)
{
    for (Node first = node.getFirstChild(); first != null; first = node.getFirstChild())
    {
        node.removeChild(first);
    }
}
/**
 * Collects the direct child elements with the given name, in document order.
 *
 * @param parent_element the element whose children are searched
 * @param element_name the required element name
 * @return a list of the matching child elements; empty (never null) when none match
 */
static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
{
    ArrayList matching_children = new ArrayList();
    for (Node candidate = parent_element.getFirstChild(); candidate != null; candidate = candidate.getNextSibling())
    {
        boolean is_element = candidate.getNodeType() == Node.ELEMENT_NODE;
        if (is_element && candidate.getNodeName().equals(element_name))
        {
            matching_children.add(candidate);
        }
    }
    return matching_children;
}
/**
 * Returns the value of the first text node directly under the element.
 * Unlike getNodeText, the element is not normalized first.
 *
 * @param element the element to read from
 * @return the first text child's value, or the empty string if there is none
 */
static public String getElementTextValue(Element element)
{
    for (Node candidate = element.getFirstChild(); candidate != null; candidate = candidate.getNextSibling())
    {
        if (candidate.getNodeType() == Node.TEXT_NODE)
        {
            return candidate.getNodeValue();
        }
    }
    // No text node among the children
    return "";
}
/**
 * Retrieves the text value of a node by concatenating its text children.
 * A node named "Subject" is first replaced by its "Value" child. Soon to
 * be deprecated!
 *
 * @param element
 *            the Node whose value we wish to find; may be null
 * @return the concatenated text, or the empty string when the node is
 *         null, has no children, or (for a Subject) has no Value child
 */
static final public String getValue(Node element)
{
    if (element == null)
    {
        return "";
    }
    // A subject node carries its text inside a nested value node.
    Node value_holder = element;
    if (value_holder.getNodeName().equals("Subject"))
    {
        value_holder = getNodeFromNamed(value_holder, "Value");
    }
    if (value_holder == null || !value_holder.hasChildNodes())
    {
        return "";
    }
    // The text may be spread over several text nodes, so all of them are joined.
    StringBuffer collected = new StringBuffer();
    for (Node piece = value_holder.getFirstChild(); piece != null; piece = piece.getNextSibling())
    {
        if (piece.getNodeName().equals(StaticStrings.TEXT_NODE))
        {
            collected.append(piece.getNodeValue());
        }
    }
    return collected.toString();
}
/**
 * Retrieves the first direct child of a node that has the specified name.
 * Soon to be deprecated!
 *
 * @param parent
 *            the Node whose children should be searched
 * @param name
 *            the required node's name as a String
 * @return the requested Node if it is found, null otherwise
 */
static final public Node getNodeFromNamed(Node parent, String name)
{
    Node candidate = parent.getFirstChild();
    while (candidate != null)
    {
        if (candidate.getNodeName().equals(name))
        {
            return candidate;
        }
        candidate = candidate.getNextSibling();
    }
    return null;
}
/** Message the parse methods start with and return when no exception is raised. */
static final public String WELLFORMED = "well-formed !";
/** Prefix of the message returned by the parse methods when a SAXParseException is caught. */
static final public String NOTWELLFORMED = "not well-formed";
// Text that parse(String) places before and after the XML string it checks.
// NOTE(review): both are empty here, so the wrapping is currently a no-op - confirm that is intended.
static final private String HEADER = "";
static final private String FOOTER = "";
/**
 * Parses an XML string into a DOM document using the default JAXP
 * document builder.
 *
 * @param xml_str the XML text to parse
 * @return the parsed document, or null if anything went wrong (the stack
 *         trace is printed to standard error in that case)
 */
public static Document getDOM(String xml_str)
{
    try
    {
        DocumentBuilderFactory builder_factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = builder_factory.newDocumentBuilder();
        InputSource source = new InputSource();
        source.setCharacterStream(new StringReader(xml_str));
        return builder.parse(source);
    }
    catch (Exception e)
    {
        e.printStackTrace();
        return null;
    }
}
/**
 * Checks whether an XML string is well-formed by running it through a
 * namespace-aware, non-validating SAX parser. The string is wrapped in
 * HEADER and FOOTER before parsing.
 *
 * @param xml_str the XML text to check
 * @return WELLFORMED when no exception is raised, otherwise a message
 *         describing the failure
 */
public static String parse(String xml_str)
{
    String validation_msg = WELLFORMED;
    final String wrapped_xml = HEADER + xml_str + FOOTER;
    try
    {
        SAXParserFactory sax_factory = SAXParserFactory.newInstance();
        sax_factory.setNamespaceAware(true);
        SAXParser sax_parser = sax_factory.newSAXParser();
        InputSource source = new InputSource(new StringReader(wrapped_xml));
        org.xml.sax.XMLReader xml_reader = sax_parser.getXMLReader();
        // DefaultHandler ignores content; only exceptions thrown by the parse matter here.
        xml_reader.setContentHandler(new DefaultHandler());
        xml_reader.setErrorHandler(new DefaultHandler());
        xml_reader.parse(source);
    }
    catch (FactoryConfigurationError factory_error)
    {
        validation_msg = "unable to get a document builder factory";
    }
    catch (ParserConfigurationException config_error)
    {
        validation_msg = "unable to configure parser";
    }
    catch (SAXParseException parse_error)
    {
        validation_msg = NOTWELLFORMED + getLocationString(parse_error) + parse_error.getMessage();
    }
    catch (SAXException sax_error)
    {
        // NOTE(review): this appends to the "well-formed" text rather than replacing it - confirm intended.
        validation_msg += " Fatal error: " + sax_error.toString();
    }
    catch (IOException io_error)
    {
        validation_msg = "Unable to read the input, i/o error";
    }
    return validation_msg;
}
/**
 * Checks whether an XML string is well-formed by running it through a
 * namespace-aware, non-validating SAX parser. Unlike parse(String), the
 * string is not wrapped by the header and footer strings.
 *
 * @param xml_str the XML text to check
 * @return WELLFORMED when no exception is raised, otherwise a message
 *         describing the failure
 */
public static String parseDOM(String xml_str)
{
    String validation_msg = WELLFORMED;
    try
    {
        SAXParserFactory sax_factory = SAXParserFactory.newInstance();
        sax_factory.setNamespaceAware(true);
        SAXParser sax_parser = sax_factory.newSAXParser();
        InputSource source = new InputSource(new StringReader(xml_str));
        org.xml.sax.XMLReader xml_reader = sax_parser.getXMLReader();
        // DefaultHandler ignores content; only exceptions thrown by the parse matter here.
        xml_reader.setContentHandler(new DefaultHandler());
        xml_reader.setErrorHandler(new DefaultHandler());
        xml_reader.parse(source);
    }
    catch (FactoryConfigurationError factory_error)
    {
        validation_msg = "unable to get a document builder factory";
    }
    catch (ParserConfigurationException config_error)
    {
        validation_msg = "unable to configure parser";
    }
    catch (SAXParseException parse_error)
    {
        validation_msg = NOTWELLFORMED + getLocationString(parse_error) + parse_error.getMessage();
    }
    catch (SAXException sax_error)
    {
        // NOTE(review): this appends to the "well-formed" text rather than replacing it - confirm intended.
        validation_msg += " " + sax_error.toString();
    }
    catch (IOException io_error)
    {
        validation_msg = "Unable to read the input, i/o error";
    }
    return validation_msg;
}
/**
 * Checks whether an XML file is well-formed by running it through a
 * namespace-aware, non-validating SAX parser.
 *
 * The file is read through a FileReader, which is now always closed when
 * the check finishes, whether parsing succeeded or failed.
 *
 * @param xml_file the file to check
 * @return WELLFORMED when no exception is raised, otherwise a message
 *         describing the failure
 */
public static String parse(File xml_file)
{
    String validation_msg = WELLFORMED;
    FileReader r = null;
    try
    {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        SAXParser parser = factory.newSAXParser();
        r = new FileReader(xml_file);
        InputSource iSource = new InputSource(r);
        XMLReader reader = parser.getXMLReader();
        // DefaultHandler ignores content; only exceptions thrown by the parse matter here.
        reader.setContentHandler(new DefaultHandler());
        reader.setErrorHandler(new DefaultHandler());
        reader.parse(iSource);
    }
    catch (FactoryConfigurationError e)
    {
        validation_msg = "unable to get a document builder factory";
    }
    catch (ParserConfigurationException e)
    {
        validation_msg = "unable to configure parser";
    }
    catch (SAXParseException e)
    {
        validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage();
    }
    catch (SAXException e)
    {
        // NOTE(review): this appends to the "well-formed" text rather than replacing it - confirm intended.
        validation_msg += " Fatal error: " + e.toString();
    }
    catch (IOException e)
    {
        validation_msg = "Unable to read the input, i/o error";
    }
    finally
    {
        // Release the file handle on every path; the original never closed it.
        if (r != null)
        {
            try
            {
                r.close();
            }
            catch (IOException ignored)
            {
                // The verdict has already been decided; a failure to close does not change it.
            }
        }
    }
    return validation_msg;
}
/**
 * Formats the location of a parse error as
 * "file(line L, column C): ", where file is the last path segment of the
 * exception's system id (omitted when there is no system id).
 *
 * The reported line is the exception's line number minus one.
 * NOTE(review): presumably this compensates for a wrapper line added
 * before parsing - confirm against the callers.
 *
 * @param ex the parse exception to describe
 * @return the formatted location prefix
 */
private static String getLocationString(SAXParseException ex)
{
    StringBuilder location = new StringBuilder();
    String system_id = ex.getSystemId();
    if (system_id != null)
    {
        // lastIndexOf yields -1 when there is no '/', so this keeps the whole id in that case.
        location.append(system_id.substring(system_id.lastIndexOf('/') + 1));
    }
    location.append("(line ").append(ex.getLineNumber() - 1);
    location.append(", column ").append(ex.getColumnNumber());
    location.append("): ");
    return location.toString();
} // getLocationString(SAXParseException):String
/**
 * Parses an XML document from a given file path. When use_class_loader is
 * set, the path is first looked up as a resource (with a leading "/") via
 * JarTools; if no resource stream is obtained, or the flag is not set, the
 * path is opened as an ordinary file.
 *
 * @param xml_file_path the path of the XML file
 * @param use_class_loader whether to try loading the path as a resource first
 * @return whatever the delegated parse call returns
 */
static public Document parseXMLFile(String xml_file_path, boolean use_class_loader)
{
    if (use_class_loader)
    {
        InputStream resource_stream = JarTools.getResourceAsStream("/" + xml_file_path);
        if (resource_stream != null)
        {
            return parseXML(resource_stream);
        }
    }
    // Fall back to the file outside the classes directory
    File plain_file = new File(xml_file_path);
    return parseXMLFile(plain_file);
}
/**
 * Parses an XML document from a given file.
 *
 * @param xml_file the file to parse
 * @return the result of parseXML on the file's contents, or null if the
 *         file does not exist or an exception is raised while opening it
 */
static public Document parseXMLFile(File xml_file)
{
    // A missing file cannot be parsed.
    if (!xml_file.exists())
    {
        return null;
    }
    try
    {
        FileInputStream file_stream = new FileInputStream(xml_file);
        return parseXML(file_stream);
    }
    catch (Exception failure)
    {
        DebugStream.printStackTrace(failure);
        return null;
    }
}
/**
 * Parses an XML document from a given input stream, decoding it as UTF-8.
 * The reader and the stream are closed after the parse call returns.
 *
 * @param xml_input_stream the stream holding the XML
 * @return the result of parseXML(Reader), or null if an exception was
 *         raised before it returned
 */
static public Document parseXML(InputStream xml_input_stream)
{
    Document parsed = null;
    try
    {
        InputStreamReader utf8_reader = new InputStreamReader(xml_input_stream, "UTF-8");
        parsed = parseXML(utf8_reader);
        utf8_reader.close();
        xml_input_stream.close();
    }
    catch (Exception failure)
    {
        DebugStream.printStackTrace(failure);
    }
    return parsed;
}
/**
 * Parse an XML document from a given reader, using a non-validating Xerces
 * DOMParser that does not load external DTDs.
 *
 * When debugging is disabled the reader is wrapped in a
 * RemoveContentBeforeRootElementXMLReader; when debugging is enabled the
 * whole input is first read into a string so it can be shown if parsing
 * fails. In debug mode a SAX failure terminates the program via
 * System.exit(-1).
 *
 * @param xml_reader the reader supplying the XML; it is closed by this
 *            method only in debug mode
 * @return the parsed document, or null if parsing failed
 */
static public Document parseXML(Reader xml_reader)
{
Document document = null;
// If debugging, the following will store the XML contents to be parsed,
// which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
String xmlContents = "";
try
{
Reader reader = null;
// (1) By default, GLI will remove any contents preceding (and invalidating)
// the XML and present these lines separately to the user
if (!DebugStream.isDebuggingEnabled())
{
try
{
reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader));
}
catch (Exception e)
{
// NOTE(review): reader stays null on this path and is still handed to the InputSource below.
System.err.println("Exception while wrapping the reader in parseXML(Reader)");
e.printStackTrace();
}
}
// (2) If we are running GLI in debug mode:
// In case parsing exceptions are thrown (SAX Exceptions), we want to get some
// idea of where things went wrong, so the XML contents are kept in xmlContents
// for the exception handlers below to print.
// We need to read the XML twice to know the line where things went wrong, so
// do the additional reading only if we're debugging
else
{
StringBuffer buf = new StringBuffer();
char[] buffer = new char[500];
int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
while (numCharsRead != -1)
{
buf.append(buffer, 0, numCharsRead);
numCharsRead = xml_reader.read(buffer, 0, buffer.length);
}
xmlContents = buf.toString();
xml_reader.close(); // closing the old Reader
xml_reader = null;
buffer = null;
buf = null;
// we need a Reader to parse the same contents as the Reader that was just closed
reader = new BufferedReader(new StringReader(xmlContents));
//System.err.println("xmlContents:\n" + xmlContents);
}
// (3) The actual XML parsing
InputSource isc = new InputSource(reader);
DOMParser parser = new DOMParser();
parser.setFeature("http://xml.org/sax/features/validation", false);
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
// May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
parser.setEntityResolver(new GLIEntityResolver());
parser.parse(isc);
document = parser.getDocument();
}
catch (SAXParseException e)
{
// xmlContents is only populated in debug mode; otherwise it is the empty string.
showXMLParseFailureLine(e, xmlContents);
}
catch (SAXException exception)
{
System.err.println("SAX exception: " + exception.getMessage());
if (DebugStream.isDebuggingEnabled())
{
DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
// Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
System.exit(-1);
}
// else, not running in debug mode, so don't exit after exception
System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
DebugStream.printStackTrace(exception);
}
catch (Exception exception)
{
DebugStream.printStackTrace(exception);
}
return document;
}
/**
 * Reports where a SAXParseException occurred, given the entire XML text
 * that was being parsed. The messages are printed to DebugStream, so run
 * GLI/FLI with -debug to view this output. In debug mode the program exits
 * with status -1 after reporting.
 *
 * @param e
 *            the SAXParseException that was thrown while parsing
 *            xmlContents
 * @param xmlContents
 *            the entire xml that was being parsed when the exception
 *            occurred
 */
public static void showXMLParseFailureLine(SAXParseException e, String xmlContents)
{
    // Nothing may come before the XML itself; check for leading whitespace first.
    boolean starts_with_whitespace = xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t");
    if (starts_with_whitespace)
    {
        DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
        DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
        return; // first error identified, nothing more to do
    }

    final int linenumber = e.getLineNumber();
    DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber);

    if (!DebugStream.isDebuggingEnabled())
    {
        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
        return;
    }

    if (linenumber == -1)
    {
        // No particular line number: print all the xml so it can be inspected.
        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n");
    }
    else
    {
        // The text of the line on which parsing failed.
        String failing_line = "";
        String[] all_lines = xmlContents.split("\n");
        if (all_lines.length == 0)
        {
            DebugStream.print("\n");
        }
        else
        {
            DebugStream.println(" (number of lines: " + all_lines.length + ")");
            if (all_lines.length >= linenumber)
            {
                failing_line = all_lines[linenumber - 1];
            }
            else
            {
                // The reported line lies beyond the text we have; show the last line instead.
                failing_line = "Error is past the last line (" + all_lines.length + "): " + all_lines[all_lines.length - 1];
            }
        }
        DebugStream.println("The parsing error occurred on this line:\n***********START\n" + failing_line + "\n***********END");
        DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
    }

    // Exit so the user can view the erroneous line/xml before it scrolls away.
    DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
    System.exit(-1);
}
static public StringBuffer readXMLStream(InputStream input_stream)
{
StringBuffer xml = new StringBuffer("");
try
{
InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8");
BufferedReader buffered_in = new BufferedReader(isr);
String line = "";
boolean xml_content = false;
while ((line = buffered_in.readLine()) != null)
{
if (xml_content)
{
xml.append(line);
xml.append("\n");
}
else if (line.trim().startsWith("= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))
{
safe_characters[j] = character;
j++;
}
}
return new String(safe_characters, 0, j);
}
/**
 * Replaces all text children of an element with a single new text node.
 * Every direct child text node is removed; other children are kept. The
 * new text node is appended as the last child.
 *
 * @param element the element to modify
 * @param text the new text, or null to only remove the existing text nodes
 */
static public void setElementTextValue(Element element, String text)
{
    // Remember the next sibling before removing, since removal detaches the node.
    Node cursor = element.getFirstChild();
    while (cursor != null)
    {
        Node following = cursor.getNextSibling();
        if (cursor.getNodeType() == Node.TEXT_NODE)
        {
            element.removeChild(cursor);
        }
        cursor = following;
    }
    if (text == null)
    {
        return;
    }
    Text replacement = element.getOwnerDocument().createTextNode(text);
    element.appendChild(replacement);
}
/**
 * Sets the #text node value of some element. All existing children of the
 * element (not only text nodes) are removed first. Soon to be deprecated!
 *
 * @param element
 *            the Element whose value we wish to set
 * @param value
 *            the new value for the element as a String; when null the
 *            element is simply left empty
 */
static final public void setValue(Element element, String value)
{
    // Drop whatever the element currently contains.
    clear(element);
    if (value == null)
    {
        return;
    }
    Text replacement = element.getOwnerDocument().createTextNode(value);
    element.appendChild(replacement);
}
/**
 * Re-indents the element children of an element in place, recursively.
 * Whitespace-only text nodes are removed and replaced by indentation text
 * nodes: a newline followed by depth tabs before each child element, and a
 * newline followed by depth - 1 tabs before the element's end tag.
 *
 * @param elem the element whose children are re-indented
 * @param depth the number of tabs used to indent the children of elem
 */
static public void indentXML(Element elem, int depth)
{
Document doc = elem.getOwnerDocument();
// Indentation placed in front of every child element: newline plus depth tabs.
String startIndentString = "\n";
for (int i = 0; i < depth; i++)
{
startIndentString += "\t";
}
Node startTextNode = doc.createTextNode(startIndentString);
// Indentation placed after the last child: newline plus depth - 1 tabs.
String endIndentString = "\n";
for (int i = 0; i < depth - 1; i++)
{
endIndentString += "\t";
}
Node endTextNode = doc.createTextNode(endIndentString);
// First pass: find out whether elem has any element child, removing the
// whitespace-only text nodes encountered on the way.
boolean found = false;
Node child = elem.getFirstChild();
while (child != null)
{
// first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
if(child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
{
Node spaceTextNode = child;
// advance before removing, because removal detaches the node from its siblings
child = child.getNextSibling();
elem.removeChild(spaceTextNode);
if(child == null) break;
}
// now process normal element nodes as intended
if (child.getNodeType() == Node.ELEMENT_NODE)
{
found = true;
break;
}
child = child.getNextSibling();
}
// The closing indentation is only added when there is at least one child element.
if (found)
{
elem.appendChild(endTextNode);
}
// Second pass: indent and recurse into every child element.
child = elem.getFirstChild();
while (child != null)
{
// Again, need to first clear all empty text nodes (those containing space characters like \n,\r,\t and such)
// because the first while loop above would break out when it found an element node and wouldn't have got rid
// of all the empty text nodes yet.
// This time, beware not to delete the special end and start empty textnodes just added, since
// they've been created and inserted specifically.
if(child != endTextNode && child != startTextNode
&& child.getNodeType() == Node.TEXT_NODE && child.getNodeValue().matches("^\\s*$"))
{
Node spaceTextNode = child;
child = child.getNextSibling();
elem.removeChild(spaceTextNode);
if(child == null) break;
}
// go back to processing normal element nodes as intended
if (child.getNodeType() == Node.ELEMENT_NODE)
{
// a clone is inserted each time; startTextNode itself is never attached
elem.insertBefore(startTextNode.cloneNode(false), child);
indentXML((Element) child, depth + 1);
}
child = child.getNextSibling();
}
}
/**
 * Write an XML document to a given file as UTF-8, with the text of the
 * specified elements left unescaped. The document is re-indented in place
 * (see indentXML) before it is serialized.
 *
 * The output is always closed, also when serialization fails, and it is
 * closed through the buffered writer so that any buffered characters are
 * flushed to the file. Failures are reported to DebugStream and not
 * propagated.
 *
 * @param xml_file the file to write
 * @param document the document to serialize
 * @param nonEscapingTagNames names of elements whose text must not be
 *            escaped, or null if everything is escaped normally
 */
static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames)
{
    indentXML(document.getDocumentElement(), 1);
    OutputStream os = null;
    Writer w = null;
    try
    {
        os = new FileOutputStream(xml_file);
        // Create an output format for our document.
        OutputFormat f = new OutputFormat(document);
        f.setEncoding("UTF-8");
        f.setIndenting(true);
        f.setLineWidth(0); // Why isn't this working!
        f.setPreserveSpace(true);
        if (nonEscapingTagNames != null)
        {
            f.setNonEscapingElements(nonEscapingTagNames);
        }
        // Create the necessary writer stream for serialization.
        w = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        // Generate a new serializer from the above.
        XMLSerializer s = new XMLSerializer(w, f);
        s.asDOMSerializer();
        // Finally serialize the document to file.
        s.serialize(document);
    }
    catch (Exception exception)
    {
        DebugStream.printStackTrace(exception);
    }
    finally
    {
        // Close the outermost wrapper that exists: closing the writer flushes it
        // and closes the underlying stream as well.
        try
        {
            if (w != null)
            {
                w.close();
            }
            else if (os != null)
            {
                os.close();
            }
        }
        catch (IOException exception)
        {
            DebugStream.printStackTrace(exception);
        }
    }
}
/**
 * Write an XML document to a given file, escaping the text of all elements
 * normally. Delegates to the three-argument overload with no non-escaping
 * tag names.
 *
 * @param xml_file the file to write
 * @param document the document to serialize
 */
static public void writeXMLFile(File xml_file, Document document)
{
    final String[] no_unescaped_tags = null;
    writeXMLFile(xml_file, document, no_unescaped_tags);
}
/**
 * Prints a node and its descendants to standard output, starting with no
 * indentation.
 *
 * @param e the node to print
 */
public static void printXMLNode(Node e)
{
    printXMLNode(e, 0);
}
/**
 * Prints a node and its descendants to standard output, one node per line,
 * indented by one space per nesting level. Text nodes are printed as their
 * raw value; empty text nodes are skipped. Any other node is printed as a
 * tag with its attributes, followed by its children and a closing tag.
 * Values are not escaped; this is a debugging aid, not a serializer.
 *
 * @param e the node to print
 * @param depth the number of spaces to indent this node by
 */
public static void printXMLNode(Node e, int depth)
{ //recursive method call using DOM API...
    boolean is_text = e.getNodeType() == Node.TEXT_NODE;
    if (is_text)
    {
        String value = e.getNodeValue();
        // Compare by content: the original compared references with !=, which never filtered anything.
        if (value == null || value.length() == 0)
        {
            return;
        }
    }
    for (int i = 0; i < depth; i++)
    {
        System.out.print(' ');
    }
    if (is_text)
    {
        System.out.println(e.getNodeValue());
        return;
    }
    System.out.print('<');
    System.out.print(e.getNodeName());
    NamedNodeMap attrs = e.getAttributes();
    if (attrs != null)
    {
        for (int i = 0; i < attrs.getLength(); i++)
        {
            Node attr = attrs.item(i);
            System.out.print(' ');
            System.out.print(attr.getNodeName());
            System.out.print("=\"");
            System.out.print(attr.getNodeValue());
            System.out.print('"');
        }
    }
    NodeList children = e.getChildNodes();
    if (children == null || children.getLength() == 0)
    {
        System.out.println("/>");
    }
    else
    {
        System.out.println('>');
        int len = children.getLength();
        for (int i = 0; i < len; i++)
        {
            printXMLNode(children.item(i), depth + 1);
        }
        for (int i = 0; i < depth; i++)
        {
            System.out.print(' ');
        }
        // The closing tag needs its "</" opener; without it the output was "name>".
        System.out.println("</" + e.getNodeName() + ">");
    }
}
/**
 * Serializes a node and its descendants to an indented XML string, using
 * tabs for indentation and starting at an indentation depth of 2.
 *
 * @param e the node to serialize
 * @return the XML text
 */
public static String xmlNodeToString(Node e)
{
    StringBuffer sb = new StringBuffer("");
    xmlNodeToString(sb, e, true, "\t", 2);
    return sb.toString();
}
/**
 * Appends the XML text of a node and its descendants to a buffer.
 *
 * CDATA sections are written as CDATA, comments as comments followed by a
 * newline, and text with the markup characters escaped and trailing
 * whitespace removed. Any other node is written as a tag with escaped
 * attribute values. Indentation is switched off for the contents of an
 * element that directly contains non-blank text or CDATA, so mixed content
 * is kept on one line.
 *
 * @param sb the buffer to append to
 * @param e the node to serialize
 * @param indent whether tags are indented and followed by newlines
 * @param indentString the text appended once per depth level when indenting
 * @param depth the indentation depth of this node
 */
public static void xmlNodeToString(StringBuffer sb, Node e, boolean indent, String indentString, int depth)
{
    short node_type = e.getNodeType();
    if (node_type == Node.CDATA_SECTION_NODE)
    {
        String text = e.getNodeValue();
        if (text != null && text.length() > 0)
        {
            // CDATA content is written verbatim inside its markers.
            sb.append("<![CDATA[").append(text).append("]]>");
        }
        return;
    }
    if (node_type == Node.TEXT_NODE)
    {
        String text = e.getNodeValue();
        if (text != null && text.length() > 0)
        {
            text = escapeXMLNodeText(text, false).replaceAll("[\\n\\r\\t\\s]*$", "");
            // Leading whitespace is dropped only when it contains a newline before
            // the first non-whitespace character, i.e. when it is layout, not content.
            for (char c : text.toCharArray())
            {
                if (c == '\n')
                {
                    text = text.replaceAll("^[\\n\\r\\t\\s]*", "");
                    break;
                }
                if (!Character.isWhitespace(c))
                {
                    break;
                }
            }
            sb.append(text);
        }
        return;
    }
    if (node_type == Node.COMMENT_NODE)
    {
        String text = e.getNodeValue();
        if (text != null && text.length() > 0)
        {
            sb.append("<!--").append(text).append("-->\n");
        }
        return;
    }
    if (indent)
    {
        for (int i = 0; i < depth; i++)
        {
            sb.append(indentString);
        }
    }
    sb.append('<');
    sb.append(e.getNodeName());
    NamedNodeMap attrs = e.getAttributes();
    if (attrs != null)
    {
        for (int i = 0; i < attrs.getLength(); i++)
        {
            Node attr = attrs.item(i);
            sb.append(' ');
            sb.append(attr.getNodeName());
            sb.append("=\"");
            sb.append(escapeXMLNodeText(attr.getNodeValue(), true));
            sb.append('"');
        }
    }
    NodeList children = e.getChildNodes();
    // Resolve the null case once, before the list is used.
    int len = (children == null) ? 0 : children.getLength();
    boolean hasElements = false;
    boolean indentSwapped = false;
    for (int i = 0; i < len; i++)
    {
        Node child = children.item(i);
        short child_type = child.getNodeType();
        if (child_type == Node.ELEMENT_NODE)
        {
            hasElements = true;
        }
        if ((child_type == Node.TEXT_NODE || child_type == Node.CDATA_SECTION_NODE) && indent)
        {
            if (child.getNodeValue().trim().length() > 0)
            {
                // Real text content: stop indenting inside this element.
                indentSwapped = true;
                indent = false;
            }
        }
    }
    if (len == 0)
    {
        sb.append("/>");
        if (indent)
        {
            sb.append("\n");
        }
    }
    else
    {
        sb.append(">");
        if (hasElements && indent)
        {
            sb.append("\n");
        }
        for (int i = 0; i < len; i++)
        {
            xmlNodeToString(sb, children.item(i), indent, indentString, depth + 1);
        }
        if (indent)
        {
            for (int i = 0; i < depth; i++)
            {
                sb.append(indentString);
            }
        }
        sb.append("</").append(e.getNodeName()).append(">");
        if ((hasElements && indent) || indentSwapped)
        {
            sb.append("\n");
        }
    }
}
/** Escapes the XML markup characters in text; double quotes too when the text is an attribute value. */
private static String escapeXMLNodeText(String raw, boolean in_attribute)
{
    // The ampersand must be replaced first so the other entities are not escaped twice.
    String escaped = raw.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    return in_attribute ? escaped.replace("\"", "&quot;") : escaped;
}
/**
 * Serializes a node and its descendants to a single-line XML string with
 * no indentation at all. The negative depth passed to the worker stays
 * negative throughout the recursion, so no indentation spaces are emitted.
 *
 * @param e the node to serialize
 * @return the XML text
 */
public static String xmlNodeToStringWithoutIndenting(Node e)
{
    StringBuffer output = new StringBuffer("");
    final int no_indent_depth = -1;
    xmlNodeToStringWithoutNewline(output, e, no_indent_depth);
    return output.toString();
}
/**
 * Serializes a node and its descendants to a single-line XML string. Each
 * node is preceded by one space per nesting level, but no newlines are
 * written.
 *
 * @param e the node to serialize
 * @return the XML text
 */
public static String xmlNodeToStringWithoutNewline(Node e)
{
    StringBuffer sb = new StringBuffer("");
    xmlNodeToStringWithoutNewline(sb, e, 0);
    return sb.toString();
}
/**
 * Appends the single-line XML text of a node and its descendants to a
 * buffer. Text and attribute values have their markup characters escaped,
 * CDATA sections are written as CDATA and comments as comments.
 *
 * @param sb the buffer to append to
 * @param e the node to serialize
 * @param depth the number of spaces written before this node; a negative
 *            depth disables the spaces for this node and all descendants
 */
private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth)
{
    for (int i = 0; i < depth; i++)
    {
        sb.append(' ');
    }
    short node_type = e.getNodeType();
    if (node_type == Node.TEXT_NODE)
    {
        String text = e.getNodeValue();
        if (text != null && text.length() > 0)
        {
            sb.append(escapeMarkupCharacters(text, false));
        }
        return;
    }
    if (node_type == Node.CDATA_SECTION_NODE)
    {
        // Without this branch a CDATA section was written as an empty "#cdata-section" tag.
        String text = e.getNodeValue();
        if (text != null && text.length() > 0)
        {
            sb.append("<![CDATA[").append(text).append("]]>");
        }
        return;
    }
    if (node_type == Node.COMMENT_NODE)
    {
        String text = e.getNodeValue();
        if (text != null && text.length() > 0)
        {
            sb.append("<!--").append(text).append("-->");
        }
        return;
    }
    sb.append('<');
    sb.append(e.getNodeName());
    NamedNodeMap attrs = e.getAttributes();
    if (attrs != null)
    {
        for (int i = 0; i < attrs.getLength(); i++)
        {
            Node attr = attrs.item(i);
            sb.append(' ');
            sb.append(attr.getNodeName());
            sb.append("=\"");
            sb.append(escapeMarkupCharacters(attr.getNodeValue(), true));
            sb.append('"');
        }
    }
    NodeList children = e.getChildNodes();
    if (children == null || children.getLength() == 0)
    {
        sb.append("/>");
    }
    else
    {
        sb.append(">");
        // A negative depth means "no indentation" and must stay negative.
        int child_depth = (depth >= 0) ? depth + 1 : depth;
        int len = children.getLength();
        for (int i = 0; i < len; i++)
        {
            xmlNodeToStringWithoutNewline(sb, children.item(i), child_depth);
        }
        for (int i = 0; i < depth; i++)
        {
            sb.append(' ');
        }
        sb.append("</").append(e.getNodeName()).append(">");
    }
}
/** Escapes the XML markup characters in text; double quotes too when the text is an attribute value. */
private static String escapeMarkupCharacters(String raw, boolean in_attribute)
{
    // The ampersand must be replaced first so the other entities are not escaped twice.
    String escaped = raw.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
    return in_attribute ? escaped.replace("\"", "&quot;") : escaped;
}
/**
 * Converts an Element to a String using a JAXP identity Transformer, like
 * xmlNodeToString() does by hand. The Transformer additionally writes an
 * XML declaration line at the start of the output.
 * This method was copied into GLI from
 * src/java/org/greenstone/gsdl3/util/GSXML.java
 *
 * @param e the element to serialize
 * @param indent whether the transformer is asked to indent the output
 * @return the serialized XML, or a message naming the element if an
 *         exception was raised while serializing
 */
public static String elementToString(Element e, boolean indent)
{
    String str = "";
    try
    {
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer trans = tf.newTransformer();
        StringWriter sw = new StringWriter();
        trans.setOutputProperty(OutputKeys.INDENT, indent ? "yes" : "no");
        trans.transform(new DOMSource(e), new StreamResult(sw));
        str = sw.toString();
    }
    catch (Exception ex)
    {
        str += "Exception: couldn't write " + e + " to log";
    }
    // Returning here instead of from a finally block lets Errors propagate
    // instead of being silently discarded.
    return str;
}
}