package org.greenstone.core.util;
import java.io.*;
import java.net.*;
import java.util.*;
import org.apache.xerces.parsers.*;
import org.apache.xml.serialize.*;
import org.greenstone.core.DebugStream;
import org.w3c.dom.*;
import org.xml.sax.*;
/** Utility class providing static helper methods for parsing, querying, modifying and writing XML documents. */
public class XMLTools
{
/**
 * Strips every child node from the given node, leaving the node itself in place.
 * @param node the Node to empty; must not be null
 */
static final public void clear(Node node)
{
    // Always detach the current first child; the loop ends once none remain.
    for (Node child = node.getFirstChild(); child != null; child = node.getFirstChild()) {
        node.removeChild(child);
    }
}
/**
 * Collects the direct child elements of an element that carry a given tag name.
 * Only immediate children are inspected; deeper descendants are ignored.
 * @param parent_element the Element whose children are searched
 * @param element_name the node name a child element must have to be included
 * @return an ArrayList holding the matching child nodes in document order (empty if there are none)
 */
static public ArrayList getChildElementsByTagName(Element parent_element, String element_name)
{
    ArrayList matches = new ArrayList();
    for (Node candidate = parent_element.getFirstChild(); candidate != null; candidate = candidate.getNextSibling()) {
        boolean is_element = (candidate.getNodeType() == Node.ELEMENT_NODE);
        if (is_element && candidate.getNodeName().equals(element_name)) {
            matches.add(candidate);
        }
    }
    return matches;
}
/**
 * Returns the value of the first text node found directly beneath an element.
 * @param element the Element whose immediate children are examined
 * @return the value of the first child text node, or the empty string if the element has no child text node
 */
static public String getElementTextValue(Element element)
{
    Node current = element.getFirstChild();
    while (current != null) {
        if (current.getNodeType() == Node.TEXT_NODE) {
            return current.getNodeValue();
        }
        current = current.getNextSibling();
    }
    // No text node among the children.
    return "";
}
/** Method to retrieve the text value of a given node.
 * If the node is named "Subject", the value is read from its first child named "Value" instead.
 * The values of all child nodes whose name equals StaticStrings.TEXT_NODE are concatenated,
 * because the text of one element may be spread over several adjacent text nodes.
 * @param element The Node whose value we wish to find; may be null.
 * @return The concatenated text as a String, or the empty string if the node is null, a "Subject" node has no "Value" child, or the node has no children.
 * Soon to be deprecated!
 */
static final public String getValue(Node element) {
    // Guard first: the original dereferenced the argument before its null check.
    if (element == null) {
        return "";
    }
    // If we've been given a subject node first retrieve its value node.
    if (element.getNodeName().equals("Subject")) {
        element = getNodeFromNamed(element, "Value");
    }
    // getNodeFromNamed returns null when no "Value" child exists.
    if (element == null || !element.hasChildNodes()) {
        return "";
    }
    // Reconstruct the text from every text child.
    StringBuffer text_buffer = new StringBuffer();
    NodeList text_nodes = element.getChildNodes();
    for (int i = 0; i < text_nodes.getLength(); i++) {
        Node possible_text = text_nodes.item(i);
        // NOTE(review): StaticStrings.TEXT_NODE is presumably "#text" - confirm against StaticStrings.
        if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE)) {
            text_buffer.append(possible_text.getNodeValue());
        }
    }
    return text_buffer.toString();
}
/** Looks through the immediate children of a node for the first one with a given node name.
 * Children of any node type are considered; only the node name is compared.
 * @param parent The Node whose children should be searched.
 * @param name The node name to look for, as a String.
 * @return The first child Node with that name, or null if no child matches.
 * Soon to be deprecated!
 */
static final public Node getNodeFromNamed(Node parent, String name) {
    Node candidate = parent.getFirstChild();
    while (candidate != null) {
        if (candidate.getNodeName().equals(name)) {
            // First match wins.
            return candidate;
        }
        candidate = candidate.getNextSibling();
    }
    return null;
}
/**
 * Parses the XML document stored in a file.
 * The file is opened as a stream and handed to {@link #parseXML(InputStream)}.
 * Any exception is reported through DebugStream rather than propagated.
 * @param xml_file the File to read
 * @return the parsed Document, or null if the file does not exist or could not be opened or parsed
 */
static public Document parseXMLFile(File xml_file)
{
    // Nothing to parse when the file is missing.
    if (!xml_file.exists()) {
        return null;
    }
    Document document;
    try {
        InputStream file_stream = new FileInputStream(xml_file);
        document = parseXML(file_stream);
    }
    catch (Exception exception) {
        DebugStream.printStackTrace(exception);
        document = null;
    }
    return document;
}
/**
 * Parses an XML document from a given input stream.
 * The bytes are always decoded as UTF-8 before being passed to {@link #parseXML(Reader)}.
 * The stream is closed before this method returns, whether or not parsing succeeded.
 * Any exception is reported through DebugStream rather than propagated.
 * @param xml_input_stream the InputStream to read the document from
 * @return the parsed Document, or null if the document could not be read or parsed
 */
static public Document parseXML(InputStream xml_input_stream)
{
    Document document = null;
    try {
        Reader xml_reader = new BufferedReader(new InputStreamReader(xml_input_stream, "UTF-8"));
        document = parseXML(xml_reader);
    }
    catch (Exception exception) {
        DebugStream.printStackTrace(exception);
    }
    finally {
        // Release the stream on every path; the readers above hold no other resource.
        if (xml_input_stream != null) {
            try {
                xml_input_stream.close();
            }
            catch (IOException exception) {
                DebugStream.printStackTrace(exception);
            }
        }
    }
    return document;
}
/**
 * Parses an XML document from a given reader using a Xerces DOMParser.
 * Validation and loading of external DTDs are switched off, deferred node expansion is
 * requested, and ignorable whitespace is excluded from the resulting DOM.
 * Any exception is reported through DebugStream rather than propagated.
 * @param xml_reader the Reader supplying the document text
 * @return the parsed Document, or null if configuring the parser or parsing failed
 */
static public Document parseXML(Reader xml_reader)
{
    try {
        DOMParser dom_parser = new DOMParser();
        dom_parser.setFeature("http://xml.org/sax/features/validation", false);
        dom_parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // The original author noted that Xerces may ignore this feature; if honoured, parsing should be faster.
        dom_parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true);
        dom_parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
        dom_parser.parse(new InputSource(xml_reader));
        return dom_parser.getDocument();
    }
    catch (Exception exception) {
        DebugStream.printStackTrace(exception);
        return null;
    }
}
/**
 * Removes characters that are invalid in XML (see http://www.w3.org/TR/2000/REC-xml-20001006#charsets).
 * Kept are tab, line feed, carriage return, the ranges 0x20-0xD7FF and 0xE000-0xFFFD, and
 * supplementary characters (0x10000-0x10FFFF). A Java String stores a supplementary character
 * as a high surrogate followed by a low surrogate, so such pairs are kept together, while a
 * surrogate that is not part of a well-formed pair is removed.
 * @param text the String to filter; must not be null
 * @return a String containing only the characters of text that are valid in XML, in their original order
 */
static public String removeInvalidCharacters(String text)
{
    int length = text.length();
    StringBuffer safe_characters = new StringBuffer(length);
    for (int i = 0; i < length; i++) {
        char character = text.charAt(i);
        if (character >= 0xD800 && character <= 0xDBFF) {
            // High surrogate: only valid when immediately followed by a low surrogate.
            if (i + 1 < length) {
                char next = text.charAt(i + 1);
                if (next >= 0xDC00 && next <= 0xDFFF) {
                    safe_characters.append(character);
                    safe_characters.append(next);
                    i++; // the low surrogate has been consumed as part of the pair
                }
            }
            continue;
        }
        // A low surrogate reaching this point is unpaired and matches none of the ranges below.
        if (character == 0x09 || character == 0x0A || character == 0x0D
            || (character >= 0x20 && character <= 0xD7FF)
            || (character >= 0xE000 && character <= 0xFFFD)) {
            safe_characters.append(character);
        }
    }
    return safe_characters.toString();
}
/** Replace the content of an element with a single #text node.
 * All existing children of the element are removed first, whatever their type.
 * @param element the Element whose value we wish to set
 * @param value the new value for the element as a String; if null the element is simply left empty
 * Soon to be deprecated!
 */
static final public void setValue(Element element, String value) {
    // Drop whatever the element currently contains.
    clear(element);
    if (value == null) {
        return;
    }
    // The new text node has to be created by the document that owns the element.
    Document owner = element.getOwnerDocument();
    Text text_node = owner.createTextNode(value);
    element.appendChild(text_node);
}
/**
 * Writes an XML document to a given file as indented UTF-8 text, using the Xerces XMLSerializer.
 * The file is closed on every path, including when serialization fails.
 * Any exception is reported through DebugStream rather than propagated.
 * @param xml_file the File to write to
 * @param document the Document to serialize
 */
static public void writeXMLFile(File xml_file, Document document)
{
    OutputStream os = null;
    Writer w = null;
    try {
        os = new FileOutputStream(xml_file);
        // Create an output format for our document.
        OutputFormat f = new OutputFormat(document);
        f.setEncoding("UTF-8");
        f.setIndenting(true);
        // NOTE(review): the original author reports that this setting does not appear to take effect.
        f.setLineWidth(0);
        f.setPreserveSpace(false);
        // Create the necessary writer stream for serialization.
        w = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
        // Generate a new serializer from the above.
        XMLSerializer s = new XMLSerializer(w, f);
        s.asDOMSerializer();
        // Finally serialize the document to file.
        s.serialize(document);
    }
    catch (Exception exception) {
        DebugStream.printStackTrace(exception);
    }
    finally {
        // Close the outermost writer so buffered output is flushed; fall back to the
        // raw stream if the writer was never created.
        try {
            if (w != null) {
                w.close();
            }
            else if (os != null) {
                os.close();
            }
        }
        catch (IOException exception) {
            DebugStream.printStackTrace(exception);
        }
    }
}
}