[3235] | 1 | /*
|
---|
| 2 | * XMLConverter.java
|
---|
| 3 | * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
|
---|
| 4 | *
|
---|
| 5 | * This program is free software; you can redistribute it and/or modify
|
---|
| 6 | * it under the terms of the GNU General Public License as published by
|
---|
| 7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 8 | * (at your option) any later version.
|
---|
| 9 | *
|
---|
| 10 | * This program is distributed in the hope that it will be useful,
|
---|
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 13 | * GNU General Public License for more details.
|
---|
| 14 | *
|
---|
| 15 | * You should have received a copy of the GNU General Public License
|
---|
| 16 | * along with this program; if not, write to the Free Software
|
---|
| 17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 18 | */
|
---|
[3222] | 19 | package org.greenstone.gsdl3.util;
|
---|
| 20 |
|
---|
| 21 | // XML classes
|
---|
[18434] | 22 | import org.w3c.dom.DOMImplementation;
|
---|
[24862] | 23 | import org.w3c.dom.Document;
|
---|
| 24 | import org.w3c.dom.DocumentType;
|
---|
| 25 | import org.w3c.dom.Element;
|
---|
| 26 | import org.w3c.dom.Node;
|
---|
[3768] | 27 | import org.w3c.dom.NodeList;
|
---|
| 28 | import org.w3c.dom.NamedNodeMap;
|
---|
[3222] | 29 | import org.xml.sax.InputSource;
|
---|
[5187] | 30 | import org.xml.sax.EntityResolver;
|
---|
[18434] | 31 | import org.xml.sax.ErrorHandler;
|
---|
| 32 | import org.xml.sax.SAXParseException;
|
---|
[3222] | 33 | import org.apache.xerces.parsers.DOMParser;
|
---|
[18434] | 34 | import org.apache.xerces.dom.*; // for new Documents
|
---|
[3222] | 35 |
|
---|
| 36 | // other java classes
|
---|
[25655] | 37 | import java.io.ByteArrayInputStream;
|
---|
| 38 | import java.io.InputStream;
|
---|
[3222] | 39 | import java.io.Reader;
|
---|
[4087] | 40 | import java.io.InputStreamReader;
|
---|
[3222] | 41 | import java.io.StringReader;
|
---|
| 42 | import java.io.File;
|
---|
[4087] | 43 | import java.io.FileInputStream;
|
---|
[3222] | 44 | import java.io.FileReader;
|
---|
[23791] | 45 | import java.util.regex.*;
|
---|
[3222] | 46 |
|
---|
[13124] | 47 | import org.apache.log4j.*;
|
---|
| 48 |
|
---|
[23791] | 49 | // Apache Commons
|
---|
| 50 | import org.apache.commons.lang3.*;
|
---|
| 51 |
|
---|
| 52 | import java.util.*;
|
---|
| 53 | import java.lang.reflect.*;
|
---|
| 54 |
|
---|
[24862] | 55 | /**
|
---|
| 56 | * XMLConverter - utility class for greenstone
|
---|
| 57 | *
|
---|
| 58 | * parses XML Strings into Documents, converts Nodes to Strings different
|
---|
| 59 | * parsers have different behaviour - can experiment in here now we only use
|
---|
| 60 | * xerces
|
---|
| 61 | *
|
---|
[3222] | 62 | */
|
---|
[24862] | 63 | public class XMLConverter
|
---|
| 64 | {
|
---|
[3222] | 65 |
|
---|
[24862] | 66 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.XMLConverter.class.getName());
|
---|
[13124] | 67 |
|
---|
[24863] | 68 | protected EntityResolver resolver = null;
|
---|
| 69 |
|
---|
[24862] | 70 | /** xerces parser */
|
---|
| 71 | protected DOMParser parser = null;
|
---|
[3222] | 72 |
|
---|
[24862] | 73 | private static boolean outputEscaping = true;
|
---|
[3768] | 74 |
|
---|
[24862] | 75 | /** the no-args constructor */
|
---|
| 76 | public XMLConverter()
|
---|
| 77 | {
|
---|
| 78 | try
|
---|
| 79 | {
|
---|
| 80 | this.parser = new DOMParser();
|
---|
| 81 | this.parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 82 | // don't try and load external DTD - no need if we are not validating, and may cause connection errors if a proxy is not set up.
|
---|
| 83 | this.parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 84 | // a performance test showed that having this on lead to increased
|
---|
| 85 | // memory use for small-medium docs, and not much gain for large
|
---|
| 86 | // docs.
|
---|
| 87 | // http://www.sosnoski.com/opensrc/xmlbench/conclusions.html
|
---|
| 88 | this.parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 89 | // add an errorhandler to the parser which will store useful a error message on encountering fatal errors, errors and warnings when parsing
|
---|
| 90 | // this errormessage can then be converted to xhtml and displayed in a browser.
|
---|
| 91 | this.parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 92 | }
|
---|
| 93 | catch (Exception e)
|
---|
| 94 | {
|
---|
| 95 | logger.error(e.getMessage());
|
---|
| 96 | }
|
---|
[3222] | 97 | }
|
---|
| 98 |
|
---|
[24862] | 99 | /** sets the entity resolver. pass in null to unset it */
|
---|
| 100 | public void setEntityResolver(EntityResolver er)
|
---|
| 101 | {
|
---|
[24863] | 102 | this.resolver = er;
|
---|
[24862] | 103 | this.parser.setEntityResolver(er);
|
---|
| 104 | }
|
---|
[16688] | 105 |
|
---|
[24862] | 106 | /**
|
---|
| 107 | * Given a Node representing an Element or Document, will return the
|
---|
| 108 | * Element/docroot Element. Returns null if the Node was not an element.
|
---|
| 109 | */
|
---|
| 110 | public static Element nodeToElement(Node node)
|
---|
| 111 | {
|
---|
| 112 | if (node == null)
|
---|
| 113 | {
|
---|
| 114 | return null;
|
---|
| 115 | }
|
---|
| 116 | short nodeType = node.getNodeType();
|
---|
| 117 |
|
---|
| 118 | if (nodeType == Node.DOCUMENT_NODE)
|
---|
| 119 | {
|
---|
| 120 | Document docNode = (Document) node;
|
---|
| 121 | return docNode.getDocumentElement();
|
---|
| 122 | }
|
---|
| 123 | else if (nodeType == Node.ELEMENT_NODE)
|
---|
| 124 | {
|
---|
| 125 | return (Element) node;
|
---|
| 126 | }
|
---|
| 127 | else
|
---|
| 128 | {
|
---|
| 129 | String message = "Expecting Document or Element node type but got " + node.getNodeName() + "\nReturning null";
|
---|
| 130 | System.err.println(message);
|
---|
| 131 | logger.warn(message);
|
---|
| 132 | return null;
|
---|
| 133 | }
|
---|
[16999] | 134 | }
|
---|
[16688] | 135 |
|
---|
[24862] | 136 | /** returns a DOM Document */
|
---|
| 137 | public Document getDOM(String in)
|
---|
| 138 | {
|
---|
| 139 |
|
---|
| 140 | try
|
---|
| 141 | {
|
---|
| 142 | Reader reader = new StringReader(in);
|
---|
| 143 | InputSource xml_source = new InputSource(reader);
|
---|
| 144 |
|
---|
[24863] | 145 | DOMParser parser = new DOMParser();
|
---|
| 146 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 147 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 148 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 149 | if(resolver != null)
|
---|
| 150 | {
|
---|
| 151 | parser.setEntityResolver(this.resolver);
|
---|
| 152 | }
|
---|
| 153 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 154 | parser.parse(xml_source);
|
---|
| 155 |
|
---|
| 156 | Document doc = parser.getDocument();
|
---|
[24862] | 157 |
|
---|
| 158 | return doc;
|
---|
| 159 |
|
---|
| 160 | }
|
---|
| 161 | catch (Exception e)
|
---|
| 162 | {
|
---|
| 163 | logger.error(e.getMessage());
|
---|
| 164 | }
|
---|
| 165 | return null;
|
---|
[16688] | 166 | }
|
---|
[24862] | 167 |
|
---|
| 168 | /** returns a DOM Document */
|
---|
[25655] | 169 | public Document getDOM(String in, String encoding)
|
---|
| 170 | {
|
---|
| 171 | try
|
---|
| 172 | {
|
---|
| 173 | InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(in.getBytes(encoding)), encoding);
|
---|
| 174 | InputSource xml_source = new InputSource(reader);
|
---|
| 175 |
|
---|
| 176 | DOMParser parser = new DOMParser();
|
---|
| 177 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 178 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 179 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 180 | if(resolver != null)
|
---|
| 181 | {
|
---|
| 182 | parser.setEntityResolver(this.resolver);
|
---|
| 183 | }
|
---|
| 184 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 185 | parser.parse(xml_source);
|
---|
| 186 |
|
---|
| 187 | Document doc = parser.getDocument();
|
---|
| 188 |
|
---|
| 189 | return doc;
|
---|
| 190 |
|
---|
| 191 | }
|
---|
| 192 | catch (Exception e)
|
---|
| 193 | {
|
---|
| 194 | logger.error(e.getMessage());
|
---|
| 195 | }
|
---|
| 196 | return null;
|
---|
| 197 | }
|
---|
| 198 |
|
---|
| 199 | /** returns a DOM Document */
|
---|
[24862] | 200 | public Document getDOM(File in)
|
---|
| 201 | {
|
---|
| 202 | try
|
---|
| 203 | {
|
---|
| 204 | FileReader reader = new FileReader(in);
|
---|
| 205 | InputSource xml_source = new InputSource(reader);
|
---|
[24863] | 206 |
|
---|
| 207 | DOMParser parser = new DOMParser();
|
---|
| 208 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 209 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 210 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 211 | if(resolver != null)
|
---|
| 212 | {
|
---|
| 213 | parser.setEntityResolver(this.resolver);
|
---|
| 214 | }
|
---|
| 215 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 216 | parser.parse(xml_source);
|
---|
| 217 |
|
---|
| 218 | Document doc = parser.getDocument();
|
---|
[24862] | 219 | return doc;
|
---|
| 220 |
|
---|
| 221 | }
|
---|
| 222 | catch (Exception e)
|
---|
| 223 | {
|
---|
| 224 | logger.error(e.getMessage(), e);
|
---|
| 225 |
|
---|
| 226 | }
|
---|
| 227 | return null;
|
---|
[16688] | 228 | }
|
---|
| 229 |
|
---|
[24862] | 230 | /** returns a DOM document */
|
---|
| 231 | public Document getDOM(File in, String encoding)
|
---|
| 232 | {
|
---|
| 233 | try
|
---|
| 234 | {
|
---|
| 235 |
|
---|
| 236 | InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
|
---|
| 237 | InputSource xml_source = new InputSource(isr);
|
---|
| 238 |
|
---|
[24863] | 239 | DOMParser parser = new DOMParser();
|
---|
| 240 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 241 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 242 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 243 | if(resolver != null)
|
---|
| 244 | {
|
---|
| 245 | parser.setEntityResolver(this.resolver);
|
---|
| 246 | }
|
---|
| 247 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 248 | parser.parse(xml_source);
|
---|
| 249 |
|
---|
| 250 | Document doc = parser.getDocument();
|
---|
[24862] | 251 |
|
---|
| 252 | return doc;
|
---|
| 253 |
|
---|
| 254 | }
|
---|
| 255 | catch (Exception e)
|
---|
| 256 | {
|
---|
| 257 | logger.error(e.getMessage());
|
---|
| 258 | }
|
---|
| 259 | return null;
|
---|
[3222] | 260 | }
|
---|
[4087] | 261 |
|
---|
[24862] | 262 | /** creates a new empty DOM Document */
|
---|
| 263 | public static Document newDOM()
|
---|
| 264 | {
|
---|
| 265 | Document doc = new DocumentImpl();
|
---|
| 266 | return doc;
|
---|
[4087] | 267 | }
|
---|
| 268 |
|
---|
[24862] | 269 | /**
|
---|
| 270 | * This method's parameters represent the parts of the Doctype of this
|
---|
| 271 | * Document that is to be created. For more info see
|
---|
| 272 | * http://xerces.apache.org
|
---|
| 273 | * /xerces-j/apiDocs/org/apache/xerces/dom/DocumentTypeImpl
|
---|
| 274 | * .html#DocumentTypeImpl
|
---|
| 275 | * (org.apache.xerces.dom.CoreDocumentImpl,%20java.lang.String)
|
---|
| 276 | *
|
---|
| 277 | * */
|
---|
| 278 | public static Document newDOM(String qualifiedName, String publicID, String systemID)
|
---|
| 279 | {
|
---|
| 280 | // create empty DOM document
|
---|
| 281 | DocumentImpl docImpl = new DocumentImpl();
|
---|
[3222] | 282 |
|
---|
[24862] | 283 | // Need to use the document to create the docType for it
|
---|
| 284 | DocumentType myDocType = new DocumentTypeImpl(docImpl, qualifiedName, publicID, systemID);
|
---|
[3222] | 285 |
|
---|
[18434] | 286 | // Although we have created the docType using the document, we need to still
|
---|
| 287 | // put it into the empty document we just created
|
---|
[24862] | 288 | try
|
---|
| 289 | {
|
---|
[18434] | 290 | docImpl.appendChild(myDocType);
|
---|
| 291 | }
|
---|
[24862] | 292 | catch (Exception e)
|
---|
| 293 | {
|
---|
| 294 | System.out.println("Could not append docType because: " + e);
|
---|
| 295 | }
|
---|
| 296 |
|
---|
[18434] | 297 | // return the document containing a DocType
|
---|
[24862] | 298 | return docImpl;
|
---|
| 299 | }
|
---|
[3222] | 300 |
|
---|
[24862] | 301 | /** returns the Node as a String */
|
---|
| 302 | public static String getString(Node xmlNode)
|
---|
| 303 | {
|
---|
| 304 | outputEscaping = true;
|
---|
| 305 | StringBuffer xmlRepresentation = new StringBuffer();
|
---|
| 306 | getString(xmlNode, xmlRepresentation, 0, false);
|
---|
| 307 | return xmlRepresentation.toString();
|
---|
| 308 | }
|
---|
[3768] | 309 |
|
---|
[24862] | 310 | /**
|
---|
| 311 | * returns the node as a nicely formatted String - this introduces extra
|
---|
| 312 | * text nodes if the String is read back in as a DOM, so should only be used
|
---|
| 313 | * for printing
|
---|
| 314 | */
|
---|
| 315 | public static String getPrettyString(Node xmlNode)
|
---|
| 316 | {
|
---|
[3768] | 317 |
|
---|
[24862] | 318 | outputEscaping = true;
|
---|
| 319 | StringBuffer xmlRepresentation = new StringBuffer();
|
---|
| 320 | getString(xmlNode, xmlRepresentation, 0, true);
|
---|
| 321 | return xmlRepresentation.toString();
|
---|
| 322 | }
|
---|
[23791] | 323 |
|
---|
[24862] | 324 | /*
|
---|
| 325 | * For the purposes of logger.debug statements, where this is called and
|
---|
| 326 | * hence outputted, returns an empty string if debugging is not enabled
|
---|
| 327 | */
|
---|
| 328 | public static String getPrettyStringLogger(Node xmlNode, Logger log)
|
---|
| 329 | {
|
---|
[23791] | 330 |
|
---|
[24862] | 331 | if (log.isDebugEnabled())
|
---|
| 332 | return getPrettyString(xmlNode);
|
---|
[23791] | 333 |
|
---|
[24862] | 334 | return "";
|
---|
| 335 |
|
---|
[10202] | 336 | }
|
---|
[3768] | 337 |
|
---|
[24862] | 338 | private static void getString(Node xmlNode, StringBuffer xmlRepresentation, int depth, boolean pretty)
|
---|
| 339 | {
|
---|
[3768] | 340 |
|
---|
[24862] | 341 | if (xmlNode == null)
|
---|
| 342 | {
|
---|
| 343 | xmlRepresentation.append("<null>");
|
---|
| 344 | return;
|
---|
[3970] | 345 | }
|
---|
[3768] | 346 |
|
---|
[24862] | 347 | short nodeType = xmlNode.getNodeType();
|
---|
| 348 | String nodeName = xmlNode.getNodeName();
|
---|
[3768] | 349 |
|
---|
[24862] | 350 | if (nodeType == Node.DOCUMENT_NODE)
|
---|
| 351 | {
|
---|
| 352 | Document xmlDocNode = (Document) xmlNode;
|
---|
| 353 |
|
---|
| 354 | //if (xmlDocNode.getDoctype() == null) {
|
---|
| 355 | //System.err.println("Doctype is null.");
|
---|
| 356 | //}
|
---|
| 357 | //else {
|
---|
| 358 | if (xmlDocNode.getDoctype() != null)
|
---|
| 359 | {
|
---|
| 360 | DocumentType dt = xmlDocNode.getDoctype();
|
---|
| 361 |
|
---|
| 362 | String name = dt.getName();
|
---|
| 363 | String pid = dt.getPublicId();
|
---|
| 364 | String sid = dt.getSystemId();
|
---|
| 365 |
|
---|
| 366 | // Use previously assigned name, not dt.getName() again
|
---|
| 367 | String doctype_str = "<!DOCTYPE " + name + " PUBLIC \"" + pid + "\" \"" + sid + "\">\n";
|
---|
| 368 |
|
---|
| 369 | xmlRepresentation.append(doctype_str);
|
---|
| 370 | }
|
---|
| 371 | getString(xmlDocNode.getDocumentElement(), xmlRepresentation, depth, pretty);
|
---|
| 372 | return;
|
---|
[4285] | 373 | }
|
---|
[24862] | 374 | // Handle Element nodes
|
---|
| 375 | if (nodeType == Node.ELEMENT_NODE)
|
---|
| 376 | {
|
---|
| 377 | if (pretty)
|
---|
| 378 | {
|
---|
| 379 | xmlRepresentation.append("\n");
|
---|
| 380 | for (int i = 0; i < depth; i++)
|
---|
| 381 | {
|
---|
| 382 | xmlRepresentation.append(" ");
|
---|
| 383 | }
|
---|
| 384 | }
|
---|
| 385 |
|
---|
| 386 | // Write opening tag
|
---|
| 387 | xmlRepresentation.append("<");
|
---|
| 388 | xmlRepresentation.append(nodeName);
|
---|
| 389 |
|
---|
| 390 | // Write the node attributes
|
---|
| 391 | NamedNodeMap nodeAttributes = xmlNode.getAttributes();
|
---|
| 392 | for (int i = 0; i < nodeAttributes.getLength(); i++)
|
---|
| 393 | {
|
---|
| 394 | Node attribute = nodeAttributes.item(i);
|
---|
| 395 | xmlRepresentation.append(" ");
|
---|
| 396 | xmlRepresentation.append(attribute.getNodeName());
|
---|
| 397 | xmlRepresentation.append("=\"");
|
---|
| 398 | xmlRepresentation.append(attribute.getNodeValue());
|
---|
| 399 | xmlRepresentation.append("\"");
|
---|
| 400 | }
|
---|
| 401 |
|
---|
| 402 | // If the node has no children, close the opening tag and return
|
---|
| 403 | if (xmlNode.hasChildNodes() == false)
|
---|
| 404 | {
|
---|
| 405 | // This produces somewhat ugly output, but it is necessary to compensate
|
---|
| 406 | // for display bugs in Netscape. Firstly, the space is needed before the
|
---|
| 407 | // closing bracket otherwise Netscape will ignore some tags (<br/>, for
|
---|
| 408 | // example). Also, a newline character would be expected after the tag,
|
---|
| 409 | // but this causes problems with the display of links (the link text
|
---|
| 410 | // will contain a newline character, which is displayed badly).
|
---|
| 411 | xmlRepresentation.append(" />");
|
---|
| 412 | return;
|
---|
| 413 | }
|
---|
| 414 |
|
---|
| 415 | // Close the opening tag
|
---|
| 416 | xmlRepresentation.append(">");
|
---|
| 417 |
|
---|
| 418 | // Apply recursively to the children of this node
|
---|
| 419 | // hack for nodes next to text nodes - dont make them pretty
|
---|
| 420 | // this is needed for text inside a <pre> element - any new lines
|
---|
| 421 | // or spaces around the span elements show up in the text
|
---|
| 422 | NodeList children = xmlNode.getChildNodes();
|
---|
| 423 | boolean do_pretty = pretty;
|
---|
| 424 | for (int i = 0; i < children.getLength(); i++)
|
---|
| 425 | {
|
---|
| 426 | if (children.item(i).getNodeType() == Node.TEXT_NODE)
|
---|
| 427 | {
|
---|
| 428 | do_pretty = false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
|
---|
| 429 | }
|
---|
| 430 | getString(children.item(i), xmlRepresentation, depth + 1, do_pretty);
|
---|
| 431 | }
|
---|
| 432 |
|
---|
| 433 | // Write closing tag
|
---|
| 434 | if (pretty)
|
---|
| 435 | {
|
---|
| 436 | if (xmlRepresentation.charAt(xmlRepresentation.length() - 1) == '\n')
|
---|
| 437 | {
|
---|
| 438 | for (int i = 0; i < depth; i++)
|
---|
| 439 | xmlRepresentation.append(" ");
|
---|
| 440 | }
|
---|
| 441 | }
|
---|
| 442 | xmlRepresentation.append("</");
|
---|
| 443 | xmlRepresentation.append(nodeName);
|
---|
| 444 | xmlRepresentation.append(">");
|
---|
| 445 | if (pretty)
|
---|
| 446 | {
|
---|
| 447 | xmlRepresentation.append("\n");
|
---|
| 448 | }
|
---|
[3970] | 449 | }
|
---|
[3768] | 450 |
|
---|
[24862] | 451 | // Handle Text nodes
|
---|
| 452 | else if (nodeType == Node.TEXT_NODE)
|
---|
| 453 | {
|
---|
| 454 | String text = xmlNode.getNodeValue();
|
---|
[23791] | 455 |
|
---|
[24862] | 456 | // Perform output escaping, if required
|
---|
| 457 | // Apache Commons replace method is far superior to String.replaceAll - very fast!
|
---|
| 458 | if (outputEscaping)
|
---|
| 459 | {
|
---|
[3768] | 460 |
|
---|
[24862] | 461 | text = StringUtils.replace(text, "&", "&");
|
---|
| 462 | text = StringUtils.replace(text, "<", "<");
|
---|
| 463 | text = StringUtils.replace(text, ">", ">");
|
---|
| 464 | text = StringUtils.replace(text, "'", "'");
|
---|
| 465 | text = StringUtils.replace(text, "\"", """);
|
---|
| 466 | }
|
---|
[23791] | 467 |
|
---|
[24862] | 468 | // Remove any control-C characters
|
---|
| 469 | text = StringUtils.replace(text, "" + (char) 3, "");
|
---|
[3768] | 470 |
|
---|
[24862] | 471 | xmlRepresentation.append(text);
|
---|
| 472 | }
|
---|
| 473 |
|
---|
| 474 | // Handle Processing Instruction nodes
|
---|
| 475 | else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE)
|
---|
| 476 | {
|
---|
| 477 | if (nodeName.equals("javax.xml.transform.disable-output-escaping"))
|
---|
| 478 | {
|
---|
| 479 | outputEscaping = false;
|
---|
| 480 | }
|
---|
| 481 | else if (nodeName.equals("javax.xml.transform.enable-output-escaping"))
|
---|
| 482 | {
|
---|
| 483 | outputEscaping = true;
|
---|
| 484 | }
|
---|
| 485 | else
|
---|
| 486 | {
|
---|
| 487 | logger.warn("Unhandled processing instruction " + nodeName);
|
---|
| 488 | }
|
---|
| 489 | }
|
---|
| 490 |
|
---|
| 491 | else if (nodeType == Node.COMMENT_NODE)
|
---|
| 492 | {
|
---|
| 493 | String text = xmlNode.getNodeValue();
|
---|
| 494 | xmlRepresentation.append("<!-- ");
|
---|
| 495 | xmlRepresentation.append(text);
|
---|
| 496 | xmlRepresentation.append(" -->");
|
---|
| 497 | }
|
---|
| 498 |
|
---|
| 499 | // A type of node that is not handled yet
|
---|
| 500 | else
|
---|
| 501 | {
|
---|
| 502 | logger.warn("Unknown node type: " + nodeType + " " + getNodeTypeString(nodeType));
|
---|
| 503 | }
|
---|
| 504 |
|
---|
| 505 | return;
|
---|
[3768] | 506 | }
|
---|
| 507 |
|
---|
[24862] | 508 | protected static String getNodeTypeString(short node_type)
|
---|
| 509 | {
|
---|
[3908] | 510 |
|
---|
[24862] | 511 | String type = "";
|
---|
| 512 | switch (node_type)
|
---|
| 513 | {
|
---|
| 514 | case Node.ATTRIBUTE_NODE:
|
---|
| 515 | type = "ATTRIBUTE_NODE";
|
---|
| 516 | break;
|
---|
| 517 | case Node.CDATA_SECTION_NODE:
|
---|
| 518 | type = "CDATA_SECTION_NODE";
|
---|
| 519 | break;
|
---|
| 520 | case Node.COMMENT_NODE:
|
---|
| 521 | type = "COMMENT_NODE";
|
---|
| 522 | break;
|
---|
| 523 | case Node.DOCUMENT_FRAGMENT_NODE:
|
---|
| 524 | type = "DOCUMENT_FRAGMENT_NODE";
|
---|
| 525 | break;
|
---|
| 526 | case Node.DOCUMENT_NODE:
|
---|
| 527 | type = "DOCUMENT_NODE";
|
---|
| 528 | break;
|
---|
| 529 | case Node.DOCUMENT_TYPE_NODE:
|
---|
| 530 | type = "DOCUMENT_TYPE_NODE";
|
---|
| 531 | break;
|
---|
| 532 | case Node.ELEMENT_NODE:
|
---|
| 533 | type = "ELEMENT_NODE";
|
---|
| 534 | break;
|
---|
| 535 | case Node.ENTITY_NODE:
|
---|
| 536 | type = "ENTITY_NODE";
|
---|
| 537 | break;
|
---|
| 538 | case Node.ENTITY_REFERENCE_NODE:
|
---|
| 539 | type = "ENTITY_REFERENCE_NODE";
|
---|
| 540 | break;
|
---|
| 541 | case Node.NOTATION_NODE:
|
---|
| 542 | type = "NOTATION_NODE";
|
---|
| 543 | break;
|
---|
| 544 | case Node.PROCESSING_INSTRUCTION_NODE:
|
---|
| 545 | type = "PROCESSING_INSTRUCTION_NODE";
|
---|
| 546 | break;
|
---|
| 547 | case Node.TEXT_NODE:
|
---|
| 548 | type = "TEXT_NODE";
|
---|
| 549 | break;
|
---|
| 550 | default:
|
---|
| 551 | type = "UNKNOWN";
|
---|
| 552 | }
|
---|
[3908] | 553 |
|
---|
[24862] | 554 | return type;
|
---|
[3908] | 555 | }
|
---|
| 556 |
|
---|
[18434] | 557 | // returns null if there no error occurred during parsing, or else returns the error message
|
---|
[24862] | 558 | public String getParseErrorMessage()
|
---|
| 559 | {
|
---|
| 560 | ParseErrorHandler errorHandler = (ParseErrorHandler) this.parser.getErrorHandler();
|
---|
[18434] | 561 | return errorHandler.getErrorMessage();
|
---|
| 562 | }
|
---|
[24862] | 563 |
|
---|
[18434] | 564 | // Errorhandler for SAXParseExceptions that are errors, fatal errors or warnings. This class can be used to
|
---|
| 565 | // register a handler for any fatal errors, errors and warnings that may occur when parsing an xml file. The
|
---|
| 566 | // errors are printed both to the greenstone.log and to the tomcat console (System.err), and the error message
|
---|
| 567 | // is stored in the errorMessage variable so that it can be retrieved and be used to generate an xhtml error page.
|
---|
[24862] | 568 | static public class ParseErrorHandler implements ErrorHandler
|
---|
| 569 | {
|
---|
[18434] | 570 | protected String errorMessage = null;
|
---|
[24862] | 571 |
|
---|
[18434] | 572 | // Receive notification of a recoverable error.
|
---|
[24862] | 573 | public void error(SAXParseException exception)
|
---|
| 574 | {
|
---|
[18434] | 575 | handleError("Error:\n", exception);
|
---|
| 576 | }
|
---|
[24862] | 577 |
|
---|
| 578 | // Receive notification of a non-recoverable error.
|
---|
| 579 | public void fatalError(SAXParseException exception)
|
---|
| 580 | {
|
---|
[18434] | 581 | handleError("Fatal Error:\n", exception);
|
---|
| 582 | }
|
---|
[24862] | 583 |
|
---|
[18434] | 584 | // Receive notification of a warning.
|
---|
[24862] | 585 | public void warning(SAXParseException exception)
|
---|
| 586 | {
|
---|
[18434] | 587 | handleError("Warning:\n", exception);
|
---|
[24862] | 588 | }
|
---|
| 589 |
|
---|
| 590 | public String toString(SAXParseException e)
|
---|
| 591 | {
|
---|
[18434] | 592 | String msg = e.getMessage();
|
---|
| 593 | msg += "\nOn line(column): " + e.getLineNumber() + "(" + e.getColumnNumber() + ")";
|
---|
| 594 | msg += (e.getPublicId() != null) ? ("\npublic ID: " + e.getPublicId()) : "\nNo public ID";
|
---|
| 595 | msg += (e.getSystemId() != null) ? ("\nsystem ID: " + e.getSystemId()) : "\nNo system ID";
|
---|
[24862] | 596 |
|
---|
[18434] | 597 | return msg;
|
---|
| 598 | }
|
---|
[24862] | 599 |
|
---|
[18434] | 600 | // clears the errorPage variable after first call to this method
|
---|
[24862] | 601 | public String getErrorMessage()
|
---|
| 602 | {
|
---|
[18434] | 603 | String errMsg = this.errorMessage;
|
---|
[24862] | 604 | if (this.errorMessage != null)
|
---|
| 605 | {
|
---|
[18434] | 606 | this.errorMessage = null;
|
---|
| 607 | }
|
---|
| 608 | return errMsg;
|
---|
| 609 | }
|
---|
[24862] | 610 |
|
---|
[18434] | 611 | // sets the errorMessage member variable to the data stored in the exception
|
---|
| 612 | // and writes the errorMessage to the logger and tomcat's System.err
|
---|
[24862] | 613 | protected void handleError(String errorType, SAXParseException exception)
|
---|
| 614 | {
|
---|
| 615 | this.errorMessage = errorType + toString(exception);
|
---|
[18434] | 616 | System.err.println("\n****Error parsing xml:\n" + this.errorMessage + "\n****\n");
|
---|
| 617 | logger.error(this.errorMessage);
|
---|
| 618 | }
|
---|
| 619 | }
|
---|
[3222] | 620 | }
|
---|