[3235] | 1 | /*
|
---|
| 2 | * XMLConverter.java
|
---|
| 3 | * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
|
---|
| 4 | *
|
---|
| 5 | * This program is free software; you can redistribute it and/or modify
|
---|
| 6 | * it under the terms of the GNU General Public License as published by
|
---|
| 7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 8 | * (at your option) any later version.
|
---|
| 9 | *
|
---|
| 10 | * This program is distributed in the hope that it will be useful,
|
---|
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 13 | * GNU General Public License for more details.
|
---|
| 14 | *
|
---|
| 15 | * You should have received a copy of the GNU General Public License
|
---|
| 16 | * along with this program; if not, write to the Free Software
|
---|
| 17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 18 | */
|
---|
[3222] | 19 | package org.greenstone.gsdl3.util;
|
---|
| 20 |
|
---|
| 21 | // XML classes
|
---|
[18434] | 22 | import org.w3c.dom.DOMImplementation;
|
---|
[24862] | 23 | import org.w3c.dom.Document;
|
---|
| 24 | import org.w3c.dom.DocumentType;
|
---|
| 25 | import org.w3c.dom.Element;
|
---|
| 26 | import org.w3c.dom.Node;
|
---|
[3768] | 27 | import org.w3c.dom.NodeList;
|
---|
| 28 | import org.w3c.dom.NamedNodeMap;
|
---|
[3222] | 29 | import org.xml.sax.InputSource;
|
---|
[5187] | 30 | import org.xml.sax.EntityResolver;
|
---|
[18434] | 31 | import org.xml.sax.ErrorHandler;
|
---|
| 32 | import org.xml.sax.SAXParseException;
|
---|
[3222] | 33 | import org.apache.xerces.parsers.DOMParser;
|
---|
[18434] | 34 | import org.apache.xerces.dom.*; // for new Documents
|
---|
[3222] | 35 |
|
---|
| 36 | // other java classes
|
---|
| 37 | import java.io.Reader;
|
---|
[4087] | 38 | import java.io.InputStreamReader;
|
---|
[3222] | 39 | import java.io.StringReader;
|
---|
| 40 | import java.io.File;
|
---|
[4087] | 41 | import java.io.FileInputStream;
|
---|
[3222] | 42 | import java.io.FileReader;
|
---|
[23791] | 43 | import java.util.regex.*;
|
---|
[3222] | 44 |
|
---|
[13124] | 45 | import org.apache.log4j.*;
|
---|
| 46 |
|
---|
[23791] | 47 | // Apache Commons
|
---|
| 48 | import org.apache.commons.lang3.*;
|
---|
| 49 |
|
---|
| 50 | import java.util.*;
|
---|
| 51 | import java.lang.reflect.*;
|
---|
| 52 |
|
---|
/**
 * XMLConverter - utility class for Greenstone.
 *
 * Parses XML Strings and Files into DOM Documents, and converts DOM Nodes
 * into Strings. Different parsers behave differently, and this class is the
 * place to experiment with them; at present only Xerces is used.
 */
|
---|
[24862] | 61 | public class XMLConverter
|
---|
| 62 | {
|
---|
[3222] | 63 |
|
---|
[24862] | 64 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.XMLConverter.class.getName());
|
---|
[13124] | 65 |
|
---|
[24863] | 66 | protected EntityResolver resolver = null;
|
---|
| 67 |
|
---|
[24862] | 68 | /** xerces parser */
|
---|
| 69 | protected DOMParser parser = null;
|
---|
[3222] | 70 |
|
---|
[24862] | 71 | private static boolean outputEscaping = true;
|
---|
[3768] | 72 |
|
---|
/**
 * The no-args constructor.
 *
 * Creates the shared Xerces parser and configures it to be non-validating,
 * to skip external DTDs, to build fully expanded DOM trees, and to record
 * parse problems in a {@link ParseErrorHandler}.
 *
 * Any Exception raised while creating or configuring the parser is logged
 * and swallowed rather than propagated to the caller.
 */
public XMLConverter()
{
    try
    {
        this.parser = new DOMParser();
        this.parser.setFeature("http://xml.org/sax/features/validation", false);
        // don't try and load external DTD - no need if we are not validating, and may cause connection errors if a proxy is not set up.
        this.parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        // a performance test showed that having this on lead to increased
        // memory use for small-medium docs, and not much gain for large
        // docs.
        // http://www.sosnoski.com/opensrc/xmlbench/conclusions.html
        this.parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
        // add an errorhandler to the parser which will store a useful error message on encountering fatal errors, errors and warnings when parsing
        // this errormessage can then be converted to xhtml and displayed in a browser.
        this.parser.setErrorHandler(new ParseErrorHandler());
    }
    catch (Exception e)
    {
        // hand the exception itself to the logger so the stack trace is kept,
        // matching what getDOM(File) does
        logger.error(e.getMessage(), e);
    }
}
|
---|
| 96 |
|
---|
[24862] | 97 | /** sets the entity resolver. pass in null to unset it */
|
---|
| 98 | public void setEntityResolver(EntityResolver er)
|
---|
| 99 | {
|
---|
[24863] | 100 | this.resolver = er;
|
---|
[24862] | 101 | this.parser.setEntityResolver(er);
|
---|
| 102 | }
|
---|
[16688] | 103 |
|
---|
[24862] | 104 | /**
|
---|
| 105 | * Given a Node representing an Element or Document, will return the
|
---|
| 106 | * Element/docroot Element. Returns null if the Node was not an element.
|
---|
| 107 | */
|
---|
| 108 | public static Element nodeToElement(Node node)
|
---|
| 109 | {
|
---|
| 110 | if (node == null)
|
---|
| 111 | {
|
---|
| 112 | return null;
|
---|
| 113 | }
|
---|
| 114 | short nodeType = node.getNodeType();
|
---|
| 115 |
|
---|
| 116 | if (nodeType == Node.DOCUMENT_NODE)
|
---|
| 117 | {
|
---|
| 118 | Document docNode = (Document) node;
|
---|
| 119 | return docNode.getDocumentElement();
|
---|
| 120 | }
|
---|
| 121 | else if (nodeType == Node.ELEMENT_NODE)
|
---|
| 122 | {
|
---|
| 123 | return (Element) node;
|
---|
| 124 | }
|
---|
| 125 | else
|
---|
| 126 | {
|
---|
| 127 | String message = "Expecting Document or Element node type but got " + node.getNodeName() + "\nReturning null";
|
---|
| 128 | System.err.println(message);
|
---|
| 129 | logger.warn(message);
|
---|
| 130 | return null;
|
---|
| 131 | }
|
---|
[16999] | 132 | }
|
---|
[16688] | 133 |
|
---|
[24862] | 134 | /** returns a DOM Document */
|
---|
| 135 | public Document getDOM(String in)
|
---|
| 136 | {
|
---|
| 137 |
|
---|
| 138 | try
|
---|
| 139 | {
|
---|
| 140 | Reader reader = new StringReader(in);
|
---|
| 141 | InputSource xml_source = new InputSource(reader);
|
---|
| 142 |
|
---|
[24863] | 143 | DOMParser parser = new DOMParser();
|
---|
| 144 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 145 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 146 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 147 | if(resolver != null)
|
---|
| 148 | {
|
---|
| 149 | parser.setEntityResolver(this.resolver);
|
---|
| 150 | }
|
---|
| 151 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 152 | parser.parse(xml_source);
|
---|
| 153 |
|
---|
| 154 | Document doc = parser.getDocument();
|
---|
[24862] | 155 |
|
---|
| 156 | return doc;
|
---|
| 157 |
|
---|
| 158 | }
|
---|
| 159 | catch (Exception e)
|
---|
| 160 | {
|
---|
| 161 | logger.error(e.getMessage());
|
---|
| 162 | }
|
---|
| 163 | return null;
|
---|
[16688] | 164 | }
|
---|
[24862] | 165 |
|
---|
| 166 | /** returns a DOM Document */
|
---|
| 167 | public Document getDOM(File in)
|
---|
| 168 | {
|
---|
| 169 | try
|
---|
| 170 | {
|
---|
| 171 | FileReader reader = new FileReader(in);
|
---|
| 172 | InputSource xml_source = new InputSource(reader);
|
---|
[24863] | 173 |
|
---|
| 174 | DOMParser parser = new DOMParser();
|
---|
| 175 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 176 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 177 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 178 | if(resolver != null)
|
---|
| 179 | {
|
---|
| 180 | parser.setEntityResolver(this.resolver);
|
---|
| 181 | }
|
---|
| 182 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 183 | parser.parse(xml_source);
|
---|
| 184 |
|
---|
| 185 | Document doc = parser.getDocument();
|
---|
[24862] | 186 | return doc;
|
---|
| 187 |
|
---|
| 188 | }
|
---|
| 189 | catch (Exception e)
|
---|
| 190 | {
|
---|
| 191 | logger.error(e.getMessage(), e);
|
---|
| 192 |
|
---|
| 193 | }
|
---|
| 194 | return null;
|
---|
[16688] | 195 | }
|
---|
| 196 |
|
---|
[24862] | 197 | /** returns a DOM document */
|
---|
| 198 | public Document getDOM(File in, String encoding)
|
---|
| 199 | {
|
---|
| 200 | try
|
---|
| 201 | {
|
---|
| 202 |
|
---|
| 203 | InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
|
---|
| 204 | InputSource xml_source = new InputSource(isr);
|
---|
| 205 |
|
---|
[24863] | 206 | DOMParser parser = new DOMParser();
|
---|
| 207 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 208 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 209 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 210 | if(resolver != null)
|
---|
| 211 | {
|
---|
| 212 | parser.setEntityResolver(this.resolver);
|
---|
| 213 | }
|
---|
| 214 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 215 | parser.parse(xml_source);
|
---|
| 216 |
|
---|
| 217 | Document doc = parser.getDocument();
|
---|
[24862] | 218 |
|
---|
| 219 | return doc;
|
---|
| 220 |
|
---|
| 221 | }
|
---|
| 222 | catch (Exception e)
|
---|
| 223 | {
|
---|
| 224 | logger.error(e.getMessage());
|
---|
| 225 | }
|
---|
| 226 | return null;
|
---|
[3222] | 227 | }
|
---|
[4087] | 228 |
|
---|
[24862] | 229 | /** creates a new empty DOM Document */
|
---|
| 230 | public static Document newDOM()
|
---|
| 231 | {
|
---|
| 232 | Document doc = new DocumentImpl();
|
---|
| 233 | return doc;
|
---|
[4087] | 234 | }
|
---|
| 235 |
|
---|
[24862] | 236 | /**
|
---|
| 237 | * This method's parameters represent the parts of the Doctype of this
|
---|
| 238 | * Document that is to be created. For more info see
|
---|
| 239 | * http://xerces.apache.org
|
---|
| 240 | * /xerces-j/apiDocs/org/apache/xerces/dom/DocumentTypeImpl
|
---|
| 241 | * .html#DocumentTypeImpl
|
---|
| 242 | * (org.apache.xerces.dom.CoreDocumentImpl,%20java.lang.String)
|
---|
| 243 | *
|
---|
| 244 | * */
|
---|
| 245 | public static Document newDOM(String qualifiedName, String publicID, String systemID)
|
---|
| 246 | {
|
---|
| 247 | // create empty DOM document
|
---|
| 248 | DocumentImpl docImpl = new DocumentImpl();
|
---|
[3222] | 249 |
|
---|
[24862] | 250 | // Need to use the document to create the docType for it
|
---|
| 251 | DocumentType myDocType = new DocumentTypeImpl(docImpl, qualifiedName, publicID, systemID);
|
---|
[3222] | 252 |
|
---|
[18434] | 253 | // Although we have created the docType using the document, we need to still
|
---|
| 254 | // put it into the empty document we just created
|
---|
[24862] | 255 | try
|
---|
| 256 | {
|
---|
[18434] | 257 | docImpl.appendChild(myDocType);
|
---|
| 258 | }
|
---|
[24862] | 259 | catch (Exception e)
|
---|
| 260 | {
|
---|
| 261 | System.out.println("Could not append docType because: " + e);
|
---|
| 262 | }
|
---|
| 263 |
|
---|
[18434] | 264 | // return the document containing a DocType
|
---|
[24862] | 265 | return docImpl;
|
---|
| 266 | }
|
---|
[3222] | 267 |
|
---|
[24862] | 268 | /** returns the Node as a String */
|
---|
| 269 | public static String getString(Node xmlNode)
|
---|
| 270 | {
|
---|
| 271 | outputEscaping = true;
|
---|
| 272 | StringBuffer xmlRepresentation = new StringBuffer();
|
---|
| 273 | getString(xmlNode, xmlRepresentation, 0, false);
|
---|
| 274 | return xmlRepresentation.toString();
|
---|
| 275 | }
|
---|
[3768] | 276 |
|
---|
[24862] | 277 | /**
|
---|
| 278 | * returns the node as a nicely formatted String - this introduces extra
|
---|
| 279 | * text nodes if the String is read back in as a DOM, so should only be used
|
---|
| 280 | * for printing
|
---|
| 281 | */
|
---|
| 282 | public static String getPrettyString(Node xmlNode)
|
---|
| 283 | {
|
---|
[3768] | 284 |
|
---|
[24862] | 285 | outputEscaping = true;
|
---|
| 286 | StringBuffer xmlRepresentation = new StringBuffer();
|
---|
| 287 | getString(xmlNode, xmlRepresentation, 0, true);
|
---|
| 288 | return xmlRepresentation.toString();
|
---|
| 289 | }
|
---|
[23791] | 290 |
|
---|
[24862] | 291 | /*
|
---|
| 292 | * For the purposes of logger.debug statements, where this is called and
|
---|
| 293 | * hence outputted, returns an empty string if debugging is not enabled
|
---|
| 294 | */
|
---|
| 295 | public static String getPrettyStringLogger(Node xmlNode, Logger log)
|
---|
| 296 | {
|
---|
[23791] | 297 |
|
---|
[24862] | 298 | if (log.isDebugEnabled())
|
---|
| 299 | return getPrettyString(xmlNode);
|
---|
[23791] | 300 |
|
---|
[24862] | 301 | return "";
|
---|
| 302 |
|
---|
[10202] | 303 | }
|
---|
[3768] | 304 |
|
---|
[24862] | 305 | private static void getString(Node xmlNode, StringBuffer xmlRepresentation, int depth, boolean pretty)
|
---|
| 306 | {
|
---|
[3768] | 307 |
|
---|
[24862] | 308 | if (xmlNode == null)
|
---|
| 309 | {
|
---|
| 310 | xmlRepresentation.append("<null>");
|
---|
| 311 | return;
|
---|
[3970] | 312 | }
|
---|
[3768] | 313 |
|
---|
[24862] | 314 | short nodeType = xmlNode.getNodeType();
|
---|
| 315 | String nodeName = xmlNode.getNodeName();
|
---|
[3768] | 316 |
|
---|
[24862] | 317 | if (nodeType == Node.DOCUMENT_NODE)
|
---|
| 318 | {
|
---|
| 319 | Document xmlDocNode = (Document) xmlNode;
|
---|
| 320 |
|
---|
| 321 | //if (xmlDocNode.getDoctype() == null) {
|
---|
| 322 | //System.err.println("Doctype is null.");
|
---|
| 323 | //}
|
---|
| 324 | //else {
|
---|
| 325 | if (xmlDocNode.getDoctype() != null)
|
---|
| 326 | {
|
---|
| 327 | DocumentType dt = xmlDocNode.getDoctype();
|
---|
| 328 |
|
---|
| 329 | String name = dt.getName();
|
---|
| 330 | String pid = dt.getPublicId();
|
---|
| 331 | String sid = dt.getSystemId();
|
---|
| 332 |
|
---|
| 333 | // Use previously assigned name, not dt.getName() again
|
---|
| 334 | String doctype_str = "<!DOCTYPE " + name + " PUBLIC \"" + pid + "\" \"" + sid + "\">\n";
|
---|
| 335 |
|
---|
| 336 | xmlRepresentation.append(doctype_str);
|
---|
| 337 | }
|
---|
| 338 | getString(xmlDocNode.getDocumentElement(), xmlRepresentation, depth, pretty);
|
---|
| 339 | return;
|
---|
[4285] | 340 | }
|
---|
[24862] | 341 | // Handle Element nodes
|
---|
| 342 | if (nodeType == Node.ELEMENT_NODE)
|
---|
| 343 | {
|
---|
| 344 | if (pretty)
|
---|
| 345 | {
|
---|
| 346 | xmlRepresentation.append("\n");
|
---|
| 347 | for (int i = 0; i < depth; i++)
|
---|
| 348 | {
|
---|
| 349 | xmlRepresentation.append(" ");
|
---|
| 350 | }
|
---|
| 351 | }
|
---|
| 352 |
|
---|
| 353 | // Write opening tag
|
---|
| 354 | xmlRepresentation.append("<");
|
---|
| 355 | xmlRepresentation.append(nodeName);
|
---|
| 356 |
|
---|
| 357 | // Write the node attributes
|
---|
| 358 | NamedNodeMap nodeAttributes = xmlNode.getAttributes();
|
---|
| 359 | for (int i = 0; i < nodeAttributes.getLength(); i++)
|
---|
| 360 | {
|
---|
| 361 | Node attribute = nodeAttributes.item(i);
|
---|
| 362 | xmlRepresentation.append(" ");
|
---|
| 363 | xmlRepresentation.append(attribute.getNodeName());
|
---|
| 364 | xmlRepresentation.append("=\"");
|
---|
| 365 | xmlRepresentation.append(attribute.getNodeValue());
|
---|
| 366 | xmlRepresentation.append("\"");
|
---|
| 367 | }
|
---|
| 368 |
|
---|
| 369 | // If the node has no children, close the opening tag and return
|
---|
| 370 | if (xmlNode.hasChildNodes() == false)
|
---|
| 371 | {
|
---|
| 372 | // This produces somewhat ugly output, but it is necessary to compensate
|
---|
| 373 | // for display bugs in Netscape. Firstly, the space is needed before the
|
---|
| 374 | // closing bracket otherwise Netscape will ignore some tags (<br/>, for
|
---|
| 375 | // example). Also, a newline character would be expected after the tag,
|
---|
| 376 | // but this causes problems with the display of links (the link text
|
---|
| 377 | // will contain a newline character, which is displayed badly).
|
---|
| 378 | xmlRepresentation.append(" />");
|
---|
| 379 | return;
|
---|
| 380 | }
|
---|
| 381 |
|
---|
| 382 | // Close the opening tag
|
---|
| 383 | xmlRepresentation.append(">");
|
---|
| 384 |
|
---|
| 385 | // Apply recursively to the children of this node
|
---|
| 386 | // hack for nodes next to text nodes - dont make them pretty
|
---|
| 387 | // this is needed for text inside a <pre> element - any new lines
|
---|
| 388 | // or spaces around the span elements show up in the text
|
---|
| 389 | NodeList children = xmlNode.getChildNodes();
|
---|
| 390 | boolean do_pretty = pretty;
|
---|
| 391 | for (int i = 0; i < children.getLength(); i++)
|
---|
| 392 | {
|
---|
| 393 | if (children.item(i).getNodeType() == Node.TEXT_NODE)
|
---|
| 394 | {
|
---|
| 395 | do_pretty = false; // if there is a text node amongst the children, do teh following nodes in non-pretty mode - hope this doesn't stuff up something else
|
---|
| 396 | }
|
---|
| 397 | getString(children.item(i), xmlRepresentation, depth + 1, do_pretty);
|
---|
| 398 | }
|
---|
| 399 |
|
---|
| 400 | // Write closing tag
|
---|
| 401 | if (pretty)
|
---|
| 402 | {
|
---|
| 403 | if (xmlRepresentation.charAt(xmlRepresentation.length() - 1) == '\n')
|
---|
| 404 | {
|
---|
| 405 | for (int i = 0; i < depth; i++)
|
---|
| 406 | xmlRepresentation.append(" ");
|
---|
| 407 | }
|
---|
| 408 | }
|
---|
| 409 | xmlRepresentation.append("</");
|
---|
| 410 | xmlRepresentation.append(nodeName);
|
---|
| 411 | xmlRepresentation.append(">");
|
---|
| 412 | if (pretty)
|
---|
| 413 | {
|
---|
| 414 | xmlRepresentation.append("\n");
|
---|
| 415 | }
|
---|
[3970] | 416 | }
|
---|
[3768] | 417 |
|
---|
[24862] | 418 | // Handle Text nodes
|
---|
| 419 | else if (nodeType == Node.TEXT_NODE)
|
---|
| 420 | {
|
---|
| 421 | String text = xmlNode.getNodeValue();
|
---|
[23791] | 422 |
|
---|
[24862] | 423 | // Perform output escaping, if required
|
---|
| 424 | // Apache Commons replace method is far superior to String.replaceAll - very fast!
|
---|
| 425 | if (outputEscaping)
|
---|
| 426 | {
|
---|
[3768] | 427 |
|
---|
[24862] | 428 | text = StringUtils.replace(text, "&", "&");
|
---|
| 429 | text = StringUtils.replace(text, "<", "<");
|
---|
| 430 | text = StringUtils.replace(text, ">", ">");
|
---|
| 431 | text = StringUtils.replace(text, "'", "'");
|
---|
| 432 | text = StringUtils.replace(text, "\"", """);
|
---|
| 433 | }
|
---|
[23791] | 434 |
|
---|
[24862] | 435 | // Remove any control-C characters
|
---|
| 436 | text = StringUtils.replace(text, "" + (char) 3, "");
|
---|
[3768] | 437 |
|
---|
[24862] | 438 | xmlRepresentation.append(text);
|
---|
| 439 | }
|
---|
| 440 |
|
---|
| 441 | // Handle Processing Instruction nodes
|
---|
| 442 | else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE)
|
---|
| 443 | {
|
---|
| 444 | if (nodeName.equals("javax.xml.transform.disable-output-escaping"))
|
---|
| 445 | {
|
---|
| 446 | outputEscaping = false;
|
---|
| 447 | }
|
---|
| 448 | else if (nodeName.equals("javax.xml.transform.enable-output-escaping"))
|
---|
| 449 | {
|
---|
| 450 | outputEscaping = true;
|
---|
| 451 | }
|
---|
| 452 | else
|
---|
| 453 | {
|
---|
| 454 | logger.warn("Unhandled processing instruction " + nodeName);
|
---|
| 455 | }
|
---|
| 456 | }
|
---|
| 457 |
|
---|
| 458 | else if (nodeType == Node.COMMENT_NODE)
|
---|
| 459 | {
|
---|
| 460 | String text = xmlNode.getNodeValue();
|
---|
| 461 | xmlRepresentation.append("<!-- ");
|
---|
| 462 | xmlRepresentation.append(text);
|
---|
| 463 | xmlRepresentation.append(" -->");
|
---|
| 464 | }
|
---|
| 465 |
|
---|
| 466 | // A type of node that is not handled yet
|
---|
| 467 | else
|
---|
| 468 | {
|
---|
| 469 | logger.warn("Unknown node type: " + nodeType + " " + getNodeTypeString(nodeType));
|
---|
| 470 | }
|
---|
| 471 |
|
---|
| 472 | return;
|
---|
[3768] | 473 | }
|
---|
| 474 |
|
---|
[24862] | 475 | protected static String getNodeTypeString(short node_type)
|
---|
| 476 | {
|
---|
[3908] | 477 |
|
---|
[24862] | 478 | String type = "";
|
---|
| 479 | switch (node_type)
|
---|
| 480 | {
|
---|
| 481 | case Node.ATTRIBUTE_NODE:
|
---|
| 482 | type = "ATTRIBUTE_NODE";
|
---|
| 483 | break;
|
---|
| 484 | case Node.CDATA_SECTION_NODE:
|
---|
| 485 | type = "CDATA_SECTION_NODE";
|
---|
| 486 | break;
|
---|
| 487 | case Node.COMMENT_NODE:
|
---|
| 488 | type = "COMMENT_NODE";
|
---|
| 489 | break;
|
---|
| 490 | case Node.DOCUMENT_FRAGMENT_NODE:
|
---|
| 491 | type = "DOCUMENT_FRAGMENT_NODE";
|
---|
| 492 | break;
|
---|
| 493 | case Node.DOCUMENT_NODE:
|
---|
| 494 | type = "DOCUMENT_NODE";
|
---|
| 495 | break;
|
---|
| 496 | case Node.DOCUMENT_TYPE_NODE:
|
---|
| 497 | type = "DOCUMENT_TYPE_NODE";
|
---|
| 498 | break;
|
---|
| 499 | case Node.ELEMENT_NODE:
|
---|
| 500 | type = "ELEMENT_NODE";
|
---|
| 501 | break;
|
---|
| 502 | case Node.ENTITY_NODE:
|
---|
| 503 | type = "ENTITY_NODE";
|
---|
| 504 | break;
|
---|
| 505 | case Node.ENTITY_REFERENCE_NODE:
|
---|
| 506 | type = "ENTITY_REFERENCE_NODE";
|
---|
| 507 | break;
|
---|
| 508 | case Node.NOTATION_NODE:
|
---|
| 509 | type = "NOTATION_NODE";
|
---|
| 510 | break;
|
---|
| 511 | case Node.PROCESSING_INSTRUCTION_NODE:
|
---|
| 512 | type = "PROCESSING_INSTRUCTION_NODE";
|
---|
| 513 | break;
|
---|
| 514 | case Node.TEXT_NODE:
|
---|
| 515 | type = "TEXT_NODE";
|
---|
| 516 | break;
|
---|
| 517 | default:
|
---|
| 518 | type = "UNKNOWN";
|
---|
| 519 | }
|
---|
[3908] | 520 |
|
---|
[24862] | 521 | return type;
|
---|
[3908] | 522 | }
|
---|
| 523 |
|
---|
[18434] | 524 | // returns null if there no error occurred during parsing, or else returns the error message
|
---|
[24862] | 525 | public String getParseErrorMessage()
|
---|
| 526 | {
|
---|
| 527 | ParseErrorHandler errorHandler = (ParseErrorHandler) this.parser.getErrorHandler();
|
---|
[18434] | 528 | return errorHandler.getErrorMessage();
|
---|
| 529 | }
|
---|
[24862] | 530 |
|
---|
[18434] | 531 | // Errorhandler for SAXParseExceptions that are errors, fatal errors or warnings. This class can be used to
|
---|
| 532 | // register a handler for any fatal errors, errors and warnings that may occur when parsing an xml file. The
|
---|
| 533 | // errors are printed both to the greenstone.log and to the tomcat console (System.err), and the error message
|
---|
| 534 | // is stored in the errorMessage variable so that it can be retrieved and be used to generate an xhtml error page.
|
---|
[24862] | 535 | static public class ParseErrorHandler implements ErrorHandler
|
---|
| 536 | {
|
---|
[18434] | 537 | protected String errorMessage = null;
|
---|
[24862] | 538 |
|
---|
[18434] | 539 | // Receive notification of a recoverable error.
|
---|
[24862] | 540 | public void error(SAXParseException exception)
|
---|
| 541 | {
|
---|
[18434] | 542 | handleError("Error:\n", exception);
|
---|
| 543 | }
|
---|
[24862] | 544 |
|
---|
| 545 | // Receive notification of a non-recoverable error.
|
---|
| 546 | public void fatalError(SAXParseException exception)
|
---|
| 547 | {
|
---|
[18434] | 548 | handleError("Fatal Error:\n", exception);
|
---|
| 549 | }
|
---|
[24862] | 550 |
|
---|
[18434] | 551 | // Receive notification of a warning.
|
---|
[24862] | 552 | public void warning(SAXParseException exception)
|
---|
| 553 | {
|
---|
[18434] | 554 | handleError("Warning:\n", exception);
|
---|
[24862] | 555 | }
|
---|
| 556 |
|
---|
| 557 | public String toString(SAXParseException e)
|
---|
| 558 | {
|
---|
[18434] | 559 | String msg = e.getMessage();
|
---|
| 560 | msg += "\nOn line(column): " + e.getLineNumber() + "(" + e.getColumnNumber() + ")";
|
---|
| 561 | msg += (e.getPublicId() != null) ? ("\npublic ID: " + e.getPublicId()) : "\nNo public ID";
|
---|
| 562 | msg += (e.getSystemId() != null) ? ("\nsystem ID: " + e.getSystemId()) : "\nNo system ID";
|
---|
[24862] | 563 |
|
---|
[18434] | 564 | return msg;
|
---|
| 565 | }
|
---|
[24862] | 566 |
|
---|
[18434] | 567 | // clears the errorPage variable after first call to this method
|
---|
[24862] | 568 | public String getErrorMessage()
|
---|
| 569 | {
|
---|
[18434] | 570 | String errMsg = this.errorMessage;
|
---|
[24862] | 571 | if (this.errorMessage != null)
|
---|
| 572 | {
|
---|
[18434] | 573 | this.errorMessage = null;
|
---|
| 574 | }
|
---|
| 575 | return errMsg;
|
---|
| 576 | }
|
---|
[24862] | 577 |
|
---|
[18434] | 578 | // sets the errorMessage member variable to the data stored in the exception
|
---|
| 579 | // and writes the errorMessage to the logger and tomcat's System.err
|
---|
[24862] | 580 | protected void handleError(String errorType, SAXParseException exception)
|
---|
| 581 | {
|
---|
| 582 | this.errorMessage = errorType + toString(exception);
|
---|
[18434] | 583 | System.err.println("\n****Error parsing xml:\n" + this.errorMessage + "\n****\n");
|
---|
| 584 | logger.error(this.errorMessage);
|
---|
| 585 | }
|
---|
| 586 | }
|
---|
[3222] | 587 | }
|
---|