[3235] | 1 | /*
|
---|
| 2 | * XMLConverter.java
|
---|
| 3 | * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
|
---|
| 4 | *
|
---|
| 5 | * This program is free software; you can redistribute it and/or modify
|
---|
| 6 | * it under the terms of the GNU General Public License as published by
|
---|
| 7 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 8 | * (at your option) any later version.
|
---|
| 9 | *
|
---|
| 10 | * This program is distributed in the hope that it will be useful,
|
---|
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 13 | * GNU General Public License for more details.
|
---|
| 14 | *
|
---|
| 15 | * You should have received a copy of the GNU General Public License
|
---|
| 16 | * along with this program; if not, write to the Free Software
|
---|
| 17 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 18 | */
|
---|
[3222] | 19 | package org.greenstone.gsdl3.util;
|
---|
| 20 |
|
---|
| 21 | // XML classes
|
---|
[18434] | 22 | import org.w3c.dom.DOMImplementation;
|
---|
[24862] | 23 | import org.w3c.dom.Document;
|
---|
| 24 | import org.w3c.dom.DocumentType;
|
---|
| 25 | import org.w3c.dom.Element;
|
---|
| 26 | import org.w3c.dom.Node;
|
---|
[3768] | 27 | import org.w3c.dom.NodeList;
|
---|
| 28 | import org.w3c.dom.NamedNodeMap;
|
---|
[3222] | 29 | import org.xml.sax.InputSource;
|
---|
[5187] | 30 | import org.xml.sax.EntityResolver;
|
---|
[18434] | 31 | import org.xml.sax.ErrorHandler;
|
---|
| 32 | import org.xml.sax.SAXParseException;
|
---|
[28962] | 33 | import org.xml.sax.SAXNotRecognizedException;
|
---|
| 34 | import org.xml.sax.SAXNotSupportedException;
|
---|
[3222] | 35 | import org.apache.xerces.parsers.DOMParser;
|
---|
[28962] | 36 | import org.apache.xerces.dom.DocumentImpl; // for new Documents
|
---|
| 37 | import org.apache.xerces.dom.DocumentTypeImpl;
|
---|
[3222] | 38 |
|
---|
| 39 | // other java classes
|
---|
[28849] | 40 | import java.io.BufferedWriter;
|
---|
[25655] | 41 | import java.io.ByteArrayInputStream;
|
---|
[28849] | 42 | import java.io.FileWriter;
|
---|
[25655] | 43 | import java.io.InputStream;
|
---|
[3222] | 44 | import java.io.Reader;
|
---|
[4087] | 45 | import java.io.InputStreamReader;
|
---|
[3222] | 46 | import java.io.StringReader;
|
---|
| 47 | import java.io.File;
|
---|
[4087] | 48 | import java.io.FileInputStream;
|
---|
[3222] | 49 | import java.io.FileReader;
|
---|
[23791] | 50 | import java.util.regex.*;
|
---|
[3222] | 51 |
|
---|
[13124] | 52 | import org.apache.log4j.*;
|
---|
| 53 |
|
---|
[23791] | 54 | // Apache Commons
|
---|
| 55 | import org.apache.commons.lang3.*;
|
---|
| 56 |
|
---|
| 57 | import java.util.*;
|
---|
| 58 | import java.lang.reflect.*;
|
---|
| 59 |
|
---|
[24862] | 60 | /**
|
---|
| 61 | * XMLConverter - utility class for greenstone
|
---|
| 62 | *
|
---|
[28962] | 63 | * generates new Documents
|
---|
| 64 | * parses XML Strings into Documents, converts Nodes to Strings
|
---|
| 65 | * different parsers have different behaviour - can experiment in here
|
---|
| 66 | * at the moment we only use xerces
|
---|
| 67 | * all xerces specific code is in here
|
---|
[3222] | 68 | */
|
---|
[24862] | 69 | public class XMLConverter
|
---|
| 70 | {
|
---|
[3222] | 71 |
|
---|
[24862] | 72 | static Logger logger = Logger.getLogger(org.greenstone.gsdl3.util.XMLConverter.class.getName());
|
---|
[13124] | 73 |
|
---|
[24862] | 74 | /** the no-args constructor */
|
---|
| 75 | public XMLConverter()
|
---|
| 76 | {
|
---|
[3222] | 77 |
|
---|
[24862] | 78 | }
|
---|
[16688] | 79 |
|
---|
[24862] | 80 | /** returns a DOM Document */
|
---|
[28962] | 81 | public static Document getDOM(String in)
|
---|
[24862] | 82 | {
|
---|
| 83 |
|
---|
| 84 | try
|
---|
| 85 | {
|
---|
| 86 | Reader reader = new StringReader(in);
|
---|
| 87 | InputSource xml_source = new InputSource(reader);
|
---|
[28962] | 88 | Document doc = getDOM(xml_source, null);
|
---|
[26168] | 89 | reader.close();
|
---|
[24862] | 90 | return doc;
|
---|
| 91 |
|
---|
| 92 | }
|
---|
| 93 | catch (Exception e)
|
---|
| 94 | {
|
---|
| 95 | logger.error(e.getMessage());
|
---|
[30477] | 96 | logger.error("Input string was:\n" + in);
|
---|
| 97 | e.printStackTrace();
|
---|
[24862] | 98 | }
|
---|
| 99 | return null;
|
---|
[16688] | 100 | }
|
---|
[24862] | 101 |
|
---|
| 102 | /** returns a DOM Document */
|
---|
[28962] | 103 | public static Document getDOM(String in, String encoding)
|
---|
[25655] | 104 | {
|
---|
| 105 | try
|
---|
| 106 | {
|
---|
| 107 | InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream(in.getBytes(encoding)), encoding);
|
---|
| 108 | InputSource xml_source = new InputSource(reader);
|
---|
[28962] | 109 | Document doc = getDOM(xml_source, null);
|
---|
[26168] | 110 | reader.close();
|
---|
[25655] | 111 | return doc;
|
---|
| 112 |
|
---|
| 113 | }
|
---|
| 114 | catch (Exception e)
|
---|
| 115 | {
|
---|
| 116 | logger.error(e.getMessage());
|
---|
[30477] | 117 | logger.error("Input string was:\n" + in);
|
---|
| 118 | e.printStackTrace();
|
---|
[25655] | 119 | }
|
---|
| 120 | return null;
|
---|
| 121 | }
|
---|
[26168] | 122 |
|
---|
[25655] | 123 | /** returns a DOM Document */
|
---|
[28962] | 124 | public static Document getDOM(File in) {
|
---|
[24862] | 125 | try
|
---|
| 126 | {
|
---|
| 127 | FileReader reader = new FileReader(in);
|
---|
| 128 | InputSource xml_source = new InputSource(reader);
|
---|
[28962] | 129 | Document doc = getDOM(xml_source, null);
|
---|
[26168] | 130 | reader.close();
|
---|
[24862] | 131 | return doc;
|
---|
| 132 |
|
---|
| 133 | }
|
---|
| 134 | catch (Exception e)
|
---|
| 135 | {
|
---|
| 136 | logger.error(e.getMessage(), e);
|
---|
[30477] | 137 | logger.error("File was:\n" + in.getPath());
|
---|
| 138 | e.printStackTrace();
|
---|
[24862] | 139 |
|
---|
| 140 | }
|
---|
| 141 | return null;
|
---|
[16688] | 142 | }
|
---|
| 143 |
|
---|
[28962] | 144 | public static Document getDOM(File in, String encoding) {
|
---|
| 145 | return getDOM(in, encoding, null);
|
---|
| 146 | }
|
---|
| 147 |
|
---|
[24862] | 148 | /** returns a DOM document */
|
---|
[28962] | 149 | public static Document getDOM(File in, String encoding, EntityResolver er) {
|
---|
| 150 |
|
---|
| 151 | try {
|
---|
| 152 |
|
---|
| 153 |
|
---|
| 154 | InputStreamReader isr = new InputStreamReader(new FileInputStream(in), encoding);
|
---|
| 155 | InputSource xml_source = new InputSource(isr);
|
---|
| 156 | Document doc = getDOM(xml_source, er);
|
---|
| 157 | isr.close();
|
---|
| 158 | return doc;
|
---|
| 159 |
|
---|
| 160 | }
|
---|
| 161 | catch (Exception e)
|
---|
| 162 | {
|
---|
| 163 | logger.error(e.getMessage());
|
---|
[30477] | 164 | logger.error("File was:\n" + in.getPath());
|
---|
| 165 | e.printStackTrace();
|
---|
[28962] | 166 | }
|
---|
| 167 | return null;
|
---|
| 168 | }
|
---|
[24862] | 169 |
|
---|
[29728] | 170 | public static Document getDOM(File in, EntityResolver er) {
|
---|
| 171 |
|
---|
| 172 | try {
|
---|
| 173 | InputSource xml_source = new InputSource(new FileInputStream(in));
|
---|
| 174 | Document doc = getDOM(xml_source, er);
|
---|
| 175 | return doc;
|
---|
| 176 | }
|
---|
| 177 | catch (Exception e)
|
---|
| 178 | {
|
---|
| 179 | logger.error(e.getMessage());
|
---|
[30477] | 180 | logger.error("File was:\n" + in.getPath());
|
---|
| 181 | e.printStackTrace();
|
---|
[29728] | 182 | }
|
---|
| 183 | return null;
|
---|
| 184 | }
|
---|
| 185 |
|
---|
[28962] | 186 | public static Document getDOM(InputSource source, EntityResolver er) {
|
---|
| 187 |
|
---|
| 188 | try {
|
---|
| 189 | DOMParser parser = new DOMParser();
|
---|
| 190 | parser.setFeature("http://xml.org/sax/features/validation", false);
|
---|
| 191 | // don't try and load external DTD - no need if we are not validating, and may cause connection errors if a proxy is not set up.
|
---|
| 192 | parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
|
---|
| 193 | // a performance test showed that having this on lead to increased
|
---|
| 194 | // memory use for small-medium docs, and not much gain for large
|
---|
| 195 | // docs.
|
---|
| 196 | // http://www.sosnoski.com/opensrc/xmlbench/conclusions.html
|
---|
| 197 | parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", false);
|
---|
| 198 | // add an errorhandler to the parser which will output messages on encountering fatal errors, errors and warnings when parsing
|
---|
| 199 | parser.setErrorHandler(new ParseErrorHandler());
|
---|
| 200 | if (er != null) {
|
---|
| 201 | parser.setEntityResolver(er);
|
---|
| 202 | }
|
---|
| 203 | parser.parse(source);
|
---|
[24862] | 204 |
|
---|
[28962] | 205 | Document doc = parser.getDocument();
|
---|
| 206 | return doc;
|
---|
[26168] | 207 |
|
---|
[28962] | 208 | } catch (Exception e) {
|
---|
| 209 |
|
---|
| 210 | logger.error(e.getMessage());
|
---|
[30477] | 211 | logger.error("InputSource:\n");
|
---|
| 212 | e.printStackTrace();
|
---|
[28962] | 213 | }
|
---|
| 214 | return null;
|
---|
| 215 |
|
---|
| 216 | }
|
---|
[24862] | 217 |
|
---|
| 218 | /** creates a new empty DOM Document */
|
---|
[28962] | 219 | public static Document newDOM()
|
---|
[24862] | 220 | {
|
---|
| 221 | Document doc = new DocumentImpl();
|
---|
| 222 | return doc;
|
---|
[4087] | 223 | }
|
---|
| 224 |
|
---|
[24862] | 225 | /**
|
---|
| 226 | * This method's parameters represent the parts of the Doctype of this
|
---|
| 227 | * Document that is to be created. For more info see
|
---|
| 228 | * http://xerces.apache.org
|
---|
| 229 | * /xerces-j/apiDocs/org/apache/xerces/dom/DocumentTypeImpl
|
---|
| 230 | * .html#DocumentTypeImpl
|
---|
| 231 | * (org.apache.xerces.dom.CoreDocumentImpl,%20java.lang.String)
|
---|
| 232 | *
|
---|
| 233 | * */
|
---|
| 234 | public static Document newDOM(String qualifiedName, String publicID, String systemID)
|
---|
| 235 | {
|
---|
| 236 | // create empty DOM document
|
---|
| 237 | DocumentImpl docImpl = new DocumentImpl();
|
---|
[3222] | 238 |
|
---|
[24862] | 239 | // Need to use the document to create the docType for it
|
---|
| 240 | DocumentType myDocType = new DocumentTypeImpl(docImpl, qualifiedName, publicID, systemID);
|
---|
[3222] | 241 |
|
---|
[18434] | 242 | // Although we have created the docType using the document, we need to still
|
---|
| 243 | // put it into the empty document we just created
|
---|
[24862] | 244 | try
|
---|
| 245 | {
|
---|
[18434] | 246 | docImpl.appendChild(myDocType);
|
---|
| 247 | }
|
---|
[24862] | 248 | catch (Exception e)
|
---|
| 249 | {
|
---|
| 250 | System.out.println("Could not append docType because: " + e);
|
---|
| 251 | }
|
---|
| 252 |
|
---|
[18434] | 253 | // return the document containing a DocType
|
---|
[24862] | 254 | return docImpl;
|
---|
| 255 | }
|
---|
[3222] | 256 |
|
---|
[24862] | 257 | /** returns the Node as a String */
|
---|
| 258 | public static String getString(Node xmlNode)
|
---|
| 259 | {
|
---|
| 260 | StringBuffer xmlRepresentation = new StringBuffer();
|
---|
| 261 | getString(xmlNode, xmlRepresentation, 0, false);
|
---|
| 262 | return xmlRepresentation.toString();
|
---|
| 263 | }
|
---|
[3768] | 264 |
|
---|
[24862] | 265 | /**
|
---|
| 266 | * returns the node as a nicely formatted String - this introduces extra
|
---|
| 267 | * text nodes if the String is read back in as a DOM, so should only be used
|
---|
| 268 | * for printing
|
---|
| 269 | */
|
---|
| 270 | public static String getPrettyString(Node xmlNode)
|
---|
| 271 | {
|
---|
| 272 | StringBuffer xmlRepresentation = new StringBuffer();
|
---|
| 273 | getString(xmlNode, xmlRepresentation, 0, true);
|
---|
| 274 | return xmlRepresentation.toString();
|
---|
| 275 | }
|
---|
[23791] | 276 |
|
---|
[24862] | 277 | /*
|
---|
| 278 | * For the purposes of logger.debug statements, where this is called and
|
---|
| 279 | * hence outputted, returns an empty string if debugging is not enabled
|
---|
| 280 | */
|
---|
| 281 | public static String getPrettyStringLogger(Node xmlNode, Logger log)
|
---|
| 282 | {
|
---|
[23791] | 283 |
|
---|
[24862] | 284 | if (log.isDebugEnabled())
|
---|
| 285 | return getPrettyString(xmlNode);
|
---|
[23791] | 286 |
|
---|
[24862] | 287 | return "";
|
---|
| 288 |
|
---|
[10202] | 289 | }
|
---|
[3768] | 290 |
|
---|
[24862] | 291 | private static void getString(Node xmlNode, StringBuffer xmlRepresentation, int depth, boolean pretty)
|
---|
| 292 | {
|
---|
[3768] | 293 |
|
---|
[24862] | 294 | if (xmlNode == null)
|
---|
| 295 | {
|
---|
| 296 | xmlRepresentation.append("<null>");
|
---|
| 297 | return;
|
---|
[3970] | 298 | }
|
---|
[3768] | 299 |
|
---|
[24862] | 300 | short nodeType = xmlNode.getNodeType();
|
---|
| 301 | String nodeName = xmlNode.getNodeName();
|
---|
[3768] | 302 |
|
---|
[24862] | 303 | if (nodeType == Node.DOCUMENT_NODE)
|
---|
| 304 | {
|
---|
| 305 | Document xmlDocNode = (Document) xmlNode;
|
---|
| 306 |
|
---|
| 307 | //if (xmlDocNode.getDoctype() == null) {
|
---|
| 308 | //System.err.println("Doctype is null.");
|
---|
| 309 | //}
|
---|
| 310 | //else {
|
---|
| 311 | if (xmlDocNode.getDoctype() != null)
|
---|
| 312 | {
|
---|
| 313 | DocumentType dt = xmlDocNode.getDoctype();
|
---|
| 314 |
|
---|
| 315 | String name = dt.getName();
|
---|
| 316 | String pid = dt.getPublicId();
|
---|
| 317 | String sid = dt.getSystemId();
|
---|
| 318 |
|
---|
| 319 | // Use previously assigned name, not dt.getName() again
|
---|
| 320 | String doctype_str = "<!DOCTYPE " + name + " PUBLIC \"" + pid + "\" \"" + sid + "\">\n";
|
---|
| 321 |
|
---|
| 322 | xmlRepresentation.append(doctype_str);
|
---|
| 323 | }
|
---|
| 324 | getString(xmlDocNode.getDocumentElement(), xmlRepresentation, depth, pretty);
|
---|
| 325 | return;
|
---|
[4285] | 326 | }
|
---|
[24862] | 327 | // Handle Element nodes
|
---|
| 328 | if (nodeType == Node.ELEMENT_NODE)
|
---|
| 329 | {
|
---|
| 330 | if (pretty)
|
---|
| 331 | {
|
---|
| 332 | xmlRepresentation.append("\n");
|
---|
| 333 | for (int i = 0; i < depth; i++)
|
---|
| 334 | {
|
---|
| 335 | xmlRepresentation.append(" ");
|
---|
| 336 | }
|
---|
| 337 | }
|
---|
| 338 |
|
---|
| 339 | // Write opening tag
|
---|
| 340 | xmlRepresentation.append("<");
|
---|
| 341 | xmlRepresentation.append(nodeName);
|
---|
| 342 |
|
---|
| 343 | // Write the node attributes
|
---|
| 344 | NamedNodeMap nodeAttributes = xmlNode.getAttributes();
|
---|
| 345 | for (int i = 0; i < nodeAttributes.getLength(); i++)
|
---|
| 346 | {
|
---|
| 347 | Node attribute = nodeAttributes.item(i);
|
---|
| 348 | xmlRepresentation.append(" ");
|
---|
| 349 | xmlRepresentation.append(attribute.getNodeName());
|
---|
| 350 | xmlRepresentation.append("=\"");
|
---|
[30477] | 351 | String attr_val = attribute.getNodeValue();
|
---|
| 352 |
|
---|
| 353 | attr_val = attr_val.replaceAll("&","&");
|
---|
| 354 | attr_val = attr_val.replaceAll("<","<");
|
---|
| 355 | attr_val = attr_val.replaceAll(">",">");
|
---|
| 356 | attr_val = attr_val.replaceAll("\"",""");
|
---|
| 357 | // assume that any of the above chars that was already entity escaped
|
---|
| 358 | // was already correct => return back to how they were
|
---|
| 359 | attr_val = attr_val.replaceAll("&amp;","&");
|
---|
| 360 | attr_val = attr_val.replaceAll("&lt;","<");
|
---|
| 361 | attr_val = attr_val.replaceAll("&gt;",">");
|
---|
| 362 | attr_val = attr_val.replaceAll("&quot;",""");
|
---|
| 363 |
|
---|
| 364 |
|
---|
| 365 | xmlRepresentation.append(attr_val);
|
---|
[24862] | 366 | xmlRepresentation.append("\"");
|
---|
| 367 | }
|
---|
| 368 |
|
---|
| 369 | // If the node has no children, close the opening tag and return
|
---|
| 370 | if (xmlNode.hasChildNodes() == false)
|
---|
| 371 | {
|
---|
| 372 | // This produces somewhat ugly output, but it is necessary to compensate
|
---|
| 373 | // for display bugs in Netscape. Firstly, the space is needed before the
|
---|
| 374 | // closing bracket otherwise Netscape will ignore some tags (<br/>, for
|
---|
| 375 | // example). Also, a newline character would be expected after the tag,
|
---|
| 376 | // but this causes problems with the display of links (the link text
|
---|
| 377 | // will contain a newline character, which is displayed badly).
|
---|
| 378 | xmlRepresentation.append(" />");
|
---|
| 379 | return;
|
---|
| 380 | }
|
---|
| 381 |
|
---|
| 382 | // Close the opening tag
|
---|
| 383 | xmlRepresentation.append(">");
|
---|
| 384 |
|
---|
[28978] | 385 | // Process the children. We process text nodes here, but recursively process other nodes.
|
---|
| 386 | // hack for nodes next to text nodes - dont make them pretty, ie don'e do any new lines or indenting
|
---|
| 387 | // Usually text nodes will be inside their own element. Sometimes we have eg span tags next to text nodes - don't want those indented.
|
---|
| 388 | // also if these are inside a pre tag then the space shows up in the page.
|
---|
| 389 |
|
---|
[24862] | 390 | NodeList children = xmlNode.getChildNodes();
|
---|
| 391 | boolean do_pretty = pretty;
|
---|
[28978] | 392 | boolean output_escaping = true; // record if we have encountered a disable-output-escaping instruction
|
---|
[24862] | 393 | for (int i = 0; i < children.getLength(); i++)
|
---|
| 394 | {
|
---|
[28978] | 395 | Node child = children.item(i);
|
---|
| 396 | short child_type = child.getNodeType();
|
---|
| 397 | if (child_type == Node.PROCESSING_INSTRUCTION_NODE) {
|
---|
| 398 | if (child.getNodeName().equals("javax.xml.transform.disable-output-escaping")) {
|
---|
| 399 | output_escaping = false;
|
---|
| 400 | }
|
---|
| 401 | else if (child.getNodeName().equals("javax.xml.transform.enable-output-escaping")) {
|
---|
| 402 | output_escaping = true;
|
---|
| 403 | }
|
---|
| 404 | else {
|
---|
| 405 | logger.warn("Unhandled processing instruction " + child.getNodeName());
|
---|
| 406 | }
|
---|
| 407 | }
|
---|
| 408 | else if (child_type == Node.TEXT_NODE) {
|
---|
| 409 | do_pretty = false; // if there is a text node amongst the children, do all the following nodes in non-pretty mode - hope this doesn't stuff up something else
|
---|
| 410 | // output the text
|
---|
| 411 | String text = child.getNodeValue();
|
---|
[24862] | 412 |
|
---|
[28978] | 413 | // Perform output escaping, if required
|
---|
| 414 | // Apache Commons replace method is far superior to String.replaceAll - very fast!
|
---|
| 415 | if (output_escaping) {
|
---|
| 416 | text = StringUtils.replace(text, "&", "&");
|
---|
| 417 | text = StringUtils.replace(text, "<", "<");
|
---|
| 418 | text = StringUtils.replace(text, ">", ">");
|
---|
| 419 | text = StringUtils.replace(text, "'", "'");
|
---|
| 420 | text = StringUtils.replace(text, "\"", """);
|
---|
| 421 | }
|
---|
| 422 | // Remove any control-C characters
|
---|
| 423 | text = StringUtils.replace(text, "" + (char) 3, "");
|
---|
| 424 |
|
---|
| 425 | xmlRepresentation.append(text);
|
---|
| 426 |
|
---|
| 427 | }
|
---|
| 428 | else {
|
---|
| 429 | // recursively call getString
|
---|
| 430 | getString(child, xmlRepresentation, depth + 1, do_pretty);
|
---|
| 431 | }
|
---|
| 432 | } // foreach child of the element
|
---|
| 433 |
|
---|
[24862] | 434 | // Write closing tag
|
---|
| 435 | if (pretty)
|
---|
| 436 | {
|
---|
| 437 | if (xmlRepresentation.charAt(xmlRepresentation.length() - 1) == '\n')
|
---|
| 438 | {
|
---|
| 439 | for (int i = 0; i < depth; i++)
|
---|
| 440 | xmlRepresentation.append(" ");
|
---|
| 441 | }
|
---|
| 442 | }
|
---|
| 443 | xmlRepresentation.append("</");
|
---|
| 444 | xmlRepresentation.append(nodeName);
|
---|
| 445 | xmlRepresentation.append(">");
|
---|
| 446 | if (pretty)
|
---|
| 447 | {
|
---|
| 448 | xmlRepresentation.append("\n");
|
---|
| 449 | }
|
---|
[28978] | 450 | } // ELEMENT_NODE
|
---|
[3768] | 451 |
|
---|
[24862] | 452 | else if (nodeType == Node.COMMENT_NODE)
|
---|
| 453 | {
|
---|
| 454 | String text = xmlNode.getNodeValue();
|
---|
| 455 | xmlRepresentation.append("<!-- ");
|
---|
| 456 | xmlRepresentation.append(text);
|
---|
| 457 | xmlRepresentation.append(" -->");
|
---|
| 458 | }
|
---|
| 459 |
|
---|
[28978] | 460 | // TEXT and PROCESSING_INSTRUCTION nodes are handled inside their containing element node
|
---|
[24862] | 461 | // A type of node that is not handled yet
|
---|
| 462 | else
|
---|
| 463 | {
|
---|
| 464 | logger.warn("Unknown node type: " + nodeType + " " + getNodeTypeString(nodeType));
|
---|
| 465 | }
|
---|
| 466 |
|
---|
| 467 | return;
|
---|
[3768] | 468 | }
|
---|
| 469 |
|
---|
[24862] | 470 | protected static String getNodeTypeString(short node_type)
|
---|
| 471 | {
|
---|
[3908] | 472 |
|
---|
[24862] | 473 | String type = "";
|
---|
| 474 | switch (node_type)
|
---|
| 475 | {
|
---|
| 476 | case Node.ATTRIBUTE_NODE:
|
---|
| 477 | type = "ATTRIBUTE_NODE";
|
---|
| 478 | break;
|
---|
| 479 | case Node.CDATA_SECTION_NODE:
|
---|
| 480 | type = "CDATA_SECTION_NODE";
|
---|
| 481 | break;
|
---|
| 482 | case Node.COMMENT_NODE:
|
---|
| 483 | type = "COMMENT_NODE";
|
---|
| 484 | break;
|
---|
| 485 | case Node.DOCUMENT_FRAGMENT_NODE:
|
---|
| 486 | type = "DOCUMENT_FRAGMENT_NODE";
|
---|
| 487 | break;
|
---|
| 488 | case Node.DOCUMENT_NODE:
|
---|
| 489 | type = "DOCUMENT_NODE";
|
---|
| 490 | break;
|
---|
| 491 | case Node.DOCUMENT_TYPE_NODE:
|
---|
| 492 | type = "DOCUMENT_TYPE_NODE";
|
---|
| 493 | break;
|
---|
| 494 | case Node.ELEMENT_NODE:
|
---|
| 495 | type = "ELEMENT_NODE";
|
---|
| 496 | break;
|
---|
| 497 | case Node.ENTITY_NODE:
|
---|
| 498 | type = "ENTITY_NODE";
|
---|
| 499 | break;
|
---|
| 500 | case Node.ENTITY_REFERENCE_NODE:
|
---|
| 501 | type = "ENTITY_REFERENCE_NODE";
|
---|
| 502 | break;
|
---|
| 503 | case Node.NOTATION_NODE:
|
---|
| 504 | type = "NOTATION_NODE";
|
---|
| 505 | break;
|
---|
| 506 | case Node.PROCESSING_INSTRUCTION_NODE:
|
---|
| 507 | type = "PROCESSING_INSTRUCTION_NODE";
|
---|
| 508 | break;
|
---|
| 509 | case Node.TEXT_NODE:
|
---|
| 510 | type = "TEXT_NODE";
|
---|
| 511 | break;
|
---|
| 512 | default:
|
---|
| 513 | type = "UNKNOWN";
|
---|
| 514 | }
|
---|
[3908] | 515 |
|
---|
[24862] | 516 | return type;
|
---|
[3908] | 517 | }
|
---|
| 518 |
|
---|
[18434] | 519 | // returns null if there no error occurred during parsing, or else returns the error message
|
---|
[24862] | 520 |
|
---|
[28962] | 521 | // public String getParseErrorMessage()
|
---|
| 522 | // {
|
---|
| 523 | // ParseErrorHandler errorHandler = (ParseErrorHandler) this.parser.getErrorHandler();
|
---|
| 524 | // return errorHandler.getErrorMessage();
|
---|
| 525 | // }
|
---|
| 526 |
|
---|
[18434] | 527 | // Errorhandler for SAXParseExceptions that are errors, fatal errors or warnings. This class can be used to
|
---|
| 528 | // register a handler for any fatal errors, errors and warnings that may occur when parsing an xml file. The
|
---|
| 529 | // errors are printed both to the greenstone.log and to the tomcat console (System.err), and the error message
|
---|
| 530 | // is stored in the errorMessage variable so that it can be retrieved and be used to generate an xhtml error page.
|
---|
[24862] | 531 | static public class ParseErrorHandler implements ErrorHandler
|
---|
| 532 | {
|
---|
[18434] | 533 | protected String errorMessage = null;
|
---|
[24862] | 534 |
|
---|
[18434] | 535 | // Receive notification of a recoverable error.
|
---|
[24862] | 536 | public void error(SAXParseException exception)
|
---|
| 537 | {
|
---|
[18434] | 538 | handleError("Error:\n", exception);
|
---|
| 539 | }
|
---|
[24862] | 540 |
|
---|
| 541 | // Receive notification of a non-recoverable error.
|
---|
| 542 | public void fatalError(SAXParseException exception)
|
---|
| 543 | {
|
---|
[18434] | 544 | handleError("Fatal Error:\n", exception);
|
---|
| 545 | }
|
---|
[24862] | 546 |
|
---|
[18434] | 547 | // Receive notification of a warning.
|
---|
[24862] | 548 | public void warning(SAXParseException exception)
|
---|
| 549 | {
|
---|
[18434] | 550 | handleError("Warning:\n", exception);
|
---|
[24862] | 551 | }
|
---|
| 552 |
|
---|
| 553 | public String toString(SAXParseException e)
|
---|
| 554 | {
|
---|
[18434] | 555 | String msg = e.getMessage();
|
---|
| 556 | msg += "\nOn line(column): " + e.getLineNumber() + "(" + e.getColumnNumber() + ")";
|
---|
| 557 | msg += (e.getPublicId() != null) ? ("\npublic ID: " + e.getPublicId()) : "\nNo public ID";
|
---|
| 558 | msg += (e.getSystemId() != null) ? ("\nsystem ID: " + e.getSystemId()) : "\nNo system ID";
|
---|
[24862] | 559 |
|
---|
[18434] | 560 | return msg;
|
---|
| 561 | }
|
---|
[24862] | 562 |
|
---|
[18434] | 563 | // clears the errorPage variable after first call to this method
|
---|
[24862] | 564 | public String getErrorMessage()
|
---|
| 565 | {
|
---|
[18434] | 566 | String errMsg = this.errorMessage;
|
---|
[24862] | 567 | if (this.errorMessage != null)
|
---|
| 568 | {
|
---|
[18434] | 569 | this.errorMessage = null;
|
---|
| 570 | }
|
---|
| 571 | return errMsg;
|
---|
| 572 | }
|
---|
[24862] | 573 |
|
---|
[18434] | 574 | // sets the errorMessage member variable to the data stored in the exception
|
---|
| 575 | // and writes the errorMessage to the logger and tomcat's System.err
|
---|
[24862] | 576 | protected void handleError(String errorType, SAXParseException exception)
|
---|
| 577 | {
|
---|
| 578 | this.errorMessage = errorType + toString(exception);
|
---|
[18434] | 579 | System.err.println("\n****Error parsing xml:\n" + this.errorMessage + "\n****\n");
|
---|
| 580 | logger.error(this.errorMessage);
|
---|
| 581 | }
|
---|
| 582 | }
|
---|
[28849] | 583 |
|
---|
[28962] | 584 | public static boolean writeDOM(Element elem, File file) {
|
---|
[28849] | 585 |
|
---|
| 586 | BufferedWriter writer = null;
|
---|
| 587 | boolean success = false;
|
---|
| 588 | try {
|
---|
| 589 | String xml_string = getString(elem);
|
---|
| 590 | // need createNewFile???
|
---|
| 591 | writer = new BufferedWriter(new FileWriter(file));
|
---|
| 592 | writer.write(xml_string);
|
---|
| 593 | success = true;
|
---|
| 594 | }
|
---|
| 595 |
|
---|
| 596 | catch (Exception e) {
|
---|
| 597 | logger.error(e.getMessage());
|
---|
| 598 | success = false;
|
---|
| 599 | }
|
---|
| 600 | finally {
|
---|
| 601 | try {
|
---|
| 602 | if (writer != null) {
|
---|
| 603 | writer.close();
|
---|
| 604 | }
|
---|
| 605 | } catch(Exception e) {
|
---|
| 606 | logger.error("couldn't close the file"+e.getMessage());
|
---|
| 607 | }
|
---|
| 608 | }
|
---|
| 609 | return success;
|
---|
| 610 | }
|
---|
[3222] | 611 | }
|
---|