- Timestamp:
- 2011-11-08T13:45:45+13:00 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/gli/src/org/greenstone/gatherer/util/XMLTools.java
r21548 r24801 1 1 package org.greenstone.gatherer.util; 2 3 2 4 3 import java.io.*; … … 29 28 30 29 /** This class is a static class containing useful XML functions */ 31 public class XMLTools { 32 /** extracts the text out of a node */ 33 public static Node getNodeTextNode (Element param) { 34 param.normalize (); 35 Node n = param.getFirstChild (); 36 while (n!=null && n.getNodeType () !=Node.TEXT_NODE) { 37 n=n.getNextSibling (); 38 } 39 return n; 40 } 41 42 /** extracts the text out of a node */ 43 public static String getNodeText (Element param) { 44 Node text_node = getNodeTextNode (param); 45 if (text_node == null) { 46 return ""; 47 } 48 return text_node.getNodeValue (); 49 } 50 public static void setNodeText (Element elem, String text) { 51 Node old_text_node = getNodeTextNode (elem); 52 if (old_text_node != null) { 53 elem.removeChild (old_text_node); 54 } 55 Text t = elem.getOwnerDocument ().createTextNode (text); 56 elem.appendChild (t); 57 } 58 /** returns the (first) child element with the given name */ 59 public static Node getChildByTagName (Node n, String name) { 60 61 Node child = n.getFirstChild (); 62 while (child!=null) { 63 if (child.getNodeName ().equals (name)) { 64 return child; 65 } 66 child = child.getNextSibling (); 67 } 68 return null; //not found 69 } 70 71 /** returns the (nth) child element with the given name 72 * index numbers start at 0 */ 73 public static Node getChildByTagNameIndexed (Node n, String name, int index) { 74 if (index == -1) { 75 return getChildByTagName (n, name); 76 } 77 int count = 0; 78 Node child = n.getFirstChild (); 79 while (child!=null) { 80 if (child.getNodeName ().equals (name)) { 81 if (count == index) { 82 return child; 83 } else { 84 count++; 85 } 86 } 87 child = child.getNextSibling (); 88 } 89 return null; //not found 90 } 91 92 /** returns the element parent/node_name[@attribute_name='attribute_value'] 93 */ 94 public static Element getNamedElement (Element parent, String node_name, 95 String attribute_name, 96 String attribute_value) { 97 98 NodeList children = parent.getChildNodes (); 99 for (int i=0; i<children.getLength (); i++) { 100 Node child = children.item (i); 101 //logger.debug("getnamed elem, node nmae="+child.getNodeName()); 102 if (child.getNodeName ().equals (node_name)) { 103 if (((Element)child).getAttribute (attribute_name).equals (attribute_value)) 104 return (Element)child; 105 } 106 } 107 // not found 108 return null; 109 } 110 /** returns a list of elements parent/node_name[@attribute_name='attribute_value'] 111 */ 112 public static ArrayList getNamedElementList (Element parent, String node_name, 113 String attribute_name, 114 String attribute_value) { 115 ArrayList elements = new ArrayList (); 116 NodeList children = parent.getChildNodes (); 117 for (int i=0; i<children.getLength (); i++) { 118 //System.out.println("getNamedElementList"); 119 Node child = children.item (i); 120 //logger.debug("getnamed elem, node nmae="+child.getNodeName()); 121 if (child.getNodeName ().equals (node_name)) { 122 if (((Element)child).getAttribute (attribute_name).equals (attribute_value)) 123 elements.add ((Element)child); 124 } 125 } 126 // not found 127 if (elements.size () == 0) { 128 elements = null; 129 } 130 return elements; 131 } 132 public static void copyAllChildren (Element to, Element from) { 133 134 Document to_doc = to.getOwnerDocument (); 135 Node child = from.getFirstChild (); 136 while (child != null) { 137 to.appendChild (to_doc.importNode (child, true)); 138 child = child.getNextSibling (); 139 } 140 } 141 /** Duplicates an element */ 142 public static Element duplicateElement (Document owner, Element element, boolean with_attributes) { 143 return duplicateElementNS (owner, element, null, with_attributes); 144 } 145 146 /** Duplicates an element */ 147 public static Element duplicateElementNS (Document owner, 148 Element element, 149 String namespace_uri, 150 boolean with_attributes) { 151 Element duplicate; 152 if (namespace_uri == null) { 153 duplicate = owner.createElement (element.getTagName ()); 154 } else { 155 duplicate = owner.createElementNS (namespace_uri, element.getTagName ()); 156 } 157 // Copy element attributes 158 if (with_attributes) { 159 NamedNodeMap attributes = element.getAttributes (); 160 for (int i = 0; i < attributes.getLength (); i++) { 161 Node attribute = attributes.item (i); 162 duplicate.setAttribute (attribute.getNodeName (), attribute.getNodeValue ()); 163 } 164 } 165 166 // Copy element children 167 NodeList children = element.getChildNodes (); 168 for (int i = 0; i < children.getLength (); i++) { 169 Node child = children.item (i); 170 duplicate.appendChild (owner.importNode (child, true)); 171 } 172 173 return duplicate; 174 } 175 176 177 /** Remove all of the child nodes from a certain node. */ 178 static final public void clear (Node node) { 179 while (node.hasChildNodes ()) { 180 node.removeChild (node.getFirstChild ()); 181 } 182 } 183 184 185 static public ArrayList getChildElementsByTagName (Element parent_element, String element_name) { 186 ArrayList child_elements = new ArrayList (); 187 188 NodeList children_nodelist = parent_element.getChildNodes (); 189 for (int i = 0; i < children_nodelist.getLength (); i++) { 190 Node child_node = children_nodelist.item (i); 191 if (child_node.getNodeType () == Node.ELEMENT_NODE && child_node.getNodeName ().equals (element_name)) { 192 child_elements.add (child_node); 193 } 194 } 195 196 return child_elements; 197 } 198 199 200 static public String getElementTextValue (Element element) { 201 // Find the first text node child 202 NodeList children_nodelist = element.getChildNodes (); 203 for (int i = 0; i < children_nodelist.getLength (); i++) { 204 Node child_node = children_nodelist.item (i); 205 if (child_node.getNodeType () == Node.TEXT_NODE) { 206 return child_node.getNodeValue (); 207 } 208 } 209 210 // None found 211 return ""; 212 } 213 214 215 /** Method to retrieve the value of a given node. 216 * @param element The <strong>Element</strong> whose value we wish to find. 217 * Soon to be deprecated! 218 */ 219 static final public String getValue (Node element) { 220 if (element == null) { 221 return ""; 222 } 223 // If we've been given a subject node first retrieve its value node. 224 if(element.getNodeName ().equals ("Subject")) { 225 element = getNodeFromNamed (element, "Value"); 226 } 227 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes 228 if(element != null && element.hasChildNodes ()) { 229 StringBuffer text_buffer = new StringBuffer (); 230 NodeList text_nodes = element.getChildNodes (); 231 for(int i = 0; i < text_nodes.getLength (); i++) { 232 Node possible_text = text_nodes.item (i); 233 if(possible_text.getNodeName ().equals (StaticStrings.TEXT_NODE)) { 234 text_buffer.append (possible_text.getNodeValue ()); 235 } 236 } 237 return text_buffer.toString (); 238 } 239 return ""; 240 } 241 242 243 /** Method to retrieve from the node given, a certain child node with the specified name. 244 * @param parent The <strong>Node</strong> whose children should be searched. 245 * @param name The required nodes name as a <strong>String</strong>. 246 * @return The requested <strong>Node</strong> if it is found, <i>null</i> otherwise. 247 * Soon to be deprecated! 248 */ 249 static final public Node getNodeFromNamed (Node parent, String name) { 250 Node child = null; 251 for(Node i = parent.getFirstChild (); i != null && child == null; 252 i = i.getNextSibling ()) { 253 if(i.getNodeName ().equals (name)) { 254 child = i; 255 } 256 } 257 return child; 258 } 259 260 static final public String WELLFORMED= "well-formed !"; 261 static final public String NOTWELLFORMED= "not well-formed"; 262 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>"; 263 static final private String FOOTER = "</collectionConfig>"; 264 265 public static String parse (String xml_str) { 266 String validation_msg = WELLFORMED; 267 xml_str = HEADER + xml_str + FOOTER; 268 try { 269 SAXParserFactory factory = SAXParserFactory.newInstance (); 270 factory.setNamespaceAware (true); 271 //factory.setValidating (true); 272 SAXParser parser = factory.newSAXParser (); 273 InputSource iSource = new InputSource ( new StringReader ( xml_str ) ); 274 // parser.parse (iSource, new DefaultHandler ()); 275 276 org.xml.sax.XMLReader reader = parser.getXMLReader (); 277 reader.setContentHandler(new DefaultHandler()); 278 reader.setErrorHandler(new DefaultHandler()); 279 reader.parse(iSource); 280 } catch (FactoryConfigurationError e) { 281 validation_msg = "unable to get a document builder factory"; 282 } catch (ParserConfigurationException e) { 283 validation_msg = "unable to configure parser"; 284 } catch (SAXParseException e) { 285 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage (); 286 } catch (SAXException e) { 287 validation_msg += " Fatal error: " + e.toString (); 288 } catch (IOException e) { 289 validation_msg = "Unable to read the input, i/o error"; 290 } 291 292 return validation_msg; 293 } 294 //In this method, the parsed string xml_str is not wrapped by the header and footer strings. 295 public static String parseDOM (String xml_str) { 296 String validation_msg = WELLFORMED; 297 298 try { 299 SAXParserFactory factory = SAXParserFactory.newInstance (); 300 factory.setNamespaceAware (true); 301 //factory.setValidating (true); 302 SAXParser parser = factory.newSAXParser (); 303 InputSource iSource = new InputSource ( new StringReader ( xml_str ) ); 304 // parser.parse (iSource, new DefaultHandler ()); 305 306 org.xml.sax.XMLReader reader = parser.getXMLReader (); 307 reader.setContentHandler(new DefaultHandler()); 308 reader.setErrorHandler(new DefaultHandler()); 309 reader.parse(iSource); 310 } catch (FactoryConfigurationError e) { 311 validation_msg = "unable to get a document builder factory"; 312 } catch (ParserConfigurationException e) { 313 validation_msg = "unable to configure parser"; 314 } catch (SAXParseException e) { 315 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage (); 316 } catch (SAXException e) { 317 validation_msg += " " + e.toString (); 318 } catch (IOException e) { 319 validation_msg = "Unable to read the input, i/o error"; 320 } 321 322 return validation_msg; 323 } 324 325 public static String parse (File xml_file) { 326 String validation_msg = WELLFORMED; 327 328 try { 329 SAXParserFactory factory = SAXParserFactory.newInstance (); 330 factory.setNamespaceAware (true); 331 //factory.setValidating (true); 332 SAXParser parser = factory.newSAXParser (); 333 FileReader r = new FileReader(xml_file); 334 InputSource iSource = new InputSource(r); 335 XMLReader reader = parser.getXMLReader (); 336 reader.setContentHandler(new DefaultHandler()); 337 reader.setErrorHandler(new DefaultHandler()); 338 reader.parse(iSource); 339 } catch (FactoryConfigurationError e) { 340 validation_msg = "unable to get a document builder factory"; 341 } catch (ParserConfigurationException e) { 342 validation_msg = "unable to configure parser"; 343 } catch (SAXParseException e) { 344 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage (); 345 } catch (SAXException e) { 346 validation_msg += " Fatal error: " + e.toString (); 347 } catch (IOException e) { 348 validation_msg = "Unable to read the input, i/o error"; 349 } 350 351 return validation_msg; 352 } 353 /** Returns a string of the location. */ 354 private static String getLocationString(SAXParseException ex) { 355 StringBuffer str = new StringBuffer(); 356 357 String systemId = ex.getSystemId(); 358 if (systemId != null) { 359 int index = systemId.lastIndexOf('/'); 360 if (index != -1) 361 systemId = systemId.substring(index + 1); 362 str.append(systemId); 363 } 364 str.append("(line "); 365 str.append(ex.getLineNumber()-1); 366 str.append(", column "); 367 str.append(ex.getColumnNumber()); 368 str.append("): "); 369 370 return str.toString(); 371 372 } // getLocationString(SAXParseException):String 373 374 375 /** Parse an XML document from a given file path */ 376 static public Document parseXMLFile (String xml_file_path, boolean use_class_loader) { 377 if (use_class_loader == true) { 378 InputStream is = JarTools.getResourceAsStream ("/" + xml_file_path); 379 if (is != null) { 380 return parseXML (is); 381 } 382 } 383 384 // Try the file outside the classes directory 385 return parseXMLFile (new File (xml_file_path)); 386 } 387 388 389 /** Parse an XML document from a given file */ 390 static public Document parseXMLFile (File xml_file) { 391 // No file? No point trying! 392 if (xml_file.exists () == false) { 393 return null; 394 } 395 396 try { 397 return parseXML (new FileInputStream (xml_file)); 398 } 399 catch (Exception exception) { 400 DebugStream.printStackTrace (exception); 401 return null; 402 } 403 } 404 405 406 /** Parse an XML document from a given input stream */ 407 static public Document parseXML (InputStream xml_input_stream) { 408 Document document = null; 409 410 try { 411 InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8"); 412 document = parseXML(isr); 413 isr.close (); 414 xml_input_stream.close (); 415 } 416 catch (Exception exception) { 417 DebugStream.printStackTrace (exception); 418 } 419 420 return document; 421 } 422 423 424 /** Parse an XML document from a given reader */ 425 static public Document parseXML (Reader xml_reader) { 426 Document document = null; 427 428 // If debugging, the following will store the XML contents to be parsed, 429 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 430 String xmlContents = ""; 431 432 try { 433 Reader reader = null; 434 435 // (1) By default, GLI will remove any contents preceeding (and invalidating) 436 // the XML and present these lines separately to the user 437 if(!DebugStream.isDebuggingEnabled()) { 438 try { 439 reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) ); 440 } catch ( Exception e ) { 441 System.err.println( "Exception while wrapping the reader in parseXML(Reader)" ); 442 e.printStackTrace(); 443 } 444 } 445 446 // (2) If we are running GLI in debug mode: 447 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some 448 // idea of where things went wrong. This will print the "XML" contents to either 449 // system.out (if debugging is off) or to the DebugStream otherwise. 450 // We need to read the XML twice to know the line where things went wrong, so 451 // do the additional reading only if we're debugging 452 else { 453 StringBuffer buf = new StringBuffer(); 454 char[] buffer = new char[500]; 455 int numCharsRead = xml_reader.read(buffer, 0, buffer.length); 456 while(numCharsRead != -1) { 457 buf.append(buffer, 0, numCharsRead); 458 numCharsRead = xml_reader.read(buffer, 0, buffer.length); 459 } 460 xmlContents = buf.toString(); 461 xml_reader.close(); // closing the old Reader 462 xml_reader = null; 463 buffer = null; 464 buf = null; 465 // we need a Reader to parse the same contents as the Reader that was just closed 466 reader = new BufferedReader(new StringReader(xmlContents)); 467 //System.err.println("xmlContents:\n" + xmlContents); 468 } 469 470 // (2) The actual XML parsing 471 InputSource isc = new InputSource (reader); 472 DOMParser parser = new DOMParser (); 473 parser.setFeature ("http://xml.org/sax/features/validation", false); 474 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 475 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 476 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 477 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 478 parser.parse (isc); 479 document = parser.getDocument (); 480 481 } catch(SAXParseException e) { 482 showXMLParseFailureLine(e, xmlContents); 483 } catch (SAXException exception) { 484 System.err.println ("SAX exception: " + exception.getMessage ()); 485 if(DebugStream.isDebuggingEnabled()) { 486 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" 487 + xmlContents + "\n************END\n"); 488 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 489 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML..."); 490 System.exit(-1); 491 } 492 // else, not running in debug mode, so don't exit after exception 493 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 494 DebugStream.printStackTrace (exception); 495 } 496 catch (Exception exception) { 497 DebugStream.printStackTrace (exception); 498 } 499 500 return document; 501 } 502 503 /** Displays the line (string) where the SAXParseException occurred, given a String of the 504 * entire xml that was being parsed and the SAXParseException object that was caught. 505 * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output. 506 * @param xmlContents is the entire xml that was being parsed when the exception occurred 507 * @param e is the SAXParseException object that was thrown upon parsing the xmlContents. 508 */ 509 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) { 510 511 // There should be no characters at all that preceed the <?xml>... bit. 512 // The first check is for starting spaces: 513 if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) { 514 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); 515 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); 516 return; // nothing more to do, first error identified 517 } 518 519 // the actual line (String literal) where parsing failed and the SAXParseException occurred. 520 String line = ""; 521 int linenumber = e.getLineNumber(); 522 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber); 523 if(DebugStream.isDebuggingEnabled()) { 524 if(linenumber != -1) { 525 String[] lines = xmlContents.split("\n"); 526 if (lines.length > 0) { 527 DebugStream.println(" (number of lines: " + lines.length + ")"); 528 if(lines.length >= linenumber) { 529 line = lines[linenumber-1]; 530 } else { // error is past the last line 531 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length-1]; 532 } 533 } else { 534 DebugStream.print("\n"); 535 } 536 lines = null; 537 538 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 539 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 540 541 // Uncomment if you want to print out the entire contents of the XML doc: 542 //DebugStream.println("\n\nThis was the XML:\n*********START\n" 543 // + xmlContents + "\n************END\n"); 544 } else { // no particular line number, print out all the xml so debugger can inspect it 545 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" 546 + xmlContents + "\n************END\n"); 547 } 548 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 549 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML..."); 550 System.exit(-1); 551 } else { // not running in debug mode 552 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 553 } 554 } 555 556 557 static public StringBuffer readXMLStream (InputStream input_stream) { 558 StringBuffer xml = new StringBuffer (""); 559 560 try { 561 InputStreamReader isr = new InputStreamReader (input_stream, "UTF-8"); 562 BufferedReader buffered_in = new BufferedReader (isr); 563 564 String line = ""; 565 boolean xml_content = false; 566 while((line = buffered_in.readLine ()) != null) { 567 if(xml_content) { 568 xml.append (line); 569 xml.append ("\n"); 570 } 571 else if(line.trim ().startsWith ("<?xml")) { 572 xml_content = true; 573 xml.append (line); 574 xml.append ("\n"); 575 } 576 } 577 buffered_in = null; 578 } 579 catch (Exception error) { 580 System.err.println ("Failed when trying to parse XML stream"); 581 error.printStackTrace (); 582 } 583 584 return xml; 585 } 586 587 588 /** Removes characters that are invalid in XML (see http://www.w3.org/TR/2000/REC-xml-20001006#charsets) */ 589 static public String removeInvalidCharacters (String text) { 590 char[] safe_characters = new char[text.length ()]; 591 int j = 0; 592 593 char[] raw_characters = new char[text.length ()]; 594 text.getChars (0, text.length (), raw_characters, 0); 595 for (int i = 0; i < raw_characters.length; i++) { 596 char character = raw_characters[i]; 597 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF)) { 598 safe_characters[j] = character; 599 j++; 600 } 601 } 602 603 return new String (safe_characters, 0, j); 604 } 605 606 607 static public void setElementTextValue (Element element, String text) { 608 // Remove all text node children 609 NodeList children_nodelist = element.getChildNodes (); 610 for (int i = children_nodelist.getLength () - 1; i >= 0; i--) { 611 Node child_node = children_nodelist.item (i); 612 if (child_node.getNodeType () == Node.TEXT_NODE) { 613 element.removeChild (child_node); 614 } 615 } 616 617 // Add a new text node 618 if (text != null) { 619 element.appendChild (element.getOwnerDocument ().createTextNode (text)); 620 } 621 } 622 623 624 /** Set the #text node value of some element. 625 * @param element the Element whose value we wish to set 626 * @param value the new value for the element as a String 627 * Soon to be deprecated! 628 */ 629 static final public void setValue (Element element, String value) { 630 // Remove any existing child node(s) 631 clear (element); 632 // Add new text node. 633 if (value != null) { 634 element.appendChild (element.getOwnerDocument ().createTextNode (value)); 635 } 636 } 637 638 /** Write an XML document to a given file with the text node of the specified element unescaped*/ 639 static public void writeXMLFile (File xml_file, Document document, String[] nonEscapingTagNames) { 640 try { 641 OutputStream os = new FileOutputStream (xml_file); 642 // Create an output format for our document. 643 OutputFormat f = new OutputFormat (document); 644 f.setEncoding ("UTF-8"); 645 f.setIndenting (true); 646 f.setLineWidth (0); // Why isn't this working! 647 f.setPreserveSpace (false); 648 if (nonEscapingTagNames != null) { 649 f.setNonEscapingElements (nonEscapingTagNames); 650 } 651 // Create the necessary writer stream for serialization. 652 OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); 653 Writer w = new BufferedWriter (osw); 654 // Generate a new serializer from the above. 655 XMLSerializer s = new XMLSerializer (w, f); 656 s.asDOMSerializer (); 657 // Finally serialize the document to file. 658 s.serialize (document); 659 // And close. 660 os.close (); 661 } 662 catch (Exception exception) { 663 DebugStream.printStackTrace (exception); 664 } 665 } 666 667 /** Write an XML document to a given file */ 668 static public void writeXMLFile (File xml_file, Document document) { 669 writeXMLFile(xml_file, document, null); 670 } 671 672 public static void printXMLNode (Node e) { 673 printXMLNode (e, 0) ; 674 } 675 676 public static void printXMLNode (Node e, int depth) { //recursive method call using DOM API... 677 678 for (int i=0 ; i<depth ; i++) 679 System.out.print (' ') ; 680 681 if (e.getNodeType () == Node.TEXT_NODE){ 682 //System.out.println("text") ; 683 if (e.getNodeValue () != "") { 684 System.out.println (e.getNodeValue ()) ; 685 } 686 return ; 687 } 688 689 System.out.print ('<'); 690 System.out.print (e.getNodeName ()); 691 NamedNodeMap attrs = e.getAttributes (); 692 if (attrs != null) { 693 for (int i = 0; i < attrs.getLength (); i++) { 694 Node attr = attrs.item (i); 695 System.out.print (' '); 696 System.out.print (attr.getNodeName ()); 697 System.out.print ("=\""); 698 System.out.print (attr.getNodeValue ()); 699 System.out.print ('"'); 700 } 701 } 702 NodeList children = e.getChildNodes (); 703 704 if (children == null || children.getLength () == 0) 705 System.out.println ("/>") ; 706 else { 707 708 System.out.println ('>') ; 709 710 int len = children.getLength (); 711 for (int i = 0; i < len; i++) { 712 printXMLNode (children.item (i), depth + 1); 713 } 714 715 for (int i=0 ; i<depth ; i++) 716 System.out.print (' ') ; 717 718 System.out.println ("</" + e.getNodeName () + ">"); 719 } 720 721 } 722 public static String xmlNodeToString (Node e){ 723 StringBuffer sb = new StringBuffer (""); 724 xmlNodeToString (sb,e,0); 725 return sb.toString (); 726 } 727 728 private static void xmlNodeToString (StringBuffer sb, Node e, int depth){ 729 730 for (int i=0 ; i<depth ; i++) 731 sb.append (' ') ; 732 733 if (e.getNodeType () == Node.TEXT_NODE){ 734 if (e.getNodeValue () != "") { 735 sb.append (e.getNodeValue ()) ; 736 } 737 return ; 738 } 739 740 sb.append ('<'); 741 sb.append (e.getNodeName ()); 742 NamedNodeMap attrs = e.getAttributes (); 743 if (attrs != null) { 744 for (int i = 0; i < attrs.getLength (); i++) { 745 Node attr = attrs.item (i); 746 sb.append (' '); 747 sb.append (attr.getNodeName ()); 748 sb.append ("=\""); 749 sb.append (attr.getNodeValue ()); 750 sb.append ('"'); 751 } 752 } 753 NodeList children = e.getChildNodes (); 754 755 if (children == null || children.getLength () == 0) 756 sb.append ("/>\n") ; 757 else { 758 759 sb.append (">\n") ; 760 761 int len = children.getLength (); 762 for (int i = 0; i < len; i++) { 763 xmlNodeToString (sb,children.item (i), depth + 1); 764 } 765 766 for (int i=0 ; i<depth ; i++) 767 sb.append (' ') ; 768 769 sb.append ("</" + e.getNodeName () + ">\n"); 770 } 771 772 773 } 774 775 public static String xmlNodeToStringWithoutIndenting (Node e) { 776 StringBuffer sb = new StringBuffer (""); 777 xmlNodeToStringWithoutNewline(sb, e, -1); 778 return sb.toString(); 779 } 780 public static String xmlNodeToStringWithoutNewline (Node e){ 781 StringBuffer sb = new StringBuffer (""); 782 xmlNodeToStringWithoutNewline (sb,e,0); 783 return sb.toString (); 784 } 785 786 private static void xmlNodeToStringWithoutNewline (StringBuffer sb, Node e, int depth){ 787 788 for (int i=0 ; i<depth ; i++) 789 sb.append (' ') ; 790 791 if (e.getNodeType () == Node.TEXT_NODE){ 792 if (e.getNodeValue () != "") { 793 sb.append (e.getNodeValue ()) ; 794 } 795 return ; 796 } 797 798 sb.append ('<'); 799 sb.append (e.getNodeName ()); 800 NamedNodeMap attrs = e.getAttributes (); 801 if (attrs != null) { 802 for (int i = 0; i < attrs.getLength (); i++) { 803 Node attr = attrs.item (i); 804 sb.append (' '); 805 sb.append (attr.getNodeName ()); 806 sb.append ("=\""); 807 sb.append (attr.getNodeValue ()); 808 sb.append ('"'); 809 } 810 } 811 NodeList children = e.getChildNodes (); 812 813 if (children == null || children.getLength () == 0) 814 sb.append ("/>") ; 815 else { 816 817 sb.append (">") ; 818 819 int len = children.getLength (); 820 for (int i = 0; i < len; i++) { 821 if (depth >= 0) { 822 xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1); 823 } else { 824 xmlNodeToStringWithoutNewline (sb,children.item (i), depth); 825 } 826 } 827 828 for (int i=0 ; i<depth ; i++) 829 sb.append (' ') ; 830 831 sb.append ("</" + e.getNodeName () + ">"); 832 } 833 } 30 public class XMLTools 31 { 32 /** extracts the text out of a node */ 33 public static Node getNodeTextNode(Element param) 34 { 35 param.normalize(); 36 Node n = param.getFirstChild(); 37 while (n != null && n.getNodeType() != Node.TEXT_NODE) 38 { 39 n = n.getNextSibling(); 40 } 41 return n; 42 } 43 44 /** extracts the text out of a node */ 45 public static String getNodeText(Element param) 46 { 47 Node text_node = getNodeTextNode(param); 48 if (text_node == null) 49 { 50 return ""; 51 } 52 return text_node.getNodeValue(); 53 } 54 55 public static void setNodeText(Element elem, String text) 56 { 57 Node old_text_node = getNodeTextNode(elem); 58 if (old_text_node != null) 59 { 60 elem.removeChild(old_text_node); 61 } 62 Text t = elem.getOwnerDocument().createTextNode(text); 63 elem.appendChild(t); 64 } 65 66 /** returns the (first) child element with the given name */ 67 public static Node getChildByTagName(Node n, String name) 68 { 69 70 Node child = n.getFirstChild(); 71 while (child != null) 72 { 73 if (child.getNodeName().equals(name)) 74 { 75 return child; 76 } 77 child = child.getNextSibling(); 78 } 79 return null; //not found 80 } 81 82 /** 83 * returns the (nth) child element with the given name index numbers start 84 * at 0 85 */ 86 public static Node getChildByTagNameIndexed(Node n, String name, int index) 87 { 88 if (index == -1) 89 { 90 return getChildByTagName(n, name); 91 } 92 int count = 0; 93 Node child = n.getFirstChild(); 94 while (child != null) 95 { 96 if (child.getNodeName().equals(name)) 97 { 98 if (count == index) 99 { 100 return child; 101 } 102 else 103 { 104 count++; 105 } 106 } 107 child = child.getNextSibling(); 108 } 109 return null; //not found 110 } 111 112 /** 113 * returns the element parent/node_name[@attribute_name='attribute_value'] 114 */ 115 public static Element getNamedElement(Element parent, String node_name, String attribute_name, String attribute_value) 116 { 117 118 NodeList children = parent.getChildNodes(); 119 for (int i = 0; i < children.getLength(); i++) 120 { 121 Node child = children.item(i); 122 //logger.debug("getnamed elem, node nmae="+child.getNodeName()); 123 if (child.getNodeName().equals(node_name)) 124 { 125 if (((Element) child).getAttribute(attribute_name).equals(attribute_value)) 126 return (Element) child; 127 } 128 } 129 // not found 130 return null; 131 } 132 133 /** 134 * returns a list of elements 135 * parent/node_name[@attribute_name='attribute_value'] 136 */ 137 public static ArrayList getNamedElementList(Element parent, String node_name, String attribute_name, String attribute_value) 138 { 139 ArrayList elements = new ArrayList(); 140 NodeList children = parent.getChildNodes(); 141 for (int i = 0; i < children.getLength(); i++) 142 { 143 //System.out.println("getNamedElementList"); 144 Node child = children.item(i); 145 //logger.debug("getnamed elem, node nmae="+child.getNodeName()); 146 if (child.getNodeName().equals(node_name)) 147 { 148 if (((Element) child).getAttribute(attribute_name).equals(attribute_value)) 149 elements.add((Element) child); 150 } 151 } 152 // not found 153 if (elements.size() == 0) 154 { 155 elements = null; 156 } 157 return elements; 158 } 159 160 public static void copyAllChildren(Element to, Element from) 161 { 162 163 Document to_doc = to.getOwnerDocument(); 164 Node child = from.getFirstChild(); 165 while (child != null) 166 { 167 to.appendChild(to_doc.importNode(child, true)); 168 child = child.getNextSibling(); 169 } 170 } 171 172 /** Duplicates an element */ 173 public static Element duplicateElement(Document owner, Element element, boolean with_attributes) 174 { 175 return duplicateElementNS(owner, element, null, with_attributes); 176 } 177 178 /** Duplicates an element */ 179 public static Element duplicateElementNS(Document owner, Element element, String namespace_uri, boolean with_attributes) 180 { 181 Element duplicate; 182 if (namespace_uri == null) 183 { 184 duplicate = owner.createElement(element.getTagName()); 185 } 186 else 187 { 188 duplicate = owner.createElementNS(namespace_uri, element.getTagName()); 189 } 190 // Copy element attributes 191 if (with_attributes) 192 { 193 NamedNodeMap attributes = element.getAttributes(); 194 for (int i = 0; i < attributes.getLength(); i++) 195 { 196 Node attribute = attributes.item(i); 197 duplicate.setAttribute(attribute.getNodeName(), attribute.getNodeValue()); 198 } 199 } 200 201 // Copy element children 202 NodeList children = element.getChildNodes(); 203 for (int i = 0; i < children.getLength(); i++) 204 { 205 Node child = children.item(i); 206 duplicate.appendChild(owner.importNode(child, true)); 207 } 208 209 return duplicate; 210 } 211 212 /** Remove all of the child nodes from a certain node. */ 213 static final public void clear(Node node) 214 { 215 while (node.hasChildNodes()) 216 { 217 node.removeChild(node.getFirstChild()); 218 } 219 } 220 221 static public ArrayList getChildElementsByTagName(Element parent_element, String element_name) 222 { 223 ArrayList child_elements = new ArrayList(); 224 225 NodeList children_nodelist = parent_element.getChildNodes(); 226 for (int i = 0; i < children_nodelist.getLength(); i++) 227 { 228 Node child_node = children_nodelist.item(i); 229 if (child_node.getNodeType() == Node.ELEMENT_NODE && child_node.getNodeName().equals(element_name)) 230 { 231 child_elements.add(child_node); 232 } 233 } 234 235 return child_elements; 236 } 237 238 static public String getElementTextValue(Element element) 239 { 240 // Find the first text node child 241 NodeList children_nodelist = element.getChildNodes(); 242 for (int i = 0; i < children_nodelist.getLength(); i++) 243 { 244 Node child_node = children_nodelist.item(i); 245 if (child_node.getNodeType() == Node.TEXT_NODE) 246 { 247 return child_node.getNodeValue(); 248 } 249 } 250 251 // None found 252 return ""; 253 } 254 255 /** 256 * Method to retrieve the value of a given node. 257 * 258 * @param element 259 * The <strong>Element</strong> whose value we wish to find. Soon 260 * to be deprecated! 261 */ 262 static final public String getValue(Node element) 263 { 264 if (element == null) 265 { 266 return ""; 267 } 268 // If we've been given a subject node first retrieve its value node. 269 if (element.getNodeName().equals("Subject")) 270 { 271 element = getNodeFromNamed(element, "Value"); 272 } 273 // If we've got a value node, then reconstruct the text. Remember that DOM will split text over 256 characters into several text nodes 274 if (element != null && element.hasChildNodes()) 275 { 276 StringBuffer text_buffer = new StringBuffer(); 277 NodeList text_nodes = element.getChildNodes(); 278 for (int i = 0; i < text_nodes.getLength(); i++) 279 { 280 Node possible_text = text_nodes.item(i); 281 if (possible_text.getNodeName().equals(StaticStrings.TEXT_NODE)) 282 { 283 text_buffer.append(possible_text.getNodeValue()); 284 } 285 } 286 return text_buffer.toString(); 287 } 288 return ""; 289 } 290 291 /** 292 * Method to retrieve from the node given, a certain child node with the 293 * specified name. 294 * 295 * @param parent 296 * The <strong>Node</strong> whose children should be searched. 297 * @param name 298 * The required nodes name as a <strong>String</strong>. 299 * @return The requested <strong>Node</strong> if it is found, <i>null</i> 300 * otherwise. Soon to be deprecated! 301 */ 302 static final public Node getNodeFromNamed(Node parent, String name) 303 { 304 Node child = null; 305 for (Node i = parent.getFirstChild(); i != null && child == null; i = i.getNextSibling()) 306 { 307 if (i.getNodeName().equals(name)) 308 { 309 child = i; 310 } 311 } 312 return child; 313 } 314 315 static final public String WELLFORMED = "well-formed !"; 316 static final public String NOTWELLFORMED = "not well-formed"; 317 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>"; 318 static final private String FOOTER = "</collectionConfig>"; 319 320 public static String parse(String xml_str) 321 { 322 String validation_msg = WELLFORMED; 323 xml_str = HEADER + xml_str + FOOTER; 324 try 325 { 326 SAXParserFactory factory = SAXParserFactory.newInstance(); 327 factory.setNamespaceAware(true); 328 //factory.setValidating (true); 329 SAXParser parser = factory.newSAXParser(); 330 InputSource iSource = new InputSource(new StringReader(xml_str)); 331 // parser.parse (iSource, new DefaultHandler ()); 332 333 org.xml.sax.XMLReader reader = parser.getXMLReader(); 334 reader.setContentHandler(new DefaultHandler()); 335 reader.setErrorHandler(new DefaultHandler()); 336 reader.parse(iSource); 337 } 338 catch (FactoryConfigurationError e) 339 { 340 validation_msg = "unable to get a document builder factory"; 341 } 342 catch (ParserConfigurationException e) 343 { 344 validation_msg = "unable to configure parser"; 345 } 346 catch (SAXParseException e) 347 { 348 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage(); 349 } 350 catch (SAXException e) 351 { 352 validation_msg += " Fatal error: " + e.toString(); 353 } 354 catch (IOException e) 355 { 356 validation_msg = "Unable to read the input, i/o error"; 357 } 358 359 return validation_msg; 360 } 361 362 //In this method, the parsed string xml_str is not wrapped by the header and footer strings. 363 public static String parseDOM(String xml_str) 364 { 365 String validation_msg = WELLFORMED; 366 367 try 368 { 369 SAXParserFactory factory = SAXParserFactory.newInstance(); 370 factory.setNamespaceAware(true); 371 //factory.setValidating (true); 372 SAXParser parser = factory.newSAXParser(); 373 InputSource iSource = new InputSource(new StringReader(xml_str)); 374 // parser.parse (iSource, new DefaultHandler ()); 375 376 org.xml.sax.XMLReader reader = parser.getXMLReader(); 377 reader.setContentHandler(new DefaultHandler()); 378 reader.setErrorHandler(new DefaultHandler()); 379 reader.parse(iSource); 380 } 381 catch (FactoryConfigurationError e) 382 { 383 validation_msg = "unable to get a document builder factory"; 384 } 385 catch (ParserConfigurationException e) 386 { 387 validation_msg = "unable to configure parser"; 388 } 389 catch (SAXParseException e) 390 { 391 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage(); 392 } 393 catch (SAXException e) 394 { 395 validation_msg += " " + e.toString(); 396 } 397 catch (IOException e) 398 { 399 validation_msg = "Unable to read the input, i/o error"; 400 } 401 402 return validation_msg; 403 } 404 405 public static String parse(File xml_file) 406 { 407 String validation_msg = WELLFORMED; 408 409 try 410 { 411 SAXParserFactory factory = SAXParserFactory.newInstance(); 412 factory.setNamespaceAware(true); 413 //factory.setValidating (true); 414 SAXParser parser = factory.newSAXParser(); 415 FileReader r = new FileReader(xml_file); 416 InputSource iSource = new InputSource(r); 417 XMLReader reader = parser.getXMLReader(); 418 reader.setContentHandler(new DefaultHandler()); 419 reader.setErrorHandler(new DefaultHandler()); 420 reader.parse(iSource); 421 } 422 catch (FactoryConfigurationError e) 423 { 424 validation_msg = "unable to get a document builder factory"; 425 } 426 catch (ParserConfigurationException e) 427 { 428 validation_msg = "unable to configure parser"; 429 } 430 catch (SAXParseException e) 431 { 432 validation_msg = NOTWELLFORMED + getLocationString(e) + e.getMessage(); 433 } 434 catch (SAXException e) 435 { 436 validation_msg += " Fatal error: " + e.toString(); 437 } 438 catch (IOException e) 439 { 440 validation_msg = "Unable to read the input, i/o error"; 441 } 442 443 return validation_msg; 444 } 445 446 /** Returns a string of the location. */ 447 private static String getLocationString(SAXParseException ex) 448 { 449 StringBuffer str = new StringBuffer(); 450 451 String systemId = ex.getSystemId(); 452 if (systemId != null) 453 { 454 int index = systemId.lastIndexOf('/'); 455 if (index != -1) 456 systemId = systemId.substring(index + 1); 457 str.append(systemId); 458 } 459 str.append("(line "); 460 str.append(ex.getLineNumber() - 1); 461 str.append(", column "); 462 str.append(ex.getColumnNumber()); 463 str.append("): "); 464 465 return str.toString(); 466 467 } // getLocationString(SAXParseException):String 468 469 /** Parse an XML document from a given file path */ 470 static public Document parseXMLFile(String xml_file_path, boolean use_class_loader) 471 { 472 if (use_class_loader == true) 473 { 474 InputStream is = JarTools.getResourceAsStream("/" + xml_file_path); 475 if (is != null) 476 { 477 return parseXML(is); 478 } 479 } 480 481 // Try the file outside the classes directory 482 return parseXMLFile(new File(xml_file_path)); 483 } 484 485 /** Parse an XML document from a given file */ 486 static public Document parseXMLFile(File xml_file) 487 { 488 // No file? No point trying! 489 if (xml_file.exists() == false) 490 { 491 return null; 492 } 493 494 try 495 { 496 return parseXML(new FileInputStream(xml_file)); 497 } 498 catch (Exception exception) 499 { 500 DebugStream.printStackTrace(exception); 501 return null; 502 } 503 } 504 505 /** Parse an XML document from a given input stream */ 506 static public Document parseXML(InputStream xml_input_stream) 507 { 508 Document document = null; 509 510 try 511 { 512 InputStreamReader isr = new InputStreamReader(xml_input_stream, "UTF-8"); 513 document = parseXML(isr); 514 isr.close(); 515 xml_input_stream.close(); 516 } 517 catch (Exception exception) 518 { 519 DebugStream.printStackTrace(exception); 520 } 521 522 return document; 523 } 524 525 /** Parse an XML document from a given reader */ 526 static public Document parseXML(Reader xml_reader) 527 { 528 Document document = null; 529 530 // If debugging, the following will store the XML contents to be parsed, 531 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 532 String xmlContents = ""; 533 534 try 535 { 536 Reader reader = null; 537 538 // (1) By default, GLI will remove any contents preceeding (and invalidating) 539 // the XML and present these lines separately to the user 540 if (!DebugStream.isDebuggingEnabled()) 541 { 542 try 543 { 544 reader = new BufferedReader(new RemoveContentBeforeRootElementXMLReader(xml_reader)); 545 } 546 catch (Exception e) 547 { 548 System.err.println("Exception while wrapping the reader in parseXML(Reader)"); 549 e.printStackTrace(); 550 } 551 } 552 553 // (2) If we are running GLI in debug mode: 554 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some 555 // idea of where things went wrong. This will print the "XML" contents to either 556 // system.out (if debugging is off) or to the DebugStream otherwise. 557 // We need to read the XML twice to know the line where things went wrong, so 558 // do the additional reading only if we're debugging 559 else 560 { 561 StringBuffer buf = new StringBuffer(); 562 char[] buffer = new char[500]; 563 int numCharsRead = xml_reader.read(buffer, 0, buffer.length); 564 while (numCharsRead != -1) 565 { 566 buf.append(buffer, 0, numCharsRead); 567 numCharsRead = xml_reader.read(buffer, 0, buffer.length); 568 } 569 xmlContents = buf.toString(); 570 xml_reader.close(); // closing the old Reader 571 xml_reader = null; 572 buffer = null; 573 buf = null; 574 // we need a Reader to parse the same contents as the Reader that was just closed 575 reader = new BufferedReader(new StringReader(xmlContents)); 576 //System.err.println("xmlContents:\n" + xmlContents); 577 } 578 579 // (2) The actual XML parsing 580 InputSource isc = new InputSource(reader); 581 DOMParser parser = new DOMParser(); 582 parser.setFeature("http://xml.org/sax/features/validation", false); 583 parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 584 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 585 parser.setFeature("http://apache.org/xml/features/dom/defer-node-expansion", true); 586 parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 587 parser.parse(isc); 588 document = parser.getDocument(); 589 590 } 591 catch (SAXParseException e) 592 { 593 showXMLParseFailureLine(e, xmlContents); 594 } 595 catch (SAXException exception) 596 { 597 System.err.println("SAX exception: " + exception.getMessage()); 598 if (DebugStream.isDebuggingEnabled()) 599 { 600 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n"); 601 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 602 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML..."); 603 System.exit(-1); 604 } 605 // else, not running in debug mode, so don't exit after exception 606 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 607 DebugStream.printStackTrace(exception); 608 } 609 catch (Exception exception) 610 { 611 DebugStream.printStackTrace(exception); 612 } 613 614 return document; 615 } 616 617 /** 618 * Displays the line (string) where the SAXParseException occurred, given a 619 * String of the entire xml that was being parsed and the SAXParseException 620 * object that was caught. The messages are printed to DebugStream, so run 621 * GLI/FLI with -debug to view this output. 622 * 623 * @param xmlContents 624 * is the entire xml that was being parsed when the exception 625 * occurred 626 * @param e 627 * is the SAXParseException object that was thrown upon parsing 628 * the xmlContents. 629 */ 630 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) 631 { 632 633 // There should be no characters at all that preceed the <?xml>... bit. 634 // The first check is for starting spaces: 635 if (xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) 636 { 637 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); 638 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); 639 return; // nothing more to do, first error identified 640 } 641 642 // the actual line (String literal) where parsing failed and the SAXParseException occurred. 643 String line = ""; 644 int linenumber = e.getLineNumber(); 645 DebugStream.print("\n****SAXParseException on LINE NUMBER: " + linenumber); 646 if (DebugStream.isDebuggingEnabled()) 647 { 648 if (linenumber != -1) 649 { 650 String[] lines = xmlContents.split("\n"); 651 if (lines.length > 0) 652 { 653 DebugStream.println(" (number of lines: " + lines.length + ")"); 654 if (lines.length >= linenumber) 655 { 656 line = lines[linenumber - 1]; 657 } 658 else 659 { // error is past the last line 660 line = "Error is past the last line (" + lines.length + "): " + lines[lines.length - 1]; 661 } 662 } 663 else 664 { 665 DebugStream.print("\n"); 666 } 667 lines = null; 668 669 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 670 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 671 672 // Uncomment if you want to print out the entire contents of the XML doc: 673 //DebugStream.println("\n\nThis was the XML:\n*********START\n" 674 // + xmlContents + "\n************END\n"); 675 } 676 else 677 { // no particular line number, print out all the xml so debugger can inspect it 678 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" + xmlContents + "\n************END\n"); 679 } 680 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 681 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML..."); 682 System.exit(-1); 683 } 684 else 685 { // not running in debug mode 686 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 687 } 688 } 689 690 static public StringBuffer readXMLStream(InputStream input_stream) 691 { 692 StringBuffer xml = new StringBuffer(""); 693 694 try 695 { 696 InputStreamReader isr = new InputStreamReader(input_stream, "UTF-8"); 697 BufferedReader buffered_in = new BufferedReader(isr); 698 699 String line = ""; 700 boolean xml_content = false; 701 while ((line = buffered_in.readLine()) != null) 702 { 703 if (xml_content) 704 { 705 xml.append(line); 706 xml.append("\n"); 707 } 708 else if (line.trim().startsWith("<?xml")) 709 { 710 xml_content = true; 711 xml.append(line); 712 xml.append("\n"); 713 } 714 } 715 buffered_in = null; 716 } 717 catch (Exception error) 718 { 719 System.err.println("Failed when trying to parse XML stream"); 720 error.printStackTrace(); 721 } 722 723 return xml; 724 } 725 726 /** 727 * Removes characters that are invalid in XML (see 728 * http://www.w3.org/TR/2000/REC-xml-20001006#charsets) 729 */ 730 static public String removeInvalidCharacters(String text) 731 { 732 char[] safe_characters = new char[text.length()]; 733 int j = 0; 734 735 char[] raw_characters = new char[text.length()]; 736 text.getChars(0, text.length(), raw_characters, 0); 737 for (int i = 0; i < raw_characters.length; i++) 738 { 739 char character = raw_characters[i]; 740 if ((character >= 0x20 && character <= 0xD7FF) || character == 0x09 || character == 0x0A || character == 0x0D || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF)) 741 { 742 safe_characters[j] = character; 743 j++; 744 } 745 } 746 747 return new String(safe_characters, 0, j); 748 } 749 750 static public void setElementTextValue(Element element, String text) 751 { 752 // Remove all text node children 753 NodeList children_nodelist = element.getChildNodes(); 754 for (int i = children_nodelist.getLength() - 1; i >= 0; i--) 755 { 756 Node child_node = children_nodelist.item(i); 757 if (child_node.getNodeType() == Node.TEXT_NODE) 758 { 759 element.removeChild(child_node); 760 } 761 } 762 763 // Add a new text node 764 if (text != null) 765 { 766 element.appendChild(element.getOwnerDocument().createTextNode(text)); 767 } 768 } 769 770 /** 771 * Set the #text node value of some element. 772 * 773 * @param element 774 * the Element whose value we wish to set 775 * @param value 776 * the new value for the element as a String Soon to be 777 * deprecated! 778 */ 779 static final public void setValue(Element element, String value) 780 { 781 // Remove any existing child node(s) 782 clear(element); 783 // Add new text node. 784 if (value != null) 785 { 786 element.appendChild(element.getOwnerDocument().createTextNode(value)); 787 } 788 } 789 790 /** 791 * Write an XML document to a given file with the text node of the specified 792 * element unescaped 793 */ 794 static public void writeXMLFile(File xml_file, Document document, String[] nonEscapingTagNames) 795 { 796 try 797 { 798 OutputStream os = new FileOutputStream(xml_file); 799 // Create an output format for our document. 800 OutputFormat f = new OutputFormat(document); 801 f.setEncoding("UTF-8"); 802 f.setIndenting(true); 803 f.setLineWidth(0); // Why isn't this working! 804 f.setPreserveSpace(false); 805 if (nonEscapingTagNames != null) 806 { 807 f.setNonEscapingElements(nonEscapingTagNames); 808 } 809 // Create the necessary writer stream for serialization. 810 OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8"); 811 Writer w = new BufferedWriter(osw); 812 // Generate a new serializer from the above. 813 XMLSerializer s = new XMLSerializer(w, f); 814 s.asDOMSerializer(); 815 // Finally serialize the document to file. 816 s.serialize(document); 817 // And close. 818 os.close(); 819 } 820 catch (Exception exception) 821 { 822 DebugStream.printStackTrace(exception); 823 } 824 } 825 826 /** Write an XML document to a given file */ 827 static public void writeXMLFile(File xml_file, Document document) 828 { 829 writeXMLFile(xml_file, document, null); 830 } 831 832 public static void printXMLNode(Node e) 833 { 834 printXMLNode(e, 0); 835 } 836 837 public static void printXMLNode(Node e, int depth) 838 { //recursive method call using DOM API... 839 840 for (int i = 0; i < depth; i++) 841 System.out.print(' '); 842 843 if (e.getNodeType() == Node.TEXT_NODE) 844 { 845 //System.out.println("text") ; 846 if (e.getNodeValue() != "") 847 { 848 System.out.println(e.getNodeValue()); 849 } 850 return; 851 } 852 853 System.out.print('<'); 854 System.out.print(e.getNodeName()); 855 NamedNodeMap attrs = e.getAttributes(); 856 if (attrs != null) 857 { 858 for (int i = 0; i < attrs.getLength(); i++) 859 { 860 Node attr = attrs.item(i); 861 System.out.print(' '); 862 System.out.print(attr.getNodeName()); 863 System.out.print("=\""); 864 System.out.print(attr.getNodeValue()); 865 System.out.print('"'); 866 } 867 } 868 NodeList children = e.getChildNodes(); 869 870 if (children == null || children.getLength() == 0) 871 System.out.println("/>"); 872 else 873 { 874 875 System.out.println('>'); 876 877 int len = children.getLength(); 878 for (int i = 0; i < len; i++) 879 { 880 printXMLNode(children.item(i), depth + 1); 881 } 882 883 for (int i = 0; i < depth; i++) 884 System.out.print(' '); 885 886 System.out.println("</" + e.getNodeName() + ">"); 887 } 888 889 } 890 891 public static String xmlNodeToString(Node e) 892 { 893 StringBuffer sb = new StringBuffer(""); 894 xmlNodeToString(sb, e, 0); 895 return sb.toString(); 896 } 897 898 private static void xmlNodeToString(StringBuffer sb, Node e, int depth) 899 { 900 901 for (int i = 0; i < depth; i++) 902 sb.append(' '); 903 904 if (e.getNodeType() == Node.TEXT_NODE) 905 { 906 if (e.getNodeValue() != "") 907 { 908 sb.append(e.getNodeValue()); 909 } 910 return; 911 } 912 913 sb.append('<'); 914 sb.append(e.getNodeName()); 915 NamedNodeMap attrs = e.getAttributes(); 916 if (attrs != null) 917 { 918 for (int i = 0; i < attrs.getLength(); i++) 919 { 920 Node attr = attrs.item(i); 921 sb.append(' '); 922 sb.append(attr.getNodeName()); 923 sb.append("=\""); 924 sb.append(attr.getNodeValue()); 925 sb.append('"'); 926 } 927 } 928 NodeList children = e.getChildNodes(); 929 930 if (children == null || children.getLength() == 0) 931 sb.append("/>\n"); 932 else 933 { 934 935 sb.append(">\n"); 936 937 int len = children.getLength(); 938 for (int i = 0; i < len; i++) 939 { 940 xmlNodeToString(sb, children.item(i), depth + 1); 941 } 942 943 for (int i = 0; i < depth; i++) 944 sb.append(' '); 945 946 sb.append("</" + e.getNodeName() + ">\n"); 947 } 948 949 } 950 951 public static String xmlNodeToStringWithoutIndenting(Node e) 952 { 953 StringBuffer sb = new StringBuffer(""); 954 xmlNodeToStringWithoutNewline(sb, e, -1); 955 return sb.toString(); 956 } 957 958 public static String xmlNodeToStringWithoutNewline(Node e) 959 { 960 StringBuffer sb = new StringBuffer(""); 961 xmlNodeToStringWithoutNewline(sb, e, 0); 962 return sb.toString(); 963 } 964 965 private static void xmlNodeToStringWithoutNewline(StringBuffer sb, Node e, int depth) 966 { 967 968 for (int i = 0; i < depth; i++) 969 { 970 sb.append(' '); 971 } 972 973 if (e.getNodeType() == Node.TEXT_NODE) 974 { 975 if (e.getNodeValue() != "") 976 { 977 sb.append(e.getNodeValue()); 978 } 979 return; 980 } 981 982 sb.append('<'); 983 sb.append(e.getNodeName()); 984 NamedNodeMap attrs = e.getAttributes(); 985 if (attrs != null) 986 { 987 for (int i = 0; i < attrs.getLength(); i++) 988 { 989 Node attr = attrs.item(i); 990 sb.append(' '); 991 sb.append(attr.getNodeName()); 992 sb.append("=\""); 993 sb.append(attr.getNodeValue()); 994 sb.append('"'); 995 } 996 } 997 NodeList children = e.getChildNodes(); 998 999 if (children == null || children.getLength() == 0) 1000 sb.append("/>"); 1001 else 1002 { 1003 1004 sb.append(">"); 1005 1006 int len = children.getLength(); 1007 for (int i = 0; i < len; i++) 1008 { 1009 if (depth >= 0) 1010 { 1011 xmlNodeToStringWithoutNewline(sb, children.item(i), depth + 1); 1012 } 1013 else 1014 { 1015 xmlNodeToStringWithoutNewline(sb, children.item(i), depth); 1016 } 1017 } 1018 1019 for (int i = 0; i < depth; i++) 1020 sb.append(' '); 1021 1022 sb.append("</" + e.getNodeName() + ">"); 1023 } 1024 } 834 1025 }
Note:
See TracChangeset
for help on using the changeset viewer.