427 | | |
428 | | try { |
429 | | InputSource isc = new InputSource (xml_reader); |
430 | | DOMParser parser = new DOMParser (); |
431 | | parser.setFeature ("http://xml.org/sax/features/validation", false); |
432 | | parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); |
433 | | // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. |
434 | | parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); |
435 | | parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); |
436 | | parser.parse (isc); |
437 | | document = parser.getDocument (); |
438 | | } |
439 | | catch (SAXException exception) { |
440 | | System.err.println ("SAX exception: " + exception.getMessage ()); |
441 | | DebugStream.printStackTrace (exception); |
| 427 | |
| 428 | // If debugging, the following will store the XML contents to be parsed, |
| 429 | // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) |
| 430 | String xmlContents = ""; |
| 431 | |
| 432 | try { |
| 433 | Reader reader = null; |
| 434 | |
| 435 | // (1) By default, GLI will remove any contents preceding (and invalidating) |
| 436 | // the XML and present these lines separately to the user |
| 437 | if(!DebugStream.isDebuggingEnabled()) { |
| 438 | try { |
| 439 | reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) ); |
| 440 | } catch ( Exception e ) { |
| 441 | System.err.println( "Exception while wrapping the reader in parseXML(Reader)" ); |
| 442 | e.printStackTrace(); |
| 443 | } |
| 444 | } |
| 445 | |
| 446 | // (2) If we are running GLI in debug mode: |
| 447 | // In case parsing exceptions are thrown (SAX Exceptions), we want to get some |
| 448 | // idea of where things went wrong. This will print the "XML" contents to either |
| 449 | // system.out (if debugging is off) or to the DebugStream otherwise. |
| 450 | // We need to read the XML twice to know the line where things went wrong, so |
| 451 | // do the additional reading only if we're debugging |
| 452 | else { |
| 453 | StringBuffer buf = new StringBuffer(); |
| 454 | char[] buffer = new char[500]; |
| 455 | int numCharsRead = xml_reader.read(buffer, 0, buffer.length); |
| 456 | while(numCharsRead != -1) { |
| 457 | buf.append(buffer, 0, numCharsRead); |
| 458 | numCharsRead = xml_reader.read(buffer, 0, buffer.length); |
| 459 | } |
| 460 | xmlContents = buf.toString(); |
| 461 | xml_reader.close(); // closing the old Reader |
| 462 | xml_reader = null; |
| 463 | buffer = null; |
| 464 | buf = null; |
| 465 | // we need a Reader to parse the same contents as the Reader that was just closed |
| 466 | reader = new BufferedReader(new StringReader(xmlContents)); |
| 467 | } |
| 468 | |
| 469 | // (3) The actual XML parsing |
| 470 | InputSource isc = new InputSource (reader); |
| 471 | DOMParser parser = new DOMParser (); |
| 472 | parser.setFeature ("http://xml.org/sax/features/validation", false); |
| 473 | parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); |
| 474 | // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. |
| 475 | parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); |
| 476 | parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); |
| 477 | parser.parse (isc); |
| 478 | document = parser.getDocument (); |
| 479 | |
| 480 | } catch(SAXParseException e) { |
| 481 | showXMLParseFailureLine(e, xmlContents); |
| 482 | } catch (SAXException exception) { |
| 483 | System.err.println ("SAX exception: " + exception.getMessage ()); |
| 484 | if(DebugStream.isDebuggingEnabled()) { |
| 485 | DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" |
| 486 | + xmlContents + "\n************END\n"); |
| 487 | // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? |
| 488 | DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML..."); |
| 489 | System.exit(-1); |
| 490 | } |
| 491 | // else, not running in debug mode, so don't exit after exception |
| 492 | System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); |
| 493 | DebugStream.printStackTrace (exception); |
449 | | |
| 501 | |
| 502 | /** Displays the line (string) where the SAXParseException occurred, given a String of the |
| 503 | * entire xml that was being parsed and the SAXParseException object that was caught. |
| 504 | * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output. |
| 505 | * @param xmlContents is the entire xml that was being parsed when the exception occurred |
| 506 | * @param e is the SAXParseException object that was thrown upon parsing the xmlContents. |
| 507 | */ |
| 508 | public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) { |
| 509 | |
| 510 | // There should be no characters at all that preceed the <?xml>... bit. |
| 511 | // The first check is for starting spaces: |
| 512 | if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) { |
| 513 | DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); |
| 514 | DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); |
| 515 | return; // nothing more to do, first error identified |
| 516 | } |
| 517 | |
| 518 | // the actual line (String literal) where parsing failed and the SAXParseException occurred. |
| 519 | String line = ""; |
| 520 | int linenumber = e.getLineNumber(); |
| 521 | DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber); |
| 522 | if(DebugStream.isDebuggingEnabled()) { |
| 523 | if(linenumber != -1) { |
| 524 | // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on) |
| 525 | int start = 0; |
| 526 | int end = xmlContents.length(); |
| 527 | for(int i = 1; i <= linenumber; i++) { |
| 528 | end = xmlContents.indexOf("\n"); |
| 529 | if(end > 0) { |
| 530 | line = xmlContents.substring(start, end); |
| 531 | } |
| 532 | start = end+1; |
| 533 | } |
| 534 | DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); |
| 535 | DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); |
| 536 | } else { // no particular line number, print out all the xml so debugger can inspect it |
| 537 | DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" |
| 538 | + xmlContents + "\n************END\n"); |
| 539 | } |
| 540 | // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? |
| 541 | DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML..."); |
| 542 | System.exit(-1); |
| 543 | } else { // not running in debug mode |
| 544 | System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); |
| 545 | } |
| 546 | } |
| 547 | |
562 | | try { |
563 | | OutputStream os = new FileOutputStream (xml_file); |
564 | | // Create an output format for our document. |
565 | | OutputFormat f = new OutputFormat (document); |
566 | | f.setEncoding ("UTF-8"); |
567 | | f.setIndenting (true); |
568 | | f.setLineWidth (0); // Why isn't this working! |
569 | | f.setPreserveSpace (false); |
570 | | // Create the necessary writer stream for serialization. |
571 | | OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); |
572 | | Writer w = new BufferedWriter (osw); |
573 | | // Generate a new serializer from the above. |
574 | | XMLSerializer s = new XMLSerializer (w, f); |
575 | | s.asDOMSerializer (); |
576 | | // Finally serialize the document to file. |
577 | | s.serialize (document); |
578 | | // And close. |
579 | | os.close (); |
580 | | } |
581 | | catch (Exception exception) { |
582 | | DebugStream.printStackTrace (exception); |
583 | | } |
584 | | } |
585 | | |
| 661 | writeXMLFile(xml_file, document, null); |
| 662 | } |
| 663 | |