- Timestamp:
- 2008-12-10T15:30:13+13:00 (15 years ago)
- Location:
- gli/trunk
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gli/trunk
- Property svn:ignore
-
old  new
1    1    jar
     2    GLIServer.jar
2    3    GLI.jar
3         GLIServer.jar
-
- Property svn:ignore
-
gli/trunk/src/org/greenstone/gatherer/util/XMLTools.java
r16988 r18170 261 261 static final public String NOTWELLFORMED= "not well-formed"; 262 262 static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>"; 263 263 static final private String FOOTER = "</collectionConfig>"; 264 264 265 265 public static String parse (String xml_str) { … … 332 332 SAXParser parser = factory.newSAXParser (); 333 333 FileReader r = new FileReader(xml_file); 334 334 InputSource iSource = new InputSource(r); 335 335 XMLReader reader = parser.getXMLReader (); 336 336 reader.setContentHandler(new DefaultHandler()); … … 410 410 try { 411 411 InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8"); 412 Reader xml_reader = new BufferedReader (isr); 413 document = parseXML (xml_reader); 412 document = parseXML(isr); 414 413 isr.close (); 415 414 xml_input_stream.close (); … … 425 424 /** Parse an XML document from a given reader */ 426 425 static public Document parseXML (Reader xml_reader) { 427 // If debugging, the following will store the XML contents to be parsed, 428 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 429 String xmlContents = ""; 430 431 Document document = null; 432 try { 433 // (1) In case parsing exceptions are thrown (SAX Exceptions), we want to get some 434 // idea of where things went wrong. This will print the "XML" contents to either 435 // system.out (if debugging is off) or to the DebugStream otherwise. 
436 // We need to read the XML twice to know the line where things went wrong, so 437 // do the additional reading only if we're debugging 438 if(DebugStream.isDebuggingEnabled()) { 439 StringBuffer buf = new StringBuffer(); 440 char[] buffer = new char[500]; 441 int numCharsRead = xml_reader.read(buffer, 0, buffer.length); 442 while(numCharsRead != -1) { 443 buf.append(buffer, 0, numCharsRead); 444 numCharsRead = xml_reader.read(buffer, 0, buffer.length); 445 } 446 xmlContents = buf.toString(); 447 xml_reader.close(); // closing the old Reader 448 xml_reader = null; 449 buffer = null; 450 buf = null; 451 // we need a Reader to parse the same contents as the Reader that was just closed 452 xml_reader = new StringReader(xmlContents); 453 } 454 455 // (2) The actual XML parsing 456 InputSource isc = new InputSource (xml_reader); 457 DOMParser parser = new DOMParser (); 458 parser.setFeature ("http://xml.org/sax/features/validation", false); 459 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 460 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 461 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 462 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 463 parser.parse (isc); 464 document = parser.getDocument (); 465 466 } catch(SAXParseException e) { 467 showXMLParseFailureLine(e, xmlContents); 468 } catch (SAXException exception) { 469 System.err.println ("SAX exception: " + exception.getMessage ()); 470 if(DebugStream.isDebuggingEnabled()) { 471 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" 472 + xmlContents + "\n************END\n"); 473 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
474 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML..."); 475 System.exit(-1); 476 } 477 // else, not running in debug mode, so don't exit after exception 478 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 479 DebugStream.printStackTrace (exception); 426 Document document = null; 427 428 // If debugging, the following will store the XML contents to be parsed, 429 // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 430 String xmlContents = ""; 431 432 try { 433 Reader reader = null; 434 435 // (1) By default, GLI will remove any contents preceeding (and invalidating) 436 // the XML and present these lines separately to the user 437 if(!DebugStream.isDebuggingEnabled()) { 438 try { 439 reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) ); 440 } catch ( Exception e ) { 441 System.err.println( "Exception while wrapping the reader in parseXML(Reader)" ); 442 e.printStackTrace(); 443 } 444 } 445 446 // (2) If we are running GLI in debug mode: 447 // In case parsing exceptions are thrown (SAX Exceptions), we want to get some 448 // idea of where things went wrong. This will print the "XML" contents to either 449 // system.out (if debugging is off) or to the DebugStream otherwise. 
450 // We need to read the XML twice to know the line where things went wrong, so 451 // do the additional reading only if we're debugging 452 else { 453 StringBuffer buf = new StringBuffer(); 454 char[] buffer = new char[500]; 455 int numCharsRead = xml_reader.read(buffer, 0, buffer.length); 456 while(numCharsRead != -1) { 457 buf.append(buffer, 0, numCharsRead); 458 numCharsRead = xml_reader.read(buffer, 0, buffer.length); 459 } 460 xmlContents = buf.toString(); 461 xml_reader.close(); // closing the old Reader 462 xml_reader = null; 463 buffer = null; 464 buf = null; 465 // we need a Reader to parse the same contents as the Reader that was just closed 466 reader = new BufferedReader(new StringReader(xmlContents)); 467 } 468 469 // (2) The actual XML parsing 470 InputSource isc = new InputSource (reader); 471 DOMParser parser = new DOMParser (); 472 parser.setFeature ("http://xml.org/sax/features/validation", false); 473 parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 474 // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 475 parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 476 parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 477 parser.parse (isc); 478 document = parser.getDocument (); 479 480 } catch(SAXParseException e) { 481 showXMLParseFailureLine(e, xmlContents); 482 } catch (SAXException exception) { 483 System.err.println ("SAX exception: " + exception.getMessage ()); 484 if(DebugStream.isDebuggingEnabled()) { 485 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" 486 + xmlContents + "\n************END\n"); 487 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
488 DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML..."); 489 System.exit(-1); 490 } 491 // else, not running in debug mode, so don't exit after exception 492 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 493 DebugStream.printStackTrace (exception); 480 494 } 481 495 catch (Exception exception) { … … 493 507 */ 494 508 public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) { 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 509 510 // There should be no characters at all that preceed the <?xml>... bit. 511 // The first check is for starting spaces: 512 if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) { 513 DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); 514 DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); 515 return; // nothing more to do, first error identified 516 } 517 518 // the actual line (String literal) where parsing failed and the SAXParseException occurred. 
519 String line = ""; 520 int linenumber = e.getLineNumber(); 521 DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber); 522 if(DebugStream.isDebuggingEnabled()) { 523 if(linenumber != -1) { 524 // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on) 525 int start = 0; 526 int end = xmlContents.length(); 527 for(int i = 1; i <= linenumber; i++) { 528 end = xmlContents.indexOf("\n"); 529 if(end > 0) { 530 line = xmlContents.substring(start, end); 531 } 532 start = end+1; 533 } 534 DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 535 DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 536 } else { // no particular line number, print out all the xml so debugger can inspect it 537 DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n" 538 + xmlContents + "\n************END\n"); 539 } 540 // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 541 DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML..."); 542 System.exit(-1); 543 } else { // not running in debug mode 544 System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 545 } 532 546 } 533 547 … … 625 639 f.setPreserveSpace (false); 626 640 if (nonEscapingTagNames != null) { 627 628 641 f.setNonEscapingElements (nonEscapingTagNames); 642 } 629 643 // Create the necessary writer stream for serialization. 630 644 OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); … … 797 811 int len = children.getLength (); 798 812 for (int i = 0; i < len; i++) { 799 813 if (depth >= 0) { 800 814 xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1); 801 802 803 815 } else { 816 xmlNodeToStringWithoutNewline (sb,children.item (i), depth); 817 } 804 818 } 805 819
Note: See TracChangeset for help on using the changeset viewer.