Changeset 15581

Show
Ignore:
Timestamp:
19.05.2008 17:36:48 (11 years ago)
Author:
ak19
Message:

Changed method parseXML(Reader r) to tell us where XML parsing went wrong, as long as GLI/FLI is run with -debug. Added method showXMLParseFailureLine to show the textual contents of the line where parsing failed, also when GLI/FLI is run with -debug.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gli/trunk/src/org/greenstone/gatherer/util/XMLTools.java

    r14053 r15581  
    372372    } // getLocationString(SAXParseException):String 
    373373 
     374 
    374375    /** Parse an XML document from a given file path */ 
    375376    static public Document parseXMLFile (String xml_file_path, boolean use_class_loader) { 
     
    424425    /** Parse an XML document from a given reader */ 
    425426    static public Document parseXML (Reader xml_reader) { 
    426         Document document = null; 
    427          
    428         try { 
     427    // If debugging, the following will store the XML contents to be parsed,  
     428    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 
     429    String xmlContents = ""; 
     430 
     431    Document document = null; 
     432    try { 
     433        // (1) In case parsing exceptions are thrown (SAX Exceptions), we want to get some 
     434        // idea of where things went wrong. This will print the "XML" contents to either 
     435        // system.out (if debugging is off) or to the DebugStream otherwise. 
     436        // We need to read the XML twice to know the line where things went wrong, so 
     437        // do the additional reading only if we're debugging  
     438        if(DebugStream.isDebuggingEnabled()) {       
     439        StringBuffer buf = new StringBuffer(); 
     440        char[] buffer = new char[500]; 
     441        int numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
     442        while(numCharsRead != -1) { 
     443            buf.append(buffer, 0, numCharsRead); 
     444            numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
     445        } 
     446        xmlContents = buf.toString(); 
     447        xml_reader.close(); // closing the old Reader 
     448        xml_reader = null; 
     449        buffer = null; 
     450        buf = null; 
     451        // we need a Reader to parse the same contents as the Reader that was just closed 
     452        xml_reader = new StringReader(xmlContents); 
     453        } 
     454         
     455        // (2) The actual XML parsing 
    429456            InputSource isc       = new InputSource (xml_reader); 
    430             DOMParser parser      = new DOMParser (); 
    431             parser.setFeature ("http://xml.org/sax/features/validation", false); 
     457        DOMParser parser      = new DOMParser (); 
     458        parser.setFeature ("http://xml.org/sax/features/validation", false); 
    432459            parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 
    433460            // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 
     
    436463            parser.parse (isc); 
    437464            document = parser.getDocument (); 
    438         } 
    439         catch (SAXException exception) { 
    440             System.err.println ("SAX exception: " + exception.getMessage ()); 
    441             DebugStream.printStackTrace (exception); 
     465         
     466    } catch(SAXParseException e) { 
     467        showXMLParseFailureLine(e, xmlContents); 
     468    } catch (SAXException exception) { 
     469        System.err.println ("SAX exception: " + exception.getMessage ()); 
     470        if(DebugStream.isDebuggingEnabled()) { 
     471        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
     472                    + xmlContents + "\n************END\n"); 
     473        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
     474        DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");  
     475        System.exit(-1); 
     476        }  
     477        // else, not running in debug mode, so don't exit after exception   
     478        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 
     479        DebugStream.printStackTrace (exception); 
    442480        } 
    443481        catch (Exception exception) { 
     
    447485        return document; 
    448486    } 
    449      
     487 
     488    /** Displays the line (string) where the SAXParseException occurred, given a String of the  
     489     * entire xml that was being parsed and the SAXParseException object that was caught.  
     490     * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output.  
     491     * @param xmlContents is the entire xml that was being parsed when the exception occurred 
     492     * @param e is the SAXParseException object that was thrown upon parsing the xmlContents. 
     493     */ 
     494    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) { 
     495    // the actual line (String literal) where parsing failed and the SAXParseException occurred. 
     496    String line = ""; 
     497    int linenumber = e.getLineNumber(); 
     498    DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber); 
     499    if(DebugStream.isDebuggingEnabled()) { 
     500        if(linenumber != -1) {  
     501        // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on) 
     502        int start = 0; 
     503        int end = xmlContents.length(); 
     504        for(int i = 1; i <= linenumber; i++) { 
     505            end = xmlContents.indexOf("\n"); 
     506            line = xmlContents.substring(start, end); 
     507            start = end+1; 
     508        } 
     509        DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 
     510        DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 
     511        } else { // no particular line number, print out all the xml so debugger can inspect it 
     512        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
     513                    + xmlContents + "\n************END\n"); 
     514        } 
     515        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
     516        DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");  
     517        System.exit(-1); 
     518    } else { // not running in debug mode 
     519        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 
     520    } 
     521    } 
     522 
    450523     
    451524    static public StringBuffer readXMLStream (InputStream input_stream) { 
     
    735808            sb.append ("</" + e.getNodeName () + ">"); 
    736809        } 
    737          
    738          
    739     } 
    740      
     810    } 
    741811}