Changeset 15581 for gli/trunk


Ignore:
Timestamp:
2008-05-19T17:36:48+12:00 (16 years ago)
Author:
ak19
Message:

Changed method parseXML(Reader r) to tell us where XML parsing went wrong, as long as GLI/FLI is run with -debug. Added method showXMLParseFailureLine to show the textual contents of the line where parsing failed, also when GLI/FLI is run with -debug.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gli/trunk/src/org/greenstone/gatherer/util/XMLTools.java

    r14053 r15581  
    372372    } // getLocationString(SAXParseException):String
    373373
     374
    374375    /** Parse an XML document from a given file path */
    375376    static public Document parseXMLFile (String xml_file_path, boolean use_class_loader) {
     
    424425    /** Parse an XML document from a given reader */
    425426    static public Document parseXML (Reader xml_reader) {
    426         Document document = null;
    427        
    428         try {
     427    // If debugging, the following will store the XML contents to be parsed,
     428    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
     429    String xmlContents = "";
     430
     431    Document document = null;
     432    try {
     433        // (1) In case parsing exceptions are thrown (SAX Exceptions), we want to get some
     434        // idea of where things went wrong. This will print the "XML" contents to either
     435        // system.out (if debugging is off) or to the DebugStream otherwise.
     436        // We need to read the XML twice to know the line where things went wrong, so
     437        // do the additional reading only if we're debugging
     438        if(DebugStream.isDebuggingEnabled()) {     
     439        StringBuffer buf = new StringBuffer();
     440        char[] buffer = new char[500];
     441        int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     442        while(numCharsRead != -1) {
     443            buf.append(buffer, 0, numCharsRead);
     444            numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     445        }
     446        xmlContents = buf.toString();
     447        xml_reader.close(); // closing the old Reader
     448        xml_reader = null;
     449        buffer = null;
     450        buf = null;
     451        // we need a Reader to parse the same contents as the Reader that was just closed
     452        xml_reader = new StringReader(xmlContents);
     453        }
     454       
     455        // (2) The actual XML parsing
    429456            InputSource isc       = new InputSource (xml_reader);
    430             DOMParser parser      = new DOMParser ();
    431             parser.setFeature ("http://xml.org/sax/features/validation", false);
     457        DOMParser parser      = new DOMParser ();
     458        parser.setFeature ("http://xml.org/sax/features/validation", false);
    432459            parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    433460            // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
     
    436463            parser.parse (isc);
    437464            document = parser.getDocument ();
    438         }
    439         catch (SAXException exception) {
    440             System.err.println ("SAX exception: " + exception.getMessage ());
    441             DebugStream.printStackTrace (exception);
     465       
     466    } catch(SAXParseException e) {
     467        showXMLParseFailureLine(e, xmlContents);
     468    } catch (SAXException exception) {
     469        System.err.println ("SAX exception: " + exception.getMessage ());
     470        if(DebugStream.isDebuggingEnabled()) {
     471        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     472                    + xmlContents + "\n************END\n");
     473        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     474        DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
     475        System.exit(-1);
     476        }
     477        // else, not running in debug mode, so don't exit after exception 
     478        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
     479        DebugStream.printStackTrace (exception);
    442480        }
    443481        catch (Exception exception) {
     
    447485        return document;
    448486    }
    449    
     487
     488    /** Displays the line (string) where the SAXParseException occurred, given a String of the
     489     * entire xml that was being parsed and the SAXParseException object that was caught.
     490     * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output.
     491     * @param xmlContents is the entire xml that was being parsed when the exception occurred
     492     * @param e is the SAXParseException object that was thrown upon parsing the xmlContents.
     493     */
     494    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) {
     495    // the actual line (String literal) where parsing failed and the SAXParseException occurred.
     496    String line = "";
     497    int linenumber = e.getLineNumber();
     498    DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber);
     499    if(DebugStream.isDebuggingEnabled()) {
     500        if(linenumber != -1) {
     501        // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on)
     502        int start = 0;
     503        int end = xmlContents.length();
     504        for(int i = 1; i <= linenumber; i++) {
     505            end = xmlContents.indexOf("\n");
     506            line = xmlContents.substring(start, end);
     507            start = end+1;
     508        }
     509        DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
     510        DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
     511        } else { // no particular line number, print out all the xml so debugger can inspect it
     512        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     513                    + xmlContents + "\n************END\n");
     514        }
     515        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     516        DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
     517        System.exit(-1);
     518    } else { // not running in debug mode
     519        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
     520    }
     521    }
     522
    450523   
    451524    static public StringBuffer readXMLStream (InputStream input_stream) {
     
    735808            sb.append ("</" + e.getNodeName () + ">");
    736809        }
    737        
    738        
    739     }
    740    
     810    }
    741811}
Note: See TracChangeset for help on using the changeset viewer.