Changeset 18170 for gli/trunk


Ignore:
Timestamp:
2008-12-10T15:30:13+13:00 (15 years ago)
Author:
oranfry
Message:

merged in code to do error reporting on xml parsing, even without debug mode on

Location:
gli/trunk
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • gli/trunk

    • Property svn:ignore
      •  

        old new  
        11jar
         2GLIServer.jar
        23GLI.jar
        3 GLIServer.jar
  • gli/trunk/src/org/greenstone/gatherer/util/XMLTools.java

    r16988 r18170  
    261261    static final public String NOTWELLFORMED= "not well-formed";
    262262    static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
    263     static final private String FOOTER = "</collectionConfig>";
     263    static final private String FOOTER = "</collectionConfig>";
    264264       
    265265public static String parse (String xml_str) {
     
    332332            SAXParser parser = factory.newSAXParser ();
    333333            FileReader r = new FileReader(xml_file);
    334             InputSource iSource = new InputSource(r);
     334            InputSource iSource = new InputSource(r);
    335335            XMLReader reader = parser.getXMLReader ();
    336336            reader.setContentHandler(new DefaultHandler());
     
    410410        try {
    411411            InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8");
    412             Reader xml_reader = new BufferedReader (isr);
    413             document = parseXML (xml_reader);
     412            document = parseXML(isr);
    414413            isr.close ();
    415414            xml_input_stream.close ();
     
    425424    /** Parse an XML document from a given reader */
    426425    static public Document parseXML (Reader xml_reader) {
    427     // If debugging, the following will store the XML contents to be parsed,
    428     // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
    429     String xmlContents = "";
    430 
    431     Document document = null;
    432     try {
    433         // (1) In case parsing exceptions are thrown (SAX Exceptions), we want to get some
    434         // idea of where things went wrong. This will print the "XML" contents to either
    435         // system.out (if debugging is off) or to the DebugStream otherwise.
    436         // We need to read the XML twice to know the line where things went wrong, so
    437         // do the additional reading only if we're debugging
    438         if(DebugStream.isDebuggingEnabled()) {     
    439         StringBuffer buf = new StringBuffer();
    440         char[] buffer = new char[500];
    441         int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
    442         while(numCharsRead != -1) {
    443             buf.append(buffer, 0, numCharsRead);
    444             numCharsRead = xml_reader.read(buffer, 0, buffer.length);
    445         }
    446         xmlContents = buf.toString();
    447         xml_reader.close(); // closing the old Reader
    448         xml_reader = null;
    449         buffer = null;
    450         buf = null;
    451         // we need a Reader to parse the same contents as the Reader that was just closed
    452         xml_reader = new StringReader(xmlContents);
    453         }
    454        
    455         // (2) The actual XML parsing
    456             InputSource isc       = new InputSource (xml_reader);
    457         DOMParser parser      = new DOMParser ();
    458         parser.setFeature ("http://xml.org/sax/features/validation", false);
    459             parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    460             // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
    461             parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
    462             parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
    463             parser.parse (isc);
    464             document = parser.getDocument ();
    465        
    466     } catch(SAXParseException e) {
    467         showXMLParseFailureLine(e, xmlContents);
    468     } catch (SAXException exception) {
    469         System.err.println ("SAX exception: " + exception.getMessage ());
    470         if(DebugStream.isDebuggingEnabled()) {
    471         DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
    472                     + xmlContents + "\n************END\n");
    473         // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
    474         DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
    475         System.exit(-1);
    476         }
    477         // else, not running in debug mode, so don't exit after exception 
    478         System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
    479         DebugStream.printStackTrace (exception);
     426        Document document = null;
     427
     428    // If debugging, the following will store the XML contents to be parsed,
     429    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
     430    String xmlContents = "";
     431
     432    try {
     433        Reader reader = null;
     434
     435        // (1) By default, GLI will remove any contents preceding (and invalidating)
     436        // the XML and present these lines separately to the user
     437        if(!DebugStream.isDebuggingEnabled()) {
     438            try {
     439                reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) );
     440            } catch ( Exception e ) {
     441                System.err.println( "Exception while wrapping the reader in parseXML(Reader)" );
     442                e.printStackTrace();
     443            }
     444        }
     445
     446        // (2) If we are running GLI in debug mode:
     447        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
     448        // idea of where things went wrong. This will print the "XML" contents to either
     449        // system.out (if debugging is off) or to the DebugStream otherwise.
     450        // We need to read the XML twice to know the line where things went wrong, so
     451        // do the additional reading only if we're debugging
     452        else {
     453            StringBuffer buf = new StringBuffer();
     454            char[] buffer = new char[500];
     455            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     456            while(numCharsRead != -1) {
     457                buf.append(buffer, 0, numCharsRead);
     458                numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     459            }
     460            xmlContents = buf.toString();
     461            xml_reader.close(); // closing the old Reader
     462            xml_reader = null;
     463            buffer = null;
     464            buf = null;
     465            // we need a Reader to parse the same contents as the Reader that was just closed
     466            reader = new BufferedReader(new StringReader(xmlContents));
     467        }
     468       
     469        // (3) The actual XML parsing
     470        InputSource isc       = new InputSource (reader);
     471        DOMParser parser      = new DOMParser ();
     472        parser.setFeature ("http://xml.org/sax/features/validation", false);
     473        parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
     474        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
     475        parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
     476        parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
     477        parser.parse (isc);
     478        document = parser.getDocument ();
     479       
     480    } catch(SAXParseException e) {
     481        showXMLParseFailureLine(e, xmlContents);
     482    } catch (SAXException exception) {
     483        System.err.println ("SAX exception: " + exception.getMessage ());
     484        if(DebugStream.isDebuggingEnabled()) {
     485        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     486                    + xmlContents + "\n************END\n");
     487        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     488        DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
     489        System.exit(-1);
     490        }
     491        // else, not running in debug mode, so don't exit after exception 
     492        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
     493        DebugStream.printStackTrace (exception);
    480494        }
    481495        catch (Exception exception) {
     
    493507     */
    494508    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) {
    495    
    496     // There should be no characters at all that preceed the <?xml>... bit.
    497     // The first check is for starting spaces:
    498     if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) {
    499         DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
    500         DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
    501         return; // nothing more to do, first error identified
    502     }
    503 
    504     // the actual line (String literal) where parsing failed and the SAXParseException occurred.
    505     String line = "";
    506     int linenumber = e.getLineNumber();
    507     DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber);
    508     if(DebugStream.isDebuggingEnabled()) {
    509         if(linenumber != -1) {
    510         // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on)
    511         int start = 0;
    512         int end = xmlContents.length();
    513         for(int i = 1; i <= linenumber; i++) {
    514             end = xmlContents.indexOf("\n");
    515             if(end > 0) {
    516             line = xmlContents.substring(start, end);
    517             }
    518             start = end+1;
    519         }
    520         DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
    521         DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
    522         } else { // no particular line number, print out all the xml so debugger can inspect it
    523         DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
    524                     + xmlContents + "\n************END\n");
    525         }
    526         // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
    527         DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
    528         System.exit(-1);
    529     } else { // not running in debug mode
    530         System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
    531     }
     509   
     510        // There should be no characters at all that precede the <?xml>... bit.
     511        // The first check is for starting spaces:
     512        if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) {
     513            DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
     514            DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
     515            return; // nothing more to do, first error identified
     516        }
     517   
     518        // the actual line (String literal) where parsing failed and the SAXParseException occurred.
     519        String line = "";
     520        int linenumber = e.getLineNumber();
     521        DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber);
     522        if(DebugStream.isDebuggingEnabled()) {
     523            if(linenumber != -1) {
     524                // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on)
     525                int start = 0;
     526                int end = xmlContents.length();
     527                for(int i = 1; i <= linenumber; i++) {
     528                    end = xmlContents.indexOf("\n");
     529                    if(end > 0) {
     530                        line = xmlContents.substring(start, end);
     531                    }
     532                    start = end+1;
     533                }
     534                DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
     535                DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
     536            } else { // no particular line number, print out all the xml so debugger can inspect it
     537                DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     538                    + xmlContents + "\n************END\n");
     539            }
     540            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     541            DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
     542            System.exit(-1);
     543        } else { // not running in debug mode
     544            System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
     545        }
    532546    }
    533547
     
    625639            f.setPreserveSpace (false);
    626640            if (nonEscapingTagNames != null) {
    627           f.setNonEscapingElements (nonEscapingTagNames);
    628         }
     641          f.setNonEscapingElements (nonEscapingTagNames);
     642        }
    629643            // Create the necessary writer stream for serialization.
    630644            OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
     
    797811            int len = children.getLength ();
    798812            for (int i = 0; i < len; i++) {
    799           if (depth >= 0) {
     813          if (depth >= 0) {
    800814                xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1);
    801           } else {
    802         xmlNodeToStringWithoutNewline (sb,children.item (i), depth);
    803           }
     815          } else {
     816        xmlNodeToStringWithoutNewline (sb,children.item (i), depth);
     817          }
    804818            }
    805819           
Note: See TracChangeset for help on using the changeset viewer.