Changeset 18170

Show
Ignore:
Timestamp:
10.12.2008 15:30:13 (11 years ago)
Author:
oranfry
Message:

Merged in code to report errors encountered during XML parsing, even when debug mode is off.

Location:
gli/trunk
Files:
1 added
2 modified

Legend:

Unmodified
Added
Removed
  • gli/trunk

    • Property svn:ignore
      •  

        old  new  value
        1    1    jar
             2    GLIServer.jar
        2    3    GLI.jar
        3         GLIServer.jar
  • gli/trunk/src/org/greenstone/gatherer/util/XMLTools.java

    r16988 r18170  
    261261    static final public String NOTWELLFORMED= "not well-formed"; 
    262262    static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>"; 
    263     static final private String FOOTER = "</collectionConfig>"; 
     263    static final private String FOOTER = "</collectionConfig>"; 
    264264        
    265265public static String parse (String xml_str) { 
     
    332332            SAXParser parser = factory.newSAXParser (); 
    333333            FileReader r = new FileReader(xml_file); 
    334             InputSource iSource = new InputSource(r); 
     334            InputSource iSource = new InputSource(r); 
    335335            XMLReader reader = parser.getXMLReader (); 
    336336            reader.setContentHandler(new DefaultHandler()); 
     
    410410        try { 
    411411            InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8"); 
    412             Reader xml_reader = new BufferedReader (isr); 
    413             document = parseXML (xml_reader); 
     412            document = parseXML(isr); 
    414413            isr.close (); 
    415414            xml_input_stream.close (); 
     
    425424    /** Parse an XML document from a given reader */ 
    426425    static public Document parseXML (Reader xml_reader) { 
    427     // If debugging, the following will store the XML contents to be parsed,  
    428     // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 
    429     String xmlContents = ""; 
    430  
    431     Document document = null; 
    432     try { 
    433         // (1) In case parsing exceptions are thrown (SAX Exceptions), we want to get some 
    434         // idea of where things went wrong. This will print the "XML" contents to either 
    435         // system.out (if debugging is off) or to the DebugStream otherwise. 
    436         // We need to read the XML twice to know the line where things went wrong, so 
    437         // do the additional reading only if we're debugging  
    438         if(DebugStream.isDebuggingEnabled()) {       
    439         StringBuffer buf = new StringBuffer(); 
    440         char[] buffer = new char[500]; 
    441         int numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
    442         while(numCharsRead != -1) { 
    443             buf.append(buffer, 0, numCharsRead); 
    444             numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
    445         } 
    446         xmlContents = buf.toString(); 
    447         xml_reader.close(); // closing the old Reader 
    448         xml_reader = null; 
    449         buffer = null; 
    450         buf = null; 
    451         // we need a Reader to parse the same contents as the Reader that was just closed 
    452         xml_reader = new StringReader(xmlContents); 
    453         } 
    454          
    455         // (2) The actual XML parsing 
    456             InputSource isc       = new InputSource (xml_reader); 
    457         DOMParser parser      = new DOMParser (); 
    458         parser.setFeature ("http://xml.org/sax/features/validation", false); 
    459             parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 
    460             // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 
    461             parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 
    462             parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 
    463             parser.parse (isc); 
    464             document = parser.getDocument (); 
    465          
    466     } catch(SAXParseException e) { 
    467         showXMLParseFailureLine(e, xmlContents); 
    468     } catch (SAXException exception) { 
    469         System.err.println ("SAX exception: " + exception.getMessage ()); 
    470         if(DebugStream.isDebuggingEnabled()) { 
    471         DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
    472                     + xmlContents + "\n************END\n"); 
    473         // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
    474         DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");  
    475         System.exit(-1); 
    476         }  
    477         // else, not running in debug mode, so don't exit after exception   
    478         System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 
    479         DebugStream.printStackTrace (exception); 
     426        Document document = null; 
     427 
     428    // If debugging, the following will store the XML contents to be parsed,  
     429    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 
     430    String xmlContents = ""; 
     431 
     432    try { 
     433        Reader reader = null; 
     434 
     435        // (1) By default, GLI will remove any contents preceeding (and invalidating)  
     436        // the XML and present these lines separately to the user 
     437        if(!DebugStream.isDebuggingEnabled()) { 
     438            try { 
     439                reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) ); 
     440            } catch ( Exception e ) { 
     441                System.err.println( "Exception while wrapping the reader in parseXML(Reader)" ); 
     442                e.printStackTrace(); 
     443            } 
     444        }  
     445 
     446        // (2) If we are running GLI in debug mode: 
     447        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some 
     448        // idea of where things went wrong. This will print the "XML" contents to either 
     449        // system.out (if debugging is off) or to the DebugStream otherwise. 
     450        // We need to read the XML twice to know the line where things went wrong, so 
     451        // do the additional reading only if we're debugging  
     452        else { 
     453            StringBuffer buf = new StringBuffer(); 
     454            char[] buffer = new char[500]; 
     455            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
     456            while(numCharsRead != -1) { 
     457                buf.append(buffer, 0, numCharsRead); 
     458                numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
     459            } 
     460            xmlContents = buf.toString(); 
     461            xml_reader.close(); // closing the old Reader 
     462            xml_reader = null; 
     463            buffer = null; 
     464            buf = null; 
     465            // we need a Reader to parse the same contents as the Reader that was just closed 
     466            reader = new BufferedReader(new StringReader(xmlContents)); 
     467        } 
     468         
     469        // (2) The actual XML parsing 
     470        InputSource isc       = new InputSource (reader); 
     471        DOMParser parser      = new DOMParser (); 
     472        parser.setFeature ("http://xml.org/sax/features/validation", false); 
     473        parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 
     474        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 
     475        parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 
     476        parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 
     477        parser.parse (isc); 
     478        document = parser.getDocument (); 
     479         
     480    } catch(SAXParseException e) { 
     481        showXMLParseFailureLine(e, xmlContents); 
     482    } catch (SAXException exception) { 
     483        System.err.println ("SAX exception: " + exception.getMessage ()); 
     484        if(DebugStream.isDebuggingEnabled()) { 
     485        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
     486                    + xmlContents + "\n************END\n"); 
     487        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
     488        DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");  
     489        System.exit(-1); 
     490        }  
     491        // else, not running in debug mode, so don't exit after exception   
     492        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 
     493        DebugStream.printStackTrace (exception); 
    480494        } 
    481495        catch (Exception exception) { 
     
    493507     */ 
    494508    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) { 
    495      
    496     // There should be no characters at all that preceed the <?xml>... bit.  
    497     // The first check is for starting spaces: 
    498     if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) { 
    499         DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); 
    500         DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); 
    501         return; // nothing more to do, first error identified 
    502     } 
    503  
    504     // the actual line (String literal) where parsing failed and the SAXParseException occurred. 
    505     String line = ""; 
    506     int linenumber = e.getLineNumber(); 
    507     DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber); 
    508     if(DebugStream.isDebuggingEnabled()) { 
    509         if(linenumber != -1) {  
    510         // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on) 
    511         int start = 0; 
    512         int end = xmlContents.length(); 
    513         for(int i = 1; i <= linenumber; i++) { 
    514             end = xmlContents.indexOf("\n"); 
    515             if(end > 0) { 
    516             line = xmlContents.substring(start, end); 
    517             } 
    518             start = end+1; 
    519         } 
    520         DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 
    521         DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 
    522         } else { // no particular line number, print out all the xml so debugger can inspect it 
    523         DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
    524                     + xmlContents + "\n************END\n"); 
    525         } 
    526         // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
    527         DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");  
    528         System.exit(-1); 
    529     } else { // not running in debug mode 
    530         System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 
    531     } 
     509     
     510        // There should be no characters at all that preceed the <?xml>... bit.  
     511        // The first check is for starting spaces: 
     512        if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) { 
     513            DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); 
     514            DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); 
     515            return; // nothing more to do, first error identified 
     516        } 
     517     
     518        // the actual line (String literal) where parsing failed and the SAXParseException occurred. 
     519        String line = ""; 
     520        int linenumber = e.getLineNumber(); 
     521        DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber); 
     522        if(DebugStream.isDebuggingEnabled()) { 
     523            if(linenumber != -1) {  
     524                // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on) 
     525                int start = 0; 
     526                int end = xmlContents.length(); 
     527                for(int i = 1; i <= linenumber; i++) { 
     528                    end = xmlContents.indexOf("\n"); 
     529                    if(end > 0) { 
     530                        line = xmlContents.substring(start, end); 
     531                    } 
     532                    start = end+1; 
     533                } 
     534                DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 
     535                DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 
     536            } else { // no particular line number, print out all the xml so debugger can inspect it 
     537                DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
     538                    + xmlContents + "\n************END\n"); 
     539            } 
     540            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
     541            DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");  
     542            System.exit(-1); 
     543        } else { // not running in debug mode 
     544            System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 
     545        } 
    532546    } 
    533547 
     
    625639            f.setPreserveSpace (false); 
    626640            if (nonEscapingTagNames != null) { 
    627           f.setNonEscapingElements (nonEscapingTagNames); 
    628         } 
     641          f.setNonEscapingElements (nonEscapingTagNames); 
     642        } 
    629643            // Create the necessary writer stream for serialization. 
    630644            OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); 
     
    797811            int len = children.getLength (); 
    798812            for (int i = 0; i < len; i++) { 
    799           if (depth >= 0) { 
     813          if (depth >= 0) { 
    800814                xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1); 
    801           } else { 
    802         xmlNodeToStringWithoutNewline (sb,children.item (i), depth); 
    803           } 
     815          } else { 
     816        xmlNodeToStringWithoutNewline (sb,children.item (i), depth); 
     817          } 
    804818            } 
    805819