Ignore:
Timestamp:
2009-01-12T11:17:33+13:00 (15 years ago)
Author:
kjdon
Message:

Updated the rtl-gli branch with files from trunk. Result of a merge of revisions 14807:18318.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gli/branches/rtl-gli/src/org/greenstone/gatherer/util/XMLTools.java

    r14053 r18353  
    261261    static final public String NOTWELLFORMED= "not well-formed";
    262262    static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
    263     static final private String FOOTER = "</collectionConfig>";
     263    static final private String FOOTER = "</collectionConfig>";
    264264       
    265265public static String parse (String xml_str) {
     
    332332            SAXParser parser = factory.newSAXParser ();
    333333            FileReader r = new FileReader(xml_file);
    334             InputSource iSource = new InputSource(r);
     334            InputSource iSource = new InputSource(r);
    335335            XMLReader reader = parser.getXMLReader ();
    336336            reader.setContentHandler(new DefaultHandler());
     
    372372    } // getLocationString(SAXParseException):String
    373373
     374
    374375    /** Parse an XML document from a given file path */
    375376    static public Document parseXMLFile (String xml_file_path, boolean use_class_loader) {
     
    409410        try {
    410411            InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8");
    411             Reader xml_reader = new BufferedReader (isr);
    412             document = parseXML (xml_reader);
     412            document = parseXML(isr);
    413413            isr.close ();
    414414            xml_input_stream.close ();
     
    425425    static public Document parseXML (Reader xml_reader) {
    426426        Document document = null;
    427        
    428         try {
    429             InputSource isc       = new InputSource (xml_reader);
    430             DOMParser parser      = new DOMParser ();
    431             parser.setFeature ("http://xml.org/sax/features/validation", false);
    432             parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    433             // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
    434             parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
    435             parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
    436             parser.parse (isc);
    437             document = parser.getDocument ();
    438         }
    439         catch (SAXException exception) {
    440             System.err.println ("SAX exception: " + exception.getMessage ());
    441             DebugStream.printStackTrace (exception);
     427
     428    // If debugging, the following will store the XML contents to be parsed,
     429    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
     430    String xmlContents = "";
     431
     432    try {
     433        Reader reader = null;
     434
     435        // (1) By default, GLI will remove any contents preceeding (and invalidating)
     436        // the XML and present these lines separately to the user
     437        if(!DebugStream.isDebuggingEnabled()) {
     438            try {
     439                reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) );
     440            } catch ( Exception e ) {
     441                System.err.println( "Exception while wrapping the reader in parseXML(Reader)" );
     442                e.printStackTrace();
     443            }
     444        }
     445
     446        // (2) If we are running GLI in debug mode:
     447        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
     448        // idea of where things went wrong. This will print the "XML" contents to either
     449        // system.out (if debugging is off) or to the DebugStream otherwise.
     450        // We need to read the XML twice to know the line where things went wrong, so
     451        // do the additional reading only if we're debugging
     452        else {
     453            StringBuffer buf = new StringBuffer();
     454            char[] buffer = new char[500];
     455            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     456            while(numCharsRead != -1) {
     457                buf.append(buffer, 0, numCharsRead);
     458                numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     459            }
     460            xmlContents = buf.toString();
     461            xml_reader.close(); // closing the old Reader
     462            xml_reader = null;
     463            buffer = null;
     464            buf = null;
     465            // we need a Reader to parse the same contents as the Reader that was just closed
     466            reader = new BufferedReader(new StringReader(xmlContents));
     467        }
     468       
     469        // (2) The actual XML parsing
     470        InputSource isc       = new InputSource (reader);
     471        DOMParser parser      = new DOMParser ();
     472        parser.setFeature ("http://xml.org/sax/features/validation", false);
     473        parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
     474        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
     475        parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
     476        parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
     477        parser.parse (isc);
     478        document = parser.getDocument ();
     479       
     480    } catch(SAXParseException e) {
     481        showXMLParseFailureLine(e, xmlContents);
     482    } catch (SAXException exception) {
     483        System.err.println ("SAX exception: " + exception.getMessage ());
     484        if(DebugStream.isDebuggingEnabled()) {
     485        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     486                    + xmlContents + "\n************END\n");
     487        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     488        DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
     489        System.exit(-1);
     490        }
     491        // else, not running in debug mode, so don't exit after exception 
     492        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
     493        DebugStream.printStackTrace (exception);
    442494        }
    443495        catch (Exception exception) {
     
    447499        return document;
    448500    }
    449    
     501
     502    /** Displays the line (string) where the SAXParseException occurred, given a String of the
     503     * entire xml that was being parsed and the SAXParseException object that was caught.
     504     * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output.
     505     * @param xmlContents is the entire xml that was being parsed when the exception occurred
     506     * @param e is the SAXParseException object that was thrown upon parsing the xmlContents.
     507     */
     508    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) {
     509   
     510        // There should be no characters at all that preceed the <?xml>... bit.
     511        // The first check is for starting spaces:
     512        if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) {
     513            DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
     514            DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
     515            return; // nothing more to do, first error identified
     516        }
     517   
     518        // the actual line (String literal) where parsing failed and the SAXParseException occurred.
     519        String line = "";
     520        int linenumber = e.getLineNumber();
     521        DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber);
     522        if(DebugStream.isDebuggingEnabled()) {
     523            if(linenumber != -1) {
     524                // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on)
     525                int start = 0;
     526                int end = xmlContents.length();
     527                for(int i = 1; i <= linenumber; i++) {
     528                    end = xmlContents.indexOf("\n");
     529                    if(end > 0) {
     530                        line = xmlContents.substring(start, end);
     531                    }
     532                    start = end+1;
     533                }
     534                DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
     535                DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
     536            } else { // no particular line number, print out all the xml so debugger can inspect it
     537                DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     538                    + xmlContents + "\n************END\n");
     539            }
     540            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     541            DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
     542            System.exit(-1);
     543        } else { // not running in debug mode
     544            System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
     545        }
     546    }
     547
    450548   
    451549    static public StringBuffer readXMLStream (InputStream input_stream) {
     
    540638            f.setLineWidth (0); // Why isn't this working!
    541639            f.setPreserveSpace (false);
    542            
    543             f.setNonEscapingElements (nonEscapingTagNames);
     640            if (nonEscapingTagNames != null) {
     641          f.setNonEscapingElements (nonEscapingTagNames);
     642        }
    544643            // Create the necessary writer stream for serialization.
    545644            OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
     
    560659    /** Write an XML document to a given file */
    561660    static public void writeXMLFile (File xml_file, Document document) {
    562         try {
    563             OutputStream os = new FileOutputStream (xml_file);
    564             // Create an output format for our document.
    565             OutputFormat f = new OutputFormat (document);
    566             f.setEncoding ("UTF-8");
    567             f.setIndenting (true);
    568             f.setLineWidth (0); // Why isn't this working!
    569             f.setPreserveSpace (false);
    570             // Create the necessary writer stream for serialization.
    571             OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
    572             Writer w               = new BufferedWriter (osw);
    573             // Generate a new serializer from the above.
    574             XMLSerializer s        = new XMLSerializer (w, f);
    575             s.asDOMSerializer ();
    576             // Finally serialize the document to file.
    577             s.serialize (document);
    578             // And close.
    579             os.close ();
    580         }
    581         catch (Exception exception) {
    582             DebugStream.printStackTrace (exception);
    583         }
    584     }
    585    
     661      writeXMLFile(xml_file, document, null);
     662    }
     663
    586664    public static void printXMLNode (Node e) {
    587665        printXMLNode (e, 0) ;
     
    686764       
    687765    }
     766
     767  public static String xmlNodeToStringWithoutIndenting (Node e) {
     768    StringBuffer sb = new StringBuffer ("");
     769    xmlNodeToStringWithoutNewline(sb, e, -1);
     770    return sb.toString();
     771  }
    688772    public static String xmlNodeToStringWithoutNewline (Node e){
    689773        StringBuffer sb = new StringBuffer ("");
     
    696780        for (int i=0 ; i<depth ; i++)
    697781            sb.append (' ') ;
    698        
     782     
    699783        if (e.getNodeType () == Node.TEXT_NODE){
    700784            if (e.getNodeValue () != "") {
     
    727811            int len = children.getLength ();
    728812            for (int i = 0; i < len; i++) {
     813          if (depth >= 0) {
    729814                xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1);
     815          } else {
     816        xmlNodeToStringWithoutNewline (sb,children.item (i), depth);
     817          }
    730818            }
    731819           
     
    735823            sb.append ("</" + e.getNodeName () + ">");
    736824        }
    737        
    738        
    739     }
    740    
     825    }
    741826}
Note: See TracChangeset for help on using the changeset viewer.