Changeset 18353 for gli


Ignore:
Timestamp:
01/12/09 11:17:33 (12 years ago)
Author:
kjdon
Message:

updated the rtl-gli branch with files from trunk. Result of a merge 14807:18318

Location:
gli/branches/rtl-gli/src/org/greenstone/gatherer/util
Files:
2 edited
1 copied

Legend:

Unmodified
Added
Removed
  • gli/branches/rtl-gli/src/org/greenstone/gatherer/util/StaticStrings.java

    r14241 r18353  
    4242    static final public String ACCENTFOLD_OPTION_STR = "accentfold";
    4343    static final public String ALLFIELDS_STR = "allfields";
    44     static final public String ARCPLUG_STR = "ArcPlug";
     44    static final public String ARCPLUG_STR = "ArchivesInfPlugin";
    4545    static final public String ASSIGNED_ATTRIBUTE = "assigned";
    4646    static final public String ASSOCIATIONS_ELEMENT = "Associations";
     
    7373    static final public String COLLECTIONMETADATA_PUBLIC_ELEMENT = "Public";
    7474    static final public String COLLECTIONMETADATA_PUBLIC_STR = "public";
     75    static final public String COLLECTIONMETADATA_COLLECTGROUP_STR = "collectgroup";
    7576    static final public String COLLECTIONMETADATA_STR = "collectionmeta";
    7677    static final public String COLON_CHARACTER = ":";
     
    114115    static final public String FORMAT_END_TAG = "</format>";
    115116    static final public String FURTHER_DIALOG_INDICATOR = "...";
     117    static final public String FEDORA_MODE= "-fedora";
     118    static final public String FEDORA_HOME = "-fedora_home";
     119    static final public String FEDORA_VERSION = "-fedora_version";
     120    static final public String FEDORA_HOSTNAME = "-fedora_hostname";
     121    static final public String FEDORA_PORT     = "-fedora_port";
     122    static final public String FEDORA_USERNAME = "-fedora_username";
     123    static final public String FEDORA_PASSWORD = "-fedora_password";
     124    static final public String FEDORA_PROTOCOL = "-fedora_protocol";
    116125    static final public String GLI_ATTRIBUTE = "gli";
    117126    static final public String GLISERVER_URL_ARGUMENT = "-gliserver_url";
     
    144153    static final public String INT_STR = "int";
    145154    static final public String IMPORT_STR = "import";
    146     static final public String[] KEEP_PLUG = { "GAPlug", "METSPlug" };
     155    static final public String[] KEEP_PLUG = { "GreenstoneXMLPlugin", "GreenstoneMETSPlugin" };
    147156    static final public String LANGUAGE_ARGUMENT = "l=";
    148157    static final public String LANGUAGE_ATTRIBUTE = "language";
     
    181190    static final public String METADATA_TYPE_STR = "metadata";
    182191    static final public String METADATA_XML = "metadata.xml";
    183     static final public String METADATAXMLPLUG_STR = "MetadataXMLPlug";
     192    static final public String METADATAXMLPLUG_STR = "MetadataXMLPlugin";
    184193    static final public String METADATUM_TYPE_STR = "metadatum";
    185194    static final public String MGPP_ATTRIBUTE = "mgpp_enabled";
     
    207216    static final public String PREDEFINED_METADATA_ATTRIBUTE = "predefined";         
    208217    static final public String RBRACKET_CHARACTER = "]";
    209     static final public String RECPLUG_STR = "RecPlug";
     218    static final public String RECPLUG_STR = "DirectoryPlugin";
    210219    static final public String REGEXP_STR = "regexp";
    211220    static final public String REPLACELISTREF_STR = "replaceListRef";
     
    216225    static final public String SECTION_ELEMENT = "Section";
    217226    static final public String SECTION_STR = "section";
     227    static final public String SEPARATE_CJK_OPTION_STR = "separate_cjk";
    218228    static final public String SEPARATOR_ATTRIBUTE = "separator";
    219229    static final public String SEPARATOR_CHARACTER = "/";
     
    246256    static final public String TYPE_ATTRIBUTE = "type";
    247257    static final public String UNKNOWN_ELEMENT = "Unknown";
    248     static final public String UNKNOWNPLUG_STR = "UnknownPlug";
     258    static final public String UNKNOWNPLUG_STR = "UnknownPlugin";
    249259    static final public String USE_METADATA_FILES_ARGUMENT = "use_metadata_files";
    250260    static final public String USE_REMOTE_GREENSTONE_ARGUMENT = "-use_remote_greenstone";
  • gli/branches/rtl-gli/src/org/greenstone/gatherer/util/XMLTools.java

    r14053 r18353  
    261261    static final public String NOTWELLFORMED= "not well-formed";
    262262    static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>";
    263     static final private String FOOTER = "</collectionConfig>";
     263    static final private String FOOTER = "</collectionConfig>";
    264264       
    265265public static String parse (String xml_str) {
     
    332332            SAXParser parser = factory.newSAXParser ();
    333333            FileReader r = new FileReader(xml_file);
    334             InputSource iSource = new InputSource(r);
     334            InputSource iSource = new InputSource(r);
    335335            XMLReader reader = parser.getXMLReader ();
    336336            reader.setContentHandler(new DefaultHandler());
     
    372372    } // getLocationString(SAXParseException):String
    373373
     374
    374375    /** Parse an XML document from a given file path */
    375376    static public Document parseXMLFile (String xml_file_path, boolean use_class_loader) {
     
    409410        try {
    410411            InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8");
    411             Reader xml_reader = new BufferedReader (isr);
    412             document = parseXML (xml_reader);
     412            document = parseXML(isr);
    413413            isr.close ();
    414414            xml_input_stream.close ();
     
    425425    static public Document parseXML (Reader xml_reader) {
    426426        Document document = null;
    427        
    428         try {
    429             InputSource isc       = new InputSource (xml_reader);
    430             DOMParser parser      = new DOMParser ();
    431             parser.setFeature ("http://xml.org/sax/features/validation", false);
    432             parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    433             // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
    434             parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
    435             parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
    436             parser.parse (isc);
    437             document = parser.getDocument ();
    438         }
    439         catch (SAXException exception) {
    440             System.err.println ("SAX exception: " + exception.getMessage ());
    441             DebugStream.printStackTrace (exception);
     427
     428    // If debugging, the following will store the XML contents to be parsed,
     429    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on)
     430    String xmlContents = "";
     431
     432    try {
     433        Reader reader = null;
     434
     435        // (1) By default, GLI will remove any contents preceding (and invalidating)
     436        // the XML and present these lines separately to the user
     437        if(!DebugStream.isDebuggingEnabled()) {
     438            try {
     439                reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) );
     440            } catch ( Exception e ) {
     441                System.err.println( "Exception while wrapping the reader in parseXML(Reader)" );
     442                e.printStackTrace();
     443            }
     444        }
     445
     446        // (2) If we are running GLI in debug mode:
     447        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some
     448        // idea of where things went wrong. This will print the "XML" contents to either
     449        // system.out (if debugging is off) or to the DebugStream otherwise.
     450        // We need to read the XML twice to know the line where things went wrong, so
     451        // do the additional reading only if we're debugging
     452        else {
     453            StringBuffer buf = new StringBuffer();
     454            char[] buffer = new char[500];
     455            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     456            while(numCharsRead != -1) {
     457                buf.append(buffer, 0, numCharsRead);
     458                numCharsRead = xml_reader.read(buffer, 0, buffer.length);
     459            }
     460            xmlContents = buf.toString();
     461            xml_reader.close(); // closing the old Reader
     462            xml_reader = null;
     463            buffer = null;
     464            buf = null;
     465            // we need a Reader to parse the same contents as the Reader that was just closed
     466            reader = new BufferedReader(new StringReader(xmlContents));
     467        }
     468       
     469        // (3) The actual XML parsing
     470        InputSource isc       = new InputSource (reader);
     471        DOMParser parser      = new DOMParser ();
     472        parser.setFeature ("http://xml.org/sax/features/validation", false);
     473        parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
     474        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster.
     475        parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true);
     476        parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
     477        parser.parse (isc);
     478        document = parser.getDocument ();
     479       
     480    } catch(SAXParseException e) {
     481        showXMLParseFailureLine(e, xmlContents);
     482    } catch (SAXException exception) {
     483        System.err.println ("SAX exception: " + exception.getMessage ());
     484        if(DebugStream.isDebuggingEnabled()) {
     485        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     486                    + xmlContents + "\n************END\n");
     487        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     488        DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");
     489        System.exit(-1);
     490        }
     491        // else, not running in debug mode, so don't exit after exception 
     492        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed.");
     493        DebugStream.printStackTrace (exception);
    442494        }
    443495        catch (Exception exception) {
     
    447499        return document;
    448500    }
    449    
     501
     502    /** Displays the line (string) where the SAXParseException occurred, given a String of the
     503     * entire xml that was being parsed and the SAXParseException object that was caught.
     504     * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output.
     505     * @param xmlContents is the entire xml that was being parsed when the exception occurred
     506     * @param e is the SAXParseException object that was thrown upon parsing the xmlContents.
     507     */
     508    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) {
     509   
     510        // There should be no characters at all that precede the <?xml>... bit.
     511        // The first check is for starting spaces:
     512        if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) {
     513            DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n");
     514            DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents);
     515            return; // nothing more to do, first error identified
     516        }
     517   
     518        // the actual line (String literal) where parsing failed and the SAXParseException occurred.
     519        String line = "";
     520        int linenumber = e.getLineNumber();
     521        DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber);
     522        if(DebugStream.isDebuggingEnabled()) {
     523            if(linenumber != -1) {
     524                // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on)
     525                int start = 0;
     526                int end = xmlContents.length();
     527                for(int i = 1; i <= linenumber; i++) {
     528                    end = xmlContents.indexOf("\n");
     529                    if(end > 0) {
     530                        line = xmlContents.substring(start, end);
     531                    }
     532                    start = end+1;
     533                }
     534                DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END");
     535                DebugStream.println("SAXParseException message: " + e.getMessage() + "\n");
     536            } else { // no particular line number, print out all the xml so debugger can inspect it
     537                DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"
     538                    + xmlContents + "\n************END\n");
     539            }
     540            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer?
     541            DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");
     542            System.exit(-1);
     543        } else { // not running in debug mode
     544            System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed.");
     545        }
     546    }
     547
    450548   
    451549    static public StringBuffer readXMLStream (InputStream input_stream) {
     
    540638            f.setLineWidth (0); // Why isn't this working!
    541639            f.setPreserveSpace (false);
    542            
    543             f.setNonEscapingElements (nonEscapingTagNames);
     640            if (nonEscapingTagNames != null) {
     641          f.setNonEscapingElements (nonEscapingTagNames);
     642        }
    544643            // Create the necessary writer stream for serialization.
    545644            OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
     
    560659    /** Write an XML document to a given file */
    561660    static public void writeXMLFile (File xml_file, Document document) {
    562         try {
    563             OutputStream os = new FileOutputStream (xml_file);
    564             // Create an output format for our document.
    565             OutputFormat f = new OutputFormat (document);
    566             f.setEncoding ("UTF-8");
    567             f.setIndenting (true);
    568             f.setLineWidth (0); // Why isn't this working!
    569             f.setPreserveSpace (false);
    570             // Create the necessary writer stream for serialization.
    571             OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8");
    572             Writer w               = new BufferedWriter (osw);
    573             // Generate a new serializer from the above.
    574             XMLSerializer s        = new XMLSerializer (w, f);
    575             s.asDOMSerializer ();
    576             // Finally serialize the document to file.
    577             s.serialize (document);
    578             // And close.
    579             os.close ();
    580         }
    581         catch (Exception exception) {
    582             DebugStream.printStackTrace (exception);
    583         }
    584     }
    585    
     661      writeXMLFile(xml_file, document, null);
     662    }
     663
    586664    public static void printXMLNode (Node e) {
    587665        printXMLNode (e, 0) ;
     
    686764       
    687765    }
     766
     767  public static String xmlNodeToStringWithoutIndenting (Node e) {
     768    StringBuffer sb = new StringBuffer ("");
     769    xmlNodeToStringWithoutNewline(sb, e, -1);
     770    return sb.toString();
     771  }
    688772    public static String xmlNodeToStringWithoutNewline (Node e){
    689773        StringBuffer sb = new StringBuffer ("");
     
    696780        for (int i=0 ; i<depth ; i++)
    697781            sb.append (' ') ;
    698        
     782     
    699783        if (e.getNodeType () == Node.TEXT_NODE){
    700784            if (e.getNodeValue () != "") {
     
    727811            int len = children.getLength ();
    728812            for (int i = 0; i < len; i++) {
     813          if (depth >= 0) {
    729814                xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1);
     815          } else {
     816        xmlNodeToStringWithoutNewline (sb,children.item (i), depth);
     817          }
    730818            }
    731819           
     
    735823            sb.append ("</" + e.getNodeName () + ">");
    736824        }
    737        
    738        
    739     }
    740    
     825    }
    741826}
Note: See TracChangeset for help on using the changeset viewer.