Changeset 18353

Show
Ignore:
Timestamp:
12.01.2009 11:17:33 (10 years ago)
Author:
kjdon
Message:

updated the rtl-gli branch with files from trunk. Result of a merge 14807:18318

Location:
gli/branches/rtl-gli/src/org/greenstone/gatherer/util
Files:
2 modified
1 copied

Legend:

Unmodified
Added
Removed
  • gli/branches/rtl-gli/src/org/greenstone/gatherer/util/StaticStrings.java

    r14241 r18353  
    4242    static final public String ACCENTFOLD_OPTION_STR = "accentfold"; 
    4343    static final public String ALLFIELDS_STR = "allfields"; 
    44     static final public String ARCPLUG_STR = "ArcPlug"; 
     44    static final public String ARCPLUG_STR = "ArchivesInfPlugin"; 
    4545    static final public String ASSIGNED_ATTRIBUTE = "assigned"; 
    4646    static final public String ASSOCIATIONS_ELEMENT = "Associations"; 
     
    7373    static final public String COLLECTIONMETADATA_PUBLIC_ELEMENT = "Public"; 
    7474    static final public String COLLECTIONMETADATA_PUBLIC_STR = "public"; 
     75    static final public String COLLECTIONMETADATA_COLLECTGROUP_STR = "collectgroup"; 
    7576    static final public String COLLECTIONMETADATA_STR = "collectionmeta"; 
    7677    static final public String COLON_CHARACTER = ":"; 
     
    114115    static final public String FORMAT_END_TAG = "</format>"; 
    115116    static final public String FURTHER_DIALOG_INDICATOR = "..."; 
     117    static final public String FEDORA_MODE= "-fedora"; 
     118    static final public String FEDORA_HOME = "-fedora_home"; 
     119    static final public String FEDORA_VERSION = "-fedora_version"; 
     120    static final public String FEDORA_HOSTNAME = "-fedora_hostname"; 
     121    static final public String FEDORA_PORT     = "-fedora_port"; 
     122    static final public String FEDORA_USERNAME = "-fedora_username"; 
     123    static final public String FEDORA_PASSWORD = "-fedora_password"; 
     124    static final public String FEDORA_PROTOCOL = "-fedora_protocol"; 
    116125    static final public String GLI_ATTRIBUTE = "gli"; 
    117126    static final public String GLISERVER_URL_ARGUMENT = "-gliserver_url"; 
     
    144153    static final public String INT_STR = "int"; 
    145154    static final public String IMPORT_STR = "import"; 
    146     static final public String[] KEEP_PLUG = { "GAPlug", "METSPlug" }; 
     155    static final public String[] KEEP_PLUG = { "GreenstoneXMLPlugin", "GreenstoneMETSPlugin" }; 
    147156    static final public String LANGUAGE_ARGUMENT = "l="; 
    148157    static final public String LANGUAGE_ATTRIBUTE = "language"; 
     
    181190    static final public String METADATA_TYPE_STR = "metadata"; 
    182191    static final public String METADATA_XML = "metadata.xml"; 
    183     static final public String METADATAXMLPLUG_STR = "MetadataXMLPlug"; 
     192    static final public String METADATAXMLPLUG_STR = "MetadataXMLPlugin"; 
    184193    static final public String METADATUM_TYPE_STR = "metadatum"; 
    185194    static final public String MGPP_ATTRIBUTE = "mgpp_enabled"; 
     
    207216    static final public String PREDEFINED_METADATA_ATTRIBUTE = "predefined";           
    208217    static final public String RBRACKET_CHARACTER = "]"; 
    209     static final public String RECPLUG_STR = "RecPlug"; 
     218    static final public String RECPLUG_STR = "DirectoryPlugin"; 
    210219    static final public String REGEXP_STR = "regexp"; 
    211220    static final public String REPLACELISTREF_STR = "replaceListRef"; 
     
    216225    static final public String SECTION_ELEMENT = "Section"; 
    217226    static final public String SECTION_STR = "section"; 
     227    static final public String SEPARATE_CJK_OPTION_STR = "separate_cjk"; 
    218228    static final public String SEPARATOR_ATTRIBUTE = "separator"; 
    219229    static final public String SEPARATOR_CHARACTER = "/"; 
     
    246256    static final public String TYPE_ATTRIBUTE = "type"; 
    247257    static final public String UNKNOWN_ELEMENT = "Unknown"; 
    248     static final public String UNKNOWNPLUG_STR = "UnknownPlug"; 
     258    static final public String UNKNOWNPLUG_STR = "UnknownPlugin"; 
    249259    static final public String USE_METADATA_FILES_ARGUMENT = "use_metadata_files"; 
    250260    static final public String USE_REMOTE_GREENSTONE_ARGUMENT = "-use_remote_greenstone"; 
  • gli/branches/rtl-gli/src/org/greenstone/gatherer/util/XMLTools.java

    r14053 r18353  
    261261    static final public String NOTWELLFORMED= "not well-formed"; 
    262262    static final private String HEADER = "<?xml version='1.0' encoding='UTF-8'?><collectionConfig xmlns:gsf='http://www.greenstone.org/greenstone3/schema/ConfigFormat' xmlns:xsl='http://www.w3.org/1999/XSL/Transform'>"; 
    263     static final private String FOOTER = "</collectionConfig>"; 
     263    static final private String FOOTER = "</collectionConfig>"; 
    264264        
    265265public static String parse (String xml_str) { 
     
    332332            SAXParser parser = factory.newSAXParser (); 
    333333            FileReader r = new FileReader(xml_file); 
    334             InputSource iSource = new InputSource(r); 
     334            InputSource iSource = new InputSource(r); 
    335335            XMLReader reader = parser.getXMLReader (); 
    336336            reader.setContentHandler(new DefaultHandler()); 
     
    372372    } // getLocationString(SAXParseException):String 
    373373 
     374 
    374375    /** Parse an XML document from a given file path */ 
    375376    static public Document parseXMLFile (String xml_file_path, boolean use_class_loader) { 
     
    409410        try { 
    410411            InputStreamReader isr = new InputStreamReader (xml_input_stream, "UTF-8"); 
    411             Reader xml_reader = new BufferedReader (isr); 
    412             document = parseXML (xml_reader); 
     412            document = parseXML(isr); 
    413413            isr.close (); 
    414414            xml_input_stream.close (); 
     
    425425    static public Document parseXML (Reader xml_reader) { 
    426426        Document document = null; 
    427          
    428         try { 
    429             InputSource isc       = new InputSource (xml_reader); 
    430             DOMParser parser      = new DOMParser (); 
    431             parser.setFeature ("http://xml.org/sax/features/validation", false); 
    432             parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 
    433             // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 
    434             parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 
    435             parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 
    436             parser.parse (isc); 
    437             document = parser.getDocument (); 
    438         } 
    439         catch (SAXException exception) { 
    440             System.err.println ("SAX exception: " + exception.getMessage ()); 
    441             DebugStream.printStackTrace (exception); 
     427 
     428    // If debugging, the following will store the XML contents to be parsed,  
     429    // which can then be inspected upon encountering a SAXException (need to run GLI with -debug on) 
     430    String xmlContents = ""; 
     431 
     432    try { 
     433        Reader reader = null; 
     434 
     435        // (1) By default, GLI will remove any contents preceding (and invalidating) 
     436        // the XML and present these lines separately to the user 
     437        if(!DebugStream.isDebuggingEnabled()) { 
     438            try { 
     439                reader = new BufferedReader( new RemoveContentBeforeRootElementXMLReader(xml_reader) ); 
     440            } catch ( Exception e ) { 
     441                System.err.println( "Exception while wrapping the reader in parseXML(Reader)" ); 
     442                e.printStackTrace(); 
     443            } 
     444        }  
     445 
     446        // (2) If we are running GLI in debug mode: 
     447        // In case parsing exceptions are thrown (SAX Exceptions), we want to get some 
     448        // idea of where things went wrong. This will print the "XML" contents to either 
     449        // system.out (if debugging is off) or to the DebugStream otherwise. 
     450        // We need to read the XML twice to know the line where things went wrong, so 
     451        // do the additional reading only if we're debugging  
     452        else { 
     453            StringBuffer buf = new StringBuffer(); 
     454            char[] buffer = new char[500]; 
     455            int numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
     456            while(numCharsRead != -1) { 
     457                buf.append(buffer, 0, numCharsRead); 
     458                numCharsRead = xml_reader.read(buffer, 0, buffer.length);  
     459            } 
     460            xmlContents = buf.toString(); 
     461            xml_reader.close(); // closing the old Reader 
     462            xml_reader = null; 
     463            buffer = null; 
     464            buf = null; 
     465            // we need a Reader to parse the same contents as the Reader that was just closed 
     466            reader = new BufferedReader(new StringReader(xmlContents)); 
     467        } 
     468         
     469        // (3) The actual XML parsing 
     470        InputSource isc       = new InputSource (reader); 
     471        DOMParser parser      = new DOMParser (); 
     472        parser.setFeature ("http://xml.org/sax/features/validation", false); 
     473        parser.setFeature ("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 
     474        // May or may not be ignored, the documentation for Xerces is contradictory. If it works then parsing -should- be faster. 
     475        parser.setFeature ("http://apache.org/xml/features/dom/defer-node-expansion", true); 
     476        parser.setFeature ("http://apache.org/xml/features/dom/include-ignorable-whitespace", false); 
     477        parser.parse (isc); 
     478        document = parser.getDocument (); 
     479         
     480    } catch(SAXParseException e) { 
     481        showXMLParseFailureLine(e, xmlContents); 
     482    } catch (SAXException exception) { 
     483        System.err.println ("SAX exception: " + exception.getMessage ()); 
     484        if(DebugStream.isDebuggingEnabled()) { 
     485        DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
     486                    + xmlContents + "\n************END\n"); 
     487        // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
     488        DebugStream.println("Debug mode: Exiting the program as there was trouble parsing the XML...");  
     489        System.exit(-1); 
     490        }  
     491        // else, not running in debug mode, so don't exit after exception   
     492        System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents that could not be parsed."); 
     493        DebugStream.printStackTrace (exception); 
    442494        } 
    443495        catch (Exception exception) { 
     
    447499        return document; 
    448500    } 
    449      
     501 
     502    /** Displays the line (string) where the SAXParseException occurred, given a String of the  
     503     * entire xml that was being parsed and the SAXParseException object that was caught.  
     504     * The messages are printed to DebugStream, so run GLI/FLI with -debug to view this output.  
     505     * @param xmlContents is the entire xml that was being parsed when the exception occurred 
     506     * @param e is the SAXParseException object that was thrown upon parsing the xmlContents. 
     507     */ 
     508    public static void showXMLParseFailureLine(SAXParseException e, String xmlContents) { 
     509     
     510        // There should be no characters at all that preceed the <?xml>... bit.  
     511        // The first check is for starting spaces: 
     512        if(xmlContents.startsWith("\n") || xmlContents.startsWith(" ") || xmlContents.startsWith("\t")) { 
     513            DebugStream.println("ERROR: illegal start of XML. Space/tab/newline should not preceed xml declaration.\n"); 
     514            DebugStream.println("xmlContents (length is " + xmlContents.length() + "):\n" + xmlContents); 
     515            return; // nothing more to do, first error identified 
     516        } 
     517     
     518        // the actual line (String literal) where parsing failed and the SAXParseException occurred. 
     519        String line = ""; 
     520        int linenumber = e.getLineNumber(); 
     521        DebugStream.println("\n****SAXParseException on LINE NUMBER: " + linenumber); 
     522        if(DebugStream.isDebuggingEnabled()) { 
     523            if(linenumber != -1) {  
     524                // find the line in xmlContents string (xmlContents is only set if GLI is run with debugging turned on) 
     525                int start = 0; 
     526                int end = xmlContents.length(); 
     527                for(int i = 1; i <= linenumber; i++) { 
     528                    end = xmlContents.indexOf("\n"); 
     529                    if(end > 0) { 
     530                        line = xmlContents.substring(start, end); 
     531                    } 
     532                    start = end+1; 
     533                } 
     534                DebugStream.println("The parsing error occurred on this line:\n***********START\n" + line + "\n***********END"); 
     535                DebugStream.println("SAXParseException message: " + e.getMessage() + "\n"); 
     536            } else { // no particular line number, print out all the xml so debugger can inspect it 
     537                DebugStream.println("Encountered a SAX exception when parsing the following:\n*********START\n"  
     538                    + xmlContents + "\n************END\n"); 
     539            } 
     540            // Exit to let the user view the erroneous line/xml before it goes past the screen buffer? 
     541            DebugStream.println("\nDebug mode: Exiting the program as there was trouble parsing the XML...");  
     542            System.exit(-1); 
     543        } else { // not running in debug mode 
     544            System.out.println("***Turn debugging on (run GLI with -debug) to view the XML contents/line that could not be parsed."); 
     545        } 
     546    } 
     547 
    450548     
    451549    static public StringBuffer readXMLStream (InputStream input_stream) { 
     
    540638            f.setLineWidth (0); // Why isn't this working! 
    541639            f.setPreserveSpace (false); 
    542              
    543             f.setNonEscapingElements (nonEscapingTagNames); 
     640            if (nonEscapingTagNames != null) { 
     641          f.setNonEscapingElements (nonEscapingTagNames); 
     642        } 
    544643            // Create the necessary writer stream for serialization. 
    545644            OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); 
     
    560659    /** Write an XML document to a given file */ 
    561660    static public void writeXMLFile (File xml_file, Document document) { 
    562         try { 
    563             OutputStream os = new FileOutputStream (xml_file); 
    564             // Create an output format for our document. 
    565             OutputFormat f = new OutputFormat (document); 
    566             f.setEncoding ("UTF-8"); 
    567             f.setIndenting (true); 
    568             f.setLineWidth (0); // Why isn't this working! 
    569             f.setPreserveSpace (false); 
    570             // Create the necessary writer stream for serialization. 
    571             OutputStreamWriter osw = new OutputStreamWriter (os, "UTF-8"); 
    572             Writer w               = new BufferedWriter (osw); 
    573             // Generate a new serializer from the above. 
    574             XMLSerializer s        = new XMLSerializer (w, f); 
    575             s.asDOMSerializer (); 
    576             // Finally serialize the document to file. 
    577             s.serialize (document); 
    578             // And close. 
    579             os.close (); 
    580         } 
    581         catch (Exception exception) { 
    582             DebugStream.printStackTrace (exception); 
    583         } 
    584     } 
    585      
     661      writeXMLFile(xml_file, document, null); 
     662    } 
     663 
    586664    public static void printXMLNode (Node e) { 
    587665        printXMLNode (e, 0) ; 
     
    686764         
    687765    } 
     766 
     767  public static String xmlNodeToStringWithoutIndenting (Node e) { // serialises node e to a String via xmlNodeToStringWithoutNewline, passing a negative depth
     768    StringBuffer sb = new StringBuffer (""); // accumulates the serialised output
     769    xmlNodeToStringWithoutNewline(sb, e, -1); // depth -1: the callee's "i<depth" space loop adds nothing, and it recurses with the same negative depth
     770    return sb.toString(); 
     771  } 
    688772    public static String xmlNodeToStringWithoutNewline (Node e){ 
    689773        StringBuffer sb = new StringBuffer (""); 
     
    696780        for (int i=0 ; i<depth ; i++) 
    697781            sb.append (' ') ; 
    698          
     782       
    699783        if (e.getNodeType () == Node.TEXT_NODE){ 
    700784            if (e.getNodeValue () != "") { 
     
    727811            int len = children.getLength (); 
    728812            for (int i = 0; i < len; i++) { 
     813          if (depth >= 0) { 
    729814                xmlNodeToStringWithoutNewline (sb,children.item (i), depth + 1); 
     815          } else { 
     816        xmlNodeToStringWithoutNewline (sb,children.item (i), depth); 
     817          } 
    730818            } 
    731819             
     
    735823            sb.append ("</" + e.getNodeName () + ">"); 
    736824        } 
    737          
    738          
    739     } 
    740      
     825    } 
    741826}