Changeset 5246


Ignore:
Timestamp:
2003-08-22T09:43:11+12:00 (21 years ago)
Author:
jmt12
Message:

Removed obsolete encoding/decoding code (but not the greenstone ones)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/util/Utility.java

    r5241 r5246  
    9999    static final public String DICTIONARY = "dictionary";
    100100    static final public String DLS_MDS = "dls.mds";
     101    static final public String ENCODING = "UTF-8";
    101102    static final public String ENGLISH_VALUE = "en";
    102103    /** Definition of an important directory name, in this case the etc (or extra information) directory for the collection. */
     
    197198    return new TreePath(temp);
    198199    }
     200
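     201    /** Decodes a string of text read from a Greenstone configuration file so it's ready to be placed in a component. Essentially replaces the escaped "\n" with a newline, and the encoded entities (&apos;, &gt;, &lt;, &quot;, &#39;) with the characters they stand for.
     202     * @param raw The <strong>String</strong> before decoding, read from the configuration file.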
     203     * @return A <strong>String</strong> ready to be placed in a component.
     204     */
     205    static public String decodeGreenstone(String raw) {
     206    raw = raw.replaceAll("&apos;", "\'");
     207    raw = raw.replaceAll("&gt;", ">");
     208    raw = raw.replaceAll("&lt;", "<");
     209    raw = raw.replaceAll("&quot;", "\"");
     210    raw = raw.replaceAll("&#39;", "\'");
     211    raw = raw.replaceAll("\\\\n", "\n");
     212    return raw;
     213    }
     214
    199215    /** Takes an RFC 2616 'safe' String and translates it back into its 'unsafe' form. Basically the native C wget decode_string() function, but without the pointer stuff. It searches through the String looking for the pattern %xy where x and y are hexadecimal digits and where xy maps to a character.<BR> If x or y is not hexadecimal, or % is followed by a \0, then the pattern is left as is.
    200216     * @param encoded The url-safe <strong>String</strong> to be decoded.
     
    247263    return result;
    248264    }
    249     /** Encodes a string of text so its safe to use in a Greenstone configuration file. Esentially replaces newlines with their escaped form.
    250      * @param raw The <strong>String</strong> before encoding.
    251      * @return A <strong>String</strong> which is safe to write to the configuration file.
    252      */
    253     static final private char AMPERSTAMP_CHAR = '&';
    254     static final private char ESCAPE_CHAR = '\\';
    255     static final private char GREATER_THAN_CHAR = '>';
    256     static final private char LESS_THAN_CHAR = '<';
    257     static final private char NEWLINE_CHAR = '\n';
    258     static final private char QUOTE_CHAR = '\'';
    259     static final private char SPEECH_CHAR = '\"';
    260     static final private String ENCODED_AMPERSTAMP_STR = "&amp;";
    261     static final private String ENCODED_GREATER_THAN_STR = "&gt;";
    262     static final private String ENCODED_LESS_THAN_STR = "&lt;";
    263     static final private String ENCODED_SPEECH_STR = "&quot;";
    264     static final private String ESCAPED_NEWLINE_STR = "\\n";
    265 
    266     /** Decodes a string of text so its safe to use in a Greenstone configuration file. Esentially replaces "\n" with a newline.
    267      * @param raw The <strong>String</strong> before decoding, read from the configuration file..
    268      * @return A <strong>String</strong> ready to be placed in a component.
    269      */
    270     static public String decodeGreenstone(String raw) {
    271     raw = raw.replaceAll("&apos;", "\'");
    272     raw = raw.replaceAll("&gt;", ">");
    273     raw = raw.replaceAll("&lt;", "<");
    274     raw = raw.replaceAll("&quot;", "\"");
    275     raw = raw.replaceAll("&#39;", "\'");
    276     raw = raw.replaceAll("\\\\n", "\n");
     265
     266    static public String encodeGreenstone(String raw) {
     267    raw = raw.replaceAll("<", "&lt;");
     268    raw = raw.replaceAll(">", "&gt;");
     269    raw = raw.replaceAll("\n", "\\\\n");
    277270    return raw;
    278     }
    279 
    280     static public String encodeGreenstone(String raw) {
    281     // Once again regex fails to provide the power necessary for me to change strings. What I need to do is replace "<" and ">" with "&lt;" and "&gt;", and replace "\<" and "\>" with "<" and ">".
    282     StringBuffer processed = new StringBuffer();
    283     int index = 0;
    284     while(index < raw.length()) {
    285         char c = raw.charAt(index);
    286         switch(c) {
    287         // Replace a normal new line character with "\n"
    288             case NEWLINE_CHAR:
    289             processed.append(ESCAPED_NEWLINE_STR);
    290             break;
    291         // Replace "\<" with "<", or with "\&lt;" if this is for XML. Similar requirements for "\>".
    292             case ESCAPE_CHAR:
    293             if(index + 1 < raw.length()) {
    294             char d = raw.charAt(index + 1);
    295             if(d == LESS_THAN_CHAR) {
    296                 processed.append(LESS_THAN_CHAR);
    297                 index++;
    298                 break;
    299             }
    300             else if(d == GREATER_THAN_CHAR) {
    301                 processed.append(GREATER_THAN_CHAR);
    302                 index++;
    303                 break;
    304             }
    305             }
    306             // I have no idea how this would happen, but I better watch for it anyway
    307             processed.append(c);
    308             break;
    309         // Replace "<" with "&lt;"
    310             case LESS_THAN_CHAR:
    311             processed.append(ENCODED_LESS_THAN_STR);
    312             break;
    313         // Replace ">" with "&gt;"
    314             case GREATER_THAN_CHAR:
    315             processed.append(ENCODED_GREATER_THAN_STR);
    316             break;
    317             default:
    318             processed.append(c);
    319         }
    320         index++;
    321     }
    322     return processed.toString();
    323     }
    324     /** When retrieve text for, or from the collect.cfg file it may contain characters that can't go into a DOM such as "<" and ">". We also might already have encoded versions "&lt;" and "&gt;". Thus we must encode the former, and double encode the latter. */
    325     static public String encodeXML(String raw) {
    326     StringBuffer processed = new StringBuffer();
    327     int index = 0;
    328     while(index < raw.length()) {
    329         char c = raw.charAt(index);
    330         switch(c) {
    331         case GREATER_THAN_CHAR:
    332         processed.append(ENCODED_GREATER_THAN_STR);
    333         break;
    334         case LESS_THAN_CHAR:
    335         processed.append(ENCODED_LESS_THAN_STR);
    336         break;
    337         case AMPERSTAMP_CHAR:
    338         processed.append(ENCODED_AMPERSTAMP_STR);
    339         break;
    340         default:
    341         processed.append(c);
    342         }
    343         index++;
    344     }
    345     return processed.toString();
    346271    }
    347272
     
    356281        // Create an output format for our document.
    357282        OutputFormat f = new OutputFormat(document);
     283        f.setEncoding(ENCODING);
    358284        f.setIndenting(true);
    359285        f.setLineWidth(0);
    360286        f.setPreserveSpace(false);
    361287        // Create the necessary writer stream for serialization.
    362         OutputStreamWriter osw = new OutputStreamWriter(os);
     288        OutputStreamWriter osw = new OutputStreamWriter(os, ENCODING);
    363289        Writer w               = new BufferedWriter(osw);
    364290        // Generate a new serializer from the above.
     
    824750        try {
    825751        URL url = ClassLoader.getSystemResource(filename);
    826         file = new File(URLDecoder.decode(url.getFile(), "UTF-8"));
     752        file = new File(URLDecoder.decode(url.getFile(), ENCODING));
    827753        url = null;
    828754        }
     
    847773    try {
    848774        FileInputStream fis   = new FileInputStream(file);
    849         InputStreamReader isr = new InputStreamReader(fis);
     775        InputStreamReader isr = new InputStreamReader(fis, ENCODING);
    850776        Reader r              = new BufferedReader(isr);
    851777        InputSource isc       = new InputSource(r);
Note: See TracChangeset for help on using the changeset viewer.