Changeset 16637


Ignore:
Timestamp:
2008-08-04T11:43:10+12:00 (13 years ago)
Author:
ak19
Message:

The toString() method displays the filename as UTF-8 if it is UTF-8 encoded; otherwise it tries converting the filename to some other common encodings and, failing those, returns the original filename (as the string is stored by the OS) for display.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gli/trunk/src/org/greenstone/gatherer/collection/CollectionTreeNode.java

    r15106 r16637  
    3333import org.greenstone.gatherer.file.FileNode;
    3434import org.greenstone.gatherer.util.JarTools;
     35import java.util.Set;
     36import java.util.Iterator;
     37import java.nio.charset.Charset;
    3538
    3639
     
    4851    private boolean is_srcreplaceable = false;
    4952
     53    private String displayFileName = null;
     54
    5055    public CollectionTreeNode(File file)
    5156    {
     
    5560    // To work with replace_srcdoc_with_html.pl
    5661    this.is_srcreplaceable = CollectionDesignManager.plugin_manager.isFileSrcReplaceable(file);
     62   
     63    displayFileName = calcDisplayString();
     64    }
     65
     66    /** Similar to calcDisplayString, but this version only checks whether the filename
     67     * String is UTF8. If so, it converts it into UTF8 and returns it. If the filename
     68     * is not UTF8, it is returned unchanged.
     69     */
     70    protected String quickCalcDisplayString() {
     71    String filename = super.toString();
     72    try{
     73        String utf8filename = new String(filename.getBytes(), "UTF8");
     74        if(utf8filename.indexOf('\ufffd') == -1) {
     75        return utf8filename;
     76        } else { // contains the character indicating that it's invalid utf8
     77        // return the original string
     78        return filename;
     79        }
     80    } catch(java.io.UnsupportedEncodingException e) {
     81        return filename;
     82    }
     83    }
     84
     85    /** This method returns a string representation of the filenodes in the Collection
     86     * Tree, that can then be displayed in the tree.
     87     * We'll initially assume that the filenames are utf8 encoded and so convert the
     88     * filename into utf8 for proper presentation in the Collection tree pane.
     89     * If the filenames are not utf8, then the conversion would have introduced funny
     90     * characters. Therefore, when converting to utf8, if the converted filename
     91     * contains the special character '\ufffd', then we know the conversion did not work
     92     * and we return the original string which may or may not be properly presented by
     93     * default.
     94     * See http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/CharsetDecoder.html
     95     * which says "How a decoding error is handled depends upon the action requested for
     96     * that type of error, which is described by an instance of the CodingErrorAction class.
     97     * The possible error actions are to ignore the erroneous input, report the error to
     98     * the invoker via the returned CoderResult object, or replace the erroneous input with
     99     * the current value of the replacement string. The replacement has the initial value
     100     * "\uFFFD"; its value may be changed via the replaceWith method."
     101     * The following made me think that String(byte[], String charsetName) constructor may
     102     * use the replacement value \uFFFD.
     103     * http://www.experts-exchange.com/Programming/Programming_Languages/Java/Q_20512969.html
     104     * mentions the following which made me think of this:
     105     * convertedStr = convertedStr.replace('\ufffd', ' ');
     106     */
     107    protected String calcDisplayString() {
     108    String filename = super.toString();
     109
     110    try{
     111        String[] charsets = {"UTF-8", "ISO-8859-1", "US-ASCII", "UTF-16BE", "UTF-16LE", "UTF-16"};
     112        for(int i = 0; i < charsets.length; i++) {
     113        String charset = charsets[i];
     114        String convertedfilename = new String(filename.getBytes(), charset);
     115        if(convertedfilename.indexOf('\ufffd') != -1) {
     116            // encountered the character that indicates when the conversion is invalid
     117            convertedfilename = null;
     118            continue; // try to use the next charset to encode the filename as
     119        }
     120        else { // valid conversion
     121            System.err.println("Filename was encoded in: " + charset);
     122            return convertedfilename;
     123        }
     124        }
     125    } catch(java.io.UnsupportedEncodingException e) {
     126        return filename;
     127    }
     128
     129    return filename; // attempted conversions all failed
    57130    }
    58131
     
    77150    return is_srcreplaceable;
    78151    }
     152
     153    /** This method returns a string representation of the filenodes in the Collection
     154     * Tree, which is what will be displayed in the tree. It tries to convert it into
     155     * some common encoding formats. Failing that, the unchanged filepath is returned.
     156     * @see displayString
     157     */
     158    public String toString()
     159    {
     160    if(displayFileName == null) {   
     161      displayFileName = calcDisplayString();
     162    }
     163    return displayFileName;
     164    }
    79165}
Note: See TracChangeset for help on using the changeset viewer.