Changeset 16637

Show
Ignore:
Timestamp:
04.08.2008 11:43:10 (11 years ago)
Author:
ak19
Message:

The toString() method now tries to display the filename as UTF-8 if it is UTF-8 encoded. Otherwise it tries converting the filename using some other common encodings and, failing those, returns the original filename (as the string is stored in the OS) for display.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gli/trunk/src/org/greenstone/gatherer/collection/CollectionTreeNode.java

    r15106 r16637  
    3333import org.greenstone.gatherer.file.FileNode; 
    3434import org.greenstone.gatherer.util.JarTools; 
     35import java.util.Set; 
     36import java.util.Iterator; 
     37import java.nio.charset.Charset; 
    3538 
    3639 
     
    4851    private boolean is_srcreplaceable = false; 
    4952 
     53    private String displayFileName = null; 
     54 
    5055    public CollectionTreeNode(File file) 
    5156    { 
     
    5560    // To work with replace_srcdoc_with_html.pl 
    5661    this.is_srcreplaceable = CollectionDesignManager.plugin_manager.isFileSrcReplaceable(file); 
     62     
     63    displayFileName = calcDisplayString(); 
     64    } 
     65 
     66    /** Similar to calcDisplayString, but this version only checks whether the bytes of 
     67     * the filename String form valid UTF8. If so, the string obtained by decoding those 
     68     * bytes as UTF8 is returned. If the filename is not UTF8, it is returned unchanged. 
     69     */  
     70    protected String quickCalcDisplayString() { 
     71    String filename = super.toString(); // starting point: the name as produced by the superclass's toString()
     72    try{ 
     73        String utf8filename = new String(filename.getBytes(), "UTF8"); // getBytes() with no argument encodes using the platform default charset; those bytes are then re-decoded as UTF-8
     74        if(utf8filename.indexOf('\ufffd') == -1) { // no replacement character found, so the bytes are taken to be valid UTF-8; NOTE(review): relies on this String constructor substituting U+FFFD for malformed input, which the API docs leave unspecified - confirm
     75        return utf8filename; 
     76        } else { // contains the character indicating that it's invalid utf8 
     77        // return the original string 
     78        return filename; 
     79        } 
     80    } catch(java.io.UnsupportedEncodingException e) { // charset name not supported by this JVM: fall back to the unconverted name
     81        return filename; 
     82    } 
     83    } 
     84 
     85    /** This method returns a string representation of the filenodes in the Collection 
     86     * Tree, that can then be displayed in the tree.  
     87     * We'll initially assume that the filenames are utf8 encoded and so convert the 
     88     * filename into utf8 for proper presentation in the Collection tree pane. 
     89     * If the filenames are not utf8, then the conversion would have introduced funny 
     90     * characters. Therefore, when converting to utf8, if the converted filename  
     91     * contains the special character '\ufffd', then we know the conversion did not work 
     92     * and we try the next charset in the list. Only if every conversion fails do we return 
     93     * the original string, which may or may not be properly presented by default. 
     94     * See http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/CharsetDecoder.html 
     95     * which says "How a decoding error is handled depends upon the action requested for  
     96     * that type of error, which is described by an instance of the CodingErrorAction class. 
     97     * The possible error actions are to ignore the erroneous input, report the error to 
     98     * the invoker via the returned CoderResult object, or replace the erroneous input with 
     99     * the current value of the replacement string. The replacement has the initial value  
     100     * "\uFFFD"; its value may be changed via the replaceWith method." 
     101     * The following made me think that String(byte[], String charsetName) constructor may 
     102     * use the replacement value \uFFFD. 
     103     * http://www.experts-exchange.com/Programming/Programming_Languages/Java/Q_20512969.html 
     104     * mentions the following which made me think of this:  
     105     * convertedStr = convertedStr.replace('\ufffd', ' '); 
     106     */ 
     107    protected String calcDisplayString() { 
     108    String filename = super.toString(); // starting point: the name as produced by the superclass's toString()
     109 
     110    try{ 
     111        String[] charsets = {"UTF-8", "ISO-8859-1", "US-ASCII", "UTF-16BE", "UTF-16LE", "UTF-16"}; // candidates, tried in this order; NOTE(review): ISO-8859-1 maps every byte value to a character, so it should never yield U+FFFD and the entries after it look unreachable - confirm
     112        for(int i = 0; i < charsets.length; i++) { 
     113        String charset = charsets[i]; 
     114        String convertedfilename = new String(filename.getBytes(), charset); // getBytes() with no argument encodes using the platform default charset; those bytes are then re-decoded as the candidate charset
     115        if(convertedfilename.indexOf('\ufffd') != -1) { 
     116            // encountered the character that indicates when the conversion is invalid 
     117            convertedfilename = null; 
     118            continue; // try decoding the filename with the next charset 
     119        }  
     120        else { // valid conversion 
     121            System.err.println("Filename was encoded in: " + charset); // NOTE(review): written to stderr on every successful conversion; looks like leftover debug output - confirm
     122            return convertedfilename; 
     123        } 
     124        } 
     125    } catch(java.io.UnsupportedEncodingException e) { // a charset name not supported by this JVM: fall back to the unconverted name
     126        return filename; 
     127    } 
     128 
     129    return filename; // attempted conversions all failed 
    57130    } 
    58131 
     
    77150    return is_srcreplaceable; 
    78151    } 
     152 
     153    /** This method returns a string representation of the filenodes in the Collection 
     154     * Tree, which is what will be displayed in the tree. The cached display name is 
     155     * returned; if it has not been computed yet, calcDisplayString() is called first. 
     156     * @see #calcDisplayString() 
     157     */  
     158    public String toString()  
     159    { 
     160    if(displayFileName == null) {     // not cached yet; the constructor also assigns this field, so this acts as a fallback
     161      displayFileName = calcDisplayString(); // compute once and keep the result for later calls
     162    } 
     163    return displayFileName; 
     164    } 
    79165}