Ignore:
Timestamp:
2010-12-09T22:27:33+13:00 (13 years ago)
Author:
ak19
Message:

GLI now has a gs.FilenameEncoding metadata field, which appears like all the others in GLI's EnrichPane but is unique in that this metadata (once set, changed or removed) must be applied to the affected filenames in the Collection Tree. More importantly, these changes allow GLI's Java code to interact with the recent changes to Perl, where strings were made Unicode-aware (for proper regex matching), which in turn required other changes elsewhere. To still support filenames in different encodings, Perl now uses URL-encoded versions of filenames, in which the characters' code point values are URL-encoded. This requires GLI to write out URL-encoded filenames to the metadata.xml files associated with each folder level of a collection, so that Perl can read them. In this way, both sides refer to the same filenames. This only works on non-UTF-8 systems (UTF-16 or single-byte encodings such as Latin-1). This is a requirement because Java uses the filesystem encoding in effect at startup: if that encoding is UTF-8, unrecognised characters are replaced by the UTF-8 invalid (replacement) character. Since this process is destructive, the original bytes of the filenames cannot be recovered. The changes made to GLI will work on Windows, which is UTF-16 (Windows codepage 1252), presumably also on Macs (some kind of UTF-16), and on native Latin-1 Linux systems. UTF-8 Linux systems need to be reconfigured to native Latin-1; if that locale is not installed, an administrator can install it easily.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/file/FileNode.java

    r16984 r23433  
    88import javax.swing.tree.*;
    99import org.greenstone.gatherer.DebugStream;
     10import org.greenstone.gatherer.metadata.FilenameEncoding;
    1011import org.greenstone.gatherer.util.ArrayTools;
    1112
     
    1920    protected File file = null;
    2021    protected FileSystemModel model = null;
    21     protected MutableTreeNode parent = null;
     22    protected MutableTreeNode parent = null;   
     23
     24    protected String urlEncodedFileName = "";
     25    protected String urlEncodedFilePath = "";
     26    protected String filenameEncoding = "";
    2227    /** The string that is displayed as the filename. Attempts to be in the correct encoding. */
    2328    protected String displayFileName = null;
    24 
     29   
    2530
    2631    public FileNode(File file)
    2732    {
    28     this.file = file;
    29 
    30     // Files cannot have children
    31     if (file != null && !file.isDirectory()) { //file.isFile()) {
    32         // Cache this result to prevent unceasing missing disk messages being thrown if the
    33         // removable media was, um, removed after directory mapped
    34         this.allows_children = false;
    35         displayFileName = calcDisplayString();
    36     }   
    37     }
    38 
    39 
    40      /** This method returns a string representation of the filenodes in the Collection
    41      * Tree, that can then be displayed in the tree.
    42      * We'll initially assume that the filenames are utf8 encoded and so convert the
    43      * filename into utf8 for proper presentation in the Collection tree pane.
    44      * If the filenames are not utf8, then the conversion would have introduced funny
    45      * characters. Therefore, when converting to utf8, if the converted filename
    46      * contains the special character '\ufffd', then we know the conversion did not work
    47      * and we return the original string which may or may not be properly presented by
    48      * default.
    49      * See http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/CharsetDecoder.html
    50      * which says "How a decoding error is handled depends upon the action requested for
    51      * that type of error, which is described by an instance of the CodingErrorAction class.
    52      * The possible error actions are to ignore the erroneous input, report the error to
    53      * the invoker via the returned CoderResult object, or replace the erroneous input with
    54      * the current value of the replacement string. The replacement has the initial value
    55      * "\uFFFD"; its value may be changed via the replaceWith method."
    56      * The following made me think that String(byte[], String charsetName) constructor may
    57      * use the replacement value \uFFFD.
    58      * http://www.experts-exchange.com/Programming/Programming_Languages/Java/Q_20512969.html
    59      * mentions the following which made me think of this:
    60      * convertedStr = convertedStr.replace('\ufffd', ' ');
    61      */
    62     protected String calcDisplayString() {
    63     String filename = file.getName();
    64     try{
    65         String utf8filename = new String(filename.getBytes(), "UTF8");
    66         if(utf8filename.indexOf('\ufffd') == -1) {
    67         return utf8filename;
    68         } else { // contains the character indicating that it's invalid utf8
    69         // return the original string
    70         return filename;
    71         }
    72     } catch(java.io.UnsupportedEncodingException e) {
    73         return filename;
    74     }
     33        this.file = file;
     34
     35        if (file != null) {
     36            // Files cannot have children
     37            if(file.isFile()) {
     38                // Cache this result to prevent unceasing missing disk messages being thrown if the
     39                // removable media was, um, removed after directory mapped
     40                this.allows_children = false;
     41            }
     42            filenameEncoding = "";
     43            urlEncodedFilePath = FilenameEncoding.calcURLEncodedFilePath(file);
     44            urlEncodedFileName = FilenameEncoding.calcURLEncodedFileName(urlEncodedFilePath);
     45
     46            // work out the display string (extra special processing for CollectionTreeNodes)
     47            displayFileName = calcDisplayString();     
     48        }
     49    }
     50
     51    public String getURLEncodedFileName() { return urlEncodedFileName; }
     52
     53    public String getURLEncodedFilePath() { return urlEncodedFilePath; }
     54
     55    public String getFilenameEncoding() { return filenameEncoding; }
     56
     57
     58    /** This method returns a string representation of the filenodes in the tree,
     59    * that can then be displayed in the tree. Overridden in subclass CollectionTreeNode.
     60    * Turn FilenameEncoding.DEBUGGING on to see URLEncoded filenames.
     61    */
     62    protected String calcDisplayString() { 
     63        if(FilenameEncoding.DEBUGGING) {
     64            return getURLEncodedFileName();
     65        } else {
     66            return file.getName();
     67        }
    7568    }
    7669
     
    314307            }
    315308        }
     309       
     310            // in case any filename encodings had gone stale,
     311            // (recalculate these and) refresh the display name
     312            refreshDescendantEncodings();           
     313                   
    316314        }
    317315
     
    327325    }
    328326
    329 
     327    // overridden in subclass CollectionTreeNode to reset and reencode display strings
     328    public void resetDescendantEncodings() {}
     329    public void refreshDescendantEncodings() {}
     330   
     331   
    330332    public void setModel(FileSystemModel model) {
    331333    this.model = model;
Note: See TracChangeset for help on using the changeset viewer.