Ignore:
Timestamp:
2010-12-09T22:27:33+13:00 (13 years ago)
Author:
ak19
Message:

GLI now has a gs.FilenameEncoding metadata field which appears like all the others in GLI's EnrichPane, but is unique in that this metadata (once set, changed or removed) must be applied to the affected filenames in the Collection Tree. More importantly, the changes made for this allow GLI's Java code to interact with the recent changes to Perl, where strings were made Unicode-aware (for proper regex matching), which in turn required other changes elsewhere. To still support filenames with different encodings, Perl uses URL-encoded versions of filenames, in which the characters' code point values are represented in URL encoding. This required that GLI write out URL-encoded filenames to the metadata.xml files that are associated with each folder level of a collection, so that Perl can read them. In this way, they can both speak of the same filenames. This only works on non-UTF-8 systems (UTF-16 ones, or ones using an encoding such as Latin-1). Not being UTF-8 is a requirement, since Java uses the filesystem encoding it finds at startup. If that is UTF-8, unrecognised characters are replaced by the invalid character for UTF-8. This process being destructive, we can't get the original filenames' bytes back. The changes made to GLI will work on Windows, which is UTF-16 (Windows codepage 1252), presumably also on Macs (some kind of UTF-16), and also on Native Latin-1 Linux systems. UTF-8 Linux systems need to be reconfigured to Native Latin-1; if that locale is not installed, an administrator can install it easily.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/collection/CollectionTreeNode.java

    r16838 r23433  
    3232import org.greenstone.gatherer.cdm.CollectionDesignManager;
    3333import org.greenstone.gatherer.file.FileNode;
     34import org.greenstone.gatherer.metadata.FilenameEncoding;
    3435import org.greenstone.gatherer.util.JarTools;
    3536import java.util.Set;
    3637import java.util.Iterator;
    37 import java.nio.charset.Charset;
    3838
    3939
     
    5454    {
    5555    super(file);
     56        // the super call will additionally call calcDisplayString(), which will get any
     57        // applicable filename encoding and apply it to the file's name for display.
    5658
    5759    this.is_explodable = CollectionDesignManager.plugin_manager.isFileExplodable(file);
     
    8183    return is_srcreplaceable;
    8284    }
     85
     86    /** Returns the string to display for this node in the <i>Collection</i>
     87    * Tree. For metadata.xml files, or when multiple filename encodings are not
     88    * supported, this is the superclass's display string. Otherwise, if the
     89    * filename encoding found for the file differs from the one currently applied,
     90    * that encoding is applied via reencodeDisplayName() to get the display name. */
     91    protected String calcDisplayString() {     
     92        // metadata.xml files in collections don't get displayed anyway
     93        if(file.getName().equals("metadata.xml")) {
     94            return super.calcDisplayString();
     95        }
     96       
     97        if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
     98            return super.calcDisplayString();
     99        }
     100
     101        // Else: try to encode the display string from the file_to_encoding map.
     102        // If that yields no name, fall back to the default display string (see below).
     103        String displayName = null;
     104        String urlEncodedPath = getURLEncodedFilePath();
     105        String encoding = FilenameEncoding.findFilenameEncoding(file, urlEncodedPath, false);
     106
     107        // if it isn't the same as the current encoding already applied, reapply
     108        // it which will set the filenameEncoding again as well
     109        if(!encoding.equals(getFilenameEncoding())) { // this operand order tolerates a null current encoding
     110            displayName = reencodeDisplayName(encoding); // may return null
     111        }
     112        // NOTE(review): when the encoding is unchanged, displayName stays null and the fallback below is used - confirm intended
     113        if(displayName == null) {
     114            if(FilenameEncoding.DEBUGGING) {
     115                displayName = getURLEncodedFileName(); // debugging: show the URL-encoded name instead
     116            } else {
     117                displayName = super.calcDisplayString();
     118            }
     119        }
     120       
     121        return displayName;
     122    }
     123
     124    /** Call this if the filename encoding has changed and needs to be
     125    * re-applied to the filename. This is only for display: the bytes of
     126    * file.getName() (not the file path) are decoded using the given encoding.
     127    * The given encoding is stored in filenameEncoding exactly as passed in; no
     128    * charset-alias to canonical-name conversion is performed by this method.
     129    * This method both sets and returns the displayFileName member variable.
     130    * @param encoding the charset name to apply; "" means use the superclass's display string. equals() is called on it, so it must not be null.
     131    * @return the new displayFileName, or the existing one (possibly null) if multiple filename encodings are unsupported.  */
     132    public String reencodeDisplayName(String encoding) {
     133        if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
     134            return displayFileName; // may still be null
     135        }
     136
     137        filenameEncoding = encoding; // clear previous value
     138
     139        if(filenameEncoding.equals("")) {
     140            displayFileName = super.calcDisplayString(); // *FileNode* calcDisplayString
     141            return displayFileName;
     142        }
     143
     144        try{
     145            displayFileName = new String(file.getName().getBytes(), filenameEncoding); // getBytes() with no argument uses the platform default charset
     146        } catch(Exception e) {
     147            // IllegalCharsetName-, UnsupportedCharset- or UnsupportedEncoding-Exception
     148            // Store the unsupported encoding, but display with filesystem (or URL) encoding
     149            filenameEncoding = encoding; // same value as already assigned above
     150           
     151            if(FilenameEncoding.DEBUGGING) {
     152                displayFileName = getURLEncodedFileName();
     153            } else {
     154                displayFileName = super.calcDisplayString();
     155            }
     156        }
     157        return displayFileName;
     158    }
     159       
     160    /** Can call this upon refreshing a FileNode (this CollectionTreeNode). It makes
     161    * sure the display names of the visible nodes in the CollectionTree aren't stale */
     162    public void refreshDescendantEncodings() {
     163        // now recalculate encodings for all visible children
     164       
     165        // Don't bother with the encoding stuff when multiple filename encodings
     166        // are not supported because the system is UTF-8 (not Native-Latin-1) and
     167        // Java interprets all filename bytes as UTF-8, which destructively
     168        // converts unrecognised characters (for UTF-8) into the invalid character.
     169
     170        if(!FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED
     171                || child_nodes == null)
     172        {
     173            return;
     174        }       
     175       
     176        // get and apply the filename encoding, if any
     177        for(int i = 0; i < child_nodes.size(); i++) {
     178            CollectionTreeNode child_node = (CollectionTreeNode) child_nodes.get(i);           
     179            String urlEncodedPath = child_node.getURLEncodedFilePath();     
     180           
     181            String encoding = FilenameEncoding.findFilenameEncoding(
     182                    child_node.getFile(), urlEncodedPath, false);
     183           
     184            // if current encoding is different from the existing one, re-apply encoding
     185            if(!child_node.getFilenameEncoding().equals(encoding)) {
     186            child_node.reencodeDisplayName(encoding);
     187            }
     188        }
     189    }
     190   
     191    // Unused at present
     192    /** Call when the filename encoding of this folder level fileNode has changed
     193    * and therefore the filename encodings of this fileNode and its descendants have to be
     194    * reset (their entries in the file_to_encoding hashmap cleared), so that we know
     195    * to recalculate these later.  Call when deleting, moving or renaming col nodes. Does nothing if multiple filename encodings are unsupported. */
     196    public void resetDescendantEncodings() {
     197        if(FilenameEncoding.MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
     198            resetDescendantEncodings(this.file, this.getURLEncodedFilePath());
     199           
     200            // for an actual file/folder delete, want to remove all corresponding
     201            // entries from the hashtable without recalculating any encodings
     202            // (files have been deleted, so there will be no filenames to display).
     203            // So don't do this here: refreshDescendantEncodings();
     204            // It's done near the end of FileNode.map().
     205        }
     206    }
     207   
     208    // Together with the above, unused at present
     209    private static void resetDescendantEncodings(File f, String urlEncodedPath)
     210    {
     211        // remove this file f's urlencoded path name from the file_to_encoding Map
     212        FilenameEncoding.map.remove(urlEncodedPath);
     213           
     214        if(f.isDirectory()) {
     215            File[] children = f.listFiles();       
     216            for(int i = 0; i < children.length; i++) {
     217                urlEncodedPath = FilenameEncoding.fileToURLEncoding(children[i]);
     218                resetDescendantEncodings(children[i], urlEncodedPath);
     219                        // sets filenameEncoding var
     220            }
     221        }
     222    }
     223
    83224}
Note: See TracChangeset for help on using the changeset viewer.