Ignore:
Timestamp:
2019-12-03T21:06:44+13:00 (4 years ago)
Author:
ak19
Message:

Linux bugfixes to recent commits to do with getting file-level meta assigned to non-ascii filenames or filenames containing plus/ampersand signs to work. Cumulative past commits were sufficient for fixing these issues on Windows. All those changes plus the current ones get it all working on Linux too.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r33747 r33748  
    7676    /** Compiled pattern for hex entities of characters. These are of the form "&#x....;" with 1 to 4 digits */
    7777    public static final Pattern HEX_PATTERN = Pattern.compile("(&#x[0-9a-zA-Z]{1,4}+;)");
    78    
     78
     79    /** The hex entity version of the ampersand character.
     80     * We use this in place of the ampersand character in filenames in metadata.xml files to
     81     * preserve the reference to the literal ampersand in the real file name on the file system.
     82     */
     83    public static final String HEX_ENTITY_AMPERSAND = FilenameEncoding.hexEntityForChar("&"); //"&";
     84   
    7985
    8086//*********************** BUSY REFRESHING / REQUIRING  REFRESH *********************
     
    371377   
    372378    public static String fileToURLEncoding(File file) {
     379    // on a UTF-8 file system, DO NOT do the stuff further below,
     380    // just return input filename param, but with any & in the filename replaced with its hex entity
    373381        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
    374             return file.getAbsolutePath();
     382            // protect ampersands in filenames by converting them to their hex entity
     383            String filepath = file.getAbsolutePath();
     384            filepath = filepath.replace("&", HEX_ENTITY_AMPERSAND);
     385            return filepath;
    375386        }
    376387       
     
    419430           
    420431            // Before proceeding, protect & in the filename too.
    421             // &'s ASCII code is 36 in decimal, and 26 in hex, so replace with &
     432            // &'s ASCII code is 38 in decimal, and 26 in hex, so replace with &#x26; (HEX_ENTITY_AMPERSAND)
    422433            // But dangerous to do simple replace if there are &#x...; entities in the filename already!
    423434            // That is, we'll want to protect & by replacing with &'s hex value, but we don't want to replace the & in "&#x....;" with the same!
     
    434445            //filename_url_encoded = filename_url_encoded.replace("%2B", "+"); // Don't do this, won't get regex escaped when converted back to a + by caller
    435446            filename_url_encoded = filename_url_encoded.replace("%2B", "+"); // + signs are special, as they will need to be escaped since the caller wants the filename representing a regex
    436             filename_url_encoded = filename_url_encoded.replace("%26", "&"); // convert URL encoding for ampersand into hex entity for ampersand
     447            filename_url_encoded = filename_url_encoded.replace("%26", HEX_ENTITY_AMPERSAND); // convert URL encoding for ampersand into hex entity for ampersand
    437448        }
    438449        catch (Exception e) {
     
    530541    */
    531542    public static String fullFilepathToURLEncoding(String filename) {
    532         if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
    533             return filename;
     543        // on a UTF-8 file system, DO NOT do the stuff further below,
     544        // just return input filename param, but with any & in the filename replaced with its hex entity
     545        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
     546            return filename.replace("&", HEX_ENTITY_AMPERSAND);
    534547        }
    535548       
     
    554567    public static String relativeFilenameToURLEncoding(String filename) {
    555568        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
    556             return filename;
     569            return filename.replace("&", HEX_ENTITY_AMPERSAND);
    557570        }
    558571       
     
    567580    public static String filenameToURLEncodingWithPrefixRemoved(String filename, String removeFilePathPrefix) {
    568581        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
    569             return filename;
     582            return filename.replace("&", HEX_ENTITY_AMPERSAND);
    570583        }
    571584       
Note: See TracChangeset for help on using the changeset viewer.