Ignore:
Timestamp:
2019-12-03T15:04:32+13:00 (4 years ago)
Author:
ak19
Message:

Refactored code to do more inside functions rather than make callers do extra, specific work that requires knowledge of what happened (and didn't happen) inside the called functions. Shifted the new functions about until they appear after Dr Bainbridge's more important functions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java

    r33739 r33744  
    438438        return str;
    439439    }
    440    
    441    
    442     // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter
    443     public static String UNUSED_filenameToURLEncoding(String filename) {
    444         if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
    445             return filename;
    446         }
    447 
    448          // Can't create a URI out of a filename containing spaces. Spaces must be encoded as %20
    449         String filename_url_encoded = filename.replace(" ", "%20");
    450         //filename_url_encoded = filename_url_encoded.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex
    451         //filename_url_encoded = filename_url_encoded.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased
    452 
    453         try {
    454             URI filename_uri = new URI(filename_url_encoded);
    455             // The trick:
    456             //  1. toASCIIString() will %xx encode values > 127
    457             //  2. Decode the result to "ISO-8859-1"
    458             //  3. URL encode the bytes to string
    459            
    460             // Step 2 forces the string to be 8-bit values.  It
    461             // doesn't matter if the starting raw filename was *not*
    462             // in the ISO-8859-1 encoding, the effect is to ensure
    463             // we have an 8-bit byte string that (numerically)
    464             // captures the right value.  These numerical values are
    465             // then used to determine how to URL encode it
    466            
    467             String filename_ascii = filename_uri.toASCIIString();
    468             //filename_ascii = filename_ascii.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex
    469             //filename_ascii = filename_ascii.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased
    470             String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");
    471             filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
    472            
    473             // DEALING WITH & and + in filenames: NOT WORKING YET
    474             //if(filename_url_encoded.contains("&")) {
    475             //  filename_url_encoded = filename_url_encoded.replace("&", "%36amp;");
    476             //} else if(filename_url_encoded.contains("&")) {
    477             //  filename_url_encoded = filename_url_encoded.replace("&", "%36");
    478             //}
    479            
    480         }
    481         catch (Exception e) {
    482             e.printStackTrace();
    483             // Give up trying to convert
    484             filename_url_encoded = filename;
    485         }
    486         return filename_url_encoded;
    487     }
    488    
    489    
    490     // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter   
    491     public static String filenameToURLEncoding(String filename) {
    492         if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
    493             return filename;
    494         }
    495         File file = new File (filename);
    496         return fileToURLEncoding(file);
    497     }       
    498440
    499441   
     
    517459            return file.getAbsolutePath();
    518460        }
    519 
    520         // we'll want to protect & by replacing with &'s hex value
    521         // but we don't want to replace &#x....; with the same!
    522         Pattern plain_ampersand_not_hex_prefix_Pattern = Pattern.compile("&[^#]");
    523        
    524        
    525         int containsAmp = 0;
    526         if(file.getName().contains("&")) {
    527             System.err.println("@@@ 1 to encode " + file.getName());
    528             containsAmp = 1;
    529         } else if(file.getName().contains("&")) {
    530             System.err.println("@@@ 2 to encode " + file.getName());
    531             containsAmp = 2;
    532         } else {
    533             System.err.println("@@@ 0 to encode " + file.getName());
    534         }
    535        
    536461       
    537462        String filename_url_encoded = "";
     
    571496           
    572497            String filename_ascii = filename_uri.toASCIIString();
    573             // protect & and + in the filename too
    574             filename_ascii = escapeAllCharWithHexEntity(filename_ascii, '&');
    575            
    576             if(containsAmp > 0) System.err.println("@@@ filename_ascii: " + filename_ascii);
    577            
    578            
    579             //if(containsAmp > 0) System.err.println("@@@ filename_ascii with hexed &: " + filename_ascii);
     498           
     499            // Before proceeding, protect & in the filename too.
     500            // &'s ASCII code is 38 in decimal, and 26 in hex, so replace with &#x26;
     501            // But dangerous to do simple replace if there are &#x...; entities in the filename already!
     502            // That is, we'll want to protect & by replacing with &'s hex value, but we don't want to replace the & in "&#x....;" with the same!
     503            //filename_url_encoded = filename_url_encoded.replace("&", "&x26;");// SO THIS IS BAD
     504            //filename_url_encoded = filename_url_encoded.replace("&", hexEntityForChar("&"));// SAME, STILL BAD
     505            filename_ascii = escapeAllCharWithHexEntity(filename_ascii, '&'); // Good: CAREFULLY replaces & that are not part of hex entities
     506           
     507           
    580508            String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");         
    581509            filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
    582            
    583            
    584             //filename_url_encoded = filename_url_encoded.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex
    585             //filename_ascii = filename_ascii.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased
    586             //if(containsAmp > 0) System.err.println("@@@ filename_url_encoded: " + filename_url_encoded);
    587510           
    588511        }
     
    672595    }
    673596   
     597 // FURTHER HELPER METHODS
     598 
     599    /**
     600     * Produce the equivalent of method fileToURLEncoding(), but taking a String as input parameter.
     601     * If filename is relative, then the current directory (gli?) will be prefixed to what is returned
     602     * and should be removed manually by the caller. Alternatively, for relative paths, call the variant
     603     * relativeFilenameToURLEncoding(String), which will remove any added filepath prefix.
     604    */
     605    public static String fullFilepathToURLEncoding(String filename) {
     606        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
     607            return filename;
     608        }
     609       
     610        File file = new File (filename);
     611        //return fileToURLEncoding(file);
     612        String filename_url_encoded = fileToURLEncoding(file);
     613       
     614        // if the current directory (".") was passed in as filename,
     615        // then the filename_url_encoded looks like /full/path/./
     616        // In that case, remove the ./ at the end
     617        if (filename_url_encoded.endsWith(FilenameEncoding.URL_FILE_SEPARATOR+"."+FilenameEncoding.URL_FILE_SEPARATOR)) {
     618            filename_url_encoded = filename_url_encoded.substring(0, filename_url_encoded.length()-2); // cut off ./ at end
     619        }
     620       
     621        return filename_url_encoded;
     622    }
     623 
     624    /**
     625     * Produce the equivalent of method fileToURLEncoding(), but taking a String as input parameter
     626     * If filename is a relative path, call this method to get it specially URL encoded.
     627     * This method will remove the current directory that is prefixed as an intermediary step.
     628    */
     629    public static String relativeFilenameToURLEncoding(String filename) {
     630        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
     631            return filename;
     632        }
     633       
     634        String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding(".");
     635        return filenameToURLEncodingWithPrefixRemoved(filename, curr_directory_path);
     636       
     637        /*
     638        String curr_directory_path = FilenameEncoding.fullFilepathToURLEncoding(".");       
     639        if (curr_directory_path.endsWith("."+FilenameEncoding.URL_FILE_SEPARATOR)) {
     640            curr_directory_path = curr_directory_path.substring(0, curr_directory_path.length()-2); // cut off /. at end           
     641        }
     642       
     643        File file = new File (filename);       
     644        String filename_url_encoded = fileToURLEncoding(file); // returns a full filepath
     645       
     646        // now lop off the current dir prefix that FilenameEncoding.filenameToURLEncoding(STRING) variant would have added
     647        filename_url_encoded = filename_url_encoded.substring(curr_directory_path.length());
     648        // remove any remaining slash prefix
     649        if (filename_url_encoded.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
     650            filename_url_encoded = filename_url_encoded.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
     651        }
     652       
     653        return filename_url_encoded;
     654        */
     655    }
     656   
     657    /**
     658     * Produce the equivalent of method fileToURLEncoding(), but taking a String as input parameter
     659     * Convenience method that will return the specially URL encoded version of filename
     660     * with the provided removeFilePathPrefix removed */
     661    public static String filenameToURLEncodingWithPrefixRemoved(String filename, String removeFilePathPrefix) {
     662        if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
     663            return filename;
     664        }
     665       
     666        /*if (removeFilePathPrefix.endsWith("."+FilenameEncoding.URL_FILE_SEPARATOR)) {
     667            removeFilePathPrefix = removeFilePathPrefix.substring(0, removeFilePathPrefix.length()-2); // cut off /. at end     
     668        }
     669        */
     670       
     671        File file = new File (filename);       
     672        String filename_url_encoded = fileToURLEncoding(file); // returns a full filepath
     673       
     674        System.err.println("@@@ full url encoded filename: " + filename_url_encoded);
     675       
     676        // now lop off the given removeFilePathPrefix from the full filepath that fileToURLEncoding(File) returned above
     677        filename_url_encoded = filename_url_encoded.substring(removeFilePathPrefix.length());
     678        // remove any remaining slash prefix
     679        if (filename_url_encoded.startsWith(FilenameEncoding.URL_FILE_SEPARATOR)) {
     680            filename_url_encoded = filename_url_encoded.substring(FilenameEncoding.URL_FILE_SEPARATOR.length());
     681        }
     682       
     683        return filename_url_encoded;
     684    }
    674685}
Note: See TracChangeset for help on using the changeset viewer.