import java.io.*;
import java.lang.*;
import java.net.*;

/**
 * Diagnostic tool that lists the entries of a directory and prints, for each
 * regular file, a percent-encoded form of its {@code file:} URI, then checks
 * whether a {@link File} rebuilt from that encoded URI exists.
 *
 * <p>It also prints the JVM's encoding-related system properties, which is
 * useful when investigating how file names containing non-ASCII characters
 * are seen by Java.
 */
public class DirList
{
    /**
     * Percent-encodes every non-ASCII byte of {@code fileName}.
     *
     * <p>The string is converted to bytes using the platform default charset
     * (this is deliberate: the result shows how the platform encodes the
     * name). Bytes above 127 become {@code %XX} (upper-case hex); all other
     * bytes, including space and {@code '%'}, are copied through unchanged.
     *
     * @param fileName the name to encode; {@code null} yields an empty string
     * @return the encoded name, never {@code null}
     */
    public static String raw_filename_to_url_encoded(String fileName)
    {
        if (fileName == null) {
            return "";
        }

        byte[] bytes = fileName.getBytes();
        StringBuilder urlEncoded = new StringBuilder(bytes.length);
        for (byte b : bytes) {
            // Mask with 0xFF so the signed byte (-128..127) is treated as an
            // unsigned value (0..255).
            int byteVal = b & 0xFF;
            if (byteVal > 127) {
                appendEscapedByte(urlEncoded, byteVal);
            }
            else {
                urlEncoded.append((char) byteVal);
            }
        }
        return urlEncoded.toString();
    }

    /**
     * Percent-encodes a file name that is supplied as a string of 8-bit
     * values (one char per byte, i.e. the ISO-8859-1 view of the raw bytes).
     *
     * <p>The 8-bit values are reinterpreted as a UTF-8 byte sequence and
     * decoded to Unicode. Worked example: the chars U+00C3 U+00A2 are the
     * bytes C3 A2, which is the UTF-8 encoding of U+00E2 (a-circumflex), so
     * the result is {@code %E2} -- the single byte that character has in
     * ISO-8859-1.
     *
     * <p>Encoding rules applied to each decoded code point:
     * <ul>
     *   <li>0..127: copied unchanged (this includes space and {@code '%'});</li>
     *   <li>128..255: one {@code %XX} escape holding the code point value;</li>
     *   <li>above 255: the code point's UTF-8 bytes, each as {@code %XX}.
     *       Such characters have no single-byte value, and formatting the
     *       code point directly would give a malformed escape such as
     *       {@code %4E2D}.</li>
     * </ul>
     *
     * @param raw_bytes_filename string whose chars each hold one raw byte
     *        value; {@code null} yields an empty string
     * @return the encoded name, never {@code null}
     */
    public static String iso_8859_1_filename_to_url_encoded(String raw_bytes_filename)
    {
        if (raw_bytes_filename == null) {
            return "";
        }

        try {
            byte[] raw_bytes = raw_bytes_filename.getBytes("ISO-8859-1");
            String unicode_filename = new String(raw_bytes, "UTF-8");

            StringBuilder urlEncoded = new StringBuilder(unicode_filename.length());
            int i = 0;
            while (i < unicode_filename.length()) {
                // Walk by code point so surrogate pairs are encoded as one
                // character rather than as two separate halves.
                int codePoint = unicode_filename.codePointAt(i);
                i += Character.charCount(codePoint);

                if (codePoint <= 127) {
                    urlEncoded.append((char) codePoint);
                }
                else if (codePoint <= 0xFF) {
                    appendEscapedByte(urlEncoded, codePoint);
                }
                else {
                    byte[] utf8 = new String(Character.toChars(codePoint)).getBytes("UTF-8");
                    for (byte b : utf8) {
                        appendEscapedByte(urlEncoded, b & 0xFF);
                    }
                }
            }
            return urlEncoded.toString();
        }
        catch (UnsupportedEncodingException e) {
            // ISO-8859-1 and UTF-8 are charsets every Java platform is
            // required to support, so this is not expected to happen.
            throw new IllegalStateException("Required charset is unavailable", e);
        }
    }

    /** Appends {@code byteVal} (0..255) to {@code out} as {@code %XX}, upper-case hex. */
    private static void appendEscapedByte(StringBuilder out, int byteVal)
    {
        out.append(String.format("%%%02X", byteVal));
    }

    /** Prints the JVM's encoding-related system properties to standard output. */
    private static void printEncodingProperties()
    {
        String file_encoding = System.getProperty("file.encoding");
        String file_encoding_pkg = System.getProperty("file.encoding.pkg");
        String sun_io_unicode_encoding = System.getProperty("sun.io.unicode.encoding");
        String sun_jnu_encoding = System.getProperty("sun.jnu.encoding");

        System.out.println("===========================");
        System.out.println("file.encoding: " + file_encoding);
        System.out.println("file.encoding.pkg: " + file_encoding_pkg);
        System.out.println("sun_io_unicode_encoding: " + sun_io_unicode_encoding);
        System.out.println("sun_jnu_encoding: " + sun_jnu_encoding);
        System.out.println("===========================");
    }

    /**
     * Prints the percent-encoded form of one file's URI and reports whether a
     * {@link File} rebuilt from that encoded URI exists.
     *
     * <p>The trick:
     * <ol>
     *   <li>{@code toASCIIString()} has already %XX-encoded values above 127;</li>
     *   <li>decode that as ISO-8859-1, which forces a string of 8-bit values
     *       that numerically capture the bytes, whatever encoding the file
     *       name really used;</li>
     *   <li>percent-encode again from those numeric values.</li>
     * </ol>
     *
     * @param asciiUri the file's URI as returned by {@code URI.toASCIIString()}
     */
    private static void reportFile(String asciiUri)
    {
        try {
            // URLDecoder applies form decoding, which turns '+' into a space.
            // A literal '+' is legal in a URI path, so protect it first.
            String protectedUri = asciiUri.replace("+", "%2B");
            String filename_raw_bytes = URLDecoder.decode(protectedUri, "ISO-8859-1");

            String filename_url_encoded
                = iso_8859_1_filename_to_url_encoded(filename_raw_bytes).replace(" ", "%20");
            System.out.println("File " + filename_url_encoded);

            File test_file = new File(new URI(filename_url_encoded));
            if (test_file.exists()) {
                System.out.println(" ... and I can see it!!!");
            }
        }
        catch (Exception e) {
            // Top-level diagnostic boundary: report the problem for this
            // entry and carry on with the remaining ones.
            e.printStackTrace();
        }
    }

    /**
     * Entry point. Prints the encoding properties, then lists the directory
     * named by the first argument.
     *
     * @param args {@code args[0]} is the directory to list
     */
    public static void main(String[] args)
    {
        printEncodingProperties();

        if (args.length == 0) {
            System.err.println("Usage: java DirList <directory>");
            return;
        }

        File folder = new File(args[0]);
        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            // listFiles() returns null rather than an empty array when the
            // path is not a directory or an I/O error occurs.
            System.err.println("Unable to list '" + args[0]
                               + "': not a directory, or an I/O error occurred");
            return;
        }

        for (File dir_or_file : listOfFiles) {
            String df_ascii = dir_or_file.toURI().toASCIIString();
            System.out.println("Before file test, toASCII URI: " + df_ascii);

            if (dir_or_file.isFile()) {
                reportFile(df_ascii);
            }
            else if (dir_or_file.isDirectory()) {
                System.out.println("Directory " + dir_or_file.getName());
            }
        }
    }
}