import java.io.*;
import java.lang.*;
import java.net.*;
import java.nio.charset.StandardCharsets;

/**
 * Small diagnostic tool: prints the JVM's encoding-related system
 * properties, then lists the entries of the directory named by the first
 * command-line argument. Each regular file is printed as a "file:" URL in
 * which characters above 127 are %XX-escaped by their numeric value, and
 * the tool then checks whether that URL still resolves to an existing file.
 */
public class DirList
{

    /**
     * Appends one value to the output: values above 127 are written as
     * '%' followed by at least two upper-case hex digits, everything else
     * is written as the character itself.
     */
    private static void append_escaped(StringBuilder out, int value)
    {
        if (value > 127) {
            out.append(String.format("%%%02X", value));
        } else {
            out.append((char) value);
        }
    }

    /**
     * Escapes the bytes of a file name that are above 127.
     *
     * The name is converted to bytes with the platform default charset
     * (NOTE(review): presumably deliberate, so that the "raw" bytes are
     * seen -- confirm). Each byte is masked with 0xFF to turn the signed
     * byte (-128 to 127) into an unsigned value (0 to 255).
     *
     * @param fileName the file name to encode; may be null
     * @return the encoded name, or the empty string when fileName is null
     */
    public static String raw_filename_to_url_encoded(String fileName)
    {
        if (fileName == null) {
            return "";
        }

        byte[] bytes = fileName.getBytes();
        StringBuilder urlEncoded = new StringBuilder(bytes.length);

        for (byte b : bytes) {
            append_escaped(urlEncoded, b & 0xFF);
        }

        return urlEncoded.toString();
    }

    /**
     * Escapes a string whose characters each stand for one byte of a
     * UTF-8 sequence.
     *
     * By the time this is called we have a UTF-8 encoded string that
     * captures what the ISO-8859-1 (Latin-1) character is that corresponded
     * to the 8-bit numeric value for that character in the filename on the
     * file system. For example:
     *
     *   File system char:         lower-case beta in Latin-7      = %E2
     *   Equivalent Latin-1 char:  lower-case a with circumflex    = %E2
     *   Mapped to UTF-8:          lower-case a with circumflex    = C3 A2
     *
     * The task is to take the string that contains C3 A2 and make sure it
     * is "seen" as E2. This is done by turning the characters back into
     * bytes via ISO-8859-1, decoding those bytes as UTF-8, and escaping
     * every resulting character above 127 by its numeric value.
     *
     * Limitation: a decoded character above 0xFF is written with more than
     * two hex digits (for example %3B2), which is not a valid percent
     * escape; the method only yields valid escapes when every decoded
     * character fits in Latin-1.
     *
     * @param raw_bytes_filename string of 8-bit characters; may be null
     * @return the encoded string, or the empty string when the argument is null
     */
    public static String iso_8859_1_filename_to_url_encoded(String raw_bytes_filename)
    {
        if (raw_bytes_filename == null) {
            return "";
        }

        byte[] raw_bytes = raw_bytes_filename.getBytes(StandardCharsets.ISO_8859_1);
        String unicode_filename = new String(raw_bytes, StandardCharsets.UTF_8);

        StringBuilder urlEncoded = new StringBuilder(unicode_filename.length());
        for (int i = 0; i < unicode_filename.length(); i++) {
            append_escaped(urlEncoded, unicode_filename.charAt(i));
        }

        return urlEncoded.toString();
    }

    /**
     * Prints the encoding-related system properties and then the contents
     * of the directory given as args[0].
     *
     * @param args args[0] is the directory to list
     */
    public static void main(String[] args)
    {
        System.out.println("===========================");
        System.out.println("file.encoding: " + System.getProperty("file.encoding"));
        System.out.println("file.encoding.pkg: " + System.getProperty("file.encoding.pkg"));
        System.out.println("sun_io_unicode_encoding: " + System.getProperty("sun.io.unicode.encoding"));
        System.out.println("sun_jnu_encoding: " + System.getProperty("sun.jnu.encoding"));
        System.out.println("===========================");

        if (args.length == 0) {
            System.err.println("Usage: java DirList <directory>");
            return;
        }

        File folder = new File(args[0]);
        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            // listFiles() returns null when the path is not a directory
            // or an I/O error occurs.
            System.err.println("Cannot list directory: " + folder);
            return;
        }

        for (File dir_or_file : listOfFiles) {
            String df_ascii = dir_or_file.toURI().toASCIIString();
            System.out.println("Before file test, toASCII URI: " + df_ascii);

            if (dir_or_file.isFile()) {
                try {
                    // The trick:
                    //   1. toASCIIString() will %xx encode values > 127
                    //   2. Decode the result to "ISO-8859-1"
                    //   3. URL encode the bytes to string
                    //
                    // Step 2 forces the string to be 8-bit values. It
                    // doesn't matter if the starting raw filename was *not*
                    // in the ISO-8859-1 encoding, the effect is to ensure
                    // we have an 8-bit byte string that (numerically)
                    // captures the right value. These numerical values are
                    // then used to determine how to URL encode it.

                    // URLDecoder is a form decoder and would turn a literal
                    // '+' into a space, so protect it as %2B first.
                    String filename_raw_bytes =
                        URLDecoder.decode(df_ascii.replace("+", "%2B"), "ISO-8859-1");

                    String filename_url_encoded =
                        iso_8859_1_filename_to_url_encoded(filename_raw_bytes).replace(" ", "%20");

                    System.out.println("File " + filename_url_encoded);

                    File test_file = new File(new URI(filename_url_encoded));
                    if (test_file.exists()) {
                        System.out.println(" ... and I can see it!!!");
                    }
                }
                catch (Exception e) {
                    // Best effort per entry: report the problem and carry
                    // on with the next file.
                    e.printStackTrace();
                }
            } else if (dir_or_file.isDirectory()) {
                System.out.println("Directory " + dir_or_file.getName());
            }
        }
    }

}