import java.io.*;
import java.lang.*;
import java.net.*;
import java.nio.charset.StandardCharsets;

/**
 * Small command-line experiment for listing a directory and printing each
 * regular file as a URL whose non-ASCII characters are written as
 * {@code %XX} escapes.
 *
 * <p>Usage: {@code java DirList <directory>}. With no argument only the
 * {@code file.encoding} banner is printed.
 */
public class DirList
{
    /** Upper-case hexadecimal digits used when writing {@code %XX} escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Percent-escapes every byte above 127 of {@code fileName}, using the
     * bytes produced by the platform default charset.
     *
     * <p>Bytes in the range 0-127 are copied through unchanged; in particular
     * spaces and {@code %} are <em>not</em> escaped.
     *
     * @param fileName the filename to encode; {@code null} is treated as empty
     * @return the encoded filename, or {@code ""} when {@code fileName} is {@code null}
     */
    public static String raw_filename_to_url_encoded(String fileName)
    {
        if (fileName == null) {
            return "";
        }

        // The platform default charset is used on purpose: this method reports
        // the bytes as this JVM would write them, whatever that encoding is.
        byte[] bytes = fileName.getBytes();
        StringBuilder urlEncoded = new StringBuilder(bytes.length);

        for (byte b : bytes) {
            // Mask with 0xFF to turn the signed byte (-128..127) into its
            // unsigned value (0..255).
            int byteVal = b & 0xFF;

            if (byteVal > 127) {
                appendPercentEscape(urlEncoded, byteVal);
            } else {
                urlEncoded.append((char) byteVal);
            }
        }

        return urlEncoded.toString();
    }

    /**
     * Converts a string whose characters are the individual bytes of a UTF-8
     * sequence into a URL-style string with non-ASCII characters escaped.
     *
     * <p>The input is first turned back into bytes via ISO-8859-1 (one char,
     * one byte) and those bytes are decoded as UTF-8. For example:
     * <pre>
     *   File system char:        lower-case beta in Latin-7       = %E2
     *   Equivalent Latin-1 char: lower-case a with circumflex     = %E2
     *   Mapped to UTF-8:         lower-case a with circumflex     = C3 A2
     * </pre>
     * The task is to take the string that contains {@code C3 A2} and make sure
     * it is "seen" as {@code %E2}.
     *
     * <p>Each decoded character is then written as follows:
     * <ul>
     *   <li>U+0000 to U+007F: copied through unchanged (spaces and {@code %}
     *       are not escaped);</li>
     *   <li>U+0080 to U+00FF: a single {@code %XX} escape holding the
     *       character's numeric (Latin-1) value;</li>
     *   <li>above U+00FF: the character cannot be expressed as one byte, so
     *       its UTF-8 bytes are written as consecutive {@code %XX} escapes.
     *       Byte sequences that are not valid UTF-8 decode to U+FFFD and are
     *       therefore written as {@code %EF%BF%BD}.</li>
     * </ul>
     *
     * @param raw_bytes_filename string in which every char holds one byte
     *        value; {@code null} is treated as empty
     * @return the encoded filename, or {@code ""} when the argument is {@code null}
     */
    public static String iso_8859_1_filename_to_url_encoded(String raw_bytes_filename)
    {
        if (raw_bytes_filename == null) {
            return "";
        }

        byte[] raw_bytes = raw_bytes_filename.getBytes(StandardCharsets.ISO_8859_1);
        String unicode_filename = new String(raw_bytes, StandardCharsets.UTF_8);

        StringBuilder urlEncoded = new StringBuilder(unicode_filename.length());

        // Walk by code point rather than by char so that a surrogate pair is
        // escaped as one character instead of two broken halves.
        int i = 0;
        while (i < unicode_filename.length()) {
            int codePoint = unicode_filename.codePointAt(i);
            i += Character.charCount(codePoint);

            if (codePoint <= 127) {
                urlEncoded.append((char) codePoint);
            } else if (codePoint <= 0xFF) {
                appendPercentEscape(urlEncoded, codePoint);
            } else {
                // A two-hex-digit escape cannot hold this value; fall back to
                // well-formed percent-encoded UTF-8.
                byte[] utf8 = new String(Character.toChars(codePoint))
                    .getBytes(StandardCharsets.UTF_8);
                for (byte b : utf8) {
                    appendPercentEscape(urlEncoded, b & 0xFF);
                }
            }
        }

        return urlEncoded.toString();
    }

    /**
     * Prints the JVM's {@code file.encoding} settings and then, when a
     * directory is given as the first argument, lists its entries.
     *
     * @param args optional single element: the directory to list
     */
    public static void main(String[] args)
    {
        String file_encoding = System.getProperty("file.encoding");
        String file_encoding_pkg = System.getProperty("file.encoding.pkg");

        System.out.println("===========================");
        System.out.println("file.encoding: " + file_encoding);
        System.out.println("file.encoding.pkg: " + file_encoding_pkg);
        System.out.println("===========================");

        if (args.length == 0) {
            return;
        }

        File folder = new File(args[0]);
        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            // listFiles() returns null when the path is not a directory or
            // cannot be read.
            System.err.println("Cannot list directory: " + args[0]);
            return;
        }

        for (File dir_or_file : listOfFiles) {
            String df_ascii = dir_or_file.toURI().toASCIIString();
            System.out.println("Before file test, toASCII URI: " + df_ascii);

            if (dir_or_file.isFile()) {
                printFileEntry(df_ascii);
            } else if (dir_or_file.isDirectory()) {
                System.out.println("Directory " + dir_or_file.getName());
            }
        }
    }

    /**
     * Prints the re-encoded URL for one regular file and reports whether the
     * file can be found again through that URL.
     *
     * <p>The trick:
     * <ol>
     *   <li>{@code toASCIIString()} has already %xx-encoded values above 127;</li>
     *   <li>decode the result as ISO-8859-1;</li>
     *   <li>URL-encode the resulting byte values back to a string.</li>
     * </ol>
     * Step 2 forces the string to hold 8-bit values. It does not matter if the
     * raw filename was not in ISO-8859-1 to begin with: the effect is an 8-bit
     * byte string that numerically captures the right values, and those values
     * then decide how the name is URL-encoded.
     *
     * @param filename_ascii the file's URI as returned by {@code toASCIIString()}
     */
    private static void printFileEntry(String filename_ascii)
    {
        String filename_raw_bytes;
        try {
            // URLDecoder performs form decoding, which turns '+' into a space.
            // A '+' in a URI path is a literal plus, so protect it first.
            filename_raw_bytes =
                URLDecoder.decode(filename_ascii.replace("+", "%2B"), "ISO-8859-1");
        }
        catch (UnsupportedEncodingException | IllegalArgumentException e) {
            e.printStackTrace();
            return;
        }

        // NOTE(review): a literal '%' in a filename is passed through
        // unescaped here, so such a URL will not parse back below.
        String filename_url_encoded =
            iso_8859_1_filename_to_url_encoded(filename_raw_bytes).replace(" ", "%20");

        System.out.println("File " + filename_url_encoded);

        try {
            File test_file = new File(new URI(filename_url_encoded));
            if (test_file.exists()) {
                System.out.println(" ... and I can see it!!!");
            }
        }
        catch (URISyntaxException | IllegalArgumentException e) {
            e.printStackTrace();
        }
    }

    /** Appends {@code byteVal} (0..255) to {@code out} as an upper-case {@code %XX} escape. */
    private static void appendPercentEscape(StringBuilder out, int byteVal)
    {
        out.append('%')
           .append(HEX_DIGITS[(byteVal >> 4) & 0xF])
           .append(HEX_DIGITS[byteVal & 0xF]);
    }
}