1 | import java.io.*;
|
---|
2 | import java.lang.*;
|
---|
3 | import java.net.*;
|
---|
4 |
|
---|
/**
 * Command-line utility that lists the entries of a directory and prints, for
 * every regular file, a {@code file:} URL in which each non-ASCII character of
 * the name is written as a percent escape.
 *
 * <p>Characters in the range U+0080..U+00FF are written as the single escape
 * of their ISO-8859-1 (Latin-1) byte value; see the method comments for the
 * reasoning behind this.
 *
 * <p>Usage: {@code DirList <directory>}
 */
public class DirList
{
    /** Upper-case hexadecimal digits used when writing {@code %XX} escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Appends the two-digit, upper-case percent escape ({@code %XX}) for one
     * byte value to {@code out}.
     *
     * @param out     buffer receiving the escape
     * @param byteVal value in the range 0..255 (only the low 8 bits are used)
     */
    private static void appendPercentEscape(StringBuilder out, int byteVal)
    {
        out.append('%');
        out.append(HEX_DIGITS[(byteVal >> 4) & 0x0F]);
        out.append(HEX_DIGITS[byteVal & 0x0F]);
    }

    /**
     * Percent-encodes every byte above 127 of a file name; bytes in the ASCII
     * range are copied through unchanged (including '%', ' ' and other
     * characters that are reserved in URLs).
     *
     * <p>The bytes are obtained with {@link String#getBytes()}, i.e. the
     * platform default charset, so the result for non-ASCII names depends on
     * how the JVM was started.
     *
     * @param fileName the file name to encode; {@code null} yields ""
     * @return the encoded name, never {@code null}
     */
    public static String raw_filename_to_url_encoded(String fileName)
    {
        if (fileName == null) {
            return "";
        }

        byte[] bytes = fileName.getBytes();
        StringBuilder urlEncoded = new StringBuilder(bytes.length);

        for (int i = 0; i < bytes.length; i++) {
            // mask each byte (by applying & 0xFF) to make the signed
            // byte (in the range -128 to 127) unsigned (in the range
            // 0 to 255).
            int byteVal = bytes[i] & 0xFF;

            if (byteVal > 127) {
                appendPercentEscape(urlEncoded, byteVal);
            } else {
                urlEncoded.append((char) byteVal);
            }
        }

        return urlEncoded.toString();
    }

    // For unicode codepoints see:
    //   http://unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT for ISO8859-1 (Latin-1)
    //     where 0xE2 maps to codepoint 0x00E2 and is defined as "Latin small letter a with circumflex"
    //   http://unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT for ISO8859-7 (Greek)
    //     where 0xE2 maps to codepoint 0x03B2 and is defined as "Greek small letter beta"

    /**
     * Converts a string whose chars each hold one byte of a UTF-8 encoded file
     * name into a percent-encoded form.
     *
     * <p>The input is first turned back into bytes via ISO-8859-1 (one char,
     * one byte) and those bytes are decoded as UTF-8. Each resulting character
     * is then written as follows:
     * <ul>
     *   <li>U+0000..U+007F: copied through unchanged;</li>
     *   <li>U+0080..U+00FF: one escape holding the code point value, which is
     *       also its ISO-8859-1 byte (for example {@code %E2});</li>
     *   <li>above U+00FF: the escapes of its UTF-8 bytes (for example
     *       {@code %CE%B2}), because such a value does not fit into a single
     *       two-digit escape.</li>
     * </ul>
     * Byte sequences that are not valid UTF-8 decode to U+FFFD and are
     * therefore written as {@code %EF%BF%BD}.
     *
     * @param raw_bytes_filename string of 8-bit values; {@code null} yields ""
     * @return the encoded name, never {@code null}
     */
    public static String iso_8859_1_filename_to_url_encoded(String raw_bytes_filename)
    {
        if (raw_bytes_filename == null) {
            return "";
        }

        try {
            // By this point we have a UTF-8 encoded string that captures
            // what the ISO-8859-1 (Latin-1) character is that corresponded to the
            // 8-bit numeric value for that character in the filename
            // on the file system

            // For example:
            //   File system char:        <lower-case beta char in Latin-7> = %E2
            //   Equivalent Latin 1 char: <lower-case a with circumflex>    = %E2
            //   Mapped to UTF-8:         <lower-case a with circumflex>    = <C3><A2>

            // Our task is to take the string that contains <C3><A2> and ensure that
            // we "see" it as <E2>

            byte[] raw_bytes = raw_bytes_filename.getBytes("ISO-8859-1");
            String unicode_filename = new String(raw_bytes, "UTF-8");

            StringBuilder urlEncoded = new StringBuilder(unicode_filename.length());

            int i = 0;
            while (i < unicode_filename.length()) {
                // Walk by code point so a surrogate pair is handled as one character.
                int codePoint = unicode_filename.codePointAt(i);
                int charCount = Character.charCount(codePoint);

                if (codePoint <= 127) {
                    urlEncoded.append((char) codePoint);
                } else if (codePoint <= 0xFF) {
                    appendPercentEscape(urlEncoded, codePoint);
                } else {
                    // Too large for a single %XX escape: emit the UTF-8 bytes instead.
                    byte[] utf8 = unicode_filename.substring(i, i + charCount).getBytes("UTF-8");
                    for (int j = 0; j < utf8.length; j++) {
                        appendPercentEscape(urlEncoded, utf8[j] & 0xFF);
                    }
                }

                i += charCount;
            }

            return urlEncoded.toString();
        }
        catch (UnsupportedEncodingException e) {
            // ISO-8859-1 and UTF-8 are charsets every Java platform must provide.
            throw new IllegalStateException("Required charset is not available", e);
        }
    }

    /**
     * Rewrites a URI string so that decoding it with {@link URLDecoder} only
     * expands the escapes of non-ASCII bytes.
     *
     * <p>{@code URLDecoder} applies form-encoding rules: it turns '+' into a
     * space and expands every escape, including those of reserved ASCII
     * characters such as {@code %25} ('%'), {@code %23} ('#') and {@code %20}.
     * Here '+' is replaced by {@code %2B} and the '%' of every ASCII escape
     * (first hex digit 0-7) is itself escaped, so those escapes come out of
     * the decoder as literal text.
     *
     * @param uri_ascii a URI string as produced by {@link URI#toASCIIString()}
     * @return the string to hand to {@code URLDecoder.decode}
     */
    private static String protect_ascii_escapes(String uri_ascii)
    {
        return uri_ascii.replace("+", "%2B").replaceAll("%([0-7][0-9A-Fa-f])", "%25$1");
    }

    /**
     * Prints the percent-encoded URL of one regular file and reports whether
     * a file can be found again through that URL.
     *
     * @param file the file to report
     */
    private static void print_url_encoded_file(File file)
    {
        URI filename_uri = file.toURI();

        // The trick:
        //  1. toASCIIString() will %xx encode values > 127
        //  2. Decode the result to "ISO-8859-1"
        //  3. URL encode the bytes to string

        // Step 2 forces the string to be 8-bit values.  It
        // doesn't matter if the starting raw filename was *not*
        // in the ISO-8859-1 encoding, the effect is to ensure
        // we have an 8-bit byte string that (numerically)
        // captures the right value.  These numerical values are
        // then used to determine how to URL encode it

        String filename_ascii = filename_uri.toASCIIString();
        String filename_url_encoded;
        try {
            String filename_raw_bytes = URLDecoder.decode(protect_ascii_escapes(filename_ascii), "ISO-8859-1");
            filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
        }
        catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return;
        }

        // No space-to-%20 fix-up is needed: the %20 escapes written by
        // toASCIIString() were protected above and are still in place.
        System.out.println("File " + filename_url_encoded);

        try {
            File test_file = new File(new URI(filename_url_encoded));
            if (test_file.exists()) {
                System.out.println(" ... and I can see it!!!");
            }
        }
        catch (URISyntaxException e) {
            e.printStackTrace();
        }
        catch (IllegalArgumentException e) {
            // Thrown by File(URI) when the URI cannot be mapped to a path.
            e.printStackTrace();
        }
    }

    /**
     * Lists the directory named by the first argument, printing one line per
     * entry: the encoded URL for regular files, the plain name for
     * sub-directories, and a warning for anything else.
     *
     * <p>Exits with status -1 when no argument is given or when the argument
     * cannot be listed as a directory.
     *
     * @param args command-line arguments; {@code args[0]} is the directory
     */
    public static void main(String[] args)
    {
        if (args.length < 1) {
            System.out.println("Usage: DirList <directory>");
            System.exit(-1);
        }

        File folder = new File(args[0]);
        File[] listOfFiles = folder.listFiles();

        if (listOfFiles == null) {
            // listFiles() returns null when the path is not a directory or
            // an I/O error occurs.
            System.err.println("DirList: cannot list directory: " + folder);
            System.exit(-1);
            return;
        }

        for (int i = 0; i < listOfFiles.length; i++) {
            File entry = listOfFiles[i];

            if (entry.isFile()) {
                print_url_encoded_file(entry);
            } else if (entry.isDirectory()) {
                System.out.println("Directory " + entry.getName());
            } else {
                System.out.println("*** Not file or dir. Can't see file: " + entry);
            }
        }
    }

}