source: test-collections/trunk/filename-encodings/bin/script/DirList.java@ 23830

Last change on this file since 23830 was 23332, checked in by davidb, 13 years ago

Additional properties printed out

File size: 4.4 KB
Line 
import java.io.*;
import java.lang.*;
import java.net.*;
import java.nio.charset.StandardCharsets;

/**
 * Diagnostic command-line tool for experimenting with filename encodings.
 *
 * <p>It prints the JVM's encoding-related system properties and then, for the
 * directory named by the first command-line argument, prints each entry as a
 * {@code file:} URI in which every byte above 127 is written as a {@code %XX}
 * escape. For regular files it also checks whether the resulting URI can be
 * turned back into a {@link File} that exists.
 */
public class DirList
{

    /** Upper-case hexadecimal digits used when writing {@code %XX} escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Appends one {@code %XX} escape (two upper-case hex digits) to {@code out}.
     *
     * @param out     buffer receiving the escape
     * @param byteVal value to write; only the low 8 bits are used
     */
    private static void append_percent_escape(StringBuilder out, int byteVal)
    {
        out.append('%');
        out.append(HEX_DIGITS[(byteVal >> 4) & 0x0F]);
        out.append(HEX_DIGITS[byteVal & 0x0F]);
    }

    /**
     * Percent-encodes the bytes of {@code fileName} that are above 127.
     *
     * <p>The bytes are obtained with {@link String#getBytes()}, i.e. in the
     * platform default charset, so the result for non-ASCII input depends on
     * how the JVM is configured. Bytes in the range 0-127 are copied through
     * unchanged (including characters such as space and '%').
     *
     * @param fileName the name to encode; {@code null} yields an empty string
     * @return the name with every byte above 127 replaced by {@code %XX}
     */
    public static String raw_filename_to_url_encoded(String fileName)
    {
        if (fileName == null) {
            return "";
        }

        byte[] bytes = fileName.getBytes();
        StringBuilder urlEncoded = new StringBuilder(bytes.length);

        for (byte b : bytes) {
            // Mask with 0xFF so the signed byte (-128..127) is treated as an
            // unsigned value (0..255).
            int byteVal = b & 0xFF;

            if (byteVal > 127) {
                append_percent_escape(urlEncoded, byteVal);
            } else {
                urlEncoded.append((char) byteVal);
            }
        }

        return urlEncoded.toString();
    }

    /**
     * Collapses a UTF-8 byte sequence that is held one-byte-per-char in a
     * string back into single-byte {@code %XX} escapes.
     *
     * <p>The input is expected to hold the UTF-8 encoding of the ISO-8859-1
     * (Latin-1) character whose 8-bit value matched the byte stored in the
     * filename on the file system. For example:
     * <pre>
     *   File system char:         lower-case beta in Latin-7        = %E2
     *   Equivalent Latin-1 char:  lower-case a with circumflex      = %E2
     *   Mapped to UTF-8:          lower-case a with circumflex      = C3 A2
     * </pre>
     * The task is to take the string that contains C3 A2 (as the two chars
     * U+00C3 U+00A2) and emit {@code %E2}.
     *
     * <p>Characters in the range 0-127 are copied through unchanged. If the
     * decoded text contains a code point above U+00FF, which cannot be written
     * as a single {@code %XX} escape, its UTF-8 bytes are emitted as separate
     * escapes instead of a malformed escape with more than two hex digits.
     *
     * @param raw_bytes_filename string whose chars each carry one byte value;
     *                           {@code null} yields an empty string
     * @return the decoded name with every non-ASCII character percent-encoded
     */
    public static String iso_8859_1_filename_to_url_encoded(String raw_bytes_filename)
    {
        if (raw_bytes_filename == null) {
            return "";
        }

        // Recover the byte values, then interpret them as UTF-8 text.
        byte[] raw_bytes = raw_bytes_filename.getBytes(StandardCharsets.ISO_8859_1);
        String unicode_filename = new String(raw_bytes, StandardCharsets.UTF_8);

        StringBuilder urlEncoded = new StringBuilder(unicode_filename.length());

        int pos = 0;
        while (pos < unicode_filename.length()) {
            int codePoint = unicode_filename.codePointAt(pos);
            int width = Character.charCount(codePoint);

            if (codePoint <= 127) {
                urlEncoded.append((char) codePoint);
            } else if (codePoint <= 0xFF) {
                // Fits in one byte: this is the 8-bit value from the file system.
                append_percent_escape(urlEncoded, codePoint);
            } else {
                // Does not fit in one byte; fall back to its UTF-8 bytes so the
                // output still consists of well-formed %XX escapes.
                String oneChar = unicode_filename.substring(pos, pos + width);
                for (byte b : oneChar.getBytes(StandardCharsets.UTF_8)) {
                    append_percent_escape(urlEncoded, b & 0xFF);
                }
            }

            pos += width;
        }

        return urlEncoded.toString();
    }

    /** Prints the encoding-related system properties of the running JVM. */
    private static void print_encoding_properties()
    {
        String file_encoding = System.getProperty("file.encoding");
        String file_encoding_pkg = System.getProperty("file.encoding.pkg");

        String sun_io_unicode_encoding = System.getProperty("sun.io.unicode.encoding");
        String sun_jnu_encoding = System.getProperty("sun.jnu.encoding");

        System.out.println("===========================");
        System.out.println("file.encoding: " + file_encoding);
        System.out.println("file.encoding.pkg: " + file_encoding_pkg);
        System.out.println("sun_io_unicode_encoding: " + sun_io_unicode_encoding);
        System.out.println("sun_jnu_encoding: " + sun_jnu_encoding);
        System.out.println("===========================");
    }

    /**
     * Prints the byte-level URL encoding of {@code file}'s URI and reports
     * whether a {@link File} built from that encoding exists.
     *
     * <p>The trick:
     * <ol>
     *   <li>{@code toASCIIString()} %XX-encodes values above 127.</li>
     *   <li>Decode the result as "ISO-8859-1". This forces the string to hold
     *       8-bit values; it does not matter whether the raw filename really
     *       was ISO-8859-1, the effect is an 8-bit string that numerically
     *       captures the right values.</li>
     *   <li>URL-encode those values again.</li>
     * </ol>
     *
     * @param file the regular file to report on
     */
    private static void report_file(File file)
    {
        String filename_url_encoded;

        try {
            String filename_ascii = file.toURI().toASCIIString();

            // URLDecoder implements HTML form decoding, so two things must be
            // protected before calling it:
            //  - escapes of ASCII characters (%20, %23, %25, ...) have to
            //    survive untouched, otherwise '#', '?' or '%' end up as
            //    literal characters in the printed URI;
            //  - a literal '+' would otherwise be decoded to a space.
            String protected_ascii = filename_ascii
                .replaceAll("%([0-7][0-9A-Fa-f])", "%25$1")
                .replace("+", "%2B");

            String filename_raw_bytes = URLDecoder.decode(protected_ascii, "ISO-8859-1");

            // No space replacement is needed afterwards: the ASCII escapes,
            // including %20, were preserved above.
            filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
        }
        catch (UnsupportedEncodingException | IllegalArgumentException e) {
            System.err.println("Unable to URL encode " + file + ": " + e);
            return;
        }

        System.out.println("File " + filename_url_encoded);

        try {
            File test_file = new File(new URI(filename_url_encoded));
            if (test_file.exists()) {
                System.out.println(" ... and I can see it!!!");
            }
        }
        catch (URISyntaxException | IllegalArgumentException e) {
            System.err.println("Unable to map " + filename_url_encoded + " back to a file: " + e);
        }
    }

    /**
     * Entry point. Prints the encoding properties and, when a directory is
     * given as the first argument, lists its entries.
     *
     * @param args optional; {@code args[0]} is the directory to list
     */
    public static void main(String[] args)
    {
        print_encoding_properties();

        if (args.length == 0) {
            return;
        }

        File folder = new File(args[0]);
        File[] listOfFiles = folder.listFiles();

        // listFiles() returns null when the path is not a directory or cannot
        // be read; report that instead of failing with a NullPointerException.
        if (listOfFiles == null) {
            System.err.println("Unable to list directory: " + folder);
            return;
        }

        for (File dir_or_file : listOfFiles) {
            String df_ascii = dir_or_file.toURI().toASCIIString();
            System.out.println("Before file test, toASCII URI: " + df_ascii);

            if (dir_or_file.isFile()) {
                report_file(dir_or_file);
            } else if (dir_or_file.isDirectory()) {
                System.out.println("Directory " + dir_or_file.getName());
            }
        }
    }

}
Note: See TracBrowser for help on using the repository browser.