- Timestamp:
- 2019-12-02T20:03:57+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java
r33730 r33737 364 364 // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png 365 365 // If the unicode character code pt is less than the ASCII code for space or greater than that for tilde, let's display the char in hex (x0000 format) 366 if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13 ) { // space, tilde, TAB, LF, CR are printable, leave them in for XML element printing366 if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13 || charCode == 36 || charCode == 43) { // space, tilde, TAB, LF, CR are printable, leave them in for XML element printing. And spaces and plus signs (ASCII codes 36 and 43) need to be converted to hex too. NOTE(review): space is ASCII 32 decimal (0x20 hex), so the lower bound of 20 looks like a hex/decimal mix-up; '&' is 38 decimal (36 is '$'); and 36 and 43 already lie inside 20..126, so the two added clauses change nothing and sit in the keep-as-is branch rather than the hex branch - confirm intent 367 367 hexFilename += filename.charAt(i); 368 368 } else { … … 374 374 } 375 375 376 377 // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter 378 public static String filenameToURLEncoding(String filename) { 379 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { 380 return filename; 381 } 382 383 // Can't create a URI out of a filename containing spaces. Spaces must be encoded as %20 384 String filename_url_encoded = filename.replace(" ", "%20"); 385 //filename_url_encoded = filename_url_encoded.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex 386 //filename_url_encoded = filename_url_encoded.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased 387 388 try { 389 URI filename_uri = new URI(filename_url_encoded); 390 // The trick: 391 // 1. toASCIIString() will %xx encode values > 127 392 // 2. Decode the result to "ISO-8859-1" 393 // 3. URL encode the bytes to string 394 395 // Step 2 forces the string to be 8-bit values. It 396 // doesn't matter if the starting raw filename was *not* 397 // in the ISO-8859-1 encoding, the effect is to ensure 398 // we have an 8-bit byte string that (numerically) 399 // captures the right value. 
These numerical values are 400 // then used to determine how to URL encode it 401 402 String filename_ascii = filename_uri.toASCIIString(); 403 //filename_ascii = filename_ascii.replace("&", "%26"); // &'s ASCII code is 38 in decimal, and 26 in hex 404 //filename_ascii = filename_ascii.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased 405 String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1"); 406 filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes); 407 408 // DEALING WITH & and + in filenames: NOT WORKING YET 409 //if(filename_url_encoded.contains("&")) { 410 // filename_url_encoded = filename_url_encoded.replace("&", "%36amp;"); 411 //} else if(filename_url_encoded.contains("&")) { 412 // filename_url_encoded = filename_url_encoded.replace("&", "%36"); 413 //} 414 415 } 416 catch (Exception e) { 417 e.printStackTrace(); 418 // Give up trying to convert 419 filename_url_encoded = filename; 420 } 421 return filename_url_encoded; 422 } 423 424 425 // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter 426 public static String _filenameToURLEncoding(String filename) { 427 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param 428 return filename; 429 } 430 431 File file = new File (filename); 432 return fileToURLEncoding(file); 433 } 434 435 376 436 // Dr Bainbridge's methods 377 437 /* On Linux machines that are set to using an ISO-8859 (Latin) type encoding,
Note:
See TracChangeset
for help on using the changeset viewer.