source: main/trunk/gli/src/org/greenstone/gatherer/metadata/FilenameEncoding.java@ 33737

Last change on this file since 33737 was 33737, checked in by ak19, 4 years ago

A larger, but not complete, fix to the problem of attaching and retaining file-level assigned meta to filenames containing non-ASCII characters. 1. Committing intermediate version of bugfix containing the idea suggested by Kathy to reuse the steps in fileToURLEncoding(File) for a String parameter as she felt that since the String represents a filename, a URI object should be instantiable on a String. Worked with some massaging. Can't yet get the new fileToURLEncoding(String) to work by calling fileToURLEncoding(File). So am committing the version of fileToURLEncoding(String) that is largely a copy of fileToURLEncoding(File), until I can get the simpler variant working. 2. The new method is called after each successful parseXML call from MetadataXMLFile, so that the decoded entities resulting from parseXML() are re-encoded in the DOM. This allows us to retain the correct filenames originally mentioned in metadata.xml files, do proper comparisons against them to attach/modify further metadata and so that the correct values get written out again into metadata.xml files. 3. Still want to get simpler version of fileToURLEncoding(String) to work that reuses fileToURLEncoding(File). 4. Want to get ampersand and plus signs in filenames to work (+ signs in filenames are lost when filenames are converted to URL). 5. Still need to investigate the missing ex. metadata for filenames containing non-ASCII.

File size: 24.6 KB
Line 
1/**
2 *############################################################################
3 * A component of the Greenstone Librarian Interface, part of the Greenstone
4 * digital library suite from the New Zealand Digital Library Project at the
5 * University of Waikato, New Zealand.
6 *
7 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
8 *
9 * Copyright (C) 2010 Greenstone Digital Library Project
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *############################################################################
25 */
26
27package org.greenstone.gatherer.metadata;
28
29import java.io.File;
30import java.net.*;
31import java.nio.charset.*;
32import java.util.*;
33import org.greenstone.gatherer.collection.CollectionManager;
34import org.greenstone.gatherer.DebugStream;
35
36import java.util.regex.Matcher;
37import java.util.regex.Pattern;
38
39
40
41/** Static access class that contains many of the methods used to work with filename encodings.
42* Works closely with classes FileNode, CollectionTreeNode, MetadataXMLFile, MetadataXMLFileManager
43* to maintain a map of URLEncodedFilenames to their filename encodings.
44* The process of filename encoding further affects the CollectionManager which refreshes its CollectionTree,
45* FileManager (move, delete, rename actions), MetadataValueTableModel, EnrichPane. */
46
47public class FilenameEncoding {
48 /** Display of filenames in the trees are in URL encoding, if debugging */
49 public static boolean DEBUGGING = false;
50
51 /** Set to false by Gatherer if the locale is UTF-8, as Java's handling is
52 * such that non-UTF8 filename encodings on a UTF-8 locale are destructively
53 * converted so that the bytecodes in the filename are not preserved. */
54 public static boolean MULTIPLE_FILENAME_ENCODINGS_SUPPORTED = false;
55
56 /** Also set by Gatherer.
57 * If the OS supports multiple filename encodings, we will be working with URL strings
58 * and the applicable separators are always the forward slash ("/") not File.separator.
59 * If multiple filename encodings are not supported, we're dealing with File.separator. */
60 public static String URL_FILE_SEPARATOR = File.separator;
61
62
63 /** gs.filenameEncoding is a special sort of metadata that is not merely to be stored along
64 * with a file, but is to be applied in real-time on the file's name in the CollectionTree
65 * display. Since FileNodes are constantly destroyed and reconstructed by that Tree when
66 * its nodes are expanded and contracted, storing the filename encodings of each file along
67 * with the file in a FileNode doesn't help because it doesn't last. Instead of rediscovering
68 * the encoding at every stage by querying the metadataXML file, we store the encodings for
69 * fast access: in a map of (URLEncodedFilePath, filename-encoding) pairs.
70 * The current design of the map is to only store any active filename metadata assigned
71 * directly at that file/folder's level, and if there is none discovered at that level, then
72 * storing the empty string for it. Therefore, if the hashmap contains no entry for
73 * a file, it means this still needs to be retrieved. */
74 public static Map map = new HashMap();
75
76//*********************** BUSY REFRESHING / REQUIRING REFRESH *********************
77
78 /** Set to true if filename encoding metadata was changed. Called by the enter keyPress
79 * event in gui.EnrichPane and when the gs.FilenameEncoding field loses focus. */
80 private static boolean refreshRequired = false;
81
82 synchronized public static boolean isRefreshRequired() {
83 return refreshRequired;
84 }
85
86 synchronized public static void setRefreshRequired(boolean state) {
87 if(MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
88 refreshRequired = state;
89 } else {
90 refreshRequired = false;
91 }
92 }
93
94//************************** MAP RETRIEVAL METHODS ******************************
95
96 /** Returns the cumulative gs.filenameEncoding metadata
97 * assigned to a file inside the collection. */
98 public static String findFilenameEncoding(
99 File file, String urlEncodedFilePath, boolean bruteForceLookup)
100 {
101 //if(bruteForceLookup) {
102 // return findFilenameEncodingBruteForce(file, urlEncodedFilePath, bruteForceLookup);
103 //}
104
105 String encoding = "";
106
107 // Check any assigned encoding at this level, starting with the map first
108 // and else retrieving the filename encoding from the metadata file
109 if(!map.containsKey(urlEncodedFilePath)) {
110
111 // Check for filename encoding metadata *directly* associated with the file
112 // Now don't need to get any inherited encoding metadata here, because of
113 // the way we're storing and retrieving encoding information from the map.
114 ArrayList list = MetadataXMLFileManager.getMetadataAssignedDirectlyToFile(file, true); // true: gets gs.filenameEncoding only
115 if(!list.isEmpty()) {
116 MetadataValue metavalue = (MetadataValue)list.get(0); // get(list.size()-1);
117 encoding = metavalue.getValue();
118 } // else no filename encoding set yet at this level
119
120 // Now we've done a lookup at this level cache the result in the map,
121 // including empty strings, to indicate that we've done a full lookup
122 map.put(urlEncodedFilePath, encoding);
123 }
124 else { // an entry exists in the map, get it from there
125 encoding = (String)map.get(urlEncodedFilePath);
126 }
127
128 // if no meta was specified at at the file level, look for any inherited metadata
129 if(encoding.equals("")) {
130 encoding = getInheritedFilenameEncoding(urlEncodedFilePath, file);
131 }
132
133 //System.err.println("\n@@@@Looked for: " + urlEncodedFilePath + " | found: " + encoding);
134 return encoding; // found something in map, may still be "", but it's what was stored
135 }
136
137 /** Checks the file-to-encoding map for all the superfolders of the given
138 * filename in sequence for an applicable encoding. Note that the file/folder
139 * at the level of urlFoldername (and dir) has already been inspected. */
140 static public String getInheritedFilenameEncoding(String urlFoldername, File dir)
141 {
142 String encoding = "";
143 boolean done = false;
144
145 // don't want to search past import folder which is as
146 // far as we need to go to determine inherited encodings
147 File importDir = new File(CollectionManager.getLoadedCollectionImportDirectoryPath());
148 if(dir.equals(importDir)) { // if the top-level dir was already checked, we're done
149 done = true;
150 }
151
152 // For directories, first remove trailing file separator in order to start checking from higher level folders
153 int lastIndex = urlFoldername.length()-1;
154 char urlFileSeparatorChar = URL_FILE_SEPARATOR.charAt(0);
155 if(urlFoldername.charAt(lastIndex) == urlFileSeparatorChar) {
156 urlFoldername = urlFoldername.substring(0, lastIndex);
157 }
158
159 while(!done) {
160 // get the folder that's one level up
161 dir = dir.getParentFile();
162
163 int index = urlFoldername.lastIndexOf(URL_FILE_SEPARATOR);
164 if(index == -1) { // no more slashes
165 done = true;
166 } else {
167 urlFoldername = urlFoldername.substring(0, index);
168 }
169
170 // now look in the map to see whether there's an encoding for this folder
171 String folder = urlFoldername + URL_FILE_SEPARATOR;
172 if(map.containsKey(folder)) {
173 encoding = (String)map.get(folder); // may be ""
174 } else { // no entry in map, so look in the metadata.xml at this folder level
175 ArrayList list = MetadataXMLFileManager.getMetadataAssignedDirectlyToFile(
176 dir, true); // true: gets gs.filenameEncoding only
177 if(!list.isEmpty()) {
178 MetadataValue metavalue = (MetadataValue)list.get(0); // get(list.size()-1);
179 encoding = metavalue.getValue();
180 }
181 map.put(folder, encoding); // may be ""
182 }
183
184 if(!encoding.equals("")){
185 done = true;
186 } // else if "", loop to check next folder up
187 else if(dir.equals(importDir)) { // don't iterate past the import folder, which we've now checked
188 done = true;
189 }
190 }
191
192 return encoding;
193 }
194
195 /** Called by GUIManager when a collection is closed. This then empties the
196 * file-to-encoding map which is applicable only on a per-collection basis */
197 static public void closeCollection() {
198 //printFilenameMap("Closing collection. Clearing file-to-encoding map of entries:");
199 map.clear();
200 }
201
202 // Useful for debugging: prints contents of file-to-encoding map
203 static public void printFilenameMap(String heading) {
204 System.err.println("\n********************************************");
205 System.err.println(heading.toUpperCase());
206 Iterator entries = map.entrySet().iterator();
207 while(entries.hasNext()) {
208 Map.Entry entry = (Map.Entry)entries.next();
209 System.err.println("+ " + (String)entry.getKey() + ": " + (String)entry.getValue());
210 }
211 System.err.println("********************************************\n");
212 }
213
214 // UNUSED at present. Brute force version of the findFilenameEncoding() method
215 // Doesn't use the map, but gets *all* the metadata assigned to a file/folder to
216 // work out the encoding applicable to a file/folder.
217 public static String findFilenameEncodingBruteForce(File file, String urlEncodedFilename,
218 boolean bruteForceLookup)
219 {
220 System.err.println("\n***** BRUTE FORCE getFilenameEncoding() called\n");
221
222
223 String encoding = "";
224
225 // Check for filename encoding metadata *directly* associated with the file
226 // Now don't need to get any inherited encoding metadata here, because of
227 // the way we're storing and retrieving encoding information from the map.
228
229 ArrayList list = MetadataXMLFileManager.getMetadataAssignedToFile(file, true); // true: gets gs.filenameEncoding only
230 if(!list.isEmpty()) {
231 // try to get the filename encoding meta that was assigned last to this
232 // file, even though it makes no sense to have multiple values for it
233 MetadataValue metavalue = (MetadataValue)list.get(list.size()-1);
234 encoding = metavalue.getValue();
235
236 if(encoding == null) { // unlikely ???
237 System.err.println("**** ERROR: encoding for "
238 + urlEncodedFilename + " is NULL!");
239 encoding = "";
240 }
241 } // else no filename encoding set yet, perhaps
242 //System.err.println("**** Found encoding for " + urlEncodedFilename + " " + encoding);
243 return encoding;
244 }
245
246//****************************** APPLYING ENCODINGS TO FILENAMES *****************************
247
248 /** URL encoded version of the byte codes of the given file's name */
249 public static String calcURLEncodedFilePath(File file) {
250 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
251 return file.getAbsolutePath();
252 }
253 else {
254 String filename = fileToURLEncoding(file);
255 return filename;
256 }
257 }
258
259 /** URL encoded version of the byte codes of this file's name */
260 public static String calcURLEncodedFileName(String urlfilepath) {
261 String filename = urlfilepath;
262 if(filename.endsWith(URL_FILE_SEPARATOR)) { // directory, remove trailing slash
263 filename = filename.substring(0, filename.length() - 1);
264 }
265
266 // remove the directory prefix (if any) to get the filename
267 int index = filename.lastIndexOf(URL_FILE_SEPARATOR);
268 if(index != -1) {
269 filename = filename.substring(index+1); // skip separator
270 }
271
272 return filename;
273 }
274
275 /** Given a string representing an alias to an official encoding (and unofficial ones
276 * starting with "Latin-"), attempts to work out what the canonical encoding for that is.
277 * If the given encoding is unrecognised, it is returned as is. */
278 public static String canonicalEncodingName(String encoding) {
279 String canonicalEncoding = encoding;
280 try {
281 // Latin-1 -> ISO-8859-1
282 String alias = canonicalEncoding.toLowerCase();
283 if(alias.startsWith("latin")){
284 canonicalEncoding = "ISO-8859" + alias.substring("latin".length());
285 }
286
287 // canonical encoding for official aliases
288 canonicalEncoding = Charset.forName(canonicalEncoding).name();
289 return canonicalEncoding;
290 } catch (Exception e) {
291 System.err.println("(Could not recognise encoding (alias): "
292 + encoding + ".)");
293 return encoding; // no alias could be found, return the original parameter
294 }
295 }
296
297//************************* GETTING THE URL ENCODING OF FILENAMES *********************************
298
299 /**
300 * Given a String containing hexentities, will convert back into the unicode version of the String.
301 * e.g. A string like "02 Tēnā Koutou\.mp3" will be returned as "02 Tena Koutou\.mp3" with macrons on e and a
302 * I've tested this in a separate file that imports java.util.regex.Matcher and java.util.regex.Pattern
303 * and contains a copy of Utility.debugUnicodeString(String) with the following main function:
304 public static void main(String args[]) {
305 String str = "02 Tēnā Koutou\\.mp3"; // or more basic case: String str = "mmmmānnnnēpppp\\.txt";
306 System.err.println("About to decode hex string: " + str);
307 String result = decodeStringContainingHexEntities(str);
308 System.err.println("Decoded hex string: " + result + " - debug unicode form: " + debugUnicodeString(result));
309 }
310 */
311 public static String decodeStringContainingHexEntities(String str) {
312 String result = "";
313 Pattern hexPattern = Pattern.compile("(&#x[0-9a-zA-Z]{1,4}+;)");
314 Matcher matcher = hexPattern.matcher(str);
315
316 int searchFromIndex = 0;
317 int endMatchIndex = -1;
318
319 while(matcher.find(searchFromIndex)) {
320 String hexPart = matcher.group();
321 //System.err.println("Found hexpart match: " + hexPart);
322
323 int startMatchIndex = matcher.start();
324 endMatchIndex = matcher.end();
325 result += str.substring(searchFromIndex, startMatchIndex);
326
327 String hexNumberStr = hexPart.substring(3, hexPart.length()-1); // lose the "&#x" prefix and the ";" suffix to get just the hex number portion of the match
328 // https://stackoverflow.com/questions/16625865/java-unicode-to-hex-string
329 // https://stackoverflow.com/questions/11194513/convert-hex-string-to-int
330
331 //System.err.println("hexNumberStr so far: " + hexNumberStr);
332 int tmpDigit = Integer.parseInt(hexNumberStr);
333 //System.err.println("As digit: " + tmpDigit);
334 hexNumberStr = String.format("%04d", tmpDigit);
335 //System.err.println("2 hexNumberStr so far: " + hexNumberStr);
336 hexNumberStr = "0x" + hexNumberStr; // e.g "0xDDDD"
337 //int hexNumber = Integer.parseInt(hexNumberStr);
338 int hexNumber = Integer.decode(hexNumberStr);
339 String hexNumberAsChar = Character.toString((char) hexNumber);
340 result += hexNumberAsChar;
341
342 searchFromIndex = endMatchIndex;
343
344 }
345
346 if(endMatchIndex != -1) { // attach any suffix once we finished processing all the hex codes
347 result += str.substring(endMatchIndex);
348 //System.err.println("suffix: " + str.substring(endMatchIndex));
349 }
350 else { // there were no hex codes to decode, return string as is
351 result = str;
352 }
353
354 return result;
355 }
356
357 /** Attempting to produce the equivalent method fileToURLEncoding() above, but taking a String as input parameter */
358 public static String fileNameToHex(String filename) {
359
360 String hexFilename = "";
361 for(int i = 0; i < filename.length(); i++) {
362 int charCode = filename.codePointAt(i); // unicode codepoint / ASCII code
363
364 // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png
365 // If the unicode character code pt is less than the ASCII code for space and greater than for tilda, let's display the char in hex (x0000 format)
366 if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13 || charCode == 36 || charCode == 43) { // space, tilda, TAB, LF, CR are printable, leave them in for XML element printing. And spaces and plus signs (ASCII codes 36 and 43) need to be converted to hex too
367 hexFilename += filename.charAt(i);
368 } else {
369 hexFilename += "&#x" + String.format("%x", charCode).toUpperCase() + ";"; // looks like: "&#x[up-to-4-hexdigits-in-UPPERCASE];"
370 }
371 }
372
373 return hexFilename;
374 }
375
376
377 // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter
378 public static String filenameToURLEncoding(String filename) {
379 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
380 return filename;
381 }
382
383 // Can't create a URI out of a filename containing spaces. Spaces must be encoded as %20
384 String filename_url_encoded = filename.replace(" ", "%20");
385 //filename_url_encoded = filename_url_encoded.replace("&", "%26"); // &'s ASCII code is 36 in decimal, and 26 in hex
386 //filename_url_encoded = filename_url_encoded.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased
387
388 try {
389 URI filename_uri = new URI(filename_url_encoded);
390 // The trick:
391 // 1. toASCIIString() will %xx encode values > 127
392 // 2. Decode the result to "ISO-8859-1"
393 // 3. URL encode the bytes to string
394
395 // Step 2 forces the string to be 8-bit values. It
396 // doesn't matter if the starting raw filename was *not*
397 // in the ISO-8859-1 encoding, the effect is to ensure
398 // we have an 8-bit byte string that (numerically)
399 // captures the right value. These numerical values are
400 // then used to determine how to URL encode it
401
402 String filename_ascii = filename_uri.toASCIIString();
403 //filename_ascii = filename_ascii.replace("&", "%26"); // &'s ASCII code is 36 in decimal, and 26 in hex
404 //filename_ascii = filename_ascii.replace("+", "%2B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased
405 String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");
406 filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
407
408 // DEALING WITH & and + in filenames: NOT WORKING YET
409 //if(filename_url_encoded.contains("&amp;")) {
410 // filename_url_encoded = filename_url_encoded.replace("&amp;", "%36amp;");
411 //} else if(filename_url_encoded.contains("&")) {
412 // filename_url_encoded = filename_url_encoded.replace("&", "%36");
413 //}
414
415 }
416 catch (Exception e) {
417 e.printStackTrace();
418 // Give up trying to convert
419 filename_url_encoded = filename;
420 }
421 return filename_url_encoded;
422 }
423
424
425 // follows Dr Bainbridge's method below, but with a String parameter instead of a file parameter
426 public static String _filenameToURLEncoding(String filename) {
427 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just return input param
428 return filename;
429 }
430
431 File file = new File (filename);
432 return fileToURLEncoding(file);
433 }
434
435
436 // Dr Bainbridge's methods
437 /* On Linux machines that are set to using an ISO-8859 (Latin) type encoding,
438 * we can work with URL-encoded filenames in Java. Java works with whatever
439 * encoding the filesystem uses. Unlike systems working with UTF-8, where Java
440 * interprets filenames as UTF-8 (a destructive process since characters invalid
441 * for UTF-8 are replaced with the invalid character, which means the original
442 * character's byte codes can not be regained), working with an ISO-8859-1
443 * system means the original byte codes of the characters are preserved,
444 * regardless of whether the characters represent ISO-8859-1 or not. Such byte
445 * codes are converted by the following method to the correct URL versions of
446 * the strings that the filenames represent (that is, the correct URL representations
447 * of the filenames in their original encodings). This is useful for interactions with
448 * Perl as Java and Perl can use URL-encoded filenames to talk about the same files
449 * on the file system, instead of having to work out what encoding they are in. */
450
451 public static String fileToURLEncoding(File file) {
452 if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
453 return file.getAbsolutePath();
454 }
455
456 String filename_url_encoded = "";
457
458 // The following test for whether the file exists or not is a problem
459 // when a File object--whose actual file is in the process of being moved
460 // and therefore temporarily does not 'exist' on the actual system--can't
461 // be URL encoded: the following would return "" when a file doesn't exist.
462 // So commenting out the test.
463 /*
464 if(!file.getName().equals("recycle")) {
465 if(!file.isFile() && !file.isDirectory()) {
466 System.err.println("*** ERROR. Java can't see file: " + file.getAbsolutePath());
467 return "";
468 }
469
470 if(!file.exists()) {
471 System.err.println("*** NOTE: File doesn't exist: " + file.getAbsolutePath());
472 return ""; //file.getName();
473 }
474 }
475 */
476
477 URI filename_uri = file.toURI();
478 try {
479 // The trick:
480 // 1. toASCIIString() will %xx encode values > 127
481 // 2. Decode the result to "ISO-8859-1"
482 // 3. URL encode the bytes to string
483
484 // Step 2 forces the string to be 8-bit values. It
485 // doesn't matter if the starting raw filename was *not*
486 // in the ISO-8859-1 encoding, the effect is to ensure
487 // we have an 8-bit byte string that (numerically)
488 // captures the right value. These numerical values are
489 // then used to determine how to URL encode it
490
491 String filename_ascii = filename_uri.toASCIIString();
492 String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");
493 filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
494
495 }
496 catch (Exception e) {
497 e.printStackTrace();
498 // Give up trying to convert
499 filename_url_encoded = file.getAbsolutePath();
500 }
501 return filename_url_encoded;
502 }
503
504 // For unicode codepoints see:
505 // http://unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT for ISO8859-1 (Latin-1)
506 // where 0xE2 maps to codepoint 0x00E2 and is defined as "Latin small letter a with circumflex"
507 // http://unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT for ISO8859-7 (Greek)
508 // where 0xE2 maps to codepoint 0x03B2 and is defined as "Greek small letter beta"
509 public static String iso_8859_1_filename_to_url_encoded(String raw_bytes_filename)
510 throws Exception
511 {
512 String urlEncoded = "";
513
514 try {
515 // By this point we have a UTF-8 encoded string that captures
516 // what the ISO-8859-1 (Latin-1) character is that corresponded to the
517 // 8-bit numeric value for that character in the filename
518 // on the file system
519
520 // For example:
521 // File system char: <lower-case beta char in Latin-7> = %E2
522 // Equivalent Latin 1 char: <lower-case a with circumflex> = %E2
523 // Mapped to UTF-8: <lower-case a with circumflex> = <C3><A2>
524
525 // Our task is to take the string the contains <C3><A2> and ensure that
526 // we "see" it as <E2>
527
528 byte [] raw_bytes = raw_bytes_filename.getBytes("ISO-8859-1");
529 String unicode_filename = new String(raw_bytes,"UTF-8");
530
531 for(int i = 0; i < unicode_filename.length(); i++) {
532 char charVal = unicode_filename.charAt(i);
533 if ((int)charVal > 255) {
534 urlEncoded += String.format("&#x%02X;", (int)charVal);
535 }
536 else if((int)charVal > 127) {
537 urlEncoded += String.format("%%%02X", (int)charVal);
538 } else {
539 urlEncoded += String.format("%c", (char)charVal);
540 }
541 }
542 }
543 catch (Exception e) {
544 //e.printStackTrace();
545 throw(e);
546 }
547
548 return urlEncoded;
549 }
550
551 // unused for now
552 public static String raw_filename_to_url_encoded(String fileName)
553 throws Exception
554 {
555 String urlEncoded = "";
556 try {
557 byte[] bytes = fileName.getBytes();
558
559 for(int i = 0; i < bytes.length; i++) {
560 // mask each byte (by applying & 0xFF) to make the signed
561 // byte (in the range -128 to 127) unsigned (in the range
562 // 0 to 255).
563
564 int byteVal = (int)(bytes[i] & 0xFF);
565
566 if(byteVal > 127) {
567 urlEncoded += String.format("%%%02X", (int)byteVal);
568 } else {
569 urlEncoded += String.format("%c",(char)byteVal);
570 }
571 }
572 }
573 catch (Exception e) {
574 //e.printStackTrace();
575 throw(e);
576 }
577
578 return urlEncoded;
579 }
580
581}
Note: See TracBrowser for help on using the repository browser.