- Timestamp:
- 2020-09-15T20:26:19+12:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/gli/src/org/greenstone/gatherer/util/Utility.java
r33777 r34394 94 94 } 95 95 } 96 97 // Copied from GS3 main java code at GSDL3SRCHOME\src\java\org\greenstone/util\Misc.java 98 // Debugging function to print a string's non-basic chars in hex, so stringToHex on all non-basic and non-printable ASCII 99 // Dr Bainbridge said that printing anything with charCode over 128 in hex is okay, but I'd already made extra allowances for non-printable ASCII 100 // Based on https://stackoverflow.com/questions/923863/converting-a-string-to-hexadecimal-in-java 101 public static String debugUnicodeString(String str) { 102 String result = ""; 103 for(int i = 0; i < str.length(); i++) { 104 int charCode = str.codePointAt(i); // unicode codepoint / ASCII code 105 106 // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png 107 // If the unicode character code pt is less than the ASCII code for space and greater than for tilda, let's display the char in hex (x0000 format) 108 if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13) { // space, tilda, TAB, LF, CR are printable, leave them in for XML element printing 109 result += str.charAt(i); 110 } else { 111 result += "x{" + String.format("%04x", charCode) + "}"; // looks like: x{4-char-codepoint} 112 } 113 } 114 115 return result; 116 } 117 118 /** 96 97 // Copied from GS3 main java code at GSDL3SRCHOME\src\java\org\greenstone/util\Misc.java 98 // Debugging function to print a string's non-basic chars in hex, so stringToHex on all non-basic and non-printable ASCII 99 // Dr Bainbridge said that printing anything with charCode over 128 in hex is okay, but I'd already made extra allowances for non-printable ASCII 100 // Based on https://stackoverflow.com/questions/923863/converting-a-string-to-hexadecimal-in-java 101 public static String debugUnicodeString(String str) { 102 String result = ""; 103 for(int i = 0; i < str.length(); i++) { 104 int charCode = str.codePointAt(i); // unicode codepoint / ASCII code 105 106 // ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png 107 // If the unicode character code pt is less than the ASCII code for space and greater than for tilda, let's display the char in hex (x0000 format) 108 if((charCode >= 20 && charCode <= 126) || charCode == 9 || charCode == 10 || charCode == 13) { // space, tilda, TAB, LF, CR are printable, leave them in for XML element printing 109 result += str.charAt(i); 110 } else { 111 result += "x{" + String.format("%04x", charCode) + "}"; // looks like: x{4-char-codepoint} 112 } 113 } 114 115 return result; 116 } 117 118 // Version of debugUnicodeString that, on Windows, mimics perl unicode::debug_unicode_string 119 // exactly by producing hex/unicode codepoints for ALL codepoints beyond ASCII 120 public static String stringToHex(String str) { 121 String result = ""; 122 for(int i = 0; i < str.length(); i++) { 123 int charCode = str.codePointAt(i); // unicode codepoint / ASCII code 124 125 if(charCode <=127) { // ASCII 126 result += str.charAt(i); 127 } else { // non-ASCII 128 result += "\\x{" + String.format("%04x", charCode) + "}"; // looks like: \x{4-char-codepoint} 129 } 130 } 131 132 return result; 133 } 134 135 /** 119 136 * returns the short filename (8.3) for a file in Windows 120 137 *
Note:
See TracChangeset
for help on using the changeset viewer.