Changeset 5246
- Timestamp:
- 2003-08-22T09:43:11+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/util/Utility.java
r5241 r5246 99 99 static final public String DICTIONARY = "dictionary"; 100 100 static final public String DLS_MDS = "dls.mds"; 101 static final public String ENCODING = "UTF-8"; 101 102 static final public String ENGLISH_VALUE = "en"; 102 103 /** Definition of an important directory name, in this case the etc (or extra information) directory for the collection. */ … … 197 198 return new TreePath(temp); 198 199 } 200 201 /** Decodes a string of text so its safe to use in a Greenstone configuration file. Esentially replaces "\n" with a newline. 202 * @param raw The <strong>String</strong> before decoding, read from the configuration file.. 203 * @return A <strong>String</strong> ready to be placed in a component. 204 */ 205 static public String decodeGreenstone(String raw) { 206 raw = raw.replaceAll("&apos;", "\'"); 207 raw = raw.replaceAll("&gt;", ">"); 208 raw = raw.replaceAll("&lt;", "<"); 209 raw = raw.replaceAll("&quot;", "\""); 210 raw = raw.replaceAll("&#39;", "\'"); 211 raw = raw.replaceAll("\\\\n", "\n"); 212 return raw; 213 } 214 199 215 /** Takes a rfc2616 'safe' String and translates it back into its 'unsafe' form. Basically the native c wget decode_string() function, but without pointer stuff. If searches through the String looking for the pattern %xy where x and y are hexidecimal digits and where xy maps to a character.<BR> If x or y are not hexidecimal or % is followed by a \0 then the pattern is left as is. 200 216 * @param encoded The url-safe <strong>String</strong> to be decoded. … … 247 263 return result; 248 264 } 249 /** Encodes a string of text so its safe to use in a Greenstone configuration file. Esentially replaces newlines with their escaped form. 250 * @param raw The <strong>String</strong> before encoding. 251 * @return A <strong>String</strong> which is safe to write to the configuration file. 
252 */ 253 static final private char AMPERSTAMP_CHAR = '&'; 254 static final private char ESCAPE_CHAR = '\\'; 255 static final private char GREATER_THAN_CHAR = '>'; 256 static final private char LESS_THAN_CHAR = '<'; 257 static final private char NEWLINE_CHAR = '\n'; 258 static final private char QUOTE_CHAR = '\''; 259 static final private char SPEECH_CHAR = '\"'; 260 static final private String ENCODED_AMPERSTAMP_STR = "&amp;"; 261 static final private String ENCODED_GREATER_THAN_STR = "&gt;"; 262 static final private String ENCODED_LESS_THAN_STR = "&lt;"; 263 static final private String ENCODED_SPEECH_STR = "&quot;"; 264 static final private String ESCAPED_NEWLINE_STR = "\\n"; 265 266 /** Decodes a string of text so its safe to use in a Greenstone configuration file. Esentially replaces "\n" with a newline. 267 * @param raw The <strong>String</strong> before decoding, read from the configuration file.. 268 * @return A <strong>String</strong> ready to be placed in a component. 269 */ 270 static public String decodeGreenstone(String raw) { 271 raw = raw.replaceAll("&apos;", "\'"); 272 raw = raw.replaceAll("&gt;", ">"); 273 raw = raw.replaceAll("&lt;", "<"); 274 raw = raw.replaceAll("&quot;", "\""); 275 raw = raw.replaceAll("&#39;", "\'"); 276 raw = raw.replaceAll("\\\\n", "\n"); 265 266 static public String encodeGreenstone(String raw) { 267 raw = raw.replaceAll("<", "&lt;"); 268 raw = raw.replaceAll(">", "&gt;"); 269 raw = raw.replaceAll("\n", "\\\\n"); 277 270 return raw; 278 }279 280 static public String encodeGreenstone(String raw) {281 // Once again regex fails to provide the power necessary for me to change strings. 
What I need to do is replace "<" and ">" with "&lt;" and "&gt;", and replace "\<" and "\>" with "<" and ">".282 StringBuffer processed = new StringBuffer();283 int index = 0;284 while(index < raw.length()) {285 char c = raw.charAt(index);286 switch(c) {287 // Replace a normal new line character with "\n"288 case NEWLINE_CHAR:289 processed.append(ESCAPED_NEWLINE_STR);290 break;291 // Replace "\<" with "<", or with "\<" if this is for XML. Similar requirements for "\>".292 case ESCAPE_CHAR:293 if(index + 1 < raw.length()) {294 char d = raw.charAt(index + 1);295 if(d == LESS_THAN_CHAR) {296 processed.append(LESS_THAN_CHAR);297 index++;298 break;299 }300 else if(d == GREATER_THAN_CHAR) {301 processed.append(GREATER_THAN_CHAR);302 index++;303 break;304 }305 }306 // I have no idea how this would happen, but I better watch for it anyway307 processed.append(c);308 break;309 // Replace "<" with "&lt;"310 case LESS_THAN_CHAR:311 processed.append(ENCODED_LESS_THAN_STR);312 break;313 // Replace ">" with "&gt;"314 case GREATER_THAN_CHAR:315 processed.append(ENCODED_GREATER_THAN_STR);316 break;317 default:318 processed.append(c);319 }320 index++;321 }322 return processed.toString();323 }324 /** When retrieve text for, or from the collect.cfg file it may contain characters that can't go into a DOM such as "<" and ">". We also might already have encoded versions "&lt;" and "&gt;". Thus we must encode the former, and double encode the latter. 
*/325 static public String encodeXML(String raw) {326 StringBuffer processed = new StringBuffer();327 int index = 0;328 while(index < raw.length()) {329 char c = raw.charAt(index);330 switch(c) {331 case GREATER_THAN_CHAR:332 processed.append(ENCODED_GREATER_THAN_STR);333 break;334 case LESS_THAN_CHAR:335 processed.append(ENCODED_LESS_THAN_STR);336 break;337 case AMPERSTAMP_CHAR:338 processed.append(ENCODED_AMPERSTAMP_STR);339 break;340 default:341 processed.append(c);342 }343 index++;344 }345 return processed.toString();346 271 } 347 272 … … 356 281 // Create an output format for our document. 357 282 OutputFormat f = new OutputFormat(document); 283 f.setEncoding(ENCODING); 358 284 f.setIndenting(true); 359 285 f.setLineWidth(0); 360 286 f.setPreserveSpace(false); 361 287 // Create the necessary writer stream for serialization. 362 OutputStreamWriter osw = new OutputStreamWriter(os );288 OutputStreamWriter osw = new OutputStreamWriter(os, ENCODING); 363 289 Writer w = new BufferedWriter(osw); 364 290 // Generate a new serializer from the above. … … 824 750 try { 825 751 URL url = ClassLoader.getSystemResource(filename); 826 file = new File(URLDecoder.decode(url.getFile(), "UTF-8"));752 file = new File(URLDecoder.decode(url.getFile(), ENCODING)); 827 753 url = null; 828 754 } … … 847 773 try { 848 774 FileInputStream fis = new FileInputStream(file); 849 InputStreamReader isr = new InputStreamReader(fis );775 InputStreamReader isr = new InputStreamReader(fis, ENCODING); 850 776 Reader r = new BufferedReader(isr); 851 777 InputSource isc = new InputSource(r);
Note: See TracChangeset for help on using the changeset viewer.