- Timestamp:
- 2003-05-27T15:40:47+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gli/src/org/greenstone/gatherer/util/DecodeHTMLReader.java
r4293 r4364 5 5 6 6 public class DecodeHTMLReader 7 7 extends PushbackReader { 8 8 9 10 11 9 public DecodeHTMLReader(Reader source) { 10 super(source, 4); 11 } 12 12 13 14 15 16 13 /** Read a single character. */ 14 public int read() { 15 return decode(); 16 } 17 17 18 19 20 21 22 23 24 25 26 18 /** Read characters into a portion of an array. */ 19 public int read(char[] cbuf, int off, int len) { 20 int count = 0; 21 for(int i = off; i < len && ready(); i++) { 22 cbuf[i] = (char)decode(); 23 count++; 24 } 25 return count; 26 } 27 27 28 29 30 31 32 33 34 35 28 public boolean ready() { 29 try { 30 return super.ready(); 31 } 32 catch (Exception error) { 33 } 34 return false; 35 } 36 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 37 /** Retrieve the next character off the stream. Unfortunately I have to do this a character at a time (which is slow). I also have to keep in mind that if a suspect encoded character turns out not to be I have to replace the extra characters. */ 38 private int decode() { 39 int character; 40 try { 41 character = super.read(); 42 } 43 catch (Exception error) { 44 character = ' '; 45 } 46 try { 47 if(character == '&') { 48 int amp = super.read(); 49 switch(amp) { 50 case 'a': 51 case 'A': 52 int ampap = super.read(); 53 int ampapo = super.read(); 54 int ampapos = super.read(); 55 int ampapossemi = super.read(); 56 if((ampap == 'p' || ampap == 'P') && (ampapo == 'o' || ampapo == 'O') && (ampapos == 's' || ampapos == 'S') && ampapossemi == ';') { 57 // Read an ' so return an apostrophy 58 return '\''; 59 } 60 // Not a apos. Return the characters removed in the correct order. 61 super.unread(ampapossemi); 62 super.unread(ampapos); 63 super.unread(ampapo); 64 super.unread(ampap); 65 break; 66 case 'g': 67 case 'G': 68 int ampgt = super.read(); 69 int ampgtsemi = super.read(); 70 if((ampgt == 't' || ampgt == 'T') && ampgtsemi == ';') { 71 return '>'; 72 } 73 super.unread(ampgtsemi); 74 super.unread(ampgt); 75 break; 76 case 'l': 77 case 'L': 78 int amplt = super.read(); 79 int ampltsemi = super.read(); 80 if((amplt == 't' || amplt == 'T') && ampltsemi == ';') { 81 return '<'; 82 } 83 super.unread(ampltsemi); 84 super.unread(amplt); 85 break; 86 case 'q': 87 case 'Q': 88 int ampqu = super.read(); 89 int ampquo = super.read(); 90 int ampquot = super.read(); 91 int ampquotsemi = super.read(); 92 if((ampqu == 'u' || ampqu == 'U') && (ampquo == 'o' || ampquo == 'O') && (ampquot == 't' || ampquot == 'T') && ampquotsemi == ';') { 93 return '\"'; 94 } 95 super.unread(ampquotsemi); 96 super.unread(ampquot); 97 super.unread(ampquo); 98 super.unread(ampqu); 99 break; 100 case '#': 101 int amphash = super.read(); 102 int amphash3 = super.read(); 103 int amphash39 = super.read(); 104 int amphash39semi = super.read(); 105 if(amphash == '#' && amphash3 == '3' && amphash39 == '9' && amphash39semi == ';') { 106 return '\''; 107 } 108 super.unread(amphash39semi); 109 super.unread(amphash39); 110 super.unread(amphash3); 111 super.unread(amphash); 112 break; 113 } 114 // Not a suspect. Return the character removed. 115 super.unread(amp); 116 } 117 } 118 catch (Exception error) { 119 Gatherer.printStackTrace(error); 120 } 121 // Nothing special. Simply return the character extracted. 122 return character; 123 } 124 124 }
Note:
See TracChangeset
for help on using the changeset viewer.