[12188] | 1 | package org.greenstone.gsdl3.gs3build.util;
|
---|
| 2 |
|
---|
| 3 | import java.io.*;
|
---|
| 4 | import java.util.*;
|
---|
| 5 |
|
---|
| 6 | import org.greenstone.gsdl3.gs3build.doctypes.DocumentLoader;
|
---|
| 7 |
|
---|
| 8 | public class HTMLEntity
|
---|
| 9 | { String name;
|
---|
| 10 | int code;
|
---|
| 11 | static Vector entities = null;
|
---|
| 12 | static Vector nmapping = null;
|
---|
| 13 |
|
---|
| 14 | public HTMLEntity(String name, int code)
|
---|
| 15 | { int start, end, at;
|
---|
| 16 | HTMLEntity compareEntity;
|
---|
| 17 |
|
---|
| 18 | if (HTMLEntity.entities == null)
|
---|
| 19 | { HTMLEntity.entities = new Vector();
|
---|
| 20 | }
|
---|
| 21 | if (HTMLEntity.nmapping == null)
|
---|
| 22 | { HTMLEntity.nmapping = new Vector();
|
---|
| 23 | }
|
---|
| 24 |
|
---|
| 25 | this.name = name;
|
---|
| 26 | this.code = code;
|
---|
| 27 |
|
---|
| 28 | entities.addElement(this);
|
---|
| 29 | start = 0;
|
---|
| 30 | end = nmapping.size();
|
---|
| 31 | while (start != end)
|
---|
| 32 | { at = (start + end) / 2;
|
---|
| 33 |
|
---|
| 34 | compareEntity =
|
---|
| 35 | (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue());
|
---|
| 36 | if (compareEntity.code > code)
|
---|
| 37 | { end = at;
|
---|
| 38 | }
|
---|
| 39 | else
|
---|
| 40 | { start = at + 1;
|
---|
| 41 | }
|
---|
| 42 | }
|
---|
| 43 | if (end == nmapping.size())
|
---|
| 44 | { nmapping.addElement(new Integer(nmapping.size()));
|
---|
| 45 | }
|
---|
| 46 | else
|
---|
| 47 | { nmapping.insertElementAt(new Integer(nmapping.size()), start);
|
---|
| 48 | }
|
---|
| 49 | }
|
---|
| 50 |
|
---|
| 51 | public String toString()
|
---|
| 52 | { Character c;
|
---|
| 53 |
|
---|
| 54 | c = new Character((char) this.code);
|
---|
| 55 | return c.toString();
|
---|
| 56 | }
|
---|
| 57 |
|
---|
| 58 | public static String codeString(char c)
|
---|
| 59 | { StringBuffer reply;
|
---|
| 60 | String codestring;
|
---|
| 61 |
|
---|
| 62 | if (c < 128 || c == '&' || c == '<' || c == '>')
|
---|
| 63 | { return null;
|
---|
| 64 | }
|
---|
| 65 | codestring = Integer.toString((int) c);
|
---|
| 66 | reply = new StringBuffer("&#");
|
---|
| 67 | reply.append(codestring);
|
---|
| 68 | reply.append(";");
|
---|
| 69 |
|
---|
| 70 | return reply.toString();
|
---|
| 71 | }
|
---|
| 72 |
|
---|
| 73 | public static String nameCodeString(char c)
|
---|
| 74 | { int start, end, at, compare;
|
---|
| 75 | HTMLEntity compareEntity;
|
---|
| 76 |
|
---|
| 77 | start = 0;
|
---|
| 78 | end = nmapping.size();
|
---|
| 79 | while (start != end)
|
---|
| 80 | { at = (start + end) / 2;
|
---|
| 81 |
|
---|
| 82 | compareEntity =
|
---|
| 83 | (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue());
|
---|
| 84 | if (compareEntity.code == (int) c)
|
---|
| 85 | { StringBuffer reply;
|
---|
| 86 |
|
---|
| 87 | reply = new StringBuffer("&");
|
---|
| 88 | reply.append(compareEntity.name);
|
---|
| 89 | reply.append(";");
|
---|
| 90 | return reply.toString();
|
---|
| 91 | }
|
---|
| 92 | else if (compareEntity.code < c)
|
---|
| 93 | { end = at;
|
---|
| 94 | }
|
---|
| 95 | else
|
---|
| 96 | { start = at + 1;
|
---|
| 97 | }
|
---|
| 98 | }
|
---|
| 99 | return codeString(c);
|
---|
| 100 | }
|
---|
| 101 |
|
---|
| 102 | public static void getEntities(String source)
|
---|
| 103 | { HTMLEntity newentity;
|
---|
| 104 | int pos;
|
---|
| 105 | int start;
|
---|
| 106 | String name;
|
---|
| 107 | int code;
|
---|
| 108 |
|
---|
| 109 | if (source == null)
|
---|
| 110 | { return;
|
---|
| 111 | }
|
---|
| 112 |
|
---|
| 113 | start = 0;
|
---|
| 114 | while (start < source.length())
|
---|
| 115 | { pos = start;
|
---|
| 116 | while (source.charAt(pos) != ':')
|
---|
| 117 | { pos ++;
|
---|
| 118 | }
|
---|
| 119 | name = source.substring(start, pos);
|
---|
| 120 |
|
---|
| 121 | // skip colon and reset
|
---|
| 122 | pos ++;
|
---|
| 123 | start = pos;
|
---|
| 124 |
|
---|
| 125 | while(pos < source.length() && source.charAt(pos) > ' ')
|
---|
| 126 | { pos ++;
|
---|
| 127 | }
|
---|
| 128 | code = Integer.parseInt(source.substring(start, pos));
|
---|
| 129 |
|
---|
| 130 | newentity = new HTMLEntity(name, code);
|
---|
| 131 |
|
---|
| 132 | while ( pos < source.length() && source.charAt(pos) < ' ')
|
---|
| 133 | { pos ++;
|
---|
| 134 | }
|
---|
| 135 | start = pos;
|
---|
| 136 | }
|
---|
| 137 | }
|
---|
| 138 |
|
---|
| 139 | public static void getEntities(InputStream in)
|
---|
| 140 | { getEntities(DocumentLoader.getAsString(in));
|
---|
| 141 | }
|
---|
| 142 |
|
---|
| 143 | public static void getEntities(File file)
|
---|
| 144 | { FileInputStream in;
|
---|
| 145 |
|
---|
| 146 | try
|
---|
| 147 | { in = new FileInputStream(file);
|
---|
| 148 | getEntities(in);
|
---|
| 149 | in.close();
|
---|
| 150 | }
|
---|
| 151 | catch (IOException io)
|
---|
| 152 | { System.out.println("No entities");
|
---|
| 153 | }
|
---|
| 154 | }
|
---|
| 155 |
|
---|
| 156 | public static void getEntities()
|
---|
| 157 | { ClassLoader loader;
|
---|
| 158 | InputStream in;
|
---|
| 159 |
|
---|
| 160 | in = ClassLoader.getSystemResourceAsStream("org\\greenstone\\gsdl3\\gs3build\\util\\Entities.map");
|
---|
| 161 | if (in == null)
|
---|
| 162 | { System.out.println("Unable to load Entities.map from org.greenstone.gsdl3.gs3build.util");
|
---|
| 163 | }
|
---|
| 164 | getEntities(in);
|
---|
| 165 | }
|
---|
| 166 |
|
---|
| 167 | public static String encodeText(String source, boolean named)
|
---|
| 168 | { StringBuffer reply;
|
---|
| 169 | int start,at;
|
---|
| 170 |
|
---|
| 171 | if (source == null)
|
---|
| 172 | { return null;
|
---|
| 173 | }
|
---|
| 174 |
|
---|
| 175 | if (HTMLEntity.entities == null)
|
---|
| 176 | { getEntities();
|
---|
| 177 | }
|
---|
| 178 |
|
---|
| 179 | reply = new StringBuffer();
|
---|
| 180 | at = 0;
|
---|
| 181 | start = 0;
|
---|
| 182 | while (at < source.length())
|
---|
| 183 | { if (source.charAt(at) > 128)
|
---|
| 184 | { if (at != start)
|
---|
| 185 | { reply.append(source.substring(start, at));
|
---|
| 186 | }
|
---|
| 187 |
|
---|
| 188 | if (named)
|
---|
| 189 | { reply.append(nameCodeString(source.charAt(at)));
|
---|
| 190 | }
|
---|
| 191 | else
|
---|
| 192 | { reply.append(codeString(source.charAt(at)));
|
---|
| 193 | }
|
---|
| 194 | start = at + 1;
|
---|
| 195 | }
|
---|
| 196 | at ++;
|
---|
| 197 | }
|
---|
| 198 |
|
---|
| 199 | return reply.toString();
|
---|
| 200 | }
|
---|
| 201 |
|
---|
| 202 | public static String decodeText(String source)
|
---|
| 203 | { int start, end, loff, roff;
|
---|
| 204 | int sstart, send, test;
|
---|
| 205 | int startoff, endoff, baseoff;
|
---|
| 206 | boolean matched;
|
---|
| 207 | String tail;
|
---|
| 208 | StringBuffer reply;
|
---|
| 209 |
|
---|
| 210 | if (source == null)
|
---|
| 211 | { return null;
|
---|
| 212 | }
|
---|
| 213 |
|
---|
| 214 | if (HTMLEntity.entities == null)
|
---|
| 215 | { getEntities();
|
---|
| 216 | }
|
---|
| 217 |
|
---|
| 218 | reply = new StringBuffer();
|
---|
| 219 | baseoff = 0;
|
---|
| 220 |
|
---|
| 221 | tail = source;
|
---|
| 222 | do
|
---|
| 223 | { // get next ampersand
|
---|
| 224 | startoff = tail.indexOf('&');
|
---|
| 225 | if (startoff < 0 || startoff == tail.length() - 1)
|
---|
| 226 | { break;
|
---|
| 227 | }
|
---|
| 228 |
|
---|
| 229 | // if it's followed by a hash, evaluate as a number
|
---|
| 230 | if (tail.charAt(startoff + 1) == '#')
|
---|
| 231 | { if (startoff < tail.length() - 2 &&
|
---|
| 232 | tail.charAt(startoff+2) >= '0' &&
|
---|
| 233 | tail.charAt(startoff+2) <= '9')
|
---|
| 234 | { int code;
|
---|
| 235 |
|
---|
| 236 | loff = startoff + 2;
|
---|
| 237 | while (tail.charAt(loff) >= '0' &&
|
---|
| 238 | tail.charAt(loff) <= '9')
|
---|
| 239 | { loff ++;
|
---|
| 240 | }
|
---|
| 241 |
|
---|
| 242 | code = Integer.parseInt(tail.substring(startoff+2, loff));
|
---|
| 243 | reply.append(tail.substring(0, startoff));
|
---|
| 244 | reply.append(new Character((char) code));
|
---|
| 245 |
|
---|
| 246 | if (tail.charAt(loff) == ';')
|
---|
| 247 | { loff ++;
|
---|
| 248 | }
|
---|
| 249 | loff = loff - startoff - 1;
|
---|
| 250 | }
|
---|
| 251 | else
|
---|
| 252 | { loff = 0;
|
---|
| 253 | reply.append(tail.substring(0, startoff+1));
|
---|
| 254 | }
|
---|
| 255 | }
|
---|
| 256 | else
|
---|
| 257 | { start = 0;
|
---|
| 258 | end = HTMLEntity.entities.size();
|
---|
| 259 | loff = 0;
|
---|
| 260 |
|
---|
| 261 | while (startoff+1+loff < tail.length() && start != end)
|
---|
| 262 | { sstart = start;
|
---|
| 263 | send = end;
|
---|
| 264 | test = (sstart + send) >> 1;
|
---|
| 265 |
|
---|
| 266 | matched = false;
|
---|
| 267 |
|
---|
| 268 | roff = startoff + 1 + loff;
|
---|
| 269 |
|
---|
| 270 | while (sstart != send)
|
---|
| 271 | { test = (sstart + send) >> 1;
|
---|
| 272 |
|
---|
| 273 | if (tail.charAt(roff) < ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff))
|
---|
| 274 | { send = test;
|
---|
| 275 | }
|
---|
| 276 | else if (tail.charAt(roff) > ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff))
|
---|
| 277 | { sstart = test + 1;
|
---|
| 278 | }
|
---|
| 279 | else
|
---|
| 280 | { break;
|
---|
| 281 | }
|
---|
| 282 | }
|
---|
| 283 |
|
---|
| 284 | // System.out.println(sstart+">"+send+":"+loff);
|
---|
| 285 |
|
---|
| 286 | if (sstart != send) // found a match
|
---|
| 287 | { sstart = test;
|
---|
| 288 | while (sstart >= start &&
|
---|
| 289 | tail.charAt(roff) ==
|
---|
| 290 | ((HTMLEntity) HTMLEntity.entities.elementAt(sstart)).name.charAt(loff))
|
---|
| 291 | { sstart --;
|
---|
| 292 | }
|
---|
| 293 | sstart ++;
|
---|
| 294 |
|
---|
| 295 | send = test + 1;
|
---|
| 296 | while (send < end &&
|
---|
| 297 | tail.charAt(roff) ==
|
---|
| 298 | ((HTMLEntity) HTMLEntity.entities.elementAt(send)).name.charAt(loff))
|
---|
| 299 | { send ++;
|
---|
| 300 | }
|
---|
| 301 |
|
---|
| 302 | // System.out.println(sstart+"!"+send+":"+loff);
|
---|
| 303 |
|
---|
| 304 | start = sstart;
|
---|
| 305 | end = send;
|
---|
| 306 | }
|
---|
| 307 | else
|
---|
| 308 | { loff = 0;
|
---|
| 309 | reply.append(tail.substring(0, startoff+1));
|
---|
| 310 | break;
|
---|
| 311 | }
|
---|
| 312 |
|
---|
| 313 | // System.out.println(start+">"+end+":"+loff);
|
---|
| 314 |
|
---|
| 315 | loff ++;
|
---|
| 316 | if (end == start + 1 &&
|
---|
| 317 | ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.length() == loff)
|
---|
| 318 | { reply.append(tail.substring(0, startoff));
|
---|
| 319 | reply.append(((HTMLEntity) HTMLEntity.entities.elementAt(test)).toString());
|
---|
| 320 | if (roff < tail.length() - 1 && tail.charAt(roff+1) == ';')
|
---|
| 321 | { loff ++;
|
---|
| 322 | }
|
---|
| 323 | break;
|
---|
| 324 | }
|
---|
| 325 | }
|
---|
| 326 | }
|
---|
| 327 |
|
---|
| 328 | tail = tail.substring(startoff + 1 + loff);
|
---|
| 329 | } while (tail != null && tail.length() > 0);
|
---|
| 330 |
|
---|
| 331 | if (tail != null)
|
---|
| 332 | { reply.append(tail);
|
---|
| 333 | }
|
---|
| 334 | return reply.toString();
|
---|
| 335 | }
|
---|
| 336 |
|
---|
| 337 | public static void main(String args[])
|
---|
| 338 | { String reply;
|
---|
| 339 |
|
---|
| 340 | getEntities();
|
---|
| 341 |
|
---|
| 342 | reply = decodeText(args[0]);
|
---|
| 343 | System.out.println(reply);
|
---|
| 344 | }
|
---|
| 345 | }
|
---|
| 346 |
|
---|