package org.greenstone.gsdl3.gs3build.util; import java.io.*; import java.util.*; import org.greenstone.gsdl3.gs3build.doctypes.DocumentLoader; public class HTMLEntity { String name; int code; static Vector entities = null; static Vector nmapping = null; public HTMLEntity(String name, int code) { int start, end, at; HTMLEntity compareEntity; if (HTMLEntity.entities == null) { HTMLEntity.entities = new Vector(); } if (HTMLEntity.nmapping == null) { HTMLEntity.nmapping = new Vector(); } this.name = name; this.code = code; entities.addElement(this); start = 0; end = nmapping.size(); while (start != end) { at = (start + end) / 2; compareEntity = (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue()); if (compareEntity.code > code) { end = at; } else { start = at + 1; } } if (end == nmapping.size()) { nmapping.addElement(new Integer(nmapping.size())); } else { nmapping.insertElementAt(new Integer(nmapping.size()), start); } } public String toString() { Character c; c = new Character((char) this.code); return c.toString(); } public static String codeString(char c) { StringBuffer reply; String codestring; if (c < 128 || c == '&' || c == '<' || c == '>') { return null; } codestring = Integer.toString((int) c); reply = new StringBuffer("&#"); reply.append(codestring); reply.append(";"); return reply.toString(); } public static String nameCodeString(char c) { int start, end, at, compare; HTMLEntity compareEntity; start = 0; end = nmapping.size(); while (start != end) { at = (start + end) / 2; compareEntity = (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue()); if (compareEntity.code == (int) c) { StringBuffer reply; reply = new StringBuffer("&"); reply.append(compareEntity.name); reply.append(";"); return reply.toString(); } else if (compareEntity.code < c) { end = at; } else { start = at + 1; } } return codeString(c); } public static void getEntities(String source) { HTMLEntity newentity; int pos; int start; String name; int code; if (source == null) { return; } start = 0; while (start < source.length()) { pos = start; while (source.charAt(pos) != ':') { pos ++; } name = source.substring(start, pos); // skip colon and reset pos ++; start = pos; while(pos < source.length() && source.charAt(pos) > ' ') { pos ++; } code = Integer.parseInt(source.substring(start, pos)); newentity = new HTMLEntity(name, code); while ( pos < source.length() && source.charAt(pos) < ' ') { pos ++; } start = pos; } } public static void getEntities(InputStream in) { getEntities(DocumentLoader.getAsString(in)); } public static void getEntities(File file) { FileInputStream in; try { in = new FileInputStream(file); getEntities(in); in.close(); } catch (IOException io) { System.out.println("No entities"); } } public static void getEntities() { ClassLoader loader; InputStream in; in = ClassLoader.getSystemResourceAsStream("org\\greenstone\\gsdl3\\gs3build\\util\\Entities.map"); if (in == null) { System.out.println("Unable to load Entities.map from org.greenstone.gsdl3.gs3build.util"); } getEntities(in); } public static String encodeText(String source, boolean named) { StringBuffer reply; int start,at; if (source == null) { return null; } if (HTMLEntity.entities == null) { getEntities(); } reply = new StringBuffer(); at = 0; start = 0; while (at < source.length()) { if (source.charAt(at) > 128) { if (at != start) { reply.append(source.substring(start, at)); } if (named) { reply.append(nameCodeString(source.charAt(at))); } else { reply.append(codeString(source.charAt(at))); } start = at + 1; } at ++; } return reply.toString(); } public static String decodeText(String source) { int start, end, loff, roff; int sstart, send, test; int startoff, endoff, baseoff; boolean matched; String tail; StringBuffer reply; if (source == null) { return null; } if (HTMLEntity.entities == null) { getEntities(); } reply = new StringBuffer(); baseoff = 0; tail = source; do { // get next ampersand startoff = tail.indexOf('&'); if (startoff < 0 || startoff == tail.length() - 1) { break; } // if it's followed by a hash, evaluate as a number if (tail.charAt(startoff + 1) == '#') { if (startoff < tail.length() - 2 && tail.charAt(startoff+2) >= '0' && tail.charAt(startoff+2) <= '9') { int code; loff = startoff + 2; while (tail.charAt(loff) >= '0' && tail.charAt(loff) <= '9') { loff ++; } code = Integer.parseInt(tail.substring(startoff+2, loff)); reply.append(tail.substring(0, startoff)); reply.append(new Character((char) code)); if (tail.charAt(loff) == ';') { loff ++; } loff = loff - startoff - 1; } else { loff = 0; reply.append(tail.substring(0, startoff+1)); } } else { start = 0; end = HTMLEntity.entities.size(); loff = 0; while (startoff+1+loff < tail.length() && start != end) { sstart = start; send = end; test = (sstart + send) >> 1; matched = false; roff = startoff + 1 + loff; while (sstart != send) { test = (sstart + send) >> 1; if (tail.charAt(roff) < ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff)) { send = test; } else if (tail.charAt(roff) > ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff)) { sstart = test + 1; } else { break; } } // System.out.println(sstart+">"+send+":"+loff); if (sstart != send) // found a match { sstart = test; while (sstart >= start && tail.charAt(roff) == ((HTMLEntity) HTMLEntity.entities.elementAt(sstart)).name.charAt(loff)) { sstart --; } sstart ++; send = test + 1; while (send < end && tail.charAt(roff) == ((HTMLEntity) HTMLEntity.entities.elementAt(send)).name.charAt(loff)) { send ++; } // System.out.println(sstart+"!"+send+":"+loff); start = sstart; end = send; } else { loff = 0; reply.append(tail.substring(0, startoff+1)); break; } // System.out.println(start+">"+end+":"+loff); loff ++; if (end == start + 1 && ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.length() == loff) { reply.append(tail.substring(0, startoff)); reply.append(((HTMLEntity) HTMLEntity.entities.elementAt(test)).toString()); if (roff < tail.length() - 1 && tail.charAt(roff+1) == ';') { loff ++; } break; } } } tail = tail.substring(startoff + 1 + loff); } while (tail != null && tail.length() > 0); if (tail != null) { reply.append(tail); } return reply.toString(); } public static void main(String args[]) { String reply; getEntities(); reply = decodeText(args[0]); System.out.println(reply); } }