source: trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/util/HTMLEntity.java@ 12188

Last change on this file since 12188 was 12188, checked in by kjdon, 18 years ago

Initial revision

  • Property svn:keywords set to Author Date Id Revision
File size: 7.9 KB
Line 
1package org.greenstone.gsdl3.gs3build.util;
2
3import java.io.*;
4import java.util.*;
5
6import org.greenstone.gsdl3.gs3build.doctypes.DocumentLoader;
7
8public class HTMLEntity
9{ String name;
10 int code;
11 static Vector entities = null;
12 static Vector nmapping = null;
13
14 public HTMLEntity(String name, int code)
15 { int start, end, at;
16 HTMLEntity compareEntity;
17
18 if (HTMLEntity.entities == null)
19 { HTMLEntity.entities = new Vector();
20 }
21 if (HTMLEntity.nmapping == null)
22 { HTMLEntity.nmapping = new Vector();
23 }
24
25 this.name = name;
26 this.code = code;
27
28 entities.addElement(this);
29 start = 0;
30 end = nmapping.size();
31 while (start != end)
32 { at = (start + end) / 2;
33
34 compareEntity =
35 (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue());
36 if (compareEntity.code > code)
37 { end = at;
38 }
39 else
40 { start = at + 1;
41 }
42 }
43 if (end == nmapping.size())
44 { nmapping.addElement(new Integer(nmapping.size()));
45 }
46 else
47 { nmapping.insertElementAt(new Integer(nmapping.size()), start);
48 }
49 }
50
51 public String toString()
52 { Character c;
53
54 c = new Character((char) this.code);
55 return c.toString();
56 }
57
58 public static String codeString(char c)
59 { StringBuffer reply;
60 String codestring;
61
62 if (c < 128 || c == '&' || c == '<' || c == '>')
63 { return null;
64 }
65 codestring = Integer.toString((int) c);
66 reply = new StringBuffer("&#");
67 reply.append(codestring);
68 reply.append(";");
69
70 return reply.toString();
71 }
72
73 public static String nameCodeString(char c)
74 { int start, end, at, compare;
75 HTMLEntity compareEntity;
76
77 start = 0;
78 end = nmapping.size();
79 while (start != end)
80 { at = (start + end) / 2;
81
82 compareEntity =
83 (HTMLEntity) entities.elementAt(((Integer) nmapping.elementAt(at)).intValue());
84 if (compareEntity.code == (int) c)
85 { StringBuffer reply;
86
87 reply = new StringBuffer("&");
88 reply.append(compareEntity.name);
89 reply.append(";");
90 return reply.toString();
91 }
92 else if (compareEntity.code < c)
93 { end = at;
94 }
95 else
96 { start = at + 1;
97 }
98 }
99 return codeString(c);
100 }
101
102 public static void getEntities(String source)
103 { HTMLEntity newentity;
104 int pos;
105 int start;
106 String name;
107 int code;
108
109 if (source == null)
110 { return;
111 }
112
113 start = 0;
114 while (start < source.length())
115 { pos = start;
116 while (source.charAt(pos) != ':')
117 { pos ++;
118 }
119 name = source.substring(start, pos);
120
121 // skip colon and reset
122 pos ++;
123 start = pos;
124
125 while(pos < source.length() && source.charAt(pos) > ' ')
126 { pos ++;
127 }
128 code = Integer.parseInt(source.substring(start, pos));
129
130 newentity = new HTMLEntity(name, code);
131
132 while ( pos < source.length() && source.charAt(pos) < ' ')
133 { pos ++;
134 }
135 start = pos;
136 }
137 }
138
139 public static void getEntities(InputStream in)
140 { getEntities(DocumentLoader.getAsString(in));
141 }
142
143 public static void getEntities(File file)
144 { FileInputStream in;
145
146 try
147 { in = new FileInputStream(file);
148 getEntities(in);
149 in.close();
150 }
151 catch (IOException io)
152 { System.out.println("No entities");
153 }
154 }
155
156 public static void getEntities()
157 { ClassLoader loader;
158 InputStream in;
159
160 in = ClassLoader.getSystemResourceAsStream("org\\greenstone\\gsdl3\\gs3build\\util\\Entities.map");
161 if (in == null)
162 { System.out.println("Unable to load Entities.map from org.greenstone.gsdl3.gs3build.util");
163 }
164 getEntities(in);
165 }
166
167 public static String encodeText(String source, boolean named)
168 { StringBuffer reply;
169 int start,at;
170
171 if (source == null)
172 { return null;
173 }
174
175 if (HTMLEntity.entities == null)
176 { getEntities();
177 }
178
179 reply = new StringBuffer();
180 at = 0;
181 start = 0;
182 while (at < source.length())
183 { if (source.charAt(at) > 128)
184 { if (at != start)
185 { reply.append(source.substring(start, at));
186 }
187
188 if (named)
189 { reply.append(nameCodeString(source.charAt(at)));
190 }
191 else
192 { reply.append(codeString(source.charAt(at)));
193 }
194 start = at + 1;
195 }
196 at ++;
197 }
198
199 return reply.toString();
200 }
201
202 public static String decodeText(String source)
203 { int start, end, loff, roff;
204 int sstart, send, test;
205 int startoff, endoff, baseoff;
206 boolean matched;
207 String tail;
208 StringBuffer reply;
209
210 if (source == null)
211 { return null;
212 }
213
214 if (HTMLEntity.entities == null)
215 { getEntities();
216 }
217
218 reply = new StringBuffer();
219 baseoff = 0;
220
221 tail = source;
222 do
223 { // get next ampersand
224 startoff = tail.indexOf('&');
225 if (startoff < 0 || startoff == tail.length() - 1)
226 { break;
227 }
228
229 // if it's followed by a hash, evaluate as a number
230 if (tail.charAt(startoff + 1) == '#')
231 { if (startoff < tail.length() - 2 &&
232 tail.charAt(startoff+2) >= '0' &&
233 tail.charAt(startoff+2) <= '9')
234 { int code;
235
236 loff = startoff + 2;
237 while (tail.charAt(loff) >= '0' &&
238 tail.charAt(loff) <= '9')
239 { loff ++;
240 }
241
242 code = Integer.parseInt(tail.substring(startoff+2, loff));
243 reply.append(tail.substring(0, startoff));
244 reply.append(new Character((char) code));
245
246 if (tail.charAt(loff) == ';')
247 { loff ++;
248 }
249 loff = loff - startoff - 1;
250 }
251 else
252 { loff = 0;
253 reply.append(tail.substring(0, startoff+1));
254 }
255 }
256 else
257 { start = 0;
258 end = HTMLEntity.entities.size();
259 loff = 0;
260
261 while (startoff+1+loff < tail.length() && start != end)
262 { sstart = start;
263 send = end;
264 test = (sstart + send) >> 1;
265
266 matched = false;
267
268 roff = startoff + 1 + loff;
269
270 while (sstart != send)
271 { test = (sstart + send) >> 1;
272
273 if (tail.charAt(roff) < ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff))
274 { send = test;
275 }
276 else if (tail.charAt(roff) > ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.charAt(loff))
277 { sstart = test + 1;
278 }
279 else
280 { break;
281 }
282 }
283
284 // System.out.println(sstart+">"+send+":"+loff);
285
286 if (sstart != send) // found a match
287 { sstart = test;
288 while (sstart >= start &&
289 tail.charAt(roff) ==
290 ((HTMLEntity) HTMLEntity.entities.elementAt(sstart)).name.charAt(loff))
291 { sstart --;
292 }
293 sstart ++;
294
295 send = test + 1;
296 while (send < end &&
297 tail.charAt(roff) ==
298 ((HTMLEntity) HTMLEntity.entities.elementAt(send)).name.charAt(loff))
299 { send ++;
300 }
301
302// System.out.println(sstart+"!"+send+":"+loff);
303
304 start = sstart;
305 end = send;
306 }
307 else
308 { loff = 0;
309 reply.append(tail.substring(0, startoff+1));
310 break;
311 }
312
313// System.out.println(start+">"+end+":"+loff);
314
315 loff ++;
316 if (end == start + 1 &&
317 ((HTMLEntity) HTMLEntity.entities.elementAt(test)).name.length() == loff)
318 { reply.append(tail.substring(0, startoff));
319 reply.append(((HTMLEntity) HTMLEntity.entities.elementAt(test)).toString());
320 if (roff < tail.length() - 1 && tail.charAt(roff+1) == ';')
321 { loff ++;
322 }
323 break;
324 }
325 }
326 }
327
328 tail = tail.substring(startoff + 1 + loff);
329 } while (tail != null && tail.length() > 0);
330
331 if (tail != null)
332 { reply.append(tail);
333 }
334 return reply.toString();
335 }
336
337 public static void main(String args[])
338 { String reply;
339
340 getEntities();
341
342 reply = decodeText(args[0]);
343 System.out.println(reply);
344 }
345}
346
Note: See TracBrowser for help on using the repository browser.