1 | package org.greenstone.gsdl3.gs3build.util;
|
---|
2 |
|
---|
3 | import java.util.*;
|
---|
4 |
|
---|
5 | public class HTMLBlock
|
---|
6 | {
|
---|
7 | Vector content;
|
---|
8 | int tagpos;
|
---|
9 | int endpos;
|
---|
10 |
|
---|
11 | public HTMLBlock(int docpos)
|
---|
12 | {
|
---|
13 | this.content = new Vector(1);
|
---|
14 | this.tagpos = docpos;
|
---|
15 | }
|
---|
16 |
|
---|
17 | public HTMLBlock(int docpos, int endpos)
|
---|
18 | {
|
---|
19 | this.content = new Vector(1);
|
---|
20 | this.tagpos = docpos;
|
---|
21 | this.endpos = endpos;
|
---|
22 | }
|
---|
23 |
|
---|
24 | public void addTag(HTMLTag tag)
|
---|
25 | {
|
---|
26 | if (tag.endPos() > this.endpos){
|
---|
27 | this.endpos = tag.endPos();
|
---|
28 | }
|
---|
29 | content.addElement(tag);
|
---|
30 | }
|
---|
31 |
|
---|
32 | /**
|
---|
33 | * @deprecccate
|
---|
34 | public void addText(String text)
|
---|
35 | { content.addElement(text);
|
---|
36 | }
|
---|
37 | */
|
---|
38 |
|
---|
39 | public void addText(HTMLCText text)
|
---|
40 | {
|
---|
41 | if (text.endPos() > this.endpos){
|
---|
42 | this.endpos = text.endPos();
|
---|
43 | }
|
---|
44 | content.addElement(text);
|
---|
45 | }
|
---|
46 |
|
---|
47 | /**
|
---|
48 | * @return HTML encoded <code>String</code> of the document
|
---|
49 | */
|
---|
50 | public String contentString()
|
---|
51 | {
|
---|
52 | int member;
|
---|
53 | StringBuffer reply;
|
---|
54 | String type;
|
---|
55 |
|
---|
56 | reply = new StringBuffer();
|
---|
57 | for (member = 1; member < content.size() - 1; member ++){
|
---|
58 | /*if (content.elementAt(member) instanceof String){
|
---|
59 | reply.append((String) content.elementAt(member));
|
---|
60 | }
|
---|
61 | else
|
---|
62 | */
|
---|
63 | if (content.elementAt(member) instanceof HTMLCText){
|
---|
64 | reply.append(((HTMLCText) content.elementAt(member)).toString());
|
---|
65 | }
|
---|
66 | }
|
---|
67 | return reply.toString();
|
---|
68 | }
|
---|
69 |
|
---|
70 | /**
|
---|
71 | * Returns document position of start of the block
|
---|
72 | */
|
---|
73 | public int startPos()
|
---|
74 | {
|
---|
75 | return this.tagpos;
|
---|
76 | }
|
---|
77 |
|
---|
78 | /**
|
---|
79 | * Returns document position of end of the block
|
---|
80 | */
|
---|
81 | public int endPos()
|
---|
82 | {
|
---|
83 | return this.tagpos;
|
---|
84 | }
|
---|
85 |
|
---|
86 | /**
|
---|
87 | * @return head (first) tag of the block
|
---|
88 | */
|
---|
89 | public HTMLTag headTag()
|
---|
90 | {
|
---|
91 | return ((HTMLTag) content.elementAt(0));
|
---|
92 | }
|
---|
93 |
|
---|
94 | /**
|
---|
95 | * @return name (as a <code>String</code>) of the tag at the head of the block
|
---|
96 | */
|
---|
97 | public String headTagName()
|
---|
98 | {
|
---|
99 | return ((HTMLTag) content.elementAt(0)).tagName();
|
---|
100 | }
|
---|
101 |
|
---|
102 | /**
|
---|
103 | * @return name of the tail (last) tag of the block as a <code>String</code>
|
---|
104 | */
|
---|
105 | public String tailTagName()
|
---|
106 | {
|
---|
107 | return (((HTMLTag) content.elementAt(content.size() - 1)).tagName());
|
---|
108 | }
|
---|
109 |
|
---|
110 |
|
---|
111 | /**
|
---|
112 | * @return: starting element character position of the item-th item
|
---|
113 | */
|
---|
114 | private int elementStartPos(int item)
|
---|
115 | {
|
---|
116 | Object element;
|
---|
117 | int pos = -1;
|
---|
118 |
|
---|
119 | element = this.content.elementAt(item);
|
---|
120 | if (element instanceof HTMLTag){
|
---|
121 | pos = ((HTMLTag) element).startPos();
|
---|
122 | }
|
---|
123 | else if (element instanceof HTMLCText){
|
---|
124 | pos = ((HTMLCText) element).startPos();
|
---|
125 | }
|
---|
126 | return pos;
|
---|
127 | }
|
---|
128 |
|
---|
129 | /**
|
---|
130 | * @return: ending element character position of the item-th item
|
---|
131 | */
|
---|
132 | private int elementEndPos(int item)
|
---|
133 | {
|
---|
134 | Object element;
|
---|
135 | int pos = -1;
|
---|
136 |
|
---|
137 | element = this.content.elementAt(item);
|
---|
138 | if (element instanceof HTMLTag){
|
---|
139 | pos = ((HTMLTag) element).endPos();
|
---|
140 | }
|
---|
141 | else if (element instanceof HTMLCText){
|
---|
142 | pos = ((HTMLCText) element).endPos();
|
---|
143 | }
|
---|
144 | return pos;
|
---|
145 | }
|
---|
146 |
|
---|
147 | /**
|
---|
148 | * @return the HTMLBlock of the indicated subitems
|
---|
149 | */
|
---|
150 | public HTMLBlock subBlock(int startitem, int enditem)
|
---|
151 | {
|
---|
152 | int i;
|
---|
153 | Object element;
|
---|
154 | HTMLBlock reply;
|
---|
155 | reply = new HTMLBlock(this.elementStartPos(startitem),
|
---|
156 | this.elementEndPos(enditem-1));
|
---|
157 | // copy all the bits
|
---|
158 | for (i = startitem; i < enditem; i++){
|
---|
159 | element = this.content.elementAt(i);
|
---|
160 | if (element instanceof HTMLTag){
|
---|
161 | reply.addTag((HTMLTag) element);
|
---|
162 | }
|
---|
163 | else if (element instanceof HTMLCText){
|
---|
164 | reply.addText((HTMLCText) element);
|
---|
165 | }
|
---|
166 | /*
|
---|
167 | else if (element instanceof String)
|
---|
168 | { reply.addText((String) element);
|
---|
169 | }
|
---|
170 | */
|
---|
171 | }
|
---|
172 | return reply;
|
---|
173 | }
|
---|
174 |
|
---|
175 | /**
|
---|
176 | * Return the position of the given <code>HTMLBlock</code> within this block
|
---|
177 | * @return the tag index of the child block; or < 0 if not a chile
|
---|
178 | */
|
---|
179 | public int find(HTMLBlock subblock)
|
---|
180 | {
|
---|
181 | int start, end, at, attag;
|
---|
182 | HTMLTag tag;
|
---|
183 |
|
---|
184 | start = 0;
|
---|
185 | end = this.content.size();
|
---|
186 | while (start != end){
|
---|
187 | /* compare positions */
|
---|
188 | at = (start + end) / 2;
|
---|
189 | attag = at;
|
---|
190 | while (attag < end &&
|
---|
191 | (content.elementAt(attag) instanceof HTMLTag) == false){
|
---|
192 | attag ++;
|
---|
193 | }
|
---|
194 |
|
---|
195 | if (attag == end){
|
---|
196 | end = at;
|
---|
197 | continue;
|
---|
198 | }
|
---|
199 | tag = (HTMLTag) content.elementAt(attag);
|
---|
200 | if (tag.startPos() > subblock.startPos()){
|
---|
201 | end = at;
|
---|
202 | }
|
---|
203 | else if (tag.startPos() < subblock.startPos()){
|
---|
204 | start = attag + 1;
|
---|
205 | }
|
---|
206 | else {
|
---|
207 | return attag;
|
---|
208 | }
|
---|
209 | }
|
---|
210 | return -1;
|
---|
211 | }
|
---|
212 |
|
---|
213 | /**
|
---|
214 | * @return The raw HTML of the block
|
---|
215 | */
|
---|
216 | public String HTMLString()
|
---|
217 | {
|
---|
218 | int loop;
|
---|
219 | StringBuffer reply;
|
---|
220 |
|
---|
221 | reply = new StringBuffer();
|
---|
222 | for (loop = 0; loop < this.content.size(); loop ++){
|
---|
223 | /*
|
---|
224 | if (content.elementAt(loop) instanceof String)
|
---|
225 | { reply.append((String) content.elementAt(loop));
|
---|
226 | }
|
---|
227 | else
|
---|
228 | */
|
---|
229 | if (content.elementAt(loop) instanceof HTMLCText){
|
---|
230 | reply.append(((HTMLCText) content.elementAt(loop)).toString());
|
---|
231 | }
|
---|
232 | else if (content.elementAt(loop) instanceof HTMLTag){
|
---|
233 | reply.append(((HTMLTag) content.elementAt(loop)).toString());
|
---|
234 | }
|
---|
235 | }
|
---|
236 | return reply.toString();
|
---|
237 | }
|
---|
238 |
|
---|
239 | /**
|
---|
240 | * @return The text of the block - no HTML tags. If no text is present, any
|
---|
241 | * <code>alt</code> information for <code>img</code> tags will be given
|
---|
242 | * instead
|
---|
243 | */
|
---|
244 | public String toString()
|
---|
245 | {
|
---|
246 | int member;
|
---|
247 | StringBuffer reply;
|
---|
248 | StringBuffer ireply;
|
---|
249 |
|
---|
250 | reply = new StringBuffer("");
|
---|
251 | ireply = new StringBuffer("");
|
---|
252 | for (member = 0; member < content.size(); member ++){
|
---|
253 | /*if (content.elementAt(member) instanceof String)
|
---|
254 | { reply.append((String) content.elementAt(member));
|
---|
255 | }
|
---|
256 | */
|
---|
257 | if (content.elementAt(member) instanceof HTMLCText){
|
---|
258 | reply.append(((HTMLCText) content.elementAt(member)).toString());
|
---|
259 | }
|
---|
260 | else if (content.elementAt(member) instanceof HTMLTag){
|
---|
261 | if (((HTMLTag) content.elementAt(member)).tagName().equals("img")){
|
---|
262 | ireply.append(((HTMLTag) content.elementAt(member)).idValue("alt"));
|
---|
263 | }
|
---|
264 | }
|
---|
265 | }
|
---|
266 | reply = HTMLCText.cleanString(reply);
|
---|
267 | ireply = HTMLCText.cleanString(ireply);
|
---|
268 | // reply = reply.trim();
|
---|
269 | if (reply == null || reply.length() == 0){
|
---|
270 | return ireply.toString();
|
---|
271 | }
|
---|
272 | return reply.toString();
|
---|
273 | }
|
---|
274 |
|
---|
275 | public HTMLObject elementAt(int at)
|
---|
276 | {
|
---|
277 | return (HTMLObject) this.content.elementAt(at);
|
---|
278 | }
|
---|
279 |
|
---|
280 | /**
|
---|
281 | * @return the number of elements in the block
|
---|
282 | */
|
---|
283 | public int size()
|
---|
284 | {
|
---|
285 | return this.content.size();
|
---|
286 | }
|
---|
287 |
|
---|
288 | /**
|
---|
289 | * @return an enumeration of all elements in the block
|
---|
290 | */
|
---|
291 | public Enumeration elements()
|
---|
292 | {
|
---|
293 | Enumeration enumer;
|
---|
294 |
|
---|
295 | enumer = new HTMLBlockEnumerator(this);
|
---|
296 | return enumer;
|
---|
297 | }
|
---|
298 |
|
---|
299 | /**
|
---|
300 | * @return an enumeration of all tags in the block
|
---|
301 | */
|
---|
302 | public Enumeration tagElements()
|
---|
303 | {
|
---|
304 | Enumeration enumer;
|
---|
305 |
|
---|
306 | enumer = new HTMLBlockTagEnumerator(this);
|
---|
307 | return enumer;
|
---|
308 | }
|
---|
309 |
|
---|
310 | /**
|
---|
311 | * Finalization method
|
---|
312 | */
|
---|
313 | protected void finalize() throws Throwable
|
---|
314 | {
|
---|
315 | this.content = null;
|
---|
316 | super.finalize();
|
---|
317 | }
|
---|
318 | }
|
---|
319 |
|
---|
320 | final class HTMLBlockEnumerator implements Enumeration
|
---|
321 | {
|
---|
322 | private HTMLBlock block;
|
---|
323 | int member;
|
---|
324 |
|
---|
325 | public HTMLBlockEnumerator(HTMLBlock block)
|
---|
326 | {
|
---|
327 | this.block = block;
|
---|
328 | this.member = 0;
|
---|
329 | }
|
---|
330 |
|
---|
331 | public Object nextElement()
|
---|
332 | {
|
---|
333 | Object element;
|
---|
334 |
|
---|
335 | element = this.block.content.elementAt(member);
|
---|
336 | member ++;
|
---|
337 | return element;
|
---|
338 | }
|
---|
339 |
|
---|
340 | public boolean hasMoreElements()
|
---|
341 | {
|
---|
342 | if (block == null || this.member == this.block.content.size()){
|
---|
343 | return false;
|
---|
344 | }
|
---|
345 | return true;
|
---|
346 | }
|
---|
347 | }
|
---|
348 |
|
---|
349 | final class HTMLBlockTagEnumerator implements Enumeration
|
---|
350 | {
|
---|
351 | private HTMLBlock block;
|
---|
352 | int member;
|
---|
353 |
|
---|
354 | public HTMLBlockTagEnumerator(HTMLBlock block)
|
---|
355 | {
|
---|
356 | this.block = block;
|
---|
357 | this.member = 0;
|
---|
358 | while ( this.member < this.block.content.size() &&
|
---|
359 | this.block.content.elementAt(this.member) instanceof HTMLTag == false){
|
---|
360 | this.member ++;
|
---|
361 | }
|
---|
362 | }
|
---|
363 |
|
---|
364 | public Object nextElement()
|
---|
365 | {
|
---|
366 | Object element;
|
---|
367 |
|
---|
368 | element = this.block.content.elementAt(member);
|
---|
369 | do {
|
---|
370 | member ++;
|
---|
371 | } while (this.block.content.elementAt(member) instanceof HTMLTag == false);
|
---|
372 | return element;
|
---|
373 | }
|
---|
374 |
|
---|
375 | public boolean hasMoreElements()
|
---|
376 | {
|
---|
377 | if (block == null || this.member == this.block.content.size()){
|
---|
378 | return false;
|
---|
379 | }
|
---|
380 | return true;
|
---|
381 | }
|
---|
382 | }
|
---|
383 |
|
---|