1 | package org.greenstone.gsdl3.gs3build.util;
|
---|
2 |
|
---|
3 |
|
---|
4 |
|
---|
5 | public class HTMLDocAnchorList extends HTMLBlockList
|
---|
6 |
|
---|
7 | { HTMLDoc doc;
|
---|
8 |
|
---|
9 | public HTMLDocAnchorList(HTMLDoc doc)
|
---|
10 |
|
---|
11 | { super();
|
---|
12 |
|
---|
13 | this.doc = doc; // remember the document reference
|
---|
14 |
|
---|
15 |
|
---|
16 |
|
---|
17 | super.initialise(doc, anchorStartTags, anchorEndTags, anchorOptEndTags);
|
---|
18 |
|
---|
19 | }
|
---|
20 |
|
---|
21 |
|
---|
22 |
|
---|
23 | /**
|
---|
24 | * Return the HTMLBlock containing the named local anchor and it's
|
---|
25 | * child (contained) tags.
|
---|
26 | *
|
---|
27 | * @param <code>String</code> name of the local anchor to find.
|
---|
28 | * @return <code>HTMLBlock</code> the corresponding block of HTML
|
---|
29 | * contained by the anchor. If the local anchor does not
|
---|
30 | * exist, this value will be <code>null</code>.
|
---|
31 | */
|
---|
32 | public HTMLBlock localAnchor(String name)
|
---|
33 |
|
---|
34 | { int block;
|
---|
35 |
|
---|
36 | String localname;
|
---|
37 |
|
---|
38 |
|
---|
39 |
|
---|
40 | for (block = 0; block < this.size(); block ++)
|
---|
41 |
|
---|
42 | { localname = this.tagBlock(block).headTag().idValue("name");
|
---|
43 |
|
---|
44 | if (localname == null)
|
---|
45 |
|
---|
46 | { continue;
|
---|
47 |
|
---|
48 | }
|
---|
49 |
|
---|
50 | if (name.equalsIgnoreCase(localname))
|
---|
51 |
|
---|
52 | { return this.tagBlock(block);
|
---|
53 |
|
---|
54 | }
|
---|
55 |
|
---|
56 | }
|
---|
57 |
|
---|
58 | return null;
|
---|
59 |
|
---|
60 | }
|
---|
61 |
|
---|
62 | private int indexOf(String base, String subString, int from)
|
---|
63 | { int i, j;
|
---|
64 |
|
---|
65 | for (i = from; i < base.length() - subString.length(); i ++)
|
---|
66 | { if (Character.toLowerCase(base.charAt(i)) == subString.charAt(0))
|
---|
67 | { j = 0;
|
---|
68 | for (j = 1; j < subString.length(); j ++)
|
---|
69 | { if (Character.toLowerCase(base.charAt(i+j)) != subString.charAt(j))
|
---|
70 | { break;
|
---|
71 | }
|
---|
72 | }
|
---|
73 |
|
---|
74 | if (j == subString.length())
|
---|
75 | { return i;
|
---|
76 | }
|
---|
77 | }
|
---|
78 | }
|
---|
79 | return -1;
|
---|
80 | }
|
---|
81 |
|
---|
82 | public void flattenLinks()
|
---|
83 | { int block;
|
---|
84 | String url;
|
---|
85 | char endsWith;
|
---|
86 | StringBuffer newContent = new StringBuffer(doc.getContent());
|
---|
87 | boolean changed = false;
|
---|
88 |
|
---|
89 | // System.out.println("Flattening");
|
---|
90 |
|
---|
91 | for (block = 0; block < this.size(); block ++)
|
---|
92 | { HTMLTag tag;
|
---|
93 | int tagPos;
|
---|
94 | int valueStart, valueEnd;
|
---|
95 |
|
---|
96 | tag = this.tagBlock(block).headTag(); // get the <A> tag itself
|
---|
97 |
|
---|
98 | url = tag.idValue("href"); // if there is no HREF member, abandon this one
|
---|
99 | if (url == null)
|
---|
100 | { System.out.println("No href");
|
---|
101 | continue;
|
---|
102 | }
|
---|
103 |
|
---|
104 | tagPos = tag.startPos();
|
---|
105 | tagPos = indexOf(doc.getContent(), "href", tagPos); // get the HREF itself
|
---|
106 | if (tagPos > tag.endPos() || tagPos < 0) // if none in the tag, skip it
|
---|
107 | { System.out.println("Didn't get href");
|
---|
108 | continue;
|
---|
109 | }
|
---|
110 | tagPos += "href".length(); // skip past it
|
---|
111 |
|
---|
112 | while ( doc.getContent().charAt(tagPos) <= ' ' || // skip pass equals, etc.
|
---|
113 | doc.getContent().charAt(tagPos) == '=')
|
---|
114 | { tagPos ++;
|
---|
115 | }
|
---|
116 |
|
---|
117 | if (doc.getContent().charAt(tagPos) == '"' || // get the demarcating whitespace/quotes
|
---|
118 | doc.getContent().charAt(tagPos) == '\'')
|
---|
119 | { endsWith = doc.getContent().charAt(tagPos);
|
---|
120 | tagPos ++;
|
---|
121 | }
|
---|
122 | else
|
---|
123 | { endsWith = ' ';
|
---|
124 | }
|
---|
125 | valueStart = tagPos;
|
---|
126 |
|
---|
127 | // System.out.println("Flattening " + doc.getContent().length() + " " + tagPos);
|
---|
128 |
|
---|
129 | while (tagPos < doc.getContent().length() &&
|
---|
130 | doc.getContent().charAt(tagPos) != endsWith) // read it in
|
---|
131 | { if (doc.content.charAt(tagPos) == '?' || // replace special characters
|
---|
132 | doc.content.charAt(tagPos) == '&')
|
---|
133 | { newContent.setCharAt(tagPos, '_'); // and note that we've done so
|
---|
134 | changed = true;
|
---|
135 | }
|
---|
136 | tagPos ++;
|
---|
137 | }
|
---|
138 | valueEnd = tagPos;
|
---|
139 |
|
---|
140 | tagPos = indexOf(doc.getContent(), ".html", valueStart);
|
---|
141 | if (tagPos >= 0 && tagPos < valueEnd)
|
---|
142 | { // shuffle around the html bit
|
---|
143 | while (tagPos < valueEnd - 5)
|
---|
144 | { newContent.setCharAt(tagPos, newContent.charAt(tagPos + 5));
|
---|
145 | tagPos ++;
|
---|
146 | }
|
---|
147 | newContent.setCharAt(tagPos ++, '.');
|
---|
148 | newContent.setCharAt(tagPos ++, 'h');
|
---|
149 | newContent.setCharAt(tagPos ++, 't');
|
---|
150 | newContent.setCharAt(tagPos ++, 'm');
|
---|
151 | newContent.setCharAt(tagPos ++, 'l');
|
---|
152 | }
|
---|
153 | }
|
---|
154 |
|
---|
155 | if (changed == true) // replace the document as needsbe
|
---|
156 | { doc.setContent(newContent.toString());
|
---|
157 | }
|
---|
158 | }
|
---|
159 |
|
---|
160 | }
|
---|