source: trunk/greenstone3-extensions/gs3build/src/org/greenstone/gsdl3/gs3build/xpointer/XPointer.java@ 12188

Last change on this file since 12188 was 12188, checked in by kjdon, 18 years ago

Initial revision

  • Property svn:keywords set to Author Date Id Revision
File size: 6.8 KB
Line 
1package org.greenstone.gsdl3.gs3build.xpointer;
2
3import java.io.File;
4
5import java.net.URL;
6
7import javax.xml.parsers.*;
8
9import org.w3c.dom.*;
10import org.apache.xpath.*;
11
12import org.xml.sax.SAXException;
13import org.xml.sax.SAXParseException;
14import org.xml.sax.helpers.DefaultHandler;
15
16import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocumentTools;
17import org.greenstone.gsdl3.gs3build.util.HTMLTidy;
18
19public abstract class XPointer
20{
21 Document document;
22 String xpointer;
23
/**
 * Stores the document and the xpointer expression this instance refers to.
 * No parsing or validation of the expression happens here.
 *
 * @param document the DOM document the pointer applies to
 * @param xpointer the xpointer expression text
 */
public XPointer(org.w3c.dom.Document document, String xpointer) {
    this.xpointer = xpointer;
    this.document = document;
}
29
30 public static String expandXPath(String xpath)
31 {
32 int dashAt;
33 String pathElement;
34 StringBuffer expandedPtr = new StringBuffer();
35
36 if (xpath.startsWith("//")) {
37 xpath = xpath.substring(2);
38 expandedPtr.append("//");
39 }
40 else if (xpath.startsWith("/")) {
41 xpath = xpath.substring(1);
42 expandedPtr.append("/");
43 }
44
45 while (xpath.length() > 0) {
46 dashAt = xpath.indexOf('/');
47 if (dashAt >= 0) {
48 pathElement = xpath.substring(0, dashAt);
49 xpath = xpath.substring(dashAt+1);
50 }
51 else {
52 pathElement = xpath;
53 xpath = "";
54 }
55
56 int c = 0;
57 while (c < pathElement.length()) {
58 if (!Character.isDigit(pathElement.charAt(c)))
59 break;
60 c ++;
61 }
62
63 if (c == pathElement.length()) {
64 expandedPtr.append("*[");
65 expandedPtr.append(pathElement);
66 expandedPtr.append("]");
67 }
68 else {
69 expandedPtr.append(pathElement);
70 }
71
72 if (dashAt >= 0) {
73 expandedPtr.append('/');
74 }
75 }
76 xpath = expandedPtr.toString();
77
78 return xpath;
79 }
80
81 public static XPointer processXPointer(Document document, String xpointer)
82 { // do a "cheap and dirty" detection of range-to
83 if (xpointer.indexOf("range-to") >= 0) {
84 return new RangeToXPointer(document, xpointer);
85 }
86 return new IdentifierXPointer(document, xpointer);
87 }
88
89 public static XPointer processXPointer(Document document, URL url)
90 { String reference = url.getRef();
91
92 reference.trim();
93 if (reference.startsWith("xpointer(")) {
94 reference = reference.substring(9, reference.length() - 1);
95 }
96 else {
97 return null;
98 }
99 // System.out.println(reference);
100
101 return processXPointer(document, reference);
102 }
103
104 public static void printNode(Node node, StringBuffer to, boolean close)
105 {
106 if (node.getNodeType() == org.w3c.dom.Node.TEXT_NODE) {
107 if (!close) {
108 to.append(node.getNodeValue());
109 to.append(" ");
110 }
111 }
112 else if (node.getNodeType() == org.w3c.dom.Node.ENTITY_NODE) {
113 if (!close) {
114 to.append("&");
115 to.append(node.getNodeValue());
116 to.append(";");
117 to.append(" ");
118 }
119 }
120 else if (node.getNodeType() == org.w3c.dom.Node.COMMENT_NODE) {
121 if (!close) {
122 to.append("<!-- ");
123 to.append(node.getNodeValue());
124 to.append(" -->");
125 }
126 }
127 else if (node.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
128 boolean hasChildren = (node.getChildNodes().getLength() > 0);
129
130 if (close && !hasChildren) {
131 return;
132 }
133 to.append("<");
134 if (close) {
135 to.append("/");
136 }
137 to.append(node.getNodeName());
138
139 if (!close) {
140 NamedNodeMap attributes = ((Element) node).getAttributes();
141 for (int a = 0; a < attributes.getLength(); a ++) {
142 Node attributeNode = attributes.item(a);
143 to.append(" ");
144 to.append(attributeNode.getNodeName());
145
146 String value = attributeNode.getNodeValue();
147 if (value != null && value.length() > 0) {
148 to.append("=\"");
149 to.append(value);
150 to.append("\"");
151 }
152 }
153 }
154
155 if (!hasChildren) {
156 to.append(" /");
157 }
158 to.append(">");
159 if (close && node.getNodeName()=="Section") {
160 to.append((char) 3);
161 }
162 }
163 }
164
165 public static Node getNextNode(Node currentNode) {
166
167 // Try to get the first valid child
168 NodeList children = currentNode.getChildNodes();
169 for (int c = 0; c < children.getLength(); c++) {
170 Node childNode = children.item(c);
171
172 if (childNode.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE ||
173 childNode.getNodeType() == org.w3c.dom.Node.COMMENT_NODE ||
174 childNode.getNodeType() == org.w3c.dom.Node.TEXT_NODE ||
175 childNode.getNodeType() == org.w3c.dom.Node.ENTITY_NODE) {
176 return childNode;
177 }
178 }
179
180 // now try a siblings and parents siblings
181 Node siblingNode = null;
182 do {
183 siblingNode = currentNode.getNextSibling();
184 if (siblingNode != null) {
185 // should we be checking type??
186 return siblingNode;
187 }
188
189 currentNode = currentNode.getParentNode();
190 } while (currentNode != null);
191
192 return null;
193 }
194
195
196 public static Node getNextNode(Node currentNode, StringBuffer buffer)
197 { Node reply;
198
199 // Try for children only the once...
200 NodeList children = currentNode.getChildNodes();
201 for (int c = 0; c < children.getLength(); c++) {
202 Node childNode = children.item(c);
203
204 if (childNode.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE ||
205 childNode.getNodeType() == org.w3c.dom.Node.COMMENT_NODE ||
206 childNode.getNodeType() == org.w3c.dom.Node.TEXT_NODE ||
207 childNode.getNodeType() == org.w3c.dom.Node.ENTITY_NODE) {
208 return childNode;
209 }
210 }
211
212 // Repeat looking for a sibling & going up until done...
213 do {
214 reply = currentNode.getNextSibling();
215 if (reply != null) {
216 return reply;
217 }
218
219 currentNode = currentNode.getParentNode();
220 if (buffer != null && currentNode != null) {
221 printNode(currentNode, buffer, true);
222 }
223 } while (currentNode != null);
224
225 return reply;
226 }
227
/**
 * Returns a DOM node for this pointer; each concrete subclass supplies
 * the implementation.
 * NOTE(review): presumably the node at which the target of the xpointer
 * expression begins -- confirm against the subclasses.
 */
228 public abstract Node getStartNode();
229
230
231 public static void main(String args[])
232 {
233 try
234 { File file = new File(args[0]);
235
236 Document document;
237 if (args[0].endsWith(".html") || args[0].endsWith(".htm")) {
238 HTMLTidy tidier = new HTMLTidy(file);
239 document = tidier.getDocument();
240
241 HTMLDocumentTools docTools = new HTMLDocumentTools(document);
242 docTools.findSections();
243 System.out.println("Sections found");
244 }
245 else {
246 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
247 DocumentBuilder builder = factory.newDocumentBuilder();
248 document = builder.parse(file);
249 }
250
251 XPointer pointer = null;
252 if (args.length < 2) {
253 }
254 else if (args.length == 2) {
255 pointer = XPointer.processXPointer(document, args[1]);
256 }
257 else {
258 pointer = XPointer.processXPointer(document, args[1]+"/range-to("+args[2]+")");
259 }
260 if (pointer != null) {
261 System.out.println(pointer.toString());
262 }
263 }
264 catch (FactoryConfigurationError e) {
265 System.err.println(e);
266 }
267 catch (ParserConfigurationException ex) {
268 System.err.println(ex);
269 }
270 catch (SAXException ex) {
271 System.err.println(ex);
272 }
273 catch (java.io.IOException ex) {
274 System.err.println(ex);
275 }
276 }
277}
278
279
280
281
Note: See TracBrowser for help on using the repository browser.