source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/xpointer/XPointer.java@ 6290

Last change on this file since 6290 was 6290, checked in by cs025, 20 years ago

XPointer first draft implementation

  • Property svn:keywords set to Author Date Id Revision
File size: 5.9 KB
Line 
1package org.greenstone.gsdl3.gs3build.xpointer;
2
3import java.io.File;
4
5import java.net.URL;
6
7import javax.xml.parsers.*;
8
9import org.w3c.dom.*;
10import org.apache.xpath.*;
11import org.w3c.tidy.Tidy;
12
13import org.xml.sax.SAXException;
14import org.xml.sax.SAXParseException;
15import org.xml.sax.helpers.DefaultHandler;
16
17import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocumentTools;
18import org.greenstone.gsdl3.gs3build.util.HTMLTidy;
19
20public abstract class XPointer
21{
22 Document document;
23 String xpointer;
24
25 public XPointer(org.w3c.dom.Document document, String xpointer)
26 { this.document = document;
27 this.xpointer = xpointer;
28 // now process the type of description
29 }
30
31 public static String expandXPath(String xpath)
32 {
33 int dashAt;
34 String pathElement;
35 StringBuffer expandedPtr = new StringBuffer();
36
37 if (xpath.startsWith("//")) {
38 xpath = xpath.substring(2);
39 expandedPtr.append("//");
40 }
41 else if (xpath.startsWith("/")) {
42 xpath = xpath.substring(1);
43 expandedPtr.append("/");
44 }
45
46 while (xpath.length() > 0) {
47 dashAt = xpath.indexOf('/');
48 if (dashAt >= 0) {
49 pathElement = xpath.substring(0, dashAt);
50 xpath = xpath.substring(dashAt+1);
51 }
52 else {
53 pathElement = xpath;
54 xpath = "";
55 }
56
57 int c = 0;
58 while (c < pathElement.length()) {
59 if (!Character.isDigit(pathElement.charAt(c)))
60 break;
61 c ++;
62 }
63
64 if (c == pathElement.length()) {
65 expandedPtr.append("*[");
66 expandedPtr.append(pathElement);
67 expandedPtr.append("]");
68 }
69 else {
70 expandedPtr.append(pathElement);
71 }
72
73 if (dashAt >= 0) {
74 expandedPtr.append('/');
75 }
76 }
77 xpath = expandedPtr.toString();
78
79 return xpath;
80 }
81
82 public static XPointer processXPointer(Document document, String xpointer)
83 { // do a "cheap and dirty" detection of range-to
84 if (xpointer.indexOf("range-to") >= 0) {
85 return new RangeToXPointer(document, xpointer);
86 }
87 return new IdentifierXPointer(document, xpointer);
88 }
89
90 public static XPointer processXPointer(Document document, URL url)
91 { String reference = url.getRef();
92
93 reference.trim();
94 if (reference.startsWith("xpointer(")) {
95 reference = reference.substring(9, reference.length() - 1);
96 }
97 else {
98 return null;
99 }
100 // System.out.println(reference);
101
102 return processXPointer(document, reference);
103 }
104
105 public static void printNode(Node node, StringBuffer to, boolean close)
106 {
107 if (node.getNodeType() == org.w3c.dom.Node.TEXT_NODE) {
108 if (!close) {
109 to.append(node.getNodeValue());
110 }
111 }
112 else if (node.getNodeType() == org.w3c.dom.Node.ENTITY_NODE) {
113 if (!close) {
114 to.append("&");
115 to.append(node.getNodeValue());
116 to.append(";");
117 }
118 }
119 else if (node.getNodeType() == org.w3c.dom.Node.COMMENT_NODE) {
120 if (!close) {
121 to.append("<!-- ");
122 to.append(node.getNodeValue());
123 to.append(" -->");
124 }
125 }
126 else if (node.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
127 boolean hasChildren = (node.getChildNodes().getLength() > 0);
128
129 if (close && !hasChildren) {
130 return;
131 }
132 to.append("<");
133 if (close) {
134 to.append("/");
135 }
136 to.append(node.getNodeName());
137
138 if (!close) {
139 NamedNodeMap attributes = ((Element) node).getAttributes();
140 for (int a = 0; a < attributes.getLength(); a ++) {
141 Node attributeNode = attributes.item(a);
142 to.append(" ");
143 to.append(attributeNode.getNodeName());
144
145 String value = attributeNode.getNodeValue();
146 if (value != null && value.length() > 0) {
147 to.append("=\"");
148 to.append(value);
149 to.append("\"");
150 }
151 }
152 }
153
154 if (!hasChildren) {
155 to.append(" /");
156 }
157 to.append(">");
158 }
159 }
160
161 public static Node getNextNode(Node currentNode, StringBuffer buffer)
162 { Node reply;
163
164 // Try for children only the once...
165 NodeList children = currentNode.getChildNodes();
166 for (int c = 0; c < children.getLength(); c++) {
167 Node childNode = children.item(c);
168
169 if (childNode.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE ||
170 childNode.getNodeType() == org.w3c.dom.Node.COMMENT_NODE ||
171 childNode.getNodeType() == org.w3c.dom.Node.TEXT_NODE ||
172 childNode.getNodeType() == org.w3c.dom.Node.ENTITY_NODE) {
173 return childNode;
174 }
175 }
176
177 // Repeat looking for a sibling & going up until done...
178 do {
179 reply = currentNode.getNextSibling();
180 if (reply != null) {
181 return reply;
182 }
183
184 currentNode = currentNode.getParentNode();
185 if (buffer != null && currentNode != null) {
186 printNode(currentNode, buffer, true);
187 }
188 } while (currentNode != null);
189
190 return reply;
191 }
192
/**
 * Returns the start node of this pointer; supplied by each concrete
 * subclass.
 *
 * NOTE(review): presumably the DOM node at which the pointed-to content
 * begins -- confirm against the RangeToXPointer and IdentifierXPointer
 * implementations, which are not visible from this file.
 *
 * @return the start node as determined by the subclass
 */
193 public abstract Node getStartNode();
194
195
196 public static void main(String args[])
197 {
198 try
199 { File file = new File(args[0]);
200
201 Document document;
202 if (args[0].endsWith(".html") || args[0].endsWith(".htm")) {
203 Tidy tidy = new Tidy();
204 tidy.setMakeClean(true);
205 tidy.setXmlTags(true);
206 document = tidy.parseDOM(new java.io.FileInputStream(file), null);
207
208 HTMLTidy tidier = new HTMLTidy(file);
209 document = tidier.getDocument();
210
211 HTMLDocumentTools docTools = new HTMLDocumentTools(document);
212 docTools.findSections();
213 System.out.println("Sections found");
214 }
215 else {
216 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
217 DocumentBuilder builder = factory.newDocumentBuilder();
218 document = builder.parse(file);
219 }
220
221 XPointer pointer;
222 if (args.length == 2) {
223 pointer = XPointer.processXPointer(document, args[1]);
224 }
225 else {
226 pointer = XPointer.processXPointer(document, args[1]+"/range-to("+args[2]+")");
227 }
228 System.out.println(pointer.toString());
229 }
230 catch (FactoryConfigurationError e) {
231 System.err.println(e);
232 }
233 catch (ParserConfigurationException ex) {
234 System.err.println(ex);
235 }
236 catch (SAXException ex) {
237 System.err.println(ex);
238 }
239 catch (java.io.IOException ex) {
240 System.err.println(ex);
241 }
242 }
243}
244
245
246
247
Note: See TracBrowser for help on using the repository browser.