source: trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/xpointer/XPointer.java@ 7466

Last change on this file since 7466 was 6347, checked in by cs025, 20 years ago

Small changes in main function

  • Property svn:keywords set to Author Date Id Revision
File size: 5.8 KB
Line 
1package org.greenstone.gsdl3.gs3build.xpointer;
2
3import java.io.File;
4
5import java.net.URL;
6
7import javax.xml.parsers.*;
8
9import org.w3c.dom.*;
10import org.apache.xpath.*;
11
12import org.xml.sax.SAXException;
13import org.xml.sax.SAXParseException;
14import org.xml.sax.helpers.DefaultHandler;
15
16import org.greenstone.gsdl3.gs3build.doctypes.HTMLDocumentTools;
17import org.greenstone.gsdl3.gs3build.util.HTMLTidy;
18
19public abstract class XPointer
20{
21 Document document;
22 String xpointer;
23
24 public XPointer(org.w3c.dom.Document document, String xpointer)
25 { this.document = document;
26 this.xpointer = xpointer;
27 // now process the type of description
28 }
29
30 public static String expandXPath(String xpath)
31 {
32 int dashAt;
33 String pathElement;
34 StringBuffer expandedPtr = new StringBuffer();
35
36 if (xpath.startsWith("//")) {
37 xpath = xpath.substring(2);
38 expandedPtr.append("//");
39 }
40 else if (xpath.startsWith("/")) {
41 xpath = xpath.substring(1);
42 expandedPtr.append("/");
43 }
44
45 while (xpath.length() > 0) {
46 dashAt = xpath.indexOf('/');
47 if (dashAt >= 0) {
48 pathElement = xpath.substring(0, dashAt);
49 xpath = xpath.substring(dashAt+1);
50 }
51 else {
52 pathElement = xpath;
53 xpath = "";
54 }
55
56 int c = 0;
57 while (c < pathElement.length()) {
58 if (!Character.isDigit(pathElement.charAt(c)))
59 break;
60 c ++;
61 }
62
63 if (c == pathElement.length()) {
64 expandedPtr.append("*[");
65 expandedPtr.append(pathElement);
66 expandedPtr.append("]");
67 }
68 else {
69 expandedPtr.append(pathElement);
70 }
71
72 if (dashAt >= 0) {
73 expandedPtr.append('/');
74 }
75 }
76 xpath = expandedPtr.toString();
77
78 return xpath;
79 }
80
81 public static XPointer processXPointer(Document document, String xpointer)
82 { // do a "cheap and dirty" detection of range-to
83 if (xpointer.indexOf("range-to") >= 0) {
84 return new RangeToXPointer(document, xpointer);
85 }
86 return new IdentifierXPointer(document, xpointer);
87 }
88
89 public static XPointer processXPointer(Document document, URL url)
90 { String reference = url.getRef();
91
92 reference.trim();
93 if (reference.startsWith("xpointer(")) {
94 reference = reference.substring(9, reference.length() - 1);
95 }
96 else {
97 return null;
98 }
99 // System.out.println(reference);
100
101 return processXPointer(document, reference);
102 }
103
104 public static void printNode(Node node, StringBuffer to, boolean close)
105 {
106 if (node.getNodeType() == org.w3c.dom.Node.TEXT_NODE) {
107 if (!close) {
108 to.append(node.getNodeValue());
109 }
110 }
111 else if (node.getNodeType() == org.w3c.dom.Node.ENTITY_NODE) {
112 if (!close) {
113 to.append("&");
114 to.append(node.getNodeValue());
115 to.append(";");
116 }
117 }
118 else if (node.getNodeType() == org.w3c.dom.Node.COMMENT_NODE) {
119 if (!close) {
120 to.append("<!-- ");
121 to.append(node.getNodeValue());
122 to.append(" -->");
123 }
124 }
125 else if (node.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
126 boolean hasChildren = (node.getChildNodes().getLength() > 0);
127
128 if (close && !hasChildren) {
129 return;
130 }
131 to.append("<");
132 if (close) {
133 to.append("/");
134 }
135 to.append(node.getNodeName());
136
137 if (!close) {
138 NamedNodeMap attributes = ((Element) node).getAttributes();
139 for (int a = 0; a < attributes.getLength(); a ++) {
140 Node attributeNode = attributes.item(a);
141 to.append(" ");
142 to.append(attributeNode.getNodeName());
143
144 String value = attributeNode.getNodeValue();
145 if (value != null && value.length() > 0) {
146 to.append("=\"");
147 to.append(value);
148 to.append("\"");
149 }
150 }
151 }
152
153 if (!hasChildren) {
154 to.append(" /");
155 }
156 to.append(">");
157 }
158 }
159
160 public static Node getNextNode(Node currentNode, StringBuffer buffer)
161 { Node reply;
162
163 // Try for children only the once...
164 NodeList children = currentNode.getChildNodes();
165 for (int c = 0; c < children.getLength(); c++) {
166 Node childNode = children.item(c);
167
168 if (childNode.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE ||
169 childNode.getNodeType() == org.w3c.dom.Node.COMMENT_NODE ||
170 childNode.getNodeType() == org.w3c.dom.Node.TEXT_NODE ||
171 childNode.getNodeType() == org.w3c.dom.Node.ENTITY_NODE) {
172 return childNode;
173 }
174 }
175
176 // Repeat looking for a sibling & going up until done...
177 do {
178 reply = currentNode.getNextSibling();
179 if (reply != null) {
180 return reply;
181 }
182
183 currentNode = currentNode.getParentNode();
184 if (buffer != null && currentNode != null) {
185 printNode(currentNode, buffer, true);
186 }
187 } while (currentNode != null);
188
189 return reply;
190 }
191
192 public abstract Node getStartNode();
193
194
195 public static void main(String args[])
196 {
197 try
198 { File file = new File(args[0]);
199
200 Document document;
201 if (args[0].endsWith(".html") || args[0].endsWith(".htm")) {
202 HTMLTidy tidier = new HTMLTidy(file);
203 document = tidier.getDocument();
204
205 HTMLDocumentTools docTools = new HTMLDocumentTools(document);
206 docTools.findSections();
207 System.out.println("Sections found");
208 }
209 else {
210 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
211 DocumentBuilder builder = factory.newDocumentBuilder();
212 document = builder.parse(file);
213 }
214
215 XPointer pointer = null;
216 if (args.length < 2) {
217 }
218 else if (args.length == 2) {
219 pointer = XPointer.processXPointer(document, args[1]);
220 }
221 else {
222 pointer = XPointer.processXPointer(document, args[1]+"/range-to("+args[2]+")");
223 }
224 if (pointer != null) {
225 System.out.println(pointer.toString());
226 }
227 }
228 catch (FactoryConfigurationError e) {
229 System.err.println(e);
230 }
231 catch (ParserConfigurationException ex) {
232 System.err.println(ex);
233 }
234 catch (SAXException ex) {
235 System.err.println(ex);
236 }
237 catch (java.io.IOException ex) {
238 System.err.println(ex);
239 }
240 }
241}
242
243
244
245
Note: See TracBrowser for help on using the repository browser.