Changeset 8926
- Timestamp:
- 2005-01-21T15:17:22+13:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/gs3build/doctypes/HTMLDocument.java
r7466 r8926 13 13 import org.greenstone.gsdl3.gs3build.util.*; 14 14 import org.greenstone.gsdl3.gs3build.xpointer.XPointer; 15 import org.greenstone.gsdl3.util.XMLConverter; 15 16 16 17 import org.w3c.dom.*; … … 35 36 { super(url); 36 37 37 this.loadDocument(url );38 this.loadDocument(url, false); 38 39 39 40 this._extractDocumentFiles(); 40 41 this._extractDocumentMetadata(); 41 42 // extract the files before resolving the images 43 resolveImages(); 42 44 HTMLDocumentTools docTools = new HTMLDocumentTools(this.domDocument); 43 45 docTools.setMetsDocument(this); … … 55 57 { 56 58 // HTMLDoc htmlDoc; 59 57 60 HTMLTidy tidyDoc; 58 61 if (url.getProtocol().equals("file")) … … 67 70 68 71 this.domDocument = tidyDoc.getDocument(); 69 70 72 Runtime.getRuntime().gc(); 71 73 // System.out.println(Runtime.getRuntime().freeMemory() + " " + Runtime.getRuntime().totalMemory()); … … 172 174 continue; 173 175 } 174 175 176 try 176 177 { // make the url for the image, and then add it to the document list of … … 185 186 } 186 187 188 187 189 /** 188 190 HTMLBlock codedContent = htmlDoc.getCodedContent(); … … 214 216 } 215 217 218 // I think this is used for single section documents, while getSectionText 219 // is used for sectioned documents 220 // we will use the domDocument rather than reading it in again to another HTMLDoc. 216 221 public String getDocumentText() 217 222 { 218 HTMLDoc htmlDoc; 223 XMLConverter converter = new XMLConverter(); 224 return converter.getPrettyString(this.domDocument.getDocumentElement()); 225 /* HTMLDoc htmlDoc; 219 226 URL url =(URL) this.fileSet.getFile(0).getLocation(); 220 227 … … 228 235 } 229 236 return htmlDoc.getContent(); 230 } 231 232 public Document getDOMDocument() 233 { 237 */ 238 } 239 240 private void resolveImages() { 241 242 // find the path of the url relative to the collection 243 URL full_path = this.fileSet.getFile(0).getLocation(); 244 245 String base_url; 246 if (full_path.getProtocol().equals("file")) { 247 base_url = full_path.getPath(); 248 int import_pos = base_url.indexOf("import"); 249 base_url = base_url.substring(import_pos); 250 base_url = "_httpcollection_/"+base_url; 251 } else { 252 base_url = full_path.toString(); 253 } 254 255 // need to take off the last part 256 base_url = base_url.substring(0, base_url.lastIndexOf("/")+1); 257 258 NodeList metadata = this.domDocument.getElementsByTagName("img"); 259 for (int n = 0; n < metadata.getLength(); n ++) { 260 Node node = metadata.item(n); 261 Element element = (Element) node; 262 263 String location = element.getAttribute("src"); 264 if (location != null && location.length() > 0 && isRelative(location)) { 265 // modify the source url 266 element.setAttribute("src", base_url+location); 267 } 268 } 269 } 270 271 private boolean isRelative(String location) { 272 273 if ( location.startsWith("http:") || location.startsWith("file:")) { 274 return false; 275 } 276 return true; 277 } 278 279 public Document getDOMDocument() 280 { 234 281 if (this.domDocument == null) { 235 282 URL url =(URL) this.fileSet.getFile(0).getLocation(); 236 283 this.loadDocument(url); 284 resolveImages(); 237 285 } 238 286 return this.domDocument; … … 269 317 URL url =(URL) this.fileSet.getFile(0).getLocation(); 270 318 this.loadDocument(url); 319 resolveImages(); 271 320 } 272 321
Note:
See TracChangeset
for help on using the changeset viewer.