Changeset 38871 for main/trunk/greenstone3
- Timestamp: 2024-03-24T22:31:53+13:00 (3 months ago)
- Location: main/trunk/greenstone3/src/java/org/greenstone
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone3/src/java/org/greenstone/applet/GsdlCollageApplet/CURL.java
r38855 r38871 38 38 //input = url.openStream(); 39 39 40 } 41 catch (MalformedURLException e) { 42 43 url_valid = false; 44 } 45 catch (IOException e) { 46 47 url_valid = false; 48 } 49 } 50 51 /** Starts processing the given url for images and links 52 * This variant of the constructor is needed for GS3. 53 * @param url_str The url to examine */ 54 public CURL(String url_str, String base_url) { 55 href_links = new Vector(); 56 src_links = new Vector(); 57 link_links = new Vector(); 58 background_links = new Vector(); 59 60 try { 61 url = new URL(url_str); 62 input = url.openStream(); 63 // hereafter, we will build up URLs by concatenating to baseURL not url_str 64 url = new URL(base_url); 40 65 } 41 66 catch (MalformedURLException e) { … … 618 643 619 644 url_str = url_str.replaceAll("&","&"); 620 645 646 // Greenstone3 adds jsessionIDs at the end of URLs, 647 // remove them, as they mess up some things, such as filetype and image identification 648 /* 649 int jsessionID_index = url_str.toLowerCase().indexOf(";jsessionid="); 650 //String jsessionID = ""; 651 if(jsessionID_index >= 0) { 652 //jsessionID = url_str.substr(jsessionID_index); 653 url_str = url_str.substring(0, jsessionID_index); 654 655 } 656 */ 657 621 658 if(state == HREF_FINAL ) { 622 659 try { … … 663 700 664 701 static private String guessContentType(String text) { 702 703 int jsessionID_index = text.toLowerCase().indexOf(";jsessionid="); 704 if(jsessionID_index >= 0) { 705 text = text.substring(0, jsessionID_index); 706 } 707 665 708 if(text.endsWith("/")) { 666 709 return "text/html"; … … 677 720 if(urlStr.indexOf(".") == -1) { // if no filetype specified, assume HTML? 678 721 return "text/html"; 722 } else if (urlStr.indexOf(".jpg") != -1 || urlStr.indexOf(".jpeg") != -1) { 723 return "image/jpeg"; 679 724 } 680 725 } -
main/trunk/greenstone3/src/java/org/greenstone/applet/GsdlCollageApplet/DownloadUrls.java
r38855 r38871 23 23 // for GS3 24 24 String gs3CollImgPath = null; 25 String baseURL = null; 25 26 26 27 /** Refers to applet */ … … 113 114 } 114 115 115 public void setupForGS3(String gs3CollImgPath )116 public void setupForGS3(String gs3CollImgPath, String baseURL) 116 117 { 117 118 this.gs3CollImgPath = gs3CollImgPath; 119 this.baseURL = baseURL; 118 120 } 119 121 … … 199 201 String url_lstring = url_string.toLowerCase(); 200 202 203 204 // greenstone3 can add jsessionids at end, which messes up image file extension detection 205 int jsessionID_index = url_lstring.indexOf(";jsessionid="); 206 if(jsessionID_index >= 0) { 207 url_lstring = url_lstring.substring(0, jsessionID_index); 208 } 209 201 210 if (image_type_ == null) 202 211 return true; … … 265 274 266 275 image_mustnothave_ = original_image_mustnothave_; 267 276 268 277 if (image_mustnothave_ != null && url_string.indexOf(image_mustnothave_) >= 0) { 269 278 image_mustnothave_ = original_image_mustnothave_; … … 279 288 } 280 289 290 } else { // already visited this image link 291 System.err.println("\t####" + url_string + " already visited - filter_image returning false"); 292 // Isn't it that if we've already visited the image link once before, 293 // we've dealt with it anyway once before (in one way or another: decided it 294 // didn't pass the filter, or added the image for download if it did pass the 295 // filters ) so we don't process this image again again? 296 return false; 281 297 } 282 298 283 299 } 284 300 285 301 return true; 286 302 } … … 376 392 377 393 if (image_visited(url.toString(),img_name)) return; 378 379 /*if(app_.gsdlversion == 3) {380 if(url.toString().indexOf(this.gs3CollImgPath) == -1) {381 return;382 }383 }*/384 394 385 395 int size = download_images_.downloadImage(tracker,url, from_url, img_name); … … 421 431 422 432 // connect to the url 423 CURL curl = new CURL(new_url); 433 CURL curl = (app_.gsdlversion == 3) ? 
new CURL(new_url, this.baseURL) : new CURL(new_url); 434 424 435 if (curl.connected_ok()) 425 436 { … … 436 447 // get all the <code><img src=</code> links into a vector 437 448 Vector src_links = curl.getSrcLinks(); 438 439 449 440 450 if (verbosity_ >= 2) { … … 490 500 // get all the <code><a href=</code> links into a vector 491 501 Vector href_links = curl.getHrefLinks(); 492 493 502 494 503 if (verbosity_ >= 2) { … … 513 522 514 523 img_name = url_string.substring(url_string.lastIndexOf("/") + 1, url_string.length()); 515 524 if (verbosity_ >= 2) { 525 System.err.println(" Filtered: href_link[" + i + "] = " + url_string); 526 } 516 527 if ((external_links_ != null) && (!external_links_.isEmpty())) { 517 528 String ext = (String) external_links_.get(img_name); -
main/trunk/greenstone3/src/java/org/greenstone/applet/GsdlCollageApplet/GsdlCollageApplet.java
r38855 r38871 26 26 String collection = null; 27 27 String gs3CollImgPath = null; 28 28 String baseURL = null; 29 29 30 /** Amount of error checking output produced <br> 30 31 * Ranges from 0 - no output to 3 - maximum output */ … … 213 214 } 214 215 } 215 else{216 document_root = gwcgi;217 }216 //else{ 217 //document_root = "greenstone3"; 218 //} 218 219 } 219 220 … … 267 268 if(library != null && site != null && collection != null) { 268 269 this.gs3CollImgPath = library + "/sites/" + site + "/collect/" + collection; 270 if(href_musthave == null) { 271 href_musthave = this.gs3CollImgPath; 272 } else { 273 href_musthave += "%" + this.gs3CollImgPath; 274 } 269 275 if(verbosity_ >= 3) { 270 System.err.println(" Will keep an eye out forgs3CollImgPath: " + gs3CollImgPath);276 System.err.println("href_musthave includes gs3CollImgPath: " + gs3CollImgPath); 271 277 } 272 278 } 279 // starting URL (image_url) may not be base_url 280 this.baseURL = getParameter("baseurl"); 281 // TODO: is it not the other way: gwcgi should be param set to base_url 282 // and image_url/starting_url should be obtained from parameters also? 273 283 } 274 284 } … … 284 294 image_ignore, imageType_,document_root,verbosity_,trk); 285 295 286 download_thread_.setupForGS3(this.gs3CollImgPath );296 download_thread_.setupForGS3(this.gs3CollImgPath, this.baseURL); 287 297 288 298 // starts the display image thread with the currently downloaded images … … 410 420 * 411 421 * If the URL is a CGI script URL, it should be tidied up so that it is 412 * appropriate to tag eattrib=value pairs on the end. This means it422 * appropriate to tag attrib=value pairs on the end. This means it 413 423 * must either end with a "?" or (if it contains a question-mark 414 424 * internally) end with a "&". */ -
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GsdlCollageBrowse.java
r38855 r38871 95 95 //app_info += "<param name='gwcgi' value='http://localhost:8383/greenstone3/library' /><param name='classifier' value='CL3.1' /><param name='hrefMustHave' value='CL3' /><param name='imageMustNotHave' value='hl=%x=%gt=%gc=%.pr' /><param name='imageType' value='.jpg%%.png' /><param name='maxDepth' value='500' /><param name='maxDisplay' value='25' /><param name='refreshDelay' value='1500' /><param name='isJava2' value='auto' /><param name='bgcolor' value='#96c29a' />The Collage Applet.</"+GSXML.APPLET_ELEM+">"; // TODO 96 96 app_info += "<param name='gsdlversion' value='3' />"; 97 app_info += "<param name='documentroot' value='library/collection/" + this.cluster_name + "/'/>"; 98 app_info += "<param name='sitename' value='localsite' />"; 99 app_info += "<param name='verbosity' value='3' />"; 100 app_info += "<param name='gwcgi' value='http://localhost:8383/greenstone3/library/collection/smallbea/' /><param name='classifier' value='CL2.3' /><param name='hrefMustHave' value='smallbea' /><param name='imageMustNotHave' value='hl=%x=%gt=%gc=%.pr' /><param name='imageType' value='.jpg%%.png' /><param name='maxDepth' value='500' /><param name='maxDisplay' value='25' /><param name='refreshDelay' value='1500' /><param name='isJava2' value='auto' /><param name='bgcolor' value='#96c29a' />The Collage Applet.</"+GSXML.APPLET_ELEM+">"; // TODO 97 app_info += "<param name='baseurl' value='http://localhost:8383/greenstone3/' />"; 98 //app_info += "<param name='documentroot' value='library/collection/" + this.cluster_name + "/'/>"; 99 app_info += "<param name='documentroot' value='greenstone3'/>\n"; 100 app_info += "<param name='sitename' value='localsite' />\n"; 101 app_info += "<param name='verbosity' value='3' />\n"; 102 app_info += "<param name='imageType' value='.jpg%%.png' />\n"; 103 //app_info += "<param name='hrefMustHave' value='smallbea' />\n"; 104 //app_info += "<param name='imageMustNotHave' value='hl=%x=%gt=%gc=%.pr' />\n"; 105 app_info += "<param 
name='imageMustNotHave' value='interfaces/' />\n"; 106 app_info += "<param name='gwcgi' value='http://localhost:8383/greenstone3/library/collection/smallbea/' /><param name='classifier' value='CL2.3' /><param name='maxDepth' value='500' /><param name='maxDisplay' value='25' /><param name='refreshDelay' value='1500' /><param name='isJava2' value='auto' /><param name='bgcolor' value='#96c29a' />The Collage Applet.</"+GSXML.APPLET_ELEM+">"; // TODO 101 107 102 108 Document dom = this.converter.getDOM(app_info);
Note: See TracChangeset for help on using the changeset viewer.