Changeset 11419
- Timestamp:
- 2006-03-21T09:32:05+12:00 (18 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/java/org/nzdl/gsdl/GsdlCollageApplet/DownloadUrls.java
r7334 r11419 30 30 String starting_url_ = null; 31 31 32 /** the root directory of Greenstone*/ 33 String document_root_ = null; 34 35 32 36 /** CHRIS - Holds the contents of the collection's assoc directory */ 33 37 // File[] assocDir_ = null; … … 59 63 boolean thread_running_ = true; 60 64 61 int verbosity_ ;65 int verbosity_ = 0; 62 66 63 67 /** Constructor to initialise a download thread from which images are found, … … 74 78 DownloadImages download_images, String starting_url, 75 79 String href_musthave, String image_mustnothave, 76 String image_ignore, String image_type, int verbosity)80 String image_ignore, String image_type, String document_root,int verbosity) 77 81 { 78 82 super("DownloadUrls"); … … 85 89 image_ignore_ = image_ignore; 86 90 image_type_ = image_type; 87 91 document_root_ = document_root; 88 92 verbosity_ = verbosity; 93 94 95 System.err.println("starting_url_ " + starting_url +"\n"+ 96 "href_musthave_ " + href_musthave +"\n"+ 97 "image_mustnothave_" + image_mustnothave+"\n"+ 98 "image_ignore_ "+ image_ignore+"\n"+ 99 "image_type_ "+ image_type+"\n"+ 100 "document root "+ document_root_ 101 ); 102 103 104 105 89 106 } 90 107 … … 137 154 138 155 tmp = image_type_.substring(0, image_type_.indexOf("%")); 156 139 157 if (image_type_.length() > image_type_.indexOf("%") + 1) 140 image_type_ = image_type_.substring(image_type_.indexOf("%") + 1, image_type_.length()); 158 image_type_ = image_type_.substring(image_type_.indexOf("%") + 1, image_type_.length()); 141 159 else 142 160 image_type_ = null; 143 144 if ( url_lstring.endsWith(tmp)) {161 162 if (!tmp.trim().equals("") && url_lstring.endsWith(tmp)) { 145 163 image_type_ = original_image_type_; 146 164 return true; … … 164 182 public boolean filter_image(String url_string) 165 183 { 184 166 185 if (image_ignore_==null || !url_string.startsWith(image_ignore_)) 167 186 { … … 181 200 else 182 201 image_mustnothave_ = null; 183 184 if (url_string.indexOf(tmp) >= 0) { 202 203 204 205 if (!tmp.trim().equals("") && url_string.indexOf(tmp) >= 0) { 206 185 207 image_mustnothave_ = original_image_mustnothave_; 186 208 return false; 187 209 } 188 } 189 210 } 211 212 image_mustnothave_ = original_image_mustnothave_; 213 190 214 if (image_mustnothave_ != null && url_string.indexOf(image_mustnothave_) >= 0) { 191 215 image_mustnothave_ = original_image_mustnothave_; … … 198 222 199 223 image_mustnothave_ = original_image_mustnothave_; 200 return true;224 201 225 } 202 } 203 } 204 205 return false; 226 227 } 228 229 } 230 231 return true; 206 232 } 207 233 … … 351 377 public void rec_add_images(String new_url, int depth) 352 378 { 353 System.err.println("Parsing url = " + new_url); 379 380 381 System.err.println("Parsing url = " + new_url); 354 382 355 383 if (already_visited(new_url)) return; … … 384 412 URL url = (URL)src_links.elementAt(i); 385 413 String url_string = url.toString(); 414 415 //System.err.println(" source links " + i + " [" + url_string +"]"); 386 416 387 417 if (verbosity_ >= 3) { … … 394 424 { 395 425 img_name = url_string.substring(url_string.lastIndexOf("/") + 1, url_string.length()); 396 426 397 427 if (verbosity_ >= 2) { 398 428 System.err.println(" Filtered: src_link[" + i + "] = " + url_string); … … 401 431 if ((external_links_ != null) && (!external_links_.isEmpty())) { 402 432 String ext = (String) external_links_.get(img_name); 403 404 if (ext != null) 433 434 435 if (ext != null){ 405 436 add_image(url, ext, img_name); 406 else 437 438 } 439 else{ 440 407 441 add_image(url, new_url, img_name); 442 } 408 443 } 409 444 else { 445 410 446 add_image(url, new_url, img_name); 411 447 } 448 449 412 450 } 451 413 452 } 414 453 … … 418 457 Vector href_links = curl.getHrefLinks(); 419 458 459 460 if (verbosity_ >= 2) { 461 System.err.println(" Got href links... there are " + href_links.size() + " of them."); 462 } 463 464 420 465 // process each of the href links according to the parameters given. 421 466 for (int i = 0; i < href_links.size(); i++) … … 423 468 URL url = (URL)href_links.elementAt(i); 424 469 String url_string = url.toString(); 425 470 471 //System.err.println(" href links " + i + "[" + url_string +"]"); 472 473 426 474 if (image_file_extension(url_string)) 427 475 { 476 428 477 if (filter_image(url_string)) 478 429 479 { 430 480 img_name = url_string.substring(url_string.lastIndexOf("/") + 1, url_string.length()); … … 447 497 if (filter_href(url_string,new_url,depth)) 448 498 { 499 500 System.out.println("*************************************"); 449 501 rec_add_images(url_string,depth+1); 450 502 … … 471 523 try { 472 524 473 if (starting_url_.indexOf( "gsdl") >= 0) {525 if (starting_url_.indexOf(document_root_) >= 0 ){ 474 526 external_links_ = null; 475 527 return; … … 519 571 System.err.println("Starting download thread."); 520 572 visited_url_ = new Hashtable(); 521 rec_add_images(starting_url_,1); 573 574 rec_add_images(starting_url_,1); 522 575 523 576 System.err.println("Download thread finished.");
Note:
See TracChangeset
for help on using the changeset viewer.