Ignore:
Timestamp:
2024-03-24T22:31:53+13:00 (3 months ago)
Author:
anupama
Message:

I've got the CURL and DownloadUrls methods now finding the right image URLs and suffixing them to the correct baseURL. I'm still hardcoding some applet params, including new ones, into the Java code to get it this far. However, now that the image URLs are being worked out properly, I'm hitting a security exception when it tries to download the first correct image. Some googling seemed to indicate that the applet needs to be signed or not be running in a sandbox. I'm wondering whether the appletviewer, not being the server, doesn't allow the applet to access images on a distinct server URL, and whether, if I could get the applet running as webswing on the server itself, it would have the right permissions to access/download the images. That will be the next step. If that doesn't work, I will need to first try to rewrite this JApplet as an application and see if that change makes a difference.

Location:
main/trunk/greenstone3/src/java/org/greenstone
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone3/src/java/org/greenstone/applet/GsdlCollageApplet/CURL.java

    r38855 r38871  
    3838         //input = url.openStream();
    3939
     40    }
     41    catch (MalformedURLException e) {
     42         
     43        url_valid = false;
     44    }
     45    catch (IOException e) {
     46     
     47        url_valid = false;
     48    }
     49    }
     50
     51    /** Starts processing the given url for images and links
     52     * This variant of the constructor is needed for GS3.
     53     *  @param url_str The url to examine */
     54    public CURL(String url_str, String base_url) {
     55    href_links = new Vector();
     56    src_links = new Vector();
     57    link_links = new Vector();
     58    background_links = new Vector();
     59
     60    try {
     61         url = new URL(url_str);         
     62         input = url.openStream();
     63         // hereafter, we will build up URLs by concatenating to baseURL not url_str
     64         url = new URL(base_url);
    4065    }
    4166    catch (MalformedURLException e) {
     
    618643       
    619644        url_str =   url_str.replaceAll("&amp;","&");
    620    
     645
     646    // Greenstone3 adds jsessionIDs at the end of URLs,
     647    // remove them, as they mess up some things, such as filetype and image identification
     648    /*
     649    int jsessionID_index = url_str.toLowerCase().indexOf(";jsessionid=");
     650    //String jsessionID = "";
     651    if(jsessionID_index >= 0) {
     652        //jsessionID = url_str.substr(jsessionID_index);
     653        url_str = url_str.substring(0, jsessionID_index);
     654
     655    }
     656    */
     657
    621658    if(state == HREF_FINAL ) {
    622659        try {
     
    663700   
    664701    static private String guessContentType(String text) {
     702
     703    int jsessionID_index = text.toLowerCase().indexOf(";jsessionid=");
     704    if(jsessionID_index >= 0) {
     705        text = text.substring(0, jsessionID_index);
     706    }
     707   
    665708    if(text.endsWith("/")) {
    666709        return "text/html";
     
    677720        if(urlStr.indexOf(".") == -1) { // if no filetype specified, assume HTML?
    678721        return "text/html";
     722        } else if (urlStr.indexOf(".jpg") != -1 || urlStr.indexOf(".jpeg") != -1) {
     723        return "image/jpeg";
    679724        }
    680725    }
  • main/trunk/greenstone3/src/java/org/greenstone/applet/GsdlCollageApplet/DownloadUrls.java

    r38855 r38871  
    2323    // for GS3   
    2424    String gs3CollImgPath = null;
     25    String baseURL = null;
    2526   
    2627    /** Refers to applet */
     
    113114    }
    114115
    115     public void setupForGS3(String gs3CollImgPath)
     116    public void setupForGS3(String gs3CollImgPath, String baseURL)
    116117    {
    117118    this.gs3CollImgPath = gs3CollImgPath;
     119    this.baseURL = baseURL;
    118120    }
    119121 
     
    199201    String url_lstring = url_string.toLowerCase();
    200202
     203
     204    // greenstone3 can add jsessionids at end, which messes up image file extension detection
     205    int jsessionID_index = url_lstring.indexOf(";jsessionid=");
     206    if(jsessionID_index >= 0) {
     207        url_lstring = url_lstring.substring(0, jsessionID_index);
     208    }
     209   
    201210    if (image_type_ == null)
    202211        return true;
     
    265274
    266275            image_mustnothave_ = original_image_mustnothave_;
    267                    
     276           
    268277            if (image_mustnothave_ != null && url_string.indexOf(image_mustnothave_) >= 0) {
    269278            image_mustnothave_ = original_image_mustnothave_;
     
    279288        }
    280289   
     290        } else { // already visited this image link
     291        System.err.println("\t####" + url_string + " already visited - filter_image returning false");
     292        // Isn't it that if we've already visited the image link once before,
     293        // we've dealt with it anyway once before (in one way or another: decided it
     294        // didn't pass the filter, or added the image for download if it did pass the
     295        // filters ) so we don't process this image again again?
     296        return false;
    281297        }
    282298           
    283299    }
    284    
     300   
    285301    return true;
    286302    }
     
    376392           
    377393           if (image_visited(url.toString(),img_name)) return;
    378 
    379        /*if(app_.gsdlversion == 3) {
    380            if(url.toString().indexOf(this.gs3CollImgPath) == -1) {
    381            return;
    382            }
    383            }*/
    384394
    385395       int size = download_images_.downloadImage(tracker,url, from_url, img_name);
     
    421431
    422432    // connect to the url
    423     CURL curl = new CURL(new_url);
     433    CURL curl = (app_.gsdlversion == 3) ? new CURL(new_url, this.baseURL) : new CURL(new_url);
     434
    424435    if (curl.connected_ok())
    425436    {
     
    436447        // get all the <code><img src=</code> links into a vector
    437448        Vector src_links = curl.getSrcLinks();
    438 
    439449               
    440450        if (verbosity_ >= 2) {
     
    490500        // get all the <code><a href=</code> links into a vector
    491501        Vector href_links = curl.getHrefLinks();
    492        
    493502     
    494503        if (verbosity_ >= 2) {
     
    513522                                       
    514523                    img_name = url_string.substring(url_string.lastIndexOf("/") + 1, url_string.length());
    515 
     524                    if (verbosity_ >= 2) {
     525                    System.err.println("    Filtered: href_link[" + i + "] = " + url_string);
     526                    }
    516527                    if ((external_links_ != null) && (!external_links_.isEmpty())) {
    517528                    String ext = (String) external_links_.get(img_name);
  • main/trunk/greenstone3/src/java/org/greenstone/applet/GsdlCollageApplet/GsdlCollageApplet.java

    r38855 r38871  
    2626    String collection = null;
    2727    String gs3CollImgPath = null;
    28    
     28    String baseURL = null;
     29   
    2930    /** Amount of error checking output produced <br>
    3031     *  Ranges from 0 - no output to 3 - maximum output */
     
    213214        }
    214215        }
    215         else{
    216         document_root = gwcgi; 
    217         }
     216        //else{
     217        //document_root = "greenstone3";
     218        //}
    218219    }
    219220
     
    267268        if(library != null && site != null && collection != null) {
    268269            this.gs3CollImgPath = library + "/sites/" + site + "/collect/" + collection;
     270            if(href_musthave == null) {
     271                href_musthave = this.gs3CollImgPath;
     272            } else {
     273            href_musthave += "%" + this.gs3CollImgPath;
     274            }         
    269275            if(verbosity_ >= 3) {
    270             System.err.println("Will keep an eye out for gs3CollImgPath: " + gs3CollImgPath);
     276            System.err.println("href_musthave includes gs3CollImgPath: " + gs3CollImgPath);
    271277            }
    272278        }
     279        // starting URL (image_url) may not be base_url
     280        this.baseURL = getParameter("baseurl");
     281        // TODO: is it not the other way: gwcgi should be param set to base_url
     282        // and image_url/starting_url should be obtained from parameters also?
    273283        }
    274284    }
     
    284294                        image_ignore, imageType_,document_root,verbosity_,trk);
    285295
    286     download_thread_.setupForGS3(this.gs3CollImgPath);
     296    download_thread_.setupForGS3(this.gs3CollImgPath, this.baseURL);
    287297   
    288298    // starts the display image thread with the currently downloaded images
     
    410420     *
    411421     * If the URL is a CGI script URL, it should be tidied up so that it is
    412      * appropriate to tage attrib=value pairs on the end.  This means it
     422     * appropriate to tag attrib=value pairs on the end.  This means it
    413423     * must either end with a "?" or (if it contains a question-mark
    414424     * internally) end with a "&". */
  • main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GsdlCollageBrowse.java

    r38855 r38871  
    9595    //app_info += "<param name='gwcgi' value='http://localhost:8383/greenstone3/library' /><param name='classifier' value='CL3.1' /><param name='hrefMustHave' value='CL3' /><param name='imageMustNotHave' value='hl=%x=%gt=%gc=%.pr' /><param name='imageType' value='.jpg%%.png' /><param name='maxDepth' value='500' /><param name='maxDisplay' value='25' /><param name='refreshDelay' value='1500' /><param name='isJava2' value='auto' /><param name='bgcolor' value='#96c29a' />The Collage Applet.</"+GSXML.APPLET_ELEM+">"; // TODO
    9696    app_info += "<param name='gsdlversion' value='3' />";
    97     app_info += "<param name='documentroot' value='library/collection/" + this.cluster_name + "/'/>";
    98     app_info += "<param name='sitename' value='localsite' />";
    99     app_info += "<param name='verbosity' value='3' />";
    100     app_info += "<param name='gwcgi' value='http://localhost:8383/greenstone3/library/collection/smallbea/' /><param name='classifier' value='CL2.3' /><param name='hrefMustHave' value='smallbea' /><param name='imageMustNotHave' value='hl=%x=%gt=%gc=%.pr' /><param name='imageType' value='.jpg%%.png' /><param name='maxDepth' value='500' /><param name='maxDisplay' value='25' /><param name='refreshDelay' value='1500' /><param name='isJava2' value='auto' /><param name='bgcolor' value='#96c29a' />The Collage Applet.</"+GSXML.APPLET_ELEM+">"; // TODO
     97    app_info += "<param name='baseurl' value='http://localhost:8383/greenstone3/' />";
     98    //app_info += "<param name='documentroot' value='library/collection/" + this.cluster_name + "/'/>";
     99    app_info += "<param name='documentroot' value='greenstone3'/>\n";
     100    app_info += "<param name='sitename' value='localsite' />\n";
     101    app_info += "<param name='verbosity' value='3' />\n";
     102    app_info += "<param name='imageType' value='.jpg%%.png' />\n";
     103    //app_info += "<param name='hrefMustHave' value='smallbea' />\n";
     104    //app_info += "<param name='imageMustNotHave' value='hl=%x=%gt=%gc=%.pr' />\n";
     105    app_info += "<param name='imageMustNotHave' value='interfaces/' />\n";
     106    app_info += "<param name='gwcgi' value='http://localhost:8383/greenstone3/library/collection/smallbea/' /><param name='classifier' value='CL2.3' /><param name='maxDepth' value='500' /><param name='maxDisplay' value='25' /><param name='refreshDelay' value='1500' /><param name='isJava2' value='auto' /><param name='bgcolor' value='#96c29a' />The Collage Applet.</"+GSXML.APPLET_ELEM+">"; // TODO
    101107   
    102108    Document dom = this.converter.getDOM(app_info);
Note: See TracChangeset for help on using the changeset viewer.