Changeset 17235

Show
Ignore:
Timestamp:
10.09.2008 19:25:45 (11 years ago)
Author:
ak19
Message:

To bypass wget's unintuitive way of dealing with URLs that refer to directory listings where the final slash is missing (while still working properly with URLs like nzdl.org/niupepa, which redirect to entirely different URLs), added the method getRedirectURL(), along with helper functions. It is consulted by DownloadButtonListener.actionPerformed() to work out the real URL of the resource prior to launching Wget with the URL to be retrieved.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gli/trunk/src/org/greenstone/gatherer/gui/DownloadPane.java

    r14254 r17235  
    223223        String launch = launch_str.toString();       
    224224        System.err.println("*** launch = " + launch); 
    225          
     225 
    226226        URL launch_url = new URL(launch); 
    227227        URLConnection launch_connection = launch_url.openConnection(); 
     
    495495    implements ActionListener { 
    496496    public void actionPerformed(ActionEvent event) { 
    497          
     497 
    498498        if(checkURL(true) && checkProxy() == true) { 
    499499         
     500        // Proxy settings are now set. Check that the url is not a redirect, else get 
     501        // redirect url (we do this step in order to avoid some unintuitive behaviour from wget) 
     502        Download current_download = (Download)download_map.get(mode); 
     503        Argument arg_url = current_download.getArgument("url"); 
     504        String url_str = arg_url.getValue(); 
     505        String redirect_url_str = getRedirectURL(url_str); 
     506         
     507        // only update the Argument and its GUI ArgumentControl if the URL 
     508        // has in fact changed 
     509        if(!url_str.equals(redirect_url_str)) { 
     510            arg_url.setValue(redirect_url_str); 
     511            updateArgument(arg_url, redirect_url_str); 
     512        } 
     513     
    500514        getter.newDownloadJob((Download)download_map.get(mode) ,mode,proxy_url); 
    501515        } 
    502516    } 
    503517    } 
     518 
     519    /** 
     520     * The Java code here will retrieve the page at the given url. If the response code is  
     521     * a redirect, it will get the redirect url so that wget may be called with the proper url.  
     522     * This preprocessing of the URL is necessary because: 
     523     * Wget does not behave the way the browser does when faced with urls of the form 
     524     * http://www.englishhistory.net/tudor/citizens and if that page does not exist. 
     525     * The directory listing with a slash at the end (http://www.englishhistory.net/tudor/citizens/) 
     526     * does exist, however. In order to prevent wget from assuming that the root URL 
     527     * to traverse is http://www.englishhistory.net/tudor/ instead of the intended 
     528     * http://www.englishhistory.net/tudor/citizens/, we need give wget the redirect location 
     529     * that's returned when we initially make a request for http://www.englishhistory.net/tudor/citizens 
     530     * The proper url is sent back in the Location header, allowing us to bypass wget's  
     531     * unexpected behaviour. 
     532     * This method ensures that urls like http://www.nzdl.org/niupepa also continue to work: 
     533     * there is no http://www.nzdl.org/niupepa/ page, because this url actually redirects to an  
     534     * entirely different URL. 
     535     * @return the redirect url for the given url if any redirection is involved, or the 
     536     * url_str. 
     537     */ 
     538    private String getRedirectURL(String url_str) { 
     539    HttpURLConnection connection = null; 
     540    if(url_str.startsWith("http:")) { // only test http urls 
     541        try { 
     542        URL url = new URL(url_str);  
     543        connection = (HttpURLConnection)url.openConnection(); //new HttpURLConnection(url); 
     544        // don't let it automatically follow redirects, since we want to 
     545        // find out whether we are dealing with redirects in the first place 
     546        connection.setInstanceFollowRedirects(false); 
     547         
     548        // now check for whether we get a redirect response 
     549        // HTTP Codes 3xx are redirects, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html 
     550        int responseCode = connection.getResponseCode(); 
     551        if(responseCode >= 300 && responseCode < 400) {  
     552            String responseMsg = connection.getResponseMessage(); 
     553 
     554            // Get the Location header since this specifies the new location of the resource 
     555            String location = connection.getHeaderField("Location"); 
     556             
     557            // this becomes the url that wget should download from 
     558            url_str = location.trim(); 
     559        } 
     560         
     561        connection.disconnect(); 
     562        } catch(Exception e) { 
     563        if(connection != null) { 
     564            connection.disconnect(); 
     565        } 
     566        System.err.println("Checking redirection. Tried to connect to " 
     567                   + url_str + ",\nbut got exception: " + e); 
     568        }        
     569    }  
     570 
     571    return url_str; 
     572    } 
     573 
    504574     
     575    /** For a string-based Argument whose value has changed, this method 
     576     * updates the GUI ArgumentControl's value correspondingly. */ 
     577    private void updateArgument(Argument arg, String value) { 
     578    for(int i = 0; i < options_pane.getComponentCount(); i++) { 
     579        Component component = options_pane.getComponent(i); 
     580        if(component instanceof ArgumentControl) { 
     581        ArgumentControl control = (ArgumentControl)component; 
     582        if(control.getArgument() == arg) { 
     583            control.setValue(value); 
     584            control.repaint(); 
     585        } 
     586        } 
     587    } 
     588    } 
    505589 
    506590    private boolean checkURL(boolean checkRequired){