Changeset 17235 for gli


Ignore:
Timestamp:
09/10/08 19:25:45 (12 years ago)
Author:
ak19
Message:

Added method getRedirectURL()--along with helper functions--to bypass wget's unintuitive way of dealing with URLs that refer to dir-listings where the final slash is missing, while still continuing to work properly with URLs like nzdl.org/niupepa which redirect to entirely different URLs. DownloadButtonListener.actionPerformed() now consults getRedirectURL() to work out the real URL of the resource, prior to launching Wget with the URL to be retrieved.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gli/trunk/src/org/greenstone/gatherer/gui/DownloadPane.java

    r14254 r17235  
    223223        String launch = launch_str.toString();     
    224224        System.err.println("*** launch = " + launch);
    225        
     225
    226226        URL launch_url = new URL(launch);
    227227        URLConnection launch_connection = launch_url.openConnection();
     
    495495    implements ActionListener {
    496496    public void actionPerformed(ActionEvent event) {
    497        
     497
    498498        if(checkURL(true) && checkProxy() == true) {
    499499       
     500        // Proxy settings are now set. Check that the url is not a redirect, else get
     501        // redirect url (we do this step in order to avoid some unintuitive behaviour from wget)
     502        Download current_download = (Download)download_map.get(mode);
     503        Argument arg_url = current_download.getArgument("url");
     504        String url_str = arg_url.getValue();
     505        String redirect_url_str = getRedirectURL(url_str);
     506       
     507        // only update the Argument and its GUI ArgumentControl if the URL
     508        // has in fact changed
     509        if(!url_str.equals(redirect_url_str)) {
     510            arg_url.setValue(redirect_url_str);
     511            updateArgument(arg_url, redirect_url_str);
     512        }
     513   
    500514        getter.newDownloadJob((Download)download_map.get(mode) ,mode,proxy_url);
    501515        }
    502516    }
    503517    }
     518
     519    /**
     520     * The Java code here will retrieve the page at the given url. If the response code is
     521     * a redirect, it will get the redirect url so that wget may be called with the proper url.
     522     * This preprocessing of the URL is necessary because:
     523     * Wget does not behave the way the browser does when faced with urls of the form
     524     * http://www.englishhistory.net/tudor/citizens and if that page does not exist.
     525     * The directory listing with a slash at the end (http://www.englishhistory.net/tudor/citizens/)
     526     * does exist, however. In order to prevent wget from assuming that the root URL
     527     * to traverse is http://www.englishhistory.net/tudor/ instead of the intended
     528     * http://www.englishhistory.net/tudor/citizens/, we need give wget the redirect location
     529     * that's returned when we initially make a request for http://www.englishhistory.net/tudor/citizens
     530     * The proper url is sent back in the Location header, allowing us to bypass wget's
     531     * unexpected behaviour.
     532     * This method ensures that urls like http://www.nzdl.org/niupepa also continue to work:
     533     * there is no http://www.nzdl.org/niupepa/ page, because this url actually redirects to an
     534     * entirely different URL.
     535     * @return the redirect url for the given url if any redirection is involved, or the
     536     * url_str.
     537     */
     538    private String getRedirectURL(String url_str) {
     539    HttpURLConnection connection = null;
     540    if(url_str.startsWith("http:")) { // only test http urls
     541        try {
     542        URL url = new URL(url_str);
     543        connection = (HttpURLConnection)url.openConnection(); //new HttpURLConnection(url);
     544        // don't let it automatically follow redirects, since we want to
     545        // find out whether we are dealing with redirects in the first place
     546        connection.setInstanceFollowRedirects(false);
     547       
     548        // now check for whether we get a redirect response
     549        // HTTP Codes 3xx are redirects, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
     550        int responseCode = connection.getResponseCode();
     551        if(responseCode >= 300 && responseCode < 400) {
     552            String responseMsg = connection.getResponseMessage();
     553
     554            // Get the Location header since this specifies the new location of the resource
     555            String location = connection.getHeaderField("Location");
     556           
     557            // this becomes the url that wget should download from
     558            url_str = location.trim();
     559        }
     560       
     561        connection.disconnect();
     562        } catch(Exception e) {
     563        if(connection != null) {
     564            connection.disconnect();
     565        }
     566        System.err.println("Checking redirection. Tried to connect to "
     567                   + url_str + ",\nbut got exception: " + e);
     568        }       
     569    }
     570
     571    return url_str;
     572    }
     573
    504574   
     575    /** For a string-based Argument whose value has changed, this method
     576     * updates the GUI ArgumentControl's value correspondingly. */
     577    private void updateArgument(Argument arg, String value) {
     578    for(int i = 0; i < options_pane.getComponentCount(); i++) {
     579        Component component = options_pane.getComponent(i);
     580        if(component instanceof ArgumentControl) {
     581        ArgumentControl control = (ArgumentControl)component;
     582        if(control.getArgument() == arg) {
     583            control.setValue(value);
     584            control.repaint();
     585        }
     586        }
     587    }
     588    }
    505589
    506590    private boolean checkURL(boolean checkRequired){
Note: See TracChangeset for help on using the changeset viewer.