Changeset 31851 for main/trunk


Ignore:
Timestamp:
2017-08-03T19:28:08+12:00 (7 years ago)
Author:
ak19
Message:
  1. Fixes to get proxying to work on Windows. 2. Fixes to time out if a page doesn't exist and it takes forever to read. Both for downloading from a URL and getting server info (Perl code), and also in Java code, when doing a getRedirectURL(). Generally, a URL is correct and when wget is launched, a cancel operation in the Java GUI successfully causes an interrupt which then terminates wget. However, if the URL doesn't exist, either when getting server info or when downloading, the wget launched by the Perl code seems to block or something, and the interrupt is not noticed until the wget is manually terminated through the task manager. Then the interrupt is finally noticed. If pages indicated that they don't exist, then it wouldn't have been a problem. This issue is now circumvented by setting a read timeout, to stop retrieving pages that don't exist but that take forever to access anyway as they don't indicate that they don't exist. A connect timeout is for when you get the proxy details wrong or something like that and it takes forever to connect.
Location:
main/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/gli/src/org/greenstone/gatherer/gui/DownloadPane.java

    r31843 r31851  
    613613    boolean noMoreRedirects = false;
    614614    boolean gotException = false;
    615 
     615    final int TIMEOUT = 2 * 1000; // ms
     616   
    616617    HttpURLConnection connection = null;
    617618    if(url_str.startsWith("http:") || url_str.startsWith("https:")) { // only test http urls
     
    625626        // find out whether we are dealing with redirects in the first place
    626627        connection.setInstanceFollowRedirects(false);
    627         connection.setConnectTimeout(5 * 1000); // ms
     628        // Connection timeout: if we can't connect, like if the proxy is wrong, don't wait forever
     629        // Read timeout: *idle time* when retrieving a link. Don't wait forever to retrieve a page (e.g. if page doesn't exist)
     630        // https://stackoverflow.com/questions/6829801/httpurlconnection-setconnecttimeout-has-no-effect
     631        connection.setConnectTimeout(TIMEOUT);
     632        connection.setReadTimeout(TIMEOUT);
    628633       
    629634        // now check for whether we get a redirect response
  • main/trunk/greenstone2/perllib/downloaders/WebDownload.pm

    r17530 r31851  
    115115    }
    116116    #my $cmdWget = "-N -k -x -t 2 -P \"".$hashGeneralOptions->{"cache_dir"}."\" $strWgetOptions $strOptions ".$self->{'url'};
    117     my $cmdWget = "-N -k -x -t 2  $strWgetOptions $strOptions $cache_dir " .$self->{'url'};   
    118 
     117    my $cmdWget = "-N -k -x -t 2 --read-timeout=2 --connect-timeout=2 $strWgetOptions $strOptions $cache_dir " .$self->{'url'};   
     118
     119    #print STDOUT "\n@@@@ RUNNING WGET CMD: $cmdWget\n\n";
     120   
    119121    # Download the web pages
    120122    # print "Start download from $self->{'url'}...\n";
     
    186188    my $strOptions = $self->getWgetOptions();
    187189
    188     my $strBaseCMD = $strOptions." -q -O - \"$self->{'url'}\"";
     190    my $strBaseCMD = $strOptions." --timeout=4 --tries=1 -q -O - \"$self->{'url'}\"";
    189191
    190192 
  • main/trunk/greenstone2/perllib/downloaders/WgetDownload.pm

    r30520 r31851  
    167167    {
    168168
    169     $strOptions .= " -e httpproxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
     169    if($self->{'url'} =~ m/^https\:/) {
     170        $strOptions .= " -e https_proxy=$self->{'proxy_host'}:$self->{'proxy_port'} "; 
     171    } else {
     172        $strOptions .= " -e http_proxy=$self->{'proxy_host'}:$self->{'proxy_port'} ";
     173    }   
    170174
    171175    if ($self->{'user_name'} && $self->{'user_password'})
     
    179183    }
    180184
     185    if($self->{'no_check_certificate'} && $self->{'url'} =~ m/^https\:/) {
     186        $strOptions .= " --no-check-certificate ";
     187    }
     188   
    181189    return $strOptions;
    182190}
Note: See TracChangeset for help on using the changeset viewer.