Ignore:
Timestamp:
2019-10-24T22:04:37+13:00 (4 years ago)
Author:
ak19
Message:

Incorporating Dr Nichols' suggestion to help weed out product sites: if the tld of a seed URL whose address contains /mi/ is outside NZ, add it to the list in possible-product-sites.txt. This should hopefully be a smaller number than all URLs containing /mi/ and, because these sites are located outside NZ, they are more likely to be product sites than not.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/maori-lang-detection/src/org/greenstone/atea/Utility.java

    r33467 r33603  
    22
    33import java.io.*;
     4import java.net.InetAddress;
    45import java.util.zip.GZIPInputStream;
     6
     7import com.maxmind.geoip.*; // for LookupService and Location
    58
    69import org.apache.log4j.Logger;
     
    4144    }
    4245
     46    /**
     47     * Attribution following below is as per https://dev.maxmind.com/geoip/geoip2/geolite2/
     48     *
     49     * This product includes GeoLite2 data created by MaxMind, available from
     50     * <a href="https://www.maxmind.com">https://www.maxmind.com</a>.
     51     *
     52     * Usage:
     53     * https://stackoverflow.com/questions/1415851/best-way-to-get-geo-location-in-java
     54     * version I'm using: https://github.com/maxmind/geoip-api-java
     55     * Newer version: https://maxmind.github.io/GeoIP2-java/
     56     */
     57    public static boolean isDomainInCountry(String domainWithProtocol,
     58                        String countryCode, File geoLiteCityDatFile)
     59    {
     60    countryCode = countryCode.toUpperCase();
     61   
     62    int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
     63    startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
     64    String domain = domainWithProtocol.substring(startIndex);   
     65   
     66    boolean result = false;
     67    try {
     68        // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
     69        LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
     70       
     71        // get IP for domain
     72        InetAddress inetAddress = InetAddress.getByName(domain);
     73        String ipAddress = inetAddress.getHostAddress();
     74       
     75        // get location object for IP
     76        Location location = cl.getLocation(ipAddress);
     77
     78        if(location != null) {
     79        // compare country code with tld parameter
     80        System.err.println("@@@@ Got country code: " + location.countryCode);
     81        result = location.countryCode.equals(countryCode);
     82        } else {
     83        System.err.println("@@@@ No location info in DB for: " + domainWithProtocol);
     84        }
     85    } catch(Exception e) {
     86        e.printStackTrace();
     87        System.err.println("Could not check if domain " + domain + " was in country: " + countryCode);
     88    } finally {
     89        return result;
     90    }
     91    }
    4392}
Note: See TracChangeset for help on using the changeset viewer.