source: gs3-extensions/maori-lang-detection/src/org/greenstone/atea/Utility.java@ 33603

Last change on this file since 33603 was 33603, checked in by ak19, 4 years ago

Incorporating Dr Nichols' suggestion to help weed out product sites: if the TLD of a seed URL whose address contains /mi/ is outside NZ, add it to the list in possible-product-sites.txt. This should hopefully be a smaller number than all URLs containing /mi/ and, because these sites are located outside NZ, they are more likely to be product sites than not.

File size: 3.1 KB
Line 
1package org.greenstone.atea;
2
3import java.io.*;
4import java.net.InetAddress;
5import java.util.zip.GZIPInputStream;
6
7import com.maxmind.geoip.*; // for LookupService and Location
8
9import org.apache.log4j.Logger;
10
11public class Utility {
12 private static Logger logger = Logger.getLogger(org.greenstone.atea.Utility.class.getName());
13
14 // Run gunzip
15 // To avoid making this linux specific, use Java to unzip, instead of running gunzip as process
16 // https://www.mkyong.com/java/how-to-decompress-file-from-gzip-file/
17 public static boolean unzipFile(File inZipFile, File outFile) {
18
19 byte[] buffer = new byte[1024];
20
21 // try-with-resources will safely close streams/dispose resources on success or error and exceptions
22 try (
23 GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(inZipFile));
24 FileOutputStream out = new FileOutputStream(outFile);
25 ) {
26 int len;
27 while ((len = gzis.read(buffer)) > 0) {
28 out.write(buffer, 0, len);
29 }
30
31 //gzis.close();
32 //out.close();
33
34 logger.debug("Unzipped " + inZipFile + " to " + outFile);
35
36 } catch(IOException ex) {
37 //error("Failed to unzip " + inZipFile);
38 System.err.println("Failed to unzip " + inZipFile);
39 ex.printStackTrace();
40 return false;
41 }
42
43 return true;
44 }
45
46 /**
47 * Attribution following below is as per https://dev.maxmind.com/geoip/geoip2/geolite2/
48 *
49 * This product includes GeoLite2 data created by MaxMind, available from
50 * <a href="https://www.maxmind.com">https://www.maxmind.com</a>.
51 *
52 * Usage:
53 * https://stackoverflow.com/questions/1415851/best-way-to-get-geo-location-in-java
54 * version I'm using: https://github.com/maxmind/geoip-api-java
55 * Newer version: https://maxmind.github.io/GeoIP2-java/
56 */
57 public static boolean isDomainInCountry(String domainWithProtocol,
58 String countryCode, File geoLiteCityDatFile)
59 {
60 countryCode = countryCode.toUpperCase();
61
62 int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
63 startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
64 String domain = domainWithProtocol.substring(startIndex);
65
66 boolean result = false;
67 try {
68 // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
69 LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
70
71 // get IP for domain
72 InetAddress inetAddress = InetAddress.getByName(domain);
73 String ipAddress = inetAddress.getHostAddress();
74
75 // get location object for IP
76 Location location = cl.getLocation(ipAddress);
77
78 if(location != null) {
79 // compare country code with tld parameter
80 System.err.println("@@@@ Got country code: " + location.countryCode);
81 result = location.countryCode.equals(countryCode);
82 } else {
83 System.err.println("@@@@ No location info in DB for: " + domainWithProtocol);
84 }
85 } catch(Exception e) {
86 e.printStackTrace();
87 System.err.println("Could not check if domain " + domain + " was in country: " + countryCode);
88 } finally {
89 return result;
90 }
91 }
92}
Note: See TracBrowser for help on using the repository browser.