source: gs3-extensions/maori-lang-detection/src/org/greenstone/atea/Utility.java@ 33604

Last change on this file since 33604 was 33604, checked in by ak19, 5 years ago
  1. Better output into possible-product-sites.txt including the overseas country code prefix to help decide whether the site is worth keeping or not. 2. Updated whitelisting and top-sites filters to grab the /mi/ subsections of sites that don't appear to be autotranslated. This is done in preparation for blocking out product sites hereafter
File size: 4.1 KB
Line 
1package org.greenstone.atea;
2
3import java.io.*;
4import java.net.InetAddress;
5import java.util.zip.GZIPInputStream;
6
7import com.maxmind.geoip.*; // for LookupService and Location
8
9import org.apache.log4j.Logger;
10
11public class Utility {
12 private static Logger logger = Logger.getLogger(org.greenstone.atea.Utility.class.getName());
13
14 // Run gunzip
15 // To avoid making this linux specific, use Java to unzip, instead of running gunzip as process
16 // https://www.mkyong.com/java/how-to-decompress-file-from-gzip-file/
17 public static boolean unzipFile(File inZipFile, File outFile) {
18
19 byte[] buffer = new byte[1024];
20
21 // try-with-resources will safely close streams/dispose resources on success or error and exceptions
22 try (
23 GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(inZipFile));
24 FileOutputStream out = new FileOutputStream(outFile);
25 ) {
26 int len;
27 while ((len = gzis.read(buffer)) > 0) {
28 out.write(buffer, 0, len);
29 }
30
31 //gzis.close();
32 //out.close();
33
34 logger.debug("Unzipped " + inZipFile + " to " + outFile);
35
36 } catch(IOException ex) {
37 //error("Failed to unzip " + inZipFile);
38 System.err.println("Failed to unzip " + inZipFile);
39 ex.printStackTrace();
40 return false;
41 }
42
43 return true;
44 }
45
46 /**
47 * Attribution following below is as per https://dev.maxmind.com/geoip/geoip2/geolite2/
48 *
49 * This product includes GeoLite2 data created by MaxMind, available from
50 * <a href="https://www.maxmind.com">https://www.maxmind.com</a>.
51 *
52 * Usage:
53 * https://stackoverflow.com/questions/1415851/best-way-to-get-geo-location-in-java
54 * version I'm using: https://github.com/maxmind/geoip-api-java
55 * Newer version: https://maxmind.github.io/GeoIP2-java/
56 *
57 * @return 2 letter countrycode in uppercase or an exception
58 */
59 public static String getCountryCodeOfDomain(String domainWithProtocol, File geoLiteCityDatFile)
60 throws Exception
61 {
62 int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
63 startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
64 String domain = domainWithProtocol.substring(startIndex);
65
66 // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
67 LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
68
69 // get IP for domain
70 InetAddress inetAddress = InetAddress.getByName(domain);
71 String ipAddress = inetAddress.getHostAddress();
72
73 // get location object for IP
74 Location location = cl.getLocation(ipAddress);
75
76 if(location == null) {
77 throw new Exception("@@@@ No location info in DB for: " + domain);
78 } else {
79 return location.countryCode;
80 }
81
82 }
83
84 public static boolean isDomainInCountry(String domainWithProtocol,
85 String countryCode, File geoLiteCityDatFile)
86 {
87 countryCode = countryCode.toUpperCase();
88
89 int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
90 startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
91 String domain = domainWithProtocol.substring(startIndex);
92
93 boolean result = false;
94 try {
95 // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
96 LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
97
98 // get IP for domain
99 InetAddress inetAddress = InetAddress.getByName(domain);
100 String ipAddress = inetAddress.getHostAddress();
101
102 // get location object for IP
103 Location location = cl.getLocation(ipAddress);
104
105 if(location != null) {
106 // compare country code with tld parameter
107 System.err.println("@@@@ Got country code: " + location.countryCode);
108 result = location.countryCode.equals(countryCode);
109 } else {
110 System.err.println("@@@@ No location info in DB for: " + domainWithProtocol);
111 }
112 } catch(Exception e) {
113 e.printStackTrace();
114 System.err.println("Could not check if domain " + domain + " was in country: " + countryCode);
115 } finally {
116 return result;
117 }
118 }
119}
Note: See TracBrowser for help on using the repository browser.