1 | package org.greenstone.atea;
|
---|
2 |
|
---|
3 | import java.io.*;
|
---|
4 | import java.net.InetAddress;
|
---|
5 | import java.util.zip.GZIPInputStream;
|
---|
6 |
|
---|
7 | import com.maxmind.geoip.*; // for LookupService and Location
|
---|
8 |
|
---|
9 | import org.apache.log4j.Logger;
|
---|
10 |
|
---|
11 | public class Utility {
|
---|
12 | private static Logger logger = Logger.getLogger(org.greenstone.atea.Utility.class.getName());
|
---|
13 |
|
---|
14 | // Run gunzip
|
---|
15 | // To avoid making this linux specific, use Java to unzip, instead of running gunzip as process
|
---|
16 | // https://www.mkyong.com/java/how-to-decompress-file-from-gzip-file/
|
---|
17 | public static boolean unzipFile(File inZipFile, File outFile) {
|
---|
18 |
|
---|
19 | byte[] buffer = new byte[1024];
|
---|
20 |
|
---|
21 | // try-with-resources will safely close streams/dispose resources on success or error and exceptions
|
---|
22 | try (
|
---|
23 | GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(inZipFile));
|
---|
24 | FileOutputStream out = new FileOutputStream(outFile);
|
---|
25 | ) {
|
---|
26 | int len;
|
---|
27 | while ((len = gzis.read(buffer)) > 0) {
|
---|
28 | out.write(buffer, 0, len);
|
---|
29 | }
|
---|
30 |
|
---|
31 | //gzis.close();
|
---|
32 | //out.close();
|
---|
33 |
|
---|
34 | logger.debug("Unzipped " + inZipFile + " to " + outFile);
|
---|
35 |
|
---|
36 | } catch(IOException ex) {
|
---|
37 | //error("Failed to unzip " + inZipFile);
|
---|
38 | System.err.println("Failed to unzip " + inZipFile);
|
---|
39 | ex.printStackTrace();
|
---|
40 | return false;
|
---|
41 | }
|
---|
42 |
|
---|
43 | return true;
|
---|
44 | }
|
---|
45 |
|
---|
46 | /**
|
---|
47 | * Attribution following below is as per https://dev.maxmind.com/geoip/geoip2/geolite2/
|
---|
48 | *
|
---|
49 | * This product includes GeoLite2 data created by MaxMind, available from
|
---|
50 | * <a href="https://www.maxmind.com">https://www.maxmind.com</a>.
|
---|
51 | *
|
---|
52 | * Usage:
|
---|
53 | * https://stackoverflow.com/questions/1415851/best-way-to-get-geo-location-in-java
|
---|
54 | * version I'm using: https://github.com/maxmind/geoip-api-java
|
---|
55 | * Newer version: https://maxmind.github.io/GeoIP2-java/
|
---|
56 | *
|
---|
57 | * @return 2 letter countrycode in uppercase or an exception
|
---|
58 | */
|
---|
59 | public static String getCountryCodeOfDomain(String domainWithProtocol, File geoLiteCityDatFile)
|
---|
60 | throws Exception
|
---|
61 | {
|
---|
62 | int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
|
---|
63 | startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
|
---|
64 | String domain = domainWithProtocol.substring(startIndex);
|
---|
65 |
|
---|
66 | // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
|
---|
67 | LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
|
---|
68 |
|
---|
69 | // get IP for domain
|
---|
70 | InetAddress inetAddress = InetAddress.getByName(domain);
|
---|
71 | String ipAddress = inetAddress.getHostAddress();
|
---|
72 |
|
---|
73 | // get location object for IP
|
---|
74 | Location location = cl.getLocation(ipAddress);
|
---|
75 |
|
---|
76 | if(location == null) {
|
---|
77 | throw new Exception("@@@@ No location info in DB for: " + domain);
|
---|
78 | } else {
|
---|
79 | return location.countryCode;
|
---|
80 | }
|
---|
81 |
|
---|
82 | }
|
---|
83 |
|
---|
84 | /** Work out the 'domain' for a given url.
|
---|
85 | * This retains any www. or subdomain prefix.
|
---|
86 | */
|
---|
87 | public static String getDomainForURL(String url, boolean withProtocol) {
|
---|
88 | int startIndex = startIndex = url.indexOf("//"); // for http:// or https:// prefix
|
---|
89 | startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
|
---|
90 | // the keep the URL around in case param withProtocol=true
|
---|
91 | String protocol = (startIndex == -1) ? "" : url.substring(0, startIndex);
|
---|
92 |
|
---|
93 | String domain = url.substring(startIndex);
|
---|
94 | int endIndex = domain.indexOf("/");
|
---|
95 | if(endIndex == -1) endIndex = domain.length();
|
---|
96 | domain = domain.substring(0, endIndex);
|
---|
97 |
|
---|
98 | if(withProtocol) {
|
---|
99 | // now that we have the domain (everything to the first / when there is no protocol)
|
---|
100 | // can glue the protocol back on
|
---|
101 | domain = protocol + domain;
|
---|
102 | }
|
---|
103 |
|
---|
104 | return domain;
|
---|
105 | }
|
---|
106 |
|
---|
107 | public static boolean isDomainInCountry(String domainWithProtocol,
|
---|
108 | String countryCode, File geoLiteCityDatFile)
|
---|
109 | {
|
---|
110 | countryCode = countryCode.toUpperCase();
|
---|
111 |
|
---|
112 | int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
|
---|
113 | startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
|
---|
114 | String domain = domainWithProtocol.substring(startIndex);
|
---|
115 |
|
---|
116 | boolean result = false;
|
---|
117 | try {
|
---|
118 | // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
|
---|
119 | LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
|
---|
120 |
|
---|
121 | // get IP for domain
|
---|
122 | InetAddress inetAddress = InetAddress.getByName(domain);
|
---|
123 | String ipAddress = inetAddress.getHostAddress();
|
---|
124 |
|
---|
125 | // get location object for IP
|
---|
126 | Location location = cl.getLocation(ipAddress);
|
---|
127 |
|
---|
128 | if(location != null) {
|
---|
129 | // compare country code with tld parameter
|
---|
130 | System.err.println("@@@@ Got country code: " + location.countryCode);
|
---|
131 | result = location.countryCode.equals(countryCode);
|
---|
132 | } else {
|
---|
133 | System.err.println("@@@@ No location info in DB for: " + domainWithProtocol);
|
---|
134 | }
|
---|
135 | } catch(Exception e) {
|
---|
136 | e.printStackTrace();
|
---|
137 | System.err.println("Could not check if domain " + domain + " was in country: " + countryCode);
|
---|
138 | } finally {
|
---|
139 | return result;
|
---|
140 | }
|
---|
141 | }
|
---|
142 | }
|
---|