1 | package org.greenstone.atea;
|
---|
2 |
|
---|
import java.io.*;
import java.net.InetAddress;
import java.util.Locale;
import java.util.zip.GZIPInputStream;

import com.maxmind.geoip.*; // for LookupService and Location

import org.apache.log4j.Logger;
10 |
|
---|
11 | public class Utility {
|
---|
12 | private static Logger logger = Logger.getLogger(org.greenstone.atea.Utility.class.getName());
|
---|
13 |
|
---|
14 | public static String getFilePath(File file) {
|
---|
15 | try {
|
---|
16 | return file.getCanonicalPath();
|
---|
17 | } catch(IOException e) {
|
---|
18 | return file.getAbsolutePath();
|
---|
19 | }
|
---|
20 | }
|
---|
21 |
|
---|
22 | // Run gunzip
|
---|
23 | // To avoid making this linux specific, use Java to unzip, instead of running gunzip as process
|
---|
24 | // https://www.mkyong.com/java/how-to-decompress-file-from-gzip-file/
|
---|
25 | public static boolean unzipFile(File inZipFile, File outFile) {
|
---|
26 |
|
---|
27 | byte[] buffer = new byte[1024];
|
---|
28 |
|
---|
29 | // try-with-resources will safely close streams/dispose resources on success or error and exceptions
|
---|
30 | try (
|
---|
31 | GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(inZipFile));
|
---|
32 | FileOutputStream out = new FileOutputStream(outFile);
|
---|
33 | ) {
|
---|
34 | int len;
|
---|
35 | while ((len = gzis.read(buffer)) > 0) {
|
---|
36 | out.write(buffer, 0, len);
|
---|
37 | }
|
---|
38 |
|
---|
39 | //gzis.close();
|
---|
40 | //out.close();
|
---|
41 |
|
---|
42 | logger.debug("Unzipped " + inZipFile + " to " + outFile);
|
---|
43 |
|
---|
44 | } catch(IOException ex) {
|
---|
45 | //error("Failed to unzip " + inZipFile);
|
---|
46 | System.err.println("Failed to unzip " + inZipFile);
|
---|
47 | ex.printStackTrace();
|
---|
48 | return false;
|
---|
49 | }
|
---|
50 |
|
---|
51 | return true;
|
---|
52 | }
|
---|
53 |
|
---|
54 | /**
|
---|
55 | * Attribution following below is as per https://dev.maxmind.com/geoip/geoip2/geolite2/
|
---|
56 | *
|
---|
57 | * This product includes GeoLite2 data created by MaxMind, available from
|
---|
58 | * <a href="https://www.maxmind.com">https://www.maxmind.com</a>.
|
---|
59 | *
|
---|
60 | * Usage:
|
---|
61 | * https://stackoverflow.com/questions/1415851/best-way-to-get-geo-location-in-java
|
---|
62 | * version I'm using: https://github.com/maxmind/geoip-api-java
|
---|
63 | * Newer version: https://maxmind.github.io/GeoIP2-java/
|
---|
64 | *
|
---|
65 | * @return 2 letter countrycode in uppercase or an exception
|
---|
66 | */
|
---|
67 | public static String getCountryCodeOfDomain(String domainWithProtocol, File geoLiteCityDatFile)
|
---|
68 | throws Exception
|
---|
69 | {
|
---|
70 | //int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
|
---|
71 | //startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
|
---|
72 | String domain = stripProtocolFromURL(domainWithProtocol); //domainWithProtocol.substring(startIndex);
|
---|
73 |
|
---|
74 | // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
|
---|
75 | LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
|
---|
76 |
|
---|
77 | // get IP for domain
|
---|
78 | InetAddress inetAddress = InetAddress.getByName(domain);
|
---|
79 | String ipAddress = inetAddress.getHostAddress();
|
---|
80 |
|
---|
81 | // get location object for IP
|
---|
82 | Location location = cl.getLocation(ipAddress);
|
---|
83 |
|
---|
84 | if(location == null) {
|
---|
85 | throw new Exception("@@@@ No location info in DB for: " + domain);
|
---|
86 | } else {
|
---|
87 | return location.countryCode;
|
---|
88 | }
|
---|
89 |
|
---|
90 | }
|
---|
91 |
|
---|
92 | public static String stripProtocolAndWWWFromURL(String url) {
|
---|
93 | url = stripProtocolFromURL(url);
|
---|
94 |
|
---|
95 | if(url.startsWith("www.")) { // also strip any "www." at start
|
---|
96 | url = url.substring(4);
|
---|
97 | }
|
---|
98 |
|
---|
99 | return url;
|
---|
100 | }
|
---|
101 |
|
---|
102 | public static String stripProtocolFromURL(String url) {
|
---|
103 | int startIndex = url.indexOf("//"); // for http:// or https:// prefix
|
---|
104 | startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
|
---|
105 | return url.substring(startIndex);
|
---|
106 | }
|
---|
107 |
|
---|
108 |
|
---|
109 | /** Work out the 'domain' for a given url.
|
---|
110 | * This retains any www. or subdomain prefix.
|
---|
111 | */
|
---|
112 | public static String getDomainForURL(String url, boolean withProtocol) {
|
---|
113 | int startIndex = startIndex = url.indexOf("//"); // for http:// or https:// prefix
|
---|
114 | startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
|
---|
115 | // keep the protocol around in case param withProtocol=true
|
---|
116 | String protocol = (startIndex == -1) ? "" : url.substring(0, startIndex);
|
---|
117 |
|
---|
118 | String domain = url.substring(startIndex);
|
---|
119 | int endIndex = domain.indexOf("/");
|
---|
120 | if(endIndex == -1) endIndex = domain.length();
|
---|
121 | domain = domain.substring(0, endIndex);
|
---|
122 |
|
---|
123 | if(withProtocol) {
|
---|
124 | // now that we have the domain (everything to the first / when there is no protocol)
|
---|
125 | // can glue the protocol back on
|
---|
126 | domain = protocol + domain;
|
---|
127 | }
|
---|
128 |
|
---|
129 | return domain;
|
---|
130 | }
|
---|
131 |
|
---|
132 | public static boolean isDomainInCountry(String domainWithProtocol,
|
---|
133 | String countryCode, File geoLiteCityDatFile)
|
---|
134 | {
|
---|
135 | countryCode = countryCode.toUpperCase();
|
---|
136 |
|
---|
137 | int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
|
---|
138 | startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
|
---|
139 | String domain = domainWithProtocol.substring(startIndex);
|
---|
140 |
|
---|
141 | boolean result = false;
|
---|
142 | try {
|
---|
143 | // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
|
---|
144 | LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
|
---|
145 |
|
---|
146 | // get IP for domain
|
---|
147 | InetAddress inetAddress = InetAddress.getByName(domain);
|
---|
148 | String ipAddress = inetAddress.getHostAddress();
|
---|
149 |
|
---|
150 | // get location object for IP
|
---|
151 | Location location = cl.getLocation(ipAddress);
|
---|
152 |
|
---|
153 | if(location != null) {
|
---|
154 | // compare country code with tld parameter
|
---|
155 | System.err.println("@@@@ Got country code: " + location.countryCode);
|
---|
156 | result = location.countryCode.equals(countryCode);
|
---|
157 | } else {
|
---|
158 | System.err.println("@@@@ No location info in DB for: " + domainWithProtocol);
|
---|
159 | }
|
---|
160 | } catch(Exception e) {
|
---|
161 | e.printStackTrace();
|
---|
162 | System.err.println("Could not check if domain " + domain + " was in country: " + countryCode);
|
---|
163 | } finally {
|
---|
164 | return result;
|
---|
165 | }
|
---|
166 | }
|
---|
167 | }
|
---|