source: other-projects/maori-lang-detection/src/org/greenstone/atea/Utility.java@ 34000

Last change on this file since 34000 was 34000, checked in by ak19, 4 years ago

Some debugging and other minor changes

File size: 5.7 KB
Line 
1package org.greenstone.atea;
2
3import java.io.*;
4import java.net.InetAddress;
5import java.util.zip.GZIPInputStream;
6
7import com.maxmind.geoip.*; // for LookupService and Location
8
9import org.apache.log4j.Logger;
10
11public class Utility {
12 private static Logger logger = Logger.getLogger(org.greenstone.atea.Utility.class.getName());
13
14 public static String getFilePath(File file) {
15 try {
16 return file.getCanonicalPath();
17 } catch(IOException e) {
18 return file.getAbsolutePath();
19 }
20 }
21
22 // Run gunzip
23 // To avoid making this linux specific, use Java to unzip, instead of running gunzip as process
24 // https://www.mkyong.com/java/how-to-decompress-file-from-gzip-file/
25 public static boolean unzipFile(File inZipFile, File outFile) {
26
27 byte[] buffer = new byte[1024];
28
29 // try-with-resources will safely close streams/dispose resources on success or error and exceptions
30 try (
31 GZIPInputStream gzis = new GZIPInputStream(new FileInputStream(inZipFile));
32 FileOutputStream out = new FileOutputStream(outFile);
33 ) {
34 int len;
35 while ((len = gzis.read(buffer)) > 0) {
36 out.write(buffer, 0, len);
37 }
38
39 //gzis.close();
40 //out.close();
41
42 logger.debug("Unzipped " + inZipFile + " to " + outFile);
43
44 } catch(IOException ex) {
45 //error("Failed to unzip " + inZipFile);
46 System.err.println("Failed to unzip " + inZipFile);
47 ex.printStackTrace();
48 return false;
49 }
50
51 return true;
52 }
53
54 /**
55 * Attribution following below is as per https://dev.maxmind.com/geoip/geoip2/geolite2/
56 *
57 * This product includes GeoLite2 data created by MaxMind, available from
58 * <a href="https://www.maxmind.com">https://www.maxmind.com</a>.
59 *
60 * Usage:
61 * https://stackoverflow.com/questions/1415851/best-way-to-get-geo-location-in-java
62 * version I'm using: https://github.com/maxmind/geoip-api-java
63 * Newer version: https://maxmind.github.io/GeoIP2-java/
64 *
65 * @return 2 letter countrycode in uppercase or an exception
66 */
67 public static String getCountryCodeOfDomain(String domainWithProtocol, File geoLiteCityDatFile)
68 throws Exception
69 {
70 //int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
71 //startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
72 String domain = stripProtocolFromURL(domainWithProtocol); //domainWithProtocol.substring(startIndex);
73
74 // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
75 LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
76
77 // get IP for domain
78 InetAddress inetAddress = InetAddress.getByName(domain);
79 String ipAddress = inetAddress.getHostAddress();
80
81 // get location object for IP
82 Location location = cl.getLocation(ipAddress);
83
84 if(location == null) {
85 throw new Exception("@@@@ No location info in DB for: " + domain);
86 } else {
87 return location.countryCode;
88 }
89
90 }
91
92 public static String stripProtocolAndWWWFromURL(String url) {
93 url = stripProtocolFromURL(url);
94
95 if(url.startsWith("www.")) { // also strip any "www." at start
96 url = url.substring(4);
97 }
98
99 return url;
100 }
101
102 public static String stripProtocolFromURL(String url) {
103 int startIndex = url.indexOf("//"); // for http:// or https:// prefix
104 startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
105 return url.substring(startIndex);
106 }
107
108
109 /** Work out the 'domain' for a given url.
110 * This retains any www. or subdomain prefix.
111 */
112 public static String getDomainForURL(String url, boolean withProtocol) {
113 int startIndex = startIndex = url.indexOf("//"); // for http:// or https:// prefix
114 startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
115 // keep the protocol around in case param withProtocol=true
116 String protocol = (startIndex == -1) ? "" : url.substring(0, startIndex);
117
118 String domain = url.substring(startIndex);
119 int endIndex = domain.indexOf("/");
120 if(endIndex == -1) endIndex = domain.length();
121 domain = domain.substring(0, endIndex);
122
123 if(withProtocol) {
124 // now that we have the domain (everything to the first / when there is no protocol)
125 // can glue the protocol back on
126 domain = protocol + domain;
127 }
128
129 return domain;
130 }
131
132 public static boolean isDomainInCountry(String domainWithProtocol,
133 String countryCode, File geoLiteCityDatFile)
134 {
135 countryCode = countryCode.toUpperCase();
136
137 int startIndex = domainWithProtocol.indexOf("//"); // http:// or https:// prefix
138 startIndex = (startIndex == -1) ? 0 : (startIndex+2); // skip past the protocol's // portion
139 String domain = domainWithProtocol.substring(startIndex);
140
141 boolean result = false;
142 try {
143 // pass in the GeoLiteCity.dat file to be able to do the location lookup for domain's IP
144 LookupService cl = new LookupService(geoLiteCityDatFile, LookupService.GEOIP_MEMORY_CACHE);
145
146 // get IP for domain
147 InetAddress inetAddress = InetAddress.getByName(domain);
148 String ipAddress = inetAddress.getHostAddress();
149
150 // get location object for IP
151 Location location = cl.getLocation(ipAddress);
152
153 if(location != null) {
154 // compare country code with tld parameter
155 System.err.println("@@@@ Got country code: " + location.countryCode);
156 result = location.countryCode.equals(countryCode);
157 } else {
158 System.err.println("@@@@ No location info in DB for: " + domainWithProtocol);
159 }
160 } catch(Exception e) {
161 e.printStackTrace();
162 System.err.println("Could not check if domain " + domain + " was in country: " + countryCode);
163 } finally {
164 return result;
165 }
166 }
167}
Note: See TracBrowser for help on using the repository browser.