source: other-projects/maori-lang-detection/src/org/greenstone/atea/CountryCodeCountsMapData.java@ 33796

Last change on this file since 33796 was 33796, checked in by ak19, 4 years ago

Instead of a hack for US' count being too great that its histogram goes past the max latitude North, added more general code to widen histograms in cases where the value of count produces too great a latitude North value for histograms, as other countries like France and China also needed the same solution.

File size: 14.5 KB
Line 
1package org.greenstone.atea;
2
3import java.io.*;
4
5import org.apache.commons.csv.*;
6import org.apache.log4j.Logger;
7
8import com.google.gson.*;
9
10import java.util.HashMap;
11import java.util.LinkedList;
12import java.util.List;
13import java.util.Map;
14
15// For working with GeoJSON's Simple Features in Java
16import mil.nga.sf.geojson.Feature;
17import mil.nga.sf.geojson.FeatureCollection;
18import mil.nga.sf.geojson.FeatureConverter;
19import mil.nga.sf.geojson.Geometry;
20import mil.nga.sf.geojson.MultiPoint;
21import mil.nga.sf.geojson.Polygon;
22import mil.nga.sf.geojson.Position;
23
24
25/** Simple Features GeoJSON Java
26 * https://ngageoint.github.io/simple-features-geojson-java/ - links to API and more
27 *
28 * https://mvnrepository.com/artifact/mil.nga.sf/sf-geojson (https://github.com/ngageoint/simple-features-geojson-java/)
29 *
30 * Also need the basic data types used by the Geometry objects above:
31 * https://mvnrepository.com/artifact/mil.nga/sf (https://github.com/ngageoint/simple-features-java)
32 *
33 * Further helper jars needed (because of encountering the exception documented at
34 * stackoverflow.com/questions/36278293/java-lang-classnotfoundexception-com-fasterxml-jackson-core-jsonprocessingexcep/36279872)
35 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core/2.10.0
36 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind
37 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations/2.10.0
38 */
39
40/**
41 * This class needs the gson library, and now the sf(-2.02).jar and sf-geojson(-2.02).jar files too
42 * to create and store Simple Features geo json objects with Java.
43 * I copied the gson jar file from GS3.
44 *
45 * TO COMPILE:
46 * maori-lang-detection/src$
47 * javac -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData.java
48 *
49 * TO RUN:
50 * maori-lang-detection/src$
51 * java -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData ../mongodb-data/countrycodes.json ../mongodb-data/counts.json
52 *
53 */
54public class CountryCodeCountsMapData {
55 static Logger logger = Logger.getLogger(org.greenstone.atea.CountryCodeCountsMapData.class.getName());
56
57 //Map<String, JsonObject> countryToJsonMap;
58 JsonArray countryCodesJsonArray;
59 JsonArray countryCountsJsonArray;
60
61 // North-central Antarctica coords
62 private final double ANTARCTICA_LNG = 57.0d;
63 private final double ANTARCTICA_LAT = -70.0d;
64 // For EU coords, spot in Atlantic Ocean close to western European coast.
65 private final double EU_LNG = -20.0d;
66 private final double EU_LAT = 50.0d;
67
68 public CountryCodeCountsMapData(String countryCoordsJSONFile, String countryCountsJSONFile) throws Exception {
69
70 // map of country codes to lat, lng json for that country code
71 Map<String, JsonObject> countryToJsonMap = new HashMap<String, JsonObject>();
72
73 // Parse json file of country codes and put into a JsonArray.
74 // then put into map of each country code to its JsonObject.
75 countryCodesJsonArray = parseJSONFile(countryCoordsJSONFile);
76 for(JsonElement obj : countryCodesJsonArray) {
77 JsonObject countryCodeJson = obj.getAsJsonObject();
78 countryToJsonMap.put(countryCodeJson.get("country").getAsString(), countryCodeJson);
79 }
80
81 // Parse json file of country code counts
82 // Then for each JsonObject in this file,
83 // find a match on its country code in the map created above to get a country code JsonObject
84 // Get the longitude and latitude of the JsonObject that matched that country code.
85 // Add this lng,lat location information to the current JsonObject from the counts file.
86 countryCountsJsonArray = parseJSONFile(countryCountsJSONFile);
87
88 for(JsonElement obj : countryCountsJsonArray) {
89 JsonObject json = obj.getAsJsonObject();
90 String countryCode = json.get("_id").getAsString().toUpperCase();
91 // set the property back as uppercase and with property name "countrycode" instead of "_id"
92 json.remove("_id");
93 json.addProperty("countrycode", countryCode);
94
95 int count = (int)json.get("count").getAsDouble();
96
97 //logger.info("Got country code: " + countryCode);
98 //logger.info(" count: " + count);
99
100 // locate in countryCode map
101 JsonObject countryCodeJson = countryToJsonMap.get(countryCode);
102
103 if(countryCodeJson != null) {
104 //logger.info("Found in map: " + countryCodeJson.toString());
105
106 // for geojson, want longitude then latitude
107 Double lng = countryCodeJson.get("longitude").getAsDouble();
108 Double lat = countryCodeJson.get("latitude").getAsDouble();
109 //logger.info("long: " + Double.toString(lng) + ", lat: " + Double.toString(lat));
110 String countryName = countryCodeJson.get("name").getAsString();
111
112 // let's add lat and lng fields to countryCounts object
113 json.addProperty("lng", lng); // adds Number: https://javadoc.io/static/com.google.code.gson/gson/2.8.5/com/google/gson/JsonObject.html
114 json.addProperty("lat", lat);
115 json.addProperty("region", countryName);
116
117 } else {
118 logger.info("No geolocation info found for country code " + countryCode);
119 if(countryCode.equals("EU")) {
120 //logger.info("Unlisted country code: EU");
121 // add lat and lng for Europe
122 json.addProperty("lng", EU_LNG);
123 json.addProperty("lat", EU_LAT);
124 json.addProperty("region", "Europe");
125 }
126 else if(countryCode.equals("UNKNOWN")) {
127 //logger.info("Unlisted country code: UNKNOWN");
128 // add lat and lng for Antarctica
129 json.addProperty("lng", ANTARCTICA_LNG);
130 json.addProperty("lat", ANTARCTICA_LAT);
131 json.addProperty("region", "UNKNOWN");
132 } else {
133 logger.error("ERROR: entirely unknown country code: " + countryCode);
134 }
135 }
136 }
137
138 }
139
140 /** Convert mongodb tabular output of json records stored in the given file
141 * into a JsonArray.
142 */
143 public JsonArray parseJSONFile(String filename) throws Exception {
144 JsonArray jsonArray = null;
145 // read into string
146 try (
147 BufferedReader reader = new BufferedReader(new FileReader(filename));
148 ) {
149 StringBuilder str = //new StringBuilder();
150 new StringBuilder("[");
151 String line;
152 while((line = reader.readLine()) != null) {
153 line = line.replaceAll("/\\* [^\\/]* \\*/", "");
154 str.append(line);
155 if(line.endsWith("}")) {
156 str.append(",\n");
157 }
158 }
159 // replace last comma with closing bracket
160 String fileContents = str.substring(0, str.length()-2) + "]";
161
162 //System.err.println("Got file:\n" + fileContents);
163
164 // https://stackoverflow.com/questions/2591098/how-to-parse-json-in-java
165 jsonArray = new JsonParser().parse(fileContents).getAsJsonArray();
166
167 } catch(Exception e) {
168 throw e;
169 }
170
171
172 return jsonArray;
173 }
174
175 /**
176 * Reading
177 * https://www.here.xyz/api/concepts/geojsonbasics/
178 * https://ngageoint.github.io/simple-features-geojson-java/docs/api/
179 *
180 * https://stackoverflow.com/questions/55621480/cant-access-coordinates-member-of-geojson-feature-collection
181 *
182 * Downloaded geojson simple features' jar file from maven, but it didn't work:
183 * a more private version of MultiPoint.java is not included in the jar file (there's only
184 * mil.nga.sf.geojson.MultiPoint , whereas
185 * mil.nga.sf.MultiPoint is missing
186 *
187 * This seems to have gone wrong at
188 * https://github.com/ngageoint/simple-features-geojson-java/tree/master/src/main/java/mil/nga/sf
189 * but the one at
190 * https://github.com/ngageoint/simple-features-java/tree/master/src/main/java/mil/nga/sf
191 * has it. So I've been trying to build that, but don't have the correct version of maven.
192 */
193 public Geometry toMultiPointGeoJson() {
194 //System.err.println("toGeoJSON() is not yet implemented.");
195
196 List<Position> points = new LinkedList<Position>();
197
198 for(JsonElement obj : this.countryCountsJsonArray) {
199 JsonObject json = obj.getAsJsonObject();
200 Double lng = json.get("lng").getAsDouble();
201 Double lat = json.get("lat").getAsDouble();
202
203 Position point = new Position(lng, lat);
204 points.add(point);
205 }
206
207 Geometry multiPoint = new MultiPoint(points);
208
209 return multiPoint;
210 }
211
212 // https://javadoc.io/static/com.google.code.gson/gson/2.8.5/index.html
213 public FeatureCollection toFeatureCollection() {
214 final int HISTOGRAM_WIDTH = 4;
215
216 FeatureCollection featureCollection = new FeatureCollection();
217
218 for(JsonElement obj : this.countryCountsJsonArray) {
219 JsonObject json = obj.getAsJsonObject();
220
221 String countryCode = json.get("countrycode").getAsString();
222 String region = json.get("region").getAsString();
223 int count = json.get("count").getAsInt();
224
225 // make a histogram for each country
226 Geometry rectangle = this.toPolygon(json, count, HISTOGRAM_WIDTH);
227
228 Feature countryFeature = new Feature(rectangle);
229 Map<String, Object> featureProperties = new HashMap<String, Object>();
230 featureProperties.put("count", new Integer(count));
231 featureProperties.put("code", countryCode);
232 featureProperties.put("region", region);
233 countryFeature.setProperties(featureProperties);
234
235 featureCollection.addFeature(countryFeature);
236 }
237
238 return featureCollection;
239 }
240
241
242 // create rectangular "histogram" for each country code
243 private Geometry toPolygon(JsonObject json, int count, int HISTOGRAM_WIDTH) {
244 int half_width = HISTOGRAM_WIDTH/2;
245 double vertical_factor = 1.0;
246
247 Double lng = json.get("lng").getAsDouble();
248 Double lat = json.get("lat").getAsDouble();
249
250 String countryCode = json.get("countrycode").getAsString();
251
252
253 //create the 4 corners of the rectangle
254 // West is negative, east is positive, south is negative, north is positive
255 // See http://www.learnz.org.nz/sites/learnz.org.nz/files/lat-long-geo-data-01_0.jpg
256 // But since the histograms grow vertically/northwards and we can't go past a latitude of 90,
257 // to compensate, we increase the width of the histograms by the same factor as our inability
258 // to grow northwards.
259 Double north = lat + (vertical_factor * count);
260 while (north > 90) {
261 // recalculate north after decreasing histogram's vertical growth
262 // by the same factor as we increase its width
263 vertical_factor = vertical_factor/2.0;
264 half_width = 2 * half_width;
265 north = lat + (vertical_factor * count);
266 }
267 Double east = lng + half_width;
268 Double west = lng - half_width;
269 Double south = lat;
270
271 List<List<Position>> outerList = new LinkedList<List<Position>>();
272 List<Position> points = new LinkedList<Position>();
273 outerList.add(points);
274
275
276 points.add(new Position(west, south)); // Position(lng, lat) not Position(lat, lng)
277 points.add(new Position(west, north));
278 points.add(new Position(east, north));
279 points.add(new Position(east, south));
280
281
282 Geometry rectangle = new Polygon(outerList);
283
284 // Coords: a List of List of Positions, see https://ngageoint.github.io/simple-features-geojson-java/docs/api/
285 // https://www.here.xyz/api/concepts/geojsonbasics/#polygon
286
287 return rectangle;
288 }
289
290 public String writeMultiPointGeoJsonToFile(File folder) {
291 final String filename = "multipoint.json";
292 File outFile = new File(folder, filename);
293
294 Geometry geometry = this.toMultiPointGeoJson();
295 String multiPointGeojsonString = FeatureConverter.toStringValue(geometry);
296 System.err.println("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n");
297 try (
298 Writer writer = new BufferedWriter(new FileWriter(outFile));
299 ) {
300
301 // Some basic re-formatting for some immediate legibility
302 // But pasting the contents of the file (or the System.err output above)
303 // directly into http://geojson.tools/ will instantly reformat the json perfectly anyway.
304 multiPointGeojsonString = multiPointGeojsonString.replace("[[", "\n[\n\t[");
305 multiPointGeojsonString = multiPointGeojsonString.replace("],[", "],\n\t[");
306 multiPointGeojsonString = multiPointGeojsonString.replace("]]", "]\n]");
307
308 writer.write(multiPointGeojsonString + "\n");
309 } catch(Exception e) {
310 logger.error("Unable to write multipoint geojson:\n**********************");
311 logger.error(multiPointGeojsonString);
312 logger.error("**********************\ninto file " + outFile.getAbsolutePath());
313 logger.error(e.getMessage(), e);
314 }
315
316 return outFile.getAbsolutePath();
317
318 }
319
320 public String writeFeaturesGeoJsonToFile(File folder) {
321 final String filename = "geojson-features.json";
322 File outFile = new File(folder, filename);
323
324 FeatureCollection featureColl = this.toFeatureCollection();
325 String featuresGeojsonString = FeatureConverter.toStringValue(featureColl);
326 System.err.println("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");
327 try (
328 Writer writer = new BufferedWriter(new FileWriter(outFile));
329 ) {
330
331
332 //multiPointGeojsonString = multiPointGeojsonString.replace("[[", "\n[\n\t[");
333 //multiPointGeojsonString = multiPointGeojsonString.replace("],[", "],\n\t[");
334 //multiPointGeojsonString = multiPointGeojsonString.replace("]]", "]\n]");
335
336 writer.write(featuresGeojsonString + "\n");
337 } catch(Exception e) {
338 logger.error("Unable to write multipoint geojson:\n**********************");
339 logger.error(featuresGeojsonString);
340 logger.error("**********************\ninto file " + outFile.getAbsolutePath());
341 logger.error(e.getMessage(), e);
342 }
343
344 return outFile.getAbsolutePath();
345
346 }
347
348
349 // Unfinished and unused
350 public void parseCSVFile(String filename) throws Exception {
351 File csvData = new File(filename);
352 CSVParser parser = CSVParser.parse(csvData, java.nio.charset.Charset.forName("US-ASCII"), CSVFormat.RFC4180);
353 for (CSVRecord csvRecord : parser) {
354 logger.info("Got record: " + csvRecord.toString());
355 }
356 }
357
358 public static void printUsage() {
359 System.err.println("CountryCodeCountsMapData countrycodes.json counts.json");
360 }
361
362 public static void main(String args[]) {
363 if(args.length != 2) {
364 printUsage();
365 System.exit(-1);
366 }
367
368 try {
369 File countsFile = new File(args[1]);
370 File parentFolder = countsFile.getParentFile().getCanonicalFile(); // canonical resolves any .. and . in path
371
372 CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0], args[1]);
373
374 //Geometry geometry = mapData.toMultiPointGeoJSON();
375 //String multiPointGeojsonString = FeatureConverter.toStringValue(geometry);
376 //System.err.println("geometry: " + multiPointGeojsonString);
377
378 String multipointOutFileName = mapData.writeMultiPointGeoJsonToFile(parentFolder);
379 String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile(parentFolder);
380 System.err.println("***********\nWrote mapdata to files " + multipointOutFileName
381 + " and " + featuresOutFileName);
382
383 } catch(Exception e) {
384 logger.error(e.getMessage(), e);
385 }
386 }
387}
Note: See TracBrowser for help on using the repository browser.