source: other-projects/maori-lang-detection/src/org/greenstone/atea/CountryCodeCountsMapData.java@ 33858

Last change on this file since 33858 was 33858, checked in by ak19, 4 years ago

Fixes to the code committed yesterday: correct calculation of the rectangle for a country with high count value.

File size: 20.3 KB
Line 
1package org.greenstone.atea;
2
3import java.io.BufferedReader;
4import java.io.BufferedWriter;
5import java.io.File;
6import java.io.FileReader;
7import java.io.FileWriter;
8import java.io.Writer;
9
10import java.util.HashMap;
11import java.util.LinkedList;
12import java.util.List;
13import java.util.Map;
14
15//import java.lang.Math; //automatically imported apparently
16
17import org.apache.commons.csv.*;
18import org.apache.log4j.Logger;
19
20// Google's gson imports for parsing any kind of json
21import com.google.gson.JsonArray;
22import com.google.gson.JsonElement;
23import com.google.gson.JsonObject;
24import com.google.gson.JsonParser;
25
26// For working with GeoJSON's Simple Features in Java
27import mil.nga.sf.geojson.Feature;
28import mil.nga.sf.geojson.FeatureCollection;
29import mil.nga.sf.geojson.FeatureConverter;
30import mil.nga.sf.geojson.Geometry;
31import mil.nga.sf.geojson.MultiPoint;
32import mil.nga.sf.geojson.Polygon;
33import mil.nga.sf.geojson.Position;
34
35
36/**
37 * Run a mongodb query that produces counts per countrycode like in the following 2 examples:
38 *
39 * 1. count of country codes for all sites
40 * db.Websites.aggregate([
41 *
42 * { $unwind: "$geoLocationCountryCode" },
43 * {
44 * $group: {
45 * _id: "$geoLocationCountryCode",
46 * count: { $sum: 1 }
47 * }
48 * },
49 * { $sort : { count : -1} }
50 * ]);
51 *
52 * Then store the mongodb query result's JSON format output in a file called "counts.json".
53 * Then run this program with counts.json as parameter
54 * Copy the geojson output into http://geojson.tools/
55 *
56 * 2. count of country codes for sites that have at least one page detected as MRI
57 *
58 * db.Websites.aggregate([
59 * {
60 * $match: {
61 * numPagesInMRI: {$gt: 0}
62 * }
63 * },
64 * { $unwind: "$geoLocationCountryCode" },
65 * {
66 * $group: {
67 * _id: {$toLower: '$geoLocationCountryCode'},
68 * count: { $sum: 1 }
69 * }
70 * },
71 * { $sort : { count : -1} }
72 * ]);
73 *
74 * Store the mongodb query result's JSON format output in a file called "counts_sitesWithPagesInMRI.json".
75 * Then run this program with counts_sitesWithPagesInMRI.json as parameter.
76 * Copy the geojson output into http://geojson.tools/
77 *
78 * ##################
79 * TO COMPILE:
80 * maori-lang-detection/src$
81 * javac -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData.java
82 *
83 * TO RUN:
84 * maori-lang-detection/src$
85 * java -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData ../mongodb-data/counts.json
86 *###################
87 *
88 * This class needs the gson library, and now the sf-geojson(-2.02).jar and
89 * helper jars sf(-2.02).jar and 3 jackson jars too,
90 * to create and store Simple Features geo json objects with Java.
91 * I copied the gson jar file from GS3.
92 *
93 * Simple Features GeoJSON Java
94 * https://ngageoint.github.io/simple-features-geojson-java/ - links to API and more
95 *
96 * https://mvnrepository.com/artifact/mil.nga.sf/sf-geojson (https://github.com/ngageoint/simple-features-geojson-java/)
97 *
98 * Also need the basic data types used by the Geometry objects above:
99 * https://mvnrepository.com/artifact/mil.nga/sf (https://github.com/ngageoint/simple-features-java)
100 *
101 * Further helper jars needed (because of encountering the exception documented at
102 * stackoverflow.com/questions/36278293/java-lang-classnotfoundexception-com-fasterxml-jackson-core-jsonprocessingexcep/36279872)
103 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core/2.10.0
104 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind
105 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations/2.10.0
106 */
107public class CountryCodeCountsMapData {
108 static Logger logger = Logger.getLogger(org.greenstone.atea.CountryCodeCountsMapData.class.getName());
109
110 //Map<String, JsonObject> countryToJsonMap;
111 JsonArray countryCodesJsonArray;
112 JsonArray countryCountsJsonArray;
113
114 // North-central Antarctica coords
115 private final double ANTARCTICA_LNG = 57.0d;
116 private final double ANTARCTICA_LAT = -70.0d;
117 // For EU coords, spot in Atlantic Ocean close to western European coast.
118 private final double EU_LNG = -20.0d;
119 private final double EU_LAT = 50.0d;
120
121 private final String geoJsonFilenameWithSuffix;
122 private final File outputFolder;
123
124 public CountryCodeCountsMapData(String countryCountsJSONFilename) throws Exception {
125
126 // work out the unique filename we're going to save the geojson files under
127 // and the folder we're going to save them into
128 File countryCountsJSONFile = new File(countryCountsJSONFilename);
129 String tailname = countryCountsJSONFile.getName();
130 this.geoJsonFilenameWithSuffix = (tailname.startsWith("counts_")) ? tailname.substring("counts_".length()) : tailname;
131 this.outputFolder = countryCountsJSONFile.getParentFile().getCanonicalFile(); // canonical resolves any .. and . in path
132
133 // locate the countrycodes.json file
134 File countryCoordsJSONFile = new File(this.getClass().getClassLoader().getResource("countrycodes.json").getFile());
135
136 // Create a map of ALL country code names to ALL the country code json objects
137 // that contain the location (lat, lng) info for each country code
138 Map<String, JsonObject> countryToJsonMap = new HashMap<String, JsonObject>();
139
140 // Parse json file of country codes and put into a JsonArray.
141 // then put into map of each country code to its JsonObject.
142 countryCodesJsonArray = parseJSONFile(countryCoordsJSONFile);
143 for(JsonElement obj : countryCodesJsonArray) {
144 JsonObject countryCodeJson = obj.getAsJsonObject();
145 countryToJsonMap.put(countryCodeJson.get("country").getAsString(), countryCodeJson);
146 }
147
148 // Parse json file of country code counts
149 // Then for each JsonObject in this file,
150 // find a match on its country code in the map created above to get a country code JsonObject
151 // Get the longitude and latitude of the JsonObject that matched that country code.
152 // Add this lng,lat location information to the current JsonObject from the counts file.
153 countryCountsJsonArray = parseJSONFile(countryCountsJSONFile);
154
155 for(JsonElement obj : countryCountsJsonArray) {
156 JsonObject json = obj.getAsJsonObject();
157 String countryCode = json.get("_id").getAsString().toUpperCase();
158 // set the property back as uppercase and with property name "countrycode" instead of "_id"
159 json.remove("_id");
160 json.addProperty("countrycode", countryCode);
161
162 int count = (int)json.get("count").getAsDouble();
163
164 //logger.info("Got country code: " + countryCode);
165 //logger.info(" count: " + count);
166
167 // locate in countryCode map
168 JsonObject countryCodeJson = countryToJsonMap.get(countryCode);
169
170 if(countryCodeJson != null) {
171 //logger.info("Found in map: " + countryCodeJson.toString());
172
173 // for geojson, want longitude then latitude
174 Double lng = countryCodeJson.get("longitude").getAsDouble();
175 Double lat = countryCodeJson.get("latitude").getAsDouble();
176 //logger.info("long: " + Double.toString(lng) + ", lat: " + Double.toString(lat));
177 String countryName = countryCodeJson.get("name").getAsString();
178
179 // let's add lat and lng fields to countryCounts object
180 json.addProperty("lng", lng); // adds Number: https://javadoc.io/static/com.google.code.gson/gson/2.8.5/com/google/gson/JsonObject.html
181 json.addProperty("lat", lat);
182 json.addProperty("region", countryName);
183
184 } else {
185 logger.info("No geolocation info found for country code " + countryCode);
186 if(countryCode.equals("EU")) {
187 //logger.info("Unlisted country code: EU");
188 // add lat and lng for Europe
189 json.addProperty("lng", EU_LNG);
190 json.addProperty("lat", EU_LAT);
191 json.addProperty("region", "Europe");
192 }
193 else if(countryCode.equals("UNKNOWN")) {
194 //logger.info("Unlisted country code: UNKNOWN");
195 // add lat and lng for Antarctica
196 json.addProperty("lng", ANTARCTICA_LNG);
197 json.addProperty("lat", ANTARCTICA_LAT);
198 json.addProperty("region", "UNKNOWN");
199 } else {
200 logger.error("ERROR: entirely unknown country code: " + countryCode);
201 }
202 }
203 }
204
205 }
206
207 /** Convert mongodb tabular output of json records stored in the given file
208 * into a JsonArray.
209 */
210 public JsonArray parseJSONFile(File file) throws Exception {
211 JsonArray jsonArray = null;
212 // read into string
213 try (
214 BufferedReader reader = new BufferedReader(new FileReader(file));
215 ) {
216 StringBuilder str = //new StringBuilder();
217 new StringBuilder("[");
218 String line;
219
220 boolean multi_line_comment = false;
221
222 while((line = reader.readLine()) != null) {
223 line = line.trim();
224
225 // ignore any single line comments nested in multi-line symbols
226 if(line.startsWith("/*") && line.endsWith("*/")) {
227 continue; // skip line
228 }
229
230 // skip multi-line comments spread over multiple lines
231 // assumes this ends on a line containing */ without further content on the line.
232 if(line.startsWith("/*") && !line.endsWith("*/")) {
233 multi_line_comment = true;
234 continue; // skip line
235 }
236 if(multi_line_comment) {
237 if(line.contains("*/")) {
238 multi_line_comment = false;
239 }
240
241 continue; // we're in a comment or at end of comment, skip line
242 }
243
244 str.append(line);
245 if(line.endsWith("}")) {
246 str.append(",\n");
247 }
248 }
249 // replace last comma with closing bracket
250 String fileContents = str.substring(0, str.length()-2) + "]";
251
252 //System.err.println("Got file:\n" + fileContents);
253
254 // https://stackoverflow.com/questions/2591098/how-to-parse-json-in-java
255 jsonArray = new JsonParser().parse(fileContents).getAsJsonArray();
256
257 } catch(Exception e) {
258 throw e;
259 }
260
261
262 return jsonArray;
263 }
264
265 /**
266 * Reading
267 * https://www.here.xyz/api/concepts/geojsonbasics/
268 * https://ngageoint.github.io/simple-features-geojson-java/docs/api/
269 *
270 * https://stackoverflow.com/questions/55621480/cant-access-coordinates-member-of-geojson-feature-collection
271 *
272 * Downloaded geojson simple features' jar file from maven, but it didn't work:
273 * a more private version of MultiPoint.java is not included in the jar file (there's only
274 * mil.nga.sf.geojson.MultiPoint , whereas
275 * mil.nga.sf.MultiPoint is missing
276 *
277 * This seems to have gone wrong at
278 * https://github.com/ngageoint/simple-features-geojson-java/tree/master/src/main/java/mil/nga/sf
279 * but the one at
280 * https://github.com/ngageoint/simple-features-java/tree/master/src/main/java/mil/nga/sf
281 * has it. So I've been trying to build that, but don't have the correct version of maven.
282 */
283 public Geometry toMultiPointGeoJson() {
284 //System.err.println("toGeoJSON() is not yet implemented.");
285
286 List<Position> points = new LinkedList<Position>();
287
288 for(JsonElement obj : this.countryCountsJsonArray) {
289 JsonObject json = obj.getAsJsonObject();
290 Double lng = json.get("lng").getAsDouble();
291 Double lat = json.get("lat").getAsDouble();
292
293 Position point = new Position(lng, lat);
294 points.add(point);
295 }
296
297 Geometry multiPoint = new MultiPoint(points);
298
299 return multiPoint;
300 }
301
302 // https://javadoc.io/static/com.google.code.gson/gson/2.8.5/index.html
303 public FeatureCollection toFeatureCollection() {
304 final int HISTOGRAM_WIDTH = 4;
305
306 FeatureCollection featureCollection = new FeatureCollection();
307
308 for(JsonElement obj : this.countryCountsJsonArray) {
309 JsonObject json = obj.getAsJsonObject();
310
311 String countryCode = json.get("countrycode").getAsString();
312 String region = json.get("region").getAsString();
313 int count = json.get("count").getAsInt();
314
315 // make a histogram for each country
316 Geometry rectangle = this.toPolygon(json, count, HISTOGRAM_WIDTH);
317
318 Feature countryFeature = new Feature(rectangle);
319 Map<String, Object> featureProperties = new HashMap<String, Object>();
320 featureProperties.put("count", new Integer(count));
321 featureProperties.put("code", countryCode);
322 featureProperties.put("region", region);
323 countryFeature.setProperties(featureProperties);
324
325 featureCollection.addFeature(countryFeature);
326 }
327
328 return featureCollection;
329 }
330
331 // create rectangular "histogram" for each country code
332 private Geometry toPolygon(JsonObject json, final int count, final int HISTOGRAM_WIDTH) {
333 int half_width = HISTOGRAM_WIDTH/2;
334 double vertical_factor = 1.0;
335
336 final Double lng = json.get("lng").getAsDouble();
337 final Double lat = json.get("lat").getAsDouble();
338
339 String countryCode = json.get("countrycode").getAsString();
340
341
342 //create the 4 corners of the rectangle
343 // West is negative, east is positive, south is negative, north is positive
344 // See http://www.learnz.org.nz/sites/learnz.org.nz/files/lat-long-geo-data-01_0.jpg
345 // But since the histograms grow vertically/northwards and we can't go past a latitude of 90,
346 // to compensate, we increase the width of the histograms by the same factor as our inability
347 // to grow northwards.
348 Double north = lat + (vertical_factor * count);
349
350 while (north > 90) {
351 // recalculate north after decreasing histogram's vertical growth
352 // by the same factor as we increase its width
353 vertical_factor = vertical_factor/2.0;
354 half_width = 2 * half_width;
355 north = lat + (vertical_factor * count);
356 }
357 Double east = lng + half_width;
358 Double west = lng - half_width;
359 Double south = lat;
360 /*
361 System.err.println("For country " + countryCode + ":");
362 System.err.println("north = " + north);
363 System.err.println("south = " + south);
364 System.err.println("east = " + east);
365 System.err.println("west = " + west + "\n");
366 */
367 // Check if we're dealing with very large numbers, in which case, we can have follow off the longitude edges
368 // Max longitude values are -180 to 180. So a max of 360 units between them. (Max latitude is -90 to 90)
369 // "Longitude is in the range -180 and +180 specifying coordinates west and east of the Prime Meridian, respectively.
370 // For reference, the Equator has a latitude of 0°, the North pole has a latitude of 90° north (written 90° N or +90°),
371 // and the South pole has a latitude of -90°."
372 if((east + Math.abs(west)) > 360 || east > 180 || west < -180) {
373 half_width = HISTOGRAM_WIDTH/2; // reset half_width
374
375 double v_tmp_count = Math.sqrt(count);
376 //double h_tmp_count = Math.floor(v_tmp_count);
377 //v_tmp_count = Math.ceil(v_tmp_count);
378 double h_tmp_count = v_tmp_count;
379
380 /*
381 System.err.println("Recalculating polygon for country with high count: " + countryCode + ".");
382 System.err.println("count = " + count);
383 System.err.println("v = " + v_tmp_count);
384 System.err.println("h = " + h_tmp_count);
385 System.err.println("lat = " + lat);
386 System.err.println("lng = " + lng + "\n");
387 */
388
389 north = lat + v_tmp_count;
390 south = lat;
391 east = lng + (h_tmp_count * half_width); // a certain width, half_width, represents one unit in the x axis
392 west = lng - (h_tmp_count * half_width);
393
394 /*
395 System.err.println("north = " + north);
396 System.err.println("south = " + south);
397 System.err.println("east = " + east);
398 System.err.println("west = " + west + "\n");
399 */
400
401 if(north > 90) {
402 // centre vertically on lat
403 north = lat + (v_tmp_count/2);
404 south = lat - (v_tmp_count/2);
405 }
406
407 if(west < -180.0) {
408 double h_diff = -180.0 - west; // west is a larger negative value than -180, so subtracting west from -180 produces a positive h_diff value
409 west = -180.0; // set to extreme western edge
410 east = east + h_diff;
411 }
412 else if(east > 180.0) {
413 double h_diff = east - 180.0; // the country's longitude (lng) is h_diff from the eastern edge
414 east = 180.0; // maximise eastern edge
415 west = west - h_diff; // then grow the remainder of h_tmp_count in the opposite (western/negative) direction
416 }
417
418 // NOTE: Can't centre on country, (lat,lng), as we don't know whether either of lat or lng has gone past the edge
419
420 // Hopefully we don't exceed +90/-90 lat and +/-180 longitude
421 /*
422 System.err.println("north = " + north);
423 System.err.println("south = " + south);
424 System.err.println("east = " + east);
425 System.err.println("west = " + west);
426 */
427 }
428
429 List<List<Position>> outerList = new LinkedList<List<Position>>();
430 List<Position> points = new LinkedList<Position>();
431 outerList.add(points);
432
433
434 points.add(new Position(west, south)); // Position(lng, lat) not Position(lat, lng)
435 points.add(new Position(west, north));
436 points.add(new Position(east, north));
437 points.add(new Position(east, south));
438
439
440 Geometry rectangle = new Polygon(outerList);
441
442 // Coords: a List of List of Positions, see https://ngageoint.github.io/simple-features-geojson-java/docs/api/
443 // https://www.here.xyz/api/concepts/geojsonbasics/#polygon
444
445 return rectangle;
446 }
447
448 public String writeMultiPointGeoJsonToFile() {
449 final String filename = "multipoint_" + this.geoJsonFilenameWithSuffix;
450 File outFile = new File(this.outputFolder, filename);
451
452 Geometry geometry = this.toMultiPointGeoJson();
453 String multiPointGeojsonString = FeatureConverter.toStringValue(geometry);
454 System.err.println("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n");
455 try (
456 Writer writer = new BufferedWriter(new FileWriter(outFile));
457 ) {
458
459 // Some basic re-formatting for some immediate legibility
460 // But pasting the contents of the file (or the System.err output above)
461 // directly into http://geojson.tools/ will instantly reformat the json perfectly anyway.
462 multiPointGeojsonString = multiPointGeojsonString.replace("[[", "\n[\n\t[");
463 multiPointGeojsonString = multiPointGeojsonString.replace("],[", "],\n\t[");
464 multiPointGeojsonString = multiPointGeojsonString.replace("]]", "]\n]");
465
466 writer.write(multiPointGeojsonString + "\n");
467 } catch(Exception e) {
468 logger.error("Unable to write multipoint geojson:\n**********************");
469 logger.error(multiPointGeojsonString);
470 logger.error("**********************\ninto file " + outFile.getAbsolutePath());
471 logger.error(e.getMessage(), e);
472 }
473
474 return outFile.getAbsolutePath();
475
476 }
477
478 public String writeFeaturesGeoJsonToFile() {
479 final String filename = "geojson-features_" + this.geoJsonFilenameWithSuffix;
480 File outFile = new File(this.outputFolder, filename);
481
482 FeatureCollection featureColl = this.toFeatureCollection();
483 String featuresGeojsonString = FeatureConverter.toStringValue(featureColl);
484 System.err.println("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");
485 try (
486 Writer writer = new BufferedWriter(new FileWriter(outFile));
487 ) {
488
489 writer.write(featuresGeojsonString + "\n");
490 } catch(Exception e) {
491 logger.error("Unable to write multipoint geojson:\n**********************");
492 logger.error(featuresGeojsonString);
493 logger.error("**********************\ninto file " + outFile.getAbsolutePath());
494 logger.error(e.getMessage(), e);
495 }
496
497 return outFile.getAbsolutePath();
498
499 }
500
501
502 public int getTotalCount() {
503 int total = 0;
504 for(JsonElement obj : this.countryCountsJsonArray) {
505 JsonObject json = obj.getAsJsonObject();
506 int count = json.get("count").getAsInt();
507 total += count;
508 }
509 return total;
510 }
511
512
513 // Unfinished and unused
514 public void parseCSVFile(String filename) throws Exception {
515 File csvData = new File(filename);
516 CSVParser parser = CSVParser.parse(csvData, java.nio.charset.Charset.forName("US-ASCII"), CSVFormat.RFC4180);
517 for (CSVRecord csvRecord : parser) {
518 logger.info("Got record: " + csvRecord.toString());
519 }
520 }
521
522 public static void printUsage() {
523 System.err.println("CountryCodeCountsMapData <counts-by-countrycode-file>.json");
524 }
525
526 public static void main(String args[]) {
527 if(args.length != 1) {
528 printUsage();
529 System.exit(-1);
530 }
531
532 try {
533 File countsFile = new File(args[0]);
534
535 CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0]);
536
537 String multipointOutFileName = mapData.writeMultiPointGeoJsonToFile();
538 String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile();
539
540 System.err.println("***********\nWrote mapdata to files " + multipointOutFileName
541 + " and " + featuresOutFileName);
542 System.err.println("You can paste the geojson contents of either of these files into the "
543 + "editor at http://geojson.tools/ to see the data arranged on a world map");
544
545 System.err.println("Total count for query: " + mapData.getTotalCount());
546
547 } catch(Exception e) {
548 logger.error(e.getMessage(), e);
549 }
550 }
551}
Note: See TracBrowser for help on using the repository browser.