source: other-projects/maori-lang-detection/src/org/greenstone/atea/CountryCodeCountsMapData.java@ 33812

Last change on this file since 33812 was 33812, checked in by ak19, 4 years ago

Better handling of multi-line comment symbols, so I can now include proper multi-line spanning comments in my .json files

File size: 17.4 KB
Line 
1package org.greenstone.atea;
2
3import java.io.BufferedReader;
4import java.io.BufferedWriter;
5import java.io.File;
6import java.io.FileReader;
7import java.io.FileWriter;
8import java.io.Writer;
9
10import java.util.HashMap;
11import java.util.LinkedList;
12import java.util.List;
13import java.util.Map;
14
15import org.apache.commons.csv.*;
16import org.apache.log4j.Logger;
17
18// Google's gson imports for parsing any kind of json
19import com.google.gson.JsonArray;
20import com.google.gson.JsonElement;
21import com.google.gson.JsonObject;
22import com.google.gson.JsonParser;
23
24// For working with GeoJSON's Simple Features in Java
25import mil.nga.sf.geojson.Feature;
26import mil.nga.sf.geojson.FeatureCollection;
27import mil.nga.sf.geojson.FeatureConverter;
28import mil.nga.sf.geojson.Geometry;
29import mil.nga.sf.geojson.MultiPoint;
30import mil.nga.sf.geojson.Polygon;
31import mil.nga.sf.geojson.Position;
32
33
34/**
35 * Run a mongodb query that produces counts per countrycode like in the following 2 examples:
36 *
37 * 1. count of country codes for all sites
38 * db.Websites.aggregate([
39 *
40 * { $unwind: "$geoLocationCountryCode" },
41 * {
42 * $group: {
43 * _id: "$geoLocationCountryCode",
44 * count: { $sum: 1 }
45 * }
46 * },
47 * { $sort : { count : -1} }
48 * ]);
49 *
50 * Then store the mongodb query result's JSON format output in a file called "counts.json".
51 * Then run this program with counts.json as parameter
52 * Copy the geojson output into http://geojson.tools/
53 *
54 * 2. count of country codes for sites that have at least one page detected as MRI
55 *
56 * db.Websites.aggregate([
57 * {
58 * $match: {
59 * numPagesInMRI: {$gt: 0}
60 * }
61 * },
62 * { $unwind: "$geoLocationCountryCode" },
63 * {
64 * $group: {
65 * _id: {$toLower: '$geoLocationCountryCode'},
66 * count: { $sum: 1 }
67 * }
68 * },
69 * { $sort : { count : -1} }
70 * ]);
71 *
72 * Store the mongodb query result's JSON format output in a file called "counts_sitesWithPagesInMRI.json".
73 * Then run this program with counts_sitesWithPagesInMRI.json as parameter.
74 * Copy the geojson output into http://geojson.tools/
75 *
76 * ##################
77 * TO COMPILE:
78 * maori-lang-detection/src$
79 * javac -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData.java
80 *
81 * TO RUN:
82 * maori-lang-detection/src$
83 * java -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData ../mongodb-data/counts.json
84 *###################
85 *
86 * This class needs the gson library, and now the sf-geojson(-2.02).jar and
87 * helper jars sf(-2.02).jar and 3 jackson jars too,
88 * to create and store Simple Features geo json objects with Java.
89 * I copied the gson jar file from GS3.
90 *
91 * Simple Features GeoJSON Java
92 * https://ngageoint.github.io/simple-features-geojson-java/ - links to API and more
93 *
94 * https://mvnrepository.com/artifact/mil.nga.sf/sf-geojson (https://github.com/ngageoint/simple-features-geojson-java/)
95 *
96 * Also need the basic data types used by the Geometry objects above:
97 * https://mvnrepository.com/artifact/mil.nga/sf (https://github.com/ngageoint/simple-features-java)
98 *
99 * Further helper jars needed (because of encountering the exception documented at
100 * stackoverflow.com/questions/36278293/java-lang-classnotfoundexception-com-fasterxml-jackson-core-jsonprocessingexcep/36279872)
101 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core/2.10.0
102 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind
103 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations/2.10.0
104 */
105public class CountryCodeCountsMapData {
106 static Logger logger = Logger.getLogger(org.greenstone.atea.CountryCodeCountsMapData.class.getName());
107
108 //Map<String, JsonObject> countryToJsonMap;
109 JsonArray countryCodesJsonArray;
110 JsonArray countryCountsJsonArray;
111
112 // North-central Antarctica coords
113 private final double ANTARCTICA_LNG = 57.0d;
114 private final double ANTARCTICA_LAT = -70.0d;
115 // For EU coords, spot in Atlantic Ocean close to western European coast.
116 private final double EU_LNG = -20.0d;
117 private final double EU_LAT = 50.0d;
118
119 private final String geoJsonFilenameWithSuffix;
120 private final File outputFolder;
121
122 public CountryCodeCountsMapData(String countryCountsJSONFilename) throws Exception {
123
124 // work out the unique filename we're going to save the geojson files under
125 // and the folder we're going to save them into
126 File countryCountsJSONFile = new File(countryCountsJSONFilename);
127 String tailname = countryCountsJSONFile.getName();
128 this.geoJsonFilenameWithSuffix = (tailname.startsWith("counts_")) ? tailname.substring("counts_".length()) : tailname;
129 this.outputFolder = countryCountsJSONFile.getParentFile().getCanonicalFile(); // canonical resolves any .. and . in path
130
131 // locate the countrycodes.json file
132 File countryCoordsJSONFile = new File(this.getClass().getClassLoader().getResource("countrycodes.json").getFile());
133
134 // Create a map of ALL country code names to ALL the country code json objects
135 // that contain the location (lat, lng) info for each country code
136 Map<String, JsonObject> countryToJsonMap = new HashMap<String, JsonObject>();
137
138 // Parse json file of country codes and put into a JsonArray.
139 // then put into map of each country code to its JsonObject.
140 countryCodesJsonArray = parseJSONFile(countryCoordsJSONFile);
141 for(JsonElement obj : countryCodesJsonArray) {
142 JsonObject countryCodeJson = obj.getAsJsonObject();
143 countryToJsonMap.put(countryCodeJson.get("country").getAsString(), countryCodeJson);
144 }
145
146 // Parse json file of country code counts
147 // Then for each JsonObject in this file,
148 // find a match on its country code in the map created above to get a country code JsonObject
149 // Get the longitude and latitude of the JsonObject that matched that country code.
150 // Add this lng,lat location information to the current JsonObject from the counts file.
151 countryCountsJsonArray = parseJSONFile(countryCountsJSONFile);
152
153 for(JsonElement obj : countryCountsJsonArray) {
154 JsonObject json = obj.getAsJsonObject();
155 String countryCode = json.get("_id").getAsString().toUpperCase();
156 // set the property back as uppercase and with property name "countrycode" instead of "_id"
157 json.remove("_id");
158 json.addProperty("countrycode", countryCode);
159
160 int count = (int)json.get("count").getAsDouble();
161
162 //logger.info("Got country code: " + countryCode);
163 //logger.info(" count: " + count);
164
165 // locate in countryCode map
166 JsonObject countryCodeJson = countryToJsonMap.get(countryCode);
167
168 if(countryCodeJson != null) {
169 //logger.info("Found in map: " + countryCodeJson.toString());
170
171 // for geojson, want longitude then latitude
172 Double lng = countryCodeJson.get("longitude").getAsDouble();
173 Double lat = countryCodeJson.get("latitude").getAsDouble();
174 //logger.info("long: " + Double.toString(lng) + ", lat: " + Double.toString(lat));
175 String countryName = countryCodeJson.get("name").getAsString();
176
177 // let's add lat and lng fields to countryCounts object
178 json.addProperty("lng", lng); // adds Number: https://javadoc.io/static/com.google.code.gson/gson/2.8.5/com/google/gson/JsonObject.html
179 json.addProperty("lat", lat);
180 json.addProperty("region", countryName);
181
182 } else {
183 logger.info("No geolocation info found for country code " + countryCode);
184 if(countryCode.equals("EU")) {
185 //logger.info("Unlisted country code: EU");
186 // add lat and lng for Europe
187 json.addProperty("lng", EU_LNG);
188 json.addProperty("lat", EU_LAT);
189 json.addProperty("region", "Europe");
190 }
191 else if(countryCode.equals("UNKNOWN")) {
192 //logger.info("Unlisted country code: UNKNOWN");
193 // add lat and lng for Antarctica
194 json.addProperty("lng", ANTARCTICA_LNG);
195 json.addProperty("lat", ANTARCTICA_LAT);
196 json.addProperty("region", "UNKNOWN");
197 } else {
198 logger.error("ERROR: entirely unknown country code: " + countryCode);
199 }
200 }
201 }
202
203 }
204
205 /** Convert mongodb tabular output of json records stored in the given file
206 * into a JsonArray.
207 */
208 public JsonArray parseJSONFile(File file) throws Exception {
209 JsonArray jsonArray = null;
210 // read into string
211 try (
212 BufferedReader reader = new BufferedReader(new FileReader(file));
213 ) {
214 StringBuilder str = //new StringBuilder();
215 new StringBuilder("[");
216 String line;
217
218 boolean multi_line_comment = false;
219
220 while((line = reader.readLine()) != null) {
221 line = line.trim();
222
223 // ignore any single line comments nested in multi-line symbols
224 if(line.startsWith("/*") && line.endsWith("*/")) {
225 continue; // skip line
226 }
227
228 // skip multi-line comments spread over multiple lines
229 // assumes this ends on a line containing */ without further content on the line.
230 if(line.startsWith("/*") && !line.endsWith("*/")) {
231 multi_line_comment = true;
232 continue; // skip line
233 }
234 if(multi_line_comment) {
235 if(line.contains("*/")) {
236 multi_line_comment = false;
237 }
238
239 continue; // we're in a comment or at end of comment, skip line
240 }
241
242 str.append(line);
243 if(line.endsWith("}")) {
244 str.append(",\n");
245 }
246 }
247 // replace last comma with closing bracket
248 String fileContents = str.substring(0, str.length()-2) + "]";
249
250 //System.err.println("Got file:\n" + fileContents);
251
252 // https://stackoverflow.com/questions/2591098/how-to-parse-json-in-java
253 jsonArray = new JsonParser().parse(fileContents).getAsJsonArray();
254
255 } catch(Exception e) {
256 throw e;
257 }
258
259
260 return jsonArray;
261 }
262
263 /**
264 * Reading
265 * https://www.here.xyz/api/concepts/geojsonbasics/
266 * https://ngageoint.github.io/simple-features-geojson-java/docs/api/
267 *
268 * https://stackoverflow.com/questions/55621480/cant-access-coordinates-member-of-geojson-feature-collection
269 *
270 * Downloaded geojson simple features' jar file from maven, but it didn't work:
271 * a more private version of MultiPoint.java is not included in the jar file (there's only
272 * mil.nga.sf.geojson.MultiPoint , whereas
273 * mil.nga.sf.MultiPoint is missing
274 *
275 * This seems to have gone wrong at
276 * https://github.com/ngageoint/simple-features-geojson-java/tree/master/src/main/java/mil/nga/sf
277 * but the one at
278 * https://github.com/ngageoint/simple-features-java/tree/master/src/main/java/mil/nga/sf
279 * has it. So I've been trying to build that, but don't have the correct version of maven.
280 */
281 public Geometry toMultiPointGeoJson() {
282 //System.err.println("toGeoJSON() is not yet implemented.");
283
284 List<Position> points = new LinkedList<Position>();
285
286 for(JsonElement obj : this.countryCountsJsonArray) {
287 JsonObject json = obj.getAsJsonObject();
288 Double lng = json.get("lng").getAsDouble();
289 Double lat = json.get("lat").getAsDouble();
290
291 Position point = new Position(lng, lat);
292 points.add(point);
293 }
294
295 Geometry multiPoint = new MultiPoint(points);
296
297 return multiPoint;
298 }
299
300 // https://javadoc.io/static/com.google.code.gson/gson/2.8.5/index.html
301 public FeatureCollection toFeatureCollection() {
302 final int HISTOGRAM_WIDTH = 4;
303
304 FeatureCollection featureCollection = new FeatureCollection();
305
306 for(JsonElement obj : this.countryCountsJsonArray) {
307 JsonObject json = obj.getAsJsonObject();
308
309 String countryCode = json.get("countrycode").getAsString();
310 String region = json.get("region").getAsString();
311 int count = json.get("count").getAsInt();
312
313 // make a histogram for each country
314 Geometry rectangle = this.toPolygon(json, count, HISTOGRAM_WIDTH);
315
316 Feature countryFeature = new Feature(rectangle);
317 Map<String, Object> featureProperties = new HashMap<String, Object>();
318 featureProperties.put("count", new Integer(count));
319 featureProperties.put("code", countryCode);
320 featureProperties.put("region", region);
321 countryFeature.setProperties(featureProperties);
322
323 featureCollection.addFeature(countryFeature);
324 }
325
326 return featureCollection;
327 }
328
329 // create rectangular "histogram" for each country code
330 private Geometry toPolygon(JsonObject json, int count, int HISTOGRAM_WIDTH) {
331 int half_width = HISTOGRAM_WIDTH/2;
332 double vertical_factor = 1.0;
333
334 Double lng = json.get("lng").getAsDouble();
335 Double lat = json.get("lat").getAsDouble();
336
337 String countryCode = json.get("countrycode").getAsString();
338
339
340 //create the 4 corners of the rectangle
341 // West is negative, east is positive, south is negative, north is positive
342 // See http://www.learnz.org.nz/sites/learnz.org.nz/files/lat-long-geo-data-01_0.jpg
343 // But since the histograms grow vertically/northwards and we can't go past a latitude of 90,
344 // to compensate, we increase the width of the histograms by the same factor as our inability
345 // to grow northwards.
346 Double north = lat + (vertical_factor * count);
347 while (north > 90) {
348 // recalculate north after decreasing histogram's vertical growth
349 // by the same factor as we increase its width
350 vertical_factor = vertical_factor/2.0;
351 half_width = 2 * half_width;
352 north = lat + (vertical_factor * count);
353 }
354 Double east = lng + half_width;
355 Double west = lng - half_width;
356 Double south = lat;
357
358 List<List<Position>> outerList = new LinkedList<List<Position>>();
359 List<Position> points = new LinkedList<Position>();
360 outerList.add(points);
361
362
363 points.add(new Position(west, south)); // Position(lng, lat) not Position(lat, lng)
364 points.add(new Position(west, north));
365 points.add(new Position(east, north));
366 points.add(new Position(east, south));
367
368
369 Geometry rectangle = new Polygon(outerList);
370
371 // Coords: a List of List of Positions, see https://ngageoint.github.io/simple-features-geojson-java/docs/api/
372 // https://www.here.xyz/api/concepts/geojsonbasics/#polygon
373
374 return rectangle;
375 }
376
377 public String writeMultiPointGeoJsonToFile() {
378 final String filename = "multipoint_" + this.geoJsonFilenameWithSuffix;
379 File outFile = new File(this.outputFolder, filename);
380
381 Geometry geometry = this.toMultiPointGeoJson();
382 String multiPointGeojsonString = FeatureConverter.toStringValue(geometry);
383 System.err.println("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n");
384 try (
385 Writer writer = new BufferedWriter(new FileWriter(outFile));
386 ) {
387
388 // Some basic re-formatting for some immediate legibility
389 // But pasting the contents of the file (or the System.err output above)
390 // directly into http://geojson.tools/ will instantly reformat the json perfectly anyway.
391 multiPointGeojsonString = multiPointGeojsonString.replace("[[", "\n[\n\t[");
392 multiPointGeojsonString = multiPointGeojsonString.replace("],[", "],\n\t[");
393 multiPointGeojsonString = multiPointGeojsonString.replace("]]", "]\n]");
394
395 writer.write(multiPointGeojsonString + "\n");
396 } catch(Exception e) {
397 logger.error("Unable to write multipoint geojson:\n**********************");
398 logger.error(multiPointGeojsonString);
399 logger.error("**********************\ninto file " + outFile.getAbsolutePath());
400 logger.error(e.getMessage(), e);
401 }
402
403 return outFile.getAbsolutePath();
404
405 }
406
407 public String writeFeaturesGeoJsonToFile() {
408 final String filename = "geojson-features_" + this.geoJsonFilenameWithSuffix;
409 File outFile = new File(this.outputFolder, filename);
410
411 FeatureCollection featureColl = this.toFeatureCollection();
412 String featuresGeojsonString = FeatureConverter.toStringValue(featureColl);
413 System.err.println("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");
414 try (
415 Writer writer = new BufferedWriter(new FileWriter(outFile));
416 ) {
417
418 writer.write(featuresGeojsonString + "\n");
419 } catch(Exception e) {
420 logger.error("Unable to write multipoint geojson:\n**********************");
421 logger.error(featuresGeojsonString);
422 logger.error("**********************\ninto file " + outFile.getAbsolutePath());
423 logger.error(e.getMessage(), e);
424 }
425
426 return outFile.getAbsolutePath();
427
428 }
429
430
431 public int getTotalCount() {
432 int total = 0;
433 for(JsonElement obj : this.countryCountsJsonArray) {
434 JsonObject json = obj.getAsJsonObject();
435 int count = json.get("count").getAsInt();
436 total += count;
437 }
438 return total;
439 }
440
441
442 // Unfinished and unused
443 public void parseCSVFile(String filename) throws Exception {
444 File csvData = new File(filename);
445 CSVParser parser = CSVParser.parse(csvData, java.nio.charset.Charset.forName("US-ASCII"), CSVFormat.RFC4180);
446 for (CSVRecord csvRecord : parser) {
447 logger.info("Got record: " + csvRecord.toString());
448 }
449 }
450
451 public static void printUsage() {
452 System.err.println("CountryCodeCountsMapData <counts-by-countrycode-file>.json");
453 }
454
455 public static void main(String args[]) {
456 if(args.length != 1) {
457 printUsage();
458 System.exit(-1);
459 }
460
461 try {
462 File countsFile = new File(args[0]);
463
464 CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0]);
465
466 String multipointOutFileName = mapData.writeMultiPointGeoJsonToFile();
467 String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile();
468
469 System.err.println("***********\nWrote mapdata to files " + multipointOutFileName
470 + " and " + featuresOutFileName);
471 System.err.println("You can paste the geojson contents of either of these files into the "
472 + "editor at http://geojson.tools/ to see the data arranged on a world map");
473
474 System.err.println("Total count for query: " + mapData.getTotalCount());
475
476 } catch(Exception e) {
477 logger.error(e.getMessage(), e);
478 }
479 }
480}
Note: See TracBrowser for help on using the repository browser.