package org.greenstone.atea;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.csv.*;
import org.apache.log4j.Logger;

// Google's gson imports for parsing any kind of json
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;

// For working with GeoJSON's Simple Features in Java
import mil.nga.sf.geojson.Feature;
import mil.nga.sf.geojson.FeatureCollection;
import mil.nga.sf.geojson.FeatureConverter;
import mil.nga.sf.geojson.Geometry;
import mil.nga.sf.geojson.MultiPoint;
import mil.nga.sf.geojson.Polygon;
import mil.nga.sf.geojson.Position;

/**
 * Turns per-country-code counts produced by a mongodb aggregation into GeoJSON map data.
 *
 * Run a mongodb query that produces counts per countrycode like in the following 2 examples:
 *
 * 1. count of country codes for all sites
 *
 *    db.Websites.aggregate([
 *       { $unwind: "$geoLocationCountryCode" },
 *       {
 *          $group: {
 *             _id: "$geoLocationCountryCode",
 *             count: { $sum: 1 }
 *          }
 *       },
 *       { $sort : { count : -1} }
 *    ]);
 *
 *    Then store the mongodb query result's JSON format output in a file called "counts.json".
 *    Then run this program with counts.json as parameter.
 *    Copy the geojson output into http://geojson.tools/
 *
 * 2. count of country codes for sites that have at least one page detected as MRI
 *
 *    db.Websites.aggregate([
 *       {
 *          $match: {
 *             numPagesInMRI: {$gt: 0}
 *          }
 *       },
 *       { $unwind: "$geoLocationCountryCode" },
 *       {
 *          $group: {
 *             _id: {$toLower: '$geoLocationCountryCode'},
 *             count: { $sum: 1 }
 *          }
 *       },
 *       { $sort : { count : -1} }
 *    ]);
 *
 *    Store the mongodb query result's JSON format output in a file called
 *    "counts_sitesWithPagesInMRI.json".
 *    Then run this program with counts_sitesWithPagesInMRI.json as parameter.
 *    Copy the geojson output into http://geojson.tools/
 *
 * ##################
 * TO COMPILE:
 *    maori-lang-detection/src$
 *       javac -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData.java
 *
 * TO RUN (exactly one argument: the counts file; countrycodes.json is looked up on the classpath):
 *    maori-lang-detection/src$
 *       java -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData ../mongodb-data/counts.json
 * ###################
 *
 * This class needs the gson library, and now the sf-geojson(-2.02).jar and
 * helper jars sf(-2.02).jar and 3 jackson jars too,
 * to create and store Simple Features geo json objects with Java.
 * I copied the gson jar file from GS3.
 *
 * Simple Features GeoJSON Java
 * https://ngageoint.github.io/simple-features-geojson-java/ - links to API and more
 *
 * https://mvnrepository.com/artifact/mil.nga.sf/sf-geojson (https://github.com/ngageoint/simple-features-geojson-java/)
 *
 * Also need the basic data types used by the Geometry objects above:
 * https://mvnrepository.com/artifact/mil.nga/sf (https://github.com/ngageoint/simple-features-java)
 *
 * Further helper jars needed (because of encountering the exception documented at
 * stackoverflow.com/questions/36278293/java-lang-classnotfoundexception-com-fasterxml-jackson-core-jsonprocessingexcep/36279872)
 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core/2.10.0
 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind
 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations/2.10.0
 */
public class CountryCodeCountsMapData {

    static Logger logger = Logger.getLogger(CountryCodeCountsMapData.class.getName());

    /** Name of the classpath resource holding the lat/lng of every known country code. */
    private static final String COUNTRY_CODES_RESOURCE = "countrycodes.json";

    /** Prefix stripped from the counts filename when deriving the output filenames. */
    private static final String COUNTS_PREFIX = "counts_";

    /** Matches a slash-star comment marker that sits entirely on one line, e.g. "/* 1 *" + "/". */
    private static final Pattern SINGLE_LINE_COMMENT = Pattern.compile("/\\* [^\\/]* \\*/");

    /** Latitudes cannot exceed this value, so histograms must not grow past it. */
    private static final double MAX_LATITUDE = 90.0d;

    // North-central Antarctica coords, used to place the "UNKNOWN" country code
    private static final double ANTARCTICA_LNG = 57.0d;
    private static final double ANTARCTICA_LAT = -70.0d;

    // For EU coords, spot in Atlantic Ocean close to western European coast.
    private static final double EU_LNG = -20.0d;
    private static final double EU_LAT = 50.0d;

    /** All records of countrycodes.json. */
    JsonArray countryCodesJsonArray;

    /**
     * All records of the counts file. After construction each record carries "countrycode"
     * (uppercased, replacing "_id") and "count", plus "lng", "lat" and "region" whenever a
     * location could be determined for its country code.
     */
    JsonArray countryCountsJsonArray;

    private final String geoJsonFilenameWithSuffix;
    private final File outputFolder;

    /**
     * Loads the counts file and annotates every record with the location of its country code.
     *
     * @param countryCountsJSONFilename path to the file holding the mongodb aggregation output
     * @throws Exception if countrycodes.json is not on the classpath, or if either json file
     *         cannot be read or parsed
     */
    public CountryCodeCountsMapData(String countryCountsJSONFilename) throws Exception {

        // work out the unique filename we're going to save the geojson files under
        // and the folder we're going to save them into
        File countryCountsJSONFile = new File(countryCountsJSONFilename);
        String tailname = countryCountsJSONFile.getName();
        this.geoJsonFilenameWithSuffix = tailname.startsWith(COUNTS_PREFIX)
            ? tailname.substring(COUNTS_PREFIX.length())
            : tailname;
        // Go via the absolute file first: getParentFile() is null for a bare filename such as
        // "counts.json". Canonical then resolves any .. and . in the path.
        this.outputFolder = countryCountsJSONFile.getAbsoluteFile().getParentFile().getCanonicalFile();

        // Locate countrycodes.json on the classpath and parse it into a JsonArray.
        // Reading it as a stream also works when the path contains spaces or lives in a jar.
        InputStream countryCodesStream =
            getClass().getClassLoader().getResourceAsStream(COUNTRY_CODES_RESOURCE);
        if (countryCodesStream == null) {
            throw new FileNotFoundException(
                "Could not find " + COUNTRY_CODES_RESOURCE + " on the classpath");
        }
        try (Reader reader = new InputStreamReader(countryCodesStream, StandardCharsets.UTF_8)) {
            countryCodesJsonArray = parseJSON(reader);
        }

        // Create a map of ALL country code names to ALL the country code json objects
        // that contain the location (lat, lng) info for each country code
        Map<String, JsonObject> countryToJsonMap = new HashMap<String, JsonObject>();
        for (JsonElement obj : countryCodesJsonArray) {
            JsonObject countryCodeJson = obj.getAsJsonObject();
            countryToJsonMap.put(countryCodeJson.get("country").getAsString(), countryCodeJson);
        }

        // Parse json file of country code counts
        // Then for each JsonObject in this file,
        // find a match on its country code in the map created above to get a country code JsonObject
        // Get the longitude and latitude of the JsonObject that matched that country code.
        // Add this lng,lat location information to the current JsonObject from the counts file.
        countryCountsJsonArray = parseJSONFile(countryCountsJSONFile);
        for (JsonElement obj : countryCountsJsonArray) {
            JsonObject json = obj.getAsJsonObject();
            String countryCode = json.get("_id").getAsString().toUpperCase();
            // set the property back as uppercase and with property name "countrycode" instead of "_id"
            json.remove("_id");
            json.addProperty("countrycode", countryCode);

            JsonObject countryCodeJson = countryToJsonMap.get(countryCode);
            if (countryCodeJson != null) {
                // for geojson, want longitude then latitude
                Double lng = countryCodeJson.get("longitude").getAsDouble();
                Double lat = countryCodeJson.get("latitude").getAsDouble();
                String countryName = countryCodeJson.get("name").getAsString();

                // let's add lat and lng fields to countryCounts object
                json.addProperty("lng", lng);
                json.addProperty("lat", lat);
                json.addProperty("region", countryName);
            } else {
                logger.info("No geolocation info found for country code " + countryCode);
                if (countryCode.equals("EU")) {
                    // add lat and lng for Europe
                    json.addProperty("lng", EU_LNG);
                    json.addProperty("lat", EU_LAT);
                    json.addProperty("region", "Europe");
                } else if (countryCode.equals("UNKNOWN")) {
                    // add lat and lng for Antarctica
                    json.addProperty("lng", ANTARCTICA_LNG);
                    json.addProperty("lat", ANTARCTICA_LAT);
                    json.addProperty("region", "UNKNOWN");
                } else {
                    // No location is attached: the record is left out of the generated
                    // map data but still contributes to getTotalCount().
                    logger.error("ERROR: entirely unknown country code: " + countryCode);
                }
            }
        }
    }

    /**
     * Convert mongodb tabular output of json records stored in the given file
     * into a JsonArray. The file is read as UTF-8.
     *
     * @param file file containing a sequence of json objects
     * @return the objects of the file as one JsonArray; empty if the file holds no records
     * @throws Exception if the file cannot be read or its contents are not valid json
     */
    public JsonArray parseJSONFile(File file) throws Exception {
        try (Reader reader = new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)) {
            return parseJSON(reader);
        }
    }

    /**
     * Joins the sequence of json objects supplied by the reader into a single json array
     * and parses that. The caller remains responsible for closing the reader.
     */
    private JsonArray parseJSON(Reader in) throws Exception {
        BufferedReader reader = new BufferedReader(in);
        StringBuilder str = new StringBuilder("[");

        String line;
        while ((line = reader.readLine()) != null) {
            // get rid of any multiline comment symbols sitting on a single line
            line = SINGLE_LINE_COMMENT.matcher(line).replaceAll("");
            str.append(line);
            // A line ending in a closing brace is taken to end a record, so a separator
            // follows it. Trailing whitespace must not hide the brace.
            if (line.trim().endsWith("}")) {
                str.append(",\n");
            }
        }

        // Drop the separator after the last record, if there is one, then close the array.
        // Never cut into the opening bracket (the case of a file without any records).
        int end = str.length();
        while (end > 1 && Character.isWhitespace(str.charAt(end - 1))) {
            end--;
        }
        if (end > 1 && str.charAt(end - 1) == ',') {
            end--;
        }
        String fileContents = str.substring(0, end) + "]";

        // https://stackoverflow.com/questions/2591098/how-to-parse-json-in-java
        return new JsonParser().parse(fileContents).getAsJsonArray();
    }

    /** @return true if the constructor was able to attach a location to this counts record. */
    private static boolean hasLocation(JsonObject json) {
        return json.has("lng") && json.has("lat");
    }

    /**
     * Builds a MultiPoint geometry with one point per counts record that has a location.
     *
     * Reading
     * https://www.here.xyz/api/concepts/geojsonbasics/
     * https://ngageoint.github.io/simple-features-geojson-java/docs/api/
     *
     * https://stackoverflow.com/questions/55621480/cant-access-coordinates-member-of-geojson-feature-collection
     *
     * Downloaded geojson simple features' jar file from maven, but it didn't work:
     * a more private version of MultiPoint.java is not included in the jar file (there's only
     * mil.nga.sf.geojson.MultiPoint, whereas mil.nga.sf.MultiPoint is missing).
     *
     * This seems to have gone wrong at
     * https://github.com/ngageoint/simple-features-geojson-java/tree/master/src/main/java/mil/nga/sf
     * but the one at
     * https://github.com/ngageoint/simple-features-java/tree/master/src/main/java/mil/nga/sf
     * has it. So I've been trying to build that, but don't have the correct version of maven.
     *
     * @return the MultiPoint geometry; records without a known location are left out
     */
    public Geometry toMultiPointGeoJson() {
        List<Position> points = new ArrayList<Position>();

        for (JsonElement obj : this.countryCountsJsonArray) {
            JsonObject json = obj.getAsJsonObject();
            if (!hasLocation(json)) {
                continue; // entirely unknown country code, already reported by the constructor
            }
            Double lng = json.get("lng").getAsDouble();
            Double lat = json.get("lat").getAsDouble();
            points.add(new Position(lng, lat));
        }

        return new MultiPoint(points);
    }

    /**
     * Builds a FeatureCollection holding one rectangular "histogram" feature per counts
     * record that has a location. Each feature has the properties "count", "code" and "region".
     *
     * https://javadoc.io/static/com.google.code.gson/gson/2.8.5/index.html
     *
     * @return the feature collection; records without a known location are left out
     */
    public FeatureCollection toFeatureCollection() {
        final int HISTOGRAM_WIDTH = 4;

        FeatureCollection featureCollection = new FeatureCollection();

        for (JsonElement obj : this.countryCountsJsonArray) {
            JsonObject json = obj.getAsJsonObject();
            if (!hasLocation(json)) {
                continue; // entirely unknown country code, already reported by the constructor
            }

            String countryCode = json.get("countrycode").getAsString();
            String region = json.get("region").getAsString();
            int count = json.get("count").getAsInt();

            // make a histogram for each country
            Geometry rectangle = this.toPolygon(json, count, HISTOGRAM_WIDTH);

            Feature countryFeature = new Feature(rectangle);
            Map<String, Object> featureProperties = new HashMap<String, Object>();
            featureProperties.put("count", Integer.valueOf(count));
            featureProperties.put("code", countryCode);
            featureProperties.put("region", region);
            countryFeature.setProperties(featureProperties);

            featureCollection.addFeature(countryFeature);
        }

        return featureCollection;
    }

    /**
     * Create the rectangular "histogram" for one country code.
     *
     * @param json counts record carrying "lng" and "lat"
     * @param count height of the histogram in degrees of latitude, before any rescaling
     * @param histogramWidth width of the histogram in degrees of longitude, before any rescaling
     * @return a Polygon whose single ring is the rectangle
     */
    private Geometry toPolygon(JsonObject json, int count, int histogramWidth) {
        int halfWidth = histogramWidth / 2;
        double verticalFactor = 1.0;

        double lng = json.get("lng").getAsDouble();
        double lat = json.get("lat").getAsDouble();

        // West is negative, east is positive, south is negative, north is positive
        // See http://www.learnz.org.nz/sites/learnz.org.nz/files/lat-long-geo-data-01_0.jpg
        // But since the histograms grow vertically/northwards and we can't go past a latitude
        // of 90, to compensate, we increase the width of the histograms by the same factor as
        // our inability to grow northwards.
        double northLimit = lat + (verticalFactor * count);
        // The check on lat stops this loop from spinning forever on a base latitude that
        // already lies beyond the limit, since halving can never bring that one back down.
        while (lat < MAX_LATITUDE && northLimit > MAX_LATITUDE) {
            // recalculate north after decreasing histogram's vertical growth
            // by the same factor as we increase its width
            verticalFactor = verticalFactor / 2.0;
            halfWidth = 2 * halfWidth;
            northLimit = lat + (verticalFactor * count);
        }

        Double north = northLimit;
        Double south = lat;
        Double east = lng + halfWidth;
        Double west = lng - halfWidth;

        // The 4 corners of the rectangle as a GeoJSON linear ring: listed counterclockwise
        // and closed, meaning the first position is repeated as the last one.
        List<Position> ring = new ArrayList<Position>();
        ring.add(new Position(west, south)); // Position(lng, lat) not Position(lat, lng)
        ring.add(new Position(east, south));
        ring.add(new Position(east, north));
        ring.add(new Position(west, north));
        ring.add(new Position(west, south));

        // Coords: a List of List of Positions, see
        // https://ngageoint.github.io/simple-features-geojson-java/docs/api/
        // https://www.here.xyz/api/concepts/geojsonbasics/#polygon
        List<List<Position>> rings = new ArrayList<List<Position>>();
        rings.add(ring);

        return new Polygon(rings);
    }

    /**
     * Writes the MultiPoint geojson to "multipoint_" + the counts file's name (minus any
     * "counts_" prefix), in the folder of the counts file. Also prints the geojson to
     * System.err. A failure to write is logged, not thrown.
     *
     * @return absolute path of the output file
     */
    public String writeMultiPointGeoJsonToFile() {
        final String filename = "multipoint_" + this.geoJsonFilenameWithSuffix;
        File outFile = new File(this.outputFolder, filename);

        Geometry geometry = this.toMultiPointGeoJson();
        String multiPointGeojsonString = FeatureConverter.toStringValue(geometry);
        System.err.println("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n");

        // Some basic re-formatting for some immediate legibility
        // But pasting the contents of the file (or the System.err output above)
        // directly into http://geojson.tools/ will instantly reformat the json perfectly anyway.
        multiPointGeojsonString = multiPointGeojsonString.replace("[[", "\n[\n\t[");
        multiPointGeojsonString = multiPointGeojsonString.replace("],[", "],\n\t[");
        multiPointGeojsonString = multiPointGeojsonString.replace("]]", "]\n]");

        writeGeoJson(outFile, multiPointGeojsonString, "multipoint");
        return outFile.getAbsolutePath();
    }

    /**
     * Writes the FeatureCollection geojson to "geojson-features_" + the counts file's name
     * (minus any "counts_" prefix), in the folder of the counts file. Also prints the geojson
     * to System.err. A failure to write is logged, not thrown.
     *
     * @return absolute path of the output file
     */
    public String writeFeaturesGeoJsonToFile() {
        final String filename = "geojson-features_" + this.geoJsonFilenameWithSuffix;
        File outFile = new File(this.outputFolder, filename);

        FeatureCollection featureColl = this.toFeatureCollection();
        String featuresGeojsonString = FeatureConverter.toStringValue(featureColl);
        System.err.println("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");

        writeGeoJson(outFile, featuresGeojsonString, "features");
        return outFile.getAbsolutePath();
    }

    /**
     * Writes the geojson string plus a final newline to the file as UTF-8.
     * Any failure is logged together with the geojson that could not be written.
     *
     * @param label kind of geojson being written, only used in the error message
     */
    private void writeGeoJson(File outFile, String geoJson, String label) {
        try (Writer writer = new BufferedWriter(
                 new OutputStreamWriter(new FileOutputStream(outFile), StandardCharsets.UTF_8))) {
            writer.write(geoJson + "\n");
        } catch (Exception e) {
            logger.error("Unable to write " + label + " geojson:\n**********************");
            logger.error(geoJson);
            logger.error("**********************\ninto file " + outFile.getAbsolutePath());
            logger.error(e.getMessage(), e);
        }
    }

    /** @return the sum of the "count" fields of all records of the counts file. */
    public int getTotalCount() {
        int total = 0;
        for (JsonElement obj : this.countryCountsJsonArray) {
            total += obj.getAsJsonObject().get("count").getAsInt();
        }
        return total;
    }

    /**
     * Unfinished and unused: logs every record of the given RFC 4180 csv file.
     *
     * @throws Exception if the file cannot be read
     */
    public void parseCSVFile(String filename) throws Exception {
        File csvData = new File(filename);
        // the parser holds the file open, so make sure it gets closed
        try (CSVParser parser = CSVParser.parse(csvData, StandardCharsets.US_ASCII, CSVFormat.RFC4180)) {
            for (CSVRecord csvRecord : parser) {
                logger.info("Got record: " + csvRecord.toString());
            }
        }
    }

    public static void printUsage() {
        System.err.println("Usage: CountryCodeCountsMapData <counts>.json");
    }

    /**
     * Generates both geojson files for the counts file given as the only argument.
     */
    public static void main(String args[]) {
        if (args.length != 1) {
            printUsage();
            System.exit(-1);
        }

        try {
            CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0]);

            String multipointOutFileName = mapData.writeMultiPointGeoJsonToFile();
            String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile();

            System.err.println("***********\nWrote mapdata to files " + multipointOutFileName
                               + " and " + featuresOutFileName);
            System.err.println("You can paste the geojson contents of either of these files into the "
                               + "editor at http://geojson.tools/ to see the data arranged on a world map");

            System.err.println("Total count for query: " + mapData.getTotalCount());
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
    }
}