package org.greenstone.atea;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
//import java.lang.Math; //automatically imported apparently

import org.apache.commons.csv.*;
import org.apache.log4j.Logger;

// Google's gson imports for parsing any kind of json
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;

// For working with GeoJSON's Simple Features in Java
import mil.nga.sf.geojson.Feature;
import mil.nga.sf.geojson.FeatureCollection;
import mil.nga.sf.geojson.FeatureConverter;
import mil.nga.sf.geojson.Geometry;
import mil.nga.sf.geojson.MultiPoint;
import mil.nga.sf.geojson.Polygon;
import mil.nga.sf.geojson.Position;

import org.greenstone.util.SafeProcess;

/**
 * Run a mongodb query that produces counts per countrycode like in the following 2 examples:
 *
 * 1. count of country codes for all sites
 *   db.Websites.aggregate([
 *
 *      { $unwind: "$geoLocationCountryCode" },
 *      {
 *         $group: {
 *            _id: "$geoLocationCountryCode",
 *            count: { $sum: 1 }
 *         }
 *      },
 *      { $sort : { count : -1} }
 *   ]);
 *
 * Then store the mongodb query result's JSON format output in a file called "counts.json".
 * Then run this program with counts.json as parameter
 * Copy the geojson output into http://geojson.tools/
 *
 * 2. count of country codes for sites that have at least one page detected as MRI
 *
 *   db.Websites.aggregate([
 *      {
 *         $match: {
 *            numPagesInMRI: {$gt: 0}
 *         }
 *      },
 *      { $unwind: "$geoLocationCountryCode" },
 *      {
 *         $group: {
 *            _id: {$toLower: '$geoLocationCountryCode'},
 *            count: { $sum: 1 }
 *         }
 *      },
 *      { $sort : { count : -1} }
 *   ]);
 *
 * Store the mongodb query result's JSON format output in a file called "counts_sitesWithPagesInMRI.json".
 * Then run this program with counts_sitesWithPagesInMRI.json as parameter.
 * Copy the geojson output into http://geojson.tools/
 *
 * ##################
 * TO COMPILE:
 *    maori-lang-detection/src$
 *       javac -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData.java
 *
 * TO RUN:
 *    maori-lang-detection/src$
 *       java -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData ../mongodb-data/counts.json
 * ###################
 *
 * This class needs the gson library, and now the sf-geojson(-2.02).jar and
 * helper jars sf(-2.02).jar and 3 jackson jars too,
 * to create and store Simple Features geo json objects with Java.
 * I copied the gson jar file from GS3.
 *
 * Simple Features GeoJSON Java
 * https://ngageoint.github.io/simple-features-geojson-java/ - links to API and more
 *
 * https://mvnrepository.com/artifact/mil.nga.sf/sf-geojson (https://github.com/ngageoint/simple-features-geojson-java/)
 *
 * Also need the basic data types used by the Geometry objects above:
 * https://mvnrepository.com/artifact/mil.nga/sf (https://github.com/ngageoint/simple-features-java)
 *
 * Further helper jars needed (because of encountering the exception documented at
 * stackoverflow.com/questions/36278293/java-lang-classnotfoundexception-com-fasterxml-jackson-core-jsonprocessingexcep/36279872)
 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core/2.10.0
 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind
 * https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations/2.10.0
 */
public class CountryCodeCountsMapData {

    static Logger logger = Logger.getLogger(org.greenstone.atea.CountryCodeCountsMapData.class.getName());

    static public final String GEOJSON_MAP_TOOL_URL = "http://geojson.io/"; //"http://geojson.tools/";
    static private final String DATA_STR = "#data=data:application/json,";

    // "http://geojson.io" has a URL API to programmatically access
    /*
      See http://geojson.io/ -> Help
      "I'm a coder
      geojson.io has an array of cli tools that make it easy to go from a GeoJSON file
      on your computer to geojson.io."

      http://geojson.io/#geojson-io-api
      "Geojson.io API
      You can interact with geojson.io programmatically in two ways:
      => URL parameters
         Browser console"

      http://geojson.io/#url-api
      "data=data:application/json,
      Open the map and load a chunk of GeoJSON data from a URL segment directly onto the map.
      The GeoJSON data should be encoded as per encodeURIComponent(JSON.stringify(geojson_data)).
      Example:
      http://geojson.io/#data=data:application/json,%7B%22type%22%3A%22LineString%22%2C%22coordinates%22%3A%5B%5B0%2C0%5D%2C%5B10%2C10%5D%5D%7D
    */

    public static final int SUPPRESS_MAPDATA_DISPLAY = 0;
    public static final int PRINT_MAPDATA_TO_SCREEN = 1;

    //Map<String, JsonObject> countryToJsonMap;
    JsonArray countryCodesJsonArray;
    JsonArray countryCountsJsonArray;

    // North-central Antarctica coords
    private static final double ANTARCTICA_LNG = 57.0d;
    private static final double ANTARCTICA_LAT = -70.0d;
    // For EU coords, spot in Atlantic Ocean close to western European coast.
    private static final double EU_LNG = -20.0d;
    private static final double EU_LAT = 50.0d;

    private final String geoJsonFilenameWithSuffix;
    private final File outputFolder;

    /**
     * Loads the country-code lookup table (classpath resource "countrycodes.json") and the
     * given counts file, then annotates every counts record with "countrycode" (upper-cased
     * former "_id"), and, where the code is known, with "lng", "lat" and "region".
     * The special codes "EU" and "UNKNOWN" are mapped to fixed fallback positions.
     * Records with any other unlisted code are logged and left without location fields;
     * the geojson generating methods skip such records.
     *
     * @param countryCountsJSONFilename path to the file containing mongodb's JSON output
     *        of per-country counts. The geojson output files are written next to it.
     * @throws Exception if either json file cannot be located, read or parsed
     */
    public CountryCodeCountsMapData(String countryCountsJSONFilename) throws Exception {

        // work out the unique filename we're going to save the geojson files under
        // and the folder we're going to save them into
        File countryCountsJSONFile = new File(countryCountsJSONFilename);
        String tailname = countryCountsJSONFile.getName();
        this.geoJsonFilenameWithSuffix = tailname.startsWith("counts_")
            ? tailname.substring("counts_".length())
            : tailname;
        // getParentFile() is null for a bare relative filename like "counts.json",
        // so make the path absolute before asking for its parent.
        // canonical resolves any .. and . in path
        this.outputFolder = countryCountsJSONFile.getAbsoluteFile().getParentFile().getCanonicalFile();

        // locate the countrycodes.json file
        URL countryCoordsURL = this.getClass().getClassLoader().getResource("countrycodes.json");
        if (countryCoordsURL == null) {
            throw new FileNotFoundException("Could not find countrycodes.json on the classpath");
        }
        // toURI() decodes URL escapes (e.g. %20 for spaces in the path), unlike URL.getFile()
        File countryCoordsJSONFile = new File(countryCoordsURL.toURI());

        // Create a map of ALL country code names to ALL the country code json objects
        // that contain the location (lat, lng) info for each country code
        Map<String, JsonObject> countryToJsonMap = new HashMap<>();

        // Parse json file of country codes and put into a JsonArray.
        // then put into map of each country code to its JsonObject.
        countryCodesJsonArray = parseJSONFile(countryCoordsJSONFile);
        for (JsonElement obj : countryCodesJsonArray) {
            JsonObject countryCodeJson = obj.getAsJsonObject();
            countryToJsonMap.put(countryCodeJson.get("country").getAsString(), countryCodeJson);
        }

        // Parse json file of country code counts
        // Then for each JsonObject in this file,
        // find a match on its country code in the map created above to get a country code JsonObject
        // Get the longitude and latitude of the JsonObject that matched that country code.
        // Add this lng,lat location information to the current JsonObject from the counts file.
        countryCountsJsonArray = parseJSONFile(countryCountsJSONFile);

        for (JsonElement obj : countryCountsJsonArray) {
            JsonObject json = obj.getAsJsonObject();
            // Locale.ROOT: country codes must not be subject to locale specific casing rules
            String countryCode = json.get("_id").getAsString().toUpperCase(Locale.ROOT);
            // set the property back as uppercase and with property name "countrycode" instead of "_id"
            json.remove("_id");
            json.addProperty("countrycode", countryCode);

            // locate in countryCode map
            JsonObject countryCodeJson = countryToJsonMap.get(countryCode);

            if (countryCodeJson != null) {
                // for geojson, want longitude then latitude
                Double lng = countryCodeJson.get("longitude").getAsDouble();
                Double lat = countryCodeJson.get("latitude").getAsDouble();
                String countryName = countryCodeJson.get("name").getAsString();

                // let's add lat and lng fields to countryCounts object
                // adds Number: https://javadoc.io/static/com.google.code.gson/gson/2.8.5/com/google/gson/JsonObject.html
                json.addProperty("lng", lng);
                json.addProperty("lat", lat);
                json.addProperty("region", countryName);
            } else {
                logger.info("No geolocation info found for country code " + countryCode);
                if (countryCode.equals("EU")) {
                    logger.info("   Adding lat,lng for somewhere around Europe");
                    // add lat and lng for Europe
                    json.addProperty("lng", EU_LNG);
                    json.addProperty("lat", EU_LAT);
                    json.addProperty("region", "Europe");
                } else if (countryCode.equals("UNKNOWN")) {
                    logger.info("   Adding lat,lng for somewhere in Antarctica");
                    // add lat and lng for Antarctica
                    json.addProperty("lng", ANTARCTICA_LNG);
                    json.addProperty("lat", ANTARCTICA_LAT);
                    json.addProperty("region", "UNKNOWN");
                } else {
                    // No location fields get added: see hasLocation()
                    logger.error("ERROR: entirely unknown country code: " + countryCode);
                }
            }
        }
    }

    /**
     * Convert mongodb tabular output of json records stored in the given file
     * into a JsonArray.
     * Comment lines (such as the record headers the mongo shell prints between records)
     * are dropped, and a record separator is inserted after every line ending in a closing
     * brace. A file without any records yields an empty JsonArray.
     *
     * @param file the file to read, interpreted as UTF-8
     * @return the records of the file as a JsonArray
     * @throws Exception if the file can't be read or its contents can't be parsed as json
     */
    public JsonArray parseJSONFile(File file) throws Exception {
        StringBuilder str = new StringBuilder("[");

        try (
            BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
        ) {
            String line;
            boolean multi_line_comment = false;

            while ((line = reader.readLine()) != null) {
                line = line.trim();

                // ignore any single line comments nested in multi-line symbols
                if (line.startsWith("/*") && line.endsWith("*/")) {
                    continue; // skip line
                }

                // skip multi-line comments spread over multiple lines
                // assumes this ends on a line containing */ without further content on the line.
                if (line.startsWith("/*") && !line.endsWith("*/")) {
                    multi_line_comment = true;
                    continue; // skip line
                }
                if (multi_line_comment) {
                    if (line.contains("*/")) {
                        multi_line_comment = false;
                    }
                    continue; // we're in a comment or at end of comment, skip line
                }

                str.append(line);
                if (line.endsWith("}")) {
                    str.append(",\n");
                }
            }
        }

        // Remove the separator trailing the last record, if there is one, before closing
        // the array. Never cuts into the opening bracket or into actual record content.
        int end = str.length();
        while (end > 1 && (str.charAt(end - 1) == ',' || Character.isWhitespace(str.charAt(end - 1)))) {
            end--;
        }
        str.setLength(end);
        str.append("]");

        // https://stackoverflow.com/questions/2591098/how-to-parse-json-in-java
        return new JsonParser().parse(str.toString()).getAsJsonArray();
    }

    /**
     * Tells whether the constructor was able to assign a map position to the given
     * counts record, which it can't for country codes it doesn't know about.
     */
    private static boolean hasLocation(JsonObject json) {
        return json.has("lng") && json.has("lat");
    }

    /**
     * Creates a MultiPoint geometry with one position per counts record that has a location.
     *
     * Reading
     * https://www.here.xyz/api/concepts/geojsonbasics/
     * https://ngageoint.github.io/simple-features-geojson-java/docs/api/
     *
     * https://stackoverflow.com/questions/55621480/cant-access-coordinates-member-of-geojson-feature-collection
     *
     * Downloaded geojson simple features' jar file from maven, but it didn't work:
     * a more private version of MultiPoint.java is not included in the jar file (there's only
     * mil.nga.sf.geojson.MultiPoint , whereas
     * mil.nga.sf.MultiPoint is missing
     *
     * This seems to have gone wrong at
     * https://github.com/ngageoint/simple-features-geojson-java/tree/master/src/main/java/mil/nga/sf
     * but the one at
     * https://github.com/ngageoint/simple-features-java/tree/master/src/main/java/mil/nga/sf
     * has it. So I've been trying to build that, but don't have the correct version of maven.
     *
     * @return the MultiPoint geometry
     */
    public Geometry toMultiPointGeoJson() {
        List<Position> points = new LinkedList<>();

        for (JsonElement obj : this.countryCountsJsonArray) {
            JsonObject json = obj.getAsJsonObject();
            if (!hasLocation(json)) {
                logger.warn("Leaving out of multipoint map data, as it has no location: " + json);
                continue;
            }
            Double lng = json.get("lng").getAsDouble();
            Double lat = json.get("lat").getAsDouble();

            Position point = new Position(lng, lat); // Position(lng, lat) not Position(lat, lng)
            points.add(point);
        }

        Geometry multiPoint = new MultiPoint(points);
        return multiPoint;
    }

    /**
     * Creates a FeatureCollection containing, for every counts record that has a location,
     * a rectangular "histogram" polygon with the properties "count", "code" and "region".
     *
     * https://javadoc.io/static/com.google.code.gson/gson/2.8.5/index.html
     *
     * @return the feature collection
     */
    public FeatureCollection toFeatureCollection() {
        final int HISTOGRAM_WIDTH = 4;

        FeatureCollection featureCollection = new FeatureCollection();

        for (JsonElement obj : this.countryCountsJsonArray) {
            JsonObject json = obj.getAsJsonObject();
            if (!hasLocation(json)) {
                logger.warn("Leaving out of features map data, as it has no location: " + json);
                continue;
            }

            String countryCode = json.get("countrycode").getAsString();
            String region = json.get("region").getAsString();
            int count = json.get("count").getAsInt();

            // make a histogram for each country
            Geometry rectangle = this.toPolygon(json, count, HISTOGRAM_WIDTH);

            Feature countryFeature = new Feature(rectangle);
            Map<String, Object> featureProperties = new HashMap<>();
            featureProperties.put("count", Integer.valueOf(count));
            featureProperties.put("code", countryCode);
            featureProperties.put("region", region);
            countryFeature.setProperties(featureProperties);

            featureCollection.addFeature(countryFeature);
        }

        return featureCollection;
    }

    /**
     * Create rectangular "histogram" for a country code: a rectangle whose bottom edge is
     * centred on the country's position and which grows northwards with the count.
     *
     * @param json counts record that has "lng", "lat" and "countrycode" properties
     * @param count the count the rectangle is to represent
     * @param HISTOGRAM_WIDTH the default width of a histogram, in degrees of longitude
     */
    private Geometry toPolygon(JsonObject json, final int count, final int HISTOGRAM_WIDTH) {
        int half_width = HISTOGRAM_WIDTH/2;
        double vertical_factor = 1.0;

        final Double lng = json.get("lng").getAsDouble();
        final Double lat = json.get("lat").getAsDouble();

        String countryCode = json.get("countrycode").getAsString();

        //create the 4 corners of the rectangle
        // West is negative, east is positive, south is negative, north is positive
        // See http://www.learnz.org.nz/sites/learnz.org.nz/files/lat-long-geo-data-01_0.jpg
        // But since the histograms grow vertically/northwards and we can't go past a latitude of 90,
        // to compensate, we increase the width of the histograms by the same factor as our inability
        // to grow northwards.
        Double north = lat + (vertical_factor * count);

        while (north > 90) {
            // recalculate north after decreasing histogram's vertical growth
            // by the same factor as we increase its width
            vertical_factor = vertical_factor/2.0;
            half_width = 2 * half_width;
            north = lat + (vertical_factor * count);
        }
        Double east = lng + half_width;
        Double west = lng - half_width;
        Double south = lat;

        List<Position> pts = recalculateAreaIfLarge(count, HISTOGRAM_WIDTH, countryCode,
                                                    lat, lng, north, south, east, west);

        List<List<Position>> outerList = new LinkedList<>();
        if (pts != null) {
            outerList.add(pts);
        } else {
            List<Position> points = new LinkedList<>();
            outerList.add(points);

            points.add(new Position(west, south)); // Position(lng, lat) not Position(lat, lng)
            points.add(new Position(west, north));
            points.add(new Position(east, north));
            points.add(new Position(east, south));
        }

        // Coords: a List of List of Positions, see
        // https://ngageoint.github.io/simple-features-geojson-java/docs/api/
        // https://www.here.xyz/api/concepts/geojsonbasics/#polygon
        Geometry rectangle = new Polygon(outerList);

        return rectangle;
    }

    /**
     * Works out a differently shaped rectangle if the one calculated by toPolygon() passes the
     * longitude edges of the map, or its western edge lies west of -140 longitude.
     *
     * @return the 4 corners of the recalculated rectangle, or null if the rectangle
     *         described by the parameters needed no recalculating
     */
    private List<Position> recalculateAreaIfLarge(final int count, final int HISTOGRAM_WIDTH, String countryCode,
                                                  final Double lat, final Double lng,
                                                  Double north, Double south, Double east, Double west)
    {
        boolean recalculated = false;

        // Check if we're dealing with very large numbers, in which case, we can fall off the longitude edges
        // Max longitude values are -180 to 180. So a max of 360 units between them. (Max latitude is -90 to 90)
        // "Longitude is in the range -180 and +180 specifying coordinates west and east of the Prime Meridian, respectively.
        // For reference, the Equator has a latitude of 0°, the North pole has a latitude of 90° north (written 90° N or +90°),
        // and the South pole has a latitude of -90°."
        if ((east + Math.abs(west)) > 360 || east > 180 || west < -180) {
            System.err.println("For country " + countryCode + ":");
            System.err.println("north = " + north);
            System.err.println("south = " + south);
            System.err.println("east = " + east);
            System.err.println("west = " + west + "\n");

            int half_width = HISTOGRAM_WIDTH/2; // reset half_width

            // aim for a square: the same number of units vertically as horizontally
            double v_tmp_count = Math.sqrt(count);
            double h_tmp_count = v_tmp_count;

            System.err.println("count = " + count);
            System.err.println("v = " + v_tmp_count);
            System.err.println("h = " + h_tmp_count);
            System.err.println("lat = " + lat);
            System.err.println("lng = " + lng + "\n");

            if (h_tmp_count > 90) {
                // 360 max width, of which each longitude
                // is 4 units (horizontal factor = 4, and half-width is half
                // of that). So max width/h_tmp_count allowed 90 => 360
                // longitude on map (-180 to 180).
                // Put the excess h_tmp_count into v_tmp_count and ensure
                // that does not go over 90+90 = 180 max. Vertical_factor is 1.
                System.err.println("Out of longitude range. Attempting to compensate...");
                double diff = h_tmp_count - 80.0; // actually 90 wraps on geojson tools, 80 doesn't
                h_tmp_count -= diff;
                v_tmp_count = (count/h_tmp_count);

                if (v_tmp_count > 180 || h_tmp_count > 90) {
                    System.err.println("Warning: still exceeded max latitude and/or longitude range");
                }
            }

            System.err.println("Recalculating polygon for country with high count: " + countryCode + ".");
            System.err.println("count = " + count);
            System.err.println("v = " + v_tmp_count);
            System.err.println("h = " + h_tmp_count);
            System.err.println("lat = " + lat);
            System.err.println("lng = " + lng + "\n");

            north = lat + v_tmp_count;
            south = lat;
            east = lng + (h_tmp_count * half_width); // a certain width, half_width, represents one unit in the x axis
            west = lng - (h_tmp_count * half_width);

            if (north > 90) {
                // centre vertically on lat
                north = lat + (v_tmp_count/2);
                south = lat - (v_tmp_count/2);
            }

            if (west < -180.0) {
                // west is a larger negative value than -180,
                // so subtracting west from -180 produces a positive h_diff value
                double h_diff = -180.0 - west;
                west = -180.0; // set to extreme western edge
                east = east + h_diff;
            } else if (east > 180.0) {
                double h_diff = east - 180.0; // the country's longitude (lng) is h_diff from the eastern edge
                east = 180.0; // maximise eastern edge
                west = west - h_diff; // then grow the remainder of h_tmp_count in the opposite (western/negative) direction
            }

            // NOTE: Can't centre on country, (lat,lng), as we don't know whether either of lat or lng has gone past the edge
            // Hopefully we don't exceed +90/-90 lat and +/-180 longitude

            recalculated = true;

        } else if (west < -140.0) {
            // past -140 west, the edges don't wrap well in geotools,
            // so shift any points more west/negative than -140:
            double diff = -140.0 - west;
            west = -140.0;
            east += diff;
            recalculated = true;
        }

        if (recalculated) {
            System.err.println("\nnorth = " + north);
            System.err.println("south = " + south);
            System.err.println("east = " + east);
            System.err.println("west = " + west);

            List<Position> points = new LinkedList<>();

            points.add(new Position(west, south)); // Position(lng, lat) not Position(lat, lng)
            points.add(new Position(west, north));
            points.add(new Position(east, north));
            points.add(new Position(east, south));

            return points;
        }

        return null;
    }

    // by default, display mapdata output on screen too
    public String writeMultiPointGeoJsonToFile() {
        return writeMultiPointGeoJsonToFile(PRINT_MAPDATA_TO_SCREEN);
    }

    /**
     * Writes the map data out as a MultiPoint geometry, in UTF-8, to the file
     * "multipoint_" + the counts file's name minus any "counts_" prefix,
     * located in the same folder as the counts file.
     * Failure to write the file is logged rather than propagated.
     *
     * @param displayMapData if PRINT_MAPDATA_TO_SCREEN, the geojson is also printed to System.err
     * @return the absolute path of the output file
     */
    public String writeMultiPointGeoJsonToFile(int displayMapData) {
        final String filename = "multipoint_" + this.geoJsonFilenameWithSuffix;
        File outFile = new File(this.outputFolder, filename);

        Geometry geometry = this.toMultiPointGeoJson();
        String multiPointGeojsonString = FeatureConverter.toStringValue(geometry);
        if (displayMapData == PRINT_MAPDATA_TO_SCREEN) {
            System.err.println("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n");
        }

        try (
            Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outFile), StandardCharsets.UTF_8));
        ) {
            // Some basic re-formatting for some immediate legibility
            // But pasting the contents of the file (or the System.err output above)
            // directly into http://geojson.tools/ or http://geojson.io/
            // will instantly reformat the json perfectly anyway.
            multiPointGeojsonString = multiPointGeojsonString.replace("[[", "\n[\n\t[");
            multiPointGeojsonString = multiPointGeojsonString.replace("],[", "],\n\t[");
            multiPointGeojsonString = multiPointGeojsonString.replace("]]", "]\n]");

            writer.write(multiPointGeojsonString + "\n");
        } catch (Exception e) {
            logger.error("Unable to write multipoint geojson:\n**********************");
            logger.error(multiPointGeojsonString);
            logger.error("**********************\ninto file " + outFile.getAbsolutePath());
            logger.error(e.getMessage(), e);
        }

        return outFile.getAbsolutePath();
    }

    // by default, display mapdata output on screen too
    public String writeFeaturesGeoJsonToFile() {
        return writeFeaturesGeoJsonToFile(PRINT_MAPDATA_TO_SCREEN);
    }

    /**
     * Writes the map data out as geojson features, in UTF-8, to the file
     * "geojson-features_" + the counts file's name minus any "counts_" prefix,
     * located in the same folder as the counts file.
     * Failure to write the file is logged rather than propagated.
     *
     * @param displayMapData if PRINT_MAPDATA_TO_SCREEN, the geojson is also printed to System.err
     * @return the absolute path of the output file
     */
    public String writeFeaturesGeoJsonToFile(int displayMapData) {
        final String filename = "geojson-features_" + this.geoJsonFilenameWithSuffix;
        File outFile = new File(this.outputFolder, filename);

        FeatureCollection featureColl = this.toFeatureCollection();
        String featuresGeojsonString = FeatureConverter.toStringValue(featureColl);
        if (displayMapData == PRINT_MAPDATA_TO_SCREEN) {
            System.err.println("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");
        }

        try (
            Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outFile), StandardCharsets.UTF_8));
        ) {
            writer.write(featuresGeojsonString + "\n");
        } catch (Exception e) {
            logger.error("Unable to write features geojson:\n**********************");
            logger.error(featuresGeojsonString);
            logger.error("**********************\ninto file " + outFile.getAbsolutePath());
            logger.error(e.getMessage(), e);
        }

        return outFile.getAbsolutePath();
    }

    /**
     * @param uriEncoded if true, the returned string is encoded the way JavaScript's
     *        encodeURIComponent() would, so it can be embedded in a URL
     * @return the map data's feature collection as a geojson string
     */
    public String getFeaturesGeoJsonString(boolean uriEncoded) {
        String featuresGeojsonString = FeatureConverter.toStringValue(this.toFeatureCollection());
        if (uriEncoded) {
            // Want to return encodeURIComponent(JSON.stringify(featuresGeojsonString));
            featuresGeojsonString = encodeURIComponent(featuresGeojsonString);
        }
        return featuresGeojsonString;
    }

    /**
     * Java equivalent of JavaScript's encodeURIComponent(), see
     * https://stackoverflow.com/questions/607176/java-equivalent-to-javascripts-encodeuricomponent-that-produces-identical-outpu
     * URLEncoder produces form encoding, which differs in its treatment of a handful of
     * characters: these are converted to what encodeURIComponent() would produce for them.
     */
    private static String encodeURIComponent(String text) {
        try {
            return URLEncoder.encode(text, StandardCharsets.UTF_8.name())
                .replace("+", "%20")
                .replace("%21", "!")
                .replace("%27", "'")
                .replace("%28", "(")
                .replace("%29", ")")
                .replace("%7E", "~");
        } catch (UnsupportedEncodingException e) {
            // can't happen: every JVM is required to support UTF-8
            throw new IllegalStateException("UTF-8 encoding not supported", e);
        }
    }

    /**
     * @return URL that opens the geojson map tool with this object's (URI encoded)
     *         geojson features loaded
     */
    public String getAsMapURL() {
        boolean uriEncoded = true;
        String url = GEOJSON_MAP_TOOL_URL + DATA_STR + getFeaturesGeoJsonString(uriEncoded);
        return url;
    }

    /**
     * Runs firefox with its --screenshot option to save an image of the map for this
     * object's data into the file outputFolder/fileNamePrefix.png
     *
     * @return the path of the screenshot file as produced by Utility.getFilePath()
     */
    public String geoJsonMapScreenshot(File outputFolder, String fileNamePrefix) {
        // https://stackoverflow.com/questions/49606051/how-to-take-a-screenshot-in-firefox-headless-selenium-in-java
        // https://developer.mozilla.org/en-US/docs/Mozilla/Firefox/Headless_mode
        // /path/to/firefox -P my-profile --screenshot test.jpg https://developer.mozilla.org --window-size=800,1000

        // The geojson in the URL is URI encoded, so it contains no quotes or other characters
        // in need of escaping, and as each argument is passed separately, it needs no quoting.
        String mapURL = this.getAsMapURL();

        File outputFile = new File(outputFolder + File.separator + fileNamePrefix + ".png");
        String outputFilePath = Utility.getFilePath(outputFile);

        String[] cmdArgs = {
            "firefox",
            "--screenshot",
            outputFilePath,
            mapURL
        };

        System.err.print("Running:");
        for (String arg : cmdArgs) {
            System.err.print(" " + arg);
        }
        System.err.println();

        SafeProcess proc = new SafeProcess(cmdArgs);

        int retVal = proc.runProcess();

        logger.info("Process out: " + proc.getStdOutput());
        logger.info("Process err: " + proc.getStdError());
        logger.info("Screenshot process returned with: " + retVal);

        return outputFilePath;
    }

    /** @return the sum of the counts of all the records in the counts file */
    public int getTotalCount() {
        int total = 0;
        for (JsonElement obj : this.countryCountsJsonArray) {
            JsonObject json = obj.getAsJsonObject();
            int count = json.get("count").getAsInt();
            total += count;
        }
        return total;
    }

    // Unfinished and unused
    public void parseCSVFile(String filename) throws Exception {
        File csvData = new File(filename);
        try (
            CSVParser parser = CSVParser.parse(csvData, StandardCharsets.US_ASCII, CSVFormat.RFC4180);
        ) {
            for (CSVRecord csvRecord : parser) {
                logger.info("Got record: " + csvRecord.toString());
            }
        }
    }

    public static void printUsage() {
        System.err.println("CountryCodeCountsMapData <counts-file>.json");
    }

    public static void main(String args[]) {
        if (args.length != 1) {
            printUsage();
            System.exit(-1);
        }

        try {
            File countsFile = new File(args[0]);
            if (!countsFile.exists()) {
                System.err.println("File " + countsFile + " does not exist");
                System.exit(-1);
            }

            CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0]);

            String multipointOutFileName = mapData.writeMultiPointGeoJsonToFile();
            String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile();

            System.err.println("***********\nWrote mapdata to files " + multipointOutFileName
                               + " and " + featuresOutFileName);
            System.err.println("You can paste the geojson contents of either of these files into "
                               + " the editor at " + GEOJSON_MAP_TOOL_URL
                               + " to see the data arranged on a world map");

            System.err.println("Total count for query: " + mapData.getTotalCount());

        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
    }
}