Changeset 33805


Ignore:
Timestamp:
2019-12-13T20:08:14+13:00 (4 years ago)
Author:
ak19
Message:
  1. Moved the static countrycodes.json file to the conf folder and updated CountryCodeCountsMapData.java to work with its new location. 2. CountryCodeCountsMapData.java now names its output files sensibly based on the input filename, instead of producing identical filenames on each run independent of (different) input files. 3. Added the geojson and map for mongodb query results for counts by country codes of sites where at least 1 page is overall detected by OpenNLP as MRI.
Location:
other-projects/maori-lang-detection
Files:
4 added
1 deleted
1 edited
1 moved

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/conf/countrycodes.json

    r33802 r33805  
     1/* CountryCodes with geolocation from https://developers.google.com/public-data/docs/canonical/countries_csv */
     2/* BEWARE OF ADDING OR MODIFYING COMMENTS IN THIS FILE INTO MULTILINE COMMENTS, AS JAVA CODE CAN'T DEAL WITH THAT */
     3
    14/* 1 */
    25{
  • other-projects/maori-lang-detection/src/org/greenstone/atea/CountryCodeCountsMapData.java

    r33800 r33805  
    3232
    3333
    34 /** Simple Features GeoJSON Java
     34/**
     35 * Run a mongodb query that produces counts per countrycode like in the following 2 examples:
     36 *
     37 * 1. count of country codes for all sites
     38 * db.Websites.aggregate([
     39 *
     40 *   { $unwind: "$geoLocationCountryCode" },
     41 *   {
     42 *       $group: {
     43 *           _id: "$geoLocationCountryCode",
     44 *           count: { $sum: 1 }
     45 *       }
     46 *   },
     47 *   { $sort : { count : -1} }
     48 * ]);
     49 *
     50 * Then store the mongodb query result's JSON format output in a file called "counts.json".
     51 * Then run this program with counts.json as parameter
     52 * Copy the geojson output into http://geojson.tools/
     53 *
     54 * 2. count of country codes for sites that have at least one page detected as MRI
     55 *
     56 * db.Websites.aggregate([
     57 *   {
     58 *       $match: {
     59 *           numPagesInMRI: {$gt: 0}
     60 *       }
     61 *   },
     62 *   { $unwind: "$geoLocationCountryCode" },
     63 *   {
     64 *       $group: {
     65 *           _id: {$toLower: '$geoLocationCountryCode'},
     66 *           count: { $sum: 1 }
     67 *       }
     68 *   },
     69 *   { $sort : { count : -1} }
     70 * ]);
     71 *
     72 * Store the mongodb query result's JSON format output in a file called "counts_sitesWithPagesInMRI.json".
     73 * Then run this program with counts_sitesWithPagesInMRI.json as parameter.
     74 * Copy the geojson output into http://geojson.tools/
     75 *
     76 * ##################
     77 * TO COMPILE:
     78 *    maori-lang-detection/src$
     79 *       javac -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData.java
     80 *
     81 * TO RUN:
     82 *    maori-lang-detection/src$
     83 *       java -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData ../mongodb-data/counts.json
     84 *###################
     85 *
     86 * This class needs the gson library, and now the sf-geojson(-2.02).jar and
     87 * helper jars sf(-2.02).jar and 3 jackson jars too,
     88 * to create and store Simple Features geo json objects with Java.
     89 * I copied the gson jar file from GS3.
     90 *
     91 * Simple Features GeoJSON Java
    3592 * https://ngageoint.github.io/simple-features-geojson-java/ - links to API and more
    3693 *
     
    46103 *   https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-annotations/2.10.0
    47104 */
    48 
    49 /**
    50  * This class needs the gson library, and now the sf(-2.02).jar and sf-geojson(-2.02).jar files too
    51  * to create and store Simple Features geo json objects with Java.
    52  * I copied the gson jar file from GS3.
    53  *
    54  * TO COMPILE:
    55  *    maori-lang-detection/src$
    56  *       javac -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData.java
    57  *
    58  * TO RUN:
    59  *    maori-lang-detection/src$
    60  *       java -cp ".:../conf:../lib/*" org/greenstone/atea/CountryCodeCountsMapData ../mongodb-data/countrycodes.json ../mongodb-data/counts.json
    61  *
    62  */
    63105public class CountryCodeCountsMapData {
    64106    static Logger logger = Logger.getLogger(org.greenstone.atea.CountryCodeCountsMapData.class.getName());
     
    74116    private final double EU_LNG = -20.0d;
    75117    private final double EU_LAT = 50.0d;
    76    
    77     public CountryCodeCountsMapData(String countryCoordsJSONFile, String countryCountsJSONFile) throws Exception {
    78    
    79     // map of country codes to lat, lng json for that country code
     118
     119    private final String geoJsonFilenameWithSuffix;
     120    private final File outputFolder;
     121   
     122    public CountryCodeCountsMapData(String countryCountsJSONFilename) throws Exception {
     123
     124    // work out the unique filename we're going to save the geojson files under
     125    // and the folder we're going to save them into
     126    File countryCountsJSONFile = new File(countryCountsJSONFilename);
     127    String tailname = countryCountsJSONFile.getName();
     128    this.geoJsonFilenameWithSuffix = (tailname.startsWith("counts_")) ? tailname.substring("counts_".length()) : tailname; 
     129    this.outputFolder = countryCountsJSONFile.getParentFile().getCanonicalFile(); // canonical resolves any .. and . in path   
     130   
     131    // locate the countrycodes.json file
     132    File countryCoordsJSONFile = new File(this.getClass().getClassLoader().getResource("countrycodes.json").getFile());
     133   
     134    // Create a map of ALL country code names to ALL the country code json objects
     135    // that contain the location (lat, lng) info for each country code
    80136    Map<String, JsonObject> countryToJsonMap = new HashMap<String, JsonObject>();
    81137
     
    150206     * into a JsonArray.
    151207     */
    152     public JsonArray parseJSONFile(String filename) throws Exception {
     208    public JsonArray parseJSONFile(File file) throws Exception {
    153209    JsonArray jsonArray = null;
    154210    // read into string
    155211    try (
    156          BufferedReader reader = new BufferedReader(new FileReader(filename));
     212         BufferedReader reader = new BufferedReader(new FileReader(file));
    157213         ) {
    158214        StringBuilder str = //new StringBuilder();
     
    160216        String line;
    161217        while((line = reader.readLine()) != null) {
    162         line = line.replaceAll("/\\* [^\\/]* \\*/", "");
     218        line = line.replaceAll("/\\* [^\\/]* \\*/", ""); // strip any /* ... */ comment that opens and closes on this one line
    163219        str.append(line);
    164220        if(line.endsWith("}")) {
     
    296352    }
    297353   
    298     public String writeMultiPointGeoJsonToFile(File folder) {
    299     final String filename = "multipoint.json";
    300     File outFile = new File(folder, filename);
     354    public String writeMultiPointGeoJsonToFile() {
     355    final String filename = "multipoint_" + this.geoJsonFilenameWithSuffix;
     356    File outFile = new File(this.outputFolder, filename);
    301357
    302358    Geometry geometry = this.toMultiPointGeoJson();
     
    326382    }
    327383   
    328     public String writeFeaturesGeoJsonToFile(File folder) {
    329     final String filename = "geojson-features.json";
    330     File outFile = new File(folder, filename);
     384    public String writeFeaturesGeoJsonToFile() {
     385    final String filename = "geojson-features_" + this.geoJsonFilenameWithSuffix;
     386    File outFile = new File(this.outputFolder, filename);
    331387
    332388    FeatureCollection featureColl = this.toFeatureCollection();
     
    371427   
    372428    public static void printUsage() {
    373     System.err.println("CountryCodeCountsMapData countrycodes.json counts.json");
     429    System.err.println("CountryCodeCountsMapData <counts-by-countrycode-file>.json");
    374430    }
    375431   
    376432    public static void main(String args[]) {
    377     if(args.length != 2) {
     433    if(args.length != 1) {
    378434        printUsage();
    379435        System.exit(-1);
     
    381437   
    382438    try {
    383         File countsFile = new File(args[1]);
    384         File parentFolder = countsFile.getParentFile().getCanonicalFile(); // canonical resolves any .. and . in path
     439        File countsFile = new File(args[0]);
    385440       
    386         CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0], args[1]);
    387 
    388         String multipointOutFileName = mapData.writeMultiPointGeoJsonToFile(parentFolder);     
    389         String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile(parentFolder);
     441        CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0]);
     442
     443        String multipointOutFileName = mapData.writeMultiPointGeoJsonToFile();     
     444        String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile();
    390445       
    391446        System.err.println("***********\nWrote mapdata to files " + multipointOutFileName
Note: See TracChangeset for help on using the changeset viewer.