Changeset 33938


Ignore:
Timestamp:
2020-02-17T16:10:00+13:00 (4 years ago)
Author:
ak19
Message:
  1. Don't regenerate random sample of web page urls and full web page url listing files if they already exist, as we don't want to accidentally lose any manual work done to the random sample file when rerunning the code. 2. Fixed up duplicate logging of command run by SafeProcess by recompiling. 3. System output commands replaced by logger calls where appropriate, since logger is also outputting to console. 4. Clarifying some log4j.props comments
Location:
other-projects/maori-lang-detection
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/conf/log4j.properties.in

    r33643 r33938  
    2424
    2525# We're saying our log "mldlog" is some file that gets appended to,
    26 # and it will write it out to "maori-lang-detector.log" located in whatever we defined the "logpath" to be
     26# and that log messages from DEBUG level onwards should be written out to
     27# - "maori-lang-detector.log" located in whatever we defined the "logpath" to be
     28# - the console with StdErr
    2729
    2830# Old way of logging:
  • other-projects/maori-lang-detection/src/org/greenstone/atea/CountryCodeCountsMapData.java

    r33926 r33938  
    159159    private final File outputFolder;
    160160
     161    /** The string version of the Feature geojson generated */
     162    private String featuresGeojsonString;
     163   
    161164    // Used to create screenshots
    162165    private final String MONITOR_RESOLUTION = "1920,1080"; // format: "x,y"
     
    292295        String fileContents = str.substring(0, str.length()-2) + "]";
    293296       
    294         //System.err.println("Got file:\n" + fileContents);
     297        //logger.debug("Got file:\n" + fileContents);
    295298       
    296299        // https://stackoverflow.com/questions/2591098/how-to-parse-json-in-java
     
    324327     */
    325328    public Geometry toMultiPointGeoJson() {
    326     //System.err.println("toGeoJSON() is not yet implemented.");
    327329
    328330    List<Position> points = new LinkedList<Position>();
     
    404406
    405407    /*
    406     System.err.println("For country " + countryCode + ":");
    407     System.err.println("north = " + north);
    408     System.err.println("south = " + south);
    409     System.err.println("east = " + east);
    410     System.err.println("west = " + west + "\n");
    411     System.err.println("-------------");
     408    logger.debug("For country " + countryCode + ":");
     409    logger.debug("north = " + north);
     410    logger.debug("south = " + south);
     411    logger.debug("east = " + east);
     412    logger.debug("west = " + west + "\n");
     413    logger.debug("-------------");
    412414    */
    413415       
     
    446448    // and the South pole has a latitude of -90°."
    447449    if((east + Math.abs(west)) > 360 || east > 180 || west < -180) {
    448         System.err.println("For country " + countryCode + ":");
    449         System.err.println("north = " + north);
    450         System.err.println("south = " + south);
    451         System.err.println("east = " + east);
    452         System.err.println("west = " + west + "\n");
     450        logger.debug("For country " + countryCode + ":");
     451        logger.debug("north = " + north);
     452        logger.debug("south = " + south);
     453        logger.debug("east = " + east);
     454        logger.debug("west = " + west + "\n");
    453455
    454456        int half_width = HISTOGRAM_WIDTH/2; // reset half_width
     
    458460        //v_tmp_count = Math.ceil(v_tmp_count);
    459461        double h_tmp_count = v_tmp_count;
    460 System.err.println("count = " + count);
    461         System.err.println("v = " + v_tmp_count);
    462         System.err.println("h = " + h_tmp_count);
    463         System.err.println("lat = " + lat);
    464         System.err.println("lng = " + lng + "\n");
     462      logger.debug("count = " + count);
     463      logger.debug("v = " + v_tmp_count);
     464      logger.debug("h = " + h_tmp_count);
     465      logger.debug("lat = " + lat);
     466      logger.debug("lng = " + lng + "\n");
    465467       
    466468        if(h_tmp_count > 90) { // 360 max width, of which each longitude
     
    471473        // that does not go over 90+90 = 180 max. Vertical_factor is 1.
    472474
    473         System.err.println("Out of longitude range. Attempting to compensate...");
     475        logger.debug("Out of longitude range. Attempting to compensate...");
    474476       
    475477        double diff = h_tmp_count - 80.0; // actually 90 wraps on geojson tools, 80 doesn't
     
    478480       
    479481        if(v_tmp_count > 180 || h_tmp_count > 90) {
    480             System.err.println("Warning: still exceeded max latitude and/or longitude range");         
     482            logger.warn("Warning: still exceeded max latitude and/or longitude range");
    481483        }
    482484       
    483485        }
    484486       
    485         System.err.println("Recalculating polygon for country with high count: " + countryCode + ".");
    486         System.err.println("count = " + count);
    487         System.err.println("v = " + v_tmp_count);
    488         System.err.println("h = " + h_tmp_count);
    489         System.err.println("lat = " + lat);
    490         System.err.println("lng = " + lng + "\n");     
     487        logger.debug("Recalculating polygon for country with high count: " + countryCode + ".");
     488        logger.debug("count = " + count);
     489        logger.debug("v = " + v_tmp_count);
     490        logger.debug("h = " + h_tmp_count);
     491        logger.debug("lat = " + lat);
     492        logger.debug("lng = " + lng + "\n");       
    491493       
    492494       
     
    497499
    498500        /*
    499     System.err.println("north = " + north);
    500     System.err.println("south = " + south);
    501     System.err.println("east = " + east);
    502     System.err.println("west = " + west + "\n");
     501    logger.debug("north = " + north);
     502    logger.debug("south = " + south);
     503    logger.debug("east = " + east);
     504    logger.debug("west = " + west + "\n");
    503505        */
    504506       
     
    538540
    539541    if(recalculated) {
    540         System.err.println("\nnorth = " + north);
    541         System.err.println("south = " + south);
    542         System.err.println("east = " + east);
    543         System.err.println("west = " + west);
     542        logger.debug("\nnorth = " + north);
     543        logger.debug("south = " + south);
     544        logger.debug("east = " + east);
     545        logger.debug("west = " + west);
    544546
    545547       
     
    568570    String multiPointGeojsonString = FeatureConverter.toStringValue(geometry);
    569571    if(displayMapData == PRINT_MAPDATA_TO_SCREEN) {
    570         System.err.println("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n");
     572        logger.info("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n");
    571573    }
    572574    try (
     
    575577
    576578        // Some basic re-formatting for some immediate legibility
    577         // But pasting the contents of the file (or the System.err output above)
     579        // But pasting the contents of the file (or the System.err/logger.info output above)
    578580        // directly into http://geojson.tools/ or http://geojson.io/
    579581        // will instantly reformat the json perfectly anyway.
     
    605607
    606608    FeatureCollection featureColl = this.toFeatureCollection();
    607     String featuresGeojsonString = FeatureConverter.toStringValue(featureColl);
     609    this.featuresGeojsonString = FeatureConverter.toStringValue(featureColl);
    608610    if(displayMapData == PRINT_MAPDATA_TO_SCREEN) {
    609         System.err.println("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");
     611        logger.info("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");
    610612    }
    611613    try (
     
    626628
    627629    public String getFeaturesGeoJsonString(boolean uriEncoded) {
    628     String featuresGeojsonString = FeatureConverter.toStringValue(this.toFeatureCollection());
     630    if(this.featuresGeojsonString == null) {
     631        this.featuresGeojsonString = FeatureConverter.toStringValue(this.toFeatureCollection());
     632    }
    629633    if(uriEncoded) {
    630634        // Want to return encodeURIComponent(JSON.stringify(featuresGeojsonString));
     
    725729    System.err.println();
    726730   
    727     SafeProcess proc = new SafeProcess(cmdArgs);
     731    SafeProcess proc = new SafeProcess(cmdArgs);   
    728732
    729733    int retVal = proc.runProcess();
     
    770774        File countsFile = new File(args[0]);
    771775        if(!countsFile.exists()) {
    772         System.err.println("File " + countsFile + " does not exist");
     776        logger.error("File " + countsFile + " does not exist");
    773777        System.exit(-1);
    774778        }
     
    779783        String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile();
    780784       
    781         System.err.println("***********\nWrote mapdata to files " + multipointOutFileName
     785        logger.info("***********\nWrote mapdata to files " + multipointOutFileName
    782786                   + " and " + featuresOutFileName);
    783         System.err.println("You can paste the geojson contents of either of these files into "
     787        logger.info("You can paste the geojson contents of either of these files into "
    784788                   + " the editor at " + GEOJSON_MAP_TOOL_URL
    785789                   + " to see the data arranged on a world map");
    786790
    787         System.err.println("Total count for query: " + mapData.getTotalCount());
     791        logger.info("Total count for query: " + mapData.getTotalCount());
    788792       
    789793    } catch(Exception e) {
  • other-projects/maori-lang-detection/src/org/greenstone/atea/SummaryTool.java

    r33926 r33938  
    6969    writeURLsToFile(urlsList, outFile, urlsList.size());
    7070   
    71     System.out.println("Wrote all isMRI web page URLs for the sites in input domainsFile\ninto file: "
     71    logger.info("Wrote all isMRI web page URLs for the sites in input domainsFile\ninto file: "
    7272               + Utility.getFilePath(outFile));
    7373    }
     
    7878    writeURLsToFile(urlsList, outFile, urlsList.size());
    7979
    80     System.out.println("Wrote all containsMRI web page URLs for the sites in input domainsFile\ninto file: "
     80    logger.info("Wrote all containsMRI web page URLs for the sites in input domainsFile\ninto file: "
    8181               + Utility.getFilePath(outFile));
    8282    }
     
    109109            // Print out whether there were no isMRI pages for the domain (only containsMRI). A useful thing to know
    110110            if(moreURLs.size() == 0 && filterType == MongoDBQueryer.IS_MRI) {
    111             System.out.println("   " + countryCode + " domain " + domain + " had no webpages where isMRI=true - only containsMRI.");
     111            logger.info("   " + countryCode + " domain " + domain
     112                    + " had no webpages where isMRI=true - only containsMRI.");
    112113            }
    113114
     
    119120        }
    120121        }
    121         System.err.println("");
     122        logger.info("");
    122123    } catch(Exception e) {
    123124        logger.error("Unable to read URLs from file " + Utility.getFilePath(domainsFile));
     
    158159    String filterName = (filterType == MongoDBQueryer.IS_MRI) ? "isMRI" : "containsMRI";
    159160    File outFile = new File(outFolder, filterName+"_full_"+domainsFile.getName());
    160 
     161    String fullFileName = Utility.getFilePath(outFile);
     162    if(outFile.exists()) {
     163        logger.error("WARN: Full web page urls list file already exists: " + fullFileName);
     164        logger.error("      Will not overwrite. Delete file manually and re-run this program.");
     165        return;
     166    }
    161167    writeURLsToFile(urlsList, outFile, N_totalNumPages);
    162     System.out.println("Wrote out full listing of web page URLs for sites in input domainsFile"
    163                + "\ninto file: " + Utility.getFilePath(outFile));
     168    logger.info("Wrote out full listing of web page URLs for sites in input domainsFile"
     169               + "\ninto file: " + fullFileName);
    164170       
    165171    // 3. calculate sample size n for population size N if using 90% confidence and 5% margin of error
    166172    int n_numSampleURLs = calcSampleSize(N_totalNumPages);
    167173
    168     System.err.println("*** N, total number of web pages for which " + filterName + "=true from domain shortlist file: " + N_totalNumPages);
    169     System.err.println("    (out of " + mongodbQueryer.countOfWebpagesMatching(filterType)
     174    logger.info("*** N, total number of web pages for which " + filterName + "=true from domain shortlist file: " + N_totalNumPages);
     175    logger.info("    (out of " + mongodbQueryer.countOfWebpagesMatching(filterType)
    170176               + " web pages across ALL sites for which " + filterName + " = true)");
    171     System.err.println("*** n, sample size of web page URLs: " + n_numSampleURLs);
     177    logger.info("*** n, sample size of web page URLs: " + n_numSampleURLs);
    172178   
    173179    // 4. Shuffle all the URLs and write the first n (sample size) URLs to a file
     
    175181    // https://stackoverflow.com/questions/6284589/setting-a-seed-to-shuffle-arraylist-in-java-deterministically
    176182    Collections.shuffle(urlsList, new Random(FIXED_SEED));
    177    
     183    fullFileName = Utility.getFilePath(outFile);
    178184    outFile = new File(outFolder, "random"+n_numSampleURLs+"_"+domainsFile.getName());
     185   
     186    if(outFile.exists()) {
     187        logger.error("WARN: Full web page urls list file already exists: " + fullFileName);
     188        logger.error("      Will not overwrite. Delete file manually and re-run this program.");
     189        return;
     190    }
    179191    writeURLsToFile(urlsList, outFile, n_numSampleURLs);
    180     System.out.println("Wrote a sample of n=" + n_numSampleURLs + " of web page URLs "
    181                + "for the sites in input domainsFile\ninto file: " + Utility.getFilePath(outFile));
     192    logger.info("Wrote a sample of n=" + n_numSampleURLs + " of web page URLs "
     193            + "for the sites in input domainsFile\ninto file: " + fullFileName);
    182194
    183195    // For N = 6557, z-alpha-over-2 = 1.6449 and m = 0.05 (5%),
     
    223235        Tuple urlInfo = urlsList.get(i);
    224236       
    225         //System.out.println(list.get(i));
     237        //logger.debug(list.get(i));
    226238        writer.write(urlInfo + "\n"); // calls toString() on tuple of url -> countryCode
    227239        }
     
    261273    }
    262274   
    263     System.err.println("*** Wrote file: " + filename);
     275    logger.info("*** Wrote file: " + filename);
    264276
    265277    return filename;
     
    294306    }
    295307   
    296     System.err.println("*** Wrote file: " + filename);
     308    logger.info("*** Wrote file: " + filename);
    297309
    298310    return filename;
     
    321333    }
    322334
    323     System.err.println("*** Wrote file: " + filename);
     335    logger.info("*** Wrote file: " + filename);
    324336    return filename;
    325337    }
     
    341353     */
    342354    public static void main(String args[]) {
     355    SafeProcess.DEBUG = 1;
     356   
    343357    if(args.length >= 2) {
    344358        printUsage();
     
    356370        SummaryTool listing = new SummaryTool(mongodb, outFolder);
    357371
    358         System.out.println("*************************************");
     372        logger.info("*************************************");
    359373       
    360374       
     
    362376        File domainsFile = new File(args[0]);
    363377        if(!domainsFile.exists()) {
    364             System.err.println("File " + domainsFile + " does not exist");
     378            logger.info("File " + domainsFile + " does not exist");
    365379            System.exit(-1);
    366380        }
     
    422436            File geoJsonFile = new File(geoJsonFilename);
    423437            if(!geoJsonFile.exists()) {
    424             System.err.println("@@@ geoJson file " + geoJsonFilename + " not generated!");
     438            logger.info("@@@ geoJson file " + geoJsonFilename + " not generated!");
    425439            continue;
    426440            }
    427441            */
    428             System.err.println("**** Wrote mapdata to file " + geoJsonFilename);
    429             //System.err.println("     Paste the file's geojson contents into "
     442            logger.info("**** Wrote mapdata to file " + geoJsonFilename);
     443            //logger.info("     Paste the file's geojson contents into "
    430444            //+ "the editor at " + CountryCodeCountsMapData.GEOJSON_MAP_TOOL_URL
    431445            //+ " to see the data arranged on a world map");           
    432             System.err.println("Total count for query: " + mapData.getTotalCount());
     446            logger.info("Total count for query: " + mapData.getTotalCount());
    433447           
    434448            // Running the command:
     
    452466            /*boolean uriEncoded = true;
    453467            String mapDataEncodedStr = mapData.getFeaturesGeoJsonString(uriEncoded);
    454             System.err.println("Encoded string: " + mapDataEncodedStr);
     468            logger.info("Encoded string: " + mapDataEncodedStr);
    455469            */
    456470           
    457             System.err.println("Data URL string: " + mapData.getAsMapURL());
    458             System.err.println();
     471            logger.info("Data URL string: " + mapData.getAsMapURL());
     472            logger.info("");
    459473            mapData.geoJsonMapScreenshot(outFolder, tablefilename);
    460             System.err.println("---");
     474            logger.info("---");
    461475
    462476            // TODO: Remove break. For debugging: breaks after first table -> map conversion.
    463             //break;
     477            break;
    464478        }
    465479       
Note: See TracChangeset for help on using the changeset viewer.