Changeset 33938
- Timestamp:
- 2020-02-17T16:10:00+13:00 (4 years ago)
- Location:
- other-projects/maori-lang-detection
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/conf/log4j.properties.in
r33643 r33938 24 24 25 25 # We're saying our log "mldlog" is some file that gets appended to, 26 # and it will write it out to "maori-lang-detector.log" located in whatever we defined the "logpath" to be 26 # and that log messages from DEBUG level onwards should be written out to 27 # - "maori-lang-detector.log" located in whatever we defined the "logpath" to be 28 # - the console with StdErr 27 29 28 30 # Old way of logging: -
other-projects/maori-lang-detection/src/org/greenstone/atea/CountryCodeCountsMapData.java
r33926 r33938 159 159 private final File outputFolder; 160 160 161 /** The string version of the Feature geojson generated */ 162 private String featuresGeojsonString; 163 161 164 // Used to create screenshots 162 165 private final String MONITOR_RESOLUTION = "1920,1080"; // format: "x,y" … … 292 295 String fileContents = str.substring(0, str.length()-2) + "]"; 293 296 294 // System.err.println("Got file:\n" + fileContents);297 //logger.debug("Got file:\n" + fileContents); 295 298 296 299 // https://stackoverflow.com/questions/2591098/how-to-parse-json-in-java … … 324 327 */ 325 328 public Geometry toMultiPointGeoJson() { 326 //System.err.println("toGeoJSON() is not yet implemented.");327 329 328 330 List<Position> points = new LinkedList<Position>(); … … 404 406 405 407 /* 406 System.err.println("For country " + countryCode + ":");407 System.err.println("north = " + north);408 System.err.println("south = " + south);409 System.err.println("east = " + east);410 System.err.println("west = " + west + "\n");411 System.err.println("-------------");408 logger.debug("For country " + countryCode + ":"); 409 logger.debug("north = " + north); 410 logger.debug("south = " + south); 411 logger.debug("east = " + east); 412 logger.debug("west = " + west + "\n"); 413 logger.debug("-------------"); 412 414 */ 413 415 … … 446 448 // and the South pole has a latitude of -90°." 
447 449 if((east + Math.abs(west)) > 360 || east > 180 || west < -180) { 448 System.err.println("For country " + countryCode + ":");449 System.err.println("north = " + north);450 System.err.println("south = " + south);451 System.err.println("east = " + east);452 System.err.println("west = " + west + "\n");450 logger.debug("For country " + countryCode + ":"); 451 logger.debug("north = " + north); 452 logger.debug("south = " + south); 453 logger.debug("east = " + east); 454 logger.debug("west = " + west + "\n"); 453 455 454 456 int half_width = HISTOGRAM_WIDTH/2; // reset half_width … … 458 460 //v_tmp_count = Math.ceil(v_tmp_count); 459 461 double h_tmp_count = v_tmp_count; 460 System.err.println("count = " + count);461 System.err.println("v = " + v_tmp_count);462 System.err.println("h = " + h_tmp_count);463 System.err.println("lat = " + lat);464 System.err.println("lng = " + lng + "\n");462 logger.debug("count = " + count); 463 logger.debug("v = " + v_tmp_count); 464 logger.debug("h = " + h_tmp_count); 465 logger.debug("lat = " + lat); 466 logger.debug("lng = " + lng + "\n"); 465 467 466 468 if(h_tmp_count > 90) { // 360 max width, of which each longitude … … 471 473 // that does not go over 90+90 = 180 max. Vertical_factor is 1. 472 474 473 System.err.println("Out of longitude range. Attempting to compensate...");475 logger.debug("Out of longitude range. 
Attempting to compensate..."); 474 476 475 477 double diff = h_tmp_count - 80.0; // actually 90 wraps on geojson tools, 80 doesn't … … 478 480 479 481 if(v_tmp_count > 180 || h_tmp_count > 90) { 480 System.err.println("Warning: still exceeded max latitude and/or longitude range");482 logger.warn("Warning: still exceeded max latitude and/or longitude range"); 481 483 } 482 484 483 485 } 484 486 485 System.err.println("Recalculating polygon for country with high count: " + countryCode + ".");486 System.err.println("count = " + count);487 System.err.println("v = " + v_tmp_count);488 System.err.println("h = " + h_tmp_count);489 System.err.println("lat = " + lat);490 System.err.println("lng = " + lng + "\n");487 logger.debug("Recalculating polygon for country with high count: " + countryCode + "."); 488 logger.debug("count = " + count); 489 logger.debug("v = " + v_tmp_count); 490 logger.debug("h = " + h_tmp_count); 491 logger.debug("lat = " + lat); 492 logger.debug("lng = " + lng + "\n"); 491 493 492 494 … … 497 499 498 500 /* 499 System.err.println("north = " + north);500 System.err.println("south = " + south);501 System.err.println("east = " + east);502 System.err.println("west = " + west + "\n");501 logger.debug("north = " + north); 502 logger.debug("south = " + south); 503 logger.debug("east = " + east); 504 logger.debug("west = " + west + "\n"); 503 505 */ 504 506 … … 538 540 539 541 if(recalculated) { 540 System.err.println("\nnorth = " + north);541 System.err.println("south = " + south);542 System.err.println("east = " + east);543 System.err.println("west = " + west);542 logger.debug("\nnorth = " + north); 543 logger.debug("south = " + south); 544 logger.debug("east = " + east); 545 logger.debug("west = " + west); 544 546 545 547 … … 568 570 String multiPointGeojsonString = FeatureConverter.toStringValue(geometry); 569 571 if(displayMapData == PRINT_MAPDATA_TO_SCREEN) { 570 System.err.println("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + 
"\n");572 logger.info("\nMap data as MultiPoint geometry:\n" + multiPointGeojsonString + "\n"); 571 573 } 572 574 try ( … … 575 577 576 578 // Some basic re-formatting for some immediate legibility 577 // But pasting the contents of the file (or the System.err output above)579 // But pasting the contents of the file (or the System.err/logger.info output above) 578 580 // directly into http://geojson.tools/ or http://geojson.io/ 579 581 // will instantly reformat the json perfectly anyway. … … 605 607 606 608 FeatureCollection featureColl = this.toFeatureCollection(); 607 StringfeaturesGeojsonString = FeatureConverter.toStringValue(featureColl);609 this.featuresGeojsonString = FeatureConverter.toStringValue(featureColl); 608 610 if(displayMapData == PRINT_MAPDATA_TO_SCREEN) { 609 System.err.println("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n");611 logger.info("\nMap data as featurecollection:\n" + featuresGeojsonString + "\n"); 610 612 } 611 613 try ( … … 626 628 627 629 public String getFeaturesGeoJsonString(boolean uriEncoded) { 628 String featuresGeojsonString = FeatureConverter.toStringValue(this.toFeatureCollection()); 630 if(this.featuresGeojsonString == null) { 631 this.featuresGeojsonString = FeatureConverter.toStringValue(this.toFeatureCollection()); 632 } 629 633 if(uriEncoded) { 630 634 // Want to return encodeURIComponent(JSON.stringify(featuresGeojsonString)); … … 725 729 System.err.println(); 726 730 727 SafeProcess proc = new SafeProcess(cmdArgs); 731 SafeProcess proc = new SafeProcess(cmdArgs); 728 732 729 733 int retVal = proc.runProcess(); … … 770 774 File countsFile = new File(args[0]); 771 775 if(!countsFile.exists()) { 772 System.err.println("File " + countsFile + " does not exist");776 logger.error("File " + countsFile + " does not exist"); 773 777 System.exit(-1); 774 778 } … … 779 783 String featuresOutFileName = mapData.writeFeaturesGeoJsonToFile(); 780 784 781 System.err.println("***********\nWrote mapdata to files " 
+ multipointOutFileName785 logger.info("***********\nWrote mapdata to files " + multipointOutFileName 782 786 + " and " + featuresOutFileName); 783 System.err.println("You can paste the geojson contents of either of these files into "787 logger.info("You can paste the geojson contents of either of these files into " 784 788 + " the editor at " + GEOJSON_MAP_TOOL_URL 785 789 + " to see the data arranged on a world map"); 786 790 787 System.err.println("Total count for query: " + mapData.getTotalCount());791 logger.info("Total count for query: " + mapData.getTotalCount()); 788 792 789 793 } catch(Exception e) { -
other-projects/maori-lang-detection/src/org/greenstone/atea/SummaryTool.java
r33926 r33938 69 69 writeURLsToFile(urlsList, outFile, urlsList.size()); 70 70 71 System.out.println("Wrote all isMRI web page URLs for the sites in input domainsFile\ninto file: "71 logger.info("Wrote all isMRI web page URLs for the sites in input domainsFile\ninto file: " 72 72 + Utility.getFilePath(outFile)); 73 73 } … … 78 78 writeURLsToFile(urlsList, outFile, urlsList.size()); 79 79 80 System.out.println("Wrote all containsMRI web page URLs for the sites in input domainsFile\ninto file: "80 logger.info("Wrote all containsMRI web page URLs for the sites in input domainsFile\ninto file: " 81 81 + Utility.getFilePath(outFile)); 82 82 } … … 109 109 // Print out whether there were no isMRI pages for the domain (only containsMRI). A useful thing to know 110 110 if(moreURLs.size() == 0 && filterType == MongoDBQueryer.IS_MRI) { 111 System.out.println(" " + countryCode + " domain " + domain + " had no webpages where isMRI=true - only containsMRI."); 111 logger.info(" " + countryCode + " domain " + domain 112 + " had no webpages where isMRI=true - only containsMRI."); 112 113 } 113 114 … … 119 120 } 120 121 } 121 System.err.println("");122 logger.info(""); 122 123 } catch(Exception e) { 123 124 logger.error("Unable to read URLs from file " + Utility.getFilePath(domainsFile)); … … 158 159 String filterName = (filterType == MongoDBQueryer.IS_MRI) ? "isMRI" : "containsMRI"; 159 160 File outFile = new File(outFolder, filterName+"_full_"+domainsFile.getName()); 160 161 String fullFileName = Utility.getFilePath(outFile); 162 if(outFile.exists()) { 163 logger.error("WARN: Full web page urls list file already exists: " + fullFileName); 164 logger.error(" Will not overwrite. 
Delete file manually and re-run this program."); 165 return; 166 } 161 167 writeURLsToFile(urlsList, outFile, N_totalNumPages); 162 System.out.println("Wrote out full listing of web page URLs for sites in input domainsFile"163 + "\ninto file: " + Utility.getFilePath(outFile));168 logger.info("Wrote out full listing of web page URLs for sites in input domainsFile" 169 + "\ninto file: " + fullFileName); 164 170 165 171 // 3. calculate sample size n for population size N if using 90% confidence and 5% margin of error 166 172 int n_numSampleURLs = calcSampleSize(N_totalNumPages); 167 173 168 System.err.println("*** N, total number of web pages for which " + filterName + "=true from domain shortlist file: " + N_totalNumPages);169 System.err.println(" (out of " + mongodbQueryer.countOfWebpagesMatching(filterType)174 logger.info("*** N, total number of web pages for which " + filterName + "=true from domain shortlist file: " + N_totalNumPages); 175 logger.info(" (out of " + mongodbQueryer.countOfWebpagesMatching(filterType) 170 176 + " web pages across ALL sites for which " + filterName + " = true)"); 171 System.err.println("*** n, sample size of web page URLs: " + n_numSampleURLs);177 logger.info("*** n, sample size of web page URLs: " + n_numSampleURLs); 172 178 173 179 // 4. Shuffle all the URLs and write the first n (sample size) URLs to a file … … 175 181 // https://stackoverflow.com/questions/6284589/setting-a-seed-to-shuffle-arraylist-in-java-deterministically 176 182 Collections.shuffle(urlsList, new Random(FIXED_SEED)); 177 183 fullFileName = Utility.getFilePath(outFile); 178 184 outFile = new File(outFolder, "random"+n_numSampleURLs+"_"+domainsFile.getName()); 185 186 if(outFile.exists()) { 187 logger.error("WARN: Full web page urls list file already exists: " + fullFileName); 188 logger.error(" Will not overwrite. 
Delete file manually and re-run this program."); 189 return; 190 } 179 191 writeURLsToFile(urlsList, outFile, n_numSampleURLs); 180 System.out.println("Wrote a sample of n=" + n_numSampleURLs + " of web page URLs "181 + "for the sites in input domainsFile\ninto file: " + Utility.getFilePath(outFile));192 logger.info("Wrote a sample of n=" + n_numSampleURLs + " of web page URLs " 193 + "for the sites in input domainsFile\ninto file: " + fullFileName); 182 194 183 195 // For N = 6557, z-alpha-over-2 = 1.6449 and m = 0.05 (5%), … … 223 235 Tuple urlInfo = urlsList.get(i); 224 236 225 // System.out.println(list.get(i));237 //logger.debug(list.get(i)); 226 238 writer.write(urlInfo + "\n"); // calls toString() on tuple of url -> countryCode 227 239 } … … 261 273 } 262 274 263 System.err.println("*** Wrote file: " + filename);275 logger.info("*** Wrote file: " + filename); 264 276 265 277 return filename; … … 294 306 } 295 307 296 System.err.println("*** Wrote file: " + filename);308 logger.info("*** Wrote file: " + filename); 297 309 298 310 return filename; … … 321 333 } 322 334 323 System.err.println("*** Wrote file: " + filename);335 logger.info("*** Wrote file: " + filename); 324 336 return filename; 325 337 } … … 341 353 */ 342 354 public static void main(String args[]) { 355 SafeProcess.DEBUG = 1; 356 343 357 if(args.length >= 2) { 344 358 printUsage(); … … 356 370 SummaryTool listing = new SummaryTool(mongodb, outFolder); 357 371 358 System.out.println("*************************************");372 logger.info("*************************************"); 359 373 360 374 … … 362 376 File domainsFile = new File(args[0]); 363 377 if(!domainsFile.exists()) { 364 System.err.println("File " + domainsFile + " does not exist");378 logger.info("File " + domainsFile + " does not exist"); 365 379 System.exit(-1); 366 380 } … … 422 436 File geoJsonFile = new File(geoJsonFilename); 423 437 if(!geoJsonFile.exists()) { 424 System.err.println("@@@ geoJson file " + geoJsonFilename + " 
not generated!");438 logger.info("@@@ geoJson file " + geoJsonFilename + " not generated!"); 425 439 continue; 426 440 } 427 441 */ 428 System.err.println("**** Wrote mapdata to file " + geoJsonFilename);429 // System.err.println(" Paste the file's geojson contents into "442 logger.info("**** Wrote mapdata to file " + geoJsonFilename); 443 //logger.info(" Paste the file's geojson contents into " 430 444 //+ "the editor at " + CountryCodeCountsMapData.GEOJSON_MAP_TOOL_URL 431 445 //+ " to see the data arranged on a world map"); 432 System.err.println("Total count for query: " + mapData.getTotalCount());446 logger.info("Total count for query: " + mapData.getTotalCount()); 433 447 434 448 // Running the command: … … 452 466 /*boolean uriEncoded = true; 453 467 String mapDataEncodedStr = mapData.getFeaturesGeoJsonString(uriEncoded); 454 System.err.println("Encoded string: " + mapDataEncodedStr);468 logger.info("Encoded string: " + mapDataEncodedStr); 455 469 */ 456 470 457 System.err.println("Data URL string: " + mapData.getAsMapURL());458 System.err.println();471 logger.info("Data URL string: " + mapData.getAsMapURL()); 472 logger.info(""); 459 473 mapData.geoJsonMapScreenshot(outFolder, tablefilename); 460 System.err.println("---");474 logger.info("---"); 461 475 462 476 // TODO: Remove break. For debugging: breaks after first table -> map conversion. 463 //break;477 break; 464 478 } 465 479
Note:
See TracChangeset
for help on using the changeset viewer.