- Timestamp:
- 2020-02-13T22:40:41+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/SummaryTool.java
r33917 r33919 8 8 /** 9 9 * Runs some of the important mongoDB queries I ran. 10 * 11 * This program expects a folder ../mongo-data-auto to exist. 10 12 * 11 13 * TO COMPILE OR RUN, FIRST DO: … … 26 28 */ 27 29 public class SummaryTool { 30 28 31 static Logger logger = Logger.getLogger(org.greenstone.atea.SummaryTool.class.getName()); 32 33 34 static private final String GEOJSON_FEATURES_FILE_PREFIX = "geojson-features_"; 35 29 36 static private final long FIXED_SEED = 1000; 30 37 31 38 private final MongoDBQueryer mongodbQueryer; 32 39 private File outFolder; 33 34 40 35 41 … … 101 107 // Print out whether there were no isMRI pages for the domain (only containsMRI). A useful thing to know 102 108 if(moreURLs.size() == 0 && filterType == MongoDBQueryer.IS_MRI) { 103 System.out.println(" " + countryCode + " domain " + domain + " had no isMRI webpages- only containsMRI.");109 System.out.println(" " + countryCode + " domain " + domain + " had no webpages where isMRI=true - only containsMRI."); 104 110 } 105 111 … … 149 155 //File outFolder = domainsFile.getParentFile(); 150 156 String filterName = (filterType == MongoDBQueryer.IS_MRI) ? 
"isMRI" : "containsMRI"; 151 File outFile = new File(outFolder, filterName+"_ "+domainsFile.getName());157 File outFile = new File(outFolder, filterName+"_full_"+domainsFile.getName()); 152 158 153 159 writeURLsToFile(urlsList, outFile, N_totalNumPages); … … 158 164 int n_numSampleURLs = calcSampleSize(N_totalNumPages); 159 165 160 System.err.println("*** N, total number of web pages for which " + filterName + "=true from domain shortlist : " + N_totalNumPages);166 System.err.println("*** N, total number of web pages for which " + filterName + "=true from domain shortlist file: " + N_totalNumPages); 161 167 System.err.println(" (out of " + mongodbQueryer.countOfWebpagesMatching(filterType) 162 168 + " web pages across ALL sites for which " + filterName + " = true)"); … … 172 178 System.out.println("Wrote a sample of n=" + n_numSampleURLs + " of web page URLs " 173 179 + "for the sites in input domainsFile\ninto file: " + Utility.getFilePath(outFile)); 180 181 // For N = 6557, z-alpha-over-2 = 1.6449 and m = 0.05 (5%), 182 // n = (z-alpha-over-2^2 x N) / (z-alpha-over-2^2 + 4 x (N-1) x m^2) 183 // = (1.6449^2×6557) ÷ (1.6449^2 + 4 × 6556×0.05^2) = 259.88526851 => 260 rounded up. Check. 174 184 } 175 185 … … 384 394 // TODO: generate the tables 385 395 386 mongodb.writeTables(outFolder); 396 String[] tableFileNames = mongodb.writeTables(outFolder); 397 // for each table file name, generate the geojson-features .json file 398 // that GEOJSON_MAP_TOOL_URL takes as input to produce a map. 
399 400 for(int i = 1; i < tableFileNames.length; i++) { // empty element at 0 401 String tablefilename = tableFileNames[i] + ".json"; // filenames have no suffix 402 403 File countsTableFile = new File(outFolder, tablefilename); 404 if(!countsTableFile.exists()) { 405 logger.error("@@@ File " + countsTableFile + " does not exist!"); 406 logger.error("@@@ Can't generate map date for this."); 407 continue; 408 } 409 String countsTableFilename = outFolder + File.separator + tablefilename; 410 CountryCodeCountsMapData mapData 411 = new CountryCodeCountsMapData(countsTableFilename); 412 String geoJsonFilename = mapData.writeFeaturesGeoJsonToFile(CountryCodeCountsMapData.SUPPRESS_MAPDATA_DISPLAY); 413 414 415 /* 416 // Ensure the geo-json file generated exists 417 //String geoJsonFilename = outFolder + File.separator 418 //+ GEOJSON_FEATURES_FILE_PREFIX + tablefilename; 419 420 File geoJsonFile = new File(geoJsonFilename); 421 if(!geoJsonFile.exists()) { 422 System.err.println("@@@ geoJson file " + geoJsonFilename + " not generated!"); 423 continue; 424 } 425 */ 426 System.err.println("**** Wrote mapdata to file " + geoJsonFilename); 427 //System.err.println(" Paste the file's geojson contents into " 428 //+ "the editor at " + CountryCodeCountsMapData.GEOJSON_MAP_TOOL_URL 429 //+ " to see the data arranged on a world map"); 430 System.err.println("Total count for query: " + mapData.getTotalCount()); 431 432 /*boolean uriEncoded = true; 433 String mapDataEncodedStr = mapData.getFeaturesGeoJsonString(uriEncoded); 434 System.err.println("Encoded string: " + mapDataEncodedStr); 435 */ 436 437 System.err.println("Data URL string: " + mapData.getAsMapURL()); 438 System.err.println(); 439 mapData.geoJsonMapScreenshot(outFolder, tablefilename); 440 System.err.println("---"); 441 442 // TODO: breaks after first table -> map conversion 443 break; 444 } 445 387 446 } 388 447
Note:
See TracChangeset
for help on using the changeset viewer.