Changeset 33982


Ignore:
Timestamp:
2020-02-26T21:59:55+13:00 (4 years ago)
Author:
ak19
Message:

SummaryTool.java now processes the handcrafted UNIQUE domains counts file for the manually shortlisted domains where at least one webpage contained at least one genuine MRI sentence.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/src/org/greenstone/atea/SummaryTool.java

    r33981 r33982  
    391391        //String containsMRIFile = listing.produceURLsForPagesContainingMRI(domainsFile);
    392392
    393 
    394         // TODO: for NZ, do IS_MRI. For overseas still CONTAINS_MRI
    395         // then also do the shuffle to gen X num of random web page URLs.
    396         //String filename = listing.webPagesOfAllNZSitesAndDomainListing(domainsFile);
     393        // 1. generate full listing of all web page URLs for all domains in input domain file
     394        // 2. AND generate random listing of web page URLs from (1) of appropriate sample size
    397395        listing.mriWebPageListingForDomainListing(domainsFile);
    398396
     
    418416        // TODO: generate the tables
    419417
    420         String[] tableFileNames = mongodb.writeTables(outFolder);
     418        String[] tableFiles = mongodb.writeTables(outFolder);
     419        //List tableFileNames = Arrays.asList(tableFiles);
     420        List<String> tableFileNames = new ArrayList<String>();
     421        for(int i = 0; i < tableFiles.length; i++) {
     422            tableFileNames.add(tableFiles[i]);
     423        }
     424
     425        // Add in the manually created counts (.json) file of manually shortlisted
     426        // (unique) domains which genuinely contain at least one MRI sentence.
     427        // It may not exist unless the user created this file by hand
     428        // AND copied it into the outFolder, "mongodb-data-auto"
     429        tableFileNames.add("6counts_sitesWithPagesContainingMRI_manualShortlist");
     430           
     431       
    421432        // for each table file name, generate the geojson-features .json file
    422433        // that GEOJSON_MAP_TOOL_URL takes as input to produce a map.
     
    424435        ///CountryCodeCountsMapData.openFirefox();
    425436       
    426         for(int i = 1; i < tableFileNames.length; i++) { // empty element at 0
    427             String tablefilename = tableFileNames[i] + ".json"; // filenames have no suffix
     437        for(int i = 1; i < tableFileNames.size(); i++) { // empty element at 0
     438            String tablefilename = tableFileNames.get(i) + ".json"; //tableFileNames[i] + ".json"; // filenames have no suffix
    428439           
    429440            File countsTableFile = new File(outFolder, tablefilename);
    430441            if(!countsTableFile.exists()) {
    431442            logger.error("@@@ File " + countsTableFile + " does not exist!");
    432             logger.error("@@@ Can't generate map date for this.");
     443            logger.error("@@@ Can't generate map data for this.");
    433444            continue;
    434445            }
     
    436447            CountryCodeCountsMapData mapData
    437448            = new CountryCodeCountsMapData(countsTableFilename);
    438             String geoJsonFilename = mapData.writeFeaturesGeoJsonToFile(CountryCodeCountsMapData.SUPPRESS_MAPDATA_DISPLAY);
    439            
    440             // Ensuring the geo-json file generated exists
    441             //String geoJsonFilename = outFolder + File.separator
    442             //+ GEOJSON_FEATURES_FILE_PREFIX + tablefilename;           
     449            String geoJsonFilename = mapData.writeFeaturesGeoJsonToFile(CountryCodeCountsMapData.SUPPRESS_MAPDATA_DISPLAY);         
     450                   
    443451            File geoJsonFile = new File(geoJsonFilename);
    444452            if(!geoJsonFile.exists()) {
Note: See TracChangeset for help on using the changeset viewer.