Changeset 33982

Show
Ignore:
Timestamp:
26.02.2020 21:59:55 (5 weeks ago)
Author:
ak19
Message:

SummaryTool.java now processes the handcrafted UNIQUE domains counts file for the manually shortlisted domains where at least one webpage contained at least one genuine MRI sentence.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/src/org/greenstone/atea/SummaryTool.java

    r33981 r33982  
    391391        //String containsMRIFile = listing.produceURLsForPagesContainingMRI(domainsFile); 
    392392 
    393  
    394         // TODO: for NZ, do IS_MRI. For overseas still CONTAINS_MRI 
    395         // then also do the shuffle to gen X num of random web page URLs. 
    396         //String filename = listing.webPagesOfAllNZSitesAndDomainListing(domainsFile); 
     393        // 1. generate full listing of all web page URLs for all domains in input domain file 
     394        // 2. AND generate random listing of web page URLs from (1) of appropriate sample size 
    397395        listing.mriWebPageListingForDomainListing(domainsFile); 
    398396 
     
    418416        // TODO: generate the tables 
    419417 
    420         String[] tableFileNames = mongodb.writeTables(outFolder); 
     418        String[] tableFiles = mongodb.writeTables(outFolder); 
     419        //List tableFileNames = Arrays.asList(tableFiles); 
     420        List<String> tableFileNames = new ArrayList<String>(); 
     421        for(int i = 0; i < tableFiles.length; i++) { 
     422            tableFileNames.add(tableFiles[i]); 
     423        } 
     424 
     425        // Add in the manually created counts (.json) file of manually shortlisted 
     426        // (unique) domains which genuinely contain at least one MRI sentence. 
     427        // It may not exist unless the user created this file by hand 
     428        // AND copied it into the outFolder, "mongodb-data-auto" 
     429        tableFileNames.add("6counts_sitesWithPagesContainingMRI_manualShortlist"); 
     430             
     431         
    421432        // for each table file name, generate the geojson-features .json file 
    422433        // that GEOJSON_MAP_TOOL_URL takes as input to produce a map. 
     
    424435        ///CountryCodeCountsMapData.openFirefox(); 
    425436         
    426         for(int i = 1; i < tableFileNames.length; i++) { // empty element at 0 
    427             String tablefilename = tableFileNames[i] + ".json"; // filenames have no suffix 
     437        for(int i = 1; i < tableFileNames.size(); i++) { // empty element at 0 
     438            String tablefilename = tableFileNames.get(i) + ".json"; //tableFileNames[i] + ".json"; // filenames have no suffix 
    428439             
    429440            File countsTableFile = new File(outFolder, tablefilename); 
    430441            if(!countsTableFile.exists()) { 
    431442            logger.error("@@@ File " + countsTableFile + " does not exist!"); 
    432             logger.error("@@@ Can't generate map date for this."); 
     443            logger.error("@@@ Can't generate map data for this."); 
    433444            continue; 
    434445            } 
     
    436447            CountryCodeCountsMapData mapData 
    437448            = new CountryCodeCountsMapData(countsTableFilename); 
    438             String geoJsonFilename = mapData.writeFeaturesGeoJsonToFile(CountryCodeCountsMapData.SUPPRESS_MAPDATA_DISPLAY); 
    439              
    440             // Ensuring the geo-json file generated exists 
    441             //String geoJsonFilename = outFolder + File.separator 
    442             //+ GEOJSON_FEATURES_FILE_PREFIX + tablefilename;            
     449            String geoJsonFilename = mapData.writeFeaturesGeoJsonToFile(CountryCodeCountsMapData.SUPPRESS_MAPDATA_DISPLAY);          
     450                     
    443451            File geoJsonFile = new File(geoJsonFilename); 
    444452            if(!geoJsonFile.exists()) {