Changeset 33917 for other-projects
- Timestamp:
- 2020-02-13T18:18:13+13:00 (4 years ago)
- Location:
- other-projects/maori-lang-detection/src/org/greenstone/atea
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBQueryer.java
r33913 r33917 177 177 return urlsList; 178 178 } 179 179 180 /** 181 * Does a mongoDB query like the following, depending on filter type: 182 * db.getCollection('Webpages').find({isMRI: true}).count() 183 * @param filterType can be either IS_MRI or CONTAINS_MRI. 184 * @return the number of webpages that matched the filterType setting. 185 */ 186 public long countOfWebpagesMatching(int filterType) { 187 String query = (filterType == IS_MRI) ? "{isMRI: true}" : "{containsMRI: true}"; 188 long result = -1; 189 MongoCollection<Document> collection = getWebpagesCollection(); 190 191 192 try { 193 BasicDBObject queryObj = BasicDBObject.parse(query); 194 //result = collection.find(queryObj).count(); 195 // https://stackoverflow.com/questions/32683458/how-to-call-count-operation-after-find-with-mongodb-java-driver 196 result = collection.countDocuments(queryObj); 197 198 } catch(Exception e) { 199 logger.error("MongoDB couldn't parse provided query " + query); 200 } 201 202 return result; 203 } 204 180 205 /** 181 206 * RUNNING A MONGODB COLLECTION.AGGREGATE() in JAVA: -
other-projects/maori-lang-detection/src/org/greenstone/atea/SummaryTool.java
r33911 r33917 148 148 // 2. write all the URLs in urlsList to a file 149 149 //File outFolder = domainsFile.getParentFile(); 150 String fil eName = (filterType == MongoDBQueryer.IS_MRI) ? "isMRI_" : "containsMRI_";151 File outFile = new File(outFolder, fil eName+domainsFile.getName());150 String filterName = (filterType == MongoDBQueryer.IS_MRI) ? "isMRI" : "containsMRI"; 151 File outFile = new File(outFolder, filterName+"_"+domainsFile.getName()); 152 152 153 153 writeURLsToFile(urlsList, outFile, N_totalNumPages); … … 156 156 157 157 // 3. calculate sample size n for population size N if using 90% confidence and 5% margin of error 158 int n_numSampleURLs = calcSampleSize(N_totalNumPages); 159 160 System.err.println("*** N, total number of web pages that matched: " + N_totalNumPages); 158 int n_numSampleURLs = calcSampleSize(N_totalNumPages); 159 160 System.err.println("*** N, total number of web pages for which " + filterName + "=true from domain shortlist: " + N_totalNumPages); 161 System.err.println(" (out of " + mongodbQueryer.countOfWebpagesMatching(filterType) 162 + " web pages across ALL sites for which " + filterName + " = true)"); 161 163 System.err.println("*** n, sample size of web page URLs: " + n_numSampleURLs); 162 164
Note:
See TracChangeset
for help on using the changeset viewer.