Changeset 33869
- Timestamp:
- 2020-01-23T22:59:46+13:00 (4 years ago)
- Location:
- other-projects/maori-lang-detection/src/org/greenstone/atea
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/CountryCodeCountsMapData.java
r33867 r33869 601 601 try { 602 602 File countsFile = new File(args[0]); 603 603 if(!countsFile.exists()) { 604 System.err.println("File " + countsFile + " does not exist"); 605 System.exit(-1); 606 } 607 604 608 CountryCodeCountsMapData mapData = new CountryCodeCountsMapData(args[0]); 605 609 -
other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java
r33653 r33869 2 2 3 3 //import org.bson.BSONObject; 4 4 5 5 import com.mongodb.client.MongoCollection; 6 6 import com.mongodb.client.MongoDatabase; 7 7 //import com.mongodb.client.MongoIterable; 8 9 // to use collection.find() filters like eq(), regex() etc 10 import static com.mongodb.client.model.Filters.*; 11 // to use collection.find().projection() filters like include() etc 12 import static com.mongodb.client.model.Projections.*; 13 14 //import org.bson.conversions.Bson; 8 15 import com.mongodb.BasicDBObject; 9 16 import com.mongodb.MongoClient; … … 11 18 import com.mongodb.ServerAddress; 12 19 import com.mongodb.MongoClientOptions; 20 21 import com.mongodb.Block; 13 22 14 23 import org.bson.Document; … … 20 29 import java.util.List; 21 30 import java.util.Properties; 22 31 import java.util.regex.Pattern; 23 32 24 33 import org.apache.log4j.Logger; … … 58 67 59 68 static final String PROPS_FILENAME = "config.properties"; 60 public static final String WEBPAGES_COLLECTION = " webpages";61 public static final String WEBSITES_COLLECTION = " websites";69 public static final String WEBPAGES_COLLECTION = "Webpages"; 70 public static final String WEBSITES_COLLECTION = "Websites"; 62 71 63 72 // configuration details, some with fallback values … … 256 265 */ 257 266 267 /** 268 * Java mongodb find: https://mongodb.github.io/mongo-java-driver/3.4/driver/getting-started/quick-start/ 269 * Java mongodb find filters: https://mongodb.github.io/mongo-java-driver/3.4/javadoc/?com/mongodb/client/model/Filters.html 270 * Java mongodb projection: https://stackoverflow.com/questions/44894497/retrieving-data-with-mongodb-java-driver-3-4-using-find-method-with-projection 271 * mongodb projection: https://docs.mongodb.com/v3.2/reference/method/db.collection.find/#db.collection.find 272 */ 273 public ArrayList<String> queryAllMatchingIsMRIURLs(String domain) { 274 275 final ArrayList<String> urlsList = new ArrayList<String>(); 276 277 // load the "webpages" db table 278 // in mongodb, the equivalent of db tables are called 'collections' 279 280 281 //Pattern pattern = Pattern.compile(".*"+domain+".*"); 282 String pattern = "/"+domain.replace(".", "\\.")+"/"; 283 284 MongoCollection<Document> collection = this.database.getCollection(WEBPAGES_COLLECTION); 285 286 287 Block<Document> storeURL = new Block<Document>() { 288 @Override 289 public void apply(final Document document) { 290 //System.out.println(document.toJson()); 291 String url = document.getString("URL"); 292 // add to our urlsList 293 urlsList.add(url); 294 } 295 }; 296 297 298 299 // do mongodb query: 300 // test example: 301 //collection.find(eq("isMRI", true)).first(); 302 // 303 // db.getCollection('Webpages').find({URL:/.*domain.*/, isMRI: true}, {URL: 1, _id: 0}) 304 collection.find(and(eq("isMRI", true), regex("URL", pattern))).projection(fields(include("URL"), excludeId())).forEach(storeURL); 305 306 307 return urlsList; 308 } 309 258 310 /** https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection */ 259 311 public void close() {}
Note:
See TracChangeset
for help on using the changeset viewer.