- Timestamp:
- 2020-01-24T20:48:17+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java
r33869 r33870 270 270 * Java mongodb projection: https://stackoverflow.com/questions/44894497/retrieving-data-with-mongodb-java-driver-3-4-using-find-method-with-projection 271 271 * mongodb projection: https://docs.mongodb.com/v3.2/reference/method/db.collection.find/#db.collection.find 272 * 273 * Parse MongoDB query into Java: https://stackoverflow.com/questions/17326747/parsing-strings-to-mongodb-query-documents-with-operators-in-java 274 * Maybe also https://stackoverflow.com/questions/48000891/parse-mongodb-json-query-in-java-with-multiple-criteria 275 * https://stackoverflow.com/questions/55029222/parse-mongodb-query-to-java 276 * http://pingax.com/trick-convert-mongo-shell-query-equivalent-java-objects/ 272 277 */ 273 public ArrayList<String> queryAllMatchingIsMRIURLs(String domain) {278 public ArrayList<String> oldWorks_queryAllMatchingIsMRIURLs(String domain) { 274 279 275 280 final ArrayList<String> urlsList = new ArrayList<String>(); 281 282 // remove any http(s)://(www.) from the start of URL first 283 // since it goes into a regex 284 domain = Utility.stripProtocolAndWWWFromURL(domain); 276 285 277 286 // load the "webpages" db table … … 280 289 281 290 //Pattern pattern = Pattern.compile(".*"+domain+".*"); 291 292 // escape dots in domain for regex 282 293 String pattern = "/"+domain.replace(".", "\\.")+"/"; 283 294 … … 291 302 String url = document.getString("URL"); 292 303 // add to our urlsList 304 System.out.println(url); 293 305 urlsList.add(url); 294 306 } … … 301 313 //collection.find(eq("isMRI", true)).first(); 302 314 // 303 // db.getCollection('Webpages').find({URL: /.*domain.*/, isMRI: true}, {URL: 1, _id: 0})315 // db.getCollection('Webpages').find({URL: /domain/, isMRI: true}, {URL: 1, _id: 0}) 304 316 collection.find(and(eq("isMRI", true), regex("URL", pattern))).projection(fields(include("URL"), excludeId())).forEach(storeURL); 305 317 … … 307 319 return urlsList; 308 320 } 321 322 public ArrayList<String> 
queryAllMatchingIsMRIURLs(String domain) {
    /*
     * Collects the URL field of every document in WEBPAGES_COLLECTION whose
     * isMRI field is true and whose URL contains the given domain.
     *
     * Mongo shell equivalent:
     *   db.getCollection('Webpages').find({URL: /domain/, isMRI: true}, {URL: 1, _id: 0})
     *
     * domain: a domain or URL; it is first passed through
     *         Utility.stripProtocolAndWWWFromURL() before being matched.
     * returns: the matching URLs, in the order the query cursor yields them
     *          (empty list when nothing matches).
     *
     * The domain is matched as literal text. Previously the raw domain was
     * spliced into a shell-syntax query string, so each '.' acted as a regex
     * wildcard and a '/' (or any other metacharacter) in the input could end
     * the regex literal early or alter the query. Building the filter object
     * directly with a quoted Pattern avoids both problems.
     */
    final ArrayList<String> urlsList = new ArrayList<String>();

    // remove any http(s)://(www.) from the start of URL first
    // since it goes into a regex
    domain = Utility.stripProtocolAndWWWFromURL(domain);

    // load the "webpages" db table
    // in mongodb, the equivalent of db tables are called 'collections'
    MongoCollection<Document> collection = this.database.getCollection(WEBPAGES_COLLECTION);

    // Callback invoked once per result document: record (and echo) its URL.
    Block<Document> storeURL = new Block<Document>() {
        @Override
        public void apply(final Document document) {
            String url = document.getString("URL");
            System.out.println(url);
            // add to our urlsList
            urlsList.add(url);
        }
    };

    // Pattern.quote() wraps the domain in \Q...\E so that dots, slashes and
    // every other regex metacharacter are matched literally. The class is
    // referenced by its fully-qualified name so no extra import is required.
    BasicDBObject findObj = new BasicDBObject("URL",
            java.util.regex.Pattern.compile(java.util.regex.Pattern.quote(domain)))
        .append("isMRI", true);

    // Only return the URL field; suppress the default _id field.
    BasicDBObject projectionObj = BasicDBObject.parse("{URL: 1, _id: 0}");

    // do mongodb query
    collection.find(findObj).projection(projectionObj).forEach(storeURL);

    return urlsList;
}


/** https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection */
Note:
See TracChangeset
for help on using the changeset viewer.