Changeset 33881 for other-projects
- Timestamp:
- 2020-01-30T22:08:00+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java
r33880 r33881 42 42 import java.io.FileReader; 43 43 import java.io.IOException; 44 import java.io.UncheckedIOException; 44 45 import java.io.Writer; 45 46 … … 321 322 322 323 // code we'll execute in Iterable.forEach() below 324 // see also https://www.baeldung.com/foreach-java 323 325 Block<Document> storeURL = new Block<Document>() { 324 326 @Override … … 362 364 } 363 365 364 /** 365 The mongodb aggregate() we want to run this time: 366 366 /** 367 * RUNNING A MONGODB COLLECTION.AGGREGATE() in JAVA: 368 * 369 * https://stackoverflow.com/questions/31643109/mongodb-aggregation-with-java-driver 370 * https://stackoverflow.com/questions/48000891/parse-mongodb-json-query-in-java-with-multiple-criteria 371 * Not Java: https://stackoverflow.com/questions/39060221/a-pipeline-stage-specification-object-must-contain-exactly-one-field-with-php-mo 372 * 373 * (https://stackoverflow.com/questions/55029222/parse-mongodb-query-to-java) 374 * https://www.programcreek.com/java-api-examples/?api=com.mongodb.client.model.Aggregates 375 * On using group(TExpression) inside collection.aggregate(). 
376 * 377 * For forEach lambda expressions, see also https://www.baeldung.com/foreach-java 378 * and https://www.javatpoint.com/java-8-foreach 379 * and https://stackoverflow.com/questions/47979978/ambiguous-reference-to-foreach-when-listing-mongodbs-database-in-java 380 * 381 * 382 * The mongodb aggregate() we want to run this time: 383 * 367 384 db.Websites.aggregate([ 368 385 { … … 400 417 orQuery); 401 418 402 AggregateIterable<Document> output 403 = collection.aggregate(Arrays.asList( 419 // Hopefully the lambda expression (forEach()) at end means 420 // we write out each result Document as we get it 421 collection.aggregate(Arrays.asList( 404 422 match(andQuery), 405 423 unwind("$geoLocationCountryCode"), 406 424 group("NZ", Arrays.asList(sum("count", 1), addToSet("domain", "$domain"))), 407 425 sort(BasicDBObject.parse("{count : -1}")) 408 )); 409 410 // should only have one doc 411 for (Document doc : output) { 412 //System.out.println(doc); 413 System.out.println(doc.toJson()); 414 // https://mongodb.github.io/mongo-java-driver/3.9/javadoc/index.html?org/bson/json/JsonWriterSettings.html 415 //writer.write(doc.toJson(new JsonWriterSettings(JsonMode.STRICT, true/*indent*/)) + NEWLINE); 416 /* 417 JsonWriterSettings writeSettings = new JsonWriterSettings(); 418 writeSettings.builder().outputMode(JsonMode.SHELL).indent(true).build(); 419 writer.write(doc.toJson(writeSettings) + NEWLINE); 420 */ 421 writer.write(prettyPrintJson(doc.toJson()) + NEWLINE); 422 } 426 )).forEach((Block<Document>)doc -> writeDoc(doc, writer)); 427 428 // should only have one doc for NZ since it's a count by geolocation. 
423 429 424 430 return; … … 426 432 427 433 /** 428 RUNNING A MONGODB COLLECTION.AGGREGATE() in JAVA: 429 430 https://stackoverflow.com/questions/31643109/mongodb-aggregation-with-java-driver 431 https://stackoverflow.com/questions/48000891/parse-mongodb-json-query-in-java-with-multiple-criteria 432 Not Java: https://stackoverflow.com/questions/39060221/a-pipeline-stage-specification-object-must-contain-exactly-one-field-with-php-mo 433 434 (https://stackoverflow.com/questions/55029222/parse-mongodb-query-to-java) 435 https://www.programcreek.com/java-api-examples/?api=com.mongodb.client.model.Aggregates 436 On using group(TExpression) inside collection.aggregate(). 437 438 439 The aggregate() we want to run: 440 434 * The aggregate() we want to run this time: 435 * 441 436 db.Websites.aggregate([ 442 437 { … … 460 455 { $sort : { count : -1} } 461 456 ]); 462 463 464 https://stackoverflow.com/questions/54746814/jsonwriter-add-a-new-line465 457 */ 466 public void aggregateContainsMRIForOverseas(Writer writer) throws IOException {458 public void aggregateContainsMRIForOverseas(Writer writer) throws UncheckedIOException { 467 459 // working with the WebSites collection, not WebPages collection! 
468 460 MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION); 469 461 470 /*String matchQuery =471 "$and: ["472 + "{geoLocationCountryCode: {$ne: \"NZ\"}},"473 + "{domain: {$not: /\\.nz/}},"474 + "{numPagesContainingMRI: {$gt: 0}},"475 + "{$or: [{geoLocationCountryCode: \"AU\"}, {urlContainsLangCodeInPath: false}]}"476 + "]";*/477 478 479 480 462 481 463 Bson orQuery = or( … … 488 470 BasicDBObject.parse("{numPagesContainingMRI: {$gt: 0}}"), 489 471 orQuery); 490 472 473 474 collection.aggregate(Arrays.asList( 475 match(andQuery), //match(BasicDBObject.parse(matchQuery)) 476 // match((List<DBObject>)JSON.parse(matchQuery)), 477 unwind("$geoLocationCountryCode"), 478 group("$geoLocationCountryCode", Arrays.asList(sum("count", 1), addToSet("domain", "$domain"))), 479 sort(BasicDBObject.parse("{count : -1}")) 480 )).forEach((Block<Document>)doc -> writeDoc(doc, writer)); 481 482 // casting to Block<Document> necessary because otherwise we see the error at 483 // https://stackoverflow.com/questions/47979978/ambiguous-reference-to-foreach-when-listing-mongodbs-database-in-java 484 485 // Less efficient way is to keep all the results in memory and then 486 // write them out one at a time 487 /* 491 488 AggregateIterable<Document> output 492 489 = collection.aggregate(Arrays.asList( … … 497 494 sort(BasicDBObject.parse("{count : -1}")) 498 495 )); 496 499 497 500 498 for (Document doc : output) { 501 499 //System.out.println(doc); 502 500 System.out.println(doc.toJson()); 503 // https://mongodb.github.io/mongo-java-driver/3.9/javadoc/index.html?org/bson/json/JsonWriterSettings.html 504 //writer.write(doc.toJson(new JsonWriterSettings(JsonMode.STRICT, true/*indent*/)) + NEWLINE); 505 /* 506 JsonWriterSettings writeSettings = new JsonWriterSettings(); 507 writeSettings.builder().outputMode(JsonMode.SHELL).indent(true).build(); 508 writer.write(doc.toJson(writeSettings) + NEWLINE); 509 */ 510 writer.write(prettyPrintJson(doc.toJson()) + 
NEWLINE); 511 } 512 501 502 } 503 */ 513 504 return; 514 505 } 515 506 516 507 /** 508 * called by lambda forEach() call on Document objects to write them out to a file. 509 * Have to deal with unreported exceptions here that can't be dealt with when doing 510 * the actual forEach(). See 511 * https://stackoverflow.com/questions/39090292/how-to-cleanly-deal-with-unreported-exception-ioexception-in-stream-foreach 512 */ 513 514 public void writeDoc(Document doc, Writer writer) throws UncheckedIOException { 515 //OLD WAY: writer.write(doc.toJson(new JsonWriterSettings(JsonMode.STRICT, true)) + NEWLINE); 516 // Can't control json output to add newlines after each array element, 517 // no matter which JsonMode is used. 518 519 // https://mongodb.github.io/mongo-java-driver/3.9/javadoc/index.html?org/bson/json/JsonWriterSettings.html 520 // Still can't control array element output, 521 // but this way uses newer mongo java driver 3.9(.1). Tried its various JsonModes too: 522 //JsonWriterSettings writeSettings = new JsonWriterSettings(); 523 //writeSettings.builder().outputMode(JsonMode.SHELL).indent(true).build(); 524 //writer.write(doc.toJson(writeSettings) + NEWLINE); 525 526 // Not the JsonWriter of mongodb java driver: 527 // https://stackoverflow.com/questions/54746814/jsonwriter-add-a-new-line 528 529 // Have to use gson's pretty print to produce a json string that contains 530 // newlines after every array element in the json: 531 String jsonStr = prettyPrintJson(doc.toJson()); 532 System.err.println(jsonStr); 533 try { 534 writer.write(jsonStr + NEWLINE); 535 } catch (IOException ex) { 536 //throw ex; 537 throw new UncheckedIOException(ex); 538 } 539 } 517 540 public String prettyPrintJson(String jsonStr) { 518 541 Gson gson = new GsonBuilder().setPrettyPrinting().create();
Note:
See TracChangeset
for help on using the changeset viewer.