Ignore:
Timestamp:
2020-01-30T20:21:31+13:00 (4 years ago)
Author:
ak19
Message:

Have the 2 mongodb aggregate() calls working.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java

    r33876 r33879  
    352352
    353353    /**
     354       RUNNING A MONGODB COLLECTION.AGGREGATE() in JAVA:
     355
     356       https://stackoverflow.com/questions/31643109/mongodb-aggregation-with-java-driver
     357       https://stackoverflow.com/questions/48000891/parse-mongodb-json-query-in-java-with-multiple-criteria
     358       Not Java: https://stackoverflow.com/questions/39060221/a-pipeline-stage-specification-object-must-contain-exactly-one-field-with-php-mo
     359
     360       (https://stackoverflow.com/questions/55029222/parse-mongodb-query-to-java)
     361       https://www.programcreek.com/java-api-examples/?api=com.mongodb.client.model.Aggregates
     362       On using group(TExpression) inside collection.aggregate().
     363
     364       
     365       The aggregate() we want to run:
    354366
    355367       db.Websites.aggregate([
    356     {
    357         $match: {
     368       {
     369         $match: {
    358370            $and: [
    359371                {geoLocationCountryCode: {$ne: "NZ"}},
     
    362374                {$or: [{geoLocationCountryCode: "AU"}, {urlContainsLangCodeInPath: false}]}           
    363375            ]
    364         }
    365     },
    366     { $unwind: "$geoLocationCountryCode" },
    367     {
    368         $group: {
     376      }
     377    },
     378    { $unwind: "$geoLocationCountryCode" },
     379    {
     380          $group: {
    369381            _id: {$toLower: '$geoLocationCountryCode'},
    370382            count: { $sum: 1 },
    371383            domain: { $addToSet: '$domain' }
    372         }
    373     },
    374     { $sort : { count : -1} }
    375 ]);
    376 
    377        https://stackoverflow.com/questions/31643109/mongodb-aggregation-with-java-driver
    378        https://stackoverflow.com/questions/48000891/parse-mongodb-json-query-in-java-with-multiple-criteria
    379        Not Java: https://stackoverflow.com/questions/39060221/a-pipeline-stage-specification-object-must-contain-exactly-one-field-with-php-mo
    380 
    381        (https://stackoverflow.com/questions/55029222/parse-mongodb-query-to-java)
    382        https://www.programcreek.com/java-api-examples/?api=com.mongodb.client.model.Aggregates
    383        On using group(TExpression) inside collection.aggregate().
     384          }
     385     },
     386     { $sort : { count : -1} }
     387    ]);
     388
    384389    */
    385390    public String aggregateContainsMRIForOverseas() {
     
    415420         group("$geoLocationCountryCode", Arrays.asList(sum("count", 1), addToSet("domain", "$domain"))),
    416421         sort(BasicDBObject.parse("{count : -1}"))
    417                         ));
     422    ));
    418423   
    419424    for (Document doc : output) {
     
    424429    return "";
    425430    }
     431
     432
     433    /**       
     434       The mongodb aggregate() we want to run this time:
     435
     436       db.Websites.aggregate([
     437       {
     438        $match: {
     439            $and: [
     440                {numPagesContainingMRI: {$gt: 0}},
     441                {$or: [{geoLocationCountryCode:"NZ"},{domain: /\.nz/}]}
     442            ]
     443          }
     444    },
     445    { $unwind: "$geoLocationCountryCode" },
     446    {
     447          $group: {
     448            _id: "nz",
     449            count: { $sum: 1 },
     450            domain: { $addToSet: '$domain' }
     451          }
     452    },
     453    { $sort : { count : -1} }
     454    ]);
     455    */
     456    public String aggregateContainsMRIForNZ() {
            // Runs an aggregation pipeline over the collection named by WEBSITES_COLLECTION
            // and prints every resulting document to stdout as JSON.
            // The return value is always the empty string; nothing is returned from the query.
     457    // working with the WebSites collection, not WebPages collection!
     458    MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION);
     459
     460   
     461    //String isMRI_filter =
     462   
            // A document passes the OR clause when geoLocationCountryCode equals "NZ"
            // or when its domain matches the regex /\.nz/.
            // The regex is unanchored, so ".nz" anywhere in the domain matches, not only a trailing ".nz".
            // NOTE(review): confirm that matching ".nz" mid-domain is intended.
     463    Bson orQuery = or(
     464              BasicDBObject.parse("{geoLocationCountryCode: \"NZ\"}"),
     465              BasicDBObject.parse("{domain: /\\.nz/}")
     466              );
            // Full match filter: numPagesContainingMRI > 0 AND the OR clause above.
     467    Bson andQuery = and(
     468        BasicDBObject.parse("{numPagesContainingMRI: {$gt: 0}}"),
     469        orQuery);
     470   
            // Pipeline stages, in order: $match, $unwind on geoLocationCountryCode,
            // $group under the constant id "NZ" (counting documents into "count" and
            // collecting distinct "$domain" values into "domain"), then $sort by count descending.
            // NOTE(review): the group id here is "NZ", whereas the shell query in the
            // comment preceding this method uses _id: "nz" - confirm which casing is wanted.
     471    AggregateIterable<Document> output
     472        = collection.aggregate(Arrays.asList(
     473         match(andQuery),
     474         unwind("$geoLocationCountryCode"),
     475         group("NZ", Arrays.asList(sum("count", 1), addToSet("domain", "$domain"))),
     476         sort(BasicDBObject.parse("{count : -1}"))
     477     ));
     478
            // Grouping uses a constant id, so all matched documents fall into one group.
     479    // should only have one doc
     480    for (Document doc : output) {
     481        //System.out.println(doc);
     482        System.out.println(doc.toJson());
     483    }
     484
            // Placeholder return value: the results are only printed above.
     485    return "";
     486    }
     487
     488    public void writeToFile(boolean append, String filename, AggregateIterable<Document> output) {
            // Prints each document of the given aggregation result to stdout as JSON.
            // NOTE(review): despite the method name, nothing is written to a file here;
            // the 'append' and 'filename' parameters are not used in this body.
            // TODO: confirm whether file output is still to be implemented.
     489   
            // NOTE(review): nothing in this method limits 'output' to a single document;
            // the comment below looks copied from the aggregate methods - verify against callers.
     490    // should only have one doc
     491    for (Document doc : output) {
     492        //System.out.println(doc);
     493        System.out.println(doc.toJson());
     494    }
     495    }
     496   
    426497   
    427498    /** https://stackoverflow.com/questions/19938153/do-i-need-to-explicitly-close-connection */
Note: See TracChangeset for help on using the changeset viewer.