Ignore:
Timestamp:
2020-01-30T22:54:39+13:00 (4 years ago)
Author:
ak19
Message:

Code now writes both a listing of all non-autotranslated websites and a listing of overseas autotranslated sites.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/src/org/greenstone/atea/MongoDBAccess.java

    r33881 r33882  
    96 96    public static final int CONTAINS_MRI = 1;
    97 97
     98    /** Some reused fieldnames in the Websites collection */
     99    private static final String FILTER_NUMPAGES_IN_MRI = "numPagesInMRI";
     100    private static final String FILTER_NUMPAGES_CONTAINING_MRI = "numPagesContainingMRI";
     101
    98 102    // configuration details, some with fallback values
    99 103    private String HOST = "localhost";
     
    402 406    ]);
    403 407    */
    404     public void aggregateContainsMRIForNZ(Writer writer) throws IOException {
     408    public void aggregateContainsMRIForNZ(Writer writer, int filterType) throws IOException {
    405 409    // working with the WebSites collection, not WebPages collection!
    406 410    MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION);
    407 411
    408    
    409     //String isMRI_filter =
     412    String mriFilterString = (filterType == CONTAINS_MRI) ? "{numPagesContainingMRI: {$gt: 0}}" : "{numPagesInMRI: {$gt: 0}}";
    410 413   
    411 414    Bson orQuery = or(
     
    414 417              );
    415 418    Bson andQuery = and(
    416         BasicDBObject.parse("{numPagesContainingMRI: {$gt: 0}}"),
     419        BasicDBObject.parse(mriFilterString),
    417 420        orQuery);
    418 421   
     
    456 459    ]);
    457 460    */
    458     public void aggregateContainsMRIForOverseas(Writer writer) throws UncheckedIOException {
     461    public void aggregateContainsMRIForOverseas(Writer writer, int filterType,
     462                        boolean isMiInURLPath) throws UncheckedIOException
     463    {
    459 464    // working with the WebSites collection, not WebPages collection!
    460 465    MongoCollection<Document> collection = this.database.getCollection(WEBSITES_COLLECTION);
    461 
     466   
     467    String mriFilterString = (filterType == CONTAINS_MRI) ? "{numPagesContainingMRI: {$gt: 0}}" : "{numPagesInMRI: {$gt: 0}}";
    462 468   
    463 469    Bson orQuery = or(
    464 470              BasicDBObject.parse("{geoLocationCountryCode: \"AU\"}"),
    465               BasicDBObject.parse("{urlContainsLangCodeInPath: false}")
     471              BasicDBObject.parse("{urlContainsLangCodeInPath: "+ isMiInURLPath +"}")
     472              // e.g. "{urlContainsLangCodeInPath: false}"
    466 473              );
    467 474    Bson andQuery = and(
    468 475        BasicDBObject.parse("{geoLocationCountryCode: {$ne: \"NZ\"}}"),
    469 476        BasicDBObject.parse("{domain: {$not: /\\.nz/}}"),
    470         BasicDBObject.parse("{numPagesContainingMRI: {$gt: 0}}"),
     477        BasicDBObject.parse(mriFilterString),
    471 478        orQuery);
    472 479
Note: See TracChangeset for help on using the changeset viewer.