/* All the websites that have some MRI detected AND which are either located in NZ, or have an NZ TLD, or (if they're from overseas) don't contain /mi or mi.* in the URL path. We'll also include Australia, so that the valid "kiwiproperty.com" website, otherwise the sole exception, is included in the result list. db.getCollection('Websites').find({$and: [ {numPagesContainingMRI: {$gt: 0}}, {$or: [{geoLocationCountryCode: /(NZ|AU)/}, {domain: /\.nz$/}, {urlContainsLangCodeInPath: false}]} ]}).count() 397 Aggregate results by a count of country codes. Also have a domain listing in the output. However, we want to group websites with an nz TLD together with websites that originate in NZ. So we also want the converse: to remove websites with a .nz TLD from the counts for any originating country codes outside NZ. db.getCollection('Websites').find({$and: [ {geoLocationCountryCode: {$ne: "NZ"}}, {domain: {$not: /\.nz/}}, {numPagesContainingMRI: {$gt: 0}}, {$or: [{geoLocationCountryCode: "AU"}, {urlContainsLangCodeInPath: false}]} ]}).count() 221 websites db.getCollection('Websites').find({$and: [ {numPagesContainingMRI: {$gt: 0}}, {$or: [{geoLocationCountryCode:"NZ"},{domain: /\.nz/}]} ]}).count() 176 (221 + 176 = 397, which matches the total count above.) 
Counts by country code excluding NZ related sites db.Websites.aggregate([ { $match: { $and: [ {geoLocationCountryCode: {$ne: "NZ"}}, {domain: {$not: /\.nz/}}, {numPagesContainingMRI: {$gt: 0}}, {$or: [{geoLocationCountryCode: "AU"}, {urlContainsLangCodeInPath: false}]} ] } }, { $unwind: "$geoLocationCountryCode" }, { $group: { _id: {$toLower: '$geoLocationCountryCode'}, count: { $sum: 1 }, domain: { $addToSet: '$domain' } } }, { $sort : { count : -1} } ]); Count of NZ related sites - output put under a hardcoded _id of "nz" and once again requesting a domain listing in output: db.Websites.aggregate([ { $match: { $and: [ {numPagesContainingMRI: {$gt: 0}}, {$or: [{geoLocationCountryCode:"NZ"},{domain: /\.nz/}]} ] } }, { $unwind: "$geoLocationCountryCode" }, { $group: { _id: "nz", count: { $sum: 1 }, domain: { $addToSet: '$domain' } } }, { $sort : { count : -1} } ]); */ /* 0 */ { "_id" : "us", "count" : 117.0, "domain" : [ "http://shangrilapress.net", "https://www.terakau.org", "http://dannykahei.tripod.com", "https://in.pinterest.com", "http://takethatvacation.com", "http://malecek.com", "http://word-dialect.blogspot.com", "https://www.blue-frontiers.com", "https://static-promote.weebly.com", "http://www.thesalmons.org", "http://ngarangatahi.tripod.com", "http://tkrow.tripod.com", "http://niken8media.logdown.com", "https://www.vaihaunui.net", "https://www.podrozeady.com", "https://www.nccri.ie", "http://georgegi.tripod.com", "http://www.lunar-occultations.com", "http://frontrowphotos.com", "http://linkvip.top", "http://rangiwewehi.com", "http://www.pressreader.com", "http://anglicanhistory.org", "http://www.unicode.org", "https://wol.jw.org", "http://pumanawawhangara.blogspot.com", "http://hannas-reiseblog.blogspot.com", "http://seapixonline.com", "http://ww25.milfsplease.com", "http://www.wikitree.com", "http://ritusehji.blogspot.com", "https://www.pinterest.it", "http://naturalfatburner.net", "http://kiaorahola.blogspot.com", "http://www.hudl.com", 
"http://shuttersportnelson.photoshelter.com", "https://www.pinterest.ca", "http://precious-testimonies.com", "http://www.muhammad.com", "http://www.gotquestions.org", "https://www.pinterest.co.uk", "https://biblehub.com", "http://tuhua2010.blogspot.com", "http://www.precious-testimonies.com", "http://www.huapala.org", "https://nl.pinterest.com", "http://www.whoisthatr.com", "https://www.oemsec.com", "https://www.dbnames.net", "https://www.myadsclassified.com", "http://mikebonnice.com", "http://fhr.kiwicelts.com", "https://www.webwiki.com", "https://www.pinterest.jp", "https://kjohnsonnz.blogspot.com", "http://svenskadress.net", "http://www.godrules.net", "https://www.pinterest.fr", "http://rhymebrain.com", "http://www.frogsonline.com", "http://www.v3whois.com", "http://piripi.blogspot.com", "http://www.twttoa.com", "http://wikiedit.org", "https://livestream.com", "http://burkekm001.tripod.com", "https://maorinews.com", "http://www.geni.com", "http://www.waimate.com", "http://m.biblepub.com", "http://wowwars.net", "https://www.natekore2018.com", "http://tatai09.blogspot.com", "https://ebible.org", "http://capsuraotearoa.blogspot.com", "http://bahaiprayers.net", "https://www.breaker.audio", "https://www.pipirikiapapatuanuku.org", "http://www.the-naked.com", "https://phet.colorado.edu", "http://manateina.blogspot.com", "http://tkkpipipaopao.blogspot.com", "https://www.hidroponia.org.mx", "http://mrshamiltonskoolkidz.blogspot.com", "http://aclhokiangarocks.blogspot.com", "http://www.eyecontactsite.com", "http://www.hiroa.pf", "http://www.forensicfashion.com", "http://www.code-postal.com", "http://lianzaconference2012.blogspot.com", "http://mahoraroom8.blogspot.com", "http://korora.econ.yale.edu", "https://docs.google.com", "https://www.indexmundi.com", "https://www.seapixonline.com", "https://www.bible.com", "https://www.knowatom.com", "https://chromium.googlesource.com", "http://www.krassotkin.ru", "http://www.roadsmile.com", "https://www.code-postal.com", 
"http://blogdepasopor.blogspot.com", "http://eartheum.com", "http://www.steve-wheeler.co.uk", "http://www.mkiwi.com", "http://maaori.com", "https://www.kaifineart.com", "https://png.bible", "https://www.poehalisnami.ua", "http://atopeconlostopes.blogspot.com", "http://www.whoisentry.com", "http://loquevendra318.com", "https://za.pinterest.com", "http://www.namesdir.com", "https://drive.google.com", "http://worldradiomap.com" ] } /* 1 */ { "_id" : "nz", "count" : 176.0, "domain" : [ "http://tmoa.tki.org.nz", "http://www.rotoruanz.com", "https://admin.teara.govt.nz", "http://www.tekura.school.nz", "http://www.tetaurawhiri.govt.nz", "https://www.whanau-tahi.school.nz", "http://auturoa.nz", "http://www.ngamanawainc.co.nz", "http://southerntribes.co.nz", "https://player.vimeo.com", "https://www.components-mart.nz", "http://www.cs.waikato.ac.nz", "https://www.terakipaewhenua.school.nz", "http://oilcrash.com", "https://manawatuheritage.pncc.govt.nz", "http://maori.tki.org.nz", "http://kaupare.co.nz", "http://ngatiporoukiponeke.org.nz", "http://cms.sunsmartschools.co.nz", "http://philipbeadle.co.nz", "http://waitarahistory.org.nz", "http://hangaraumatihiko.tki.org.nz", "https://sexualviolence.victimsinfo.govt.nz", "http://www.kura-porirua.school.nz", "http://www.rakaumanga.school.nz", "http://www.huri-translations.pf", "https://kotahimiriona.co.nz", "http://ngarauhuia.ngatiapakiterato.iwi.nz", "http://videos.e-agent.nz", "http://kurakokiri.maori.nz", "http://kuraaiwi.maori.nz", "http://www.tewikiotereomaori.co.nz", "http://arataua.nz", "http://www.brettgraham.co.nz", "http://anglicanprayerbook.nz", "https://e-ako-pangarau.nzmaths.co.nz", "https://www.pinterest.nz", "http://www.tasteofplenty.co.nz", "http://www.nzpcn.org.nz", "https://www.puau.school.nz", "https://www.rereahu.maori.nz", "http://blog.teara.govt.nz", "http://www.ruralfind.co.nz", "https://www.korokikahukura.co.nz", "http://givealittle.co.nz", "http://tewikiotereomaori.nz", "http://dev.nzpcn.org.nz", 
"http://www.firstworldwar.tki.org.nz", "http://rsnz.natlib.govt.nz", "http://biketorqueyamaha.co.nz", "http://conference.tpwt.maori.nz", "http://myfathersworld.net.nz", "http://whatonga.school.nz", "https://teaomaori.news", "https://www.ashtangatauranga.co.nz", "http://www.eventcinemas.co.nz", "http://artizani.co.nz", "https://www.stats.govt.nz", "https://keepourmoneyclean.govt.nz", "http://www.teipukarea.maori.nz", "http://kuraproductions.co.nz", "http://www.otepoti.school.nz", "https://register.tpota.org.nz", "http://www.tewhanake.maori.nz", "https://office.e-agent.nz", "http://community.nzdl.org", "https://www.blushandbrows.nz", "https://cdn.tehiku.nz", "http://www.oag.govt.nz", "http://tmmkkm.school.nz", "http://www.tetaumuturunanga.iwi.nz", "http://teaohou.natlib.govt.nz", "http://www.kmk.maori.nz", "https://www.maoritelevision.com", "https://sooty.nz", "http://hana.co.nz", "http://waiata.maori.nz", "http://www.pakanae.maori.nz", "http://www.w3vietnam.org.nz", "http://www.zoomin.co.nz", "http://www.hrc.co.nz", "https://www.wingspan.co.nz", "https://www.cruisetourstauranga.co.nz", "http://kurataiao.tki.org.nz", "http://punareo.co.nz", "http://www.finlaysonpark.school.nz", "http://www.kurakokiri.maori.nz", "https://rapuatearatika.education.govt.nz", "https://www.lcds-display.nz", "http://www.livingheritage.org.nz", "http://www.heartland.co.nz", "http://www.biketorqueyamaha.co.nz", "https://2019.nethui.nz", "http://archerpix.com", "http://www.tkkmmokopuna.school.nz", "http://www.wcl.govt.nz", "https://tiritiowaitangi.govt.nz", "http://rakaumanga.school.nz", "http://holyspirit.nz", "http://crimson.co.nz", "https://www.ngamanawainc.co.nz", "http://rexedra.gen.nz", "http://www.kupengahao.co.nz", "https://www.tematawai.maori.nz", "http://tiritiowaitangi.govt.nz", "http://rurued.school.nz", "http://w3vietnam.org.nz", "https://www.rotorua-rafting.co.nz", "https://www.e-agent.nz", "http://reoora.co.nz", "http://archive.stats.govt.nz", "https://www.dnc.org.nz", 
"https://liveresults.co.nz", "https://www.taitokerautrust.org.nz", "https://www.infinite-electronic.nz", "https://kaiiwicamp.nz", "http://www.tereowrap.nz", "https://m.wairarapatv.co.nz", "http://ngatiwhakaue.iwi.nz", "http://www.kkmmaungarongo.co.nz", "https://rehuamarae.co.nz", "http://www.tmoa.tki.org.nz", "http://www.gans.co.nz", "http://www.topomap.co.nz", "http://www.electionresults.govt.nz", "http://archive.electionresults.govt.nz", "http://satellites.co.nz", "https://haereheikaiako.co.nz", "http://www.twtop.school.nz", "http://www.waiata.maori.nz", "http://www.temarareo.org", "http://tetaurawhiri.govt.nz", "http://www.28maoribattalion.org.nz", "https://ttw1.cwp.govt.nz", "http://www.methodist.org.nz", "http://avonside.net", "https://www.takitimu.ac.nz", "https://www.terito.school.nz", "https://www.electionresults.org.nz", "http://firstworldwar.tki.org.nz", "http://animations.tewhanake.maori.nz", "https://hepatakakupu.nz", "https://www.zenbu.co.nz", "https://www.sporty.co.nz", "https://www.tasteofplenty.co.nz", "http://otorohanga.directorybusiness.co.nz", "https://www.puhaandpakeha.co.nz", "http://kete.wcl.govt.nz", "https://interactives.stuff.co.nz", "http://maori.livingheritage.org.nz", "https://www.hapuhauora.health.nz", "http://kaiiwicamp.nz", "http://talkingtothecan.com", "http://www.tuwharetoa.iwi.nz", "http://nzpostcard.co.nz", "https://paekupu.co.nz", "http://www.runanga.co.nz", "https://curriculumtool.education.govt.nz", "http://www.matarikifestival.org.nz", "http://www.jeremybaker.nz", "http://ngatipahauwera.co.nz", "http://pukapuka.nz", "http://www.writersfestival.co.nz", "http://temahurehure.maori.nz", "http://pukoro.co.nz", "http://tehauora.org.nz", "http://pukekohe.directorybusiness.co.nz", "http://kmpmusic.co.nz", "http://www.maoriinvestments.co.nz", "https://www.komako.org.nz", "https://www.tuiatematangi.ac.nz" ] } /* 2 */ { "_id" : "de", "count" : 19.0, "domain" : [ "http://www.cartogiraffe.com", "http://etymologie.info", 
"http://arts.mythologica.fr", "http://svenkirsten.com", "http://weltderberge.de", "https://www.you-fly.com", "http://klaaskoehne.de", "http://www.nierstrasz.org", "https://www.tvteile.de", "http://vulkane.ch", "http://etoile-de-lune.net", "http://www.stephe.de", "http://insecta.pro", "http://m.distanta.1km.net", "https://ersatzteile-fachversand.de", "https://laskar02cinta.page.tl", "http://www.behlig.de", "https://www.cartogiraffe.com", "http://www.udhr.de" ] } /* 3 */ { "_id" : "fr", "count" : 16.0, "domain" : [ "http://rapanui.fr", "http://splaf.free.fr", "https://www.lexilogos.com", "http://mahajana.net", "http://www.gif.ovh", "http://baladeornithologique.com", "http://www.gaudry.be", "http://kihikihi.fr", "http://www.blueheavenisland.com", "http://www.gototahiti.net", "http://www.maraamusurfskirace.com", "http://www.rongo-rongo.com", "http://chantsdeluttes.free.fr", "http://pt.city-usa.net", "https://www.manualscat.com", "http://blueheavenisland.com" ] } /* 4 */ { "_id" : "nl", "count" : 16.0, "domain" : [ "https://www.arrowhead.eu", "http://tonhut.nl", "http://nielsonboutique.co.uk", "http://longhornlaw.net", "http://tetsubo.org", "https://arrowheadproject.azurewebsites.net", "http://hidsonphoto.com", "http://www.gouvernante.info", "http://gouvernante.info", "http://diverosa.com", "https://arrowhead.eu", "http://www.nonlinear.demon.nl", "http://www.encyclo.co.uk", "https://www.henrifloor.nl", "http://skimap.info", "http://wearehomework.com" ] } /* 5 */ { "_id" : "dk", "count" : 8.0, "domain" : [ "http://akona.ngapuhitelevision.com", "http://waiatarangatiratanga.ngapuhitelevision.com", "http://jazz.ngapuhitelevision.com", "http://ngapuhitelevision.com", "http://ngapuhiradio.com", "http://www.rennertweb.de", "http://powhiri.ngapuhitelevision.com", "http://komisch.ngapuhitelevision.com" ] } /* 6 */ { "_id" : "ca", "count" : 7.0, "domain" : [ "http://bcmarina.com", "http://www.myrasplace.net", "http://00.gs", "http://aguadilla.airport-authority.com", 
"http://bckayak.com", "https://articles.imperialtometric.com", "http://daandehn.com" ] } /* 7 */ { "_id" : "au", "count" : 5.0, "domain" : [ "http://theunderwaterworld.com", "https://www.kiwiproperty.com", "http://fionajack.net", "https://infogram.com", "https://koreromaori.com" ] } /* 8 */ { "_id" : "gb", "count" : 4.0, "domain" : [ "https://omniatlas.com", "http://www.wordsearchfun.com", "http://www.woolrych.org", "http://mikestephens.co.uk" ] } /* 9 */ { "_id" : "es", "count" : 4.0, "domain" : [ "https://www.uv.es", "https://www.reclamaciondevuelos.com", "http://www.info-hoteles.com", "http://www.cruceros-princess.mx" ] } /* 10 */ { "_id" : "cz", "count" : 4.0, "domain" : [ "http://www.henryklahola.nazory.cz", "https://www.fipojobs.com", "http://about.ilikeyou.com", "http://henryklahola.nazory.cz" ] } /* 11 */ { "_id" : "it", "count" : 3.0, "domain" : [ "http://oipaz.net", "http://www.marcosanti.it", "http://www.pegasoesmicamion.com" ] } /* 12 */ { "_id" : "at", "count" : 3.0, "domain" : [ "http://www.tmtmm.net", "http://www.petit-prince.at", "http://petit-prince.at" ] } /* 13 */ { "_id" : "ro", "count" : 2.0, "domain" : [ "http://parohiauceadesus.ro", "http://www.parohiauceadesus.ro" ] } /* 14 */ { "_id" : "ch", "count" : 2.0, "domain" : [ "https://nicoledidi.ch", "https://photos.axelebert.org" ] } /* 15 */ { "_id" : "il", "count" : 2.0, "domain" : [ "https://www.hitiaotera.com", "http://www.daat.ac.il" ] } /* 16 */ { "_id" : "ru", "count" : 1.0, "domain" : [ "https://www.gismeteo.lv" ] } /* 17 */ { "_id" : "bg", "count" : 1.0, "domain" : [ "http://anitra.net" ] } /* 18 */ { "_id" : "mx", "count" : 1.0, "domain" : [ "http://www.gelbukh.com" ] } /* 19 */ { "_id" : "unknown", "count" : 1.0, "domain" : [ "https://www.viveipcl.com" ] } /* 20 */ { "_id" : "jp", "count" : 1.0, "domain" : [ "http://yutaka.it-n.jp" ] } /* 21 */ { "_id" : "cn", "count" : 1.0, "domain" : [ "http://kiwi2china.com" ] } /* 22 */ { "_id" : "ie", "count" : 1.0, "domain" : [ 
"https://coggle.it" ] } /* 23 */ { "_id" : "ir", "count" : 1.0, "domain" : [ "https://www.dideo.ir" ] } /* 24 */ { "_id" : "fi", "count" : 1.0, "domain" : [ "http://pertti.com" ] }