/* Need to inspect all those URLs with mi in the URL path (mi.* or .../mi) that are not sites with an nz TLD or originating in NZ: db.getCollection('Websites').find({$and: [{numPagesContainingMRI: {$gt: 0}},{urlContainsLangCodeInPath: true}, {domain: {$not: /.nz$/}}, {geoLocationCountryCode: {$ne: "NZ"}}]}).count() 472 (vs: db.getCollection('Websites').find({$and: [{numPagesInMRI: {$gt: 0}},{urlContainsLangCodeInPath: true}, {domain: {$not: /.nz$/}}, {geoLocationCountryCode: {$ne: "NZ"}}]}).count() 209) db.Websites.aggregate([ { $match: { $and: [{numPagesContainingMRI: {$gt: 0}},{urlContainsLangCodeInPath: true}, {domain: {$not: /.nz$/}}, {geoLocationCountryCode: {$ne: "NZ"}}] } }, {$group: {_id: "$geoLocationCountryCode", count: {$sum: 1}, domain: { $addToSet: '$domain' }}}, { $sort : { count : -1} } ]) But leave out Australia, as it's included in the sets for 4 and 5 that include all websites from NZ or AU OR all websites from overseas that don't have mi in the URL path. db.getCollection('Websites').find({$and: [{numPagesContainingMRI: {$gt: 0}},{urlContainsLangCodeInPath: true}, {domain: {$not: /.nz$/}}, {geoLocationCountryCode: {$not: /(NZ|AU)/}}]}).count() 471 db.Websites.aggregate([ { $match: { $and: [{numPagesContainingMRI: {$gt: 0}},{urlContainsLangCodeInPath: true}, {domain: {$not: /.nz$/}}, {geoLocationCountryCode: {$not: /(NZ|AU)/}}] } }, {$group: {_id: "$geoLocationCountryCode", count: {$sum: 1}, domain: { $addToSet: '$domain' }}}, { $sort : { count : -1} } ]) */ /* 1 */ { "_id" : "US", "count" : 305.0, "domain" : [ "http://www.sog-pump.com", "http://www.htwindsolarpower.com", "http://www.ycautoc.com", "http://www.restart-industry.com", "https://mi.nyecountdown.com", "http://www.wavesspring.com", "http://www.tubemillcn.com", "http://www.jindunlaobao.com", "https://facebook.roseconverter.com", "https://www.bestpvcfence.com", "http://www.sps-squeegee.com", "http://www.toption-ingredients.com", "http://www.mksmartcard.com", "http://mi.hongwugas.com", 
"http://www.lishin.cc", "http://infomutt.com", "https://jobdescriptionsample.org", "http://www.bmaxmachine.com", "http://www.sdxhhd.com", "http://binaryoptionsindicators.com", "http://www.mao-shuo.com", "http://www.eternal-friendship.com", "http://www.conele-mixer.com", "http://www.jhc-nonwoven.com", "http://www.forever-moving.com", "http://lingeriefc.com", "http://indigenousblogs.com", "https://www.weld-automation.com", "http://www.homewin88.com", "https://biblia.gospelprime.com.br", "http://www.sunnymaycn.com", "https://www.jlextract.com", "http://www.artmetalcn.com", "http://www.steel-in-china.com", "http://www.nbbvc.com", "https://mamaclub.info", "http://www.zjnbzy.com", "https://www.sdspraybooth.com", "http://www.sokenswitch.com", "https://follow3rs.com", "http://www.autosunsoul.com", "http://www.chinaocan.com", "https://www.prostepper.com", "https://www.yogemcasting.com", "http://www.julongjewelry.cn", "http://www.shengxinsport.com", "http://portal.smart-project.info", "http://cdn.centrallanguageschool.com", "https://www.tianjia-lock.com", "https://www.yourcloudlibrary.com", "http://milfsplease.com", "http://www.risepipe.com", "http://www.pressurelantern.com", "https://www.drickinstruments.com", "http://www.gormeet.com", "http://www.wf-fastener.com", "http://www.windsolarchina.com", "https://www.nbkeming.com", "http://www.mtpak.com", "http://www.nantaidiesel.com", "https://www.judinwire.net", "http://www.weldpipemill.com", "http://www.ictctruss.com", "http://www.analiabriz.com", "http://www.xida-electronics.com", "https://mi.m.wikipedia.org", "https://www.junschem.com", "http://www.bestwaytowhitenteethguide.org", "https://www.airpullfilter.com", "https://worldstarhiphop.roseconverter.com", "http://www.sdtzgloves.com", "http://www.teda-hydraulic.com", "http://www.gmk-valve.com", "https://usahello.org", "https://www.datemypet.com", "http://www.joyseaplywood.com", "http://www.wpcline.com", "https://www.hengweihoseclamp.com", "https://www.td-casting.com", 
"http://church-of-christ.org", "http://www.shshenyong.com", "http://www.pvcroofingtile.com", "http://www.wrdtubemill.com", "http://www.arjextrailerparts.com", "http://www.mytrickstips.com", "http://www.quickcncmachine.com", "http://www.cnsongben.com", "https://vk.roseconverter.com", "https://www.livehoster.com", "https://cycletraderpro.com", "http://www.focusway-casting.com", "http://www.sindadisplay.com", "https://www.axnewdisplay.com", "http://www.americasportsfloor.com", "http://www.dmdryer.com", "https://mi.centr-zashity.ru", "http://csunplugged.org", "https://www.inpnurseryproducts.com", "https://www.hjfoodmachinery.com", "http://loginmail.online", "http://www.bluekin.com", "http://www.hqftex.com", "http://www.kehengmixing.com", "https://blondewebcamgirl.com", "http://www.bst-elecs.com", "https://www.nickel-alloy.net", "http://www.albertnovosino.com", "http://www.ttyzfilter.com", "https://www.risenltd.com", "https://www.sinodryair.com", "http://www.nicerelay.com", "https://www.pldyes.com", "http://www.china-brewhouse.com", "http://www.ntvigourbrush.com", "http://www.hobbycarbon.com", "http://www.bdknitting.com", "http://www.sxceramic.com", "http://www.secondhormone.com", "https://www.czzhit.com", "http://www.zhonghe222.com", "http://www.church-of-christ.org", "http://www.chinacarbonfibre.com", "http://www.amcbox.com", "http://www.livepro-beauty.com", "http://www.nbyobo.com", "http://www.nyforgedwheels.com", "https://guidebooq.com", "http://www.sunshinebelt.com", "https://www.hello4x4.com", "http://www.seasum.cn", "http://www.jointcontrols.net", "http://www.steelprotectionpack.com", "http://www.ruifeng-leather.com", "http://www.suoxuehuwai.com", "http://www.cnyaonan.com", "http://www-hotmail-com.email", "https://twitter.roseconverter.com", "http://www.qitai-adhesive.com", "http://www.cnfreda.com", "http://www.weld-automation.com", "http://www.jbpcba.com.cn", "http://www.litbright-candles.com", "http://www.tjcywires.com", "http://www.nide-industry.com", 
"https://www.aquagem.com.cn", "http://www.ladybagcn.com", "http://atoall.com", "http://www.vango-tech.com", "http://www.vigor-industry.com", "http://www.shenhe-bearing.com", "https://mi.lawyers.cafe", "http://www.newbaoquan.com", "https://blockchains.io", "https://www.gigalight.com", "http://www.fanhaopets.com", "http://www.inpnurseryproducts.com", "http://www.huamachinery.com", "http://www.damiser.com", "http://www.shanghailangzhiweld.com", "http://www.yixinhetrade.com", "http://www.cnfeinade.com", "http://www.prostepper.com", "http://www.goethe.de", "https://www.njkeyuda.com", "https://www.kubbamachine.com", "http://www.yrkseal.com", "http://www.xfinsulation.com", "http://www.lanlinprintech.com", "https://www.king-pcb.com", "https://policies.oclc.org", "http://www.ainuogas.com", "http://technobuzzer.com", "https://www.csunplugged.org", "https://www.tkthvac.com", "http://www.brushcutterjusen.com", "http://www.szhaiwang.com", "http://www.gemnice.com", "http://www.richina-tools.com", "https://www.conele-mixer.com", "http://www.accotech.net", "https://www.dshprecision.com", "http://www.nicehut-window.com", "http://www.btmeac.com", "http://jobdescriptionsample.org", "http://www.strongsaw.com", "http://www.cannapresso.com", "https://www.cz-juteng.com", "http://www.aluminum-profiles-supplier.com", "http://www.ledecofr.com", "http://www.rosin-kings.com", "http://www.tkfanen.com", "http://www.sdcncrouter.com", "http://www.cnxh-electric.com", "http://www.twtvalvecn.com", "http://www.envicool.net", "http://www.jiejingfactory.com", "http://www.pamaens.com", "http://www.liweimetal.com", "http://mi.tccasdic.com", "http://www.hzhinew.com", "http://www.huaxinfurnace.com", "http://www.linphos.com", "http://www.sinemagnetic.com", "http://www.longda-inc.com", "http://www.silicone-odm.com", "http://www.weddingfurniture.com", "http://www.touchdisplays-tech.com", "http://www.houshenshoes.com", "http://www.chinacombinerbox.com", "http://www.xzc9.com", "http://mi.broadcastbeat.com", 
"http://www.czldfloor.com", "http://www.qypaperbox.com", "https://mi.wikipedia.org", "https://www.everfineplastics.com", "https://www.huadongmedical.com", "http://www.chuamotor.com", "http://www.ksdoing.com", "http://www.evaescort.net", "http://www.jlgrating.com", "http://www.qbd-group.com", "https://dwsolo.com", "http://www.czzhit.com", "http://www.hs-stationery.com", "http://www.allutertech.com", "https://www.samsungwiremesh.com", "http://www.wellformpacking.com", "http://www.longs-motor.com", "http://www.kd-physicalrehab.com", "http://www.meihua-wm.com", "http://www.wzdongyi.com", "http://www.pxbaisheng.com", "http://www.wellfit-sportswear.com", "http://www.wosaicabinet.com", "http://www.chinatopcnc.com", "https://drugsinc.eu", "http://www.bigrollscloth.com", "http://www.jiajiebathmirror.com", "http://www.cz-juteng.com", "http://www.shengrunqiche.com", "http://www.luluae.com", "http://www.jpslurrypump.com", "http://www.soontruepackaging.com", "https://www.judipak.com", "http://www.samewe.net", "http://www.infomutt.com", "https://www.aquark.com.cn", "http://www.tongyujiaju.com", "http://www.chinapipemills.com", "http://www.fxctool.com", "http://mi.guoguangelectric.com", "http://topbitcoincard.com", "https://atoall.com", "https://mi.kidspicturedictionary.com", "http://www.chinabosun.com", "http://www.b-packaging.com", "http://www.longxin-global.com", "http://www.ncpcvet.com", "http://www.acouplefortheroad.com", "http://www.ruk-tech.com", "https://www.artiegarden.com", "http://www.fxpremiere.com", "https://www.tymexnetting.com", "http://www.whties.com", "http://blicanada.net", "http://www.homey-tec.com", "http://www.szebo.com", "http://www.cnrgxy.com", "http://www.zhenchengscrew.com", "http://www.honglu-mining.com", "http://www.photoprofix.com", "http://www.supplyfurniture.com", "http://www.linbaymachinery.com", "http://mytrickstips.com", "http://www.ldsolarpv.com", "http://www.qxmic.com", "https://www.fxcc.com", "http://www.ncpcpharma.com", 
"http://www.evergrowingcage.com", "https://www.rikoooo.com", "https://maxspeedtest.com", "http://www.qjfiberglass.com", "https://www.nbwinwinea.com", "https://www.tjshenzhoutong.com", "http://www.zhengmaoelec.com", "http://www.chinagxmy.com", "http://www.fancyco.com", "http://www.tangres100.com", "https://www.valve-pipe-fitting.com", "http://www.gfh-electric.com", "http://www.chinachairtable.com", "http://www.qymachines.com", "http://www.hzzjair.com", "https://2fish.co", "https://www.fctele.com", "https://vimeo.roseconverter.com", "http://www.viairdoormat.com", "http://www.gecko-kalimba.com", "http://mi.gmpmetalwork.com", "https://www.glorystarlaser.com", "http://www.wanmaroto.com", "https://www.webhostingsecretrevealed.net", "https://www.waterproof-factory.com", "http://www.omnicnc.com" ] } /* 2 */ { "_id" : "CN", "count" : 113.0, "domain" : [ "https://www.szradiant.com", "https://www.outstandingdm.com", "http://www.gmmdjx.com", "http://www.likvchina.com", "https://www.slagremoving.com", "https://www.abdindustrial.com", "https://www.c-superun.com", "https://www.sino-masterbatch.com", "http://www.cntiescarf.com", "https://www.szhtpmart.com", "https://www.dm-compressor.com", "https://www.phhydraulic.com", "https://www.imposalight.com", "https://www.medke.com", "https://www.aerial-display.com", "http://www.koowheel.com", "https://www.haitungchem.com", "http://www.eburn-burner.com", "http://www.medicohongkong.com", "https://www.cntfsolar.com", "https://www.diamante-tech.com", "https://www.aoxinhvacr.com", "https://www.richest-group.com", "http://www.world-starter.com", "http://www.goldenlaser.cc", "https://www.safesworld.com", "https://www.km-medicine.com", "https://www.peptidejymed.com", "https://www.nbhengchen.com", "https://www.xinyuesteel.com", "https://www.lasonparts.com", "https://www.charmingmetal.com", "http://www.abdindustrial.com", "https://www.ngyc.com", "https://www.pacopower.com", "https://www.tjtgsteel.com", "https://www.yangrutingtrade.com", 
"http://www.wedacdisplays.com", "https://www.gaofeng-petro.com", "https://www.szzhsbag.com", "https://www.ez-walk.com", "https://www.simphoenix.com", "http://www.focuslasersystems.com", "http://www.kavounautoparts.com", "http://www.zypackag.com", "https://www.fc-med.com", "https://www.bestardoors.com", "https://www.foocles.com", "https://www.jsjlmachinery.com", "http://www.wenwencf.com", "https://www.insharevape.com", "https://www.n2o2gas.com", "https://www.special-metal.com", "https://www.dghk-buffer.com", "https://www.nfyo.com", "https://www.changjia-machinery.com", "https://www.chinarfidcard.com", "https://www.jsbotanics.com", "https://www.sjzhgw.com", "http://www.estarspareparts.com", "https://www.zhongxinlighting.com", "https://www.qdruidetai.com", "https://www.study-mandarin.com", "https://www.bescatray.com", "http://www.qjqdvalve.com", "https://www.painting-machine.com", "https://www.tianseoffice.com", "https://www.herbal-ingredients.com", "https://www.qlart.com", "https://www.sehenda-en.com", "https://www.egbadges.com", "http://www.3drambery.com", "http://www.eudemonbaby.com", "https://www.zjyongqi.com", "http://www.jsbotanics.com", "https://www.rswires.com", "https://www.chinawelken.com", "https://www.micropreparedslides.com", "https://www.rykay.com", "http://www.longtopmining.com", "https://www.grandstarcn.com", "http://www.wigglewires.com", "https://www.sdtoplit.com", "https://www.wecare-life.com", "https://www.bailixin.com", "http://www.refinehotelsupply.com", "http://www.prius-automatic.com", "https://www.nbulboy.com", "https://www.jy-glass.com", "https://www.band-ss.com", "https://www.hytokstech.com", "http://www.ankaicnc.com", "https://www.goldnard.com", "http://www.comfortebicycle.com", "https://www.3drambery.com", "https://www.zengrit.com", "https://www.pakite.com", "https://www.xianglin-plastics.com", "https://www.nbjiatong.com", "https://www.inductorchina.com", "https://www.sakysteel.com", "https://www.bofanpc.com", "http://www.coneleqd.com", 
"http://www.nbwellrun.com", "https://www.jewellrylove.com", "http://www.jetwayamenities.com", "https://www.coffbrewing.com", "https://www.aootan.com", "http://www.yulong-cellulose-cmc.com", "https://english.taiergroup.com", "http://www.czhengfa.com", "https://www.fibereye2.com", "https://www.sitzonechair.com" ] } /* 3 */ { "_id" : "FR", "count" : 19.0, "domain" : [ "https://www.casino.uk.com", "https://www.planetkeyboard.com", "https://mi.gem.agency", "https://mi.hghphuket.com", "https://mi.mehmetdursun.av.tr", "https://mi.mhthread.com", "https://mi.phcoker.com", "https://mi.hyperbaric-chamber.com", "https://mi.usa-casino-online.com", "http://mi.aasraw.com", "http://mi.psychicbonus.com", "https://mi.apicmo.com", "https://www.slotsltd.com", "https://mi.isearch.de", "https://mi.petrpikora.com", "http://mi.fitnessrebates.com", "http://www.gpedia.com", "https://www.expresscasino.com", "http://mi.outboard-boat-motor-repair.com" ] } /* 4 */ { "_id" : "DE", "count" : 8.0, "domain" : [ "https://afrikhepri.org", "http://transposh.org", "https://mi.vessoft.com", "http://www.almancax.com", "https://transposh.org", "https://traynews.com", "https://herocity.de", "https://www.saper-link-news.com" ] } /* 5 */ { "_id" : "NL", "count" : 6.0, "domain" : [ "http://www.spectrumschool.be", "http://www.martinvrijland.nl", "https://www.emergency-live.com", "https://realtytenerife.com", "https://www.bitbybitbook.com", "http://www.cbdolievoordelen.nl" ] } /* 6 */ { "_id" : "CA", "count" : 5.0, "domain" : [ "https://www.wikiplanet.click", "http://newsrule.com", "http://dehaut.com", "https://www.chinanbdb.com", "https://cloudsfeed.com" ] } /* 7 */ { "_id" : "GB", "count" : 2.0, "domain" : [ "https://www.centrallanguageschool.com", "https://www.solasolv.com" ] } /* 8 */ { "_id" : "HK", "count" : 2.0, "domain" : [ "https://www.desunpump.com", "http://www.10turntables.com" ] } /* 9 */ { "_id" : "UNKNOWN", "count" : 2.0, "domain" : [ "https://mi.buyaas.com", "http://en.wiki.wintoflash.com" ] } 
/* 10 */ { "_id" : "UA", "count" : 2.0, "domain" : [ "http://umsa.admission.center", "http://ukraine.admission.center" ] } /* 11 */ { "_id" : "SG", "count" : 1.0, "domain" : [ "https://omg-solutions.com" ] } /* 12 */ { "_id" : "RU", "count" : 1.0, "domain" : [ "http://www.treningmozga.com" ] } /* 13 */ { "_id" : "ES", "count" : 1.0, "domain" : [ "https://www.torresbus.es" ] } /* 14 */ { "_id" : "IE", "count" : 1.0, "domain" : [ "http://netkiosk.co.uk" ] } /* 15 */ { "_id" : "TR", "count" : 1.0, "domain" : [ "https://www.elitedeluxe.com.tr" ] } /* 16 */ { "_id" : "EU", "count" : 1.0, "domain" : [ "http://www.the-good-stuff-factory.be" ] } /* 17 */ { "_id" : "JP", "count" : 1.0, "domain" : [ "https://forexmania.org" ] }