Changeset 33813 for other-projects/maori-lang-detection/mongodb-data/counts_tentativeNonProductSites.json
- Timestamp:
- 2019-12-18T21:38:44+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/mongodb-data/counts_tentativeNonProductSites.json
r33806 r33813 1 /* 2 3 The websites that have some MRI detected AND which are either in NZ or with NZ TLD 4 or (so if they're from overseas) don't contain /mi or mi.* in URL path. 5 We'll include Australia, to get the valid "kiwiproperty.com" website, 6 otherwise the sole exception, included in the result list. 7 8 9 db.getCollection('Websites').find({$and: [ 10 {numPagesContainingMRI: {$gt: 0}}, 11 {$or: [{geoLocationCountryCode: /(NZ|AU)/}, {domain: /\.nz$/}, {urlContainsLangCodeInPath: false}]} 12 ]}).count() 13 14 397 15 16 Aggregate results by a count of country codes: 17 18 db.Websites.aggregate([ 19 { 20 $match: { 21 $and: [ 22 {numPagesContainingMRI: {$gt: 0}}, 23 {$or: [{geoLocationCountryCode: /(NZ|AU)/}, {domain: /\.nz$/}, {urlContainsLangCodeInPath: false}]} 24 ] 25 } 26 }, 27 { $unwind: "$geoLocationCountryCode" }, 28 { 29 $group: { 30 _id: {$toLower: '$geoLocationCountryCode'}, 31 count: { $sum: 1 } 32 } 33 }, 34 { $sort : { count : -1} } 35 ]); 36 37 */ 38 1 39 /* 1 */ 2 40 { 3 41 "_id" : "us", 4 "count" : 475.042 "count" : 181.0 5 43 } 6 44 7 45 /* 2 */ 8 46 { 9 "_id" : " cn",10 "count" : 114.047 "_id" : "nz", 48 "count" : 89.0 11 49 } 12 50 13 51 /* 3 */ 14 52 { 15 "_id" : " nz",16 "count" : 98.053 "_id" : "au", 54 "count" : 21.0 17 55 } 18 56 19 57 /* 4 */ 20 58 { 59 "_id" : "de", 60 "count" : 19.0 61 } 62 63 /* 5 */ 64 { 21 65 "_id" : "fr", 22 "count" : 36.0 23 } 24 25 /* 5 */ 26 { 27 "_id" : "de", 28 "count" : 26.0 66 "count" : 17.0 29 67 } 30 68 … … 32 70 { 33 71 "_id" : "nl", 34 "count" : 22.072 "count" : 16.0 35 73 } 36 74 37 75 /* 7 */ 38 {39 "_id" : "au",40 "count" : 17.041 }42 43 /* 8 */44 {45 "_id" : "ca",46 "count" : 13.047 }48 49 /* 9 */50 76 { 51 77 "_id" : "dk", … … 53 79 } 54 80 81 /* 8 */ 82 { 83 "_id" : "ca", 84 "count" : 7.0 85 } 86 87 /* 9 */ 88 { 89 "_id" : "es", 90 "count" : 6.0 91 } 92 55 93 /* 10 */ 56 94 { 57 "_id" : " es",58 "count" : 7.095 "_id" : "gb", 96 "count" : 5.0 59 97 } 60 98 61 99 /* 11 */ 62 {63 "_id" : "gb",64 "count" : 7.065 }66 67 /* 12 */68 100 { 69 101 "_id" : "cz", … … 71 103 } 72 104 105 /* 12 */ 106 { 107 "_id" : "at", 108 "count" : 3.0 109 } 110 73 111 /* 13 */ 74 112 { … … 79 117 /* 14 */ 80 118 { 81 "_id" : " at",119 "_id" : "ro", 82 120 "count" : 3.0 83 121 } 84 122 85 123 /* 15 */ 124 { 125 "_id" : "il", 126 "count" : 2.0 127 } 128 129 /* 16 */ 86 130 { 87 131 "_id" : "ch", … … 89 133 } 90 134 91 /* 16 */92 {93 "_id" : "ro",94 "count" : 2.095 }96 97 135 /* 17 */ 98 136 { 99 "_id" : " il",100 "count" : 2.0137 "_id" : "bg", 138 "count" : 1.0 101 139 } 102 140 103 141 /* 18 */ 104 142 { 143 "_id" : "sg", 144 "count" : 1.0 145 } 146 147 /* 19 */ 148 { 149 "_id" : "mx", 150 "count" : 1.0 151 } 152 153 /* 20 */ 154 { 155 "_id" : "ir", 156 "count" : 1.0 157 } 158 159 /* 21 */ 160 { 161 "_id" : "cn", 162 "count" : 1.0 163 } 164 165 /* 22 */ 166 { 167 "_id" : "ie", 168 "count" : 1.0 169 } 170 171 /* 23 */ 172 { 173 "_id" : "jp", 174 "count" : 1.0 175 } 176 177 /* 24 */ 178 { 179 "_id" : "fi", 180 "count" : 1.0 181 } 182 183 /* 25 */ 184 { 185 "_id" : "gr", 186 "count" : 1.0 187 } 188 189 /* 26 */ 190 { 191 "_id" : "ru", 192 "count" : 1.0 193 } 194 195 /* 27 */ 196 { 105 197 "_id" : "unknown", 106 "count" : 2.0 107 } 108 109 /* 19 */ 110 { 111 "_id" : "hk", 112 "count" : 2.0 113 } 114 115 /* 20 */ 116 { 117 "_id" : "jp", 118 "count" : 2.0 119 } 120 121 /* 21 */ 122 { 123 "_id" : "ie", 124 "count" : 2.0 125 } 126 127 /* 22 */ 128 { 129 "_id" : "ua", 130 "count" : 2.0 131 } 132 133 /* 23 */ 134 { 135 "_id" : "se", 136 "count" : 1.0 137 } 138 139 /* 24 */ 140 { 141 "_id" : "gr", 142 "count" : 1.0 143 } 144 145 /* 25 */ 146 { 147 "_id" : "ru", 148 "count" : 1.0 149 } 150 151 /* 26 */ 152 { 153 "_id" : "eu", 154 "count" : 1.0 155 } 156 157 /* 27 */ 158 { 159 "_id" : "bg", 160 "count" : 1.0 161 } 162 163 /* 28 */ 164 { 165 "_id" : "fi", 166 "count" : 1.0 167 } 168 169 /* 29 */ 170 { 171 "_id" : "sg", 172 "count" : 1.0 173 } 174 175 /* 30 */ 176 { 177 "_id" : "tr", 178 "count" : 1.0 179 } 180 181 /* 31 */ 182 { 183 "_id" : "mx", 184 "count" : 1.0 185 } 186 187 /* 32 */ 188 { 189 "_id" : "ir", 190 "count" : 1.0 191 } 198 "count" : 1.0 199 } 200
Note:
See TracChangeset
for help on using the changeset viewer.