Changeset 33813 for other-projects/maori-lang-detection/mongodb-data/counts_sitesWithPagesContainingMRI.json
- Timestamp:
- 2019-12-18T21:38:44+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/mongodb-data/counts_sitesWithPagesContainingMRI.json
r33803 r33813 1 /* 2 Number of sites containing at least one sentence for which OpenNLP detected the best language = MRI 3 4 db.getCollection('Websites').find({numPagesContainingMRI: {$gt: 0}}).count() 5 868 6 7 8 Obviously, the following should be equal to that: 9 10 db.getCollection('Websites').find({ $or: [ { numPagesInMRI: { $gt: 0 } }, { numPagesContainingMRI: {$gt: 0} } ] } ).count() 11 868 12 13 14 Count of country codes for sites that have at least one page containing at least one sentence detected as MRI by OpenNLP: 15 16 db.Websites.aggregate([ 17 { 18 $match: { 19 numPagesContainingMRI: {$gt: 0} 20 } 21 }, 22 { $unwind: "$geoLocationCountryCode" }, 23 { 24 $group: { 25 _id: {$toLower: '$geoLocationCountryCode'}, 26 count: { $sum: 1 } 27 } 28 }, 29 { $sort : { count : -1} } 30 ]); 31 32 */ 33 1 34 /* 1 */ 2 35 { 3 36 "_id" : "us", 4 "count" : 4 79.037 "count" : 486.0 5 38 } 6 39 … … 14 47 { 15 48 "_id" : "nz", 16 "count" : 98.049 "count" : 89.0 17 50 } 18 51 … … 38 71 { 39 72 "_id" : "au", 40 "count" : 2 0.073 "count" : 21.0 41 74 } 42 75 … … 44 77 { 45 78 "_id" : "ca", 46 "count" : 1 3.079 "count" : 12.0 47 80 } 48 81 … … 55 88 /* 10 */ 56 89 { 90 "_id" : "es", 91 "count" : 7.0 92 } 93 94 /* 11 */ 95 { 57 96 "_id" : "gb", 58 97 "count" : 7.0 59 98 } 60 99 61 /* 11 */62 {63 "_id" : "es",64 "count" : 7.065 }66 67 100 /* 12 */ 68 101 { … … 73 106 /* 13 */ 74 107 { 108 "_id" : "unknown", 109 "count" : 3.0 110 } 111 112 /* 14 */ 113 { 114 "_id" : "at", 115 "count" : 3.0 116 } 117 118 /* 15 */ 119 { 120 "_id" : "ro", 121 "count" : 3.0 122 } 123 124 /* 16 */ 125 { 75 126 "_id" : "it", 76 127 "count" : 3.0 77 128 } 78 129 79 /* 14 */ 80 { 81 "_id" : "at", 82 "count" : 3.0 83 } 84 85 /* 15 */ 130 /* 17 */ 131 { 132 "_id" : "sg", 133 "count" : 2.0 134 } 135 136 /* 18 */ 137 { 138 "_id" : "jp", 139 "count" : 2.0 140 } 141 142 /* 19 */ 143 { 144 "_id" : "ie", 145 "count" : 2.0 146 } 147 148 /* 20 */ 149 { 150 "_id" : "hk", 151 "count" : 2.0 152 } 153 154 /* 21 */ 155 { 156 "_id" : "ua", 157 "count" : 2.0 158 } 159 160 /* 22 */ 161 { 162 "_id" : "ru", 163 "count" : 2.0 164 } 165 166 /* 23 */ 167 { 168 "_id" : "ch", 169 "count" : 2.0 170 } 171 172 /* 24 */ 86 173 { 87 174 "_id" : "il", … … 89 176 } 90 177 91 /* 16 */ 92 { 93 "_id" : "ch", 94 "count" : 2.0 95 } 96 97 /* 17 */ 98 { 99 "_id" : "ro", 100 "count" : 2.0 101 } 102 103 /* 18 */ 104 { 105 "_id" : "ru", 106 "count" : 2.0 107 } 108 109 /* 19 */ 110 { 111 "_id" : "ie", 112 "count" : 2.0 113 } 114 115 /* 20 */ 116 { 117 "_id" : "jp", 118 "count" : 2.0 119 } 120 121 /* 21 */ 122 { 123 "_id" : "hk", 124 "count" : 2.0 125 } 126 127 /* 22 */ 128 { 129 "_id" : "ua", 130 "count" : 2.0 131 } 132 133 /* 23 */ 134 { 135 "_id" : "unknown", 136 "count" : 2.0 137 } 138 139 /* 24 */ 178 /* 25 */ 179 { 180 "_id" : "tr", 181 "count" : 1.0 182 } 183 184 /* 26 */ 140 185 { 141 186 "_id" : "mx", … … 143 188 } 144 189 145 /* 25 */ 190 /* 27 */ 191 { 192 "_id" : "ir", 193 "count" : 1.0 194 } 195 196 /* 28 */ 197 { 198 "_id" : "gr", 199 "count" : 1.0 200 } 201 202 /* 29 */ 203 { 204 "_id" : "bg", 205 "count" : 1.0 206 } 207 208 /* 30 */ 209 { 210 "_id" : "eu", 211 "count" : 1.0 212 } 213 214 /* 31 */ 146 215 { 147 216 "_id" : "fi", 148 217 "count" : 1.0 149 218 } 150 151 /* 26 */152 {153 "_id" : "se",154 "count" : 1.0155 }156 157 /* 27 */158 {159 "_id" : "gr",160 "count" : 1.0161 }162 163 /* 28 */164 {165 "_id" : "eu",166 "count" : 1.0167 }168 169 /* 29 */170 {171 "_id" : "bg",172 "count" : 1.0173 }174 175 /* 30 */176 {177 "_id" : "ir",178 "count" : 1.0179 }180 181 /* 31 */182 {183 "_id" : "tr",184 "count" : 1.0185 }186 187 /* 32 */188 {189 "_id" : "sg",190 "count" : 1.0191 }
Note:
See TracChangeset
for help on using the changeset viewer.