source: other-projects/maori-lang-detection/mongodb-data/5b_count_containsMRI_groupedByNZorOverseasNoFilter.json@ 33894

Last change on this file since 33894 was 33894, checked in by ak19, 4 years ago
  1. Adding map, counts.json and geo-json files for 5b count of sites by countrycode with numPagesContainingMRI > 0, unfiltered by mi in URL path or not. 2. Tables file has mongodb query code for 5b data. 3. Map, counts.json and geo-json files for 6 (count of sites by country code from manual short listing of sites) now renamed to reflect that the shortlist considers ALL manually selected sites regardless of mi in URL path or not.
File size: 2.5 KB
Line 
1/**
2Count by Country Code of sites with numPagesContainingMRI > 0
3without skipping any overseas sites with mi in URL path.
4
5
6Combination of the results of the two queries below.
7OVERSEAS (sites not geolocated in NZ and without .nz in the domain):
8db.Websites.aggregate([
9 {
10 $match: {
11 $and: [
12 {geoLocationCountryCode: {$ne: "NZ"}},
13 {domain: {$not: /\.nz/}},
14 {numPagesContainingMRI: {$gt: 0}}
15 ]
16 }
17 },
18 { $unwind: "$geoLocationCountryCode" },
19 {
20 $group: {
21 _id: {$toLower: '$geoLocationCountryCode'},
22 count: { $sum: 1 }
23 }
24 },
25 { $sort : { count : -1} }
26]);
27NZ (sites geolocated in NZ or with .nz in the domain), all counted under the single id "nz":
28db.Websites.aggregate([
29 {
30 $match: {
31 $and: [
32 {numPagesContainingMRI: {$gt: 0}},
33 {$or: [{geoLocationCountryCode:"NZ"},{domain: /\.nz/}]}
34 ]
35 }
36 },
37 { $unwind: "$geoLocationCountryCode" },
38 {
39 $group: {
40 _id: "nz",
41 count: { $sum: 1 }
42 }
43 },
44 { $sort : { count : -1} }
45]);
46
47*/
48
49
50/* 1 */
51{
52 "_id" : "us",
53 "count" : 422.0
54}
55
56/* 1 */
57{
58 "_id" : "nz",
59 "count" : 176.0
60}
61
62/* 2 */
63{
64 "_id" : "cn",
65 "count" : 114.0
66}
67
68/* 3 */
69{
70 "_id" : "fr",
71 "count" : 35.0
72}
73
74/* 4 */
75{
76 "_id" : "de",
77 "count" : 27.0
78}
79
80/* 5 */
81{
82 "_id" : "nl",
83 "count" : 22.0
84}
85
86/* 6 */
87{
88 "_id" : "ca",
89 "count" : 12.0
90}
91
92/* 7 */
93{
94 "_id" : "dk",
95 "count" : 8.0
96}
97
98/* 8 */
99{
100 "_id" : "gb",
101 "count" : 6.0
102}
103
104/* 9 */
105{
106 "_id" : "au",
107 "count" : 5.0
108}
109
110/* 10 */
111{
112 "_id" : "es",
113 "count" : 5.0
114}
115
116/* 11 */
117{
118 "_id" : "cz",
119 "count" : 4.0
120}
121
122/* 12 */
123{
124 "_id" : "at",
125 "count" : 3.0
126}
127
128/* 13 */
129{
130 "_id" : "it",
131 "count" : 3.0
132}
133
134/* 14 */
135{
136 "_id" : "unknown",
137 "count" : 3.0
138}
139
140/* 15 */
141{
142 "_id" : "hk",
143 "count" : 2.0
144}
145
146/* 16 */
147{
148 "_id" : "ie",
149 "count" : 2.0
150}
151
152/* 17 */
153{
154 "_id" : "jp",
155 "count" : 2.0
156}
157
158/* 18 */
159{
160 "_id" : "ch",
161 "count" : 2.0
162}
163
164/* 19 */
165{
166 "_id" : "ua",
167 "count" : 2.0
168}
169
170/* 20 */
171{
172 "_id" : "ru",
173 "count" : 2.0
174}
175
176/* 21 */
177{
178 "_id" : "il",
179 "count" : 2.0
180}
181
182/* 22 */
183{
184 "_id" : "ro",
185 "count" : 2.0
186}
187
188/* 23 */
189{
190 "_id" : "sg",
191 "count" : 1.0
192}
193
194/* 24 */
195{
196 "_id" : "eu",
197 "count" : 1.0
198}
199
200/* 25 */
201{
202 "_id" : "ir",
203 "count" : 1.0
204}
205
206/* 26 */
207{
208 "_id" : "bg",
209 "count" : 1.0
210}
211
212/* 27 */
213{
214 "_id" : "fi",
215 "count" : 1.0
216}
217
218/* 28 */
219{
220 "_id" : "tr",
221 "count" : 1.0
222}
223
224/* 29 */
225{
226 "_id" : "mx",
227 "count" : 1.0
228}
Note: See TracBrowser for help on using the repository browser.