Changeset 33894 for other-projects


Ignore:
Timestamp:
2020-02-03T23:20:33+13:00 (4 years ago)
Author:
ak19
Message:
  1. Adding map, counts.json and geo-json files for 5b: count of sites by country code with numPagesContainingMRI > 0, regardless of whether mi appears in the URL path. 2. Tables file now has the mongodb query code for the 5b data. 3. Map, counts.json and geo-json files for 6 (count of sites by country code from the manual shortlisting of sites) are now renamed to reflect that the shortlist considers ALL manually selected sites, regardless of whether mi appears in the URL path.
Location:
other-projects/maori-lang-detection/mongodb-data
Files:
4 added
1 edited
4 moved

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/mongodb-data/6counts_sitesWithPagesContainingMRI_manualShortlist.json

    r33893 r33894  
    88{
    99    "_id" : "nz",
    10     "count" : 176.0
     10    "count" : 126.0
    1111}
    1212{
  • other-projects/maori-lang-detection/mongodb-data/6geojson-features_sitesWithPagesContainingMRI_manualShortlist.json

    r33893 r33894  
    1 {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[170.885971,-40.900557],[170.885971,47.099443],[178.885971,47.099443],[178.885971,-40.900557],[170.885971,-40.900557]]]},"properties":{"code":"NZ","count":176,"region":"New Zealand"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-97.712891,37.09024],[-97.712891,66.09024],[-93.712891,66.09024],[-93.712891,37.09024],[-97.712891,37.09024]]]},"properties":{"code":"US","count":29,"region":"United States"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[131.775136,-25.274398],[131.775136,-23.274398],[135.775136,-23.274398],[135.775136,-25.274398],[131.775136,-25.274398]]]},"properties":{"code":"AU","count":2,"region":"Australia"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[8.451526,51.165691],[8.451526,53.165691],[12.451526,53.165691],[12.451526,51.165691],[8.451526,51.165691]]]},"properties":{"code":"DE","count":2,"region":"Germany"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[7.501785,56.26392],[7.501785,58.26392],[11.501785,58.26392],[11.501785,56.26392],[7.501785,56.26392]]]},"properties":{"code":"DK","count":2,"region":"Denmark"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[23.48583,42.733883],[23.48583,43.733883],[27.48583,43.733883],[27.48583,42.733883],[23.48583,42.733883]]]},"properties":{"code":"BG","count":1,"region":"Bulgaria"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[13.472962,49.817492],[13.472962,50.817492],[17.472962000000003,50.817492],[17.472962000000003,49.817492],[13.472962,49.817492]]]},"properties":{"code":"CZ","count":1,"region":"Czech 
Republic"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-5.74922,40.463667],[-5.74922,41.463667],[-1.7492200000000002,41.463667],[-1.7492200000000002,40.463667],[-5.74922,40.463667]]]},"properties":{"code":"ES","count":1,"region":"Spain"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[0.21374899999999997,46.227638],[0.21374899999999997,47.227638],[4.213749,47.227638],[4.213749,46.227638],[0.21374899999999997,46.227638]]]},"properties":{"code":"FR","count":1,"region":"France"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-10.24389,53.41291],[-10.24389,54.41291],[-6.24389,54.41291],[-6.24389,53.41291],[-10.24389,53.41291]]]},"properties":{"code":"IE","count":1,"region":"Ireland"}}]}
     1{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[172.885971,-40.900557],[172.885971,85.09944300000001],[176.885971,85.09944300000001],[176.885971,-40.900557],[172.885971,-40.900557]]]},"properties":{"code":"NZ","count":126,"region":"New Zealand"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-97.712891,37.09024],[-97.712891,66.09024],[-93.712891,66.09024],[-93.712891,37.09024],[-97.712891,37.09024]]]},"properties":{"code":"US","count":29,"region":"United States"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[131.775136,-25.274398],[131.775136,-23.274398],[135.775136,-23.274398],[135.775136,-25.274398],[131.775136,-25.274398]]]},"properties":{"code":"AU","count":2,"region":"Australia"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[8.451526,51.165691],[8.451526,53.165691],[12.451526,53.165691],[12.451526,51.165691],[8.451526,51.165691]]]},"properties":{"code":"DE","count":2,"region":"Germany"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[7.501785,56.26392],[7.501785,58.26392],[11.501785,58.26392],[11.501785,56.26392],[7.501785,56.26392]]]},"properties":{"code":"DK","count":2,"region":"Denmark"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[23.48583,42.733883],[23.48583,43.733883],[27.48583,43.733883],[27.48583,42.733883],[23.48583,42.733883]]]},"properties":{"code":"BG","count":1,"region":"Bulgaria"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[13.472962,49.817492],[13.472962,50.817492],[17.472962000000003,50.817492],[17.472962000000003,49.817492],[13.472962,49.817492]]]},"properties":{"code":"CZ","count":1,"region":"Czech 
Republic"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-5.74922,40.463667],[-5.74922,41.463667],[-1.7492200000000002,41.463667],[-1.7492200000000002,40.463667],[-5.74922,40.463667]]]},"properties":{"code":"ES","count":1,"region":"Spain"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[0.21374899999999997,46.227638],[0.21374899999999997,47.227638],[4.213749,47.227638],[4.213749,46.227638],[0.21374899999999997,46.227638]]]},"properties":{"code":"FR","count":1,"region":"France"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-10.24389,53.41291],[-10.24389,54.41291],[-6.24389,54.41291],[-6.24389,53.41291],[-10.24389,53.41291]]]},"properties":{"code":"IE","count":1,"region":"Ireland"}}]}
  • other-projects/maori-lang-detection/mongodb-data/tables.txt

    r33889 r33894  
    216216]);
    217217
     218
     2195b. Table 5b:
     220Table of count of sites with numPagesContainingMRI > 0
     221
     222Combine the following two:
     223
     224- OVERSEAS
     225
     226db.Websites.aggregate([
     227    {
     228        $match: {
     229            $and: [
     230                {geoLocationCountryCode: {$ne: "NZ"}},
     231                {domain: {$not: /\.nz/}},
     232                {numPagesContainingMRI: {$gt: 0}}
     233            ]
     234        }
     235    },
     236    { $unwind: "$geoLocationCountryCode" },
     237    {
     238        $group: {
     239            _id: {$toLower: '$geoLocationCountryCode'},
     240            count: { $sum: 1 },
     241            /*domain: { $addToSet: '$domain' },*/
     242            numPagesInMRICount: { $sum: '$numPagesInMRI' },
     243            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     244            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
     245        }
     246    },
     247    { $sort : { count : -1} }
     248]);
     249
     250- NZ:
     251
     252db.Websites.aggregate([
     253    {
     254        $match: {
     255            $and: [
     256                {numPagesContainingMRI: {$gt: 0}},
     257                {$or: [{geoLocationCountryCode:"NZ"},{domain: /\.nz/}]}
     258            ]
     259        }
     260    },
     261    { $unwind: "$geoLocationCountryCode" },
     262    {
     263        $group: {
     264            _id: "nz",
     265            count: { $sum: 1 },
     266            /*domain: { $addToSet: '$domain' },*/
     267            numPagesInMRICount: { $sum: '$numPagesInMRI' },
     268            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     269            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
     270        }
     271    },
     272    { $sort : { count : -1} }
     273]);
     274
Note: See TracChangeset for help on using the changeset viewer.