Changeset 33894

Show
Ignore:
Timestamp:
03.02.2020 23:20:33 (2 weeks ago)
Author:
ak19
Message:

1. Adding map, counts.json and GeoJSON files for 5b: the count of sites by country code with numPagesContainingMRI > 0, regardless of whether mi appears in the URL path. 2. The tables file (tables.txt) now has the MongoDB query code for the 5b data. 3. The map, counts.json and GeoJSON files for 6 (count of sites by country code from the manual shortlisting of sites) are now renamed to reflect that the shortlist considers ALL manually selected sites, regardless of whether mi appears in the URL path.

Location:
other-projects/maori-lang-detection/mongodb-data
Files:
4 added
1 modified
4 moved

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/mongodb-data/6counts_sitesWithPagesContainingMRI_manualShortlist.json

    r33848 r33894  
    88{ 
    99    "_id" : "nz", 
    10     "count" : 176.0 
     10    "count" : 126.0 
    1111} 
    1212{ 
  • other-projects/maori-lang-detection/mongodb-data/6geojson-features_sitesWithPagesContainingMRI_manualShortlist.json

    r33848 r33894  
    1 {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[170.885971,-40.900557],[170.885971,47.099443],[178.885971,47.099443],[178.885971,-40.900557],[170.885971,-40.900557]]]},"properties":{"code":"NZ","count":176,"region":"New Zealand"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-97.712891,37.09024],[-97.712891,66.09024],[-93.712891,66.09024],[-93.712891,37.09024],[-97.712891,37.09024]]]},"properties":{"code":"US","count":29,"region":"United States"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[131.775136,-25.274398],[131.775136,-23.274398],[135.775136,-23.274398],[135.775136,-25.274398],[131.775136,-25.274398]]]},"properties":{"code":"AU","count":2,"region":"Australia"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[8.451526,51.165691],[8.451526,53.165691],[12.451526,53.165691],[12.451526,51.165691],[8.451526,51.165691]]]},"properties":{"code":"DE","count":2,"region":"Germany"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[7.501785,56.26392],[7.501785,58.26392],[11.501785,58.26392],[11.501785,56.26392],[7.501785,56.26392]]]},"properties":{"code":"DK","count":2,"region":"Denmark"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[23.48583,42.733883],[23.48583,43.733883],[27.48583,43.733883],[27.48583,42.733883],[23.48583,42.733883]]]},"properties":{"code":"BG","count":1,"region":"Bulgaria"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[13.472962,49.817492],[13.472962,50.817492],[17.472962000000003,50.817492],[17.472962000000003,49.817492],[13.472962,49.817492]]]},"properties":{"code":"CZ","count":1,"region":"Czech 
Republic"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-5.74922,40.463667],[-5.74922,41.463667],[-1.7492200000000002,41.463667],[-1.7492200000000002,40.463667],[-5.74922,40.463667]]]},"properties":{"code":"ES","count":1,"region":"Spain"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[0.21374899999999997,46.227638],[0.21374899999999997,47.227638],[4.213749,47.227638],[4.213749,46.227638],[0.21374899999999997,46.227638]]]},"properties":{"code":"FR","count":1,"region":"France"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-10.24389,53.41291],[-10.24389,54.41291],[-6.24389,54.41291],[-6.24389,53.41291],[-10.24389,53.41291]]]},"properties":{"code":"IE","count":1,"region":"Ireland"}}]} 
     1{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[172.885971,-40.900557],[172.885971,85.09944300000001],[176.885971,85.09944300000001],[176.885971,-40.900557],[172.885971,-40.900557]]]},"properties":{"code":"NZ","count":126,"region":"New Zealand"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-97.712891,37.09024],[-97.712891,66.09024],[-93.712891,66.09024],[-93.712891,37.09024],[-97.712891,37.09024]]]},"properties":{"code":"US","count":29,"region":"United States"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[131.775136,-25.274398],[131.775136,-23.274398],[135.775136,-23.274398],[135.775136,-25.274398],[131.775136,-25.274398]]]},"properties":{"code":"AU","count":2,"region":"Australia"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[8.451526,51.165691],[8.451526,53.165691],[12.451526,53.165691],[12.451526,51.165691],[8.451526,51.165691]]]},"properties":{"code":"DE","count":2,"region":"Germany"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[7.501785,56.26392],[7.501785,58.26392],[11.501785,58.26392],[11.501785,56.26392],[7.501785,56.26392]]]},"properties":{"code":"DK","count":2,"region":"Denmark"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[23.48583,42.733883],[23.48583,43.733883],[27.48583,43.733883],[27.48583,42.733883],[23.48583,42.733883]]]},"properties":{"code":"BG","count":1,"region":"Bulgaria"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[13.472962,49.817492],[13.472962,50.817492],[17.472962000000003,50.817492],[17.472962000000003,49.817492],[13.472962,49.817492]]]},"properties":{"code":"CZ","count":1,"region":"Czech 
Republic"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-5.74922,40.463667],[-5.74922,41.463667],[-1.7492200000000002,41.463667],[-1.7492200000000002,40.463667],[-5.74922,40.463667]]]},"properties":{"code":"ES","count":1,"region":"Spain"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[0.21374899999999997,46.227638],[0.21374899999999997,47.227638],[4.213749,47.227638],[4.213749,46.227638],[0.21374899999999997,46.227638]]]},"properties":{"code":"FR","count":1,"region":"France"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-10.24389,53.41291],[-10.24389,54.41291],[-6.24389,54.41291],[-6.24389,53.41291],[-10.24389,53.41291]]]},"properties":{"code":"IE","count":1,"region":"Ireland"}}]} 
  • other-projects/maori-lang-detection/mongodb-data/tables.txt

    r33889 r33894  
    216216]); 
    217217 
     218 
     2195b. Table 5b: 
     220Table of count of sites with numPagesContainingMRI > 0 
     221 
     222Combine the following two: 
     223 
     224- OVERSEAS 
     225 
     226db.Websites.aggregate([ 
     227    { 
     228        $match: { 
     229            $and: [ 
     230                {geoLocationCountryCode: {$ne: "NZ"}}, 
     231                {domain: {$not: /\.nz/}}, 
     232                {numPagesContainingMRI: {$gt: 0}} 
     233            ] 
     234        } 
     235    }, 
     236    { $unwind: "$geoLocationCountryCode" }, 
     237    { 
     238        $group: { 
     239            _id: {$toLower: '$geoLocationCountryCode'}, 
     240            count: { $sum: 1 }, 
     241            /*domain: { $addToSet: '$domain' },*/ 
     242            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
     243            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     244            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
     245        } 
     246    }, 
     247    { $sort : { count : -1} } 
     248]); 
     249 
     250- NZ: 
     251 
     252db.Websites.aggregate([ 
     253    { 
     254        $match: { 
     255            $and: [ 
     256                {numPagesContainingMRI: {$gt: 0}}, 
     257                {$or: [{geoLocationCountryCode:"NZ"},{domain: /\.nz/}]} 
     258            ] 
     259        } 
     260    }, 
     261    { $unwind: "$geoLocationCountryCode" }, 
     262    { 
     263        $group: { 
     264            _id: "nz", 
     265            count: { $sum: 1 }, 
     266            /*domain: { $addToSet: '$domain' },*/ 
     267            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
     268            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     269            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
     270        } 
     271    }, 
     272    { $sort : { count : -1} } 
     273]); 
     274