Changeset 33847
- Timestamp:
- 2020-01-17T19:32:16+13:00 (4 years ago)
- Location:
- other-projects/maori-lang-detection
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/MoreReading/mongodb.txt
r33843 r33847 1488 1488 TIDIED: 1489 1489 NZ: 176 1490 US: 25+3 from US with mi in URL path = 28 1490 US: 25+4 from US with mi in URL path = 29 1491 1491 AU: 3 1492 1492 DE: 2 … … 1497 1497 FR: 1 1498 1498 IE: 1 1499 TOTAL: 213+3 from US with mi in URL path = 216 1499 TOTAL: 213+4 from US with mi in URL path = 217 1500 1500 1501 1501 … … 1525 1525 Of interest or possible interest: 1526 1526 US: 1527 !! http://indigenousblogs.com [15/18 blogs work] 1527 !! http://indigenousblogs.com [15/18 blogs work] - has one page in Maori (http://indigenousblogs.com/feeds/mi.xml) 1528 1528 X https://biblia.gospelprime.com.br - misdetection (containsMRI) 1529 1529 X ?https://follow3rs.com - seems dodgy and possibly auto-translated. Can't spell account, misspelled as accout … … 1559 1559 db.getCollection('Webpages').find({$and: [{isMRI: true}, {URL: /indigenousblogs\.com/}]}) 1560 1560 => http://indigenousblogs.com/mi/ 1561 1562 -------------------------- 1563 1564 1565 db.Websites.aggregate([ 1566 { 1567 $match: { 1568 $and: [ 1569 {geoLocationCountryCode: {$ne: "NZ"}}, 1570 {domain: {$not: /\.nz/}}, 1571 {numPagesContainingMRI: {$gt: 0}}, 1572 {$or: [{geoLocationCountryCode: "AU"}, {urlContainsLangCodeInPath: false}]} 1573 ] 1574 } 1575 }, 1576 { $unwind: "$geoLocationCountryCode" }, 1577 { 1578 $group: { 1579 _id: {$toLower: '$geoLocationCountryCode'}, 1580 count: { $sum: 1 }, 1581 domain: { $addToSet: '$domain' }, 1582 numPagesInMRI: { $addToSet: '$numPagesInMRI' }, 1583 numPagesContainingMRI: { $addToSet: '$numPagesContainingMRI' }, 1584 numPagesInMRICount: { $sum: '$numPagesInMRI' }, 1585 numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 1586 } 1587 }, 1588 { $sort : { count : -1} } 1589 ]); 1590 1591 1592 To convert json to csv 1593 In gedit replace 1594 \/\*\s*\d+\s*\*\/ => , -
other-projects/maori-lang-detection/mongodb-data/6counts_nonProductSites1_manualShortlist.json
r33844 r33847 12 12 { 13 13 "_id" : "us", 14 "count" : 28.0 14 "count" : 29.0 15 15 } 16 16 { -
other-projects/maori-lang-detection/mongodb-data/6geojson-features_nonProductSites1_manualShortlist.json
r33844 r33847 1 {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[170.885971,-40.900557],[170.885971,47.099443],[178.885971,47.099443],[178.885971,-40.900557],[170.885971,-40.900557]]]},"properties":{"code":"NZ","count":176,"region":"New Zealand"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-97.712891,37.09024],[-97.712891,65.09024],[-93.712891,65.09024],[-93.712891,37.09024],[-97.712891,37.09024]]]},"properties":{"code":"US","count":28,"region":"United States"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[131.775136,-25.274398],[131.775136,-22.274398],[135.775136,-22.274398],[135.775136,-25.274398],[131.775136,-25.274398]]]},"properties":{"code":"AU","count":3,"region":"Australia"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[8.451526,51.165691],[8.451526,53.165691],[12.451526,53.165691],[12.451526,51.165691],[8.451526,51.165691]]]},"properties":{"code":"DE","count":2,"region":"Germany"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[7.501785,56.26392],[7.501785,58.26392],[11.501785,58.26392],[11.501785,56.26392],[7.501785,56.26392]]]},"properties":{"code":"DK","count":2,"region":"Denmark"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[23.48583,42.733883],[23.48583,43.733883],[27.48583,43.733883],[27.48583,42.733883],[23.48583,42.733883]]]},"properties":{"code":"BG","count":1,"region":"Bulgaria"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[13.472962,49.817492],[13.472962,50.817492],[17.472962000000003,50.817492],[17.472962000000003,49.817492],[13.472962,49.817492]]]},"properties":{"code":"CZ","count":1,"region":"Czech 
Republic"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-5.74922,40.463667],[-5.74922,41.463667],[-1.7492200000000002,41.463667],[-1.7492200000000002,40.463667],[-5.74922,40.463667]]]},"properties":{"code":"ES","count":1,"region":"Spain"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[0.21374899999999997,46.227638],[0.21374899999999997,47.227638],[4.213749,47.227638],[4.213749,46.227638],[0.21374899999999997,46.227638]]]},"properties":{"code":"FR","count":1,"region":"France"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-10.24389,53.41291],[-10.24389,54.41291],[-6.24389,54.41291],[-6.24389,53.41291],[-10.24389,53.41291]]]},"properties":{"code":"IE","count":1,"region":"Ireland"}}]}1 {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[170.885971,-40.900557],[170.885971,47.099443],[178.885971,47.099443],[178.885971,-40.900557],[170.885971,-40.900557]]]},"properties":{"code":"NZ","count":176,"region":"New Zealand"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-97.712891,37.09024],[-97.712891,66.09024],[-93.712891,66.09024],[-93.712891,37.09024],[-97.712891,37.09024]]]},"properties":{"code":"US","count":29,"region":"United 
States"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[131.775136,-25.274398],[131.775136,-22.274398],[135.775136,-22.274398],[135.775136,-25.274398],[131.775136,-25.274398]]]},"properties":{"code":"AU","count":3,"region":"Australia"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[8.451526,51.165691],[8.451526,53.165691],[12.451526,53.165691],[12.451526,51.165691],[8.451526,51.165691]]]},"properties":{"code":"DE","count":2,"region":"Germany"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[7.501785,56.26392],[7.501785,58.26392],[11.501785,58.26392],[11.501785,56.26392],[7.501785,56.26392]]]},"properties":{"code":"DK","count":2,"region":"Denmark"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[23.48583,42.733883],[23.48583,43.733883],[27.48583,43.733883],[27.48583,42.733883],[23.48583,42.733883]]]},"properties":{"code":"BG","count":1,"region":"Bulgaria"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[13.472962,49.817492],[13.472962,50.817492],[17.472962000000003,50.817492],[17.472962000000003,49.817492],[13.472962,49.817492]]]},"properties":{"code":"CZ","count":1,"region":"Czech Republic"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-5.74922,40.463667],[-5.74922,41.463667],[-1.7492200000000002,41.463667],[-1.7492200000000002,40.463667],[-5.74922,40.463667]]]},"properties":{"code":"ES","count":1,"region":"Spain"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[0.21374899999999997,46.227638],[0.21374899999999997,47.227638],[4.213749,47.227638],[4.213749,46.227638],[0.21374899999999997,46.227638]]]},"properties":{"code":"FR","count":1,"region":"France"}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-10.24389,53.41291],[-10.24389,54.41291],[-6.24389,54.41291],[-6.24389,53.41291],[-10.24389,53.41291]]]},"properties":{"code":"IE","count":1,"region":"Ireland"}}]}
Note:
See TracChangeset
for help on using the changeset viewer.