Changeset 33889

Show
Ignore:
Timestamp:
03.02.2020 15:48:40 (2 weeks ago)
Author:
ak19
Message:

1. Additional column: totalPagesAcrossMatchingSites. 2. Screengrab of the tables.

Location:
other-projects/maori-lang-detection/mongodb-data
Files:
9 added
8 modified

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/mongodb-data/1a_table_miInUrlPath.csv

    r33848 r33889  
    1 "_id","count","numPagesInMRICount","numPagesContainingMRICount" 
    2 "US","408.0","1169","3872" 
    3 "CN","123.0","281","1144" 
    4 "FR","34.0","754","1091" 
    5 "UNKNOWN","19.0","115","125" 
    6 "NZ","14.0","618","1097" 
    7 "DE","12.0","145","212" 
    8 "NL","8.0","76","115" 
    9 "CA","7.0","29","119" 
    10 "HK","7.0","3","12" 
    11 "AU","7.0","19","117" 
    12 "GB","5.0","3","7" 
    13 "JP","5.0","1","3" 
    14 "UA","4.0","9","10" 
    15 "RU","4.0","0","14" 
    16 "VG","2.0","0","0" 
    17 "SG","2.0","2","13" 
    18 "DK","2.0","0","0" 
    19 "IE","1.0","17","21" 
    20 "ZA","1.0","0","0" 
    21 "TR","1.0","0","2" 
    22 "SE","1.0","0","0" 
    23 "EU","1.0","0","7" 
    24 "CZ","1.0","0","0" 
    25 "ES","1.0","4","4" 
     1"countryCode","count","numPagesInMRICount","numPagesContainingMRICount","totalPagesAcrossMatchingSites" 
     2"US","408.0","1169","3872","199487" 
     3"CN","123.0","281","1144","79576" 
     4"FR","34.0","754","1091","7983" 
     5"UNKNOWN","19.0","115","125","1196" 
     6"NZ","14.0","618","1097","5959" 
     7"DE","12.0","145","212","5718" 
     8"NL","8.0","76","115","8351" 
     9"CA","7.0","29","119","11577" 
     10"AU","7.0","19","117","549" 
     11"HK","7.0","3","12","620" 
     12"JP","5.0","1","3","420" 
     13"GB","5.0","3","7","1948" 
     14"UA","4.0","9","10","1144" 
     15"RU","4.0","0","14","30" 
     16"DK","2.0","0","0","24" 
     17"VG","2.0","0","0","2" 
     18"SG","2.0","2","13","1373" 
     19"ZA","1.0","0","0","2" 
     20"SE","1.0","0","0","6" 
     21"TR","1.0","0","2","505" 
     22"EU","1.0","0","7","250" 
     23"ES","1.0","4","4","3648" 
     24"IE","1.0","17","21","451" 
     25"CZ","1.0","0","0","1" 
     26 
  • other-projects/maori-lang-detection/mongodb-data/1b_table_noMiInUrlPath.csv

    r33848 r33889  
    1 "_id","count","numPagesInMRICount","numPagesContainingMRICount" 
    2 "US","288.0","2136","5452" 
    3 "UNKNOWN","154.0","0","12" 
    4 "NZ","101.0","2035","5129" 
    5 "DE","40.0","1","208" 
    6 "AU","36.0","164","313" 
    7 "FR","35.0","22","244" 
    8 "NL","24.0","127","265" 
    9 "GB","13.0","1","42" 
    10 "CA","12.0","1","209" 
    11 "DK","8.0","4","8" 
    12 "ES","7.0","1","50" 
    13 "CZ","6.0","0","32" 
    14 "JP","5.0","0","100" 
    15 "IT","4.0","0","18" 
    16 "RO","3.0","73","116" 
    17 "IE","3.0","1","3" 
    18 "RU","3.0","0","1" 
    19 "AT","3.0","0","61" 
    20 "IN","3.0","0","0" 
    21 "SE","3.0","0","0" 
    22 "IL","3.0","0","19" 
    23 "CH","3.0","0","4" 
    24 "CN","2.0","0","34" 
    25 "PL","2.0","0","0" 
    26 "CK","2.0","0","0" 
    27 "IO","1.0","0","0" 
    28 "SG","1.0","3","10" 
    29 "FI","1.0","0","29" 
    30 "UA","1.0","0","0" 
    31 "IR","1.0","0","1" 
    32 "ZA","1.0","0","0" 
    33 "MX","1.0","1","21" 
    34 "PT","1.0","0","0" 
    35 "GR","1.0","1","3" 
    36 "PF","1.0","0","0" 
    37 "ME","1.0","0","0" 
    38 "BG","1.0","2","2" 
     1"countryCode","count","numPagesInMRICount","numPagesContainingMRICount","totalPagesAcrossMatchingSites" 
     2"US","288.0","2136","5452","213005" 
     3"UNKNOWN","154.0","0","12","78" 
     4"NZ","101.0","2035","5129","31360" 
     5"DE","40.0","1","208","30046" 
     6"AU","36.0","164","313","10490" 
     7"FR","35.0","22","244","29152" 
     8"NL","24.0","127","265","10918" 
     9"GB","13.0","1","42","2751" 
     10"CA","12.0","1","209","11931" 
     11"DK","8.0","4","8","16" 
     12"ES","7.0","1","50","2671" 
     13"CZ","6.0","0","32","9969" 
     14"JP","5.0","0","100","1005" 
     15"IT","4.0","0","18","684" 
     16"SE","3.0","0","0","3" 
     17"IN","3.0","0","0","110" 
     18"AT","3.0","0","61","984" 
     19"RU","3.0","0","1","1994" 
     20"IE","3.0","1","3","6" 
     21"RO","3.0","73","116","240" 
     22"IL","3.0","0","19","641" 
     23"CH","3.0","0","4","305" 
     24"PL","2.0","0","0","4671" 
     25"CN","2.0","0","34","716" 
     26"CK","2.0","0","0","2" 
     27"PT","1.0","0","0","2852" 
     28"MX","1.0","1","21","236" 
     29"IR","1.0","0","1","5" 
     30"ZA","1.0","0","0","1" 
     31"FI","1.0","0","29","1124" 
     32"UA","1.0","0","0","294" 
     33"SG","1.0","3","10","23" 
     34"IO","1.0","0","0","2" 
     35"BG","1.0","2","2","2" 
     36"ME","1.0","0","0","1" 
     37"PF","1.0","0","0","2" 
     38"GR","1.0","1","3","4" 
     39 
  • other-projects/maori-lang-detection/mongodb-data/1table_allCrawledSites.csv

    r33848 r33889  
    1 "_id","count","numPagesInMRICount","numPagesContainingMRICount" 
    2 "US","696.0","3305","9324" 
    3 "UNKNOWN","173.0","115","137" 
    4 "CN","125.0","281","1178" 
    5 "NZ","115.0","2653","6226" 
    6 "FR","69.0","776","1335" 
    7 "DE","52.0","146","420" 
    8 "AU","43.0","183","430" 
    9 "NL","32.0","203","380" 
    10 "CA","19.0","30","328" 
    11 "GB","18.0","4","49" 
    12 "DK","10.0","4","8" 
    13 "JP","10.0","1","103" 
    14 "ES","8.0","5","54" 
    15 "CZ","7.0","0","32" 
    16 "RU","7.0","0","15" 
    17 "HK","7.0","3","12" 
    18 "UA","5.0","9","10" 
    19 "IE","4.0","18","24" 
    20 "IT","4.0","0","18" 
    21 "SE","4.0","0","0" 
    22 "RO","3.0","73","116" 
    23 "SG","3.0","5","23" 
    24 "AT","3.0","0","61" 
    25 "IN","3.0","0","0" 
    26 "IL","3.0","0","19" 
    27 "CH","3.0","0","4" 
    28 "PL","2.0","0","0" 
    29 "ZA","2.0","0","0" 
    30 "VG","2.0","0","0" 
    31 "CK","2.0","0","0" 
    32 "IO","1.0","0","0" 
    33 "FI","1.0","0","29" 
    34 "IR","1.0","0","1" 
    35 "TR","1.0","0","2" 
    36 "EU","1.0","0","7" 
    37 "PT","1.0","0","0" 
    38 "MX","1.0","1","21" 
    39 "GR","1.0","1","3" 
    40 "PF","1.0","0","0" 
    41 "ME","1.0","0","0" 
    42 "BG","1.0","2","2" 
     1"countryCode","count","numPagesInMRICount","numPagesContainingMRICount","totalPagesAcrossSites" 
     2"US","696.0","3305","9324","412492" 
     3"UNKNOWN","173.0","115","137","1274" 
     4"CN","125.0","281","1178","80292" 
     5"NZ","115.0","2653","6226","37319" 
     6"FR","69.0","776","1335","37135" 
     7"DE","52.0","146","420","35764" 
     8"AU","43.0","183","430","11039" 
     9"NL","32.0","203","380","19269" 
     10"CA","19.0","30","328","23508" 
     11"GB","18.0","4","49","4699" 
     12"JP","10.0","1","103","1425" 
     13"DK","10.0","4","8","40" 
     14"ES","8.0","5","54","6319" 
     15"RU","7.0","0","15","2024" 
     16"HK","7.0","3","12","620" 
     17"CZ","7.0","0","32","9970" 
     18"UA","5.0","9","10","1438" 
     19"SE","4.0","0","0","9" 
     20"IT","4.0","0","18","684" 
     21"IE","4.0","18","24","457" 
     22"IN","3.0","0","0","110" 
     23"SG","3.0","5","23","1396" 
     24"AT","3.0","0","61","984" 
     25"RO","3.0","73","116","240" 
     26"IL","3.0","0","19","641" 
     27"CH","3.0","0","4","305" 
     28"VG","2.0","0","0","2" 
     29"ZA","2.0","0","0","3" 
     30"PL","2.0","0","0","4671" 
     31"CK","2.0","0","0","2" 
     32"PT","1.0","0","0","2852" 
     33"IR","1.0","0","1","5" 
     34"TR","1.0","0","2","505" 
     35"MX","1.0","1","21","236" 
     36"FI","1.0","0","29","1124" 
     37"IO","1.0","0","0","2" 
     38"EU","1.0","0","7","250" 
     39"BG","1.0","2","2","2" 
     40"ME","1.0","0","0","1" 
     41"PF","1.0","0","0","2" 
     42"GR","1.0","1","3","4" 
     43 
  • other-projects/maori-lang-detection/mongodb-data/2table_sitesWithPagesInMRI.csv

    r33886 r33889  
    1 "_id","count","numPagesInMRICount","numPagesContainingMRICount" 
    2 "us","206.0","3305","6327" 
    3 "nz","53.0","2653","5045" 
    4 "cn","32.0","281","542" 
    5 "fr","18.0","776","1101" 
    6 "au","11.0","183","358" 
    7 "nl","10.0","203","216" 
    8 "de","5.0","146","190" 
    9 "dk","4.0","4","4" 
    10 "gb","3.0","4","13" 
    11 "ca","3.0","30","35" 
    12 "unknown","2.0","115","125" 
    13 "ie","2.0","18","24" 
    14 "ua","2.0","9","10" 
    15 "es","2.0","5","5" 
    16 "sg","2.0","5","23" 
    17 "jp","1.0","1","3" 
    18 "hk","1.0","3","8" 
    19 "ro","1.0","73","104" 
    20 "bg","1.0","2","2" 
    21 "gr","1.0","1","3" 
    22 "mx","1.0","1","21" 
     1"countryCode","count","numPagesInMRICount","numPagesContainingMRICount","totalPagesAcrossSitesWithPositiveMRICount" 
     2"us","206.0","3305","6327","174620" 
     3"nz","53.0","2653","5045","21901" 
     4"cn","32.0","281","542","27405" 
     5"fr","18.0","776","1101","17622" 
     6"au","11.0","183","358","1329" 
     7"nl","10.0","203","216","9185" 
     8"de","5.0","146","190","4195" 
     9"dk","4.0","4","4","10" 
     10"gb","3.0","4","13","2935" 
     11"ca","3.0","30","35","2823" 
     12"es","2.0","5","5","3649" 
     13"ua","2.0","9","10","1140" 
     14"sg","2.0","5","23","1357" 
     15"ie","2.0","18","24","454" 
     16"unknown","2.0","115","125","943" 
     17"hk","1.0","3","8","500" 
     18"jp","1.0","1","3","377" 
     19"gr","1.0","1","3","4" 
     20"mx","1.0","1","21","236" 
     21"ro","1.0","73","104","105" 
     22"bg","1.0","2","2","2" 
     23 
  • other-projects/maori-lang-detection/mongodb-data/3table_sitesWithPagesContainingMRI.csv

    r33848 r33889  
    1 "_id","count","numPagesInMRICount","numPagesContainingMRICount" 
    2 "us","486.0","3305","9324" 
    3 "cn","114.0","281","1178" 
    4 "nz","89.0","2653","6226" 
    5 "fr","36.0","776","1335" 
    6 "de","27.0","146","420" 
    7 "nl","22.0","203","380" 
    8 "au","21.0","183","430" 
    9 "ca","12.0","30","328" 
    10 "dk","8.0","4","8" 
    11 "gb","7.0","4","49" 
    12 "es","7.0","5","54" 
    13 "cz","4.0","0","32" 
    14 "unknown","3.0","115","137" 
    15 "ro","3.0","73","116" 
    16 "it","3.0","0","18" 
    17 "at","3.0","0","61" 
    18 "il","2.0","0","19" 
    19 "ua","2.0","9","10" 
    20 "ru","2.0","0","15" 
    21 "ch","2.0","0","4" 
    22 "hk","2.0","3","12" 
    23 "sg","2.0","5","23" 
    24 "ie","2.0","18","24" 
    25 "jp","2.0","1","103" 
    26 "tr","1.0","0","2" 
    27 "mx","1.0","1","21" 
    28 "fi","1.0","0","29" 
    29 "eu","1.0","0","7" 
    30 "gr","1.0","1","3" 
    31 "bg","1.0","2","2" 
    32 "ir","1.0","0","1" 
     1"countryCode","count","numPagesInMRICount","numPagesContainingMRICount","totalPagesAcrossSitesWithPositiveContainsMRI" 
     2"us","486.0","3305","9324","353593" 
     3"cn","114.0","281","1178","78881" 
     4"nz","89.0","2653","6226","36874" 
     5"fr","36.0","776","1335","33647" 
     6"de","27.0","146","420","31884" 
     7"nl","22.0","203","380","16016" 
     8"au","21.0","183","430","5013" 
     9"ca","12.0","30","328","13795" 
     10"dk","8.0","4","8","16" 
     11"gb","7.0","4","49","3736" 
     12"es","7.0","5","54","6318" 
     13"cz","4.0","0","32","4698" 
     14"it","3.0","0","18","396" 
     15"unknown","3.0","115","137","1016" 
     16"ro","3.0","73","116","240" 
     17"at","3.0","0","61","984" 
     18"ua","2.0","9","10","1140" 
     19"ru","2.0","0","15","923" 
     20"il","2.0","0","19","639" 
     21"hk","2.0","3","12","600" 
     22"sg","2.0","5","23","1357" 
     23"ie","2.0","18","24","454" 
     24"jp","2.0","1","103","966" 
     25"ch","2.0","0","4","304" 
     26"fi","1.0","0","29","1124" 
     27"tr","1.0","0","2","505" 
     28"mx","1.0","1","21","236" 
     29"eu","1.0","0","7","250" 
     30"gr","1.0","1","3","4" 
     31"bg","1.0","2","2","2" 
     32"ir","1.0","0","1","5" 
     33 
  • other-projects/maori-lang-detection/mongodb-data/4table_tentativeNonProductSites.csv

    r33848 r33889  
    1 "_id","siteCount","numPagesInMRICount","numPagesContainingMRICount" 
    2 "us","181.0","2212","5579" 
    3 "nz","89.0","2653","6226" 
    4 "au","21.0","183","430" 
    5 "de","19.0","1","208" 
    6 "fr","17.0","22","244" 
    7 "nl","16.0","127","265" 
    8 "dk","8.0","4","8" 
    9 "ca","7.0","1","209" 
    10 "es","6.0","1","50" 
    11 "gb","5.0","1","42" 
    12 "cz","4.0","0","32" 
    13 "at","3.0","0","61" 
    14 "it","3.0","0","18" 
    15 "ro","3.0","73","116" 
    16 "ch","2.0","0","4" 
    17 "il","2.0","0","19" 
    18 "ru","1.0","0","1" 
    19 "jp","1.0","0","100" 
    20 "unknown","1.0","0","12" 
    21 "ie","1.0","1","3" 
    22 "fi","1.0","0","29" 
    23 "sg","1.0","3","10" 
    24 "bg","1.0","2","2" 
    25 "cn","1.0","0","34" 
    26 "gr","1.0","1","3" 
    27 "ir","1.0","0","1" 
    28 "mx","1.0","1","21" 
     1"countryCode","count","numPagesInMRICount","numPagesContainingMRICount","totalPagesAcrossMatchingSites" 
     2"us","181.0","2212","5579","158656" 
     3"nz","89.0","2653","6226","36874" 
     4"au","21.0","183","430","5013" 
     5"de","19.0","1","208","26617" 
     6"fr","17.0","22","244","25705" 
     7"nl","16.0","127","265","7669" 
     8"dk","8.0","4","8","16" 
     9"ca","7.0","1","209","2228" 
     10"es","6.0","1","50","2670" 
     11"gb","5.0","1","42","1813" 
     12"cz","4.0","0","32","4698" 
     13"ro","3.0","73","116","240" 
     14"it","3.0","0","18","396" 
     15"at","3.0","0","61","984" 
     16"il","2.0","0","19","639" 
     17"ch","2.0","0","4","304" 
     18"sg","1.0","3","10","23" 
     19"ir","1.0","0","1","5" 
     20"fi","1.0","0","29","1124" 
     21"ie","1.0","1","3","3" 
     22"ru","1.0","0","1","909" 
     23"jp","1.0","0","100","589" 
     24"mx","1.0","1","21","236" 
     25"unknown","1.0","0","12","73" 
     26"gr","1.0","1","3","4" 
     27"bg","1.0","2","2","2" 
     28"cn","1.0","0","34","270" 
     29 
  • other-projects/maori-lang-detection/mongodb-data/5table_tentativeNonProductSites1.csv

    r33883 r33889  
    1 "_id","siteCount (numPagesContainingMRICount > 0)","numPagesInMRICount","numPagesContainingMRICount" 
    2 "nz","176.0","4360","9641" 
    3 "us","117.0","757","2655" 
    4 "de","19.0","1","208" 
    5 "nl","16.0","127","265" 
    6 "fr","16.0","22","243" 
    7 "dk","8.0","4","8" 
    8 "ca","7.0","1","209" 
    9 "au","5.0","8","102" 
    10 "cz","4.0","0","32" 
    11 "gb","4.0","1","40" 
    12 "es","4.0","1","7" 
    13 "it","3.0","0","18" 
    14 "at","3.0","0","61" 
    15 "ro","2.0","0","12" 
    16 "il","2.0","0","19" 
    17 "ch","2.0","0","4" 
    18 "ir","1.0","0","1" 
    19 "fi","1.0","0","29" 
    20 "ie","1.0","1","3" 
    21 "ru","1.0","0","1" 
    22 "jp","1.0","0","100" 
    23 "mx","1.0","1","21" 
    24 "unknown","1.0","0","12" 
    25 "bg","1.0","2","2" 
    26 "cn","1.0","0","34" 
     1"countryCode","count","numPagesInMRICount","numPagesContainingMRICount","totalPagesAcrossMatchingSites" 
     2"nz","176.0","4360","9641","87657" 
     3"us","117.0","757","2655","113936" 
     4"de","19.0","1","208","26617" 
     5"fr","16.0","22","243","25547" 
     6"nl","16.0","127","265","7669" 
     7"dk","8.0","4","8","16" 
     8"ca","7.0","1","209","2228" 
     9"au","5.0","8","102","560" 
     10"gb","4.0","1","40","1809" 
     11"es","4.0","1","7","1354" 
     12"cz","4.0","0","32","4698" 
     13"it","3.0","0","18","396" 
     14"at","3.0","0","61","984" 
     15"ro","2.0","0","12","135" 
     16"il","2.0","0","19","639" 
     17"ch","2.0","0","4","304" 
     18"fi","1.0","0","29","1124" 
     19"ie","1.0","1","3","3" 
     20"jp","1.0","0","100","589" 
     21"ru","1.0","0","1","909" 
     22"unknown","1.0","0","12","73" 
     23"mx","1.0","1","21","236" 
     24"bg","1.0","2","2","2" 
     25"cn","1.0","0","34","270" 
     26"ir","1.0","0","1","5" 
     27 
  • other-projects/maori-lang-detection/mongodb-data/tables.txt

    r33878 r33889  
    11Instructions for producing the tables: 
    22a. Copy the Javascript version of results for each mongodb query listed below into a text editor. 
    3 b. Then regex replace \/\*\s*\d+\s*\*\/ with "," and embed all the JS inside []. 
     3b. OPTIONAL: Then regex replace \/\*\s*\d+\s*\*\/ with a comma (','), remove the very first comma, and embed all the JS inside []. 
    44c. Paste that Javascript into https://json-csv.com/ to get the CSV tables 
    55 
     
    1717            /*domain: { $addToSet: '$domain' },*/ 
    1818            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    19             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
     19            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     20            totalPagesAcrossSites: { $sum: '$totalPages'} 
    2021        } 
    2122    }, 
     
    3536            /*domain: { $addToSet: '$domain' },*/ 
    3637            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    37             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
     38            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     39            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
    3840        } 
    3941    }, 
     
    5355            /*domain: { $addToSet: '$domain' },*/ 
    5456            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    55             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
     57            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     58            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
    5659        } 
    5760    }, 
     
    7578            /*domain: { $addToSet: '$domain' },*/ 
    7679            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    77             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
     80            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     81            totalPagesAcrossSitesWithPositiveMRICount: { $sum: '$totalPages'} 
    7882        } 
    7983    }, 
     
    97101            /*domain: { $addToSet: '$domain' },*/ 
    98102            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    99             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
     103            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     104            totalPagesAcrossSitesWithPositiveContainsMRI: { $sum: '$totalPages'} 
    100105        } 
    101106    }, 
     
    122127             /*domain: { $addToSet: '$domain' },*/ 
    123128            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    124             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
     129            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     130            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
    125131        } 
    126132    }, 
     
    151157            /*domain: { $addToSet: '$domain' },*/ 
    152158            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    153             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
     159            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     160            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
    154161        } 
    155162    }, 
     
    175182            /*domain: { $addToSet: '$domain' },*/ 
    176183            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    177             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
    178         } 
    179     }, 
    180     { $sort : { count : -1} } 
    181 ]); 
    182  
    183  
    184 To find NZ web pages in MRI the following may be BETTER,  
     184            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     185            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
     186        } 
     187    }, 
     188    { $sort : { count : -1} } 
     189]); 
     190 
     191 
     192To find NZ web pages IN MRI the following may be BETTER,  
    185193as it looks for sites with positive numPagesINMRI rather than sites that only have positive containingMRI: 
    186194 
     
    201209            domain: { $addToSet: '$domain' }, 
    202210            numPagesInMRICount: { $sum: '$numPagesInMRI' }, 
    203             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' } 
    204         } 
    205     }, 
    206     { $sort : { count : -1} } 
    207 ]); 
    208  
     211            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }, 
     212            totalPagesAcrossMatchingSites: { $sum: '$totalPages'} 
     213        } 
     214    }, 
     215    { $sort : { count : -1} } 
     216]); 
     217