Ignore:
Timestamp:
2020-02-03T15:48:40+13:00 (4 years ago)
Author:
ak19
Message:
  1. Additional column: totalPagesAcrossMatchingSites. 2. Screengrab of the tables.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/mongodb-data/tables.txt

    r33878 r33889  
    1 1 Instructions for producing the tables:
    2 2 a. Copy the Javascript version of results for each mongodb query listed below into a text editor.
    3   b. Then regex replace \/\*\s*\d+\s*\*\/ with "," and embed all the JS inside [].
      3 b. OPTIONAL: Then regex replace \/\*\s*\d+\s*\*\/ with a comma (','), remove the very first comma, and embed all the JS inside [].
    4 4 c. Paste that Javascript into https://json-csv.com/ to get the CSV tables
    5 5
     
    1717            /*domain: { $addToSet: '$domain' },*/
    1818            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    19             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
     19            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     20            totalPagesAcrossSites: { $sum: '$totalPages'}
    2021        }
    2122    },
     
    3536            /*domain: { $addToSet: '$domain' },*/
    3637            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    37             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
     38            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     39            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
    3840        }
    3941    },
     
    5355            /*domain: { $addToSet: '$domain' },*/
    5456            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    55             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
     57            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     58            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
    5659        }
    5760    },
     
    7578            /*domain: { $addToSet: '$domain' },*/
    7679            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    77             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
     80            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     81            totalPagesAcrossSitesWithPositiveMRICount: { $sum: '$totalPages'}
    7882        }
    7983    },
     
    97101            /*domain: { $addToSet: '$domain' },*/
    98102            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    99             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
     103            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     104            totalPagesAcrossSitesWithPositiveContainsMRI: { $sum: '$totalPages'}
    100105        }
    101106    },
     
    122127             /*domain: { $addToSet: '$domain' },*/
    123128            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    124             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
     129            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     130            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
    125131        }
    126132    },
     
    151157            /*domain: { $addToSet: '$domain' },*/
    152158            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    153             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
     159            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     160            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
    154161        }
    155162    },
     
    175182            /*domain: { $addToSet: '$domain' },*/
    176183            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    177             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
    178         }
    179     },
    180     { $sort : { count : -1} }
    181 ]);
    182 
    183 
    184 To find NZ web pages in MRI the following may be BETTER,
     184            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     185            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
     186        }
     187    },
     188    { $sort : { count : -1} }
     189]);
     190
     191
     192 To find NZ web pages IN MRI the following may be BETTER,
    185 193 as it looks for sites with positive numPagesInMRI rather than sites that only have positive numPagesContainingMRI:
    186 194
     
    201209            domain: { $addToSet: '$domain' },
    202210            numPagesInMRICount: { $sum: '$numPagesInMRI' },
    203             numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' }
    204         }
    205     },
    206     { $sort : { count : -1} }
    207 ]);
    208 
     211            numPagesContainingMRICount: { $sum: '$numPagesContainingMRI' },
     212            totalPagesAcrossMatchingSites: { $sum: '$totalPages'}
     213        }
     214    },
     215    { $sort : { count : -1} }
     216]);
     217
Note: See TracChangeset for help on using the changeset viewer.