Changeset 33948


Ignore:
Timestamp:
2020-02-18T22:56:44+13:00 (4 years ago)
Author:
ak19
Message:

Reviewed the randomly sampled web page URLs marked as SIGNIFICANTLY_MAORI and reassigned some to POEMS_OR_SONGS, MAORI_PARAGRAPHS, etc.

Location:
other-projects/maori-lang-detection
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/mongodb-data/random260_manualList_globalDomains_whereAPageContainsMRI.txt

    r33947 r33948  
    33https://paekupu.co.nz/word/awatea,NZ,Y,MIXED_TEXT
    44http://pukoro.co.nz/mi/vocab-with-kori/twigs/,NZ,Y,SIGNIFICANTLY_MAORI
    5 http://tiritiowaitangi.govt.nz/maps/landconfiscations.php,NZ,Y,SIGNIFICANTLY_MAORI
     5http://tiritiowaitangi.govt.nz/maps/landconfiscations.php,NZ,Y,MIXED_TEXT
    66http://www.tmoa.tki.org.nz/Karere-Hou/Hine-Takurua-raua-ko-Hine-Raumati-ma-te-pouako,NZ,Y,SIGNIFICANTLY_MAORI
    77https://tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/takaro-raumati/,NZ,Y,NAV
     
    99http://www.tmoa.tki.org.nz/Karere-Hou/He-hotaka-whakangungu,NZ,Y,NAV
    1010https://tetaurawhiri.govt.nz/kaupapa/panui-papaho/kaua-e-whiua-whanaia-dont-pass-it-kick-it/,NZ,Y,SIGNIFICANTLY_MAORI
    11 http://www.pakanae.maori.nz/ng257-wai-o-hokianga.html,NZ,Y,SIGNIFICANTLY_MAORI
     11http://www.pakanae.maori.nz/ng257-wai-o-hokianga.html,NZ,Y,POEMS_OR_SONGS
    1212http://www.tmoa.tki.org.nz/Karere-Hou/He-purongo-TMoA-me-NWRM,NZ,Y,SIGNIFICANTLY_MAORI
    1313http://www.tkkmmokopuna.school.nz/1/easy_pages/3-mana-kura-tahi-tkkm-o-nga-mokopuna,NZ,Y,NAV
     
    1919https://tiritiowaitangi.govt.nz/story/leaduptotreaty.php,NZ,Y,SIGNIFICANTLY_MAORI
    2020http://www.waiata.maori.nz/index.php/browse/genre,NZ,Y,POEMS_OR_SONGS
    21 https://csunplugged.org/mi/topics/,US,Y,SIGNIFICANTLY_MAORI
    22 http://www.teipukarea.maori.nz/research-mi-nz/nga-kaupapa-rangahau-o-mua/,NZ,Y,SIGNIFICANTLY_MAORI
     21https://csunplugged.org/mi/topics/,US,Y,MAORI_PARAGRAPHS
     22http://www.teipukarea.maori.nz/research-mi-nz/nga-kaupapa-rangahau-o-mua/,NZ,Y,MAORI_PARAGRAPHS
    2323https://www.tetaurawhiri.govt.nz/kaupapa/tatai-korero-2/1800/,NZ,Y,SIGNIFICANTLY_MAORI
    2424http://tiritiowaitangi.govt.nz/story/textdifferences.php,NZ,Y,SIGNIFICANTLY_MAORI
    2525http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Hangarau/1.4-Te-whakaputa-i-tetahi-hua-hangarau-no-roto-mai-i-te-matauranga-Maori-hei-whakaea-i-tetahi-tauaki,NZ,Y,NAV
    2626http://www.waiata.maori.nz/en/song/kua-eke-raa-ngaa-tuumanako,NZ,Y,POEMS_OR_SONGS
    27 http://animations.tewhanake.maori.nz/te-kohure/te-wahanga-tuarua/380,NZ,Y,SIGNIFICANTLY_MAORI
     27http://animations.tewhanake.maori.nz/te-kohure/te-wahanga-tuarua/380,NZ,Y,MAORI_PARAGRAPHS
    2828http://www.tmoa.tki.org.nz/Pakiwaitara-Marau/Tutarawananga-Kia-hangai-ki-nga-tamariki,NZ,Y,SIGNIFICANTLY_MAORI
    2929http://firstworldwar.tki.org.nz/mi/nga-kaupapa/tuakiri-me-te-tuku-ihotanga/,NZ,Y,SIGNIFICANTLY_MAORI
     
    3636http://pukoro.co.nz/mi/computer-games/game-3-skiing-game,NZ,Y,WORDS
    3737https://www.tekura.school.nz/dual-providers-mi-nz/first-steps-mi-nz/online-learning-mi-nz/,NZ,Y,SIGNIFICANTLY_MAORI
    38 http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Takapiringa,NZ,Y,SIGNIFICANTLY_MAORI
    39 https://kupengahao.co.nz/items/he-tautoko-i-te-marautanga-pangarau-te-tauira-me-te-panga/,NZ,Y,SIGNIFICANTLY_MAORI
     38http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Takapiringa,NZ,Y,MAORI_PARAGRAPHS
     39https://kupengahao.co.nz/items/he-tautoko-i-te-marautanga-pangarau-te-tauira-me-te-panga/,NZ,Y,MAORI_PARAGRAPHS
    4040https://paekupu.co.nz/word/aputa-rarangi,NZ,Y,MIXED_TEXT
    41 http://www.tmoa.tki.org.nz/Mataiako/Te-Waharoa-Ararau/Te-Waharoa-Ararau-Modules,NZ,Y,SIGNIFICANTLY_MAORI
     41http://www.tmoa.tki.org.nz/Mataiako/Te-Waharoa-Ararau/Te-Waharoa-Ararau-Modules,NZ,Y,OTHER_LANGUAGES
    4242https://www.twttoa.com/future-events--contact-details.html,US,Y,MIXED_TEXT
    43 https://biblehub.com/mao/1_kings/6.htm,US,Y,SIGNIFICANTLY_MAORI
     43https://biblehub.com/mao/1_kings/6.htm,US,Y,
    4444http://m.biblepub.com/bibles/mb/19/23,US,Y,SIGNIFICANTLY_MAORI
    4545https://www.tetaurawhiri.govt.nz/ratonga/,NZ,Y,NAV
    4646http://www.tkkmmokopuna.school.nz/1/image_galleries/3-2018-whakaahua,NZ,Y,NAV
    4747http://www.tmoa.tki.org.nz/Pakiwaitara-Marau/Te-Kura-o-Takaro,NZ,Y,SIGNIFICANTLY_MAORI
    48 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Hauora,NZ,Y,SIGNIFICANTLY_MAORI
     48http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Hauora,NZ,Y,MAORI_PARAGRAPHS
    4949http://www.waiata.maori.nz/mri/song/te-haakii-o-wiremu-taamehana,NZ,Y,POEMS_OR_SONGS
    5050http://kuraaiwi.maori.nz/m333-ng257-tumuaki.html,NZ,Y,LITTLE_TEXT
     
    5252https://www.tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/nga-rerenga-korero-phrases/,NZ,Y,NAV
    5353https://www.tetaurawhiri.govt.nz/kaupapa/panui-papaho/he-whakaari-pukuhohe-reo-maori-hou/,NZ,Y,SIGNIFICANTLY_MAORI
    54 https://ngatiporoukiponeke.org.nz/hikurangi-maunga-kati-ra/,NZ,Y,SIGNIFICANTLY_MAORI
     54https://ngatiporoukiponeke.org.nz/hikurangi-maunga-kati-ra/,NZ,Y,POEMS_OR_SONGS
    5555http://www.tmoa.tki.org.nz/Mataiako/Te-Waharoa-Ararau/Nga-rauemi-mo-TWA,NZ,Y,MIXED_TEXT
    5656http://www.tereowrap.nz/tmwo_upoko04.html,NZ,Y,SIGNIFICANTLY_MAORI
     
    6969http://kuraproductions.co.nz/mi/shows/taniwha-rau,NZ,Y,SIGNIFICANTLY_MAORI
    7070http://www.firstworldwar.tki.org.nz/mi/rauemi/he-tohu-pakanga-he-tohu-kapa/,NZ,Y,SIGNIFICANTLY_MAORI
    71 https://ngatiporoukiponeke.org.nz/karangatia-ra/,NZ,Y,SIGNIFICANTLY_MAORI
     71https://ngatiporoukiponeke.org.nz/karangatia-ra/,NZ,Y,POEMS_OR_SONGS
    7272http://kurakokiri.maori.nz/students.html,NZ,Y,SIGNIFICANTLY_MAORI
    73 http://tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Kawea-te-Rongo-Ki-Mai-He-Whakamatau-Reo-Korero-JOST,NZ,Y,SIGNIFICANTLY_MAORI
    74 https://kupengahao.co.nz/items/tihei-pangarau-te-tau-me-te-taurangi-te-pukapuka-aratohu-ma-te-pouako-taumata-3/,NZ,Y,SIGNIFICANTLY_MAORI
     73http://tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Kawea-te-Rongo-Ki-Mai-He-Whakamatau-Reo-Korero-JOST,NZ,Y,MAORI_PARAGRAPHS
     74https://kupengahao.co.nz/items/tihei-pangarau-te-tau-me-te-taurangi-te-pukapuka-aratohu-ma-te-pouako-taumata-3/,NZ,Y,MAORI_PARAGRAPHS
    7575http://pukoro.co.nz/mi/vocab-with-kori/bib/,NZ,Y,SIGNIFICANTLY_MAORI
    76 http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Pangarau/Apitihanga-Uiui-Rautaki-GloSS,NZ,Y,SIGNIFICANTLY_MAORI
     76http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Pangarau/Apitihanga-Uiui-Rautaki-GloSS,NZ,Y,MAORI_PARAGRAPHS
    7777http://tmoa.tki.org.nz/Pakiwaitara-Marau/Te-Kura-Kaupapa-Maori-o-Ngati-Ruanui,NZ,Y,SIGNIFICANTLY_MAORI
    7878https://tehiku.nz/archives/te-tai-tokerau-kapa-haka-regionals-2011/4463/kaitaia-college-seniors,NZ,Y,MIXED_TEXT
     
    8787https://www.tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/christmas/,NZ,Y,NAV
    8888http://tiritiowaitangi.govt.nz/maps/maoriland1939.php,NZ,Y,NAV
    89 http://tmoa.tki.org.nz/Pakiwaitara-Marau/Tutarawananga-Nga-pumanawa-matua,NZ,Y,SIGNIFICANTLY_MAORI
     89http://tmoa.tki.org.nz/Pakiwaitara-Marau/Tutarawananga-Nga-pumanawa-matua,NZ,Y,MAORI_PARAGRAPHS
    9090https://mi.wikipedia.org/wiki/Katea,US,Y,MIXED_TEXT
    9191http://www.livingheritage.org.nz/schools/secondary/te-ara-whanui/toku-kura/index.php,NZ,Y,SIGNIFICANTLY_MAORI
    92 http://www.twtop.school.nz/66/blogs/1-te-ohinga-panui/posts/12-term-2-week-1,NZ,Y,SIGNIFICANTLY_MAORI
     92http://www.twtop.school.nz/66/blogs/1-te-ohinga-panui/posts/12-term-2-week-1,NZ,Y,MAORI_PARAGRAPHS
    9393http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Putaiao/AS-3.3-v1-Te-tatari-i-nga-taukumekume-mo-te-whakamahi-i-nga-rawa-o-Papatuanuku-Tauira,NZ,Y,NAV
    9494https://www.tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/kia-ora-general-phrases/,NZ,Y,NAV
     
    100100http://pukoro.co.nz/mi/learning-points/autumn/,NZ,Y,LITTLE_TEXT
    101101http://tiritiowaitangi.govt.nz/quotes/1920.php,NZ,Y,SIGNIFICANTLY_MAORI
    102 https://kupengahao.co.nz/portfolio-filter/putaiao/,NZ,Y,SIGNIFICANTLY_MAORI
     102https://kupengahao.co.nz/portfolio-filter/putaiao/,NZ,Y,MAORI_PARAGRAPHS
    103103http://m.biblepub.com/bibles/mb/15/3,US,Y,SIGNIFICANTLY_MAORI
    104104http://www.twtop.school.nz/62/learning_cave_sets/22-tuwharetoatanga,NZ,Y,MIXED_TEXT
     
    108108https://www.twttoa.com/rangatira-pita-sharples-5.html,US,Y,NAV
    109109http://www.tmoa.tki.org.nz/Mataiako/Whiriwhiria-tetahi-rauemi-aromatawai,NZ,Y,LITTLE_TEXT
    110 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Nga-whainga-paetae-o-TMoA,NZ,Y,SIGNIFICANTLY_MAORI
     110http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Nga-whainga-paetae-o-TMoA,NZ,Y,MAORI_PARAGRAPHS
    111111http://pukoro.co.nz/mi/crafts/making-animation,NZ,Y,SIGNIFICANTLY_MAORI
    112112http://pukoro.co.nz/mi/crafts/making-scones/,NZ,Y,SIGNIFICANTLY_MAORI
     
    119119http://pukoro.co.nz/mi/kohanga-of-the-week/te-wharekura-o-tauranga-moana/,NZ,Y,NAV
    120120http://tmoa.tki.org.nz/Karere-Hou/Wanangatia-te-Putanga-Tauira,NZ,Y,SIGNIFICANTLY_MAORI
    121 https://kupengahao.co.nz/items/te-poutama-tau-pukapuka-tuarima-te-tapiritanga-te-tangohanga-me-te-uara-tu/,NZ,Y,SIGNIFICANTLY_MAORI
     121https://kupengahao.co.nz/items/te-poutama-tau-pukapuka-tuarima-te-tapiritanga-te-tangohanga-me-te-uara-tu/,NZ,Y,MAORI_PARAGRAPHS
    122122http://www.tmoa.tki.org.nz/Kura/Te-Reo-a-Waha/Nga-ataata/Te-Reo-a-Waha-Tau-7-8,NZ,Y,NAV
    123123http://www.firstworldwar.tki.org.nz/mi/rauemi/mano-reo-mano-whakaaro/,NZ,Y,SIGNIFICANTLY_MAORI
     
    126126http://m.biblepub.com/bibles/mb/1/42,US,Y,SIGNIFICANTLY_MAORI
    127127http://www.tmoa.tki.org.nz/Nga-Whanaketanga-Rumaki-Maori/Nga-Whanaketanga-Rumaki-Maori-me-Ka-Hikitia-Accelerating-Success-2013-2017,NZ,Y,SIGNIFICANTLY_MAORI
    128 http://tmoa.tki.org.nz/Karere-Hou/Matariki,NZ,Y,SIGNIFICANTLY_MAORI
     128http://tmoa.tki.org.nz/Karere-Hou/Matariki,NZ,Y,MAORI_PARAGRAPHS
    129129https://www.terito.school.nz/2017-3/,NZ,Y,NAV
    130130https://mi.m.wikipedia.org/wiki/1788,US,Y,WORDS
     
    156156http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/He-Karere-Hou,NZ,Y,SIGNIFICANTLY_MAORI
    157157http://www.waiata.maori.nz/en/song/aku-kanohi-kau,NZ,Y,POEMS_OR_SONGS
    158 http://tmoa.tki.org.nz/Karere-Hou/Te-Tautoko-76,NZ,Y,SIGNIFICANTLY_MAORI
     158http://tmoa.tki.org.nz/Karere-Hou/Te-Tautoko-76,NZ,Y,MAORI_PARAGRAPHS
    159159http://www.tmoa.tki.org.nz/Karere-Hou/Te-whakahounga-o-Te-Waharoa-Ararau,NZ,Y,SIGNIFICANTLY_MAORI
    160160http://pukoro.co.nz/mi/stories/the-disheartened-possum/,NZ,Y,SIGNIFICANTLY_MAORI
     
    187187https://tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/akina-te-reo-maramataka/,NZ,Y,NAV
    188188http://tmoa.tki.org.nz/Te-Marautanga-o-Aotearoa/Te-Anga/Te-Putake-o-Te-Marautanga-o-Aotearoa,NZ,Y,SIGNIFICANTLY_MAORI
    189 http://ngatipahauwera.co.nz/nga-konae/,NZ,Y,SIGNIFICANTLY_MAORI
     189http://ngatipahauwera.co.nz/nga-konae/,NZ,Y,POEMS_OR_SONGS
    190190https://mi.m.wikipedia.org/wiki/1748,US,Y,WORDS
    191191http://tmoa.tki.org.nz/Mataiako/Ako/Ka-arotahi-te-aromatawai-ki-te-akonga,NZ,Y,SIGNIFICANTLY_MAORI
    192192http://www.waiata.maori.nz/mri/song/aku-kanohi-kau,NZ,Y,POEMS_OR_SONGS
    193193https://sexualviolence.victimsinfo.govt.nz/mi/glossary/showterm/kaitohutohu+p%c4%81rurenga+koeretanga/,NZ,Y,SIGNIFICANTLY_MAORI
    194 http://tmoa.tki.org.nz/Kura/He-Reo-Moteatea-Tirohanga-whanui/Ka-Eke-ki-Wairaka-Maori/Ka-Eke-ki-Wairaka,NZ,Y,SIGNIFICANTLY_MAORI
     194http://tmoa.tki.org.nz/Kura/He-Reo-Moteatea-Tirohanga-whanui/Ka-Eke-ki-Wairaka-Maori/Ka-Eke-ki-Wairaka,NZ,Y,POEMS_OR_SONGS
    195195http://www.waiata.maori.nz/en/song/mahara,NZ,Y,POEMS_OR_SONGS
    196196https://mi.m.wikipedia.org/wiki/1886,US,Y,WORDS
     
    237237https://www.tetaurawhiri.govt.nz/kaupapa/panui-papaho/te-reo-maori-revitalisation-celebrated/,NZ,Y,SIGNIFICANTLY_MAORI
    238238http://tmoa.tki.org.nz/Pakiwaitara-Marau/Te-Kura-o-Titahi-ki-te-Raki,NZ,Y,SIGNIFICANTLY_MAORI
    239 http://kuraproductions.co.nz/mi/shows/only-in-aotearoa,NZ,Y,SIGNIFICANTLY_MAORI
     239http://kuraproductions.co.nz/mi/shows/only-in-aotearoa,NZ,Y,MAORI_PARAGRAPHS
    240240http://www.tmoa.tki.org.nz/Akonga,NZ,Y,MAORI_PARAGRAPHS
    241241http://www.tmoa.tki.org.nz/Pakiwaitara-Marau/Tatari-Tautoko-Tauawhi-Nawton-School,NZ,Y,SIGNIFICANTLY_MAORI
     
    247247http://tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Te-Reo-Rangatira/TRR-1.11-Te-whakaatu-i-nga-putanga-rangahau,NZ,Y,NAV
    248248https://tetaurawhiri.govt.nz/kaupapa/panui-papaho/nga-tohu-reo-maori/,NZ,Y,SIGNIFICANTLY_MAORI
    249 http://pukoro.co.nz/mi/songs-and-lyrics/at-school/,NZ,Y,SIGNIFICANTLY_MAORI
     249http://pukoro.co.nz/mi/songs-and-lyrics/at-school/,NZ,Y,POEMS_OR_SONGS
    250250http://pukoro.co.nz/mi/learning-points/test-learning-points/,NZ,Y,SINGLE_MRI_SENTENCE
    251 http://www.kura-porirua.school.nz/194/profile_areas/5-te-rarangi-kaimahi-staff-directory,NZ,Y,SIGNIFICANTLY_MAORI
     251http://www.kura-porirua.school.nz/194/profile_areas/5-te-rarangi-kaimahi-staff-directory,NZ,Y,NAV
    252252http://www.tmoa.tki.org.nz/Pakiwaitara-Marau,NZ,Y,SIGNIFICANTLY_MAORI
    253253http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/He-Maherehere-Paerewa-Paetae2,NZ,Y,NAV
  • other-projects/maori-lang-detection/src/org/greenstone/atea/ManualURLInspection.java

    r33946 r33948  
    492492    return "";
    493493    }
     494
     495
     496    public void reviewQualityLevelFieldFor(/*String basicDomain,*/ String fieldValue) {
     497    final String USER_PROMPT = "Enter qualityLevel value of\n\t? | (N)AV | (L)ITTLE_TEXT | (M)IXED_TEXT | (S)IGNIFICANTLY_MAORI | MAORI_(P)ARAGRAPHS"
     498        + "\n\t | PO(E)MS_OR_SONGS | S(I)NGLE_MRI_SENTENCE | (W)ORDS | (O)THER_LANGUAGES\n\tfor (%d): %s - %s > ";
     499        //"Enter isMRI value of Y|N|? for (" + count + "): " + url + " - " + countryCode + " > ";
     500   
     501    boolean terminate = false;
     502    CSVParser parser = null;
     503   
     504    try {
     505        parser = CSVParser.parse(webPageURLsCSVFile, java.nio.charset.Charset.forName("US-ASCII"), CSVFormat.RFC4180);
     506    } catch(Exception e) {
     507        logger.error("Failed to parse input CSV file " + Utility.getFilePath(webPageURLsCSVFile), e);
     508        return;
     509    }
     510   
     511    try (
     512         CSVPrinter csvWriter = new CSVPrinter(new FileWriter(tmpOutFile), CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL));
     513         ) {
     514
     515        int recordCount = 0;
     516        for (CSVRecord csvRecord : parser) {       
     517       
     518        String url = csvRecord.get(URL_COLUMN);
     519        if(url.equals("")) { // skip empty lines
     520            continue;
     521        }
     522       
     523        recordCount++;
     524
     525       
     526        String basicURL = Utility.stripProtocolAndWWWFromURL(Utility.getDomainForURL(url, false));
     527        /*
     528        if(!basicURL.equals(basicDomain)) {
     529            continue; // skip URLs we're not interested in
     530        }
     531        */
     532       
     533        // Work out default if basic URLs present in defaults map
     534        // If it is, use its value as default for this URL         
     535        //String predefQualityLevel = predefinedDefaultsMap.get(basicURL);
     536       
     537        String countryCode = csvRecord.get(COUNTRY_CODE_COLUMN);
     538        String isReallyInMRI = "";
     539        String qualityLevel = "";
     540       
     541        if(csvRecord.isSet(IS_REALLY_IN_MRI_COLUMN)) {
     542            isReallyInMRI = csvRecord.get(IS_REALLY_IN_MRI_COLUMN);
     543        }
     544       
     545        if(csvRecord.isSet(QUALITY_LEVEL_COLUMN)) {
     546            qualityLevel = csvRecord.get(QUALITY_LEVEL_COLUMN);
     547
     548            // Force valid values or ""
     549            qualityLevel = getFullQualityLevelNameUppercased(qualityLevel);
     550        }       
     551       
     552        if(terminate || !qualityLevel.equals(fieldValue)
     553           || basicURL.equals("biblehub.com") || basicURL.equals("m.biblepub.com")) {
     554            // if(terminate) on Ctrl-D, don't stop processing csv records
     555            // Instead, copy remaining records of input csv file into output csv file
     556
     557            // Similarly, if the qualityLevel field does not have the value we're interested in
     558            // then just write it out as-is
     559            csvWriter.printRecord(url, countryCode, isReallyInMRI, qualityLevel);
     560            csvWriter.flush();
     561            logger.info("Got record " + recordCount + ": " + url + " - " + countryCode
     562                + " - " + isReallyInMRI + " - " + qualityLevel);           
     563        }
     564        else {
     565           
     566            // First, display full text for web page record with matching url
     567            // so the user can look at it to decide whether it is indeed overall in MRI or not.
     568            String fulltext = mongodbQueryer.displayFullTextOfPage(url);
     569            System.err.println(String.format("\nFULL-TEXT for record %d:\n%s\n", recordCount, fulltext));
     570           
     571            //logger.info("Got record " + recordCount + ": " + url + " - " + countryCode + " - " + qualityLevel);
     572           
     573            // Read Input until Ctrl-D: read System.In as bufferedReader
     574            // https://stackoverflow.com/questions/5837823/read-input-until-controld
     575            // Ctrl-C is already taken care if, see
     576            // https://coderanch.com/t/279136/java/terminated-program-Control-close-open
     577            // "Whenever a process is terminated/killed(CTRL-C), the file descriptors are released. You really do not need to close the stream in such cases."
     578            // So I just need to flush the csv print writer after every record is written
     579            // and Ctrl-C won't lose any of the data thus far entered by the user.
     580           
     581            BufferedReader systemIn = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));           
     582           
     583            boolean done = false;
     584           
     585            System.out.println(String.format(USER_PROMPT, recordCount, url, countryCode));
     586            //if(predefQualityLevel != null) {
     587            //System.err.println("\tDefault for this domain: " + predefQualityLevel
     588            //+ ". Press Enter to accept >");
     589            //}
     590           
     591            if(qualityLevel.equals(fieldValue)) {
     592            System.err.println("\t" + fieldValue + " entered last time. Press Enter to keep >");
     593            }
     594            while(!done && ((qualityLevel = systemIn.readLine()) != null)) {           
     595            //logger.debug("@@ Got: |" + qualityLevel + "|");
     596
     597            // If the user hit enter, it means they accepted the previous value entered
     598            if(qualityLevel.equals("")) { // User just hit enter without other chars               
     599                qualityLevel = fieldValue;             
     600            }
     601            else {
     602                // force valid values - will return "" if invalid value
     603                qualityLevel = getFullQualityLevelNameUppercased(qualityLevel);
     604            }
     605
     606            // only if qualityLevel entered was invalid, would it now
     607            // have been changed to ""
     608            if(!qualityLevel.equals("")) {             
     609                done = true;
     610            } else {
     611                System.out.println("@@ UNRECOGNISED. "
     612                   + String.format(USER_PROMPT, recordCount, url, countryCode));
     613            }
     614            }           
     615           
     616            // Save the CSV record - even if quality level is null
     617            // Because we don't want to lose the line that used to exist in the file
     618            csvWriter.printRecord(url, countryCode, isReallyInMRI, qualityLevel);
     619            csvWriter.flush();
     620           
     621            if(qualityLevel == null) { // if sys.in readLine() was terminated with Ctrl-D
     622            terminate = true;
     623            System.out.println("--- Got Ctrl-D (Lin)/Ctrl-Z (Win). Terminating. ---");
     624            } else {
     625            System.out.println("User entered: " + qualityLevel);
     626           
     627            }           
     628        }
     629        }
     630
     631        if(terminate = true) {
     632        System.out.println("User entered Ctrl-D (Lin)/Ctrl-Z (Win) - terminating.");
     633        }
     634       
     635    } catch(Exception e) {
     636        e.printStackTrace();
     637        logger.error("Exception occurred when processing CSV file or writing out file:\n"
     638             + Utility.getFilePath(tmpOutFile));
     639        logger.error(e.getMessage(), e);
     640    }
     641
     642    }
    494643   
    495644    public static void printUsage() {
     
    539688        final ManualURLInspection inspector = new ManualURLInspection(mongodb, inputFile);
    540689
    541 
    542690        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
    543691        public void run() {
     
    553701
    554702        //String filename = inspector.processCSV();
    555         String filename = inspector.processCSV_QualityLevelColumn();
    556        
    557        
    558         logger.info("Generated temp CSV file: " + filename);
     703        //String filename = inspector.processCSV_QualityLevelColumn();
     704
     705       
     706        inspector.reviewQualityLevelFieldFor("SIGNIFICANTLY_MAORI");
     707       
     708        //logger.info("Generated temp CSV file: " + filename);
    559709        logger.info("*************************************");
    560710    } catch(Exception e) {
Note: See TracChangeset for help on using the changeset viewer.