Changeset 33948
- Timestamp:
- 2020-02-18T22:56:44+13:00 (4 years ago)
- Location:
- other-projects/maori-lang-detection
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/mongodb-data/random260_manualList_globalDomains_whereAPageContainsMRI.txt
r33947 r33948 3 3 https://paekupu.co.nz/word/awatea,NZ,Y,MIXED_TEXT 4 4 http://pukoro.co.nz/mi/vocab-with-kori/twigs/,NZ,Y,SIGNIFICANTLY_MAORI 5 http://tiritiowaitangi.govt.nz/maps/landconfiscations.php,NZ,Y, SIGNIFICANTLY_MAORI5 http://tiritiowaitangi.govt.nz/maps/landconfiscations.php,NZ,Y,MIXED_TEXT 6 6 http://www.tmoa.tki.org.nz/Karere-Hou/Hine-Takurua-raua-ko-Hine-Raumati-ma-te-pouako,NZ,Y,SIGNIFICANTLY_MAORI 7 7 https://tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/takaro-raumati/,NZ,Y,NAV … … 9 9 http://www.tmoa.tki.org.nz/Karere-Hou/He-hotaka-whakangungu,NZ,Y,NAV 10 10 https://tetaurawhiri.govt.nz/kaupapa/panui-papaho/kaua-e-whiua-whanaia-dont-pass-it-kick-it/,NZ,Y,SIGNIFICANTLY_MAORI 11 http://www.pakanae.maori.nz/ng257-wai-o-hokianga.html,NZ,Y, SIGNIFICANTLY_MAORI11 http://www.pakanae.maori.nz/ng257-wai-o-hokianga.html,NZ,Y,POEMS_OR_SONGS 12 12 http://www.tmoa.tki.org.nz/Karere-Hou/He-purongo-TMoA-me-NWRM,NZ,Y,SIGNIFICANTLY_MAORI 13 13 http://www.tkkmmokopuna.school.nz/1/easy_pages/3-mana-kura-tahi-tkkm-o-nga-mokopuna,NZ,Y,NAV … … 19 19 https://tiritiowaitangi.govt.nz/story/leaduptotreaty.php,NZ,Y,SIGNIFICANTLY_MAORI 20 20 http://www.waiata.maori.nz/index.php/browse/genre,NZ,Y,POEMS_OR_SONGS 21 https://csunplugged.org/mi/topics/,US,Y, SIGNIFICANTLY_MAORI22 http://www.teipukarea.maori.nz/research-mi-nz/nga-kaupapa-rangahau-o-mua/,NZ,Y, SIGNIFICANTLY_MAORI21 https://csunplugged.org/mi/topics/,US,Y,MAORI_PARAGRAPHS 22 http://www.teipukarea.maori.nz/research-mi-nz/nga-kaupapa-rangahau-o-mua/,NZ,Y,MAORI_PARAGRAPHS 23 23 https://www.tetaurawhiri.govt.nz/kaupapa/tatai-korero-2/1800/,NZ,Y,SIGNIFICANTLY_MAORI 24 24 http://tiritiowaitangi.govt.nz/story/textdifferences.php,NZ,Y,SIGNIFICANTLY_MAORI 25 25 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Hangarau/1.4-Te-whakaputa-i-tetahi-hua-hangarau-no-roto-mai-i-te-matauranga-Maori-hei-whakaea-i-tetahi-tauaki,NZ,Y,NAV 26 26 http://www.waiata.maori.nz/en/song/kua-eke-raa-ngaa-tuumanako,NZ,Y,POEMS_OR_SONGS 27 http://animations.tewhanake.maori.nz/te-kohure/te-wahanga-tuarua/380,NZ,Y, SIGNIFICANTLY_MAORI27 http://animations.tewhanake.maori.nz/te-kohure/te-wahanga-tuarua/380,NZ,Y,MAORI_PARAGRAPHS 28 28 http://www.tmoa.tki.org.nz/Pakiwaitara-Marau/Tutarawananga-Kia-hangai-ki-nga-tamariki,NZ,Y,SIGNIFICANTLY_MAORI 29 29 http://firstworldwar.tki.org.nz/mi/nga-kaupapa/tuakiri-me-te-tuku-ihotanga/,NZ,Y,SIGNIFICANTLY_MAORI … … 36 36 http://pukoro.co.nz/mi/computer-games/game-3-skiing-game,NZ,Y,WORDS 37 37 https://www.tekura.school.nz/dual-providers-mi-nz/first-steps-mi-nz/online-learning-mi-nz/,NZ,Y,SIGNIFICANTLY_MAORI 38 http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Takapiringa,NZ,Y, SIGNIFICANTLY_MAORI39 https://kupengahao.co.nz/items/he-tautoko-i-te-marautanga-pangarau-te-tauira-me-te-panga/,NZ,Y, SIGNIFICANTLY_MAORI38 http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Takapiringa,NZ,Y,MAORI_PARAGRAPHS 39 https://kupengahao.co.nz/items/he-tautoko-i-te-marautanga-pangarau-te-tauira-me-te-panga/,NZ,Y,MAORI_PARAGRAPHS 40 40 https://paekupu.co.nz/word/aputa-rarangi,NZ,Y,MIXED_TEXT 41 http://www.tmoa.tki.org.nz/Mataiako/Te-Waharoa-Ararau/Te-Waharoa-Ararau-Modules,NZ,Y, SIGNIFICANTLY_MAORI41 http://www.tmoa.tki.org.nz/Mataiako/Te-Waharoa-Ararau/Te-Waharoa-Ararau-Modules,NZ,Y,OTHER_LANGUAGES 42 42 https://www.twttoa.com/future-events--contact-details.html,US,Y,MIXED_TEXT 43 https://biblehub.com/mao/1_kings/6.htm,US,Y, SIGNIFICANTLY_MAORI43 https://biblehub.com/mao/1_kings/6.htm,US,Y, 44 44 http://m.biblepub.com/bibles/mb/19/23,US,Y,SIGNIFICANTLY_MAORI 45 45 https://www.tetaurawhiri.govt.nz/ratonga/,NZ,Y,NAV 46 46 http://www.tkkmmokopuna.school.nz/1/image_galleries/3-2018-whakaahua,NZ,Y,NAV 47 47 http://www.tmoa.tki.org.nz/Pakiwaitara-Marau/Te-Kura-o-Takaro,NZ,Y,SIGNIFICANTLY_MAORI 48 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Hauora,NZ,Y, SIGNIFICANTLY_MAORI48 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Hauora,NZ,Y,MAORI_PARAGRAPHS 49 49 http://www.waiata.maori.nz/mri/song/te-haakii-o-wiremu-taamehana,NZ,Y,POEMS_OR_SONGS 50 50 http://kuraaiwi.maori.nz/m333-ng257-tumuaki.html,NZ,Y,LITTLE_TEXT … … 52 52 https://www.tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/nga-rerenga-korero-phrases/,NZ,Y,NAV 53 53 https://www.tetaurawhiri.govt.nz/kaupapa/panui-papaho/he-whakaari-pukuhohe-reo-maori-hou/,NZ,Y,SIGNIFICANTLY_MAORI 54 https://ngatiporoukiponeke.org.nz/hikurangi-maunga-kati-ra/,NZ,Y, SIGNIFICANTLY_MAORI54 https://ngatiporoukiponeke.org.nz/hikurangi-maunga-kati-ra/,NZ,Y,POEMS_OR_SONGS 55 55 http://www.tmoa.tki.org.nz/Mataiako/Te-Waharoa-Ararau/Nga-rauemi-mo-TWA,NZ,Y,MIXED_TEXT 56 56 http://www.tereowrap.nz/tmwo_upoko04.html,NZ,Y,SIGNIFICANTLY_MAORI … … 69 69 http://kuraproductions.co.nz/mi/shows/taniwha-rau,NZ,Y,SIGNIFICANTLY_MAORI 70 70 http://www.firstworldwar.tki.org.nz/mi/rauemi/he-tohu-pakanga-he-tohu-kapa/,NZ,Y,SIGNIFICANTLY_MAORI 71 https://ngatiporoukiponeke.org.nz/karangatia-ra/,NZ,Y, SIGNIFICANTLY_MAORI71 https://ngatiporoukiponeke.org.nz/karangatia-ra/,NZ,Y,POEMS_OR_SONGS 72 72 http://kurakokiri.maori.nz/students.html,NZ,Y,SIGNIFICANTLY_MAORI 73 http://tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Kawea-te-Rongo-Ki-Mai-He-Whakamatau-Reo-Korero-JOST,NZ,Y, SIGNIFICANTLY_MAORI74 https://kupengahao.co.nz/items/tihei-pangarau-te-tau-me-te-taurangi-te-pukapuka-aratohu-ma-te-pouako-taumata-3/,NZ,Y, SIGNIFICANTLY_MAORI73 http://tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Te-Reo-Maori/Korero/Kawea-te-Rongo-Ki-Mai-He-Whakamatau-Reo-Korero-JOST,NZ,Y,MAORI_PARAGRAPHS 74 https://kupengahao.co.nz/items/tihei-pangarau-te-tau-me-te-taurangi-te-pukapuka-aratohu-ma-te-pouako-taumata-3/,NZ,Y,MAORI_PARAGRAPHS 75 75 http://pukoro.co.nz/mi/vocab-with-kori/bib/,NZ,Y,SIGNIFICANTLY_MAORI 76 http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Pangarau/Apitihanga-Uiui-Rautaki-GloSS,NZ,Y, SIGNIFICANTLY_MAORI76 http://www.tmoa.tki.org.nz/Mataiako/Wahanga-Aromatawai/Pangarau/Apitihanga-Uiui-Rautaki-GloSS,NZ,Y,MAORI_PARAGRAPHS 77 77 http://tmoa.tki.org.nz/Pakiwaitara-Marau/Te-Kura-Kaupapa-Maori-o-Ngati-Ruanui,NZ,Y,SIGNIFICANTLY_MAORI 78 78 https://tehiku.nz/archives/te-tai-tokerau-kapa-haka-regionals-2011/4463/kaitaia-college-seniors,NZ,Y,MIXED_TEXT … … 87 87 https://www.tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/christmas/,NZ,Y,NAV 88 88 http://tiritiowaitangi.govt.nz/maps/maoriland1939.php,NZ,Y,NAV 89 http://tmoa.tki.org.nz/Pakiwaitara-Marau/Tutarawananga-Nga-pumanawa-matua,NZ,Y, SIGNIFICANTLY_MAORI89 http://tmoa.tki.org.nz/Pakiwaitara-Marau/Tutarawananga-Nga-pumanawa-matua,NZ,Y,MAORI_PARAGRAPHS 90 90 https://mi.wikipedia.org/wiki/Katea,US,Y,MIXED_TEXT 91 91 http://www.livingheritage.org.nz/schools/secondary/te-ara-whanui/toku-kura/index.php,NZ,Y,SIGNIFICANTLY_MAORI 92 http://www.twtop.school.nz/66/blogs/1-te-ohinga-panui/posts/12-term-2-week-1,NZ,Y, SIGNIFICANTLY_MAORI92 http://www.twtop.school.nz/66/blogs/1-te-ohinga-panui/posts/12-term-2-week-1,NZ,Y,MAORI_PARAGRAPHS 93 93 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Putaiao/AS-3.3-v1-Te-tatari-i-nga-taukumekume-mo-te-whakamahi-i-nga-rawa-o-Papatuanuku-Tauira,NZ,Y,NAV 94 94 https://www.tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/kia-ora-general-phrases/,NZ,Y,NAV … … 100 100 http://pukoro.co.nz/mi/learning-points/autumn/,NZ,Y,LITTLE_TEXT 101 101 http://tiritiowaitangi.govt.nz/quotes/1920.php,NZ,Y,SIGNIFICANTLY_MAORI 102 https://kupengahao.co.nz/portfolio-filter/putaiao/,NZ,Y, SIGNIFICANTLY_MAORI102 https://kupengahao.co.nz/portfolio-filter/putaiao/,NZ,Y,MAORI_PARAGRAPHS 103 103 http://m.biblepub.com/bibles/mb/15/3,US,Y,SIGNIFICANTLY_MAORI 104 104 http://www.twtop.school.nz/62/learning_cave_sets/22-tuwharetoatanga,NZ,Y,MIXED_TEXT … … 108 108 https://www.twttoa.com/rangatira-pita-sharples-5.html,US,Y,NAV 109 109 http://www.tmoa.tki.org.nz/Mataiako/Whiriwhiria-tetahi-rauemi-aromatawai,NZ,Y,LITTLE_TEXT 110 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Nga-whainga-paetae-o-TMoA,NZ,Y, SIGNIFICANTLY_MAORI110 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Nga-whainga-paetae-o-TMoA,NZ,Y,MAORI_PARAGRAPHS 111 111 http://pukoro.co.nz/mi/crafts/making-animation,NZ,Y,SIGNIFICANTLY_MAORI 112 112 http://pukoro.co.nz/mi/crafts/making-scones/,NZ,Y,SIGNIFICANTLY_MAORI … … 119 119 http://pukoro.co.nz/mi/kohanga-of-the-week/te-wharekura-o-tauranga-moana/,NZ,Y,NAV 120 120 http://tmoa.tki.org.nz/Karere-Hou/Wanangatia-te-Putanga-Tauira,NZ,Y,SIGNIFICANTLY_MAORI 121 https://kupengahao.co.nz/items/te-poutama-tau-pukapuka-tuarima-te-tapiritanga-te-tangohanga-me-te-uara-tu/,NZ,Y, SIGNIFICANTLY_MAORI121 https://kupengahao.co.nz/items/te-poutama-tau-pukapuka-tuarima-te-tapiritanga-te-tangohanga-me-te-uara-tu/,NZ,Y,MAORI_PARAGRAPHS 122 122 http://www.tmoa.tki.org.nz/Kura/Te-Reo-a-Waha/Nga-ataata/Te-Reo-a-Waha-Tau-7-8,NZ,Y,NAV 123 123 http://www.firstworldwar.tki.org.nz/mi/rauemi/mano-reo-mano-whakaaro/,NZ,Y,SIGNIFICANTLY_MAORI … … 126 126 http://m.biblepub.com/bibles/mb/1/42,US,Y,SIGNIFICANTLY_MAORI 127 127 http://www.tmoa.tki.org.nz/Nga-Whanaketanga-Rumaki-Maori/Nga-Whanaketanga-Rumaki-Maori-me-Ka-Hikitia-Accelerating-Success-2013-2017,NZ,Y,SIGNIFICANTLY_MAORI 128 http://tmoa.tki.org.nz/Karere-Hou/Matariki,NZ,Y, SIGNIFICANTLY_MAORI128 http://tmoa.tki.org.nz/Karere-Hou/Matariki,NZ,Y,MAORI_PARAGRAPHS 129 129 https://www.terito.school.nz/2017-3/,NZ,Y,NAV 130 130 https://mi.m.wikipedia.org/wiki/1788,US,Y,WORDS … … 156 156 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/He-Karere-Hou,NZ,Y,SIGNIFICANTLY_MAORI 157 157 http://www.waiata.maori.nz/en/song/aku-kanohi-kau,NZ,Y,POEMS_OR_SONGS 158 http://tmoa.tki.org.nz/Karere-Hou/Te-Tautoko-76,NZ,Y, SIGNIFICANTLY_MAORI158 http://tmoa.tki.org.nz/Karere-Hou/Te-Tautoko-76,NZ,Y,MAORI_PARAGRAPHS 159 159 http://www.tmoa.tki.org.nz/Karere-Hou/Te-whakahounga-o-Te-Waharoa-Ararau,NZ,Y,SIGNIFICANTLY_MAORI 160 160 http://pukoro.co.nz/mi/stories/the-disheartened-possum/,NZ,Y,SIGNIFICANTLY_MAORI … … 187 187 https://tetaurawhiri.govt.nz/rauemi-a-ipurangi/rauemi/akina-te-reo-maramataka/,NZ,Y,NAV 188 188 http://tmoa.tki.org.nz/Te-Marautanga-o-Aotearoa/Te-Anga/Te-Putake-o-Te-Marautanga-o-Aotearoa,NZ,Y,SIGNIFICANTLY_MAORI 189 http://ngatipahauwera.co.nz/nga-konae/,NZ,Y, SIGNIFICANTLY_MAORI189 http://ngatipahauwera.co.nz/nga-konae/,NZ,Y,POEMS_OR_SONGS 190 190 https://mi.m.wikipedia.org/wiki/1748,US,Y,WORDS 191 191 http://tmoa.tki.org.nz/Mataiako/Ako/Ka-arotahi-te-aromatawai-ki-te-akonga,NZ,Y,SIGNIFICANTLY_MAORI 192 192 http://www.waiata.maori.nz/mri/song/aku-kanohi-kau,NZ,Y,POEMS_OR_SONGS 193 193 https://sexualviolence.victimsinfo.govt.nz/mi/glossary/showterm/kaitohutohu+p%c4%81rurenga+koeretanga/,NZ,Y,SIGNIFICANTLY_MAORI 194 http://tmoa.tki.org.nz/Kura/He-Reo-Moteatea-Tirohanga-whanui/Ka-Eke-ki-Wairaka-Maori/Ka-Eke-ki-Wairaka,NZ,Y, SIGNIFICANTLY_MAORI194 http://tmoa.tki.org.nz/Kura/He-Reo-Moteatea-Tirohanga-whanui/Ka-Eke-ki-Wairaka-Maori/Ka-Eke-ki-Wairaka,NZ,Y,POEMS_OR_SONGS 195 195 http://www.waiata.maori.nz/en/song/mahara,NZ,Y,POEMS_OR_SONGS 196 196 https://mi.m.wikipedia.org/wiki/1886,US,Y,WORDS … … 237 237 https://www.tetaurawhiri.govt.nz/kaupapa/panui-papaho/te-reo-maori-revitalisation-celebrated/,NZ,Y,SIGNIFICANTLY_MAORI 238 238 http://tmoa.tki.org.nz/Pakiwaitara-Marau/Te-Kura-o-Titahi-ki-te-Raki,NZ,Y,SIGNIFICANTLY_MAORI 239 http://kuraproductions.co.nz/mi/shows/only-in-aotearoa,NZ,Y, SIGNIFICANTLY_MAORI239 http://kuraproductions.co.nz/mi/shows/only-in-aotearoa,NZ,Y,MAORI_PARAGRAPHS 240 240 http://www.tmoa.tki.org.nz/Akonga,NZ,Y,MAORI_PARAGRAPHS 241 241 http://www.tmoa.tki.org.nz/Pakiwaitara-Marau/Tatari-Tautoko-Tauawhi-Nawton-School,NZ,Y,SIGNIFICANTLY_MAORI … … 247 247 http://tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/Te-Reo-Rangatira/TRR-1.11-Te-whakaatu-i-nga-putanga-rangahau,NZ,Y,NAV 248 248 https://tetaurawhiri.govt.nz/kaupapa/panui-papaho/nga-tohu-reo-maori/,NZ,Y,SIGNIFICANTLY_MAORI 249 http://pukoro.co.nz/mi/songs-and-lyrics/at-school/,NZ,Y, SIGNIFICANTLY_MAORI249 http://pukoro.co.nz/mi/songs-and-lyrics/at-school/,NZ,Y,POEMS_OR_SONGS 250 250 http://pukoro.co.nz/mi/learning-points/test-learning-points/,NZ,Y,SINGLE_MRI_SENTENCE 251 http://www.kura-porirua.school.nz/194/profile_areas/5-te-rarangi-kaimahi-staff-directory,NZ,Y, SIGNIFICANTLY_MAORI251 http://www.kura-porirua.school.nz/194/profile_areas/5-te-rarangi-kaimahi-staff-directory,NZ,Y,NAV 252 252 http://www.tmoa.tki.org.nz/Pakiwaitara-Marau,NZ,Y,SIGNIFICANTLY_MAORI 253 253 http://www.tmoa.tki.org.nz/Taumata-Matauranga-a-Motu-Ka-Taea/He-Maherehere-Paerewa-Paetae2,NZ,Y,NAV -
other-projects/maori-lang-detection/src/org/greenstone/atea/ManualURLInspection.java
r33946 r33948 492 492 return ""; 493 493 } 494 495 496 public void reviewQualityLevelFieldFor(/*String basicDomain,*/ String fieldValue) { 497 final String USER_PROMPT = "Enter qualityLevel value of\n\t? | (N)AV | (L)ITTLE_TEXT | (M)IXED_TEXT | (S)IGNIFICANTLY_MAORI | MAORI_(P)ARAGRAPHS" 498 + "\n\t | PO(E)MS_OR_SONGS | S(I)NGLE_MRI_SENTENCE | (W)ORDS | (O)THER_LANGUAGES\n\tfor (%d): %s - %s > "; 499 //"Enter isMRI value of Y|N|? for (" + count + "): " + url + " - " + countryCode + " > "; 500 501 boolean terminate = false; 502 CSVParser parser = null; 503 504 try { 505 parser = CSVParser.parse(webPageURLsCSVFile, java.nio.charset.Charset.forName("US-ASCII"), CSVFormat.RFC4180); 506 } catch(Exception e) { 507 logger.error("Failed to parse input CSV file " + Utility.getFilePath(webPageURLsCSVFile), e); 508 return; 509 } 510 511 try ( 512 CSVPrinter csvWriter = new CSVPrinter(new FileWriter(tmpOutFile), CSVFormat.DEFAULT.withQuoteMode(QuoteMode.MINIMAL)); 513 ) { 514 515 int recordCount = 0; 516 for (CSVRecord csvRecord : parser) { 517 518 String url = csvRecord.get(URL_COLUMN); 519 if(url.equals("")) { // skip empty lines 520 continue; 521 } 522 523 recordCount++; 524 525 526 String basicURL = Utility.stripProtocolAndWWWFromURL(Utility.getDomainForURL(url, false)); 527 /* 528 if(!basicURL.equals(basicDomain)) { 529 continue; // skip URLs we're not interested in 530 } 531 */ 532 533 // Work out default if basic URLs present in defaults map 534 // If it is, use its value as default for this URL 535 //String predefQualityLevel = predefinedDefaultsMap.get(basicURL); 536 537 String countryCode = csvRecord.get(COUNTRY_CODE_COLUMN); 538 String isReallyInMRI = ""; 539 String qualityLevel = ""; 540 541 if(csvRecord.isSet(IS_REALLY_IN_MRI_COLUMN)) { 542 isReallyInMRI = csvRecord.get(IS_REALLY_IN_MRI_COLUMN); 543 } 544 545 if(csvRecord.isSet(QUALITY_LEVEL_COLUMN)) { 546 qualityLevel = csvRecord.get(QUALITY_LEVEL_COLUMN); 547 548 // Force valid values or "" 549 qualityLevel = getFullQualityLevelNameUppercased(qualityLevel); 550 } 551 552 if(terminate || !qualityLevel.equals(fieldValue) 553 || basicURL.equals("biblehub.com") || basicURL.equals("m.biblepub.com")) { 554 // if(terminate) on Ctrl-D, don't stop processing csv records 555 // Instead, copy remaining records of input csv file into output csv file 556 557 // Similarly, if the qualityLevel field does not have the value we're interested in 558 // then just write it out as-is 559 csvWriter.printRecord(url, countryCode, isReallyInMRI, qualityLevel); 560 csvWriter.flush(); 561 logger.info("Got record " + recordCount + ": " + url + " - " + countryCode 562 + " - " + isReallyInMRI + " - " + qualityLevel); 563 } 564 else { 565 566 // First, display full text for web page record with matching url 567 // so the user can look at it to decide whether it is indeed overall in MRI or not. 568 String fulltext = mongodbQueryer.displayFullTextOfPage(url); 569 System.err.println(String.format("\nFULL-TEXT for record %d:\n%s\n", recordCount, fulltext)); 570 571 //logger.info("Got record " + recordCount + ": " + url + " - " + countryCode + " - " + qualityLevel); 572 573 // Read Input until Ctrl-D: read System.In as bufferedReader 574 // https://stackoverflow.com/questions/5837823/read-input-until-controld 575 // Ctrl-C is already taken care if, see 576 // https://coderanch.com/t/279136/java/terminated-program-Control-close-open 577 // "Whenever a process is terminated/killed(CTRL-C), the file descriptors are released. You really do not need to close the stream in such cases." 578 // So I just need to flush the csv print writer after every record is written 579 // and Ctrl-C won't lose any of the data thus far entered by the user. 580 581 BufferedReader systemIn = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); 582 583 boolean done = false; 584 585 System.out.println(String.format(USER_PROMPT, recordCount, url, countryCode)); 586 //if(predefQualityLevel != null) { 587 //System.err.println("\tDefault for this domain: " + predefQualityLevel 588 //+ ". Press Enter to accept >"); 589 //} 590 591 if(qualityLevel.equals(fieldValue)) { 592 System.err.println("\t" + fieldValue + " entered last time. Press Enter to keep >"); 593 } 594 while(!done && ((qualityLevel = systemIn.readLine()) != null)) { 595 //logger.debug("@@ Got: |" + qualityLevel + "|"); 596 597 // If the user hit enter, it means they accepted the previous value entered 598 if(qualityLevel.equals("")) { // User just hit enter without other chars 599 qualityLevel = fieldValue; 600 } 601 else { 602 // force valid values - will return "" if invalid value 603 qualityLevel = getFullQualityLevelNameUppercased(qualityLevel); 604 } 605 606 // only if qualityLevel entered was invalid, would it now 607 // have been changed to "" 608 if(!qualityLevel.equals("")) { 609 done = true; 610 } else { 611 System.out.println("@@ UNRECOGNISED. " 612 + String.format(USER_PROMPT, recordCount, url, countryCode)); 613 } 614 } 615 616 // Save the CSV record - even if quality level is null 617 // Because we don't want to lose the line that used to exist in the file 618 csvWriter.printRecord(url, countryCode, isReallyInMRI, qualityLevel); 619 csvWriter.flush(); 620 621 if(qualityLevel == null) { // if sys.in readLine() was terminated with Ctrl-D 622 terminate = true; 623 System.out.println("--- Got Ctrl-D (Lin)/Ctrl-Z (Win). Terminating. ---"); 624 } else { 625 System.out.println("User entered: " + qualityLevel); 626 627 } 628 } 629 } 630 631 if(terminate = true) { 632 System.out.println("User entered Ctrl-D (Lin)/Ctrl-Z (Win) - terminating."); 633 } 634 635 } catch(Exception e) { 636 e.printStackTrace(); 637 logger.error("Exception occurred when processing CSV file or writing out file:\n" 638 + Utility.getFilePath(tmpOutFile)); 639 logger.error(e.getMessage(), e); 640 } 641 642 } 494 643 495 644 public static void printUsage() { … … 539 688 final ManualURLInspection inspector = new ManualURLInspection(mongodb, inputFile); 540 689 541 542 690 Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() { 543 691 public void run() { … … 553 701 554 702 //String filename = inspector.processCSV(); 555 String filename = inspector.processCSV_QualityLevelColumn(); 556 557 558 logger.info("Generated temp CSV file: " + filename); 703 //String filename = inspector.processCSV_QualityLevelColumn(); 704 705 706 inspector.reviewQualityLevelFieldFor("SIGNIFICANTLY_MAORI"); 707 708 //logger.info("Generated temp CSV file: " + filename); 559 709 logger.info("*************************************"); 560 710 } catch(Exception e) {
Note:
See TracChangeset
for help on using the changeset viewer.