source: gs3-extensions/maori-lang-detection/src/org/greenstone/atea/MaoriTextDetector.java@ 33585

Last change on this file since 33585 was 33585, checked in by ak19, 5 years ago

Much simpler way of using sentence and language detection model to work on a single sentence at a time. Not sure if it is truly best way, but at least as good or better than my older attempts. Committing with debugging.

File size: 26.0 KB
Line 
1/**
2 * Class that uses OpenNLP with the Language Detection Model to determine, with a default
3 * or configurable level of confidence, whether text (from a file or stdin) is in Māori or not.
4 * Internal functions can be used for detecting any of the 103 languages currently supported by
5 * the OpenNLP Language Detection Model.
6 *
7 * http://opennlp.apache.org/news/model-langdetect-183.html
8 * language detector model: http://opennlp.apache.org/models.html
9 * Pre-trained models for OpenNLP 1.5: http://opennlp.sourceforge.net/models-1.5/
10 * Use of Apache OpenNLP in general:
11 * http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
12 * Use of OpenNLP for language detection:
13 * http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
14 *
15 * This code was based on the information and sample code at the above links and the links dispersed throughout this file.
16 * See also the accompanying README file.
17 *
18 * July 2019
19 */
20
21package org.greenstone.atea;
22
23import java.io.*;
24import opennlp.tools.langdetect.*;
25import opennlp.tools.sentdetect.*;
26import opennlp.tools.util.*;
27
28import java.util.ArrayList;
29
30/**
31 * EXPORT OPENNLP_HOME environment variable to be your apache OpenNLP installation.
32 * Create a folder called "models" within the $OPENNLP_HOME folder, and put the file "langdetect-183.bin" in there
33 * (which is the language detection model zipped up and renamed to .bin extension).
34 *
35 * Then, to compile this program, do the following from the "src" folder (the folder containing this java file):
36 * maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org/greenstone/atea/MaoriTextDetector.java
37 *
38 * To run this program, issue one of the following commands from the "src" folder (the folder containing this java file):
39 *
40 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org.greenstone.atea.MaoriTextDetector --help
41 *
42 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org.greenstone.atea.MaoriTextDetector --file <full/path/to/textfile>
43 *
44 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org.greenstone.atea.MaoriTextDetector -
45 * Press enter. This variant of the program expects text to stream in from standard input.
46 * If entering text manually, then remember to press Ctrl-D to indicate the usual end of StdIn.
47 *
48 * https://stackoverflow.com/questions/219585/including-all-the-jars-in-a-directory-within-the-java-classpath
49 * Also has information on how to run this class if it's in a Java package.
50 */
51public class MaoriTextDetector {
52 /** The 3 letter language code for Maori in ISO 639-2 or ISO 639-3 */
53 public static final String MAORI_3LETTER_CODE = "mri";
54 public static final double DEFAULT_MINIMUM_CONFIDENCE = 0.50;
55
56 /** Configurable: cut off minimum confidence value,
57 greater or equal to which determines that the best predicted language is acceptable to user of MaoriTextDetector. */
58 public final double MINIMUM_CONFIDENCE;
59
60 /** silentMode set to false means MaoriTextDetector will print helpful messages while running. Set to true to run silently. */
61 public final boolean silentMode;
62
63 private final String OPENNLP_MODELS_RELATIVE_PATH = "models" + File.separator;
64
65 /** Language Detection Model file for OpenNLP is expected to be at $OPENNLP_HOME/models/langdetect-183.bin */
66 private final String LANG_DETECT_MODEL_RELATIVE_PATH = OPENNLP_MODELS_RELATIVE_PATH + "langdetect-183.bin";
67
68 /**
69 * The LanguageDetector object that will do the actual language detection/prediction for us.
70 * Created once in the constructor, can be used as often as needed thereafter.
71 */
72 private LanguageDetector myCategorizer = null;
73
74 /**
75 * The Sentence Detection object that does the sentence splitting for the language
76 * the sentence model was trained for.
77 */
78 private SentenceDetectorME sentenceDetector = null;
79
80 /** String taken from our university website, https://www.waikato.ac.nz/maori/ */
81 public static final String TEST_MRI_INPUT_TEXT = "Ko tēnei te Whare Wānanga o Waikato e whakatau nei i ngā iwi o te ao, ki roto i te riu o te awa e rere nei, ki runga i te whenua e hora nei, ki raro i te taumaru o ngā maunga whakaruru e tau awhi nei.";
82
83 /** test input string for a negative result */
84 public static final String TEST_ENG_INPUT_TEXT = "The main program exits with -1 if an Exception occurred when attempting to detect the text's language";
85
86
/**
 * Convenience constructor: builds a detector whose acceptance threshold is
 * DEFAULT_MINIMUM_CONFIDENCE.
 *
 * @param silentMode handed on unchanged to the (boolean, double) constructor
 * @throws Exception whatever the constructor delegated to throws
 */
public MaoriTextDetector(boolean silentMode) throws Exception {
    this(silentMode, MaoriTextDetector.DEFAULT_MINIMUM_CONFIDENCE);
}
90
/**
 * Builds a detector that uses the sentence model file "mri-sent_trained.bin",
 * i.e. the sentence model we trained for Māori.
 *
 * @param silentMode     handed on unchanged to the three-argument constructor
 * @param min_confidence handed on unchanged to the three-argument constructor
 * @throws Exception whatever the three-argument constructor throws
 */
public MaoriTextDetector(boolean silentMode, double min_confidence) throws Exception {
    this(silentMode, min_confidence, "mri-sent_trained.bin");
}
95
96 /** More general constructor that can use sentence detector models for other languages */
97 public MaoriTextDetector(boolean silentMode, double min_confidence,
98 String sentenceModelFileName) throws Exception
99 {
100 this.silentMode = silentMode;
101 this.MINIMUM_CONFIDENCE = min_confidence;
102
103 // 1. Check we can find the Language Detect Model file in the correct location (check that $OPENNLP_HOME/models/langdetect-183.bin exists);
104 String langDetectModelPath = System.getenv("OPENNLP_HOME");
105 if(System.getenv("OPENNLP_HOME") == null) {
106 throw new Exception("\n\t*** Environment variable OPENNLP_HOME must be set to your Apache OpenNLP installation folder.");
107 }
108 langDetectModelPath = langDetectModelPath + File.separator + LANG_DETECT_MODEL_RELATIVE_PATH;
109 File langDetectModelBinFile = new File(langDetectModelPath);
110 if(!langDetectModelBinFile.exists()) {
111 throw new Exception("\n\t*** " + langDetectModelBinFile.getPath() + " doesn't exist."
112 + "\n\t*** Ensure the $OPENNLP_HOME folder contains a 'models' folder"
113 + "\n\t*** with the model file 'langdetect-183.bin' in it.");
114 }
115
116
117 // 2. Set up our language detector Model and the Categorizer for language predictions based on the Model.
118 // http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
119 // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
120 try (InputStream modelIn = new FileInputStream(langDetectModelPath)) {
121
122 LanguageDetectorModel model = new LanguageDetectorModel(modelIn);
123
124 // http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
125 this.myCategorizer = new LanguageDetectorME(model);
126 }/*catch(Exception e) {
127 e.printStackTrace();
128 }*/
129
130 // instantiating function should handle critical exceptions. Constructors shouldn't.
131
132
133
134 // 3. Set up our sentence model and SentenceDetector object
135 String sentenceModelPath = System.getenv("OPENNLP_HOME") + File.separator
136 + OPENNLP_MODELS_RELATIVE_PATH + sentenceModelFileName; // "mri-sent_trained.bin" default
137 File sentenceModelBinFile = new File(sentenceModelPath);
138 if(!sentenceModelBinFile.exists()) {
139 throw new Exception("\n\t*** " + sentenceModelBinFile.getPath() + " doesn't exist."
140 + "\n\t*** Ensure the $OPENNLP_HOME folder contains a 'models' folder"
141 + "\n\t*** with the model file "+sentenceModelFileName+" in it.");
142 }
143 try (InputStream modelIn = new FileInputStream(sentenceModelPath)) {
144 // https://www.tutorialspoint.com/opennlp/opennlp_sentence_detection.htm
145 SentenceModel sentenceModel = new SentenceModel(modelIn);
146 this.sentenceDetector = new SentenceDetectorME(sentenceModel);
147
148 } // instantiating function should handle this critical exception
149 }
150
151 /**
152 * In this class' constructor, need to have set up the Sentence Detection Model
153 * for the langCode passed in to this function in order for the output to make
154 * sense for that language.
155 */
156 public ArrayList<String> getAllSentencesInLanguage(String langCode, String text, double confidenceCutoff)
157 {
158
159 // we'll be storing just those sentences in text that are in the denoted language code
160 ArrayList<String> mriSentences = new ArrayList<String>();
161 // OpenNLP language detection works best with a minimum of 2 sentences
162 // See https://opennlp.apache.org/news/model-langdetect-183.html
163 // "It is important to note that this model is trained for and works well with
164 // longer texts that have at least 2 sentences or more from the same language."
165
166 // For evaluating single languages, I used a very small data set and found that
167 // if the primary language detected is MRI AND if the confidence is >= 0.1, the
168 // results appear reasonably to be in te reo Māori.
169
170 String[] sentences = sentenceDetector.sentDetect(text);
171
172 for(int i = 0; i < sentences.length; i++) {
173 String sentence = sentences[i];
174
175 //System.err.println(sentence);
176
177 Language bestLanguage = myCategorizer.predictLanguage(sentence);
178 double confidence = bestLanguage.getConfidence();
179
180 if(bestLanguage.getLang().equals(langCode) && confidence >= confidenceCutoff) {
181 System.err.println("Adding sentence: " + sentence + "\n");
182 mriSentences.add(sentence);
183 } else {
184 System.err.println("SKIPPING sentence: " + sentence + "\n");
185 }
186 }
187 return mriSentences;
188 }
189
190
191 public ArrayList<String> getAllSentencesInMaori(String text) throws Exception {
192 // big assumption here: that we can split incoming text into sentences
193 // for any language (using the Māori language trained sentence model),
194 // despite not knowing what language those sentences are in
195 // Hinges on MRI sentences detection being similar to at least ENG equivalent
196
197
198 // we'll be storing just those sentences in text that are in Māori.
199
200 // OpenNLP language detection works best with a minimum of 2 sentences
201 // See https://opennlp.apache.org/news/model-langdetect-183.html
202 // "It is important to note that this model is trained for and works well with
203 // longer texts that have at least 2 sentences or more from the same language."
204
205 // For evaluating single languages, I used a very small data set and found that
206 // if the primary language detected is MRI AND if the confidence is >= 0.1, the
207 // results appear reasonably to be in te reo Māori.
208
209 final double confidenceCutoff = 0.1;
210 return getAllSentencesInLanguage(MAORI_3LETTER_CODE, text, confidenceCutoff);
211 }
212
213
214 /**
215 * @return true if the input text is Maori (mri) with MINIMUM_CONFIDENCE levels of confidence (if set,
216 * else DEFAULT_MINIMUM_CONFIDENCE levels of confidence).
217 */
218 public boolean isTextInMaori(String text) {
219 return isTextInLanguage(MAORI_3LETTER_CODE, text);
220 }
221
222 /** @param langCode is 3 letter language code, ISO 639-2/3
223 * https://www.loc.gov/standards/iso639-2/php/code_list.php
224 * https://en.wikipedia.org/wiki/ISO_639-3
225 * @return true if the input text is Maori (mri) with MINIMUM_CONFIDENCE levels of confidence (if set,
226 * else DEFAULT_MINIMUM_CONFIDENCE levels of confidence).
227 */
228 public boolean isTextInLanguage(String langCode, String text) {
229 // Get the most probable language
230 Language bestLanguage = myCategorizer.predictLanguage(text);
231 doPrint("Best language: " + bestLanguage.getLang());
232 doPrint("Best language confidence: " + bestLanguage.getConfidence());
233
234 return (bestLanguage.getLang().equals(langCode) && bestLanguage.getConfidence() >= this.MINIMUM_CONFIDENCE);
235 }
236
237
238 /**
239 * Handle "smaller" textfiles/streams of text read in.
240 * Return value is the same as for isTextInMaori(String text);
241 */
242 public boolean isTextInMaori(BufferedReader reader) throws Exception {
243 return isTextInLanguage(MAORI_3LETTER_CODE, reader);
244 }
245 /**
246 * Handle "smaller" textfiles/streams of text read in.
247 * Return value is the same as for isTextInLanguage(String langCode, String text);
248 */
249 public boolean isTextInLanguage(String langCode, BufferedReader reader) throws Exception {
250 // https://stackoverflow.com/questions/326390/how-do-i-create-a-java-string-from-the-contents-of-a-file
251
252 StringBuilder text = new StringBuilder();
253 String line = null;
254
255
256 while((line = reader.readLine()) != null) { // readLine removes newline separator
257 text.append(line + "\n"); // add back (unix style) line ending
258 }
259 return isTextInLanguage(langCode, text.toString());
260 }
261
262 /*
263 * Need better handling of "larger" textfiles/streams of text read in:
264 * what if multiple languages with high confidence every NUM_LINES read in?
265 * Does this mean the file is multi-lingual with each section dominated by a different language?
266 * How best to convey such information to the user?
267 */
268 /**
269 * Rudimentary attempt to deal with very large files.
270 * Return value is the same as for isTextInMaori(String text);
271 */
272 public boolean isLargeTextInMaori(BufferedReader reader) throws Exception {
273 return isLargeTextInLanguage(MAORI_3LETTER_CODE, reader);
274 }
275
276 /**
277 * Rudimentary attempt to deal with very large files.
278 * Return value is the same as for isTextInLanguage(String langCode, String text);
279 */
280 public boolean isLargeTextInLanguage(String langCode, BufferedReader reader) throws Exception {
281 // https://stackoverflow.com/questions/326390/how-do-i-create-a-java-string-from-the-contents-of-a-file
282
283 final int NUM_LINES = 100; // arbitrary 100 lines read, predict language, calculate confidence
284
285 StringBuilder text = new StringBuilder();
286 String line = null;
287
288 double cumulativeConfidence = 0;
289 int numLoops = 0;
290
291 int i = 0;
292 String language = null;
293
294 while((line = reader.readLine()) != null) { // readLine removes newline separator
295 text.append(line + "\n"); // add back (unix style) line ending
296
297 i++; // read nth line of numLoop
298
299
300 if(i == NUM_LINES) { // arbitrary 100 lines read, predict language, calculate confidence
301
302
303 Language bestLanguage = myCategorizer.predictLanguage(text.toString());
304 if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines
305 doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");
306 }
307 language = bestLanguage.getLang();
308 cumulativeConfidence += bestLanguage.getConfidence();
309
310 doPrintErr("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
311
312 // finished analysing language of NUM_LINES of text
313 text = new StringBuilder();
314 i = 0;
315 numLoops++;
316 }
317 }
318
319 // process any (remaining) text that was less than n NUM_LINES
320 if(!text.toString().equals("")) {
321 text.append(line + "\n"); // add back (unix style) line ending
322 i++;
323
324 Language bestLanguage = myCategorizer.predictLanguage(text.toString());
325
326 if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines
327 doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");
328 }
329 language = bestLanguage.getLang();
330 cumulativeConfidence += bestLanguage.getConfidence();
331 doPrintErr("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
332 }
333
334
335 int totalLinesRead = numLoops * NUM_LINES + i; // not used
336 double avgConfidence = cumulativeConfidence/(numLoops + 1); // not quite the average as the text processed outside the loop may have fewer lines than NUM_LINES
337
338
339 return (language.equals(langCode) && avgConfidence >= this.MINIMUM_CONFIDENCE);
340 }
341
342
343 /**
344 * Prints to STDOUT the predicted languages of the input text in order of descending confidence.
345 * UNUSED.
346 */
347 public void predictedLanguages(String text) {
348 // Get an array with the most probable languages
349
350 Language[] languages = myCategorizer.predictLanguages(text);
351
352 if(languages == null || languages.length <= 0) {
353 doPrintErr("No languages predicted for the input text");
354 } else {
355 for(int i = 0; i < languages.length; i++) {
356 doPrint("Language prediction " + i + ": " + languages[i]);
357 }
358 }
359
360 }
361
362 public void doPrint(String msg) {
363 doPrint(this.silentMode, msg);
364 }
365 public void doPrintErr(String msg) {
366 doPrintErr(this.silentMode, msg);
367 }
368
369 /********** STATIC METHODS *************/
370
371 public static void doPrint(boolean runSilent, String msg) {
372 if(!runSilent) System.out.println(msg);
373 }
374 public static void doPrintErr(boolean runSilent, String msg) {
375 if(!runSilent) System.err.println(msg);
376 }
377
378 public static void printUsage() {
379 System.err.println("Run this program with:");
380 System.err.println("\t--help (-h)\tfor seeing this usage message again");
381 System.err.println("\t-\tto have input text read from STDIN (as always, hit Ctrl-D to mark end of text stream)");
382 System.err.println("\t--file (-f)\tto provide an input file path");
383 System.err.println("\t--silent (-s): optional, to run silently and just exit with exit value.");
384 System.err.println("\t--min-confidence (-c): optional, to override the default minimum confidence value (" + DEFAULT_MINIMUM_CONFIDENCE + ")");
385 System.err.println("\t\tof the predicted language that will be considered acceptable.");
386 System.err.println();
387 System.err.println("This program terminates with exit value:");
388 System.err.println("\t0 if the input text is in Maori");
389 System.err.println("\t1 if input text is not in Maori");
390 System.err.println();
391 System.err.println("\t-1 if the input arguments were wrong");
392 System.err.println("\t255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language");
393 System.err.println("\t2 if the user asked to run this program with --help/-h.");
394 System.err.println();
395 }
396
397 /**
398 * The main program exits with:
399 * 0 if text is in Maori;
400 * 1 if text is not in Maori;
401 *
402 * -1 if the input arguments were wrong
403 * 255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language
404 * QTODO: why does the program exit value end up as 255 and not -1 when returnVal remains at -1 on Exception?
405 * 2 if the user asked to run this program with --help/-h.
406 */
407 public static void main(String args[]) {
408 int returnVal = -1;
409
410 // 1. Check input arguments
411 boolean printUsage = false;
412 boolean readFromStdIn = false;
413 File inFile = null;
414 boolean runSilent = false;
415 double minConfidence = -1;
416
417 for (int i = 0; !printUsage && i < args.length; i++) {
418
419 // check for help first and quit after printing usage
420 if(args[i].equals("--help") || args[i].equals("-h")) {
421 printUsage = true;
422 returnVal = 2;
423 } else if(args[i].equals("--silent") || args[i].equals("-s")) {
424 runSilent = true;
425 } else if(args[i].equals("--min-confidence") || args[i].equals("-c")) {
426 i++;
427 if(i >= args.length) {
428 doPrintErr(runSilent, "ERROR: No minimum confidence value provided with --min-confidence|-c flag.\n");
429 printUsage = true;
430 returnVal = -1;
431 } else {
432 try {
433 minConfidence = Double.parseDouble(args[i]);
434 if(minConfidence < 0 || minConfidence > 1) {
435 throw new NumberFormatException("Number out of range, must be between 0-1");
436 }
437 } catch(NumberFormatException nfe) {
438 doPrintErr(runSilent, "ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n");
439 printUsage = true;
440 returnVal = -1;
441 }
442 }
443 } else if(args[i].equals("-")) {
444 readFromStdIn = true;
445 //break; // don't bother continuing to check input arguments for any --file flag if we're told to read from stdin
446 } else if(args[i].equals("--file") || args[i].equals("-f")) {
447 i++;
448 if(i >= args.length) {
449 doPrintErr(runSilent, "ERROR: No input file provided with --file|-f flag.\n");
450 printUsage = true;
451 returnVal = -1;
452 } else {
453 String filePath = args[i];
454 inFile = new File(filePath);
455 if(!inFile.isFile()) {
456 doPrintErr(runSilent, "ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n");
457 printUsage = true;
458 returnVal = -1;
459 }
460 }
461 } else { // unrecognised input argument
462 doPrintErr(runSilent, "ERROR: Unrecognised " + i + "th argument to this program.\n");
463 printUsage = true;
464 returnVal = -1;
465 }
466 }
467
468 if(returnVal != 2) { // returnVal == 2 for help. Only if the user did not request --help/-h, do we continue to make sure the arguments provided are sane
469 if(!readFromStdIn && inFile == null) { // at least one input source must be provided
470 doPrintErr(runSilent, "ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n");
471 printUsage = true;
472 returnVal = -1;
473 }
474
475 if(readFromStdIn && inFile != null) { // this program can't be asked to read from stdin and from an input file
476 doPrintErr(runSilent, "ERROR: instructed to read from both STDIN and from an input file. Not possible.\n");
477 printUsage = true;
478 returnVal = -1;
479 }
480 }
481
482 if(printUsage) {
483 // If not running silent print usage.
484 // OR if expressly asked for help, then it doesn't matter if we're running silent: still print usage to stderr.
485 if(returnVal == 2 || !runSilent) {
486 printUsage();
487 }
488 System.exit(returnVal);
489 }
490
491
492 // 2. Finally, we can now do the actual language detection
493 try {
494 MaoriTextDetector maoriTextDetector = null;
495 if(minConfidence == -1) {
496 maoriTextDetector = new MaoriTextDetector(runSilent);
497 } else {
498 maoriTextDetector = new MaoriTextDetector(runSilent, minConfidence);
499 }
500
501 // TODO
502 maoriTextDetector.getAllSentencesInMaori(
503 "Primary sources ~ Published Maramataka Mo Te Tau 1885, Nepia: Te Haaringi, Kai-ta Pukapuka, kei Hehitingi Tiriti, 1884. Maramataka Mo Te Tau 1886, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1885. Maramataka Mo Te Tau 1887, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1886. Maramataka Mo Te Tau 1888, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1887. Maramataka Mo Te Tau 1889, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1888. Maramataka Mo Te Tau 1890, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1889. Maramataka Mo Te Tau 1891, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1890. Maramataka Mo Te Tau 1892, Nepia: Na te Haaringi, i ta ki tona Whare Perehi Pukapuka, 1891. Maramataka Mo Te Tau 1893, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1892. Maramataka Mo Te Tau 1894, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1893. Maramataka Me Te Tau 1895, Kihipane: Na te Muri i Ta ki tona whare perehi pukapuka, 1894. Maramataka Mo Te Tau 1896, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1895. Maramataka Mo Te Tau 1897, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka 1896. Maramataka Mo Te Tau 1898, Turanga: Na te Wiremu Hapata i ta ki Te Rau Kahikatea, 1897. Ko Te Paipera Tapu Ara, Ko Te Kawenata Tawhito Me Te Kawenata Hou, He Mea Whakamaori Mai No Nga Reo I Oroko-Tuhituhia Ai, Ranana: He mea ta ki te perehi a W.M.Watts ma te Komiti Ta Paipera mo Ingarangi mo Te Ao Katoa, 1868. Ko Te Pukapuka O Nga Inoinga, Me Era Atu Tikanga, I Whakaritea E Te Hahi O Ingarani, Mo Te Minitatanga O Nga Hakarameta, O Era Atu Ritenga a Te Hahi: Me Nga Waiata Ano Hoki a Rawiri, Me Te Tikanga Mo Te Whiriwhiringa, Mo Te Whakaturanga, Me Te Whakatapunga O Nga Pihopa, O Nga Piriti, Me Nga Rikona, Me Nga Himene, Ranana: I taia tenei ki te perehi o te Komiti mo te whakapuaki i to mohiotanga ki a te Karaiti, 1858. 
Ko Te Pukapuka O Nga Inoinga, Me Era Atu Tikanga, I Whakaritea E Te Hahi O Ingarani, Mo Te Minitatanga O Nga Hakarameta, O Era Atu Ritenga a Te Hahi: Me Nga Waiata Ano Hoki a Rawiri, Me Te Tikanga Mo Te Whiriwhiringa, Mo Te Whakaturanga, Me Te Whakatapunga O Nga Pihopa, O Nga Piriti, Me Nga Rikona. 1883. The Book of Common Prayer, and Administration of the Sacraments, and Other Rites and Ceremonies of the Church, According to the Use of the United Church of England and Ireland: Together with the Proper Lessons for Sundays and Other Holy-Days, and a New Version of the Psalms of David, Oxford: Printed at 134 the University Press, 1852. The Book of Common Prayer and Administration of the Sacraments, and Other Rites and Ceremonies of the Church, According to the Church of England: Together with the Psalter or Psalms of David, Printed as They Are to Be Sung or Said in Churches: And the Form and Manner of Making, Ordaining, and Consecrating of Bishops, Priests, and Deacons, London: G.E. Eyre and W. Spottiswoode, after 1871 but before 1877. Brown, A.N., The Journals of A.N. Brown C.M.S. Missionary Tauranga Covering the Years 1840 to 1842, Tauranga: The Elms Trust, 1990 (Commemorative Edition). ______________, Select Sermons of A.N. Brown, Tauranga: The Elms Trust, 1997. Fitzgerald, Caroline (ed.), Te Wiremu Henry Williams: Early Years in the North, Wellington: Huia Publishers, 2011. The Hawke's Bay Almanac, Napier: James Wood, Hawke's Bay Herald, 1862, 1863, 1867.");
504
505
506 //boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); // test hardcoded string
507 boolean textIsInMaori = false;
508
509 // Using try with resources, https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
510 if(inFile != null) {
511 doPrint(runSilent, "Reading text from file " + inFile.getPath());
512 try (BufferedReader reader = new BufferedReader(new FileReader(inFile))) {
513 textIsInMaori = maoriTextDetector.isTextInMaori(reader);
514 } // let outer try deal with any file/reading exceptions
515 }
516 else if (readFromStdIn) {
517 doPrint(runSilent, "Waiting to read text from STDIN... (press Ctrl-D when done entering text)>");
518 try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
519 textIsInMaori = maoriTextDetector.isTextInMaori(reader);
520 } // let outer try deal with any file/reading exceptions
521 }
522
523 if(textIsInMaori) {
524 returnVal = 0;
525 } else {
526 returnVal = 1;
527 }
528
529 } catch(Exception e) {
530 e.printStackTrace();
531
532 } finally {
533 doPrint(runSilent, "Exitting program with returnVal " + returnVal + "...\n");
534 System.exit(returnVal);
535 }
536 }
537
538}
Note: See TracBrowser for help on using the repository browser.