Context Navigation

source: gs3-extensions/maori-lang-detection/src/org/greenstone/atea/MaoriTextDetector.java@ 33585

Last change on this file since 33585 was 33585, checked in by ak19, 5 years ago
Much simpler way of using sentence and language detection model to work on a single sentence at a time. Not sure if it is truly best way, but at least as good or better than my older attempts. Committing with debugging.
File size: 26.0 KB

Line
1	/**
2	* Class that uses OpenNLP with the Language Detection Model to determine, with a default
3	* or configurable level of confidence, whether text (from a file or stdin) is in Māori or not.
4	* Internal functions can be used for detecting any of the 103 languages currently supported by
5	* the OpenNLP Language Detection Model.
6	*
7	* http://opennlp.apache.org/news/model-langdetect-183.html
8	* language detector model: http://opennlp.apache.org/models.html
9	* Pre-trained models for OpenNLP 1.5: http://opennlp.sourceforge.net/models-1.5/
10	* Use of Apache OpenNLP in general:
11	* http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
12	* Use of OpenNLP for language detection:
13	* http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
14	*
15	* This code was based on the information and sample code at the above links and the links dispersed throughout this file.
16	* See also the accompanying README file.
17	*
18	* July 2019
19	*/
20
21	package org.greenstone.atea;
22
23	import java.io.*;
24	import opennlp.tools.langdetect.*;
25	import opennlp.tools.sentdetect.*;
26	import opennlp.tools.util.*;
27
28	import java.util.ArrayList;
29
30	/**
31	* EXPORT OPENNLP_HOME environment variable to be your apache OpenNLP installation.
32	* Create a folder called "models" within the $OPENNLP_HOME folder, and put the file "langdetect-183.bin" in there
33	* (which is the language detection model zipped up and renamed to .bin extension).
34	*
35	* Then, to compile this program, do the following from the "src" folder (the folder containing this java file):
36	* maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org/greenstone/atea/MaoriTextDetector.java
37	*
38	* To run this program, issue one of the following commands from the "src" folder (the folder containing this java file):
39	*
40	* maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org.greenstone.atea.MaoriTextDetector --help
41	*
42	* maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org.greenstone.atea.MaoriTextDetector --file <full/path/to/textfile>
43	*
44	* maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" org.greenstone.atea.MaoriTextDetector -
45	* Press enter. This variant of the program expects text to stream in from standard input.
46	* If entering text manually, then remember to press Ctrl-D to indicate the usual end of StdIn.
47	*
48	* https://stackoverflow.com/questions/219585/including-all-the-jars-in-a-directory-within-the-java-classpath
49	* Also has information on how to run this class if it's in a Java package.
50	*/
51	public class MaoriTextDetector {
52	/** The 3 letter language code for Maori in ISO 639-2 or ISO 639-3 */
53	public static final String MAORI_3LETTER_CODE = "mri";
54	public static final double DEFAULT_MINIMUM_CONFIDENCE = 0.50;
55
56	/** Configurable: cut off minimum confidence value,
57	greater or equal to which determines that the best predicted language is acceptable to user of MaoriTextDetector. */
58	public final double MINIMUM_CONFIDENCE;
59
60	/** silentMode set to false means MaoriTextDetector won't print helpful messages while running. Set to true to run silently. */
61	public final boolean silentMode;
62
63	private final String OPENNLP_MODELS_RELATIVE_PATH = "models" + File.separator;
64
65	/** Language Detection Model file for OpenNLP is expected to be at $OPENNLP_HOME/models/langdetect-183.bin */
66	private final String LANG_DETECT_MODEL_RELATIVE_PATH = OPENNLP_MODELS_RELATIVE_PATH + "langdetect-183.bin";
67
68	/**
69	* The LanguageDetectorModel object that will do the actual language detection/prediction for us.
70	* Created once in the constructor, can be used as often as needed thereafter.
71	*/
72	private LanguageDetector myCategorizer = null;
73
74	/**
75	* The Sentence Detection object that does the sentence splitting for the language
76	* the sentece model was trained for.
77	*/
78	private SentenceDetectorME sentenceDetector = null;
79
80	/** String taken from our university website, https://www.waikato.ac.nz/maori/ */
81	public static final String TEST_MRI_INPUT_TEXT = "Ko tÄnei te Whare WÄnanga o Waikato e whakatau nei i ngÄ iwi o te ao, ki roto i te riu o te awa e rere nei, ki runga i te whenua e hora nei, ki raro i te taumaru o ngÄ maunga whakaruru e tau awhi nei.";
82
83	/** test input string for a negative result */
84	public static final String TEST_ENG_INPUT_TEXT = "The main program exits with -1 if an Exception occurred when attempting to detect the text's language";
85
86
/**
 * Creates a detector that uses DEFAULT_MINIMUM_CONFIDENCE as its confidence cut-off.
 *
 * @param silentMode passed straight through to the two-argument constructor
 * @throws Exception propagated from the constructor this one delegates to
 */
public MaoriTextDetector(boolean silentMode) throws Exception {
    this(silentMode, MaoriTextDetector.DEFAULT_MINIMUM_CONFIDENCE);
}
90
/**
 * Creates a detector that splits sentences with the sentence model trained for Maori,
 * i.e. the model file named "mri-sent_trained.bin".
 *
 * @param silentMode     passed straight through to the three-argument constructor
 * @param min_confidence confidence cut-off, passed straight through as well
 * @throws Exception propagated from the constructor this one delegates to
 */
public MaoriTextDetector(boolean silentMode, double min_confidence) throws Exception {
    this(silentMode,
         min_confidence,
         "mri-sent_trained.bin");
}
95
96	/** More general constructor that can use sentence detector models for other languages */
97	public MaoriTextDetector(boolean silentMode, double min_confidence,
98	String sentenceModelFileName) throws Exception
99	{
100	this.silentMode = silentMode;
101	this.MINIMUM_CONFIDENCE = min_confidence;
102
103	// 1. Check we can find the Language Detect Model file in the correct location (check that $OPENNLP_HOME/models/langdetect-183.bin exists);
104	String langDetectModelPath = System.getenv("OPENNLP_HOME");
105	if(System.getenv("OPENNLP_HOME") == null) {
106	throw new Exception("\n\t*** Environment variable OPENNLP_HOME must be set to your Apache OpenNLP installation folder.");
107	}
108	langDetectModelPath = langDetectModelPath + File.separator + LANG_DETECT_MODEL_RELATIVE_PATH;
109	File langDetectModelBinFile = new File(langDetectModelPath);
110	if(!langDetectModelBinFile.exists()) {
111	throw new Exception("\n\t*** " + langDetectModelBinFile.getPath() + " doesn't exist."
112	+ "\n\t*** Ensure the $OPENNLP_HOME folder contains a 'models' folder"
113	+ "\n\t*** with the model file 'langdetect-183.bin' in it.");
114	}
115
116
117	// 2. Set up our language detector Model and the Categorizer for language predictions based on the Model.
118	// http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
119	// https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
120	try (InputStream modelIn = new FileInputStream(langDetectModelPath)) {
121
122	LanguageDetectorModel model = new LanguageDetectorModel(modelIn);
123
124	// http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
125	this.myCategorizer = new LanguageDetectorME(model);
126	}/*catch(Exception e) {
127	e.printStackTrace();
128	}*/
129
130	// instantiating function should handle critical exceptions. Constructors shouldn't.
131
132
133
134	// 3. Set up our sentence model and SentenceDetector object
135	String sentenceModelPath = System.getenv("OPENNLP_HOME") + File.separator
136	+ OPENNLP_MODELS_RELATIVE_PATH + sentenceModelFileName; // "mri-sent_trained.bin" default
137	File sentenceModelBinFile = new File(sentenceModelPath);
138	if(!sentenceModelBinFile.exists()) {
139	throw new Exception("\n\t*** " + sentenceModelBinFile.getPath() + " doesn't exist."
140	+ "\n\t*** Ensure the $OPENNLP_HOME folder contains a 'models' folder"
141	+ "\n\t*** with the model file "+sentenceModelFileName+" in it.");
142	}
143	try (InputStream modelIn = new FileInputStream(sentenceModelPath)) {
144	// https://www.tutorialspoint.com/opennlp/opennlp_sentence_detection.htm
145	SentenceModel sentenceModel = new SentenceModel(modelIn);
146	this.sentenceDetector = new SentenceDetectorME(sentenceModel);
147
148	} // instantiating function should handle this critical exception
149	}
150
151	/**
152	* In this class' constructor, need to have set up the Sentence Detection Model
153	* for the langCode passed in to this function in order for the output to make
154	* sense for that language.
155	*/
156	public ArrayList<String> getAllSentencesInLanguage(String langCode, String text, double confidenceCutoff)
157	{
158
159	// we'll be storing just those sentences in text that are in the denoted language code
160	ArrayList<String> mriSentences = new ArrayList<String>();
161	// OpenNLP language detection works best with a minimum of 2 sentences
162	// See https://opennlp.apache.org/news/model-langdetect-183.html
163	// "It is important to note that this model is trained for and works well with
164	// longer texts that have at least 2 sentences or more from the same language."
165
166	// For evaluating single languages, I used a very small data set and found that
167	// if the primary language detected is MRI AND if the confidence is >= 0.1, the
168	// results appear reasonably to be in te reo MÄori.
169
170	String[] sentences = sentenceDetector.sentDetect(text);
171
172	for(int i = 0; i < sentences.length; i++) {
173	String sentence = sentences[i];
174
175	//System.err.println(sentence);
176
177	Language bestLanguage = myCategorizer.predictLanguage(sentence);
178	double confidence = bestLanguage.getConfidence();
179
180	if(bestLanguage.getLang().equals(langCode) && confidence >= confidenceCutoff) {
181	System.err.println("Adding sentence: " + sentence + "\n");
182	mriSentences.add(sentence);
183	} else {
184	System.err.println("SKIPPING sentence: " + sentence + "\n");
185	}
186	}
187	return mriSentences;
188	}
189
190
191	public ArrayList<String> getAllSentencesInMaori(String text) throws Exception {
192	// big assumption here: that we can split incoming text into sentences
193	// for any language (using the MÄori language trained sentence model),
194	// despite not knowing what language those sentences are in
195	// Hinges on MRI sentences detection being similar to at least ENG equivalent
196
197
198	// we'll be storing just those sentences in text that are in MÄori.
199
200	// OpenNLP language detection works best with a minimum of 2 sentences
201	// See https://opennlp.apache.org/news/model-langdetect-183.html
202	// "It is important to note that this model is trained for and works well with
203	// longer texts that have at least 2 sentences or more from the same language."
204
205	// For evaluating single languages, I used a very small data set and found that
206	// if the primary language detected is MRI AND if the confidence is >= 0.1, the
207	// results appear reasonably to be in te reo MÄori.
208
209	final double confidenceCutoff = 0.1;
210	return getAllSentencesInLanguage(MAORI_3LETTER_CODE, text, confidenceCutoff);
211	}
212
213
214	/**
215	* @return true if the input text is Maori (mri) with MINIMUM_CONFIDENCE levels of confidence (if set,
216	* else DEFAULT_MINIMUM_CONFIDENCE levels of confidence).
217	*/
218	public boolean isTextInMaori(String text) {
219	return isTextInLanguage(MAORI_3LETTER_CODE, text);
220	}
221
222	/** @param langCode is 3 letter language code, ISO 639-2/3
223	* https://www.loc.gov/standards/iso639-2/php/code_list.php
224	* https://en.wikipedia.org/wiki/ISO_639-3
225	* @return true if the input text is Maori (mri) with MINIMUM_CONFIDENCE levels of confidence (if set,
226	* else DEFAULT_MINIMUM_CONFIDENCE levels of confidence).
227	*/
228	public boolean isTextInLanguage(String langCode, String text) {
229	// Get the most probable language
230	Language bestLanguage = myCategorizer.predictLanguage(text);
231	doPrint("Best language: " + bestLanguage.getLang());
232	doPrint("Best language confidence: " + bestLanguage.getConfidence());
233
234	return (bestLanguage.getLang().equals(langCode) && bestLanguage.getConfidence() >= this.MINIMUM_CONFIDENCE);
235	}
236
237
238	/**
239	* Handle "smaller" textfiles/streams of text read in.
240	* Return value is the same as for isTextInMaori(String text);
241	*/
242	public boolean isTextInMaori(BufferedReader reader) throws Exception {
243	return isTextInLanguage(MAORI_3LETTER_CODE, reader);
244	}
245	/**
246	* Handle "smaller" textfiles/streams of text read in.
247	* Return value is the same as for isTextInLanguage(String langCode, String text);
248	*/
249	public boolean isTextInLanguage(String langCode, BufferedReader reader) throws Exception {
250	// https://stackoverflow.com/questions/326390/how-do-i-create-a-java-string-from-the-contents-of-a-file
251
252	StringBuilder text = new StringBuilder();
253	String line = null;
254
255
256	while((line = reader.readLine()) != null) { // readLine removes newline separator
257	text.append(line + "\n"); // add back (unix style) line ending
258	}
259	return isTextInLanguage(langCode, text.toString());
260	}
261
262	/*
263	* Need better handling of "larger" textfiles/streams of text read in:
264	* what if multiple languages with high confidence every NUM_LINES read in?
265	* Does this mean the file is multi-lingual with each section dominated by a different language?
266	* How best to convey such information to the user?
267	*/
268	/**
269	* Rudimentary attempt to deal with very large files.
270	* Return value is the same as for isTextInMaori(String text);
271	*/
272	public boolean isLargeTextInMaori(BufferedReader reader) throws Exception {
273	return isLargeTextInLanguage(MAORI_3LETTER_CODE, reader);
274	}
275
276	/**
277	* Rudimentary attempt to deal with very large files.
278	* Return value is the same as for isTextInLanguage(String langCode, String text);
279	*/
280	public boolean isLargeTextInLanguage(String langCode, BufferedReader reader) throws Exception {
281	// https://stackoverflow.com/questions/326390/how-do-i-create-a-java-string-from-the-contents-of-a-file
282
283	final int NUM_LINES = 100; // arbitrary 100 lines read, predict language, calculate confidence
284
285	StringBuilder text = new StringBuilder();
286	String line = null;
287
288	double cumulativeConfidence = 0;
289	int numLoops = 0;
290
291	int i = 0;
292	String language = null;
293
294	while((line = reader.readLine()) != null) { // readLine removes newline separator
295	text.append(line + "\n"); // add back (unix style) line ending
296
297	i++; // read nth line of numLoop
298
299
300	if(i == NUM_LINES) { // arbitrary 100 lines read, predict language, calculate confidence
301
302
303	Language bestLanguage = myCategorizer.predictLanguage(text.toString());
304	if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines
305	doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");
306	}
307	language = bestLanguage.getLang();
308	cumulativeConfidence += bestLanguage.getConfidence();
309
310	doPrintErr("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
311
312	// finished analysing language of NUM_LINES of text
313	text = new StringBuilder();
314	i = 0;
315	numLoops++;
316	}
317	}
318
319	// process any (remaining) text that was less than n NUM_LINES
320	if(!text.toString().equals("")) {
321	text.append(line + "\n"); // add back (unix style) line ending
322	i++;
323
324	Language bestLanguage = myCategorizer.predictLanguage(text.toString());
325
326	if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines
327	doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");
328	}
329	language = bestLanguage.getLang();
330	cumulativeConfidence += bestLanguage.getConfidence();
331	doPrintErr("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
332	}
333
334
335	int totalLinesRead = numLoops * NUM_LINES + i; // not used
336	double avgConfidence = cumulativeConfidence/(numLoops + 1); // not quite the average as the text processed outside the loop may have fewer lines than NUM_LINES
337
338
339	return (language.equals(langCode) && avgConfidence >= this.MINIMUM_CONFIDENCE);
340	}
341
342
343	/**
344	* Prints to STDOUT the predicted languages of the input text in order of descending confidence.
345	* UNUSED.
346	*/
347	public void predictedLanguages(String text) {
348	// Get an array with the most probable languages
349
350	Language[] languages = myCategorizer.predictLanguages(text);
351
352	if(languages == null \|\| languages.length <= 0) {
353	doPrintErr("No languages predicted for the input text");
354	} else {
355	for(int i = 0; i < languages.length; i++) {
356	doPrint("Language prediction " + i + ": " + languages[i]);
357	}
358	}
359
360	}
361
362	public void doPrint(String msg) {
363	doPrint(this.silentMode, msg);
364	}
365	public void doPrintErr(String msg) {
366	doPrintErr(this.silentMode, msg);
367	}
368
369	/******** STATIC METHODS ***********/
370
371	public static void doPrint(boolean runSilent, String msg) {
372	if(!runSilent) System.out.println(msg);
373	}
374	public static void doPrintErr(boolean runSilent, String msg) {
375	if(!runSilent) System.err.println(msg);
376	}
377
378	public static void printUsage() {
379	System.err.println("Run this program with:");
380	System.err.println("\t--help (-h)\tfor seeing this usage message again");
381	System.err.println("\t-\tto have input text read from STDIN (as always, hit Ctrl-D to mark end of text stream)");
382	System.err.println("\t--file (-f)\tto provide an input file path");
383	System.err.println("\t--silent (-s): optional, to run silently and just exit with exit value.");
384	System.err.println("\t--min-confidence (-c): optional, to override the default minimum confidence value (" + DEFAULT_MINIMUM_CONFIDENCE + ")");
385	System.err.println("\t\tof the predicted language that will be considered acceptable.");
386	System.err.println();
387	System.err.println("This program terminates with exit value:");
388	System.err.println("\t0 if the input text is in Maori");
389	System.err.println("\t1 if input text is not in Maori");
390	System.err.println();
391	System.err.println("\t-1 if the input arguments were wrong");
392	System.err.println("\t255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language");
393	System.err.println("\t2 if the user asked to run this program with --help/-h.");
394	System.err.println();
395	}
396
397	/**
398	* The main program exits with:
399	* 0 if text is in Maori;
400	* 1 if text is not in Maori;
401	*
402	* -1 if the input arguments were wrong
403	* 255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language
404	* QTODO: why does the program exit value end up as 255 and not -1 when returnVal remains at -1 on Exception?
405	* 2 if the user asked to run this program with --help/-h.
406	*/
407	public static void main(String args[]) {
408	int returnVal = -1;
409
410	// 1. Check input arguments
411	boolean printUsage = false;
412	boolean readFromStdIn = false;
413	File inFile = null;
414	boolean runSilent = false;
415	double minConfidence = -1;
416
417	for (int i = 0; !printUsage && i < args.length; i++) {
418
419	// check for help first and quit after printing usage
420	if(args[i].equals("--help") \|\| args[i].equals("-h")) {
421	printUsage = true;
422	returnVal = 2;
423	} else if(args[i].equals("--silent") \|\| args[i].equals("-s")) {
424	runSilent = true;
425	} else if(args[i].equals("--min-confidence") \|\| args[i].equals("-c")) {
426	i++;
427	if(i >= args.length) {
428	doPrintErr(runSilent, "ERROR: No minimum confidence value provided with --min-confidence\|-c flag.\n");
429	printUsage = true;
430	returnVal = -1;
431	} else {
432	try {
433	minConfidence = Double.parseDouble(args[i]);
434	if(minConfidence < 0 \|\| minConfidence > 1) {
435	throw new NumberFormatException("Number out of range, must be between 0-1");
436	}
437	} catch(NumberFormatException nfe) {
438	doPrintErr(runSilent, "ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n");
439	printUsage = true;
440	returnVal = -1;
441	}
442	}
443	} else if(args[i].equals("-")) {
444	readFromStdIn = true;
445	//break; // don't bother continuing to check input arguments for any --file flag if we're told to read from stdin
446	} else if(args[i].equals("--file") \|\| args[i].equals("-f")) {
447	i++;
448	if(i >= args.length) {
449	doPrintErr(runSilent, "ERROR: No input file provided with --file\|-f flag.\n");
450	printUsage = true;
451	returnVal = -1;
452	} else {
453	String filePath = args[i];
454	inFile = new File(filePath);
455	if(!inFile.isFile()) {
456	doPrintErr(runSilent, "ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n");
457	printUsage = true;
458	returnVal = -1;
459	}
460	}
461	} else { // unrecognised input argument
462	doPrintErr(runSilent, "ERROR: Unrecognised " + i + "th argument to this program.\n");
463	printUsage = true;
464	returnVal = -1;
465	}
466	}
467
468	if(returnVal != 2) { // returnVal == 2 for help. Only if the user did not request --help/-h, do we continue to make sure the arguments provided are sane
469	if(!readFromStdIn && inFile == null) { // at least one input source must be provided
470	doPrintErr(runSilent, "ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n");
471	printUsage = true;
472	returnVal = -1;
473	}
474
475	if(readFromStdIn && inFile != null) { // this program can't be asked to read from stdin and from an input file
476	doPrintErr(runSilent, "ERROR: instructed to read from both STDIN and from an input file. Not possible.\n");
477	printUsage = true;
478	returnVal = -1;
479	}
480	}
481
482	if(printUsage) {
483	// If not running silent print usage.
484	// OR if expressly asked for help, then it doesn't matter if we're running silent: still print usage to stderr.
485	if(returnVal == 2 \|\| !runSilent) {
486	printUsage();
487	}
488	System.exit(returnVal);
489	}
490
491
492	// 2. Finally, we can now do the actual language detection
493	try {
494	MaoriTextDetector maoriTextDetector = null;
495	if(minConfidence == -1) {
496	maoriTextDetector = new MaoriTextDetector(runSilent);
497	} else {
498	maoriTextDetector = new MaoriTextDetector(runSilent, minConfidence);
499	}
500
501	// TODO
502	maoriTextDetector.getAllSentencesInMaori(
503	"Primary sources ~ Published Maramataka Mo Te Tau 1885, Nepia: Te Haaringi, Kai-ta Pukapuka, kei Hehitingi Tiriti, 1884. Maramataka Mo Te Tau 1886, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1885. Maramataka Mo Te Tau 1887, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1886. Maramataka Mo Te Tau 1888, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1887. Maramataka Mo Te Tau 1889, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1888. Maramataka Mo Te Tau 1890, Nepia: Na te Haaringi i ta ki tona Whare Perehi Pukapuka, 1889. Maramataka Mo Te Tau 1891, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1890. Maramataka Mo Te Tau 1892, Nepia: Na te Haaringi, i ta ki tona Whare Perehi Pukapuka, 1891. Maramataka Mo Te Tau 1893, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1892. Maramataka Mo Te Tau 1894, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1893. Maramataka Me Te Tau 1895, Kihipane: Na te Muri i Ta ki tona whare perehi pukapuka, 1894. Maramataka Mo Te Tau 1896, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka, 1895. Maramataka Mo Te Tau 1897, Kihipane: Na te Muri i ta ki tona Whare Perehi Pukapuka 1896. Maramataka Mo Te Tau 1898, Turanga: Na te Wiremu Hapata i ta ki Te Rau Kahikatea, 1897. Ko Te Paipera Tapu Ara, Ko Te Kawenata Tawhito Me Te Kawenata Hou, He Mea Whakamaori Mai No Nga Reo I Oroko-Tuhituhia Ai, Ranana: He mea ta ki te perehi a W.M.Watts ma te Komiti Ta Paipera mo Ingarangi mo Te Ao Katoa, 1868. Ko Te Pukapuka O Nga Inoinga, Me Era Atu Tikanga, I Whakaritea E Te Hahi O Ingarani, Mo Te Minitatanga O Nga Hakarameta, O Era Atu Ritenga a Te Hahi: Me Nga Waiata Ano Hoki a Rawiri, Me Te Tikanga Mo Te Whiriwhiringa, Mo Te Whakaturanga, Me Te Whakatapunga O Nga Pihopa, O Nga Piriti, Me Nga Rikona, Me Nga Himene, Ranana: I taia tenei ki te perehi o te Komiti mo te whakapuaki i to mohiotanga ki a te Karaiti, 1858. 
Ko Te Pukapuka O Nga Inoinga, Me Era Atu Tikanga, I Whakaritea E Te Hahi O Ingarani, Mo Te Minitatanga O Nga Hakarameta, O Era Atu Ritenga a Te Hahi: Me Nga Waiata Ano Hoki a Rawiri, Me Te Tikanga Mo Te Whiriwhiringa, Mo Te Whakaturanga, Me Te Whakatapunga O Nga Pihopa, O Nga Piriti, Me Nga Rikona. 1883. The Book of Common Prayer, and Administration of the Sacraments, and Other Rites and Ceremonies of the Church, According to the Use of the United Church of England and Ireland: Together with the Proper Lessons for Sundays and Other Holy-Days, and a New Version of the Psalms of David, Oxford: Printed at 134 the University Press, 1852. The Book of Common Prayer and Administration of the Sacraments, and Other Rites and Ceremonies of the Church, According to the Church of England: Together with the Psalter or Psalms of David, Printed as They Are to Be Sung or Said in Churches: And the Form and Manner of Making, Ordaining, and Consecrating of Bishops, Priests, and Deacons, London: G.E. Eyre and W. Spottiswoode, after 1871 but before 1877. Brown, A.N., The Journals of A.N. Brown C.M.S. Missionary Tauranga Covering the Years 1840 to 1842, Tauranga: The Elms Trust, 1990 (Commemorative Edition). ______________, Select Sermons of A.N. Brown, Tauranga: The Elms Trust, 1997. Fitzgerald, Caroline (ed.), Te Wiremu Henry Williams: Early Years in the North, Wellington: Huia Publishers, 2011. The Hawke's Bay Almanac, Napier: James Wood, Hawke's Bay Herald, 1862, 1863, 1867.");
504
505
506	//boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); // test hardcoded string
507	boolean textIsInMaori = false;
508
509	// Using try with resources, https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
510	if(inFile != null) {
511	doPrint(runSilent, "Reading text from file " + inFile.getPath());
512	try (BufferedReader reader = new BufferedReader(new FileReader(inFile))) {
513	textIsInMaori = maoriTextDetector.isTextInMaori(reader);
514	} // let outer try deal with any file/reading exceptions
515	}
516	else if (readFromStdIn) {
517	doPrint(runSilent, "Waiting to read text from STDIN... (press Ctrl-D when done entering text)>");
518	try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
519	textIsInMaori = maoriTextDetector.isTextInMaori(reader);
520	} // let outer try deal with any file/reading exceptions
521	}
522
523	if(textIsInMaori) {
524	returnVal = 0;
525	} else {
526	returnVal = 1;
527	}
528
529	} catch(Exception e) {
530	e.printStackTrace();
531
532	} finally {
533	doPrint(runSilent, "Exitting program with returnVal " + returnVal + "...\n");
534	System.exit(returnVal);
535	}
536	}
537
538	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: