Changeset 33338 for gs3-extensions
- Timestamp:
- 2019-07-20T23:24:46+12:00 (5 years ago)
- Location:
- gs3-extensions/maori-lang-detection/src
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/src/MaoriTextDetector.java
r33337 r33338 17 17 * EXPORT OPENNLP_HOME environment variable to be your apache OpenNLP installation. 18 18 * Then, to compile this program: 19 * maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" Maori Detector.java19 * maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" MaoriTextDetector.java 20 20 * To run this program, one of: 21 21 * 22 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" Maori Detector --help22 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector --help 23 23 * 24 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" Maori Detector --file <full/path/to/textfile>24 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector --file <full/path/to/textfile> 25 25 * 26 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" Maori Detector -26 * maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector - 27 27 * which expects text to stream in from standard input. 28 28 * If entering text manually, then remember to press Ctrl-D to indicate the usual end of StdIn. … … 31 31 * Also has information on how to run this class if it's in a Java package. 32 32 */ 33 public class Maori Detector {33 public class MaoriTextDetector { 34 34 /** The 3 letter language code for Maori in ISO 639-2 or ISO 639-3 */ 35 35 public static final String MAORI_3LETTER_CODE = "mri"; … … 37 37 38 38 /** Configurable: cut off minimum confidence value, 39 greater or equal to which determines that the best predicted language is acceptable to user of Maori Detector. */39 greater or equal to which determines that the best predicted language is acceptable to user of MaoriTextDetector. */ 40 40 public final double MINIMUM_CONFIDENCE; 41 /** silentMode set to false means Maori Detector won't print helpful messages while running. Set to true to run silently. */41 /** silentMode set to false means MaoriTextDetector won't print helpful messages while running. Set to true to run silently. */ 42 42 public final boolean silentMode; 43 43 … … 56 56 57 57 58 public Maori Detector(boolean silentMode) throws Exception {58 public MaoriTextDetector(boolean silentMode) throws Exception { 59 59 this(silentMode, DEFAULT_MINIMUM_CONFIDENCE); 60 60 } 61 61 62 public Maori Detector(boolean silentMode, double min_confidence) throws Exception {62 public MaoriTextDetector(boolean silentMode, double min_confidence) throws Exception { 63 63 this.silentMode = silentMode; 64 64 this.MINIMUM_CONFIDENCE = min_confidence; … … 110 110 // Get the most probable language 111 111 Language bestLanguage = myCategorizer.predictLanguage(text); 112 System.out.println("Best language: " + bestLanguage.getLang());113 System.out.println("Best language confidence: " + bestLanguage.getConfidence());112 doPrint("Best language: " + bestLanguage.getLang()); 113 doPrint("Best language confidence: " + bestLanguage.getConfidence()); 114 114 115 115 return (bestLanguage.getLang().equals(langCode) && bestLanguage.getConfidence() >= this.MINIMUM_CONFIDENCE); … … 184 184 Language bestLanguage = myCategorizer.predictLanguage(text.toString()); 185 185 if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines 186 System.err.println("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");186 doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language."); 187 187 } 188 188 language = bestLanguage.getLang(); 189 189 cumulativeConfidence += bestLanguage.getConfidence(); 190 190 191 System.err.println("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");191 doPrintErr("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")"); 192 192 193 193 // finished analysing language of NUM_LINES of text … … 206 206 207 207 if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines 208 System.err.println("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");208 doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language."); 209 209 } 210 210 language = bestLanguage.getLang(); 211 211 cumulativeConfidence += bestLanguage.getConfidence(); 212 System.err.println("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");212 doPrintErr("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")"); 213 213 } 214 214 … … 220 220 return (language.equals(langCode) && avgConfidence >= this.MINIMUM_CONFIDENCE); 221 221 } 222 223 222 224 223 … … 233 232 234 233 if(languages == null || languages.length <= 0) { 235 System.err.println("No languages predicted for the input text");234 doPrintErr("No languages predicted for the input text"); 236 235 } else { 237 236 for(int i = 0; i < languages.length; i++) { 238 System.out.println("Language prediction " + i + ": " + languages[i]); 239 } 240 } 241 242 } 243 237 doPrint("Language prediction " + i + ": " + languages[i]); 238 } 239 } 240 241 } 242 243 public void doPrint(String msg) { 244 doPrint(this.silentMode, msg); 245 } 246 public void doPrintErr(String msg) { 247 doPrintErr(this.silentMode, msg); 248 } 249 250 /********** STATIC METHODS *************/ 251 252 public static void doPrint(boolean runSilent, String msg) { 253 if(!runSilent) System.out.println(msg); 254 } 255 public static void doPrintErr(boolean runSilent, String msg) { 256 if(!runSilent) System.err.println(msg); 257 } 258 244 259 public static void printUsage() { 245 260 System.err.println("Run this program with:"); … … 256 271 System.err.println(); 257 272 System.err.println("\t-1 if the input arguments were wrong"); 258 System.err.println("\t255(!) if an Exception occurred in instantiating the Maori Detector when attempting to detect the text's language");273 System.err.println("\t255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language"); 259 274 System.err.println("\t2 if the user asked to run this program with --help/-h."); 260 275 System.err.println(); … … 267 282 * 268 283 * -1 if the input arguments were wrong 269 * 255(!) if an Exception occurred in instantiating the Maori Detector when attempting to detect the text's language284 * 255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language 270 285 * QTODO: why does the program exit value end up as 255 and not -1 when returnVal remains at -1 on Exception? 271 286 * 2 if the user asked to run this program with --help/-h. … … 292 307 i++; 293 308 if(i >= args.length) { 294 System.err.println("ERROR: No minimum confidence value provided with --min-confidence|-c flag.\n");309 doPrintErr(runSilent, "ERROR: No minimum confidence value provided with --min-confidence|-c flag.\n"); 295 310 printUsage = true; 296 311 returnVal = -1; … … 302 317 } 303 318 } catch(NumberFormatException nfe) { 304 System.err.println("ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n");319 doPrintErr(runSilent, "ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n"); 305 320 printUsage = true; 306 321 returnVal = -1; … … 313 328 i++; 314 329 if(i >= args.length) { 315 System.err.println("ERROR: No input file provided with --file|-f flag.\n");330 doPrintErr(runSilent, "ERROR: No input file provided with --file|-f flag.\n"); 316 331 printUsage = true; 317 332 returnVal = -1; … … 320 335 inFile = new File(filePath); 321 336 if(!inFile.isFile()) { 322 System.err.println("ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n");337 doPrintErr(runSilent, "ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n"); 323 338 printUsage = true; 324 339 returnVal = -1; … … 326 341 } 327 342 } else { // unrecognised input argument 328 System.err.println("ERROR: Unrecognised " + i + "th argument to this program.\n");343 doPrintErr(runSilent, "ERROR: Unrecognised " + i + "th argument to this program.\n"); 329 344 printUsage = true; 330 345 returnVal = -1; … … 333 348 334 349 if(!readFromStdIn && inFile == null) { // at least one input source must be provided 335 System.err.println("ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n");350 doPrintErr(runSilent, "ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n"); 336 351 printUsage = true; 337 352 returnVal = -1; … … 339 354 340 355 if(readFromStdIn && inFile != null) { // this program can't be asked to read from stdin and from an input file 341 System.err.println("ERROR: instructed to read from both STDIN and from an input file. Not possible.\n");356 doPrintErr(runSilent, "ERROR: instructed to read from both STDIN and from an input file. Not possible.\n"); 342 357 printUsage = true; 343 358 returnVal = -1; … … 345 360 346 361 if(printUsage) { 347 if(!runSilent || returnVal == 2) { // if expressly asked for help or not running silent 362 // If not running silent print usage. 363 // OR if expressly asked for help, then it doesn't matter if we're running silent: still print usage to stderr. 364 if(returnVal == 2 || !runSilent) { 348 365 printUsage(); 349 366 } … … 352 369 353 370 try { 354 Maori Detector maoriTextDetector = null;371 MaoriTextDetector maoriTextDetector = null; 355 372 if(minConfidence == -1) { 356 maoriTextDetector = new Maori Detector(runSilent);373 maoriTextDetector = new MaoriTextDetector(runSilent); 357 374 } else { 358 maoriTextDetector = new Maori Detector(runSilent, minConfidence);359 } 360 361 //boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); 375 maoriTextDetector = new MaoriTextDetector(runSilent, minConfidence); 376 } 377 378 //boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); // test hardcoded string 362 379 boolean textIsInMaori = false; 363 380 364 381 // Using try with resources, https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html 365 382 if(inFile != null) { 366 System.err.println("Reading text from file " + inFile.getPath());383 doPrint(runSilent, "Reading text from file " + inFile.getPath()); 367 384 try (BufferedReader reader = new BufferedReader(new FileReader(inFile))) { 368 385 textIsInMaori = maoriTextDetector.isTextInMaori(reader); … … 370 387 } 371 388 else if (readFromStdIn) { 372 System.err.println("Waiting to read text from STDIN... (press Ctrl-D when done entering text)>");389 doPrint(runSilent, "Waiting to read text from STDIN... (press Ctrl-D when done entering text)>"); 373 390 try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) { 374 391 textIsInMaori = maoriTextDetector.isTextInMaori(reader); … … 386 403 387 404 } finally { 388 System.err.println("Exitting program with returnVal " + returnVal + "...\n");405 doPrint(runSilent, "Exitting program with returnVal " + returnVal + "...\n"); 389 406 System.exit(returnVal); 390 407 } 391 408 } 392 393 // test hardcoded string394 public static void oldMain(String args[]) {395 int returnVal = -1;396 boolean silentMode = false;397 398 try {399 MaoriDetector maoriTextDetector = new MaoriDetector(silentMode);400 401 boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT);402 if(textIsInMaori) {403 returnVal = 0;404 } else {405 returnVal = 1;406 }407 408 } catch(Exception e) {409 e.printStackTrace();410 } finally {411 System.err.println("Exitting program with returnVal " + returnVal + "...\n");412 System.exit(returnVal);413 }414 }415 416 409 417 410 }
Note:
See TracChangeset
for help on using the changeset viewer.