Changeset 33338

Show
Ignore:
Timestamp:
20.07.2019 23:24:46 (5 weeks ago)
Author:
ak19
Message:

1. After renaming the Java class, changed all occurrences of the old name MaoriDetector to MaoriTextDetector; 2. Running in silent mode is now implemented: messages are suppressed, except for the usage text when it is explicitly requested with the --help or -h flag.

Location:
gs3-extensions/maori-lang-detection/src
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • gs3-extensions/maori-lang-detection/src/MaoriTextDetector.java

    r33337 r33338  
    1717 * EXPORT OPENNLP_HOME environment variable to be your apache OpenNLP installation. 
    1818 * Then, to compile this program: 
    19  *    maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" MaoriDetector.java 
     19 *    maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" MaoriTextDetector.java 
    2020 * To run this program, one of: 
    2121 * 
    22  *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector --help 
     22 *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector --help 
    2323 * 
    24  *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector --file <full/path/to/textfile> 
     24 *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector --file <full/path/to/textfile> 
    2525 * 
    26  *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector - 
     26 *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector - 
    2727 *       which expects text to stream in from standard input. 
    2828 *       If entering text manually, then remember to press Ctrl-D to indicate the usual end of StdIn. 
     
    3131 * Also has information on how to run this class if it's in a Java package. 
    3232 */ 
    33 public class MaoriDetector { 
     33public class MaoriTextDetector { 
    3434    /** The 3 letter language code for Maori in ISO 639-2 or ISO 639-3 */  
    3535    public static final String MAORI_3LETTER_CODE = "mri"; 
     
    3737 
    3838    /** Configurable: cut off minimum confidence value, 
    39     greater or equal to which determines that the best predicted language is acceptable to user of MaoriDetector. */ 
     39    greater or equal to which determines that the best predicted language is acceptable to user of MaoriTextDetector. */ 
    4040    public final double MINIMUM_CONFIDENCE; 
    41     /** silentMode set to false means MaoriDetector won't print helpful messages while running. Set to true to run silently. */ 
     41    /** silentMode set to false means MaoriTextDetector will print helpful messages while running. Set to true to run silently. */ 
    4242    public final boolean silentMode; 
    4343 
     
    5656     
    5757     
    58     public MaoriDetector(boolean silentMode) throws Exception { 
     58    public MaoriTextDetector(boolean silentMode) throws Exception { 
    5959    this(silentMode, DEFAULT_MINIMUM_CONFIDENCE); 
    6060    } 
    6161     
    62     public MaoriDetector(boolean silentMode, double min_confidence) throws Exception { 
     62    public MaoriTextDetector(boolean silentMode, double min_confidence) throws Exception { 
    6363    this.silentMode = silentMode; 
    6464    this.MINIMUM_CONFIDENCE = min_confidence; 
     
    110110    // Get the most probable language 
    111111    Language bestLanguage = myCategorizer.predictLanguage(text); 
    112     System.out.println("Best language: " + bestLanguage.getLang()); 
    113     System.out.println("Best language confidence: " + bestLanguage.getConfidence()); 
     112    doPrint("Best language: " + bestLanguage.getLang()); 
     113    doPrint("Best language confidence: " + bestLanguage.getConfidence()); 
    114114 
    115115    return (bestLanguage.getLang().equals(langCode) && bestLanguage.getConfidence() >= this.MINIMUM_CONFIDENCE); 
     
    184184        Language bestLanguage = myCategorizer.predictLanguage(text.toString()); 
    185185        if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines 
    186             System.err.println("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");            
     186            doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");            
    187187        } 
    188188        language = bestLanguage.getLang(); 
    189189        cumulativeConfidence += bestLanguage.getConfidence(); 
    190190         
    191         System.err.println("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");  
     191        doPrintErr("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");  
    192192         
    193193        // finished analysing language of NUM_LINES of text 
     
    206206         
    207207        if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines 
    208         System.err.println("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");            
     208        doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");            
    209209        } 
    210210        language = bestLanguage.getLang(); 
    211211        cumulativeConfidence += bestLanguage.getConfidence(); 
    212         System.err.println("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")"); 
     212        doPrintErr("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")"); 
    213213    } 
    214214     
     
    220220    return (language.equals(langCode) && avgConfidence >= this.MINIMUM_CONFIDENCE); 
    221221    } 
    222  
    223222     
    224223 
     
    233232     
    234233    if(languages == null || languages.length <= 0) { 
    235         System.err.println("No languages predicted for the input text"); 
     234        doPrintErr("No languages predicted for the input text"); 
    236235    } else { 
    237236        for(int i = 0; i < languages.length; i++) { 
    238         System.out.println("Language prediction " + i + ": " + languages[i]); 
    239         } 
    240     } 
    241      
    242     } 
    243  
     237        doPrint("Language prediction " + i + ": " + languages[i]); 
     238        } 
     239    } 
     240     
     241    } 
     242 
     243    public void doPrint(String msg) { 
     244    doPrint(this.silentMode, msg); 
     245    } 
     246    public void doPrintErr(String msg) { 
     247    doPrintErr(this.silentMode, msg); 
     248    } 
     249 
     250    /********** STATIC METHODS *************/ 
     251 
     252    public static void doPrint(boolean runSilent, String msg) { 
     253    if(!runSilent) System.out.println(msg);  
     254    } 
     255    public static void doPrintErr(boolean runSilent, String msg) { 
     256    if(!runSilent) System.err.println(msg);  
     257    } 
     258     
    244259    public static void printUsage() { 
    245260    System.err.println("Run this program with:"); 
     
    256271    System.err.println(); 
    257272    System.err.println("\t-1 if the input arguments were wrong"); 
    258     System.err.println("\t255(!) if an Exception occurred in instantiating the MaoriDetector when attempting to detect the text's language"); 
     273    System.err.println("\t255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language"); 
    259274    System.err.println("\t2 if the user asked to run this program with --help/-h."); 
    260275    System.err.println(); 
     
    267282     * 
    268283     *    -1 if the input arguments were wrong 
    269      *    255(!) if an Exception occurred in instantiating the MaoriDetector when attempting to detect the text's language 
     284     *    255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language 
    270285     * QTODO: why does the program exit value end up as 255 and not -1 when returnVal remains at -1 on Exception? 
    271286     *    2 if the user asked to run this program with --help/-h. 
     
    292307        i++; 
    293308        if(i >= args.length) { 
    294             System.err.println("ERROR: No minimum confidence value provided with --min-confidence|-c flag.\n"); 
     309            doPrintErr(runSilent, "ERROR: No minimum confidence value provided with --min-confidence|-c flag.\n"); 
    295310            printUsage = true; 
    296311            returnVal = -1; 
     
    302317            } 
    303318            } catch(NumberFormatException nfe) { 
    304             System.err.println("ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n"); 
     319            doPrintErr(runSilent, "ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n"); 
    305320            printUsage = true; 
    306321            returnVal = -1; 
     
    313328        i++; 
    314329        if(i >= args.length) { 
    315             System.err.println("ERROR: No input file provided with --file|-f flag.\n"); 
     330            doPrintErr(runSilent, "ERROR: No input file provided with --file|-f flag.\n"); 
    316331            printUsage = true; 
    317332            returnVal = -1; 
     
    320335            inFile = new File(filePath); 
    321336            if(!inFile.isFile()) { 
    322             System.err.println("ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n"); 
     337            doPrintErr(runSilent, "ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n"); 
    323338            printUsage = true; 
    324339            returnVal = -1; 
     
    326341        } 
    327342        } else { // unrecognised input argument 
    328         System.err.println("ERROR: Unrecognised " + i + "th argument to this program.\n"); 
     343        doPrintErr(runSilent, "ERROR: Unrecognised " + i + "th argument to this program.\n"); 
    329344        printUsage = true; 
    330345        returnVal = -1; 
     
    333348 
    334349    if(!readFromStdIn && inFile == null) { // at least one input source must be provided 
    335         System.err.println("ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n"); 
     350        doPrintErr(runSilent, "ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n"); 
    336351        printUsage = true; 
    337352        returnVal = -1; 
     
    339354     
    340355    if(readFromStdIn && inFile != null) { // this program can't be asked to read from stdin and from an input file 
    341         System.err.println("ERROR: instructed to read from both STDIN and from an input file. Not possible.\n"); 
     356        doPrintErr(runSilent, "ERROR: instructed to read from both STDIN and from an input file. Not possible.\n"); 
    342357        printUsage = true; 
    343358        returnVal = -1; 
     
    345360     
    346361    if(printUsage) { 
    347         if(!runSilent || returnVal == 2) { // if expressly asked for help or not running silent 
     362        // If not running silent print usage. 
     363        // OR if expressly asked for help, then it doesn't matter if we're running silent: still print usage to stderr. 
     364        if(returnVal == 2 || !runSilent) {           
    348365        printUsage(); 
    349366        } 
     
    352369     
    353370    try { 
    354         MaoriDetector maoriTextDetector = null; 
     371        MaoriTextDetector maoriTextDetector = null; 
    355372        if(minConfidence == -1) { 
    356         maoriTextDetector = new MaoriDetector(runSilent); 
     373        maoriTextDetector = new MaoriTextDetector(runSilent); 
    357374        } else { 
    358         maoriTextDetector = new MaoriDetector(runSilent, minConfidence); 
    359         } 
    360          
    361         //boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); 
     375        maoriTextDetector = new MaoriTextDetector(runSilent, minConfidence); 
     376        } 
     377         
     378        //boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); // test hardcoded string 
    362379        boolean textIsInMaori = false; 
    363380         
    364381        // Using try with resources, https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html 
    365382        if(inFile != null) { 
    366         System.err.println("Reading text from file " + inFile.getPath()); 
     383        doPrint(runSilent, "Reading text from file " + inFile.getPath()); 
    367384        try (BufferedReader reader = new BufferedReader(new FileReader(inFile))) { 
    368385            textIsInMaori = maoriTextDetector.isTextInMaori(reader); 
     
    370387        } 
    371388        else if (readFromStdIn) { 
    372         System.err.println("Waiting to read text from STDIN... (press Ctrl-D when done entering text)>"); 
     389        doPrint(runSilent, "Waiting to read text from STDIN... (press Ctrl-D when done entering text)>"); 
    373390        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) { 
    374391            textIsInMaori = maoriTextDetector.isTextInMaori(reader);             
     
    386403         
    387404    } finally { 
    388         System.err.println("Exitting program with returnVal " + returnVal + "...\n"); 
     405        doPrint(runSilent, "Exitting program with returnVal " + returnVal + "...\n"); 
    389406        System.exit(returnVal); 
    390407    } 
    391408    } 
    392  
    393     // test hardcoded string 
    394     public static void oldMain(String args[]) { 
    395     int returnVal = -1; 
    396     boolean silentMode = false; 
    397      
    398     try { 
    399         MaoriDetector maoriTextDetector = new MaoriDetector(silentMode); 
    400          
    401         boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); 
    402         if(textIsInMaori) { 
    403         returnVal = 0; 
    404         } else { 
    405         returnVal = 1; 
    406         } 
    407          
    408     } catch(Exception e) { 
    409         e.printStackTrace(); 
    410     } finally { 
    411         System.err.println("Exitting program with returnVal " + returnVal + "...\n"); 
    412         System.exit(returnVal); 
    413     } 
    414     } 
    415  
    416409     
    417410}