Changeset 33338 for gs3-extensions


Ignore:
Timestamp:
2019-07-20T23:24:46+12:00 (5 years ago)
Author:
ak19
Message:

1. After renaming the Java class, changed all occurrences of the old name MaoriDetector to MaoriTextDetector. 2. Silent mode is now implemented: the program can run silently, except when the usage is explicitly invoked with the --help or -h flag.

Location:
gs3-extensions/maori-lang-detection/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/maori-lang-detection/src/MaoriTextDetector.java

    r33337 r33338  
    1717 * EXPORT OPENNLP_HOME environment variable to be your apache OpenNLP installation.
    1818 * Then, to compile this program:
    19  *    maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" MaoriDetector.java
     19 *    maori-lang-detection/src$ javac -cp ".:$OPENNLP_HOME/lib/opennlp-tools-1.9.1.jar" MaoriTextDetector.java
    2020 * To run this program, one of:
    2121 *
    22  *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector --help
     22 *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector --help
    2323 *
    24  *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector --file <full/path/to/textfile>
     24 *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector --file <full/path/to/textfile>
    2525 *
    26  *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector -
     26 *    maori-lang-detection/src$ java -cp ".:$OPENNLP_HOME/lib/*" MaoriTextDetector -
    2727 *       which expects text to stream in from standard input.
    2828 *       If entering text manually, then remember to press Ctrl-D to indicate the usual end of StdIn.
     
    3131 * Also has information on how to run this class if it's in a Java package.
    3232 */
    33 public class MaoriDetector {
     33public class MaoriTextDetector {
    3434    /** The 3 letter language code for Maori in ISO 639-2 or ISO 639-3 */
    3535    public static final String MAORI_3LETTER_CODE = "mri";
     
    3737
    3838    /** Configurable: cut off minimum confidence value,
    39     greater or equal to which determines that the best predicted language is acceptable to user of MaoriDetector. */
     39    greater or equal to which determines that the best predicted language is acceptable to user of MaoriTextDetector. */
    4040    public final double MINIMUM_CONFIDENCE;
    41     /** silentMode set to false means MaoriDetector won't print helpful messages while running. Set to true to run silently. */
     41    /** silentMode set to true means MaoriTextDetector won't print helpful messages while running. Set to false to have them printed. */
    4242    public final boolean silentMode;
    4343
     
    5656   
    5757   
    58     public MaoriDetector(boolean silentMode) throws Exception {
     58    public MaoriTextDetector(boolean silentMode) throws Exception {
    5959    this(silentMode, DEFAULT_MINIMUM_CONFIDENCE);
    6060    }
    6161   
    62     public MaoriDetector(boolean silentMode, double min_confidence) throws Exception {
     62    public MaoriTextDetector(boolean silentMode, double min_confidence) throws Exception {
    6363    this.silentMode = silentMode;
    6464    this.MINIMUM_CONFIDENCE = min_confidence;
     
    110110    // Get the most probable language
    111111    Language bestLanguage = myCategorizer.predictLanguage(text);
    112     System.out.println("Best language: " + bestLanguage.getLang());
    113     System.out.println("Best language confidence: " + bestLanguage.getConfidence());
     112    doPrint("Best language: " + bestLanguage.getLang());
     113    doPrint("Best language confidence: " + bestLanguage.getConfidence());
    114114
    115115    return (bestLanguage.getLang().equals(langCode) && bestLanguage.getConfidence() >= this.MINIMUM_CONFIDENCE);
     
    184184        Language bestLanguage = myCategorizer.predictLanguage(text.toString());
    185185        if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines
    186             System.err.println("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");           
     186            doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");           
    187187        }
    188188        language = bestLanguage.getLang();
    189189        cumulativeConfidence += bestLanguage.getConfidence();
    190190       
    191         System.err.println("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
     191        doPrintErr("Best predicted language for last " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
    192192       
    193193        // finished analysing language of NUM_LINES of text
     
    206206       
    207207        if(language != null && !bestLanguage.getLang().equals(language)) { // predicted lang of current n lines not the same as predicted lang for prev n lines
    208         System.err.println("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");           
     208        doPrintErr("**** WARNING: text seems to contain content in multiple languages or unable to consistently predict the same language.");           
    209209        }
    210210        language = bestLanguage.getLang();
    211211        cumulativeConfidence += bestLanguage.getConfidence();
    212         System.err.println("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
     212        doPrintErr("Best predicted language for final " + NUM_LINES + " lines: " + language + "(confidence: " + bestLanguage.getConfidence() + ")");
    213213    }
    214214   
     
    220220    return (language.equals(langCode) && avgConfidence >= this.MINIMUM_CONFIDENCE);
    221221    }
    222 
    223222   
    224223
     
    233232   
    234233    if(languages == null || languages.length <= 0) {
    235         System.err.println("No languages predicted for the input text");
     234        doPrintErr("No languages predicted for the input text");
    236235    } else {
    237236        for(int i = 0; i < languages.length; i++) {
    238         System.out.println("Language prediction " + i + ": " + languages[i]);
    239         }
    240     }
    241    
    242     }
    243 
     237        doPrint("Language prediction " + i + ": " + languages[i]);
     238        }
     239    }
     240   
     241    }
     242
     243    public void doPrint(String msg) {
     244    doPrint(this.silentMode, msg);
     245    }
     246    public void doPrintErr(String msg) {
     247    doPrintErr(this.silentMode, msg);
     248    }
     249
     250    /********** STATIC METHODS *************/
     251
     252    public static void doPrint(boolean runSilent, String msg) {
     253    if(!runSilent) System.out.println(msg);
     254    }
     255    public static void doPrintErr(boolean runSilent, String msg) {
     256    if(!runSilent) System.err.println(msg);
     257    }
     258   
    244259    public static void printUsage() {
    245260    System.err.println("Run this program with:");
     
    256271    System.err.println();
    257272    System.err.println("\t-1 if the input arguments were wrong");
    258     System.err.println("\t255(!) if an Exception occurred in instantiating the MaoriDetector when attempting to detect the text's language");
     273    System.err.println("\t255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language");
    259274    System.err.println("\t2 if the user asked to run this program with --help/-h.");
    260275    System.err.println();
     
    267282     *
    268283     *    -1 if the input arguments were wrong
    269      *    255(!) if an Exception occurred in instantiating the MaoriDetector when attempting to detect the text's language
     284     *    255(!) if an Exception occurred in instantiating the MaoriTextDetector when attempting to detect the text's language
    270285     * QTODO: why does the program exit value end up as 255 and not -1 when returnVal remains at -1 on Exception?
    271286     *    2 if the user asked to run this program with --help/-h.
     
    292307        i++;
    293308        if(i >= args.length) {
    294             System.err.println("ERROR: No minimum confidence value provided with --min-confidence|-c flag.\n");
     309            doPrintErr(runSilent, "ERROR: No minimum confidence value provided with --min-confidence|-c flag.\n");
    295310            printUsage = true;
    296311            returnVal = -1;
     
    302317            }
    303318            } catch(NumberFormatException nfe) {
    304             System.err.println("ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n");
     319            doPrintErr(runSilent, "ERROR: value for min-confidence is the wrong format or out of range. It must be a (decimal point) number between 0-1.\n");
    305320            printUsage = true;
    306321            returnVal = -1;
     
    313328        i++;
    314329        if(i >= args.length) {
    315             System.err.println("ERROR: No input file provided with --file|-f flag.\n");
     330            doPrintErr(runSilent, "ERROR: No input file provided with --file|-f flag.\n");
    316331            printUsage = true;
    317332            returnVal = -1;
     
    320335            inFile = new File(filePath);
    321336            if(!inFile.isFile()) {
    322             System.err.println("ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n");
     337            doPrintErr(runSilent, "ERROR: Can't read text. Input file argument provided does not exist or is not a file.\n");
    323338            printUsage = true;
    324339            returnVal = -1;
     
    326341        }
    327342        } else { // unrecognised input argument
    328         System.err.println("ERROR: Unrecognised " + i + "th argument to this program.\n");
     343        doPrintErr(runSilent, "ERROR: Unrecognised " + i + "th argument to this program.\n");
    329344        printUsage = true;
    330345        returnVal = -1;
     
    333348
    334349    if(!readFromStdIn && inFile == null) { // at least one input source must be provided
    335         System.err.println("ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n");
     350        doPrintErr(runSilent, "ERROR: must specify source to read text from, either STDIN (-) or input file (--file <file>).\n");
    336351        printUsage = true;
    337352        returnVal = -1;
     
    339354   
    340355    if(readFromStdIn && inFile != null) { // this program can't be asked to read from stdin and from an input file
    341         System.err.println("ERROR: instructed to read from both STDIN and from an input file. Not possible.\n");
     356        doPrintErr(runSilent, "ERROR: instructed to read from both STDIN and from an input file. Not possible.\n");
    342357        printUsage = true;
    343358        returnVal = -1;
     
    345360   
    346361    if(printUsage) {
    347         if(!runSilent || returnVal == 2) { // if expressly asked for help or not running silent
     362        // If not running silent print usage.
     363        // OR if expressly asked for help, then it doesn't matter if we're running silent: still print usage to stderr.
     364        if(returnVal == 2 || !runSilent) {         
    348365        printUsage();
    349366        }
     
    352369   
    353370    try {
    354         MaoriDetector maoriTextDetector = null;
     371        MaoriTextDetector maoriTextDetector = null;
    355372        if(minConfidence == -1) {
    356         maoriTextDetector = new MaoriDetector(runSilent);
     373        maoriTextDetector = new MaoriTextDetector(runSilent);
    357374        } else {
    358         maoriTextDetector = new MaoriDetector(runSilent, minConfidence);
    359         }
    360        
    361         //boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT);
     375        maoriTextDetector = new MaoriTextDetector(runSilent, minConfidence);
     376        }
     377       
     378        //boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT); // test hardcoded string
    362379        boolean textIsInMaori = false;
    363380       
    364381        // Using try with resources, https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
    365382        if(inFile != null) {
    366         System.err.println("Reading text from file " + inFile.getPath());
     383        doPrint(runSilent, "Reading text from file " + inFile.getPath());
    367384        try (BufferedReader reader = new BufferedReader(new FileReader(inFile))) {
    368385            textIsInMaori = maoriTextDetector.isTextInMaori(reader);
     
    370387        }
    371388        else if (readFromStdIn) {
    372         System.err.println("Waiting to read text from STDIN... (press Ctrl-D when done entering text)>");
     389        doPrint(runSilent, "Waiting to read text from STDIN... (press Ctrl-D when done entering text)>");
    373390        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
    374391            textIsInMaori = maoriTextDetector.isTextInMaori(reader);           
     
    386403       
    387404    } finally {
    388         System.err.println("Exitting program with returnVal " + returnVal + "...\n");
     405        doPrint(runSilent, "Exitting program with returnVal " + returnVal + "...\n");
    389406        System.exit(returnVal);
    390407    }
    391408    }
    392 
    393     // test hardcoded string
    394     public static void oldMain(String args[]) {
    395     int returnVal = -1;
    396     boolean silentMode = false;
    397    
    398     try {
    399         MaoriDetector maoriTextDetector = new MaoriDetector(silentMode);
    400        
    401         boolean textIsInMaori = maoriTextDetector.isTextInMaori(TEST_MRI_INPUT_TEXT);
    402         if(textIsInMaori) {
    403         returnVal = 0;
    404         } else {
    405         returnVal = 1;
    406         }
    407        
    408     } catch(Exception e) {
    409         e.printStackTrace();
    410     } finally {
    411         System.err.println("Exitting program with returnVal " + returnVal + "...\n");
    412         System.exit(returnVal);
    413     }
    414     }
    415 
    416409   
    417410}
Note: See TracChangeset for help on using the changeset viewer.