source: gs3-extensions/maori-lang-detection/src/MaoriDetector.java@ 33335

Last change on this file since 33335 was 33335, checked in by ak19, 5 years ago

First Java file for Māori language detection using OpenNLP with the LanguageDetectionModel. Instructions and reading links are within the README.txt. Though I may not need to commit Apache's OpenNLP binary zip and the LanguageDetectionModel.bin file (actually a zip), I'm including those too. Near the end of the README.txt instructions, it covers the steps on how to compile and run the new Java file, MaoriDetector.java. At present, this rudimentary class takes a hardcoded two-line sentence in Māori, taken from our uni website, as input and correctly chooses mri (the three-letter language code for Māori) as the best predicted language, at over 60% confidence.

File size: 2.7 KB
Line 
1/**
2 * http://opennlp.apache.org/news/model-langdetect-183.html
3 * language detector model: http://opennlp.apache.org/models.html
4 * Use of Apache OpenNLP in general:
5 * http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
6 * Use of OpenNLP for language detection:
7 * http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
8 *
9 * This code was based on the information and sample code at the above links and the links dispersed throughout this file.
10 */
11
12import java.io.*;
13import opennlp.tools.langdetect.*;
14import opennlp.tools.util.*;
15
16/**
17 * Run as:
18 * wharariki:[115]/Scratch/ak19/openNLP-lang-detect/src>javac -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector.java
19 * wharariki:[116]/Scratch/ak19/openNLP-lang-detect/src>java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector
20 *
21 * https://stackoverflow.com/questions/219585/including-all-the-jars-in-a-directory-within-the-java-classpath
22 * Also has information on how to run this class if it's in a Java package.
23 */
24public class MaoriDetector {
25
26 /**
27 * Taken from our university website
28 * https://www.waikato.ac.nz/maori/
29 */
30 public static final String TEST_INPUT_TEXT = "Ko tēnei te Whare Wānanga o Waikato e whakatau nei i ngā iwi o te ao, ki roto i te riu o te awa e rere nei, ki runga i te whenua e hora nei, ki raro i te taumaru o ngā maunga whakaruru e tau awhi nei.";
31
32 public static void main(String args[]) {
33 // http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
34 // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
35 try (InputStream modelIn = new FileInputStream("/Scratch/ak19/openNLP-lang-detect/langdetect-183.bin")) {
36
37 LanguageDetectorModel model = new LanguageDetectorModel(modelIn);
38
39 // http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
40 LanguageDetector myCategorizer = new LanguageDetectorME(model);
41
42 // Get the most probable language
43 Language bestLanguage = myCategorizer.predictLanguage(TEST_INPUT_TEXT);
44 System.out.println("Best language: " + bestLanguage.getLang());
45 System.out.println("Best language confidence: " + bestLanguage.getConfidence());
46
47
48 // Get an array with the most probable languages
49 Language[] languages = myCategorizer.predictLanguages(TEST_INPUT_TEXT);
50 /*
51 if(languages == null || languages.length <= 0) {
52 System.err.println("No languages predicted for the input text");
53 } else {
54 for(int i = 0; i < languages.length; i++) {
55 System.out.println("Language prediction " + i + ": " + languages[i]);
56 }
57 }*/
58
59 } catch(Exception e) {
60 e.printStackTrace();
61 }
62
63 System.err.println("Exitting program...\n");
64 System.exit(0);
65 }
66}
Note: See TracBrowser for help on using the repository browser.