/**
 * http://opennlp.apache.org/news/model-langdetect-183.html
 * Language detector model: http://opennlp.apache.org/models.html
 * Use of Apache OpenNLP in general:
 * http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
 * Use of OpenNLP for language detection:
 * http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
 *
 * This code was based on the information and sample code at the above links and the links dispersed throughout this file.
 */
11 |
|
---|
12 | import java.io.*;
|
---|
13 | import opennlp.tools.langdetect.*;
|
---|
14 | import opennlp.tools.util.*;
|
---|
15 |
|
---|
16 | /**
|
---|
17 | * Run as:
|
---|
18 | * wharariki:[115]/Scratch/ak19/openNLP-lang-detect/src>javac -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector.java
|
---|
19 | * wharariki:[116]/Scratch/ak19/openNLP-lang-detect/src>java -cp ".:$OPENNLP_HOME/lib/*" MaoriDetector
|
---|
20 | *
|
---|
21 | * https://stackoverflow.com/questions/219585/including-all-the-jars-in-a-directory-within-the-java-classpath
|
---|
22 | * Also has information on how to run this class if it's in a Java package.
|
---|
23 | */
|
---|
24 | public class MaoriDetector {
|
---|
25 |
|
---|
26 | /**
|
---|
27 | * Taken from our university website
|
---|
28 | * https://www.waikato.ac.nz/maori/
|
---|
29 | */
|
---|
30 | public static final String TEST_INPUT_TEXT = "Ko tÄnei te Whare WÄnanga o Waikato e whakatau nei i ngÄ iwi o te ao, ki roto i te riu o te awa e rere nei, ki runga i te whenua e hora nei, ki raro i te taumaru o ngÄ maunga whakaruru e tau awhi nei.";
|
---|
31 |
|
---|
32 | public static void main(String args[]) {
|
---|
33 | // http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
|
---|
34 | // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
|
---|
35 | try (InputStream modelIn = new FileInputStream("/Scratch/ak19/openNLP-lang-detect/langdetect-183.bin")) {
|
---|
36 |
|
---|
37 | LanguageDetectorModel model = new LanguageDetectorModel(modelIn);
|
---|
38 |
|
---|
39 | // http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
|
---|
40 | LanguageDetector myCategorizer = new LanguageDetectorME(model);
|
---|
41 |
|
---|
42 | // Get the most probable language
|
---|
43 | Language bestLanguage = myCategorizer.predictLanguage(TEST_INPUT_TEXT);
|
---|
44 | System.out.println("Best language: " + bestLanguage.getLang());
|
---|
45 | System.out.println("Best language confidence: " + bestLanguage.getConfidence());
|
---|
46 |
|
---|
47 |
|
---|
48 | // Get an array with the most probable languages
|
---|
49 | Language[] languages = myCategorizer.predictLanguages(TEST_INPUT_TEXT);
|
---|
50 | /*
|
---|
51 | if(languages == null || languages.length <= 0) {
|
---|
52 | System.err.println("No languages predicted for the input text");
|
---|
53 | } else {
|
---|
54 | for(int i = 0; i < languages.length; i++) {
|
---|
55 | System.out.println("Language prediction " + i + ": " + languages[i]);
|
---|
56 | }
|
---|
57 | }*/
|
---|
58 |
|
---|
59 | } catch(Exception e) {
|
---|
60 | e.printStackTrace();
|
---|
61 | }
|
---|
62 |
|
---|
63 | System.err.println("Exitting program...\n");
|
---|
64 | System.exit(0);
|
---|
65 | }
|
---|
66 | }
|
---|