Changeset 33651 for other-projects/maori-lang-detection/src/org/greenstone/atea/TextLanguageDetector.java
- Timestamp:
- 2019-11-12T18:11:39+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/maori-lang-detection/src/org/greenstone/atea/TextLanguageDetector.java
r33634  r33651
184     184       for(int i = 1; i < sentences.length; i++) {
185     185           // glue every two adjacent sentences together
186             -         String sentence = sentences[i-1];
        186     +         String doubleSentence = sentences[i-1];
187     187
188     188           String separator = ". ";
189     189           // if the sentence already ends with a terminating punctuation character,
190     190           // then separator is just a space
191             -         sentence = sentence.trim();
192             -         if(sentence.endsWith(".") || sentence.endsWith("?") || sentence.endsWith("!")) {
        191     +         doubleSentence = doubleSentence.trim();
        192     +         if(doubleSentence.endsWith(".") || doubleSentence.endsWith("?") || doubleSentence.endsWith("!")) {
193     193               separator = " ";
194     194           }
195             -         sentence = sentence + separator + sentences[i];
        195     +         doubleSentence = doubleSentence + separator + sentences[i];
196     196
197     197           //System.err.println(sentence);
198     198
199             -         Language bestLanguage = myCategorizer.predictLanguage(sentence);
        199     +         Language bestLanguage = myCategorizer.predictLanguage(doubleSentence);
200     200           double confidence = bestLanguage.getConfidence();
201     201
202             -         sentencesList.add(new SentenceInfo(confidence, bestLanguage.getLang(), sentence));
        202     +         sentencesList.add(new SentenceInfo(confidence, bestLanguage.getLang(), doubleSentence));
203     203       }
204     204
Note:
See TracChangeset
for help on using the changeset viewer.