source: other-projects/the-macronizer/trunk/src/java/monogram/restorer/MonogramRestorer.java@ 30062

Last change on this file since 30062 was 30062, checked in by davidb, 9 years ago

Removal/Tidy-up of debug statements

File size: 7.8 KB
Line 
1/*
2 * This class does all the hard work. It looks up the input token in the MonogramModel and returns the restored token.
3 */
4package monogram.restorer;
5
6import monogram.model.MonogramFactory;
7import monogram.model.MonogramModel;
8import monogram.model.ListModel;
9import util.StringUtil;
10import web.servlets.DirectInput;
11
12/**
13* @author University of Waikato - Te Whare Wānanga o Waikato
14 * @version 1.0
15 * @since 2014-11-20
16 */
17public class MonogramRestorer {
18
19 private boolean preserveMacrons;
20 private MonogramModel macronModel;
21 private MonogramModel doubleVowelModel;
22 private ListModel blackListModel;
23
24 private String previousToken;
25 private String currentToken;
26 private Selector selector;
27
28 public MonogramRestorer(boolean preserveMacrons) {
29 this.preserveMacrons = preserveMacrons;
30 macronModel = MonogramFactory.getMacronModel();
31 doubleVowelModel = MonogramFactory.getDoubleVowelModel();
32 blackListModel = MonogramFactory.getBlackList();
33 previousToken = "";
34 selector = new Selector(3);
35 }
36
37 public String restore(String token) {
38
39 // Test to see if it should preserve the macrons already in the input texts. If not remove all macrons.
40 if (!preserveMacrons && StringUtil.containsAccents(token)) {
41 token = StringUtil.removeAccents(token);
42 }
43
44
45 // Make lower case copy of token.
46 final String tokenLowerCase = token.toLowerCase();
47
48 String restoredToken = token;
49 //If the word is in the black list return the token with no macron.
50if(blackListModel.contains(token)){return restoredToken;}
51 // Is selector > 0 it is on a sequence
52 if (selector.isMacronTokenSequence()) {
53 if (restoreByMacronModel(tokenLowerCase)) {
54
55 //Restore capitalization
56 restoredToken = StringUtil.copyCapitalization(token, currentToken);
57 // If restoredToken is not the same as token then macron/s have been add so add <mark> html. This is the yellow background around the words on the front end.
58 if(!restoredToken.equals(token)){
59 // only add <mark> html tag if it directinput and not being output to a file.
60 if(DirectInput.DI){restoredToken = "<mark>"+restoredToken+"</mark> ";}
61 }
62
63 //if the tonken can not be restored by the MacronModel try and restor it with the DoubleVowelModel.
64 } else if (restoreByDoubleVowelModel(tokenLowerCase)) {
65 restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
66 } else {
67 currentToken = tokenLowerCase;
68 }
69 }
70 // else if it is not in a sequence
71 else {
72 // Try and restor it with the DoubleVowelModel
73 if (restoreByDoubleVowelModel(tokenLowerCase)) {
74 restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
75 }
76 // try and restor it with the MacronModel
77 else if (restoreByMacronModel(tokenLowerCase)) {
78 restoredToken = StringUtil.copyCapitalization(token, currentToken);
79 }
80 // Dose not need to be restored
81 else {
82 currentToken = tokenLowerCase;
83 }
84 }
85 previousToken = currentToken;
86 return restoredToken;
87 }
88
89 // trys to restore by using the MacronModel and return boolean if it has.
90 private boolean restoreByMacronModel(String token) {
91
92 // Test to see if token is in the Distinct Transformation hashset.
93 if (macronModel.isDistinctTransformation(token)) {
94 //if token is in Distinct Transformation hashset make the distinct ransformation then move along the sequence.
95 currentToken = macronModel.getDistinctTransformation(token);
96 selector.incrementMacronTokenSequence();
97 return true;
98 }
99 // If token is in Indistinct Transformation hashset
100 else if (macronModel.isIndistinctTransformation(token)) {
101 final String[] transformations = macronModel.getIndistinctTransformation(token);
102 double maxProbability = Double.MIN_VALUE;
103 String maxToken = token;
104 // loops all transformation and finds the one with the highest probability given the previous token.
105 for (String transformation : transformations) {
106 //double probability = macronModel.getIndistinctProbability(transformation);
107 double probability = 1.0;
108 //if macron model has a probability for this transformation then get its probability.
109 if (macronModel.containsMonogramProbability(transformation, previousToken)) {
110 probability *= macronModel.getMonogramProbability(transformation, previousToken);
111 }
112
113 // if transformation not in macron model then try and find probability in Indistinct probability
114 if (probability == 1.0) {
115 probability = macronModel.getIndistinctProbability(transformation);
116 }
117
118 //If probability is better then any before it update maxProbability and maxToken.
119 if (probability >= maxProbability) {
120 maxProbability = probability;
121 maxToken = transformation;
122 }
123 }
124 currentToken = maxToken;
125 selector.incrementMacronTokenSequence();
126 return true;
127 }
128 return false;
129 }
130
131 private boolean restoreByDoubleVowelModel(String token) {
132 if (doubleVowelModel.isDistinctTransformation(token)) {
133 try {
134 currentToken = doubleVowelModel.getDistinctTransformation(token);
135 selector.incrementDoubleVowelTokenSequence();
136 return true;
137 } catch (Exception e) {
138 System.err.println("double vowel error 1");
139 System.exit(0);
140 }
141 } else if (doubleVowelModel.isIndistinctTransformation(token)) {
142 String[] transformations = doubleVowelModel.getIndistinctTransformation(token);
143 Double maxProbability = Double.MIN_VALUE;
144 String maxToken = token;
145 for (String transformation : transformations) {
146 //double probability = doubleVowelModel.getIndistinctProbability(transformation);
147
148 double probability = 1.0;
149 if (doubleVowelModel.containsMonogramProbability(transformation, previousToken)) {
150 probability *= doubleVowelModel.getMonogramProbability(transformation, previousToken);
151 }
152 if (probability == 1.0) {
153 probability = doubleVowelModel.getIndistinctProbability(transformation);
154 }
155 if (probability >= maxProbability) {
156 maxProbability = probability;
157 maxToken = transformation;
158 }
159 }
160 currentToken = maxToken;
161 selector.incrementDoubleVowelTokenSequence();
162 return true;
163 }
164 return false;
165 }
166
167 private class Selector {
168
169 private int max;
170 private int current;
171
172 public Selector(int max) {
173 this.max = max;
174 current = max;
175 }
176
177 public void incrementMacronTokenSequence() {
178 if (current < max) {
179 current++;
180 }
181 }
182
183 public void incrementDoubleVowelTokenSequence() {
184 if (current > 0) {
185 current--;
186 }
187 }
188
189 public boolean isMacronTokenSequence() {
190 return current > 0;
191 }
192
193 public boolean isDoubleVowelTokenSequence() {
194 return current == 0;
195 }
196 }
197}
Note: See TracBrowser for help on using the repository browser.