source: other-projects/the-macronizer/trunk/src/java/monogram/restorer/MonogramRestorer.java@ 35719

Last change on this file since 35719 was 35719, checked in by cstephen, 2 years ago

Add support for JSON response to direct input queries. Cleanup other components.

File size: 7.9 KB
Line 
1/*
2 * This class does all the hard work. It looks up the input token in the MonogramModel and returns the restored token.
3 */
4package monogram.restorer;
5
6import monogram.model.MonogramFactory;
7import monogram.model.MonogramModel;
8import monogram.model.ListModel;
9import util.StringUtil;
10
11/**
12 * @author University of Waikato - Te Whare Wānanga o Waikato
13 * @version 1.0
14 * @since 2014-11-20
15 */
16public class MonogramRestorer {
17
18 private boolean preserveMacrons;
19 private MonogramModel macronModel;
20 private MonogramModel doubleVowelModel;
21 private ListModel blackListModel;
22
23 private String previousToken;
24 private String currentToken;
25 private Selector selector;
26
27 public MonogramRestorer(boolean preserveMacrons) {
28 this.preserveMacrons = preserveMacrons;
29 macronModel = MonogramFactory.getMacronModel();
30 doubleVowelModel = MonogramFactory.getDoubleVowelModel();
31 blackListModel = MonogramFactory.getBlackList();
32 previousToken = "";
33 selector = new Selector(3);
34 }
35
36 public String restore(String token, Boolean markupChangedWords) {
37 // Test to see if it should preserve the macrons already in the input texts. If
38 // not remove all macrons.
39 if (!preserveMacrons && StringUtil.containsAccents(token)) {
40 token = StringUtil.removeAccents(token);
41 }
42
43 // Make lower case copy of token.
44 final String tokenLowerCase = token.toLowerCase();
45
46 String restoredToken = token;
47 // If the word is in the black list return the token with no macron.
48 if (blackListModel.contains(token)) {
49 return restoredToken;
50 }
51 // Is selector > 0 it is on a sequence
52 if (selector.isMacronTokenSequence()) {
53 if (restoreByMacronModel(tokenLowerCase)) {
54 // Restore capitalization
55 restoredToken = StringUtil.copyCapitalization(token, currentToken);
56 // If restoredToken is not the same as token then macron/s have been add so add
57 // <mark> html. This is the yellow background around the words on the front end.
58 if (!restoredToken.equals(token)) {
59 // only add <mark> html tag if it directinput and not being output to a file.
60 if (markupChangedWords) {
61 restoredToken = "<mark>" + restoredToken + "</mark> ";
62 }
63 }
64
65 // if the tonken can not be restored by the MacronModel try and restor it with
66 // the DoubleVowelModel.
67 } else if (restoreByDoubleVowelModel(tokenLowerCase)) {
68 restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
69 } else {
70 currentToken = tokenLowerCase;
71 }
72 }
73 // else if it is not in a sequence
74 else {
75 // Try and restor it with the DoubleVowelModel
76 if (restoreByDoubleVowelModel(tokenLowerCase)) {
77 restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
78 }
79 // try and restor it with the MacronModel
80 else if (restoreByMacronModel(tokenLowerCase)) {
81 restoredToken = StringUtil.copyCapitalization(token, currentToken);
82 }
83 // Dose not need to be restored
84 else {
85 currentToken = tokenLowerCase;
86 }
87 }
88 previousToken = currentToken;
89 return restoredToken;
90 }
91
92 // trys to restore by using the MacronModel and return boolean if it has.
93 private boolean restoreByMacronModel(String token) {
94
95 // Test to see if token is in the Distinct Transformation hashset.
96 if (macronModel.isDistinctTransformation(token)) {
97 // if token is in Distinct Transformation hashset make the distinct
98 // ransformation then move along the sequence.
99 currentToken = macronModel.getDistinctTransformation(token);
100 selector.incrementMacronTokenSequence();
101 return true;
102 }
103 // If token is in Indistinct Transformation hashset
104 else if (macronModel.isIndistinctTransformation(token)) {
105 final String[] transformations = macronModel.getIndistinctTransformation(token);
106 double maxProbability = Double.MIN_VALUE;
107 String maxToken = token;
108 // loops all transformation and finds the one with the highest probability given
109 // the previous token.
110 for (String transformation : transformations) {
111 // double probability = macronModel.getIndistinctProbability(transformation);
112 double probability = 1.0;
113 // if macron model has a probability for this transformation then get its
114 // probability.
115 if (macronModel.containsMonogramProbability(transformation, previousToken)) {
116 probability *= macronModel.getMonogramProbability(transformation, previousToken);
117 }
118
119 // if transformation not in macron model then try and find probability in
120 // Indistinct probability
121 if (probability == 1.0) {
122 probability = macronModel.getIndistinctProbability(transformation);
123 }
124
125 // If probability is better then any before it update maxProbability and
126 // maxToken.
127 if (probability >= maxProbability) {
128 maxProbability = probability;
129 maxToken = transformation;
130 }
131 }
132 currentToken = maxToken;
133 selector.incrementMacronTokenSequence();
134 return true;
135 }
136 return false;
137 }
138
139 private boolean restoreByDoubleVowelModel(String token) {
140 if (doubleVowelModel.isDistinctTransformation(token)) {
141 try {
142 currentToken = doubleVowelModel.getDistinctTransformation(token);
143 selector.incrementDoubleVowelTokenSequence();
144 return true;
145 } catch (Exception e) {
146 System.err.println("double vowel error 1");
147 System.exit(0);
148 }
149 } else if (doubleVowelModel.isIndistinctTransformation(token)) {
150 String[] transformations = doubleVowelModel.getIndistinctTransformation(token);
151 Double maxProbability = Double.MIN_VALUE;
152 String maxToken = token;
153 for (String transformation : transformations) {
154 // double probability =
155 // doubleVowelModel.getIndistinctProbability(transformation);
156
157 double probability = 1.0;
158 if (doubleVowelModel.containsMonogramProbability(transformation, previousToken)) {
159 probability *= doubleVowelModel.getMonogramProbability(transformation, previousToken);
160 }
161 if (probability == 1.0) {
162 probability = doubleVowelModel.getIndistinctProbability(transformation);
163 }
164 if (probability >= maxProbability) {
165 maxProbability = probability;
166 maxToken = transformation;
167 }
168 }
169 currentToken = maxToken;
170 selector.incrementDoubleVowelTokenSequence();
171 return true;
172 }
173 return false;
174 }
175
176 private class Selector {
177
178 private int max;
179 private int current;
180
181 public Selector(int max) {
182 this.max = max;
183 current = max;
184 }
185
186 public void incrementMacronTokenSequence() {
187 if (current < max) {
188 current++;
189 }
190 }
191
192 public void incrementDoubleVowelTokenSequence() {
193 if (current > 0) {
194 current--;
195 }
196 }
197
198 public boolean isMacronTokenSequence() {
199 return current > 0;
200 }
201
202 // public boolean isDoubleVowelTokenSequence() {
203 // return current == 0;
204 // }
205 }
206}
Note: See TracBrowser for help on using the repository browser.