/*
 * This class does all the hard work. It looks up the input token in the MonogramModel and returns the restored token.
 */
|
---|
4 | package monogram.restorer;
|
---|
5 |
|
---|
6 | import monogram.model.MonogramFactory;
|
---|
7 | import monogram.model.MonogramModel;
|
---|
8 | import monogram.model.ListModel;
|
---|
9 | import util.StringUtil;
|
---|
10 | import web.servlets.DirectInput;
|
---|
11 |
|
---|
12 | /**
|
---|
13 | * @author University of Waikato - Te Whare WÄnanga o Waikato
|
---|
14 | * @version 1.0
|
---|
15 | * @since 2014-11-20
|
---|
16 | */
|
---|
17 | public class MonogramRestorer {
|
---|
18 |
|
---|
19 | private boolean preserveMacrons;
|
---|
20 | private MonogramModel macronModel;
|
---|
21 | private MonogramModel doubleVowelModel;
|
---|
22 | private ListModel blackListModel;
|
---|
23 |
|
---|
24 | private String previousToken;
|
---|
25 | private String currentToken;
|
---|
26 | private Selector selector;
|
---|
27 |
|
---|
28 | public MonogramRestorer(boolean preserveMacrons) {
|
---|
29 | System.out.println("test 9");
|
---|
30 | this.preserveMacrons = preserveMacrons;
|
---|
31 | System.out.println("test 10");
|
---|
32 | macronModel = MonogramFactory.getMacronModel();
|
---|
33 | System.out.println("test 11");
|
---|
34 | doubleVowelModel = MonogramFactory.getDoubleVowelModel();
|
---|
35 | System.out.println("test 12");
|
---|
36 | blackListModel = MonogramFactory.getBlackList();
|
---|
37 | previousToken = "";
|
---|
38 | selector = new Selector(3);
|
---|
39 | System.out.println("test done3");
|
---|
40 | }
|
---|
41 |
|
---|
42 | public String restore(String token) {
|
---|
43 |
|
---|
44 | // Test to see if it should preserve the macrons already in the input texts. If not remove all macrons.
|
---|
45 | if (!preserveMacrons && StringUtil.containsAccents(token)) {
|
---|
46 | token = StringUtil.removeAccents(token);
|
---|
47 | }
|
---|
48 |
|
---|
49 |
|
---|
50 | // Make lower case copy of token.
|
---|
51 | final String tokenLowerCase = token.toLowerCase();
|
---|
52 |
|
---|
53 | String restoredToken = token;
|
---|
54 | //If the word is in the black list return the token with no macron.
|
---|
55 | if(blackListModel.contains(token)){return restoredToken;}
|
---|
56 | // Is selector > 0 it is on a sequence
|
---|
57 | if (selector.isMacronTokenSequence()) {
|
---|
58 | if (restoreByMacronModel(tokenLowerCase)) {
|
---|
59 |
|
---|
60 | //Restore capitalization
|
---|
61 | restoredToken = StringUtil.copyCapitalization(token, currentToken);
|
---|
62 | // If restoredToken is not the same as token then macron/s have been add so add <mark> html. This is the yellow background around the words on the front end.
|
---|
63 | if(!restoredToken.equals(token)){
|
---|
64 | // only add <mark> html tag if it directinput and not being output to a file.
|
---|
65 | if(DirectInput.DI){restoredToken = "<mark>"+restoredToken+"</mark> ";}
|
---|
66 | }
|
---|
67 |
|
---|
68 | //if the tonken can not be restored by the MacronModel try and restor it with the DoubleVowelModel.
|
---|
69 | } else if (restoreByDoubleVowelModel(tokenLowerCase)) {
|
---|
70 | restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
|
---|
71 | } else {
|
---|
72 | currentToken = tokenLowerCase;
|
---|
73 | }
|
---|
74 | }
|
---|
75 | // else if it is not in a sequence
|
---|
76 | else {
|
---|
77 | // Try and restor it with the DoubleVowelModel
|
---|
78 | if (restoreByDoubleVowelModel(tokenLowerCase)) {
|
---|
79 | restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
|
---|
80 | }
|
---|
81 | // try and restor it with the MacronModel
|
---|
82 | else if (restoreByMacronModel(tokenLowerCase)) {
|
---|
83 | restoredToken = StringUtil.copyCapitalization(token, currentToken);
|
---|
84 | }
|
---|
85 | // Dose not need to be restored
|
---|
86 | else {
|
---|
87 | currentToken = tokenLowerCase;
|
---|
88 | }
|
---|
89 | }
|
---|
90 | previousToken = currentToken;
|
---|
91 | return restoredToken;
|
---|
92 | }
|
---|
93 |
|
---|
94 | // trys to restore by using the MacronModel and return boolean if it has.
|
---|
95 | private boolean restoreByMacronModel(String token) {
|
---|
96 |
|
---|
97 | // Test to see if token is in the Distinct Transformation hashset.
|
---|
98 | if (macronModel.isDistinctTransformation(token)) {
|
---|
99 | //if token is in Distinct Transformation hashset make the distinct ransformation then move along the sequence.
|
---|
100 | currentToken = macronModel.getDistinctTransformation(token);
|
---|
101 | selector.incrementMacronTokenSequence();
|
---|
102 | return true;
|
---|
103 | }
|
---|
104 | // If token is in Indistinct Transformation hashset
|
---|
105 | else if (macronModel.isIndistinctTransformation(token)) {
|
---|
106 | final String[] transformations = macronModel.getIndistinctTransformation(token);
|
---|
107 | double maxProbability = Double.MIN_VALUE;
|
---|
108 | String maxToken = token;
|
---|
109 | // loops all transformation and finds the one with the highest probability given the previous token.
|
---|
110 | for (String transformation : transformations) {
|
---|
111 | //double probability = macronModel.getIndistinctProbability(transformation);
|
---|
112 | double probability = 1.0;
|
---|
113 | //if macron model has a probability for this transformation then get its probability.
|
---|
114 | if (macronModel.containsMonogramProbability(transformation, previousToken)) {
|
---|
115 | probability *= macronModel.getMonogramProbability(transformation, previousToken);
|
---|
116 | }
|
---|
117 |
|
---|
118 | // if transformation not in macron model then try and find probability in Indistinct probability
|
---|
119 | if (probability == 1.0) {
|
---|
120 | probability = macronModel.getIndistinctProbability(transformation);
|
---|
121 | }
|
---|
122 |
|
---|
123 | //If probability is better then any before it update maxProbability and maxToken.
|
---|
124 | if (probability >= maxProbability) {
|
---|
125 | maxProbability = probability;
|
---|
126 | maxToken = transformation;
|
---|
127 | }
|
---|
128 | }
|
---|
129 | currentToken = maxToken;
|
---|
130 | selector.incrementMacronTokenSequence();
|
---|
131 | return true;
|
---|
132 | }
|
---|
133 | return false;
|
---|
134 | }
|
---|
135 |
|
---|
136 | private boolean restoreByDoubleVowelModel(String token) {
|
---|
137 | if (doubleVowelModel.isDistinctTransformation(token)) {
|
---|
138 | try {
|
---|
139 | currentToken = doubleVowelModel.getDistinctTransformation(token);
|
---|
140 | selector.incrementDoubleVowelTokenSequence();
|
---|
141 | return true;
|
---|
142 | } catch (Exception e) {
|
---|
143 | System.out.println("double vowel error 1");
|
---|
144 | System.exit(0);
|
---|
145 | }
|
---|
146 | } else if (doubleVowelModel.isIndistinctTransformation(token)) {
|
---|
147 | String[] transformations = doubleVowelModel.getIndistinctTransformation(token);
|
---|
148 | Double maxProbability = Double.MIN_VALUE;
|
---|
149 | String maxToken = token;
|
---|
150 | for (String transformation : transformations) {
|
---|
151 | //double probability = doubleVowelModel.getIndistinctProbability(transformation);
|
---|
152 |
|
---|
153 | double probability = 1.0;
|
---|
154 | if (doubleVowelModel.containsMonogramProbability(transformation, previousToken)) {
|
---|
155 | probability *= doubleVowelModel.getMonogramProbability(transformation, previousToken);
|
---|
156 | }
|
---|
157 | if (probability == 1.0) {
|
---|
158 | probability = doubleVowelModel.getIndistinctProbability(transformation);
|
---|
159 | }
|
---|
160 | if (probability >= maxProbability) {
|
---|
161 | maxProbability = probability;
|
---|
162 | maxToken = transformation;
|
---|
163 | }
|
---|
164 | }
|
---|
165 | currentToken = maxToken;
|
---|
166 | selector.incrementDoubleVowelTokenSequence();
|
---|
167 | return true;
|
---|
168 | }
|
---|
169 | return false;
|
---|
170 | }
|
---|
171 |
|
---|
172 | private class Selector {
|
---|
173 |
|
---|
174 | private int max;
|
---|
175 | private int current;
|
---|
176 |
|
---|
177 | public Selector(int max) {
|
---|
178 | this.max = max;
|
---|
179 | current = max;
|
---|
180 | }
|
---|
181 |
|
---|
182 | public void incrementMacronTokenSequence() {
|
---|
183 | if (current < max) {
|
---|
184 | current++;
|
---|
185 | }
|
---|
186 | }
|
---|
187 |
|
---|
188 | public void incrementDoubleVowelTokenSequence() {
|
---|
189 | if (current > 0) {
|
---|
190 | current--;
|
---|
191 | }
|
---|
192 | }
|
---|
193 |
|
---|
194 | public boolean isMacronTokenSequence() {
|
---|
195 | return current > 0;
|
---|
196 | }
|
---|
197 |
|
---|
198 | public boolean isDoubleVowelTokenSequence() {
|
---|
199 | return current == 0;
|
---|
200 | }
|
---|
201 | }
|
---|
202 | }
|
---|