Context Navigation

source: other-projects/the-macronizer/trunk/src/java/monogram/restorer/MonogramRestorer.java@ 35719

Last change on this file since 35719 was 35719, checked in by cstephen, 2 years ago
Add support for JSON response to direct input queries. Cleanup other components.
File size: 7.9 KB

Line
1	/*
2	* This class does all the hard work. It looks up the input token in the MonogramModel and returns the restored token.
3	*/
4	package monogram.restorer;
5
6	import monogram.model.MonogramFactory;
7	import monogram.model.MonogramModel;
8	import monogram.model.ListModel;
9	import util.StringUtil;
10
11	/**
12	* @author University of Waikato - Te Whare WÄnanga o Waikato
13	* @version 1.0
14	* @since 2014-11-20
15	*/
16	public class MonogramRestorer {
17
18	private boolean preserveMacrons;
19	private MonogramModel macronModel;
20	private MonogramModel doubleVowelModel;
21	private ListModel blackListModel;
22
23	private String previousToken;
24	private String currentToken;
25	private Selector selector;
26
27	public MonogramRestorer(boolean preserveMacrons) {
28	this.preserveMacrons = preserveMacrons;
29	macronModel = MonogramFactory.getMacronModel();
30	doubleVowelModel = MonogramFactory.getDoubleVowelModel();
31	blackListModel = MonogramFactory.getBlackList();
32	previousToken = "";
33	selector = new Selector(3);
34	}
35
36	public String restore(String token, Boolean markupChangedWords) {
37	// Test to see if it should preserve the macrons already in the input texts. If
38	// not remove all macrons.
39	if (!preserveMacrons && StringUtil.containsAccents(token)) {
40	token = StringUtil.removeAccents(token);
41	}
42
43	// Make lower case copy of token.
44	final String tokenLowerCase = token.toLowerCase();
45
46	String restoredToken = token;
47	// If the word is in the black list return the token with no macron.
48	if (blackListModel.contains(token)) {
49	return restoredToken;
50	}
51	// Is selector > 0 it is on a sequence
52	if (selector.isMacronTokenSequence()) {
53	if (restoreByMacronModel(tokenLowerCase)) {
54	// Restore capitalization
55	restoredToken = StringUtil.copyCapitalization(token, currentToken);
56	// If restoredToken is not the same as token then macron/s have been add so add
57	// <mark> html. This is the yellow background around the words on the front end.
58	if (!restoredToken.equals(token)) {
59	// only add <mark> html tag if it directinput and not being output to a file.
60	if (markupChangedWords) {
61	restoredToken = "<mark>" + restoredToken + "</mark> ";
62	}
63	}
64
65	// if the tonken can not be restored by the MacronModel try and restor it with
66	// the DoubleVowelModel.
67	} else if (restoreByDoubleVowelModel(tokenLowerCase)) {
68	restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
69	} else {
70	currentToken = tokenLowerCase;
71	}
72	}
73	// else if it is not in a sequence
74	else {
75	// Try and restor it with the DoubleVowelModel
76	if (restoreByDoubleVowelModel(tokenLowerCase)) {
77	restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
78	}
79	// try and restor it with the MacronModel
80	else if (restoreByMacronModel(tokenLowerCase)) {
81	restoredToken = StringUtil.copyCapitalization(token, currentToken);
82	}
83	// Dose not need to be restored
84	else {
85	currentToken = tokenLowerCase;
86	}
87	}
88	previousToken = currentToken;
89	return restoredToken;
90	}
91
92	// trys to restore by using the MacronModel and return boolean if it has.
93	private boolean restoreByMacronModel(String token) {
94
95	// Test to see if token is in the Distinct Transformation hashset.
96	if (macronModel.isDistinctTransformation(token)) {
97	// if token is in Distinct Transformation hashset make the distinct
98	// ransformation then move along the sequence.
99	currentToken = macronModel.getDistinctTransformation(token);
100	selector.incrementMacronTokenSequence();
101	return true;
102	}
103	// If token is in Indistinct Transformation hashset
104	else if (macronModel.isIndistinctTransformation(token)) {
105	final String[] transformations = macronModel.getIndistinctTransformation(token);
106	double maxProbability = Double.MIN_VALUE;
107	String maxToken = token;
108	// loops all transformation and finds the one with the highest probability given
109	// the previous token.
110	for (String transformation : transformations) {
111	// double probability = macronModel.getIndistinctProbability(transformation);
112	double probability = 1.0;
113	// if macron model has a probability for this transformation then get its
114	// probability.
115	if (macronModel.containsMonogramProbability(transformation, previousToken)) {
116	probability *= macronModel.getMonogramProbability(transformation, previousToken);
117	}
118
119	// if transformation not in macron model then try and find probability in
120	// Indistinct probability
121	if (probability == 1.0) {
122	probability = macronModel.getIndistinctProbability(transformation);
123	}
124
125	// If probability is better then any before it update maxProbability and
126	// maxToken.
127	if (probability >= maxProbability) {
128	maxProbability = probability;
129	maxToken = transformation;
130	}
131	}
132	currentToken = maxToken;
133	selector.incrementMacronTokenSequence();
134	return true;
135	}
136	return false;
137	}
138
139	private boolean restoreByDoubleVowelModel(String token) {
140	if (doubleVowelModel.isDistinctTransformation(token)) {
141	try {
142	currentToken = doubleVowelModel.getDistinctTransformation(token);
143	selector.incrementDoubleVowelTokenSequence();
144	return true;
145	} catch (Exception e) {
146	System.err.println("double vowel error 1");
147	System.exit(0);
148	}
149	} else if (doubleVowelModel.isIndistinctTransformation(token)) {
150	String[] transformations = doubleVowelModel.getIndistinctTransformation(token);
151	Double maxProbability = Double.MIN_VALUE;
152	String maxToken = token;
153	for (String transformation : transformations) {
154	// double probability =
155	// doubleVowelModel.getIndistinctProbability(transformation);
156
157	double probability = 1.0;
158	if (doubleVowelModel.containsMonogramProbability(transformation, previousToken)) {
159	probability *= doubleVowelModel.getMonogramProbability(transformation, previousToken);
160	}
161	if (probability == 1.0) {
162	probability = doubleVowelModel.getIndistinctProbability(transformation);
163	}
164	if (probability >= maxProbability) {
165	maxProbability = probability;
166	maxToken = transformation;
167	}
168	}
169	currentToken = maxToken;
170	selector.incrementDoubleVowelTokenSequence();
171	return true;
172	}
173	return false;
174	}
175
176	private class Selector {
177
178	private int max;
179	private int current;
180
181	public Selector(int max) {
182	this.max = max;
183	current = max;
184	}
185
186	public void incrementMacronTokenSequence() {
187	if (current < max) {
188	current++;
189	}
190	}
191
192	public void incrementDoubleVowelTokenSequence() {
193	if (current > 0) {
194	current--;
195	}
196	}
197
198	public boolean isMacronTokenSequence() {
199	return current > 0;
200	}
201
202	// public boolean isDoubleVowelTokenSequence() {
203	// return current == 0;
204	// }
205	}
206	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: