Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: other-projects/the-macronizer/trunk/src/java/monogram/restorer/MonogramRestorer.java@ 29855

Last change on this file since 29855 was 29855, checked in by davidb, 9 years ago
John's code after refactoring by Tom over the summer of 2014/2015
File size: 8.0 KB

Line
1	/*
2	* This class does all the hard work. It looks up the input token in the MonogramModel and returns the restored token.
3	*/
4	package monogram.restorer;
5
6	import monogram.model.MonogramFactory;
7	import monogram.model.MonogramModel;
8	import monogram.model.ListModel;
9	import util.StringUtil;
10	import web.servlets.DirectInput;
11
12	/**
13	* @author University of Waikato - Te Whare WÄnanga o Waikato
14	* @version 1.0
15	* @since 2014-11-20
16	*/
17	public class MonogramRestorer {
18
19	private boolean preserveMacrons;
20	private MonogramModel macronModel;
21	private MonogramModel doubleVowelModel;
22	private ListModel blackListModel;
23
24	private String previousToken;
25	private String currentToken;
26	private Selector selector;
27
28	public MonogramRestorer(boolean preserveMacrons) {
29	System.out.println("test 9");
30	this.preserveMacrons = preserveMacrons;
31	System.out.println("test 10");
32	macronModel = MonogramFactory.getMacronModel();
33	System.out.println("test 11");
34	doubleVowelModel = MonogramFactory.getDoubleVowelModel();
35	System.out.println("test 12");
36	blackListModel = MonogramFactory.getBlackList();
37	previousToken = "";
38	selector = new Selector(3);
39	System.out.println("test done3");
40	}
41
42	public String restore(String token) {
43
44	// Test to see if it should preserve the macrons already in the input texts. If not remove all macrons.
45	if (!preserveMacrons && StringUtil.containsAccents(token)) {
46	token = StringUtil.removeAccents(token);
47	}
48
49
50	// Make lower case copy of token.
51	final String tokenLowerCase = token.toLowerCase();
52
53	String restoredToken = token;
54	//If the word is in the black list return the token with no macron.
55	if(blackListModel.contains(token)){return restoredToken;}
56	// Is selector > 0 it is on a sequence
57	if (selector.isMacronTokenSequence()) {
58	if (restoreByMacronModel(tokenLowerCase)) {
59
60	//Restore capitalization
61	restoredToken = StringUtil.copyCapitalization(token, currentToken);
62	// If restoredToken is not the same as token then macron/s have been add so add <mark> html. This is the yellow background around the words on the front end.
63	if(!restoredToken.equals(token)){
64	// only add <mark> html tag if it directinput and not being output to a file.
65	if(DirectInput.DI){restoredToken = "<mark>"+restoredToken+"</mark> ";}
66	}
67
68	//if the tonken can not be restored by the MacronModel try and restor it with the DoubleVowelModel.
69	} else if (restoreByDoubleVowelModel(tokenLowerCase)) {
70	restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
71	} else {
72	currentToken = tokenLowerCase;
73	}
74	}
75	// else if it is not in a sequence
76	else {
77	// Try and restor it with the DoubleVowelModel
78	if (restoreByDoubleVowelModel(tokenLowerCase)) {
79	restoredToken = StringUtil.copyDVowelCapitalization(token, currentToken);
80	}
81	// try and restor it with the MacronModel
82	else if (restoreByMacronModel(tokenLowerCase)) {
83	restoredToken = StringUtil.copyCapitalization(token, currentToken);
84	}
85	// Dose not need to be restored
86	else {
87	currentToken = tokenLowerCase;
88	}
89	}
90	previousToken = currentToken;
91	return restoredToken;
92	}
93
94	// trys to restore by using the MacronModel and return boolean if it has.
95	private boolean restoreByMacronModel(String token) {
96
97	// Test to see if token is in the Distinct Transformation hashset.
98	if (macronModel.isDistinctTransformation(token)) {
99	//if token is in Distinct Transformation hashset make the distinct ransformation then move along the sequence.
100	currentToken = macronModel.getDistinctTransformation(token);
101	selector.incrementMacronTokenSequence();
102	return true;
103	}
104	// If token is in Indistinct Transformation hashset
105	else if (macronModel.isIndistinctTransformation(token)) {
106	final String[] transformations = macronModel.getIndistinctTransformation(token);
107	double maxProbability = Double.MIN_VALUE;
108	String maxToken = token;
109	// loops all transformation and finds the one with the highest probability given the previous token.
110	for (String transformation : transformations) {
111	//double probability = macronModel.getIndistinctProbability(transformation);
112	double probability = 1.0;
113	//if macron model has a probability for this transformation then get its probability.
114	if (macronModel.containsMonogramProbability(transformation, previousToken)) {
115	probability *= macronModel.getMonogramProbability(transformation, previousToken);
116	}
117
118	// if transformation not in macron model then try and find probability in Indistinct probability
119	if (probability == 1.0) {
120	probability = macronModel.getIndistinctProbability(transformation);
121	}
122
123	//If probability is better then any before it update maxProbability and maxToken.
124	if (probability >= maxProbability) {
125	maxProbability = probability;
126	maxToken = transformation;
127	}
128	}
129	currentToken = maxToken;
130	selector.incrementMacronTokenSequence();
131	return true;
132	}
133	return false;
134	}
135
136	private boolean restoreByDoubleVowelModel(String token) {
137	if (doubleVowelModel.isDistinctTransformation(token)) {
138	try {
139	currentToken = doubleVowelModel.getDistinctTransformation(token);
140	selector.incrementDoubleVowelTokenSequence();
141	return true;
142	} catch (Exception e) {
143	System.out.println("double vowel error 1");
144	System.exit(0);
145	}
146	} else if (doubleVowelModel.isIndistinctTransformation(token)) {
147	String[] transformations = doubleVowelModel.getIndistinctTransformation(token);
148	Double maxProbability = Double.MIN_VALUE;
149	String maxToken = token;
150	for (String transformation : transformations) {
151	//double probability = doubleVowelModel.getIndistinctProbability(transformation);
152
153	double probability = 1.0;
154	if (doubleVowelModel.containsMonogramProbability(transformation, previousToken)) {
155	probability *= doubleVowelModel.getMonogramProbability(transformation, previousToken);
156	}
157	if (probability == 1.0) {
158	probability = doubleVowelModel.getIndistinctProbability(transformation);
159	}
160	if (probability >= maxProbability) {
161	maxProbability = probability;
162	maxToken = transformation;
163	}
164	}
165	currentToken = maxToken;
166	selector.incrementDoubleVowelTokenSequence();
167	return true;
168	}
169	return false;
170	}
171
172	private class Selector {
173
174	private int max;
175	private int current;
176
177	public Selector(int max) {
178	this.max = max;
179	current = max;
180	}
181
182	public void incrementMacronTokenSequence() {
183	if (current < max) {
184	current++;
185	}
186	}
187
188	public void incrementDoubleVowelTokenSequence() {
189	if (current > 0) {
190	current--;
191	}
192	}
193
194	public boolean isMacronTokenSequence() {
195	return current > 0;
196	}
197
198	public boolean isDoubleVowelTokenSequence() {
199	return current == 0;
200	}
201	}
202	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: