source: other-projects/the-macronizer/trunk/src/main/java/org/atea/nlptools/macroniser/monogram/model/MonogramModel.java@ 35791

Last change on this file since 35791 was 35791, checked in by cstephen, 3 years ago

Add updated macroniser code. This is a significant change to the codebase:

  • Servlets now send JSON responses that are easier to consume from other services.
  • Error responses are better conveyed and more informative.
  • Monogram components have been touched up. They now bubble errors up and, where applicable, implement relevant interfaces.
  • The JSP interface has been removed.
  • The SQL logging functionality has been deleted. It wasn't used before.
  • Dependencies updated.
File size: 7.9 KB
Line 
1
2package org.atea.nlptools.macroniser.monogram.model;
3
4import java.io.BufferedReader;
5import java.io.IOException;
6import java.io.InputStreamReader;
7import java.io.Reader;
8import java.util.HashMap;
9import java.util.Map;
10
11import org.atea.nlptools.macroniser.util.Pool;
12
13/**
14 *
15 *
16 *
17 * @author University of Waikato - Te Whare Wānanga o Waikato
18 * @version 1.0
19 * @since 2014-11-20
20 */
21public class MonogramModel
22{
23 private final String DISTINCT_TRANSFORMATIONS_PATH = "distinctTransformations.data";
24 private final String INDISTINCT_TRANSFORMATIONS_PATH = "indistinctTransformations.data";
25 private final String MONOGRAM_PROBABILITIES_PATH = "monogramProbabilities.data";
26 private final String CHARSET_ENCODING = "utf-8";
27 private final String DELIMITER = "\t";
28 private final String path;
29 private Map<String, String> distinctTransformations;
30 private Map<String, String[]> indistinctTransformations;
31 private Map<String, Double> indistinctProbabilities;
32 private Map<String, HashMap<String, Double>> monogramProbabilities;
33 private Pool<String> pool;
34
35 /**
36 * Create a model with a local pool, the pool is cleared after the model is constructed.
37 * @param path File path to data directory
38 */
39 protected MonogramModel(String path)
40 {
41 this.path = path;
42 this.pool = new Pool<String>();
43 init();
44 pool.clear();
45 }
46
47 /**
48 * Create a model with a global pool, the pool must be cleared outside of the class.
49 * @param path File path to data directory
50 */
51 protected MonogramModel(String path, Pool<String> pool)
52 {
53 this.path = path;
54 this.pool = pool;
55 init();
56 }
57
58
59 /**
60 * Initializes the values in a monogram model
61 */
62 private void init()
63 {
64 distinctTransformations = new HashMap<String, String>();
65 indistinctTransformations = new HashMap<String, String[]>();
66 indistinctProbabilities = new HashMap<String, Double>();
67 monogramProbabilities = new HashMap<String, HashMap<String, Double>>();
68
69 initDistinctTransformations();
70 initIndistinctTransformations();
71 initProbabilities();
72 }
73
74 /**
75 * Gets a Hashmap with the distinct Transformations
76 * @return Hashmap of distinct Transformations eg(aahia -> āhia)
77 */
78 public Map<String, String> getDistinctTransformations() {
79 return distinctTransformations;
80 }
81
82 public Map<String, String[]> getIndistinctTransformations() {
83 return indistinctTransformations;
84 }
85
86 public Map<String, Double> getIndistinctProbabilities() {
87 return indistinctProbabilities;
88 }
89
90 public Map<String, HashMap<String, Double>> getMonogramProbabilities() {
91 return monogramProbabilities;
92 }
93
94 public String getDistinctTransformation(String token) {
95 return distinctTransformations.get(token);
96 }
97
98 public String[] getIndistinctTransformation(String token) {
99 return indistinctTransformations.get(token);
100 }
101
102 public Double getIndistinctProbability(String token) {
103 return indistinctProbabilities.get(token);
104 }
105
106 public Double getMonogramProbability(String token, String leftToken) {
107 return monogramProbabilities.get(token).get(leftToken);
108 }
109
110 public boolean isDistinctTransformation(String token) {
111 return distinctTransformations.containsKey(token);
112 }
113
114 public boolean isIndistinctTransformation(String token) {
115 return indistinctTransformations.containsKey(token);
116 }
117
118 public boolean containsIndistinctProbability(String token) {
119 return indistinctProbabilities.containsKey(token);
120 }
121
122 public boolean containsMonogramProbability(String token, String leftToken)
123 {
124 if (monogramProbabilities.containsKey(token)) {
125 return monogramProbabilities.get(token).containsKey(leftToken);
126 }
127
128 return false;
129 }
130
131 /**
132 * Reads in transformations from given data file
133 */
134 private void initDistinctTransformations()
135 {
136 BufferedReader reader = null;
137 try
138 {
139 final String filepath = path + "/" + DISTINCT_TRANSFORMATIONS_PATH;
140 reader = new BufferedReader(
141 new InputStreamReader(getClass().getResourceAsStream(filepath), CHARSET_ENCODING)
142 );
143
144 String line;
145 while ((line = reader.readLine()) != null)
146 {
147 final String[] elements = line.split(DELIMITER);
148 distinctTransformations.put(pool.getCanonicalObject(elements[0]), pool.getCanonicalObject(elements[1]));
149 }
150 }
151 catch (IOException e)
152 {
153 e.printStackTrace();
154 }
155 finally
156 {
157 close(reader);
158 }
159 }
160
161 /**
162 * Reads in indistinct transformations from given data file
163 */
164 private void initIndistinctTransformations()
165 {
166 BufferedReader reader = null;
167 try
168 {
169 final String filepath = path + "/" + INDISTINCT_TRANSFORMATIONS_PATH;
170 reader = new BufferedReader(
171 new InputStreamReader(getClass().getResourceAsStream(filepath), CHARSET_ENCODING)
172 );
173
174 String line;
175 while ((line = reader.readLine()) != null)
176 {
177 final String[] elements = line.split(DELIMITER);
178 final String[] transformations = new String[elements.length - 1];
179
180 //System.arraycopy(elements, 1, transformations, 0, elements.length - 1); //copy elements 1-n.
181 for (int i = 0; i < transformations.length; i++) {
182 transformations[i] = pool.getCanonicalObject(elements[i + 1]);
183 }
184
185 indistinctTransformations.put(pool.getCanonicalObject(elements[0]), transformations);
186 }
187 }
188 catch (IOException e)
189 {
190 e.printStackTrace();
191 }
192 finally
193 {
194 close(reader);
195 }
196 }
197
198 /**
199 * Reads in probabilities from given data file
200 */
201 private void initProbabilities()
202 {
203 BufferedReader reader = null;
204 try
205 {
206 final String filepath = path + "/" + MONOGRAM_PROBABILITIES_PATH;
207 reader = new BufferedReader(
208 new InputStreamReader(getClass().getResourceAsStream(filepath), CHARSET_ENCODING)
209 );
210
211 String line;
212 while ((line = reader.readLine()) != null)
213 {
214 final String[] elements = line.split(DELIMITER);
215 final String token = elements[0];
216 final Double probability = Double.parseDouble(elements[1]);
217 indistinctProbabilities.put(token, probability);
218
219 for (int i = 2; i + 1 < elements.length; i += 2)
220 {
221 final String nGramToken = elements[i];
222 final Double nGramProbability = Double.parseDouble(elements[i + 1]);
223
224 if (monogramProbabilities.containsKey(token))
225 {
226 final HashMap<String, Double> map = monogramProbabilities.get(token);
227 map.put(nGramToken, nGramProbability);
228 }
229 else
230 {
231 final HashMap<String, Double> map = new HashMap<String, Double>();
232 map.put(nGramToken, nGramProbability);
233 monogramProbabilities.put(token, map);
234 }
235 }
236 }
237 }
238 catch (IOException e)
239 {
240 e.printStackTrace();
241 }
242 finally
243 {
244 close(reader);
245 }
246 }
247
248 private void close(Reader reader)
249 {
250 if (reader != null)
251 {
252 try {
253 reader.close();
254 }
255 catch (IOException e) {
256 e.printStackTrace();
257 }
258 }
259 }
260}
Note: See TracBrowser for help on using the repository browser.