source: other-projects/the-macronizer/trunk/src/main/java/org/atea/nlptools/macroniser/util/StringUtil.java@ 35791

Last change on this file since 35791 was 35791, checked in by cstephen, 2 years ago

Add updated macroniser code. This is a significant change to the codebase:

  • Servlets now send JSON responses that are easier to consume from other services.
  • Error responses are better conveyed and more informative.
  • Monogram components have been touched up. They now bubble errors up and, where applicable, implement relevant interfaces.
  • The JSP interface has been removed.
  • The SQL logging functionality has been deleted. It wasn't used before.
  • Dependencies updated.
File size: 8.8 KB
Line 
1package org.atea.nlptools.macroniser.util;
2
3import static java.lang.Character.isLowerCase;
4import static java.lang.Character.toLowerCase;
5import static java.lang.Character.toUpperCase;
6
/**
 * String helpers for handling accented vowels (macron, diaeresis and grave
 * forms of a, e, i, o, u in both cases) and for transferring capitalisation
 * between two spellings of the same text.
 *
 * <p>All accented characters are written as Unicode escapes so the class
 * compiles identically regardless of the encoding the source file is read
 * with.</p>
 */
public class StringUtil
{
    /** Unaccented vowels, in the order used by every group of {@link #ACCENTED_VOWELS}. */
    private static final String BASE_VOWELS = "aeiouAEIOU";

    /**
     * Every recognised accented vowel. The string is made of consecutive
     * groups of ten characters; within each group the characters line up
     * one-to-one with {@link #BASE_VOWELS}, so {@code index % 10} gives the
     * position of the matching unaccented vowel.
     */
    private static final String ACCENTED_VOWELS =
        // Set A: vowels with macron (a e i o u, then A E I O U)
        "\u0101\u0113\u012B\u014D\u016B\u0100\u0112\u012A\u014C\u016A"
        // Set B: vowels with diaeresis
        + "\u00E4\u00EB\u00EF\u00F6\u00FC\u00C4\u00CB\u00CF\u00D6\u00DC"
        // Set C: vowels with grave accent
        + "\u00E0\u00E8\u00EC\u00F2\u00F9\u00C0\u00C8\u00CC\u00D2\u00D9";

    /**
     * Tells whether a string contains at least one recognised accented vowel.
     *
     * @param str the string to inspect; may be {@code null}
     * @return {@code true} if any character satisfies {@link #isAccented(char)};
     *         {@code false} otherwise, including for {@code null}
     */
    public static boolean containsAccents(String str)
    {
        if (str == null) {
            return false;
        }

        for (int i = 0; i < str.length(); i++) {
            if (isAccented(str.charAt(i))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Replaces every recognised accented vowel with its unaccented form.
     *
     * @param str the string to convert; may be {@code null}
     * @return the converted string, or {@code str} itself when it is
     *         {@code null} or contains no accented vowel
     */
    public static String removeAccents(String str)
    {
        if (str == null || !containsAccents(str)) {
            return str;
        }

        final StringBuilder buffer = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            buffer.append(removeAccent(str.charAt(i)));
        }

        return buffer.toString();
    }

    /**
     * Replaces every recognised accented vowel with its double-vowel spelling
     * (see {@link #toDoubleVowel(char)}).
     *
     * @param str the string to convert; may be {@code null}
     * @return the converted string, or {@code str} itself when it is
     *         {@code null} or contains no accented vowel
     */
    public static String toDoubleVowel(String str)
    {
        if (str == null || !containsAccents(str)) {
            return str;
        }

        final StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            buffer.append(toDoubleVowel(str.charAt(i)));
        }

        return buffer.toString();
    }

    /**
     * Applies the capitalisation pattern of {@code source} to
     * {@code destination}, position by position.
     *
     * <p>A destination character is lower-cased when the source character at
     * the same index is lower case and upper-cased in every other case, so a
     * source character that has no case (digit, punctuation, ...) also
     * results in an upper-casing attempt.</p>
     *
     * @param source      the string whose capitalisation is copied
     * @param destination the string whose characters are re-cased
     * @return {@code destination} re-cased to follow {@code source}
     * @throws IllegalArgumentException if either argument is {@code null} or
     *         the two strings differ in length
     */
    public static String copyCapitalization(String source, String destination)
        throws IllegalArgumentException
    {
        checkNotNull(source);
        checkNotNull(destination);
        checkEqualLength(source, destination);

        final StringBuilder buffer = new StringBuilder(source.length());
        for (int i = 0; i < source.length(); i++) {
            buffer.append(applyCase(source.charAt(i), destination.charAt(i)));
        }

        return buffer.toString();
    }

    /**
     * Applies the capitalisation of a double-vowel spelling to its accented
     * counterpart.
     *
     * <p>{@code destination} is walked one character at a time while a second
     * cursor walks {@code dvowel}. After an accented destination character
     * the {@code dvowel} cursor advances by two, because that single
     * character corresponds to two characters of the double-vowel spelling.
     * The case of the first of those two characters decides the case of the
     * accented character.</p>
     *
     * @param dvowel      the double-vowel spelling that supplies the case
     * @param destination the accented spelling to re-case
     * @return {@code destination} re-cased to follow {@code dvowel}
     * @throws IllegalArgumentException if either argument is {@code null}
     * @throws StringIndexOutOfBoundsException if {@code dvowel} is too short
     *         to cover {@code destination}
     */
    public static String copyDVowelCapitalization(String dvowel, String destination)
    {
        checkNotNull(dvowel);
        checkNotNull(destination);

        // The result always has exactly as many characters as destination.
        final StringBuilder buffer = new StringBuilder(destination.length());
        for (int i = 0, j = 0; i < destination.length(); i++, j++) {
            final char target = destination.charAt(i);
            buffer.append(applyCase(dvowel.charAt(j), target));

            // One accented character stands for two characters in dvowel.
            if (isAccented(target)) {
                j++;
            }
        }

        return buffer.toString();
    }

    /**
     * Tells whether a string is {@code null}, empty, or made up solely of
     * space, carriage return, line feed and tab characters. No other
     * character is treated as white space.
     *
     * @param value the string to inspect; may be {@code null}
     * @return {@code true} if {@code value} holds nothing but the characters
     *         listed above, {@code false} otherwise
     */
    public static Boolean isNullOrWhiteSpace(String value)
    {
        if (value == null) {
            return true;
        }

        for (int i = 0; i < value.length(); i++) {
            switch (value.charAt(i)) {
                case ' ':
                case '\r':
                case '\n':
                case '\t':
                    break;
                default:
                    return false;
            }
        }

        return true;
    }

    /**
     * Tells whether a character is one of the recognised accented vowels
     * (macron, diaeresis or grave form of a, e, i, o, u in either case).
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is a recognised accented vowel
     */
    public static boolean isAccented(char c)
    {
        return ACCENTED_VOWELS.indexOf(c) >= 0;
    }

    /**
     * Strips the accent from a single character.
     *
     * @param c the character to convert
     * @return the unaccented vowel of the same case when {@code c} is a
     *         recognised accented vowel, otherwise {@code c} unchanged
     */
    public static char removeAccent(char c)
    {
        final int index = ACCENTED_VOWELS.indexOf(c);
        if (index < 0) {
            return c;
        }

        return BASE_VOWELS.charAt(index % BASE_VOWELS.length());
    }

    /**
     * Converts a single character to its double-vowel spelling.
     *
     * @param c the character to convert
     * @return for a recognised accented vowel, the unaccented vowel in the
     *         same case followed by its lower-case form (for example
     *         {@code "aa"} or {@code "Aa"}); otherwise a one-character string
     *         holding {@code c}
     */
    public static String toDoubleVowel(char c)
    {
        if (!isAccented(c)) {
            return Character.toString(c);
        }

        final char base = removeAccent(c);
        return new StringBuilder(2)
            .append(base)
            .append(Character.toLowerCase(base))
            .toString();
    }

    /** Re-cases {@code target}: lower case if {@code model} is lower case, upper case otherwise. */
    private static char applyCase(char model, char target)
    {
        return Character.isLowerCase(model)
            ? Character.toLowerCase(target)
            : Character.toUpperCase(target);
    }

    /** Rejects a {@code null} string argument. */
    private static void checkNotNull(String str)
        throws IllegalArgumentException
    {
        if (str == null) {
            throw new IllegalArgumentException("Required a non null String, found null String.");
        }
    }

    /** Rejects a pair of strings whose lengths differ. */
    private static void checkEqualLength(String s1, String s2)
        throws IllegalArgumentException
    {
        if (s1.length() != s2.length()) {
            throw new IllegalArgumentException("Required equal String lengths, found unequal String lengths: s1=" + s1.length() + ", s2=" + s2.length());
        }
    }
}
Note: See TracBrowser for help on using the repository browser.