Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

StringUtil.java@ 35791

Last change on this file since 35791 was 35791, checked in by cstephen, 2 years ago

Add updated macroniser code. This is a significant change to the codebase:

Servlets now send JSON responses that are easier to consume from other services.
Error responses are better conveyed and more informative.
Monogram components have been touched up. They now bubble errors up and, where applicable, implement relevant interfaces.
The JSP interface has been removed.
The SQL logging functionality has been deleted. It wasn't used before.
Dependencies updated.

File size: 8.8 KB

Line
1	package org.atea.nlptools.macroniser.util;
2
3	import static java.lang.Character.isLowerCase;
4	import static java.lang.Character.toLowerCase;
5	import static java.lang.Character.toUpperCase;
6
/**
 * String helpers for working with accented vowels: detecting them, stripping
 * them, expanding them to double vowels, and copying capitalization between
 * the different spellings of a word.
 *
 * <p>Three sets of accented vowels are recognised: macron, diaeresis and
 * grave, each for {@code a e i o u} in lower and upper case. All accented
 * characters are written as Unicode escapes so this file stays pure ASCII and
 * cannot be corrupted by being read or saved with the wrong character set.
 */
public class StringUtil
{
    /**
     * Every accented vowel this class recognises. The table consists of three
     * groups of ten characters (macron, diaeresis, grave); within each group
     * the order is a e i o u A E I O U, which lines up with
     * {@link #PLAIN_VOWELS}.
     */
    private static final String ACCENTED_VOWELS =
          "\u0101\u0113\u012B\u014D\u016B\u0100\u0112\u012A\u014C\u016A"  // Set A: macron
        + "\u00E4\u00EB\u00EF\u00F6\u00FC\u00C4\u00CB\u00CF\u00D6\u00DC"  // Set B: diaeresis
        + "\u00E0\u00E8\u00EC\u00F2\u00F9\u00C0\u00C8\u00CC\u00D2\u00D9"; // Set C: grave

    /** Unaccented vowels, in the same order as each group of {@link #ACCENTED_VOWELS}. */
    private static final String PLAIN_VOWELS = "aeiouAEIOU";

    /**
     * Tests whether a string contains at least one recognised accented vowel.
     *
     * @param str the string to inspect; may be {@code null}
     * @return {@code true} if any character satisfies {@link #isAccented(char)};
     *         {@code false} otherwise, including for {@code null}
     */
    public static boolean containsAccents(String str)
    {
        if (str == null) {
            return false;
        }

        for (int i = 0; i < str.length(); i++)
        {
            if (isAccented(str.charAt(i))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Replaces every recognised accented vowel with its unaccented vowel.
     *
     * @param str the string to convert; may be {@code null}
     * @return the converted string, or {@code str} itself when it is
     *         {@code null} or contains no accented vowels
     */
    public static String removeAccents(String str)
    {
        if (str == null || !containsAccents(str)) {
            return str;
        }

        final StringBuilder buffer = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            buffer.append(removeAccent(str.charAt(i)));
        }

        return buffer.toString();
    }

    /**
     * Replaces every recognised accented vowel with its double-vowel spelling
     * (see {@link #toDoubleVowel(char)}).
     *
     * @param str the string to convert; may be {@code null}
     * @return the converted string, or {@code str} itself when it is
     *         {@code null} or contains no accented vowels
     */
    public static String toDoubleVowel(String str)
    {
        if (str == null || !containsAccents(str)) {
            return str;
        }

        // Each accented vowel grows to two characters, so 2x is an upper bound.
        final StringBuilder buffer = new StringBuilder(str.length() * 2);
        for (int i = 0; i < str.length(); i++) {
            buffer.append(toDoubleVowel(str.charAt(i)));
        }

        return buffer.toString();
    }

    /**
     * Applies the per-character capitalization of {@code source} to
     * {@code destination}.
     *
     * <p>A destination character is lower-cased when the source character at
     * the same index is lower case, and upper-cased in every other case
     * (including when the source character is not a letter).
     *
     * @param source      the string whose capitalization is copied
     * @param destination the string whose characters are re-cased
     * @return {@code destination} with the capitalization of {@code source}
     * @throws IllegalArgumentException if either argument is {@code null} or
     *                                  the two strings differ in length
     */
    public static String copyCapitalization(String source, String destination)
        throws IllegalArgumentException
    {
        checkNotNull(source);
        checkNotNull(destination);
        checkEqualLength(source, destination);

        final StringBuilder buffer = new StringBuilder(source.length());
        for (int i = 0; i < source.length(); i++)
        {
            buffer.append(recase(source.charAt(i), destination.charAt(i)));
        }

        return buffer.toString();
    }

    /**
     * Applies the capitalization of a double-vowel spelling to
     * {@code destination}.
     *
     * <p>The two strings are walked in parallel. Each accented vowel in
     * {@code destination} consumes two characters of {@code dvowel}; its case
     * is taken from the first of the two.
     *
     * @param dvowel      the double-vowel string whose capitalization is copied
     * @param destination the string whose characters are re-cased
     * @return {@code destination} with the capitalization of {@code dvowel}
     * @throws IllegalArgumentException        if either argument is {@code null}
     * @throws StringIndexOutOfBoundsException if {@code dvowel} is too short to
     *                                         pair with {@code destination}
     */
    public static String copyDVowelCapitalization(String dvowel, String destination)
    {
        checkNotNull(dvowel);
        checkNotNull(destination);

        final StringBuilder buffer = new StringBuilder(destination.length());
        for (int i = 0, j = 0; i < destination.length(); i++, j++)
        {
            final char target = destination.charAt(i);
            buffer.append(recase(dvowel.charAt(j), target));

            // An accented vowel stands for two characters of the double-vowel form.
            if (isAccented(target)) {
                j++;
            }
        }

        return buffer.toString();
    }

    /**
     * Tests whether a string is {@code null}, empty, or made up solely of
     * space, carriage return, line feed and tab characters.
     *
     * @param value the string to inspect; may be {@code null}
     * @return {@code true} if {@code value} is {@code null} or contains nothing
     *         but the four characters listed above; {@code false} otherwise
     */
    public static Boolean isNullOrWhiteSpace(String value)
    {
        if (value == null) {
            return true;
        }

        for (int i = 0; i < value.length(); i++)
        {
            switch (value.charAt(i)) {
                case ' ':
                case '\r':
                case '\n':
                case '\t':
                    break;
                default:
                    return false;
            }
        }

        return true;
    }

    /**
     * Returns {@code target} lower-cased when {@code model} is lower case and
     * upper-cased otherwise.
     */
    private static char recase(char model, char target)
    {
        return Character.isLowerCase(model)
            ? Character.toLowerCase(target)
            : Character.toUpperCase(target);
    }

    /**
     * Rejects {@code null} arguments.
     *
     * @throws IllegalArgumentException if {@code str} is {@code null}
     */
    private static void checkNotNull(String str)
        throws IllegalArgumentException
    {
        if (str == null) {
            throw new IllegalArgumentException("Required a non null String, found null String.");
        }
    }

    /**
     * Rejects pairs of strings whose lengths differ.
     *
     * @throws IllegalArgumentException if the lengths are not equal
     */
    private static void checkEqualLength(String s1, String s2)
        throws IllegalArgumentException
    {
        if (s1.length() != s2.length()) {
            throw new IllegalArgumentException("Required equal String lengths, found unequal String lengths: s1=" + s1.length() + ", s2=" + s2.length());
        }
    }

    /**
     * Tests whether a character is one of the recognised accented vowels.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is a vowel carrying a macron, a
     *         diaeresis or a grave accent, in either case
     */
    public static boolean isAccented(char c)
    {
        return ACCENTED_VOWELS.indexOf(c) >= 0;
    }

    /**
     * Maps a recognised accented vowel to its unaccented vowel, keeping case.
     *
     * @param c the character to convert
     * @return the unaccented vowel, or {@code c} unchanged when it is not a
     *         recognised accented vowel
     */
    public static char removeAccent(char c)
    {
        final int index = ACCENTED_VOWELS.indexOf(c);
        if (index < 0) {
            return c;
        }

        // Every group of the table repeats the order of PLAIN_VOWELS.
        return PLAIN_VOWELS.charAt(index % PLAIN_VOWELS.length());
    }

    /**
     * Maps a recognised accented vowel to its double-vowel spelling. The first
     * letter keeps the case of the accented vowel and the second letter is
     * always lower case, e.g. an upper-case O with macron becomes {@code "Oo"}.
     *
     * @param c the character to convert
     * @return the two-letter spelling, or {@code c} as a one-character string
     *         when it is not a recognised accented vowel
     */
    public static String toDoubleVowel(char c)
    {
        if (!isAccented(c)) {
            return Character.toString(c);
        }

        final char plain = removeAccent(c);
        return new StringBuilder(2)
            .append(plain)
            .append(Character.toLowerCase(plain))
            .toString();
    }
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: other-projects/the-macronizer/trunk/src/main/java/org/atea/nlptools/macroniser/util/StringUtil.java@ 35791

Download in other formats: