Changeset 35732
- Timestamp:
- 2021-11-11T17:02:12+13:00 (2 years ago)
- Location:
- other-projects/the-macronizer/trunk/src/java
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/the-macronizer/trunk/src/java/monogram/restorer/TxtRestorer.java
r35719 r35732 25 25 26 26 public TxtRestorer() { 27 pattern = Pattern.compile("([a-zA-Zā-ūĀ-Ū0-9]+|\\p{Punct})|(.)" );27 pattern = Pattern.compile("([a-zA-Zā-ūĀ-Ū0-9]+|\\p{Punct})|(.)", Pattern.DOTALL); 28 28 } 29 29 -
other-projects/the-macronizer/trunk/src/java/web/servlets/DirectInput.java
r35722 r35732 10 10 import java.io.OutputStreamWriter; 11 11 import java.io.UnsupportedEncodingException; 12 import java.util.regex.Matcher; 13 import java.util.regex.Pattern; 12 14 13 15 import javax.servlet.RequestDispatcher; … … 108 110 JsonWriter writer = gsonInstance.newJsonWriter(response.getWriter()); 109 111 writer.beginArray(); 110 111 for (String element : restoredFragment.split("\\ ")) 112 113 final Pattern pattern = Pattern.compile("([^\\s]+)|([\\r\\n]+)");// Pattern.compile("([a-zA-Zā-ūĀ-Ū0-9</>]+|\\p{Punct})|([\\r?\\n]+)", Pattern.DOTALL); 114 final Matcher matcher = pattern.matcher(restoredFragment); 115 116 while (matcher.find()) 112 117 { 118 String word = matcher.group(1); 119 final String lineBreak = matcher.group(2); 120 121 if (word == null && lineBreak == null) 122 { 123 continue; 124 } 125 113 126 writer.beginObject(); 114 127 115 if ( element.contains("<mark>"))128 if (word != null) 116 129 { 117 element = element.replace("<mark>", "").replace("</mark>", ""); 118 writer.name("macronised"); 119 writer.value(true); 130 if (word.contains("<mark>")) 131 { 132 word = word.replace("<mark>", "").replace("</mark>", ""); 133 writer.name("macronised"); 134 writer.value(true); 135 } 136 137 writer.name("w"); 138 writer.value(word); 120 139 } 121 140 122 writer.name("w"); 123 writer.value(element); 141 if (lineBreak != null) 142 { 143 writer.name("linebreak"); 144 writer.value(lineBreak); 145 } 124 146 125 147 writer.endObject();
Note:
See TracChangeset
for help on using the changeset viewer.