source: other-projects/the-macronizer/trunk/src/java/monogram/restorer/XMLRestorer.java@ 29855

Last change on this file since 29855 was 29855, checked in by davidb, 9 years ago

John's code after refactoring by Tom over the summer of 2014/2015

File size: 2.3 KB
Line 
1/*
2 * To change this template, choose Tools | Templates
3 * and open the template in the editor.
4 */
5package monogram.restorer;
6
7import util.CharReader;
8import java.io.BufferedWriter;
9import java.io.File;
10import java.io.FileOutputStream;
11import java.io.IOException;
12import java.io.OutputStreamWriter;
13import util.CharacterUtil;
14import util.IOUtil;
15
16/**
17 *
18 * @author OEM
19 */
20public class XMLRestorer {
21
22 private static final String OUTPUT_CHARSET_ENCODING = "utf-8";
23
24 public XMLRestorer() {
25 //default constructor
26 }
27
28 public void restore(File inputFile, String inputCharsetEncoding, File outputFile, boolean preserveMacrons) {
29 MonogramRestorer restorer = new MonogramRestorer(preserveMacrons);
30 final StringBuilder buffer = new StringBuilder();
31 CharReader reader = null;
32 BufferedWriter writer = null;
33 try {
34 reader = new CharReader(inputFile, inputCharsetEncoding);
35 writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), OUTPUT_CHARSET_ENCODING));
36
37 while (reader.hasNextChar()) {
38 char peek = reader.peek();
39 if (peek == '<') {
40 while (reader.hasNextChar()) {
41 char c = reader.nextChar();
42 writer.write(c);
43 if (c == '>') {
44 break;
45 }
46 }
47 } else if (Character.isLetterOrDigit(peek)) {
48 buffer.setLength(0);
49 while (reader.hasNextChar() && Character.isLetterOrDigit(reader.peek())) {
50 buffer.append(reader.nextChar());
51 }
52 final String restoredToken = restorer.restore(buffer.toString());
53 writer.write(restoredToken);
54 } else if (CharacterUtil.isPunctuation(peek)) {
55 final String restoredToken = restorer.restore(String.valueOf(reader.nextChar()));
56 writer.write(restoredToken);
57 } else {
58 writer.write(reader.nextChar());
59 }
60 }
61 } catch (IOException e) {
62 e.printStackTrace();
63 } finally {
64 reader.close();
65 IOUtil.closeWriter(writer);
66 }
67 }
68}
Note: See TracBrowser for help on using the repository browser.