1 | /*
|
---|
2 | * To change this template, choose Tools | Templates
|
---|
3 | * and open the template in the editor.
|
---|
4 | */
|
---|
5 | package monogram.restorer;
|
---|
6 |
|
---|
7 | import util.CharReader;
|
---|
8 | import java.io.BufferedWriter;
|
---|
9 | import java.io.File;
|
---|
10 | import java.io.FileOutputStream;
|
---|
11 | import java.io.IOException;
|
---|
12 | import java.io.OutputStreamWriter;
|
---|
13 | import util.CharacterUtil;
|
---|
14 | import util.IOUtil;
|
---|
15 |
|
---|
16 | /**
|
---|
17 | *
|
---|
18 | * @author OEM
|
---|
19 | */
|
---|
20 | public class XMLRestorer {
|
---|
21 |
|
---|
22 | private static final String OUTPUT_CHARSET_ENCODING = "utf-8";
|
---|
23 |
|
---|
24 | public XMLRestorer() {
|
---|
25 | //default constructor
|
---|
26 | }
|
---|
27 |
|
---|
28 | public void restore(File inputFile, String inputCharsetEncoding, File outputFile, boolean preserveMacrons, boolean markupChangedWords) {
|
---|
29 | MonogramRestorer restorer = new MonogramRestorer(preserveMacrons);
|
---|
30 | final StringBuilder buffer = new StringBuilder();
|
---|
31 | CharReader reader = null;
|
---|
32 | BufferedWriter writer = null;
|
---|
33 | try {
|
---|
34 | reader = new CharReader(inputFile, inputCharsetEncoding);
|
---|
35 | writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), OUTPUT_CHARSET_ENCODING));
|
---|
36 |
|
---|
37 | while (reader.hasNextChar()) {
|
---|
38 | char peek = reader.peek();
|
---|
39 | if (peek == '<') {
|
---|
40 | while (reader.hasNextChar()) {
|
---|
41 | char c = reader.nextChar();
|
---|
42 | writer.write(c);
|
---|
43 | if (c == '>') {
|
---|
44 | break;
|
---|
45 | }
|
---|
46 | }
|
---|
47 | } else if (Character.isLetterOrDigit(peek)) {
|
---|
48 | buffer.setLength(0);
|
---|
49 | while (reader.hasNextChar() && Character.isLetterOrDigit(reader.peek())) {
|
---|
50 | buffer.append(reader.nextChar());
|
---|
51 | }
|
---|
52 | final String restoredToken = restorer.restore(buffer.toString(), markupChangedWords);
|
---|
53 | writer.write(restoredToken);
|
---|
54 | } else if (CharacterUtil.isPunctuation(peek)) {
|
---|
55 | final String restoredToken = restorer.restore(String.valueOf(reader.nextChar()), markupChangedWords);
|
---|
56 | writer.write(restoredToken);
|
---|
57 | } else {
|
---|
58 | writer.write(reader.nextChar());
|
---|
59 | }
|
---|
60 | }
|
---|
61 | } catch (IOException e) {
|
---|
62 | e.printStackTrace();
|
---|
63 | } finally {
|
---|
64 | reader.close();
|
---|
65 | IOUtil.closeWriter(writer);
|
---|
66 | }
|
---|
67 | }
|
---|
68 | }
|
---|