/*
 * 07/28/2008
 *
 * RtfToText.java - Returns the plain text version of RTF documents.
 *
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
9 | package org.fife.ui.rsyntaxtextarea;
|
---|
10 |
|
---|
11 | import java.io.*;
|
---|
12 |
|
---|
13 |
|
---|
/**
 * Gets the plain text version of RTF documents.<p>
 *
 * This is used by <code>RtfTransferable</code> to return the plain text
 * version of the transferable when the receiver does not support RTF.<p>
 *
 * The conversion is intentionally minimal. Only text that sits directly
 * inside the outermost document group is emitted; nested groups (font
 * tables, color tables, etc.) are skipped entirely. The control words
 * "<code>par</code>" and "<code>line</code>" become a newline,
 * "<code>tab</code>" becomes a tab, and every other control word is dropped.
 * Escaped braces and backslashes are emitted literally. Hex and Unicode
 * character escapes are not decoded.
 *
 * @author Robert Futrell
 * @version 1.0
 */
class RtfToText {

	/** The source of RTF markup. It is closed by the static entry points. */
	private final Reader r;

	/** Accumulates the plain text output. */
	private final StringBuffer sb;

	/** The control word currently being read, without its leading backslash. */
	private final StringBuffer controlWord;

	/**
	 * Group nesting depth relative to the outermost document group: 0 means
	 * directly inside the document group, a positive value means inside a
	 * nested group, and a negative value means the document group has closed.
	 */
	private int blockCount;

	/** Whether a backslash was seen and a control word is being collected. */
	private boolean inControlWord;


	/**
	 * Private constructor.
	 *
	 * @param r The reader to read RTF text from.
	 */
	private RtfToText(Reader r) {
		this.r = r;
		sb = new StringBuffer();
		controlWord = new StringBuffer();
		blockCount = 0;
		inControlWord = false;
	}


	/**
	 * Converts the RTF text read from this converter's <code>Reader</code>
	 * into plain text.  It is the caller's responsibility to close the
	 * reader after this method is called.
	 *
	 * @return The plain text.
	 * @throws IOException If an IO error occurs, or if the input does not
	 *         start with an opening curly brace.
	 */
	private String convert() throws IOException {

		// Skip over first curly brace as the whole file is in '{' and '}'
		int i = r.read();
		if (i!='{') {
			throw new IOException("Invalid RTF file");
		}

		while ((i=r.read())!=-1) {
			char ch = (char)i;
			if (blockCount==0) {
				handleTopLevelChar(ch);
			}
			else {
				skipNestedChar(ch);
			}
		}

		// Input ended while a top-level control word was still being read
		// (e.g. an unterminated document); make sure it still takes effect.
		if (blockCount==0 && inControlWord) {
			endControlWord();
		}

		return sb.toString();

	}


	/**
	 * Ends a control word.  Checks whether it is a common one that affects
	 * the plain text output (such as "<code>par</code>", "<code>line</code>"
	 * or "<code>tab</code>") and updates the text buffer accordingly.  Any
	 * other control word is silently discarded.
	 */
	private void endControlWord() {
		String word = controlWord.toString();
		if ("par".equals(word) || "line".equals(word)) {
			sb.append('\n');
		}
		else if ("tab".equals(word)) {
			sb.append('\t');
		}
		controlWord.setLength(0);
		inControlWord = false;
	}


	/**
	 * Processes one character that appears directly inside the document
	 * group, i.e. while <code>blockCount</code> is zero.
	 *
	 * @param ch The character to process.
	 */
	private void handleTopLevelChar(char ch) {

		switch (ch) {

			case '{':
			case '}':
				if (inControlWord && controlWord.length()==0) { // "\{" or "\}"
					sb.append(ch);
					inControlWord = false;
				}
				else {
					// A group delimiter also terminates a pending control
					// word; otherwise e.g. "\par}" would lose its newline and
					// text following a nested group would be swallowed.
					if (inControlWord) {
						endControlWord();
					}
					if (ch=='{') {
						blockCount++;
					}
					else {
						blockCount--;
					}
				}
				break;

			case '\\':
				if (!inControlWord) {
					inControlWord = true;
				}
				else if (controlWord.length()==0) { // "\\"
					// Literal backslash; the escape is complete, so do NOT
					// start reading a new control word here.
					sb.append('\\');
					inControlWord = false;
				}
				else {
					// Back-to-back control words such as "\tab\par".
					endControlWord();
					inControlWord = true;
				}
				break;

			case ' ':
				if (inControlWord) {
					// The space is the control word's delimiter, not text.
					endControlWord();
				}
				else {
					sb.append(' ');
				}
				break;

			case '\r':
			case '\n':
				if (inControlWord) {
					endControlWord();
				}
				// Otherwise, ignore; raw line breaks are not content in RTF.
				break;

			default:
				if (inControlWord) {
					controlWord.append(ch);
				}
				else {
					sb.append(ch);
				}
				break;

		}

	}


	/**
	 * Processes one character that is outside the top level of the document
	 * group (inside a nested group, or after the document group has closed).
	 * Such content is never emitted; only the nesting depth is tracked.
	 *
	 * @param ch The character to process.
	 * @throws IOException If an IO error occurs.
	 */
	private void skipNestedChar(char ch) throws IOException {
		switch (ch) {
			case '\\':
				// Discard the character after the backslash so that escaped
				// braces ("\{" and "\}") are not counted as group delimiters.
				r.read();
				break;
			case '{':
				blockCount++;
				break;
			case '}':
				blockCount--;
				break;
			default:
				break;
		}
	}


	/**
	 * Converts the contents of the specified byte array representing
	 * an RTF document into plain text.
	 *
	 * @param rtf The byte array representing an RTF document.
	 * @return The contents of the RTF document, in plain text.
	 * @throws IOException If an IO error occurs.
	 */
	public static String getPlainText(byte[] rtf) throws IOException {
		return getPlainText(new ByteArrayInputStream(rtf));
	}


	/**
	 * Converts the contents of the specified RTF file to plain text.  The
	 * file is decoded as US-ASCII, the same as the input stream overload,
	 * rather than with the platform's default charset.
	 *
	 * @param file The RTF file to convert.
	 * @return The contents of the file, in plain text.
	 * @throws IOException If an IO error occurs.
	 */
	public static String getPlainText(File file) throws IOException {
		return getPlainText(new FileInputStream(file));
	}


	/**
	 * Converts the contents of the specified input stream to plain text.
	 * The stream is decoded as US-ASCII.  The input stream will be closed
	 * when this method returns.
	 *
	 * @param in The input stream to convert.
	 * @return The contents of the stream, in plain text.
	 * @throws IOException If an IO error occurs.
	 */
	public static String getPlainText(InputStream in) throws IOException {
		// Buffered because the converter reads one character at a time.
		return getPlainText(new BufferedReader(
								new InputStreamReader(in, "US-ASCII")));
	}


	/**
	 * Converts the contents of the specified <code>Reader</code> to plain
	 * text.  The reader is closed before this method returns, whether or not
	 * the conversion succeeds.
	 *
	 * @param r The <code>Reader</code>.
	 * @return The contents of the <code>Reader</code>, in plain text.
	 * @throws IOException If an IO error occurs.
	 */
	private static String getPlainText(Reader r) throws IOException {
		try {
			RtfToText converter = new RtfToText(r);
			return converter.convert();
		} finally {
			r.close();
		}
	}


	/**
	 * Converts the contents of the specified String to plain text.
	 *
	 * @param rtf A string whose contents represent an RTF document.
	 * @return The contents of the String, in plain text.
	 * @throws IOException If an IO error occurs.
	 */
	public static String getPlainText(String rtf) throws IOException {
		return getPlainText(new StringReader(rtf));
	}


}