Last change
on this file since 32225 was 32225, checked in by ak19, 6 years ago |
Committing Dr Bainbridge's (Eclipse) project UTF8-Fix which contains Java code that fixes up files that aren't fully UTF-8, but can contain stray non-UTF-8 chars like Latin-1.
|
File size:
973 bytes
|
Line | |
---|
1 | import java.io.*;
|
---|
2 |
|
---|
3 |
|
---|
4 | public class UTF8Fix{
|
---|
5 |
|
---|
6 | public static void main(String[] args){
|
---|
7 |
|
---|
8 | ByteFixer byte_fixer = new ByteFixer("badencoding.txt");
|
---|
9 | File fout = new File("goodencoding.txt");
|
---|
10 | BufferedOutputStream writer = null;
|
---|
11 |
|
---|
12 | try {
|
---|
13 | writer = new BufferedOutputStream(new FileOutputStream(fout));
|
---|
14 | } catch (FileNotFoundException e) {
|
---|
15 | System.err.println("Problem opening writer, file not found");
|
---|
16 | return;
|
---|
17 | }
|
---|
18 |
|
---|
19 | byte[] character;
|
---|
20 | int count = 0;
|
---|
21 | while(true){
|
---|
22 | character = byte_fixer.getNextCharacter();
|
---|
23 | if(character==null)
|
---|
24 | break;
|
---|
25 |
|
---|
26 | try {
|
---|
27 | writer.write(character);
|
---|
28 | count++;
|
---|
29 | } catch (IOException e) {
|
---|
30 | e.printStackTrace();
|
---|
31 | }
|
---|
32 |
|
---|
33 | if(count%1000==0){
|
---|
34 | System.out.print(".");
|
---|
35 | System.out.flush();
|
---|
36 | if(count==40000){
|
---|
37 | System.out.println();
|
---|
38 | count=0;
|
---|
39 | }
|
---|
40 | }
|
---|
41 | }
|
---|
42 | try {
|
---|
43 | byte_fixer.close();
|
---|
44 | writer.close();
|
---|
45 | } catch (IOException e) {
|
---|
46 | System.err.println("Unable to close reader/writer");
|
---|
47 | }
|
---|
48 | }
|
---|
49 | }
|
---|
Note:
See
TracBrowser
for help on using the repository browser.