Last change on this file since 32225 was 32225, checked in by ak19, 6 years ago |
Committing Dr Bainbridge's (Eclipse) project UTF8-Fix which contains Java code that fixes up files that aren't fully UTF-8, but can contain stray non-UTF-8 chars like Latin-1.
|
File size:
973 bytes
|
Rev | Line | |
---|
[32225] | 1 | import java.io.*;
|
---|
| 2 |
|
---|
| 3 |
|
---|
| 4 | public class UTF8Fix{
|
---|
| 5 |
|
---|
| 6 | public static void main(String[] args){
|
---|
| 7 |
|
---|
| 8 | ByteFixer byte_fixer = new ByteFixer("badencoding.txt");
|
---|
| 9 | File fout = new File("goodencoding.txt");
|
---|
| 10 | BufferedOutputStream writer = null;
|
---|
| 11 |
|
---|
| 12 | try {
|
---|
| 13 | writer = new BufferedOutputStream(new FileOutputStream(fout));
|
---|
| 14 | } catch (FileNotFoundException e) {
|
---|
| 15 | System.err.println("Problem opening writer, file not found");
|
---|
| 16 | return;
|
---|
| 17 | }
|
---|
| 18 |
|
---|
| 19 | byte[] character;
|
---|
| 20 | int count = 0;
|
---|
| 21 | while(true){
|
---|
| 22 | character = byte_fixer.getNextCharacter();
|
---|
| 23 | if(character==null)
|
---|
| 24 | break;
|
---|
| 25 |
|
---|
| 26 | try {
|
---|
| 27 | writer.write(character);
|
---|
| 28 | count++;
|
---|
| 29 | } catch (IOException e) {
|
---|
| 30 | e.printStackTrace();
|
---|
| 31 | }
|
---|
| 32 |
|
---|
| 33 | if(count%1000==0){
|
---|
| 34 | System.out.print(".");
|
---|
| 35 | System.out.flush();
|
---|
| 36 | if(count==40000){
|
---|
| 37 | System.out.println();
|
---|
| 38 | count=0;
|
---|
| 39 | }
|
---|
| 40 | }
|
---|
| 41 | }
|
---|
| 42 | try {
|
---|
| 43 | byte_fixer.close();
|
---|
| 44 | writer.close();
|
---|
| 45 | } catch (IOException e) {
|
---|
| 46 | System.err.println("Unable to close reader/writer");
|
---|
| 47 | }
|
---|
| 48 | }
|
---|
| 49 | }
|
---|
Note: See TracBrowser for help on using the repository browser.