source: main/trunk/model-sites-dev/von-sparql/collect/nz-natlib-cat/pre-import/UTF8_Fix/src/UTF8Fix.java@ 28790

Last change on this file since 28790 was 28790, checked in by ak19, 10 years ago

Java program designed to fix encoding errors in the NZ catalogue downloaded through the government's 'open-data' web site

File size: 1.2 KB
Line 
1import java.io.*;
2
3
4public class UTF8Fix{
5
6 public static void main(String[] args){
7
8 if(args.length < 2 || args.length > 2){
9 System.err.println("USAGE: java UTF8Fix [input] [output]");
10 return;
11 }
12 ByteFixer byte_fixer = new ByteFixer(args[0]);
13 File fout = new File(args[1]);
14 BufferedOutputStream writer = null;
15
16 try {
17 writer = new BufferedOutputStream(new FileOutputStream(fout));
18 } catch (FileNotFoundException e) {
19 System.err.println("Problem opening writer, file not found");
20 System.err.println("USAGE: java UTF8Fix [input] [output]");
21 return;
22 }
23
24 byte[] character;
25 int count = 0;
26 String dots = "";
27 while(true){
28 character = byte_fixer.getNextCharacter();
29 if(character==null)
30 break;
31
32 try {
33 writer.write(character);
34 count++;
35 } catch (IOException e) {
36 e.printStackTrace();
37 }
38
39 if(count%1000000==0){
40 dots+= ".";
41 if(dots.length() > 20){
42 dots = "";
43 System.out.println();
44 }
45 System.out.print("\r" + dots);
46 System.out.flush();
47 }
48 }
49 try {
50 byte_fixer.close();
51 writer.close();
52 } catch (IOException e) {
53 System.err.println("Unable to close reader/writer");
54 }
55 }
56}
Note: See TracBrowser for help on using the repository browser.