source: main/trunk/model-sites-dev/von-sparql/collect/nz-natlib-cat/pre-import/marcXML_Split/src/split.java@ 28734

Last change on this file since 28734 was 28734, checked in by davidb, 10 years ago

Tidy up on usage statement

File size: 2.1 KB
Line 
1import java.io.*;
2import com.google.common.hash.*;
3import com.google.common.io.Files;
4import org.marc4j.*;
5import org.marc4j.marc.Record;
6
7public class split {
8
9 public static void main(String[] args) throws Exception
10 {
11 long StartTime = System.currentTimeMillis();
12
13 //Default values for arguments
14 int NRecords = 250;
15 String OutputPath = "./out/";
16 String InputPath = "./NZDataFull.xml";
17
18 //Incorrect number of arguments supplied
19 if(args.length!=4)
20 {
21 System.err.println("USAGE: java split [-n records_per_file] [-o output_path]");
22 return;
23 }
24 //read arguments
25 for(int i=0; i < args.length; i+=2)
26 {
27 if(args[i].equals("-n"))
28 NRecords = Integer.parseInt(args[i+1]);
29 else if(args[i].equals("-o"))
30 OutputPath = args[i+1];
31 }
32
33 InputStream in;
34 try{
35 in = new FileInputStream(InputPath);
36 }
37 catch(Exception e){
38 System.err.println("./NZDataFull.xml does not exist");
39 return;
40 }
41
42 MarcXmlReader reader = new MarcXmlReader(in);
43 String TempFilename = OutputPath + "/temp.xml";
44 File f;
45 int RecordCount = 0;
46
47 while(reader.hasNext())
48 {
49 f = new File(TempFilename);
50 MarcWriter writer = new MarcXmlWriter(new FileOutputStream(f),true);
51 Record record;
52
53 //Write segment of records to file
54 for(int i=0; (i<NRecords && reader.hasNext()); i++)
55 {
56 record = reader.next();
57 writer.write(record);
58 RecordCount++;
59 }
60 writer.close();
61
62 //Print update
63 if(RecordCount%(NRecords * 50)==0)
64 System.err.print("\rProcessed " + RecordCount + " records");
65
66 //Calculate MD5
67 HashCode hc = Files.hash(f, Hashing.md5());
68 String s = hc.toString();
69
70 //Ensure target folder exists, then rename file to hash string
71 File target = new File(OutputPath+"/"+s.substring(0, 2)+"/"+s.substring(2)+".xml");
72 File parent = target.getParentFile();
73 if(!parent.exists() && !parent.mkdirs()){
74 throw new IllegalStateException("Couldn't create dir " + parent);
75 }
76 f.renameTo(target);
77 }
78 System.err.print("\rProcessed " + RecordCount + " records");
79 System.err.println("\nTime taken: " + (System.currentTimeMillis()-StartTime) + "ms");
80 }
81}
Note: See TracBrowser for help on using the repository browser.