source: main/trunk/model-sites-dev/von-sparql/collect/nz-natlib-cat/pre-import/marcXML_Split/src/backup@ 28675

Last change on this file since 28675 was 28675, checked in by ak19, 10 years ago

Initial cut at files for a Greenstone collection based on the NZ National Library's Union Catalogue.

File size: 2.1 KB
Line 
1import java.io.*;
2import com.google.common.hash.*;
3import com.google.common.io.Files;
4import org.marc4j.*;
5import org.marc4j.marc.Record;
6
7public class split {
8
9 public static void main(String[] args) throws Exception
10 {
11 long StartTime = System.currentTimeMillis();
12 //Default values for arguments
13 int NRecords = 250;
14 String OutputPath = "./out/";
15
16 //Incorrect number of arguments supplied
17 if(args.length!=5)
18 {
19 System.err.println("USAGE: java split [-n records_per_file] [-o output_path] input.xml");
20 System.err.println("DEFAULT: [-n 200] [-o ./out/]");
21 return;
22 }
23 //read arguments
24 for(int i=0; i < args.length; i+=2)
25 {
26 if(args[i].equals("-n"))
27 NRecords = Integer.parseInt(args[i+1]);
28 else if(args[i].equals("-o"))
29 OutputPath = args[i+1];
30 }
31
32 InputStream in;
33 try{
34 in = new FileInputStream(args[args.length-1]);
35 }
36 catch(Exception e){
37 System.err.println("Input file doesn't exist");
38 return;
39 }
40
41 MarcXmlReader reader = new MarcXmlReader(in);
42 String TempFilename = OutputPath + "/temp.xml";
43 File f;
44 int RecordCount = 0;
45
46 while(reader.hasNext())
47 {
48 f = new File(TempFilename);
49 MarcWriter writer = new MarcXmlWriter(new FileOutputStream(f),true);
50
51 Record record;
52 //Write segment of records to file
53 for(int i=0; (i<NRecords && reader.hasNext()); i++)
54 {
55 record = reader.next();
56 writer.write(record);
57 RecordCount++;
58 }
59 writer.close();
60
61 //Print update
62 if(RecordCount%50000==0)
63 System.err.println("Processed " + RecordCount + " records");
64
65 //Calculate MD5
66 HashCode hc = Files.hash(f, Hashing.md5());
67 String s = hc.toString();
68
69 //Ensure target folder exists, then rename file to hash string
70 File target = new File(OutputPath+"/"+s.substring(0, 2)+"/"+s.substring(2)+".xml");
71 File parent = target.getParentFile();
72 if(!parent.exists() && !parent.mkdirs()){
73 throw new IllegalStateException("Couldn't create dir " + parent);
74 }
75 f.renameTo(target);
76 }
77 System.err.println("Processed " + RecordCount + " records");
78 System.err.println("Time taken: " + (System.currentTimeMillis()-StartTime) + "ms");
79 }
80}
Note: See TracBrowser for help on using the repository browser.