source: other-projects/is-sheet-music-encore/trunk/java-gen-corpus/TabProcessorTextGen.java@ 33007

Last change on this file since 33007 was 33000, checked in by cpb16, 5 years ago

To process a tab-delimited HathiTrust file and filter down to open Music format

File size: 1.5 KB
Line 
1//NAME:Caleb Bird
2//ID: 1289680
3//??References??
4
5import java.io.BufferedReader;
6import java.io.FileReader;
7import java.io.FileWriter;
8
/**
 * Filters a tab-delimited HathiTrust listing down to the records that pass
 * the field checks below and writes column 0 of each surviving record to
 * {@code HathiDocIDList.txt}, one value per line.
 *
 * <p>A record is kept when (zero-based columns) column 1 is {@code "allow"},
 * column 2 is {@code "pd"}, column 19 is {@code "MU"} and column 24 is
 * {@code "open"} or {@code "page"}. Records with fewer than 25 columns cannot
 * satisfy these checks and are skipped.
 *
 * <p>Usage: {@code java TabProcessorTextGen [input-file]}; when no argument
 * is given, {@code hathiSmall.txt} is read.
 */
public class TabProcessorTextGen {

    /** Input file used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT_FILENAME = "hathiSmall.txt";

    /** File that receives column 0 of every record that passes the filter. */
    private static final String OUTPUT_FILENAME = "HathiDocIDList.txt";

    /** A progress dot is printed after this many input lines. */
    private static final int PROGRESS_INTERVAL = 100000;

    /** Highest column index inspected by the filter is 24, so 25 fields are needed. */
    private static final int MIN_FIELD_COUNT = 25;

    /**
     * Reads the input file line by line and writes column 0 of each matching
     * record to {@link #OUTPUT_FILENAME}.
     *
     * @param args optional; {@code args[0]} is the path of the tab-delimited
     *             input file
     */
    public static void main(String[] args) {
        String inputFilename = (args.length == 0) ? DEFAULT_INPUT_FILENAME : args[0];
        System.out.println("Processing: " + inputFilename);

        // try-with-resources closes (and therefore flushes) both streams even
        // when reading or writing fails part-way through.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilename));
             FileWriter writer = new FileWriter(OUTPUT_FILENAME)) {

            int lineNum = 0;
            String line;
            // readLine() yields one record per call because records are newline-separated.
            while ((line = reader.readLine()) != null) {
                lineNum++;
                if (lineNum % PROGRESS_INTERVAL == 0) {
                    System.out.print(".");
                    System.out.flush();
                }

                // A limit of -1 keeps trailing empty fields, so column
                // positions stay stable even when the last columns are blank.
                String[] fields = line.split("\t", -1);
                if (matchesFilter(fields)) {
                    writer.write(fields[0] + "\n");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether a split record passes every field check of the filter.
     *
     * @param fields the tab-separated columns of one input line
     * @return {@code true} if the record has at least 25 columns and columns
     *         1, 2, 19 and 24 hold the expected values
     */
    private static boolean matchesFilter(String[] fields) {
        if (fields.length < MIN_FIELD_COUNT) {
            // Blank or truncated line: it cannot match, and indexing it would throw.
            return false;
        }
        return fields[1].equals("allow")
                && fields[2].equals("pd")
                && fields[19].equals("MU")
                && (fields[24].equals("open") || fields[24].equals("page"));
    }
}
60// Returns 42474
61
62//REFERENCES
63//https://www.javatpoint.com/java-filewriter-class
64//https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
Note: See TracBrowser for help on using the repository browser.