source: other-projects/is-sheet-music-encore/trunk/java-gen-corpus/javaGenFullIDList.java@ 33047

Last change on this file since 33047 was 33047, checked in by cpb16, 5 years ago

Corpus generator complete!

File size: 1.9 KB
Line 
1//NAME:Caleb Bird
2//ID: 1289680
3//??References??
4
5import java.io.BufferedReader;
6import java.io.FileReader;
7import java.io.FileWriter;
8
9//Creates textfile of all records tagged with XX (MU for example)
10//Used on the textfile that contains every record in hathiTrust
/**
 * Command-line filter that extracts record IDs from a tab-separated listing.
 *
 * <p>Usage: {@code javaGenFullIDList <inputFilename> <outputFilename> <inputType>}
 *
 * <p>The input is read line by line; each line is split on tab characters. The first
 * column of a record is written to the output file (one ID per line, terminated by
 * {@code "\n"}) when all of the following hold:
 * <ul>
 *   <li>column 1 equals {@code "allow"},</li>
 *   <li>column 2 equals {@code "pd"},</li>
 *   <li>column 19 equals the {@code inputType} argument (e.g. {@code "MU"}),</li>
 *   <li>column 24 equals {@code "open"} or {@code "page"}.</li>
 * </ul>
 *
 * <p>NOTE(review): the column positions presumably follow the HathiTrust "hathifile"
 * layout (access / rights / bibliographic format / access profile) — confirm against
 * the hathifiles field description before changing them.
 */
public class javaGenFullIDList {

    // Zero-based positions of the columns this tool inspects.
    private static final int COL_ID = 0;
    private static final int COL_ACCESS = 1;
    private static final int COL_RIGHTS = 2;
    private static final int COL_TYPE = 19;
    private static final int COL_PROFILE = 24;

    // A record must have at least this many columns for every index above to exist.
    private static final int MIN_COLUMNS = COL_PROFILE + 1;

    // A progress dot is printed each time this many input lines have been read.
    private static final int PROGRESS_INTERVAL = 100000;

    /**
     * Program entry point.
     *
     * @param args exactly three values: input file name, output file name, and the
     *             record type to keep; any other count prints the usage line and returns
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println(
                    "Usage: javaGenFullIDList <inputFilename> <outputFilename> <inputType>");
            return;
        }

        String inputFilename = args[0];
        String outputFilename = args[1];
        String inputType = args[2];

        System.out.println("Processing: " + inputFilename);

        // try-with-resources closes (and therefore flushes) both files even when
        // reading or writing fails part-way through. The reader is opened first so a
        // missing input file does not create/truncate the output file.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilename));
             FileWriter writer = new FileWriter(outputFilename)) {

            long lineNumber = 0;
            long skipped = 0;
            String line;

            // readLine() yields one record per call because records are newline-separated.
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (lineNumber % PROGRESS_INTERVAL == 0) {
                    System.out.print(".");
                    System.out.flush();
                }

                // Limit -1 keeps trailing empty strings, so blank columns still count.
                String[] fields = line.split("\t", -1);

                // Blank or truncated lines cannot be tested; skip them instead of
                // failing with an ArrayIndexOutOfBoundsException and losing the run.
                if (fields.length < MIN_COLUMNS) {
                    skipped++;
                    continue;
                }

                if (matches(fields, inputType)) {
                    // The ID is written verbatim. (An earlier encoding step that
                    // replaced ":" with "+" and "/" with "=" is intentionally disabled.)
                    writer.write(fields[COL_ID] + "\n");
                }
            }

            if (skipped > 0) {
                System.err.println("Skipped " + skipped + " line(s) with fewer than "
                        + MIN_COLUMNS + " tab-separated columns");
            }
        } catch (Exception e) {
            // Top-level boundary: report the failure rather than propagating out of main.
            e.printStackTrace();
        }
    }

    // Returns true when a record (already known to have MIN_COLUMNS columns) passes
    // every selection criterion for the requested type.
    private static boolean matches(String[] fields, String inputType) {
        if (!fields[COL_ACCESS].equals("allow")) {
            return false;
        }
        if (!fields[COL_RIGHTS].equals("pd")) {
            return false;
        }
        if (!fields[COL_TYPE].equals(inputType)) {
            return false;
        }
        String profile = fields[COL_PROFILE];
        return profile.equals("open") || profile.equals("page");
    }
}
66
67//REFERENCES
68//https://www.javatpoint.com/java-filewriter-class
69//https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
70//Email from supervisor (Compx520 ID Error Apr 24)
Note: See TracBrowser for help on using the repository browser.