1 | //NAME:Caleb Bird
|
---|
2 | //ID: 1289680
|
---|
3 | //??References??
|
---|
4 |
|
---|
5 | import java.io.BufferedReader;
|
---|
6 | import java.io.FileReader;
|
---|
7 | import java.io.FileWriter;
|
---|
8 |
|
---|
/**
 * Creates a text file listing the ID of every record tagged with a given type code
 * (for example {@code MU}).
 *
 * <p>Intended to be run on the text file that contains every record in HathiTrust.
 * Each input line is treated as one record whose fields are separated by tab characters.
 * A record's ID (its first field) is written to the output file, one ID per line, when
 * all of the following hold:
 * <ul>
 *   <li>field 1 equals {@code "allow"},</li>
 *   <li>field 2 equals {@code "pd"},</li>
 *   <li>field 19 equals the requested type code, and</li>
 *   <li>field 24 equals {@code "open"} or {@code "page"}.</li>
 * </ul>
 *
 * <p>Lines that do not contain enough fields to be checked are skipped and counted
 * instead of aborting the whole run.
 */
public class javaGenFullIDList {

    // Zero-based positions of the tab-separated fields inspected by this program.
    // NOTE(review): the column meanings below are presumed from the values compared
    // against them - confirm against the description of the input file format.
    private static final int ID_FIELD = 0;       // record identifier, copied to the output
    private static final int ACCESS_FIELD = 1;   // presumably the access flag ("allow")
    private static final int RIGHTS_FIELD = 2;   // presumably the rights code ("pd")
    private static final int TYPE_FIELD = 19;    // compared with the <inputType> argument
    private static final int PROFILE_FIELD = 24; // presumably the access profile ("open"/"page")

    /** Smallest number of fields a line needs so that every inspected index exists. */
    private static final int MIN_FIELD_COUNT = PROFILE_FIELD + 1;

    /** A progress dot is printed to standard output every this many input lines. */
    private static final int PROGRESS_INTERVAL = 100000;

    /**
     * Program entry point.
     *
     * @param args exactly three values: the input file name, the output file name and
     *             the type code to select; with any other argument count a usage
     *             message is printed and nothing is read or written
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("Usage: TabProcTextGen <inputFilename> <outputFilename> <inputType>");
            return;
        }

        String inputFilename = args[0];
        String outputFilename = args[1];
        String inputType = args[2];

        System.out.println("Processing: " + inputFilename);

        // try-with-resources closes (and therefore flushes) both files even when reading
        // or writing fails part-way through, so IDs already written are not lost.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilename));
                FileWriter writer = new FileWriter(outputFilename)) {
            long lineNumber = 0;
            long skippedLines = 0;
            String line;

            // readLine() splits the input on line terminators, i.e. one record per call.
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (lineNumber % PROGRESS_INTERVAL == 0) {
                    System.out.print(".");
                    System.out.flush();
                }

                // A limit of -1 keeps trailing empty strings, so blank fields at the end
                // of a record still occupy their positions.
                String[] fields = line.split("\t", -1);
                if (fields.length < MIN_FIELD_COUNT) {
                    // Too short to inspect; indexing it would throw
                    // ArrayIndexOutOfBoundsException and stop the whole run.
                    skippedLines++;
                    continue;
                }

                if (isSelected(fields, inputType)) {
                    writer.write(fields[ID_FIELD] + "\n");
                }
            }

            if (skippedLines > 0) {
                System.err.println("Skipped " + skippedLines + " line(s) with fewer than "
                        + MIN_FIELD_COUNT + " tab-separated fields");
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report the failure and return.
            e.printStackTrace();
        }
    }

    /** Returns true when the record's fields satisfy every selection criterion. */
    private static boolean isSelected(String[] fields, String inputType) {
        String profile = fields[PROFILE_FIELD];
        return fields[ACCESS_FIELD].equals("allow")
                && fields[RIGHTS_FIELD].equals("pd")
                && fields[TYPE_FIELD].equals(inputType)
                && (profile.equals("open") || profile.equals("page"));
    }
}
|
---|
66 |
|
---|
67 | //REFERENCES
|
---|
68 | //https://www.javatpoint.com/java-filewriter-class
|
---|
69 | //https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
|
---|
70 | //Email from supervisor (Compx520 ID Error Apr 24)
|
---|