[33009] | 1 | //NAME:Caleb Bird
|
---|
| 2 | //ID: 1289680
|
---|
| 3 | //??References??
|
---|
| 4 |
|
---|
| 5 | import java.io.BufferedReader;
|
---|
| 6 | import java.io.FileReader;
|
---|
| 7 | import java.io.FileWriter;
|
---|
| 8 |
|
---|
/**
 * Filters a tab-separated input file and writes the first field (the record
 * id) of every matching record to an output file, one id per line.
 *
 * <p>A record matches when field 1 is {@code "allow"}, field 2 is
 * {@code "pd"}, field 19 equals the {@code inputType} argument, and field 24
 * is either {@code "open"} or {@code "page"} (fields are 0-indexed).
 *
 * <p>Usage: {@code TabProcTextGen <inputFilename> <outputFilename> <inputType>}
 */
public class TabProcTextGen {

    /** Index of the field written to the output file. */
    private static final int ID_FIELD = 0;

    /** Index of the field compared against the {@code inputType} argument. */
    private static final int TYPE_FIELD = 19;

    /** Highest field index inspected; a row needs at least this many + 1 fields. */
    private static final int LAST_FIELD = 24;

    /** Number of input lines between progress dots on stdout. */
    private static final int PROGRESS_INTERVAL = 100000;

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} input filename, {@code args[1]} output
     *             filename, {@code args[2]} value that field 19 must equal
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("Usage: TabProcTextGen <inputFilename> <outputFilename> <inputType>");
            return;
        }

        String inputFilename = args[0];
        String outputFilename = args[1];
        String inputType = args[2];

        System.out.println("Processing: " + inputFilename);

        // try-with-resources closes (and therefore flushes) both files even
        // when an exception interrupts processing. The reader is opened first
        // so a missing input file does not create an empty output file.
        try (BufferedReader buf = new BufferedReader(new FileReader(inputFilename));
             FileWriter fw = new FileWriter(outputFilename)) {

            int lineNum = 0;
            int skipped = 0;
            String line;

            // readLine() yields one record per call (records are newline-separated).
            while ((line = buf.readLine()) != null) {
                lineNum++;
                if (lineNum % PROGRESS_INTERVAL == 0) {
                    System.out.print(".");
                    System.out.flush();
                }

                // Limit -1 keeps trailing empty fields, so blank entries stay as "".
                String[] item = line.split("\t", -1);

                // A row too short to hold every inspected field cannot match;
                // skip it rather than abort the whole run on an index error.
                if (item.length <= LAST_FIELD) {
                    skipped++;
                    continue;
                }

                if (matches(item, inputType)) {
                    // The id is written exactly as it appears in the input (no re-encoding).
                    fw.write(item[ID_FIELD] + "\n");
                }
            }

            if (skipped > 0) {
                System.err.println("Skipped " + skipped + " line(s) with fewer than "
                        + (LAST_FIELD + 1) + " tab-separated fields");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns true when a row (already known to be long enough) passes every filter. */
    private static boolean matches(String[] item, String inputType) {
        return item[1].equals("allow")
                && item[2].equals("pd")
                && item[TYPE_FIELD].equals(inputType)
                && (item[LAST_FIELD].equals("open") || item[LAST_FIELD].equals("page"));
    }
}
| 64 |
|
---|
[33031] | 65 | //REFERENCES
|
---|
[33009] | 66 | //https://www.javatpoint.com/java-filewriter-class
|
---|
| 67 | //https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
|
---|
[33044] | 68 | //Email from supervisor (Compx520 ID Error Apr 24)
|
---|