source: other-projects/is-sheet-music-encore/trunk/java-gen-corpus/javaGenFullIDList.java@ 33437

Last change on this file since 33437 was 33437, checked in by cpb16, 5 years ago

made progress with morphology. Need to have a better area dimension threshold setup

File size: 2.3 KB
Line 
1//NAME:Caleb Bird
2//ID: 1289680
3//??References??
4
5import java.io.BufferedReader;
6import java.io.FileReader;
7import java.io.FileWriter;
8
9//Creates textfile of all records tagged with XX (MU for example)
10//Used on the textfile that contains every record in hathiTrust
/**
 * Command-line filter that extracts record IDs from a tab-separated record
 * listing (the comments in this file describe it as the text file containing
 * every HathiTrust record).
 *
 * <p>A record's ID (column 0) is written to the output file, one per line,
 * when all of the following hold:
 * <ul>
 *   <li>column 1 is {@code allow} (end users can view the item),</li>
 *   <li>column 2 is {@code pd} (public domain),</li>
 *   <li>column 19 equals the user-supplied category (MU, SE, etc.),</li>
 *   <li>column 24 is {@code open} (no download restrictions) or {@code page}
 *       (individual pages can be downloaded, but not the full pdf).</li>
 * </ul>
 *
 * <p>Usage: {@code javaGenFullIDList <inputFilename> <outputFilename> <inputType>}
 */
public class javaGenFullIDList {

    /** Column holding the record ID that is copied to the output. */
    private static final int COL_ID = 0;
    /** Column that must be "allow". */
    private static final int COL_ACCESS = 1;
    /** Column that must be "pd". */
    private static final int COL_RIGHTS = 2;
    /** Column compared against the category given on the command line. */
    private static final int COL_TYPE = 19;
    /** Column that must be "open" or "page". */
    private static final int COL_DOWNLOAD = 24;
    /** A line needs at least this many fields before every column above can be read. */
    private static final int MIN_COLUMNS = COL_DOWNLOAD + 1;
    /** A progress dot is printed each time this many input lines have been read. */
    private static final int PROGRESS_INTERVAL = 100000;

    /**
     * Reads the input file line by line and writes the IDs of matching records
     * to the output file.
     *
     * <p>Lines with fewer than {@value #MIN_COLUMNS} tab-separated fields (blank
     * lines, truncated records) are skipped and counted instead of aborting the
     * run; the count is reported on standard error when it is non-zero.
     *
     * @param args {@code args[0]} input filename, {@code args[1]} output
     *             filename, {@code args[2]} category to keep (e.g. "MU")
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("Usage: javaGenFullIDList <inputFilename> <outputFilename> <inputType>");
            return;
        }

        String inputFilename = args[0];
        String outputFilename = args[1];
        String inputType = args[2];

        System.out.println("Processing: " + inputFilename);

        // try-with-resources closes (and therefore flushes) both files even when
        // reading or writing fails part-way through.
        try (BufferedReader buf = new BufferedReader(new FileReader(inputFilename));
             FileWriter fw = new FileWriter(outputFilename)) {

            int lineNum = 0;
            int skipped = 0;
            String line;

            // readLine() splits on line terminators, so each iteration is one record.
            while ((line = buf.readLine()) != null) {
                lineNum++;
                if (lineNum % PROGRESS_INTERVAL == 0) {
                    System.out.print(".");
                    System.out.flush();
                }

                // Limit -1 keeps trailing empty fields, so blank columns still
                // occupy their position in the array.
                String[] item = line.split("\t", -1);
                if (item.length < MIN_COLUMNS) {
                    // Too short to contain every column we inspect; indexing it
                    // would throw ArrayIndexOutOfBoundsException.
                    skipped++;
                    continue;
                }

                if (isWanted(item, inputType)) {
                    fw.write(item[COL_ID] + "\n");
                }
            }

            if (skipped > 0) {
                System.err.println("Skipped " + skipped + " line(s) with fewer than "
                        + MIN_COLUMNS + " fields");
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and exit normally.
            e.printStackTrace();
        }
    }

    /** Returns true when the record's columns satisfy every filter criterion. */
    private static boolean isWanted(String[] item, String inputType) {
        return item[COL_ACCESS].equals("allow")
                && item[COL_RIGHTS].equals("pd")
                && item[COL_TYPE].equals(inputType)
                && (item[COL_DOWNLOAD].equals("open") || item[COL_DOWNLOAD].equals("page"));
    }
}
67
68//REFERENCES
69//https://www.javatpoint.com/java-filewriter-class
70//https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
71//Email from supervisor (Compx520 ID Error Apr 24)
Note: See TracBrowser for help on using the repository browser.