source: other-projects/is-sheet-music-encore/trunk/java-gen-corpus/javaGenValidIDList.java@ 33437

Last change on this file since 33437 was 33437, checked in by cpb16, 5 years ago

made progress with morphology. Need to have a better area dimension threshold setup

File size: 3.9 KB
Line 
1//NAME:Caleb Bird
2//ID: 1289680
3//??References??
4
5import java.io.BufferedReader;
6import java.io.FileReader;
7import java.io.FileWriter;
8import java.io.*;
9import java.util.*;
10
11
12//Gets Textfile with all volumes with XX (MU for example)
13//Checks if item has ten pages, and gets first ten pages.
14//Writes list of IDs with at least ten pages.
/**
 * Picks a random sample of volume IDs that have at least ten pages.
 *
 * <p>Reads a tab-separated text file whose first column is a volume ID, shuffles the IDs and
 * then, for each ID in turn, runs {@code ./download_metadata_temp.sh <id>} and reads the
 * {@code <htd:numpages>} value from {@code metadata_temp.xml} (presumably written by that
 * script -- confirm against the script). IDs with at least ten pages are collected until
 * {@code outputCount} of them have been found or the list is exhausted, and are then written
 * one per line to the output file.
 *
 * <p>Usage: {@code javaGenValidIDList <inputFilename> <outputCount> <outputFilename>}
 */
public class javaGenValidIDList {

    /** Script run once per candidate ID; the ID is passed as its only argument. */
    private static final String DOWNLOAD_SCRIPT = "./download_metadata_temp.sh";

    /** File the page count is read from after the script has run. */
    private static final String METADATA_FILE = "metadata_temp.xml";

    /** Opening tag of the element holding the page count. */
    private static final String NUMPAGES_TAG = "<htd:numpages>";

    /** Minimum page count (inclusive) for an ID to be accepted. */
    private static final int MIN_PAGES = 10;

    /**
     * Program entry point.
     *
     * @param args {@code <inputFilename> <outputCount> <outputFilename>}; any other argument
     *     count prints the usage line and returns
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("Usage: javaGenValidIDList <inputFilename> <outputCount> <outputFilename>");
            return;
        }
        try {
            //|||PART1|||
            String inputFilename = args[0];
            int outputCount = Integer.parseInt(args[1]);
            String outputFilename = args[2];

            List<String> ids = readIds(inputFilename);
            //randomize list so that the accepted IDs are a random sample
            Collections.shuffle(ids);

            //||||PART2||||
            // The writer is opened before the (slow) download loop so an unwritable output
            // path is reported immediately; try-with-resources closes it on every path.
            try (FileWriter fw = new FileWriter(outputFilename)) {
                List<String> selected = selectValidIds(ids, outputCount);
                for (String id : selected) {
                    fw.write(id + '\n');
                }
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the first tab-separated column of every line of the given file.
     *
     * @param inputFilename path of the tab-separated input file
     * @return the IDs in file order; lines whose first column is blank are skipped
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> readIds(String inputFilename) throws IOException {
        List<String> ids = new ArrayList<String>();
        try (BufferedReader buf = new BufferedReader(new FileReader(inputFilename))) {
            String line;
            while ((line = buf.readLine()) != null) {
                //Split line by tab and keep the first element (ID)
                String id = line.split("\t", -1)[0];
                // A blank ID would only make the download script run with no argument.
                if (!id.trim().isEmpty()) {
                    ids.add(id);
                }
            }
        }
        return ids;
    }

    /**
     * Walks the IDs in order and keeps those with at least {@link #MIN_PAGES} pages, stopping
     * once {@code outputCount} IDs have been accepted or the list runs out.
     *
     * <p>If the download script exits with a non-zero status the JVM is terminated with that
     * same status.
     *
     * @param ids candidate IDs, in the order they should be tried
     * @param outputCount number of accepted IDs wanted
     * @return the accepted IDs, at most {@code outputCount} of them
     * @throws IOException if the download script cannot be started
     * @throws InterruptedException if the thread is interrupted while waiting for the script
     */
    private static List<String> selectValidIds(List<String> ids, int outputCount)
            throws IOException, InterruptedException {
        List<String> selected = new ArrayList<String>();
        // The index is bounds-checked before every access, so an empty list is handled.
        for (int j = 0; j < ids.size() && selected.size() < outputCount; j++) {
            System.out.println("Processing item: " + j);
            //current ID being processed
            String idCurr = ids.get(j);

            // Passing the ID as its own argument keeps an ID containing whitespace from being
            // split into several arguments. inheritIO() connects the script's output to this
            // process's console so a chatty script cannot fill an unread pipe and stall
            // waitFor().
            ProcessBuilder pb = new ProcessBuilder(DOWNLOAD_SCRIPT, idCurr);
            pb.inheritIO();
            int exitStatus = pb.start().waitFor();

            if (exitStatus != 0) {
                System.err.println("failed to run:" + DOWNLOAD_SCRIPT + " " + idCurr);
                System.exit(exitStatus);
            }

            //if numpages is at least MIN_PAGES then add this record to the result
            if (getNumPages(idCurr) >= MIN_PAGES) {
                System.out.println("Successful items: " + selected.size());
                selected.add(idCurr);
            }
        }
        if (selected.size() < outputCount) {
            System.err.println("Warning: only " + selected.size() + " of " + outputCount
                    + " requested IDs met the page criterion");
        }
        return selected;
    }

    /**
     * Reads the page count from {@code metadata_temp.xml}.
     *
     * <p>Every line containing {@code <htd:numpages>} is examined and the last parseable value
     * wins. The value is the text between that opening tag and the next {@code <} on the line.
     *
     * @param id the ID the metadata belongs to; used only in diagnostic messages
     * @return the page count, or {@code -1} if the file cannot be read or holds no parseable
     *     {@code <htd:numpages>} value (so the caller simply rejects the ID)
     */
    private static int getNumPages(String id) {
        int numpages = -1;
        try (BufferedReader buf = new BufferedReader(new FileReader(METADATA_FILE))) {
            String line;
            while ((line = buf.readLine()) != null) {
                int tagAt = line.indexOf(NUMPAGES_TAG);
                if (tagAt < 0) {
                    continue;
                }
                //Isolate the page number between the opening tag and the next '<'
                int start = tagAt + NUMPAGES_TAG.length();
                int end = line.indexOf('<', start);
                if (end < 0) {
                    continue;
                }
                String value = line.substring(start, end).trim();
                try {
                    numpages = Integer.parseInt(value);
                } catch (NumberFormatException e) {
                    System.err.println("ignoring non-numeric numpages value '" + value
                            + "' for " + id);
                }
            }
        } catch (IOException e) {
            System.err.println("could not read " + METADATA_FILE + " for " + id + ": "
                    + e.getMessage());
        }
        if (numpages < 0) {
            System.err.println("no page count found for " + id + "; skipping");
        }
        return numpages;
    }
}
117// Returns 42474
118
119//REFERENCES
120//https://stackoverflow.com/questions/12892665/how-to-capture-the-exit-status-of-a-shell-command-in-java
121//https://www.javatpoint.com/java-filewriter-class
122//https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
123//https://www.geeksforgeeks.org/randomly-select-items-from-a-list-in-java/
124//https://codereview.stackexchange.com/questions/146551/picking-10-distinct-words-randomly-from-list-of-unique-words
125//http://www.linuxforums.org/forum/programming-scripting/65117-c-c-system-function-analog-java.html
126
127//USE RUN-LIST.txt (modify the file it reads using $1, the filename passed on the command line)
128
Note: See TracBrowser for help on using the repository browser.