1 | //NAME:Caleb Bird
|
---|
2 | //ID: 1289680
|
---|
3 | //??References??
|
---|
4 |
|
---|
5 | import java.io.BufferedReader;
|
---|
6 | import java.io.FileReader;
|
---|
7 | import java.io.FileWriter;
|
---|
8 | import java.io.*;
|
---|
9 | import java.util.*;
|
---|
10 |
|
---|
11 |
|
---|
/**
 * Builds a random list of volume IDs that have at least ten pages.
 *
 * <p>Reads a tab-separated text file (per the original author, a list of all
 * volumes for one prefix, e.g. MU), takes the first column of every line as
 * an ID and shuffles the IDs. For each ID in turn it runs
 * {@code ./download_metadata_temp.sh <id>} and then reads
 * {@code metadata_temp.xml} to find the {@code <htd:numpages>} value. IDs
 * with at least {@value #MIN_PAGES} pages are collected until
 * {@code outputCount} of them have been found or the candidates run out, and
 * the collected IDs are written one per line to the output file.
 *
 * <p>Usage: {@code javaGenValidIDList <inputFilename> <outputCount> <outputFilename>}
 */
public class javaGenValidIDList {

    /** Script that downloads the metadata for one ID; resolved against the working directory. */
    private static final String DOWNLOAD_SCRIPT = "./download_metadata_temp.sh";

    /** File read after each script run to find the page count. */
    // NOTE(review): presumably the script overwrites this file on every run;
    // if it does not, a stale page count from a previous ID is read - confirm.
    private static final String METADATA_FILE = "metadata_temp.xml";

    /** Opening tag that precedes the page count inside the metadata file. */
    private static final String NUMPAGES_TAG = "<htd:numpages>";

    /** Minimum page count (inclusive) for an ID to be kept. */
    private static final int MIN_PAGES = 10;

    /** Returned by {@link #getNumPages(String)} when no usable page count was found. */
    private static final int UNKNOWN_PAGES = -1;

    /**
     * Program entry point.
     *
     * @param args {@code <inputFilename> <outputCount> <outputFilename>}; any
     *             other argument count prints the usage line and returns
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("Usage: javaGenValidIDList <inputFilename> <outputCount> <outputFilename>");
            return;
        }
        try {
            String inputFilename = args[0];
            int outputCount = Integer.parseInt(args[1]);
            String outputFilename = args[2];

            List<String> candidates = readIds(inputFilename);
            // Randomize so the selected IDs are a random sample of the input.
            Collections.shuffle(candidates);

            List<String> selected = selectValidIds(candidates, outputCount);
            // The output file is only opened once selection has finished, so a
            // failed run neither truncates an existing file nor leaks a writer.
            writeIds(outputFilename, selected);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller/JVM can still see it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the first tab-separated column of every line of the input file.
     *
     * @param inputFilename path of the tab-separated input file
     * @return the non-empty IDs in file order (mutable list)
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> readIds(String inputFilename) throws IOException {
        List<String> ids = new ArrayList<String>();
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilename))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String id = line.split("\t", -1)[0];
                // Blank lines (or lines starting with a tab) carry no ID.
                if (!id.isEmpty()) {
                    ids.add(id);
                }
            }
        }
        return ids;
    }

    /**
     * Walks the candidates in order and keeps those with at least
     * {@value #MIN_PAGES} pages, stopping once {@code outputCount} IDs have
     * been kept or every candidate has been tried.
     *
     * <p>If the download script exits with a non-zero status the JVM is
     * terminated with that same status; no files are open at that point.
     *
     * @param candidates  IDs to try, in the order they should be tried
     * @param outputCount number of qualifying IDs wanted
     * @return the qualifying IDs, at most {@code outputCount} of them
     * @throws IOException          if the script cannot be started
     * @throws InterruptedException if interrupted while waiting for the script
     */
    private static List<String> selectValidIds(List<String> candidates, int outputCount)
            throws IOException, InterruptedException {
        List<String> selected = new ArrayList<String>();
        // Bounds are checked before each access, so an empty candidate list
        // simply yields an empty result.
        for (int index = 0; index < candidates.size() && selected.size() < outputCount; index++) {
            System.out.println("Processing item: " + index);
            String id = candidates.get(index);

            int exitStatus = runDownloadScript(id);
            if (exitStatus != 0) {
                System.err.println("failed to run:" + DOWNLOAD_SCRIPT + " " + id);
                System.exit(exitStatus);
            }

            if (getNumPages(id) >= MIN_PAGES) {
                selected.add(id);
                System.out.println("Successful items: " + selected.size());
            }
        }
        return selected;
    }

    /**
     * Runs the download script for one ID and waits for it to finish.
     *
     * @param id ID passed to the script as its single argument
     * @return the script's exit status
     * @throws IOException          if the script cannot be started
     * @throws InterruptedException if interrupted while waiting for the script
     */
    private static int runDownloadScript(String id) throws IOException, InterruptedException {
        // The ID is passed as its own argument (no whitespace re-tokenizing),
        // and the child shares this process's stdio so it can never block on a
        // full, undrained output pipe.
        Process process = new ProcessBuilder(DOWNLOAD_SCRIPT, id).inheritIO().start();
        return process.waitFor();
    }

    /**
     * Extracts the {@code <htd:numpages>} value from the downloaded metadata
     * file. If the tag occurs on several lines, the last occurrence wins.
     *
     * @param id ID the metadata belongs to; only used in warning messages
     * @return the page count, or {@code -1} if the file cannot be read, the
     *         tag is missing, or its content is not an integer
     */
    private static int getNumPages(String id) {
        String numpages = null;
        try (BufferedReader reader = new BufferedReader(new FileReader(METADATA_FILE))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int tagStart = line.indexOf(NUMPAGES_TAG);
                if (tagStart < 0) {
                    continue;
                }
                // The value runs from just after the opening tag to the next
                // '<' (the closing tag), or to end of line if there is none.
                int valueStart = tagStart + NUMPAGES_TAG.length();
                int valueEnd = line.indexOf('<', valueStart);
                if (valueEnd < 0) {
                    valueEnd = line.length();
                }
                numpages = line.substring(valueStart, valueEnd).trim();
            }
        } catch (IOException e) {
            System.err.println("could not read " + METADATA_FILE + " for " + id + ": " + e);
            return UNKNOWN_PAGES;
        }

        if (numpages == null) {
            System.err.println("no " + NUMPAGES_TAG + " entry in " + METADATA_FILE + " for " + id);
            return UNKNOWN_PAGES;
        }
        try {
            return Integer.parseInt(numpages);
        } catch (NumberFormatException e) {
            System.err.println("invalid page count '" + numpages + "' in " + METADATA_FILE + " for " + id);
            return UNKNOWN_PAGES;
        }
    }

    /**
     * Writes the IDs to the output file, one per line.
     *
     * @param outputFilename path of the file to create or overwrite
     * @param ids            IDs to write
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeIds(String outputFilename, List<String> ids) throws IOException {
        try (FileWriter writer = new FileWriter(outputFilename)) {
            for (String id : ids) {
                writer.write(id + '\n');
            }
        }
    }
}
117 | // Returns 42474
|
---|
118 |
|
---|
119 | //REFERENCES
|
---|
120 | //https://stackoverflow.com/questions/12892665/how-to-capture-the-exit-status-of-a-shell-command-in-java
|
---|
121 | //https://www.javatpoint.com/java-filewriter-class
|
---|
122 | //https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
|
---|
123 | //https://www.geeksforgeeks.org/randomly-select-items-from-a-list-in-java/
|
---|
124 | //https://codereview.stackexchange.com/questions/146551/picking-10-distinct-words-randomly-from-list-of-unique-words
|
---|
125 | //http://www.linuxforums.org/forum/programming-scripting/65117-c-c-system-function-analog-java.html
|
---|
126 |
|
---|
127 | //USE RUN-LIST.txt (modify the file it reads via $1, the filename passed on the command line)
|
---|
128 |
|
---|