1 | //NAME:Caleb Bird
|
---|
2 | //ID: 1289680
|
---|
3 | //??References??
|
---|
4 |
|
---|
5 | import java.io.BufferedReader;
|
---|
6 | import java.io.FileReader;
|
---|
7 | import java.io.FileWriter;
|
---|
8 | import java.io.*;
|
---|
9 | import java.util.*;
|
---|
10 |
|
---|
11 |
|
---|
/**
 * Builds a random list of IDs whose downloaded metadata reports enough pages.
 *
 * <p>Reads a tab-separated input file, takes the first column of every line as
 * an ID, shuffles the IDs, and then checks them one by one: for each ID the
 * download script is run and the {@code <htd:numpages>} value is read from the
 * metadata file the script is expected to produce. IDs with at least
 * {@link #MIN_PAGES} pages are collected until {@code outputCount} of them have
 * been found (or the input is exhausted) and are then written, one per line,
 * to the output file.
 */
public class javaGenValidIDList {

    /** Script run once per candidate ID; the ID is passed as its only argument. */
    private static final String DOWNLOAD_SCRIPT = "./download_metadata_temp.sh";

    /** File that is read back after the download script has finished successfully. */
    private static final String METADATA_FILE = "metadata_temp.xml";

    /** Opening tag that marks the page-count value inside the metadata file. */
    private static final String NUMPAGES_TAG = "<htd:numpages>";

    /**
     * Minimum page count (inclusive) for an ID to be accepted.
     * NOTE(review): the original comments said "more than 10 pages" while the
     * code compared with {@code >= 10}; the code's behaviour is kept - confirm.
     */
    private static final int MIN_PAGES = 10;

    /** Sentinel returned when no usable page count could be determined. */
    private static final int NO_PAGE_COUNT = -1;

    /**
     * Program entry point.
     *
     * @param args {@code <inputFilename> <outputCount> <outputFilename>}
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println("Usage: java javaGenValidIDList <inputFilename> <outputCount> <outputFilename>");
            return;
        }

        String inputFilename = args[0];
        String outputFilename = args[2];
        int outputCount;
        try {
            outputCount = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            System.err.println("outputCount must be an integer, got: " + args[1]);
            return;
        }

        // Exit status of a failed script run; 0 means every run so far succeeded.
        int failedExitStatus = 0;
        try {
            List<String> ids = readIds(inputFilename);
            Collections.shuffle(ids);

            // The writer is opened before the (slow) download loop so that an
            // unwritable output path is reported immediately, and it is closed
            // by try-with-resources on every path out of this block.
            try (FileWriter fw = new FileWriter(outputFilename)) {
                List<String> selected = new ArrayList<String>();

                // Bounding the loop by ids.size() also covers an empty input file.
                for (int j = 0; j < ids.size() && selected.size() < outputCount; j++) {
                    System.out.println("Processing item: " + j);
                    String idCurr = ids.get(j);

                    int exitStatus = runDownloadScript(idCurr);
                    if (exitStatus != 0) {
                        System.err.println("failed to run:" + DOWNLOAD_SCRIPT + " " + idCurr);
                        failedExitStatus = exitStatus;
                        break;
                    }

                    if (getNumPages(idCurr) >= MIN_PAGES) {
                        selected.add(idCurr);
                        System.out.println("Successful items: " + selected.size());
                    }
                }

                // As before, nothing is written when a script run failed.
                if (failedExitStatus == 0) {
                    if (selected.size() < outputCount) {
                        System.err.println("only " + selected.size() + " of " + outputCount
                                + " requested IDs met the page criterion");
                    }
                    for (String id : selected) {
                        fw.write(id + '\n');
                    }
                }
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the JVM/caller can still observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Exit only after the resources above have been closed.
        if (failedExitStatus != 0) {
            System.exit(failedExitStatus);
        }
    }

    /**
     * Reads the first tab-separated column of every line of the given file.
     * Lines whose first column is empty (e.g. blank lines) are skipped because
     * they cannot name a record.
     *
     * @param inputFilename path of the tab-separated input file
     * @return the IDs in file order (mutable list)
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> readIds(String inputFilename) throws IOException {
        List<String> ids = new ArrayList<String>();
        try (BufferedReader buf = new BufferedReader(new FileReader(inputFilename))) {
            String line;
            while ((line = buf.readLine()) != null) {
                String id = line.split("\t", -1)[0];
                if (!id.isEmpty()) {
                    ids.add(id);
                }
            }
        }
        return ids;
    }

    /**
     * Runs the download script for one ID and waits for it to finish.
     *
     * <p>The ID is passed as a single argument (no whitespace splitting), and
     * the child inherits this process's standard streams so that it can never
     * block on a full, unread output pipe.
     *
     * @param id the ID handed to the script
     * @return the script's exit status
     * @throws IOException if the script cannot be started
     * @throws InterruptedException if the wait is interrupted
     */
    private static int runDownloadScript(String id) throws IOException, InterruptedException {
        Process p = new ProcessBuilder(DOWNLOAD_SCRIPT, id).inheritIO().start();
        return p.waitFor();
    }

    /**
     * Reads the page count from the metadata file written by the download script.
     * If the tag occurs on several lines, the last usable value wins.
     *
     * @param id the ID the metadata belongs to (used for diagnostics only)
     * @return the page count, or {@code -1} if the file is unreadable or
     *         contains no usable {@code <htd:numpages>} value
     */
    private static int getNumPages(String id) {
        int numPages = NO_PAGE_COUNT;
        try (BufferedReader buf = new BufferedReader(new FileReader(METADATA_FILE))) {
            String line;
            while ((line = buf.readLine()) != null) {
                int parsed = parseNumPages(line);
                if (parsed != NO_PAGE_COUNT) {
                    numPages = parsed;
                }
            }
        } catch (IOException e) {
            System.err.println("could not read " + METADATA_FILE + " for " + id + ": " + e.getMessage());
            return NO_PAGE_COUNT;
        }
        if (numPages == NO_PAGE_COUNT) {
            System.err.println("no usable " + NUMPAGES_TAG + " value found for " + id);
        }
        return numPages;
    }

    /**
     * Extracts the page count from a single line of the metadata file.
     * Package-private so it can be unit-tested without touching the file system.
     *
     * @param line one line of text, may be {@code null}
     * @return the non-negative integer between {@code <htd:numpages>} and the
     *         next {@code <}, or {@code -1} if the line has no such tag, the
     *         value is not closed by a following tag, or it is not a
     *         non-negative integer
     */
    static int parseNumPages(String line) {
        if (line == null) {
            return NO_PAGE_COUNT;
        }
        int tagAt = line.indexOf(NUMPAGES_TAG);
        if (tagAt < 0) {
            return NO_PAGE_COUNT;
        }
        int valueStart = tagAt + NUMPAGES_TAG.length();
        int valueEnd = line.indexOf('<', valueStart);
        if (valueEnd < 0) {
            return NO_PAGE_COUNT;
        }
        try {
            int value = Integer.parseInt(line.substring(valueStart, valueEnd).trim());
            return value < 0 ? NO_PAGE_COUNT : value;
        } catch (NumberFormatException e) {
            return NO_PAGE_COUNT;
        }
    }
}
|
---|
114 | // Returns 42474
|
---|
115 |
|
---|
116 | //REFERENCES
|
---|
117 | //https://stackoverflow.com/questions/12892665/how-to-capture-the-exit-status-of-a-shell-command-in-java
|
---|
118 | //https://www.javatpoint.com/java-filewriter-class
|
---|
119 | //https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
|
---|
120 | //https://www.geeksforgeeks.org/randomly-select-items-from-a-list-in-java/
|
---|
121 | //https://codereview.stackexchange.com/questions/146551/picking-10-distinct-words-randomly-from-list-of-unique-words
|
---|
122 | //http://www.linuxforums.org/forum/programming-scripting/65117-c-c-system-function-analog-java.html
|
---|
123 |
|
---|
124 | //USE RUN-LIST.txt (modify the file it reads using $1, the filename passed on the command line)
|
---|
125 |
|
---|