//NAME:Caleb Bird
//ID: 1289680
//References are listed at the end of this file.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.*;
import java.util.*;

/**
 * Picks a random subset of IDs from a tab-separated input file.
 *
 * <p>The first column of every input line is taken as an ID. The IDs are shuffled and then
 * visited in order; for each one the script {@code ./download_metadata_temp.sh} is run with the
 * ID as its argument and the page count is read back from {@code metadata_temp.xml}. IDs whose
 * page count is at least {@link #MIN_PAGES} are kept until {@code outputCount} IDs have been
 * collected or the input is exhausted. The kept IDs are written one per line to the output file.
 */
public class TabRndListGen {

    /** Text printed when the wrong number of arguments is supplied. */
    private static final String USAGE =
            "Usage: TabRndListGen <inputFilename> <outputCount> <outputFilename>";

    /** Script executed once per candidate ID. */
    private static final String DOWNLOAD_SCRIPT = "./download_metadata_temp.sh";

    /** File that is read back after each run of the download script. */
    private static final String METADATA_FILE = "metadata_temp.xml";

    /**
     * Substring identifying the metadata line that carries the page count.
     *
     * <p>NOTE(review): the original literal was lost from the source listing (angle-bracketed
     * text was stripped). "numpages" is inferred from the variable name used by the original
     * code; confirm it against a real metadata_temp.xml before relying on it.
     */
    private static final String NUM_PAGES_MARKER = "numpages";

    /** Minimum page count an item needs in order to be selected. */
    private static final int MIN_PAGES = 10;

    /** Value returned by {@link #getNumPages(String)} when no page count could be read. */
    private static final int UNKNOWN_PAGE_COUNT = -1;

    /**
     * Program entry point.
     *
     * @param args {@code inputFilename outputCount outputFilename}
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.out.println(USAGE);
            return;
        }

        final int outputCount;
        try {
            outputCount = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            System.err.println("outputCount must be an integer, got: " + args[1]);
            System.out.println(USAGE);
            return;
        }

        int exitStatus;
        try {
            exitStatus = run(args[0], outputCount, args[2]);
        } catch (IOException e) {
            e.printStackTrace();
            exitStatus = 1;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the JVM shutdown sequence can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            exitStatus = 1;
        }

        // Exit only after run() has returned, so its reader and writer are already closed.
        if (exitStatus != 0) {
            System.exit(exitStatus);
        }
    }

    /**
     * Reads the IDs, selects the qualifying ones and writes them out.
     *
     * @return 0 on success, otherwise the non-zero exit status of the failed download script
     */
    private static int run(String inputFilename, int outputCount, String outputFilename)
            throws IOException, InterruptedException {
        // The writer is opened up front (as the original did) so an unwritable output path
        // is reported before any download is attempted.
        try (BufferedReader in = new BufferedReader(new FileReader(inputFilename));
             FileWriter out = new FileWriter(outputFilename)) {

            List<String> ids = readIds(in);
            Collections.shuffle(ids);

            List<String> selected = new ArrayList<>();
            // Checking the index in the loop condition keeps an empty (or too short) input
            // from triggering an out-of-bounds read.
            for (int index = 0; index < ids.size() && selected.size() < outputCount; index++) {
                System.out.println("Processing item: " + index);
                String id = ids.get(index);

                int exitStatus = downloadMetadata(id);
                if (exitStatus != 0) {
                    System.err.println("failed to run:" + DOWNLOAD_SCRIPT + " " + id);
                    return exitStatus;
                }

                if (getNumPages(id) >= MIN_PAGES) {
                    System.out.println("Successful items: " + selected.size());
                    selected.add(id);
                }
            }

            for (String id : selected) {
                out.write(id + '\n');
            }
        }
        return 0;
    }

    /** Collects the first tab-separated field of every line, skipping lines whose ID is empty. */
    private static List<String> readIds(BufferedReader in) throws IOException {
        List<String> ids = new ArrayList<>();
        String line;
        while ((line = in.readLine()) != null) {
            String id = line.split("\t", -1)[0];
            if (!id.isEmpty()) {
                ids.add(id);
            }
        }
        return ids;
    }

    /**
     * Runs the download script for one ID and waits for it to finish.
     *
     * @return the exit status of the script
     */
    private static int downloadMetadata(String id) throws IOException, InterruptedException {
        // The ID is passed as its own argument so whitespace in it cannot split the command.
        // inheritIO() forwards the script's output to this process's streams, which also
        // prevents the child from blocking on a full, never-read pipe.
        Process process = new ProcessBuilder(DOWNLOAD_SCRIPT, id).inheritIO().start();
        return process.waitFor();
    }

    /**
     * Reads the page count from {@code metadata_temp.xml}.
     *
     * <p>Every line containing {@link #NUM_PAGES_MARKER} is inspected and the text between the
     * first {@code '>'} and the last {@code '<'} on that line is taken as the value; if several
     * lines match, the last one wins.
     *
     * @param id the ID the metadata belongs to; used only in diagnostic messages
     * @return the page count, or {@code -1} when the file cannot be read or holds no valid value
     */
    private static int getNumPages(String id) {
        String numPagesText = null;
        try (BufferedReader reader = new BufferedReader(new FileReader(METADATA_FILE))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.contains(NUM_PAGES_MARKER)) {
                    continue;
                }
                int start = line.indexOf('>') + 1;
                int end = line.lastIndexOf('<');
                // Ignore lines that do not have the >value< shape instead of throwing.
                if (start > 0 && end >= start) {
                    numPagesText = line.substring(start, end).trim();
                }
            }
        } catch (IOException e) {
            System.err.println("could not read " + METADATA_FILE + " for " + id + ": " + e);
            return UNKNOWN_PAGE_COUNT;
        }

        if (numPagesText == null) {
            System.err.println("no page count found in " + METADATA_FILE + " for " + id);
            return UNKNOWN_PAGE_COUNT;
        }
        try {
            return Integer.parseInt(numPagesText);
        } catch (NumberFormatException e) {
            System.err.println("invalid page count '" + numPagesText + "' for " + id);
            return UNKNOWN_PAGE_COUNT;
        }
    }
}

// Returns 42474

//REFERENCES
//https://stackoverflow.com/questions/12892665/how-to-capture-the-exit-status-of-a-shell-command-in-java
//https://www.javatpoint.com/java-filewriter-class
//https://docs.oracle.com/javase/8/docs/api/index.html?java/io/FileWriter.html
//https://www.geeksforgeeks.org/randomly-select-items-from-a-list-in-java/
//https://codereview.stackexchange.com/questions/146551/picking-10-distinct-words-randomly-from-list-of-unique-words
//http://www.linuxforums.org/forum/programming-scripting/65117-c-c-system-function-analog-java.html

//USE RUN-LIST.txt (modify file it reads using $1 (terminal entry variable (filename))