Changeset 31334 for other-projects/hathitrust
- Timestamp:
- 2017-01-23T18:03:51+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/vol-checker/src/org/hathitrust/extractedfeatures/VolumeCheck.java
r31296 r31334 27 27 28 28 protected static int HASHMAP_INIT_SIZE = 13800000; 29 protected static String DATA_DIR="D:/cygwin64/home/davidb/research/code-managed/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest";30 protected static HashMap<String, Boolean> id_check_ = null;29 //protected static String DATA_DIR="D:/cygwin64/home/davidb/research/code-managed/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest"; 30 protected static HashMap<String,String> id_check_ = null; 31 31 32 32 … … 35 35 } 36 36 37 protected static final String opt_file_ext = ".json.bz2"; 38 39 protected String full_filename_to_tail(String full_filename) 40 { 41 String filename_tail = full_filename.substring(full_filename.lastIndexOf("/")+1); 42 return filename_tail; 43 } 44 45 protected String filename_tail_to_id(String filename_tail) 46 { 47 String id = null; 48 if (filename_tail.endsWith(opt_file_ext)) { 49 id = filename_tail.substring(0,filename_tail.lastIndexOf(opt_file_ext)); 50 } 51 else { 52 id = filename_tail; 53 } 54 55 id = id.replaceAll("\\+", ":").replaceAll("=", "/"); 56 57 return id; 58 } 59 37 60 protected void storeIDs(BufferedReader br) 38 61 { … … 40 63 String line; 41 64 42 String opt_file_ext = ".json.bz2";65 43 66 44 67 try { 45 68 69 System.err.print("Loading hashmap: "); 46 70 while ((line = br.readLine()) != null) { 47 71 48 String file_id = line.substring(line.lastIndexOf("/")+1); 49 String id = null; 50 if (file_id.endsWith(opt_file_ext)) { 51 id = file_id.substring(0,file_id.lastIndexOf(opt_file_ext)); 52 } 53 else { 54 id = file_id; 55 } 56 57 id = id.replaceAll("\\+", ":").replaceAll("=", "/"); 58 59 id_check_.put(id, true); 60 72 String full_json_filename = line; 73 String json_filename_tail = full_filename_to_tail(full_json_filename); 74 String id = filename_tail_to_id(json_filename_tail); 75 76 id_check_.put(id, full_json_filename); 77 78 61 79 if ((line_num % 100000) == 0) { 62 System.err.println("sample id = " + id); 63 System.err.println("Passed line: " + line_num); 80 //System.err.println("sample id = " + id); 81 //System.err.println("Passed line: " + line_num); 82 System.err.print("."); 64 83 } 65 84 line_num++; 66 85 67 86 } 87 System.err.println(" => done."); 68 88 } 69 89 catch (Exception e) { … … 79 99 80 100 if (id_check_ == null) { 81 id_check_ = new HashMap<String, Boolean>(HASHMAP_INIT_SIZE);101 id_check_ = new HashMap<String,String>(HASHMAP_INIT_SIZE); 82 102 83 103 String htrc_list_file = "htrc-ef-all-files.txt"; … … 97 117 } 98 118 } 99 100 101 102 103 104 } 105 119 } 120 121 protected void doRsyncDownload(String full_json_filename) 122 { 123 String json_filename_tail = full_filename_to_tail(full_json_filename); 124 //String cmd = "rsync -av data.analytics.hathitrust.org::features/" + full_json_filename + "."; 125 126 127 Runtime rt = Runtime.getRuntime(); 128 String[] command = {"rsync","-av","data.analytics.hathitrust.org::features/" + full_json_filename, "."}; 129 130 131 try { 132 Process proc = rt.exec(command); 133 134 /* 135 BufferedReader stdInput = new BufferedReader(new 136 InputStreamReader(proc.getInputStream())); 137 138 BufferedReader stdError = new BufferedReader(new 139 InputStreamReader(proc.getErrorStream())); 140 141 // read the output from the command 142 System.out.println("Here is the standard output of the command:\n"); 143 String s = null; 144 while ((s = stdInput.readLine()) != null) { 145 System.out.println(s); 146 } 147 148 // read any errors from the attempted command 149 System.out.println("Here is the standard error of the command (if any):\n"); 150 while ((s = stdError.readLine()) != null) { 151 System.out.println(s); 152 } 153 */ 154 155 proc.waitFor(); 156 System.err.println("*** Rsync finished"); 157 158 //System.out.println("Done."); 159 160 } 161 catch (Exception e) { 162 e.printStackTrace(); 163 } 164 165 } 106 166 /** 107 167 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response) … … 112 172 String cgi_ids = request.getParameter("ids"); 113 173 String cgi_id = request.getParameter("id"); 174 String cgi_download_id = request.getParameter("download-id"); 114 175 115 176 if (cgi_ids != null) { … … 124 185 String id = ids[i]; 125 186 126 Boolean exists= id_check_.get(id);127 boolean status = ( exists != null) ? exists : false;187 String full_json_filename = id_check_.get(id); 188 boolean status = (full_json_filename != null); 128 189 129 190 if (i>0) { … … 139 200 140 201 String id = cgi_id; 141 Boolean exists= id_check_.get(id);142 boolean status = ( exists != null) ? exists : false;202 String full_json_filename = id_check_.get(id); 203 boolean status = (full_json_filename != null); 143 204 pw.append("{'" + id + "':" + status + "}"); 205 } 206 else if (cgi_download_id != null) { 207 String download_id = cgi_download_id; 208 String full_json_filename = id_check_.get(download_id); 209 boolean exists = (full_json_filename != null); 210 if (!exists) { 211 // Error 212 response.sendError(HttpServletResponse.SC_BAD_REQUEST, "The requested volume id does not exist."); 213 } 214 else { 215 // rsync -av data.analytics.hathitrust.org::features/{PATH-TO-FILE} . 216 217 doRsyncDownload(full_json_filename); 218 219 } 144 220 } 145 221 else {
Note:
See TracChangeset
for help on using the changeset viewer.