Changeset 31001 for other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/PagedJSON.java
- Timestamp:
- 2016-10-30T23:51:07+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/PagedJSON.java
r30997 r31001 9 9 import java.net.URL; 10 10 import java.util.ArrayList; 11 import java.util.Iterator; 11 12 import java.util.Set; 12 13 … … 28 29 29 30 30 //class PagedJSON implements FlatMapFunction<String, String>31 public class PagedJSON implements VoidFunction<String>31 class PagedJSON implements FlatMapFunction<String, JSONObject>, VoidFunction<JSONObject> 32 //public class PagedJSON implements VoidFunction<String> 32 33 { 33 34 private static final long serialVersionUID = 1L; … … 53 54 } 54 55 55 public static void saveSolrDoc(JSONObject solr_add_doc_json, String output_file_json_bz2)56 {57 try {58 BufferedWriter bw = ClusterFileIO.getBufferedWriterForCompressedFile(output_file_json_bz2);59 bw.write(solr_add_doc_json.toString());60 bw.close();61 } catch (IOException e) {62 e.printStackTrace();63 } catch (CompressorException e) {64 e.printStackTrace();65 }66 }67 56 68 public static void postSolrDoc(String post_url, JSONObject solr_add_doc_json)69 {70 71 //String curl_popen = "curl -X POST -H 'Content-Type: application/json'";72 //curl_popen += " 'http://10.11.0.53:8983/solr/htrc-pd-ef/update'";73 //curl_popen += " --data-binary '";74 //curl_popen += "'"75 76 77 try {78 HttpURLConnection httpcon = (HttpURLConnection) ((new URL(post_url).openConnection()));79 httpcon.setDoOutput(true);80 httpcon.setRequestProperty("Content-Type", "application/json");81 httpcon.setRequestProperty("Accept", "application/json");82 httpcon.setRequestMethod("POST");83 httpcon.connect();84 85 byte[] outputBytes = solr_add_doc_json.toString().getBytes("UTF-8");86 OutputStream os = httpcon.getOutputStream();87 os.write(outputBytes);88 os.close();89 90 91 // Read response92 StringBuilder sb = new StringBuilder();93 BufferedReader in = new BufferedReader(new InputStreamReader(httpcon.getInputStream()));94 String decodedString;95 while ((decodedString = in.readLine()) != null) {96 sb.append(decodedString);97 }98 in.close();99 100 JSONObject solr_status_json = new JSONObject(sb.toString());101 JSONObject response_header_json = solr_status_json.getJSONObject("responseHeader");102 if (response_header_json != null) {103 int status = response_header_json.getInt("status");104 if (status != 0) {105 System.err.println("Warning: POST request to " + post_url + " returned status " + status);106 System.err.println("Full response was: " + sb);107 }108 }109 else {110 System.err.println("Failed response to Solr POST: " + sb);111 }112 113 114 115 }116 catch (Exception e) {117 e.printStackTrace();118 }119 120 }121 57 122 //public Iterator<String> call(String json_file_in)123 public void call(String json_file_in)58 public Iterator<JSONObject> call(String json_file_in) 59 //public void call(String json_file_in) 124 60 { 125 61 JSONObject extracted_feature_record = JSONClusterFileIO.readJSONFile(_input_dir + "/" + json_file_in); … … 148 84 ClusterFileIO.createDirectoryAll(_output_dir + "/" + page_json_dir); 149 85 150 ArrayList< String> ids = new ArrayList<String>(ef_num_pages);86 ArrayList<JSONObject> json_pages = new ArrayList<JSONObject>(ef_num_pages); 151 87 for (int i = 0; i < ef_page_count; i++) { 152 88 String formatted_i = String.format("page-%06d", i); … … 158 94 159 95 String output_json_bz2 = page_json_dir +"/" + formatted_i + ".json.bz2"; 160 ids.add(output_json_bz2);96 //ids.add(output_json_bz2); 161 97 162 98 if (i==0) { … … 165 101 166 102 JSONObject ef_page = ef_pages.getJSONObject(i); 167 103 168 104 if (ef_page != null) { 169 105 // Convert to Solr add form 170 106 JSONObject solr_add_doc_json = JSONSolrTransform.generateSolrDocJSON(volume_id, page_id, ef_page); 171 107 solr_add_doc_json.put("filename_json_bz2", output_json_bz2); 108 109 json_pages.add(solr_add_doc_json); 172 110 173 if ((_verbosity >=2) && (i==20)) {174 System.out.println("==================");175 System.out.println("Sample output Solr add JSON [page 20]: " + solr_add_doc_json.toString());176 System.out.println("==================");177 }178 111 179 180 if (_solr_url != null) {181 if ((_verbosity >=2) && (i==20)) {182 System.out.println("==================");183 System.out.println("Posting to: " + _solr_url);184 System.out.println("==================");185 }186 postSolrDoc(_solr_url, solr_add_doc_json);187 }188 189 if (_output_dir != null) {190 if ((_verbosity >=2) && (i==20)) {191 System.out.println("==================");192 System.out.println("Saving to: " + _output_dir);193 System.out.println("==================");194 }195 saveSolrDoc(solr_add_doc_json, _output_dir + "/" + output_json_bz2);196 }197 112 } 198 113 else { … … 203 118 204 119 205 ids.add(volume_id);120 //ids.add(volume_id); 206 121 _progress_accum.add(_progress_step); 207 122 208 123 //return ids.iterator(); 124 return json_pages.iterator(); 209 125 } 126 127 public void call(JSONObject solr_add_doc_json) 128 { 129 String output_json_bz2 = solr_add_doc_json.getString("filename_json_bz2"); 130 solr_add_doc_json.remove("filename_json_bz2"); 131 132 boolean random_test = (Math.random()>0.999); // every 1000 133 134 if ((_verbosity >=2) && (random_test)) { 135 System.out.println("=================="); 136 System.out.println("Sample output Solr add JSON [random test 1/1000]: " + solr_add_doc_json.toString()); 137 System.out.println("=================="); 138 } 139 140 141 if (_solr_url != null) { 142 if ((_verbosity >=2) && (random_test)) { 143 System.out.println("=================="); 144 System.out.println("Posting to: " + _solr_url); 145 System.out.println("=================="); 146 } 147 JSONSolrTransform.postSolrDoc(_solr_url, solr_add_doc_json); 148 } 149 150 if (_output_dir != null) { 151 if ((_verbosity >=2) && (random_test)) { 152 System.out.println("=================="); 153 System.out.println("Saving to: " + _output_dir); 154 System.out.println("=================="); 155 } 156 JSONSolrTransform.saveSolrDoc(solr_add_doc_json, _output_dir + "/" + output_json_bz2); 157 } 158 } 159 210 160 } 211 161
Note:
See TracChangeset
for help on using the changeset viewer.