Changeset 30985 for other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java
- Timestamp: 2016-10-29T16:17:22+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java
r30984 r30985 14 14 import org.apache.commons.compress.compressors.CompressorException; 15 15 import org.apache.spark.api.java.function.FlatMapFunction; 16 import org.apache.spark.api.java.function.VoidFunction; 16 17 import org.apache.spark.util.DoubleAccumulator; 17 18 import org.json.JSONArray; … … 28 29 29 30 30 class PagedJSON implements FlatMapFunction<String, String> 31 //class PagedJSON implements FlatMapFunction<String, String> 32 class PagedJSON implements VoidFunction<String> 31 33 { 32 34 private static final long serialVersionUID = 1L; … … 226 228 String decodedString; 227 229 while ((decodedString = in.readLine()) != null) { 228 //System.out.println(decodedString);229 230 sb.append(decodedString); 230 231 } … … 252 253 253 254 } 254 public Iterator<String> call(String json_file_in) 255 256 //public Iterator<String> call(String json_file_in) 257 public void call(String json_file_in) 255 258 { 256 259 JSONObject extracted_feature_record = readJSONFile(json_file_in); … … 305 308 System.out.println("Sample output Solr add JSON [page 20]: " + solr_add_doc_json.toString()); 306 309 System.out.println("=================="); 307 //System.out.println("Sample text [page 20]: " + solr_add_doc_json.getString("_text_"));308 310 } 309 311 310 // create JSON obj of just the page (for now), and write it out311 // write out the JSONOBject as a bz2 compressed file312 /*313 try {314 BufferedWriter bw = ClusterFileIO.getBufferedWriterForCompressedFile(_output_dir + "/" + output_json_bz2);315 bw.write(ef_page.toString());316 bw.close();317 } catch (IOException e) {318 e.printStackTrace();319 } catch (CompressorException e) {320 e.printStackTrace();321 }322 */323 312 324 313 if (_solr_url != null) { … … 346 335 } 347 336 348 /*349 for (int i = 0; i < ef_num_pages; i++)350 {351 //String post_id = ef_pages.getJSONObject(i).getString("post_id");352 //......353 }354 */355 //String pageName = json_obj.getJSONObject("pageInfo").getString("pageName");356 /*357 JSONArray arr = 
obj.getJSONArray("posts");358 for (int i = 0; i < arr.length(); i++)359 {360 String post_id = arr.getJSONObject(i).getString("post_id");361 ......362 }363 */364 365 337 366 338 ids.add(volume_id); 367 339 _progress_accum.add(_progress_step); 368 return ids.iterator(); 340 341 //return ids.iterator(); 369 342 } 370 343 }
Note: See TracChangeset for help on using the changeset viewer.