Changeset 30951 for other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java
- Timestamp: 2016-10-26T17:54:44+13:00 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/PagedJSON.java
r30949 r30951 2 2 3 3 import java.io.BufferedReader; 4 import java.io.BufferedWriter; 5 import java.io.IOException; 4 6 import java.util.ArrayList; 5 7 import java.util.Iterator; 6 8 9 import org.apache.commons.compress.compressors.CompressorException; 7 10 import org.apache.spark.api.java.function.FlatMapFunction; 8 11 import org.json.JSONArray; … … 85 88 String json_dir = ClusterFileIO.removeSuffix(json_file_in,".json.bz2"); 86 89 String page_json_dir = json_dir + "/pages"; 87 //ClusterFileIO.createDirectoryAll(_output_dir + "/" + page_json_dir);88 System.out.println("mkdir: " + _output_dir + "/" + page_json_dir);90 ClusterFileIO.createDirectoryAll(_output_dir + "/" + page_json_dir); 91 //System.out.println("mkdir: " + _output_dir + "/" + page_json_dir); 89 92 90 93 ArrayList<String> ids = new ArrayList<String>(ef_num_pages); … … 97 100 } 98 101 99 // create JSON obj of just the page (for now) 100 // write it out 102 101 103 102 104 String output_json_bz2 = page_json_dir +"/" + formatted_i + ".json.bz2"; 103 104 105 ids.add(output_json_bz2); 105 106 … … 107 108 System.out.println("Sample output JSON page file: " + output_json_bz2); 108 109 } 110 111 // create JSON obj of just the page (for now), and write it out 112 JSONObject ef_page = ef_pages.getJSONObject(i); 113 try { 114 BufferedWriter bw = ClusterFileIO.getBufferedWriterForCompressedFile(output_json_bz2); 115 bw.write(ef_page.toString()); 116 bw.close(); 117 } catch (IOException e) { 118 e.printStackTrace(); 119 } catch (CompressorException e) { 120 e.printStackTrace(); 121 } 122 123 109 124 } 110 125
Note: See TracChangeset for help on using the changeset viewer.