Changeset 31266 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
- Timestamp: 2016-12-27T18:51:42+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
r31252 r31266
     2      2
     3      3   import java.io.IOException;
            4 + import java.util.ArrayList;
            5 + import java.util.Iterator;
     4      6
            7 + import org.apache.spark.api.java.function.FlatMapFunction;
     5      8   import org.apache.spark.api.java.function.VoidFunction;
     6      9   import org.apache.spark.util.DoubleAccumulator;
     …      …
    18     21
    19     22
    20        - public class PerVolumeJSON implements VoidFunction<String>
           23 + //public class PerVolumeJSON implements VoidFunction<String>
           24 + public class PerVolumeJSON implements FlatMapFunction<String,String>
    21     25   {
    22     26   private static final long serialVersionUID = 1L;
     …      …
    58     62   }
    59     63
    60        - //public Iterator<String> call(String json_file_in)
    61        - public void call(String json_file_in) throws IOException
           64 + //public void call(String json_file_in) throws IOException
           65 + public Iterator<String> call(String json_file_in) throws IOException
           66 +
    62     67   {
    63     68   if ((_whitelist_filename != null) && (_whitelist_bloomfilter == null)) {
     …      …
    65     70   }
    66     71
           72 + ArrayList<String> ids = null;
           73 +
    67     74   String full_json_file_in = _input_dir + "/" + json_file_in;
    68     75   JSONObject extracted_feature_record = JSONClusterFileIO.readJSONFile(full_json_file_in);
     …      …
    94    101   }
    95    102
    96        - //ArrayList<String>ids = new ArrayList<String>(ef_num_pages);
          103 + ids = new ArrayList<String>(ef_num_pages);
    97    104   for (int i = 0; i < ef_page_count; i++) {
    98    105   String formatted_i = String.format("page-%06d", i);
     …      …
   104    111
   105    112   String output_json_bz2 = page_json_dir +"/" + formatted_i + ".json.bz2";
   106        - //ids.add(output_json_bz2); // ****
          113 + ids.add(page_id);
   107    114
   108    115   if (i==0) {
     …      …
   164    171   _progress_accum.add(_progress_step);
   165    172
   166        - //return ids.iterator();
          173 + return ids.iterator();
   167    174   }
   168    175   }
Note: See TracChangeset for help on using the changeset viewer.