Changeset 31011
- Timestamp:
- 2016-10-31T15:40:36+13:00 (6 years ago)
- Location:
- other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures
- Files:
-
- 2 added
- 1 deleted
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
r31007 r31011 1 1 package org.hathitrust.extractedfeatures; 2 2 3 import java.io.BufferedReader; 4 import java.io.BufferedWriter; 5 import java.io.IOException; 6 import java.io.InputStreamReader; 7 import java.io.OutputStream; 8 import java.net.HttpURLConnection; 9 import java.net.URL; 10 import java.util.ArrayList; 11 import java.util.Set; 12 13 import org.apache.commons.compress.compressors.CompressorException; 14 import org.apache.spark.api.java.function.FlatMapFunction; 15 3 import org.apache.spark.api.java.function.VoidFunction; 16 4 import org.apache.spark.util.DoubleAccumulator; -
other-projects/hathitrust/solr-extracted-features/trunk/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java
r31010 r31011 119 119 DoubleAccumulator progress_accum = jsc.sc().doubleAccumulator("Progress Percent"); 120 120 121 PerPageJSONFlatmap paged_json = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol); 122 JavaRDD<JSONObject> json_per_page_ids = json_list_data.flatMap(paged_json).cache(); 123 124 PerPageJSONForeach paged_json_foreach = new PerPageJSONForeach(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol); 125 json_per_page_ids.foreach(paged_json_foreach); 121 PerPageJSONFlatmap paged_solr_json_flatmap = new PerPageJSONFlatmap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol); 122 JavaRDD<JSONObject> per_page_jsonobjects = json_list_data.flatMap(paged_solr_json_flatmap).cache(); 123 124 PerPageJSONMap paged_json_id_map = new PerPageJSONMap(_input_dir,_solr_url,_output_dir,_verbosity, progress_accum,per_vol); 125 JavaRDD<String> per_page_ids = per_page_jsonobjects.map(paged_json_id_map); 126 126 127 127 /* … … 133 133 */ 134 134 135 long num_page_ids = json_per_page_ids.count(); 135 long num_page_ids = per_page_ids.count(); // trigger lazy eval of: flatmap:per-vol -> map:per-page 136 136 137 137 System.out.println("");
Note:
See TracChangeset
for help on using the changeset viewer.