Changeset 31028 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java
- Timestamp: 2016-11-02T14:17:45+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java
r31027 r31028
  6   6   import java.io.IOException;
  7   7   import java.io.Serializable;
      8 + import java.util.ArrayList;
      9 +
  8  10   import org.apache.commons.cli.*;
  9  11
 10  12   import org.apache.spark.api.java.*;
 11  13   import org.apache.spark.util.DoubleAccumulator;
     14 + import org.apache.spark.util.LongAccumulator;
 12  15   import org.hathitrust.extractedfeatures.PerPageJSONFlatmap;
 13  16   import org.json.JSONObject;
… …
103 106   jsc.close();
104 107   }
105     -
    108 + public ArrayList<String> extrapolateSolrEndpoints()
    109 + {
    110 + ArrayList<String> solr_endpoints = new ArrayList<String>();
    111 +
    112 + String solr_endpoint_ips = System.getProperty("wcsa-ef-ingest.solr-endpoint-ips",null);
    113 + if (solr_endpoint_ips != null) {
    114 + String [] ips = solr_endpoint_ips.split(",");
    115 + for (String ip : ips) {
    116 + String solr_endpoint = _solr_url.replaceFirst("//.*?:", "//"+ip+":");
    117 + solr_endpoints.add(solr_endpoint);
    118 + }
    119 + }
    120 + else {
    121 + if (_solr_url != null) {
    122 + solr_endpoints.add(_solr_url);
    123 + }
    124 + }
    125 +
    126 + return solr_endpoints;
    127 + }
    128 +
106 129   public void execPerPage()
107 130   {
… …
129 152   //long num_page_ids = per_page_jsonobjects.count(); // trigger lazy eval of: flatmap:per-vol
130 153
131     - DoubleAccumulator per_page_progress_accum = jsc.sc().doubleAccumulator("Pages Processed");
    154 + LongAccumulator per_page_progress_accum = jsc.sc().longAccumulator("Pages Processed");
    155 + ArrayList<String> solr_endpoints = extrapolateSolrEndpoints();
132 156
133 157   PerPageJSONMap paged_json_id_map
134     - = new PerPageJSONMap(_input_dir, _solr_url,_output_dir,_verbosity, per_page_progress_accum,1.0);
    158 + = new PerPageJSONMap(_input_dir,solr_endpoints,_output_dir,_verbosity, per_page_progress_accum,1);
135 159   JavaRDD<String> per_page_ids = per_page_jsonobjects.map(paged_json_id_map);
136 160
… …
219 243
220 244
221     - String verbosity_str = cmd.getOptionValue("verbosity","0");
    245 + String verbosity_str = cmd.getOptionValue("verbosity","1");
222 246   int verbosity = Integer.parseInt(verbosity_str);
223 247
… …
269 293   = new ProcessForSolrIngest(input_dir,json_list_filename,solr_url,output_dir,verbosity);
270 294
271     - String process_json_mode = System.getProperty("ef-solr.process-json-mode","per-page");
272     - if (process_json_mode.equals("per-volume")) {
    295 + String process_ef_json_mode = System.getProperty("wcsa-ef-ingest.process-ef-json-mode","per-page");
    296 + if (process_ef_json_mode.equals("per-volume")) {
273 297   prep_for_ingest.execPerVolume();
274 298   }
Note: See TracChangeset for help on using the changeset viewer.