Changeset 31375 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java
- Timestamp:
- 2017-01-31T21:35:50+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java
r31374 r31375
    21     21    private static final long serialVersionUID = 1L;
    22     22
    23         - // Following details on number of partitions to use given in
    24         - // "Parallelized collections" section of:
    25         - // https://spark.apache.org/docs/2.0.1/programming-guide.html
    26         - //
    27         - // For a more detailed discussion see:
    28         - // http://blog.cloudera.com/blog/2015/03/how-to-tune-your-apache-spark-jobs-part-2/
    29         -
    30     23    protected static final int DEFAULT_NUM_CORES = 10;
    31     24    protected static final int MINIMUM_NUM_PARTITIONS = 10*DEFAULT_NUM_CORES;
     …      …
    36     29    //protected String _json_list_filename;
    37     30    protected String _whitelist_filename;
           31  + protected String _langmap_directory;
           32  +
    38     33    protected String _solr_url;
    39     34    protected String _output_dir;
     …      …
    49     44    boolean use_whitelist = Boolean.getBoolean("wcsa-ef-ingest.use-whitelist");
    50     45    _whitelist_filename = (use_whitelist) ? System.getProperty("wcsa-ef-ingest.whitelist-filename") : null;
           46  +
           47  + boolean use_langmap = Boolean.getBoolean("wcsa-ef-ingest.use-langmap");
           48  + _langmap_directory = (use_langmap) ? System.getProperty("wcsa-ef-ingest.langmap-directory") : null;
           49  +
    51     50
    52     51    _solr_url = solr_url;
     …      …
   110    109    boolean strict_file_io = Boolean.getBoolean("wcsa-ef-ingest.strict-file-io");
   111    110
   112         - PerVolumeJSON per_vol_json = new PerVolumeJSON(_input_dir,_whitelist_filename,
          111  + PerVolumeJSON per_vol_json = new PerVolumeJSON(_input_dir,_whitelist_filename, _langmap_directory,
   113    112    _solr_url,_output_dir,_verbosity,
   114    113    icu_tokenize,strict_file_io);
Note: See TracChangeset for help on using the changeset viewer.