Changeset 31308
- Timestamp:
- 2017-01-20T20:33:39+13:00 (7 years ago)
- Location:
- other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForWhitelist.java
r31264 r31308 80 80 DoubleAccumulator per_vol_progress_accum = jsc.sc().doubleAccumulator("Per Volume Progress Percent"); 81 81 82 boolean icu_tokenize = Boolean.getBoolean("wcsa-ef-ingest.icu-tokenize"); 82 83 boolean strict_file_io = Boolean.getBoolean("wcsa-ef-ingest.strict-file-io"); 83 boolean icu_tokenize = Boolean.getBoolean("wcsa-ef-ingest.icu-tokenize"); 84 84 85 //System.err.println("***** icu_tokenize = " + icu_tokenize); 86 //System.err.println("***** num_part = " + num_partitions); 87 85 88 PerVolumeWordStreamFlatmap paged_solr_wordfreq_flatmap 86 89 = new PerVolumeWordStreamFlatmap(_input_dir,_verbosity, -
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
r31274 r31308 38 38 String word_token = word_token_iter.next(); 39 39 40 if (icu_tokenize == true) { 40 if (icu_tokenize) { 41 41 Reader reader = new StringReader(word_token); 42 42
Note: See TracChangeset for help on using the changeset viewer.