Changeset 31252 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
- Timestamp: 2016-12-20T14:15:05+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
 r31245  r31252
     24      24    public class SolrDocJSON {
     25      25
     26          - protected static ArrayList<String> getTokenPosCountWords(JSONObject ef_token_pos_count, String page_id)
     27          - {
     28          - boolean solr_icu_tokenize = true;
             26  + protected static ArrayList<String> getTokenPosCountWords(JSONObject ef_token_pos_count, String page_id,
             27  + boolean icu_tokenize)
             28  + {
     29      29
     30      30    ArrayList<String> words = new ArrayList<String>();
      …       …
     36      36    String token = token_iter.next();
     37      37
     38          - if (solr_icu_tokenize == true) {
             38  + if (icu_tokenize == true) {
     39      39    Reader reader = new StringReader(token);
     40      40
      …       …
     79      79
     80      80    protected static String generateSolrText(JSONObject ef_token_pos_count, String page_id,
     81          - WhitelistBloomFilter whitelist_bloomfilter)
     82          - {
     83          - ArrayList<String> tokens = getTokenPosCountWords(ef_token_pos_count, page_id);
             81  + WhitelistBloomFilter whitelist_bloomfilter, boolean icu_tokenize)
             82  + {
             83  + ArrayList<String> tokens = getTokenPosCountWords(ef_token_pos_count, page_id,icu_tokenize);
     84      84
     85      85    StringBuilder sb = new StringBuilder();
      …       …
    125     125
    126     126    protected static JSONObject generateSolrDocJSON(String volume_id, String page_id, JSONObject ef_page,
    127          - WhitelistBloomFilter whitelist_bloomfilter)
            127  + WhitelistBloomFilter whitelist_bloomfilter, boolean icu_tokenize)
    128     128    {
    129     129    JSONObject solr_update_json = null;
      …       …
    137     137    JSONObject solr_add_json = new JSONObject();
    138     138
    139          - String text = generateSolrText(ef_token_pos_count,page_id,whitelist_bloomfilter);
            139  + String text = generateSolrText(ef_token_pos_count,page_id,whitelist_bloomfilter,icu_tokenize);
    140     140
    141     141    JSONObject solr_doc_json = new JSONObject();
      …       …
    213     213    }
    214     214
    215          - protected static ArrayList<String> generateTokenPosCountText(String volume_id, String page_id, JSONObject ef_page)
            215  + protected static ArrayList<String> generateTokenPosCountText(String volume_id, String page_id, JSONObject ef_page,
            216  + boolean icu_tokenize)
    216     217    {
    217     218    ArrayList<String> word_list = null;
      …       …
    221     222    if (ef_body != null) {
    222     223    JSONObject ef_token_pos_count = ef_body.getJSONObject("tokenPosCount");
    223          - word_list = getTokenPosCountWords(ef_token_pos_count,page_id);
            224  + word_list = getTokenPosCountWords(ef_token_pos_count,page_id,icu_tokenize);
    224     225    }
    225     226    else {
Note: See TracChangeset for help on using the changeset viewer.