Changeset 31220 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
- Timestamp: 2016-12-12T20:18:04+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
r31176 r31220 16 16 public class SolrDocJSON { 17 17 18 protected static String generateSolrText(JSONObject ef_token_pos_count )18 protected static String generateSolrText(JSONObject ef_token_pos_count, WhitelistBloomFilter whitelist_bloomfilter) 19 19 { 20 20 StringBuilder sb = new StringBuilder(); 21 21 22 22 Iterator<String> token_iter = ef_token_pos_count.keys(); 23 while (token_iter.hasNext()) { 24 String token = token_iter.next(); 25 26 sb.append(token); 27 if (token_iter.hasNext()) { 28 sb.append(" "); 29 } 30 } 31 23 24 if (whitelist_bloomfilter == null) { 25 26 while (token_iter.hasNext()) { 27 String token = token_iter.next(); 28 sb.append(token); 29 if (token_iter.hasNext()) { 30 sb.append(" "); 31 } 32 } 33 } 34 else { 35 while (token_iter.hasNext()) { 36 String token = token_iter.next(); 37 if (whitelist_bloomfilter.contains(token)) { 38 sb.append(token); 39 if (token_iter.hasNext()) { 40 sb.append(" "); 41 } 42 } 43 } 44 45 } 32 46 /* 33 47 Set<String> token_keys = ef_token_pos_count.keySet(); … … 40 54 } 41 55 42 protected static JSONObject generateSolrDocJSON(String volume_id, String page_id, JSONObject ef_page) 56 protected static JSONObject generateSolrDocJSON(String volume_id, String page_id, JSONObject ef_page, 57 WhitelistBloomFilter whitelist_bloomfilter) 43 58 { 44 59 JSONObject solr_update_json = null; … … 52 67 JSONObject solr_add_json = new JSONObject(); 53 68 54 String text = generateSolrText(ef_token_pos_count );69 String text = generateSolrText(ef_token_pos_count,whitelist_bloomfilter); 55 70 56 71 JSONObject solr_doc_json = new JSONObject(); 57 72 solr_doc_json.put("id", page_id); 58 73 solr_doc_json.put("volumeid_s", volume_id); 59 solr_doc_json.put("eftext_txt", text); 60 74 if (!text.equals("")) { 75 solr_doc_json.put("eftext_txt", text); 76 } 77 else { 78 solr_doc_json.put("efnotext_b", true); 79 } 61 80 solr_add_json.put("commitWithin", 5000); 62 81 solr_add_json.put("doc", solr_doc_json);
Note: See TracChangeset for help on using the changeset viewer.