Changeset 31176 for other-projects
- Timestamp: 2016-12-03T21:16:38+13:00 (7 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
r31007 r31176 8 8 import java.net.HttpURLConnection; 9 9 import java.net.URL; 10 import java.util.ArrayList; 10 11 import java.util.Iterator; 11 12 … … 123 124 } 124 125 126 protected static ArrayList<String> generateTokenPostCountText(String volume_id, String page_id, JSONObject ef_page) 127 { 128 ArrayList<String> word_list = new ArrayList<String>(); 129 130 if (ef_page != null) { 131 JSONObject ef_body = ef_page.getJSONObject("body"); 132 if (ef_body != null) { 133 JSONObject ef_token_pos_count = ef_body.getJSONObject("tokenPosCount"); 134 if (ef_token_pos_count != null) { 135 136 Iterator<String> token_iter = ef_token_pos_count.keys(); 137 while (token_iter.hasNext()) { 138 String token = token_iter.next(); 139 word_list.add(token); 140 } 141 } 142 else { 143 System.err.println("Warning: empty tokenPosCount field for '" + page_id + "'"); 144 } 145 } 146 else { 147 System.err.println("Warning: empty body field for '" + page_id + "'"); 148 } 149 150 } 151 else { 152 System.err.println("Warning: null page for '" + page_id + "'"); 153 } 154 155 return word_list; 156 } 157 125 158 public static void saveSolrDoc(JSONObject solr_add_doc_json, String output_file_json_bz2) 126 159 {
Note: See TracChangeset for help on using the changeset viewer.