Changeset 31675 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
- Timestamp: 2017-05-11T22:19:06+12:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
r31597 r31675 134 134 } 135 135 } 136 /*136 137 137 // 138 138 // Now move on to POS extracted features per-page 139 139 // 140 JSONObject ef_features = extracted_feature_record.getJSONObject("features"); 141 142 int ef_page_count = ef_features.getInt("pageCount"); 143 144 if (_verbosity >= 1) { 145 System.out.println("Processing: " + volume_id); 146 System.out.println(" pageCount = " + ef_page_count); 147 } 148 149 JSONArray ef_pages = ef_features.getJSONArray("pages"); 150 ef_num_pages = ef_pages.length(); 151 152 153 for (int i = 0; i < ef_page_count; i++) { 154 String formatted_i = String.format("page-%06d", i); 155 String page_id = volume_id + "." + formatted_i; 156 157 if (_verbosity >= 2) { 158 System.out.println(" Page: " + page_id); 159 } 160 161 162 JSONObject ef_page = ef_pages.getJSONObject(i); 163 164 if (ef_page != null) { 165 // Convert to Solr add form 166 JSONObject solr_add_doc_json 167 = SolrDocJSON.generateSolrDocJSON(volume_id, page_id, ef_page, _whitelist_bloomfilter, _universal_langmap, _icu_tokenize); 168 169 170 if ((_verbosity >=2) && (i==20)) { 171 System.out.println("=================="); 172 System.out.println("Sample output Solr add JSON [page 20]: " + solr_add_doc_json.toString()); 173 System.out.println("=================="); 174 } 175 176 177 if (solr_url != null) { 178 SolrDocJSON.postSolrDoc(solr_url, solr_add_doc_json, 140 boolean index_pages = true; 141 if (index_pages) { 142 143 JSONObject ef_features = extracted_feature_record.getJSONObject("features"); 144 145 int ef_page_count = ef_features.getInt("pageCount"); 146 147 if (_verbosity >= 1) { 148 System.out.println("Processing: " + volume_id); 149 System.out.println(" pageCount = " + ef_page_count); 150 } 151 152 JSONArray ef_pages = ef_features.getJSONArray("pages"); 153 ef_num_pages = ef_pages.length(); 154 155 156 for (int i = 0; i < ef_page_count; i++) { 157 String formatted_i = String.format("page-%06d", i); 158 String page_id = volume_id + "." 
+ formatted_i; 159 160 if (_verbosity >= 2) { 161 System.out.println(" Page: " + page_id); 162 } 163 164 165 JSONObject ef_page = ef_pages.getJSONObject(i); 166 167 if (ef_page != null) { 168 // Convert to Solr add form 169 JSONObject solr_add_doc_json 170 = SolrDocJSON.generateSolrDocJSON(volume_id, page_id, ef_page, _whitelist_bloomfilter, _universal_langmap, _icu_tokenize); 171 172 173 if ((_verbosity >=2) && (i==20)) { 174 System.out.println("=================="); 175 System.out.println("Sample output Solr add JSON [page 20]: " + solr_add_doc_json.toString()); 176 System.out.println("=================="); 177 } 178 179 180 if (solr_url != null) { 181 SolrDocJSON.postSolrDoc(solr_url, solr_add_doc_json, 179 182 volume_id, page_id); 180 }181 }182 else {183 System.err.println("Skipping: " + page_id);184 }185 186 }187 */183 } 184 } 185 else { 186 System.err.println("Skipping: " + page_id); 187 } 188 189 } 190 } 188 191 } 189 192 }
Note: See TracChangeset for help on using the changeset viewer.