Changeset 31597
- Timestamp: 2017-04-11T23:41:07+12:00 (7 years ago)
- Location: other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
r31505 r31597 134 134 } 135 135 } 136 136 /* 137 137 // 138 138 // Now move on to POS extracted features per-page … … 185 185 186 186 } 187 */ 187 188 } 188 189 } -
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/ProcessForSolrIngest.java
r31502 r31597 124 124 solr_endpoints,_output_dir,_verbosity, 125 125 icu_tokenize,strict_file_io); 126 127 126 128 127 JavaRDD<Integer> per_volume_page_count = json_text_rdd.map(per_vol_json); 129 128 130 Integer num_page_ids = per_volume_page_count.reduce((a, b) -> a + b); 131 132 System.out.println(""); 133 System.out.println("############"); 134 System.out.println("# Number of page ids: " + num_page_ids); 129 //Integer num_page_ids = per_volume_page_count.reduce((a, b) -> a + b); 130 long num_vol_ids = per_volume_page_count.count(); 131 132 System.out.println(""); 133 System.out.println("############"); 134 //System.out.println("# Number of page ids: " + num_page_ids); 135 System.out.println("# Number of volume ids: " + num_vol_ids); 135 136 System.out.println("############"); 136 137 System.out.println(""); -
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
r31510 r31597 73 73 if (metavalue != null) { 74 74 solr_doc_json.put(metaname+"_t",metavalue); 75 solr_doc_json.put(metaname+"_s",metavalue); 75 76 } 76 77 } … … 80 81 if (metavalues != null) { 81 82 solr_doc_json.put(metaname+"_t",metavalues); 82 } 83 } 84 85 solr_add_json.put("commitWithin", 5000); 83 solr_doc_json.put(metaname+"_ss",metavalues); 84 } 85 } 86 87 solr_add_json.put("commitWithin", 60000); // used to be 5000 86 88 solr_add_json.put("doc", solr_doc_json); 87 89
Note: See TracChangeset for help on using the changeset viewer.