Changeset 31505
- Timestamp:
- 2017-03-13T15:31:40+13:00 (7 years ago)
- Location:
- other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
r31500  r31505
   109     109      String volume_id = extracted_feature_record.getString("id");
   110     110
   111           -  //JSONObject ef_metadata = extracted_feature_record.getJSONObject("metadata");
           111   +  JSONObject ef_metadata = extracted_feature_record.getJSONObject("metadata");
   112     112      //String title= ef_metadata.getString("title");
   113     113
           114   +  //
           115   +  // Top-level metadata Solr doc
           116   +  //
           117   +  JSONObject solr_add_metadata_doc_json = SolrDocJSON.generateToplevelMetadataSolrDocJSON(volume_id,ef_metadata);
           118   +  if (solr_add_metadata_doc_json != null) {
           119   +
           120   +      if ((_verbosity >=2)) {
           121   +          System.out.println("==================");
           122   +          System.out.println("Metadata JSON: " + solr_add_metadata_doc_json.toString());
           123   +          System.out.println("==================");
           124   +      }
           125   +
           126   +      if (solr_url != null) {
           127   +
           128   +          if ((_verbosity >=2) ) {
           129   +              System.out.println("==================");
           130   +              System.out.println("Posting to: " + solr_url);
           131   +              System.out.println("==================");
           132   +          }
           133   +          SolrDocJSON.postSolrDoc(solr_url, solr_add_metadata_doc_json, volume_id, "top-level-metadata");
           134   +      }
           135   +  }
           136   +
           137   +  //
           138   +  // Now move on to POS extracted features per-page
           139   +  //
   114     140      JSONObject ef_features = extracted_feature_record.getJSONObject("features");
   115     141
     …       …
   150     176
   151     177          if (solr_url != null) {
   152           -          if ((_verbosity >=2) && (i==20)) {
   153           -              System.out.println("==================");
   154           -              System.out.println("Posting to: " + solr_url);
   155           -              System.out.println("==================");
   156           -          }
   157     178              SolrDocJSON.postSolrDoc(solr_url, solr_add_doc_json,
   158     179                      volume_id, page_id);
   159     180          }
   160           -
   161           -
   162     181      }
   163     182      else {
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/SolrDocJSON.java
r31499  r31505
    28      28      public class SolrDocJSON {
    29      29
            30   +      protected static JSONObject generateToplevelMetadataSolrDocJSON(String volume_id, JSONObject ef_metadata)
            31   +      {
            32   +          JSONObject solr_update_json = null;
            33   +
            34   +
            35   +          String [] metadata_single = new String[] {
            36   +              "accessProfile",
            37   +              "rightsAttributes",
            38   +              "hathitrustRecordNumber",
            39   +              "title",
            40   +              "imprint",
            41   +              "pubDate",
            42   +              "pubPlace",
            43   +              "language",
            44   +              "issuance",
            45   +              "typeOfResource"
            46   +          };
            47   +
            48   +          String [] metadata_multiple = new String[] {
            49   +              "oclc",
            50   +              "isbn",
            51   +              "issn",
            52   +              "lccn",
            53   +              "genre",
            54   +              "names"
            55   +          };
            56   +
            57   +          if (ef_metadata != null) {
            58   +
            59   +              // For JSON Solr format see:
            60   +              // https://cwiki.apache.org/confluence/display/solr/Uploading+Data+with+Index+Handlers
            61   +
            62   +              //String title= ef_metadata.getString("title");
            63   +              JSONObject solr_add_json = new JSONObject();
            64   +
            65   +              JSONObject solr_doc_json = new JSONObject();
            66   +              solr_doc_json.put("id", volume_id);
            67   +
            68   +              for (String metaname: metadata_single) {
            69   +                  String metavalue = ef_metadata.getString(metaname);
            70   +                  if (metavalue != null) {
            71   +                      solr_doc_json.put(metaname+"_t",metavalue);
            72   +                  }
            73   +              }
            74   +
            75   +              for (String metaname: metadata_multiple) {
            76   +                  JSONArray metavalues = ef_metadata.getJSONArray(metaname);
            77   +                  if (metavalues != null) {
            78   +                      solr_doc_json.put(metaname+"_t",metavalues);
            79   +                  }
            80   +              }
            81   +
            82   +              solr_add_json.put("commitWithin", 5000);
            83   +              solr_add_json.put("doc", solr_doc_json);
            84   +
            85   +              solr_update_json = new JSONObject();
            86   +              solr_update_json.put("add",solr_add_json);
            87   +
            88   +          }
            89   +          else {
            90   +              System.err.println("Warning: null metadata for '" + volume_id + "'");
            91   +          }
            92   +
            93   +          return solr_update_json;
            94   +      }
            95   +
            96   +
    30      97
    31      98          protected static ArrayList<String> getTokenPosCountWords(JSONObject ef_token_pos_count, String page_id,
     …       …
   376     443              }
   377     444          }
           445   +
   378     446          protected static JSONObject generateSolrDocJSON(String volume_id, String page_id, JSONObject ef_page,
   379     447                  WhitelistBloomFilter whitelist_bloomfilter,
     …       …
   618     686                  e.printStackTrace();
   619     687              }
   620           -
   621     688          }
   622     689      }
Note: See TracChangeset for help on using the changeset viewer.
for help on using the changeset viewer.