Changeset 31375 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
- Timestamp: 2017-01-31T21:35:50+13:00 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java
r31372 r31375 3 3 import java.io.IOException; 4 4 import java.util.ArrayList; 5 import java.util.HashMap; 5 6 import java.util.Iterator; 6 7 … … 29 30 protected String _input_dir; 30 31 protected String _whitelist_filename; 32 protected String _langmap_directory; 31 33 32 34 protected String _solr_url; … … 36 38 37 39 protected WhitelistBloomFilter _whitelist_bloomfilter; 38 39 40 41 boolean _icu_tokenize; 42 boolean _strict_file_io; 43 44 public PerVolumeJSON(String input_dir, String whitelist_filename, 40 protected UniversalPOSLangMap _universal_langmap; 41 42 boolean _icu_tokenize; 43 boolean _strict_file_io; 44 45 public PerVolumeJSON(String input_dir, String whitelist_filename, String langmap_directcory, 45 46 String solr_url, String output_dir, int verbosity, 46 47 boolean icu_tokenize, boolean strict_file_io) … … 48 49 _input_dir = input_dir; 49 50 _whitelist_filename = whitelist_filename; 51 _langmap_directory = langmap_directcory; 50 52 51 53 _solr_url = solr_url; … … 57 59 58 60 _whitelist_bloomfilter = null; 61 _universal_langmap = null; 59 62 } 60 63 … … 66 69 _whitelist_bloomfilter = new WhitelistBloomFilter(_whitelist_filename,true); 67 70 } 68 71 72 if ((_langmap_directory != null) && (_universal_langmap == null)) { 73 _universal_langmap = new UniversalPOSLangMap(_langmap_directory); 74 } 69 75 int ef_num_pages = 0; 70 76 … … 107 113 // Convert to Solr add form 108 114 JSONObject solr_add_doc_json 109 = SolrDocJSON.generateSolrDocJSON(volume_id, page_id, ef_page, _whitelist_bloomfilter, _ icu_tokenize);115 = SolrDocJSON.generateSolrDocJSON(volume_id, page_id, ef_page, _whitelist_bloomfilter, _universal_langmap, _icu_tokenize); 110 116 111 117
Note: See TracChangeset for help on using the changeset viewer.