Ignore:
Timestamp:
2016-12-12T20:18:04+13:00 (7 years ago)
Author:
davidb
Message:

Use of whitelist Bloom filter added to words going into Solr index

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java

    r31096  r31220
    20      20          private static final long serialVersionUID = 1L;
    21      21          protected String _input_dir;
            22      +
            23      +   protected WhitelistBloomFilter _whitelist_bloomfilter;
            24      +
    22      25          protected String _solr_url;
    23      26          protected String _output_dir;
    …       …
    27      30          protected double            _progress_step;
    28      31
    29              -   public PerVolumeJSON(String input_dir, String solr_url, String output_dir, int verbosity,
            32      +   public PerVolumeJSON(String input_dir, String whitelist_filename,
            33      +                        String solr_url, String output_dir, int verbosity,
    30      34                               DoubleAccumulator progress_accum, double progress_step)
    31      35          {
    32      36              _input_dir  = input_dir;
            37      +
            38      +       if (whitelist_filename != null) {
            39      +           _whitelist_bloomfilter = new WhitelistBloomFilter(whitelist_filename,true);
            40      +       }
            41      +
    33      42              _solr_url   = solr_url;
    34      43              _output_dir = output_dir;
Note: See TracChangeset for help on using the changeset viewer.