Changeset 31225 for other-projects

Show
Ignore:
Timestamp:
13.12.2016 13:46:23 (3 years ago)
Author:
davidb
Message:

Relocated Bloom filter creation into the call() method, so that it is done on the submitted side, where the code actually runs

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures
Files:
2 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerPageJSONFlatmap.java

    r31221 r31225  
    2525     
    2626    protected String _input_dir; 
     27    protected String _whitelist_filename; 
     28 
     29    protected String _solr_url; 
     30    protected String _output_dir; 
     31     
     32    protected int    _verbosity; 
    2733     
    2834    protected WhitelistBloomFilter _whitelist_bloomfilter; 
    2935 
    30     protected String _solr_url; 
    31     protected String _output_dir; 
    32     protected int    _verbosity; 
    3336     
    3437    protected DoubleAccumulator _progress_accum; 
     
    4346    { 
    4447        _input_dir  = input_dir; 
    45          
    46         if (whitelist_filename != null) { 
    47             _whitelist_bloomfilter = new WhitelistBloomFilter(whitelist_filename,true); 
    48         } 
     48        _whitelist_filename = whitelist_filename; 
    4949         
    5050        _solr_url   = solr_url; 
     
    5656         
    5757        _strict_file_io = strict_file_io; 
     58         
     59        _whitelist_bloomfilter = null; 
    5860    } 
    5961     
     
    6163    public Iterator<JSONObject> call(String json_file_in) throws IOException 
    6264    {  
     65        if ((_whitelist_filename != null) && (_whitelist_bloomfilter != null)) { 
     66            _whitelist_bloomfilter = new WhitelistBloomFilter(_whitelist_filename,true); 
     67        } 
     68         
    6369        String full_json_file_in = _input_dir + "/" + json_file_in; 
    6470        JSONObject extracted_feature_record = JSONClusterFileIO.readJSONFile(full_json_file_in); 
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/PerVolumeJSON.java

    r31221 r31225  
    2020    private static final long serialVersionUID = 1L; 
    2121    protected String _input_dir; 
    22  
    23     protected WhitelistBloomFilter _whitelist_bloomfilter; 
    24  
     22    protected String _whitelist_filename; 
     23     
    2524    protected String _solr_url; 
    2625    protected String _output_dir; 
     26     
    2727    protected int    _verbosity; 
     28     
     29    protected WhitelistBloomFilter _whitelist_bloomfilter; 
    2830     
    2931    protected DoubleAccumulator _progress_accum; 
     
    3537    { 
    3638        _input_dir  = input_dir; 
    37          
    38         if (whitelist_filename != null) { 
    39             _whitelist_bloomfilter = new WhitelistBloomFilter(whitelist_filename,true); 
    40         } 
     39        _whitelist_filename = whitelist_filename; 
    4140         
    4241        _solr_url   = solr_url; 
     
    4645        _progress_accum = progress_accum; 
    4746        _progress_step  = progress_step; 
     47         
     48        _whitelist_bloomfilter = null; 
    4849    } 
    4950     
     
    5152    public void call(String json_file_in)  
    5253    {  
     54        if ((_whitelist_filename != null) && (_whitelist_bloomfilter != null)) { 
     55            _whitelist_bloomfilter = new WhitelistBloomFilter(_whitelist_filename,true); 
     56        } 
     57 
    5358        JSONObject extracted_feature_record = JSONClusterFileIO.readJSONFile(_input_dir + "/" + json_file_in); 
    5459