Changeset 31204 for other-projects

Show
Ignore:
Timestamp:
12.12.2016 14:28:20 (3 years ago)
Author:
davidb
Message:

Splicing in Guava version 20 of BloomFilter into the code as its own class (now BloomFilterAdvanced). This is because Spark runs with an older version of Guava (14.0), whereas the written code makes use of newer features.

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java
Files:
5 added
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java

    r31203 r31204  
    33import java.io.BufferedInputStream; 
    44import java.io.BufferedOutputStream; 
     5import java.io.DataInputStream; 
     6import java.io.DataOutputStream; 
    57import java.io.File; 
    68import java.io.FileInputStream; 
     
    911import java.io.IOException; 
    1012import java.io.InputStream; 
     13import java.io.OutputStream; 
    1114import java.io.Serializable; 
    1215import java.nio.charset.Charset; 
     
    1821import javax.annotation.Nullable; 
    1922 
     23import static com.google.common.base.Preconditions.checkNotNull; 
     24 
    2025import com.google.common.base.Preconditions; 
    21 import com.google.common.hash.BloomFilter; 
     26//import com.google.common.hash.BloomFilter; 
     27import com.google.common.hash.BloomFilterAdvanced; 
     28//import com.google.common.hash.BloomFilterStrategies.BitArray; 
    2229import com.google.common.hash.Funnel; 
    2330import com.google.common.hash.Funnels; 
    2431import com.google.common.hash.PrimitiveSink; 
     32import com.google.common.primitives.SignedBytes; 
     33import com.google.common.primitives.UnsignedBytes; 
    2534 
    2635public class WhitelistBloomFilter { 
     
    2837     
    2938 
    30     protected BloomFilter<CharSequence> _bloomFilter; 
     39    protected BloomFilterAdvanced<CharSequence> _bloomFilter; 
    3140    protected static final String SERIALIZED_SUFFIX = "-serialized"; 
    3241    protected static final double FALSE_POSITIVE_PERCENTAGE = 0.01; 
     
    7887                //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 
    7988                Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8); 
    80                 _bloomFilter = BloomFilter.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE); 
     89                _bloomFilter = BloomFilterAdvanced.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE); 
    8190            }  
    8291            catch (IOException e) { 
     
    139148    } 
    140149 
    141     protected static BloomFilter<CharSequence> serializeIn(File ser_file) 
    142     { 
    143         BloomFilter<CharSequence> bloomFilter = null; 
     150    protected static BloomFilterAdvanced<CharSequence> serializeIn(File ser_file) 
     151    { 
     152        BloomFilterAdvanced<CharSequence> bloomFilter = null; 
    144153     
    145154        try { 
     
    149158            //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 
    150159            Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8); 
    151             bloomFilter = BloomFilter.readFrom(bfis,string_funnel); 
     160            bloomFilter = BloomFilterAdvanced.readFrom(bfis,string_funnel); 
    152161 
    153162            bfis.close(); 
     
    225234 
    226235        } 
    227  
    228236      } 
    229237