Changeset 31204 for other-projects


Ignore:
Timestamp:
2016-12-12T14:28:20+13:00 (7 years ago)
Author:
davidb
Message:

Splicing in Guava version 20 of BloomFilter into the code as its own class (now BloomFilterAdvanced). This is because Spark runs with an older version of Guava (14.0), while the written code makes use of newer features.

Location:
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java
Files:
5 added
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java

    r31203 r31204  
    33import java.io.BufferedInputStream;
    44import java.io.BufferedOutputStream;
     5import java.io.DataInputStream;
     6import java.io.DataOutputStream;
    57import java.io.File;
    68import java.io.FileInputStream;
     
    911import java.io.IOException;
    1012import java.io.InputStream;
     13import java.io.OutputStream;
    1114import java.io.Serializable;
    1215import java.nio.charset.Charset;
     
    1821import javax.annotation.Nullable;
    1922
     23import static com.google.common.base.Preconditions.checkNotNull;
     24
    2025import com.google.common.base.Preconditions;
    21 import com.google.common.hash.BloomFilter;
     26//import com.google.common.hash.BloomFilter;
     27import com.google.common.hash.BloomFilterAdvanced;
     28//import com.google.common.hash.BloomFilterStrategies.BitArray;
    2229import com.google.common.hash.Funnel;
    2330import com.google.common.hash.Funnels;
    2431import com.google.common.hash.PrimitiveSink;
     32import com.google.common.primitives.SignedBytes;
     33import com.google.common.primitives.UnsignedBytes;
    2534
    2635public class WhitelistBloomFilter {
     
    2837   
    2938
    30     protected BloomFilter<CharSequence> _bloomFilter;
     39    protected BloomFilterAdvanced<CharSequence> _bloomFilter;
    3140    protected static final String SERIALIZED_SUFFIX = "-serialized";
    3241    protected static final double FALSE_POSITIVE_PERCENTAGE = 0.01;
     
    7887                //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8);
    7988                Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8);
    80                 _bloomFilter = BloomFilter.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE);
     89                _bloomFilter = BloomFilterAdvanced.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE);
    8190            }
    8291            catch (IOException e) {
     
    139148    }
    140149
    141     protected static BloomFilter<CharSequence> serializeIn(File ser_file)
    142     {
    143         BloomFilter<CharSequence> bloomFilter = null;
     150    protected static BloomFilterAdvanced<CharSequence> serializeIn(File ser_file)
     151    {
     152        BloomFilterAdvanced<CharSequence> bloomFilter = null;
    144153   
    145154        try {
     
    149158            //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8);
    150159            Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8);
    151             bloomFilter = BloomFilter.readFrom(bfis,string_funnel);
     160            bloomFilter = BloomFilterAdvanced.readFrom(bfis,string_funnel);
    152161
    153162            bfis.close();
     
    225234
    226235        }
    227 
    228236      }
    229237   
Note: See TracChangeset for help on using the changeset viewer.