Ignore:
Timestamp:
2016-12-12T13:53:06+13:00 (7 years ago)
Author:
davidb
Message:

Turns out Spark uses Guava 14.0 not 20.0. Additional code to fill in some gaps

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java

    r31201 r31202  
    99import java.io.IOException;
    1010import java.io.InputStream;
     11import java.io.Serializable;
     12import java.nio.charset.Charset;
    1113import java.nio.charset.StandardCharsets;
    1214import java.nio.file.Files;
     
    1416import java.util.stream.Stream;
    1517
     18import javax.annotation.Nullable;
     19
     20import com.google.common.base.Preconditions;
    1621import com.google.common.hash.BloomFilter;
    1722import com.google.common.hash.Funnel;
    1823import com.google.common.hash.Funnels;
     24import com.google.common.hash.PrimitiveSink;
    1925
    2026public class WhitelistBloomFilter {
     27   
     28   
    2129
    2230    protected BloomFilter<CharSequence> _bloomFilter;
     
    6876                num_lines = countLines(dictionary_filename);
    6977
     78               
     79                 
    7080                Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8);
    7181                _bloomFilter = BloomFilter.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE);
     
    138148            BufferedInputStream bfis = new BufferedInputStream(fis);
    139149
    140             Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8);
     150            //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8);
     151            Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8);
    141152            bloomFilter = BloomFilter.readFrom(bfis,string_funnel);
    142153
     
    154165
    155166
     167    // Spark uses Guava 14.0, so the following is back-ported from Guava 20.0.
     168    // It is added here rather than in Funnels, with StringCharsetFunnel renamed to MyStringCharsetFunnel.
     169   
     170    public static Funnel<CharSequence> stringFunnel(Charset charset) {
     171        // Local stand-in for Funnels.stringFunnel(Charset): wraps the charset in a MyStringCharsetFunnel.
     172        return new MyStringCharsetFunnel(charset);
     173
     174      }
     175   
     176    private static class MyStringCharsetFunnel implements Funnel<CharSequence>, Serializable {
     177        // Funnel that writes a CharSequence into a PrimitiveSink using one fixed Charset.
     178        private static final long serialVersionUID = 1L;
     179        // Charset handed to putString() in funnel(); null is rejected by the constructor.
     180        private final Charset charset;
     181        // Constructor: fails fast on a null charset via Preconditions.checkNotNull.
     182        MyStringCharsetFunnel(Charset charset) {
     183          this.charset = Preconditions.checkNotNull(charset);
     184        }
     185        // Funnel contract: feed 'from' into the sink 'into' as a string in this funnel's charset.
     186        public void funnel(CharSequence from, PrimitiveSink into) {
     187          into.putString(from, charset);
     188        }
     189        // Describes itself under the Funnels.stringFunnel(...) name, followed by the charset name.
     190        @Override
     191        public String toString() {
     192          return "Funnels.stringFunnel(" + charset.name() + ")";
     193        }
     194        // Two funnels of this class are equal exactly when their charsets are equal.
     195        @Override
     196        public boolean equals(@Nullable Object o) {
     197
     198          if (o instanceof MyStringCharsetFunnel) {
     199            MyStringCharsetFunnel funnel = (MyStringCharsetFunnel) o;
     200            return this.charset.equals(funnel.charset);
     201          }
     202          return false;
     203        }
     204        // Consistent with equals(): XORs the class's hash with the charset's hash.
     205        @Override
     206        public int hashCode() {
     207          return MyStringCharsetFunnel.class.hashCode() ^ charset.hashCode();
     208        }
     209        // Serialization hook: write a SerializedForm instead of this object (presumably because Charset itself is not Serializable).
     210        Object writeReplace() {
     211          return new SerializedForm(charset);
     212        }
     213        // Serialization proxy: stores only the charset's name and rebuilds the funnel when read back.
     214        private static class SerializedForm implements Serializable {
     215          private final String charsetCanonicalName;
     216          // Captures charset.name() as the value to serialize.
     217          SerializedForm(Charset charset) {
     218            this.charsetCanonicalName = charset.name();
     219          }
     220          // Deserialization hook: look the charset up by name and return a funnel from stringFunnel().
     221          private Object readResolve() {
     222            return stringFunnel(Charset.forName(charsetCanonicalName));
     223          }
     224
     225          private static final long serialVersionUID = 0;
     226
     227        }
     228
     229      }
     230   
     231   
    156232}
Note: See TracChangeset for help on using the changeset viewer.