Changeset 31202 for other-projects

Show
Ignore:
Timestamp:
12.12.2016 13:53:06 (2 years ago)
Author:
davidb
Message:

Turns out Spark uses Guava 14.0 not 20.0. Additional code to fill in some gaps

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java

    r31201 r31202  
    99import java.io.IOException; 
    1010import java.io.InputStream; 
     11import java.io.Serializable; 
     12import java.nio.charset.Charset; 
    1113import java.nio.charset.StandardCharsets; 
    1214import java.nio.file.Files; 
     
    1416import java.util.stream.Stream; 
    1517 
     18import javax.annotation.Nullable; 
     19 
     20import com.google.common.base.Preconditions; 
    1621import com.google.common.hash.BloomFilter; 
    1722import com.google.common.hash.Funnel; 
    1823import com.google.common.hash.Funnels; 
     24import com.google.common.hash.PrimitiveSink; 
    1925 
    2026public class WhitelistBloomFilter { 
     27     
     28     
    2129 
    2230    protected BloomFilter<CharSequence> _bloomFilter; 
     
    6876                num_lines = countLines(dictionary_filename); 
    6977 
     78                 
     79                  
    7080                Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 
    7181                _bloomFilter = BloomFilter.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE); 
     
    138148            BufferedInputStream bfis = new BufferedInputStream(fis); 
    139149 
    140             Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 
     150            //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 
     151            Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8); 
    141152            bloomFilter = BloomFilter.readFrom(bfis,string_funnel); 
    142153 
     
    154165 
    155166 
     167    // Spark uses Guava 14.0, so the following is back-ported from Guava 20.0. 
     168    // It is added here rather than in Funnels, with StringCharsetFunnel renamed to MyStringCharsetFunnel. 
     169     
     170    public static Funnel<CharSequence> stringFunnel(Charset charset) { 
            // Local factory for a CharSequence funnel bound to the given charset.
            // It simply wraps the charset in a MyStringCharsetFunnel; a null charset
            // is rejected by that class's constructor (Preconditions.checkNotNull).
     171 
     172        return new MyStringCharsetFunnel(charset); 
     173 
     174      } 
     175     
     176    private static class MyStringCharsetFunnel implements Funnel<CharSequence>, Serializable { 
            // Funnel that hands each CharSequence, together with a fixed Charset, to a
            // PrimitiveSink. Instances are created through stringFunnel(Charset).
            // Two instances are equal when their charsets are equal, and serialization
            // goes through the nested SerializedForm (see writeReplace below).
     177 
     178        private static final long serialVersionUID = 1L; 
     179         
     180        private final Charset charset; 
     181 
                // Stores the charset; Preconditions.checkNotNull rejects a null argument.
     182        MyStringCharsetFunnel(Charset charset) { 
     183          this.charset = Preconditions.checkNotNull(charset); 
     184        } 
     185 
                // Passes the sequence to the sink's putString along with this funnel's charset.
     186        public void funnel(CharSequence from, PrimitiveSink into) { 
     187          into.putString(from, charset); 
     188        } 
     189 
                // Reports itself as "Funnels.stringFunnel(<charset name>)" rather than
                // under the local class name.
     190        @Override 
     191        public String toString() { 
     192          return "Funnels.stringFunnel(" + charset.name() + ")"; 
     193        } 
     194 
                // Equal only to another MyStringCharsetFunnel whose charset is equal.
     195        @Override 
     196        public boolean equals(@Nullable Object o) { 
     197 
     198          if (o instanceof MyStringCharsetFunnel) { 
     199            MyStringCharsetFunnel funnel = (MyStringCharsetFunnel) o; 
     200            return this.charset.equals(funnel.charset); 
     201          } 
     202          return false; 
     203        } 
     204 
                // XORs the class's hash with the charset's hash, so funnels that are
                // equal per equals() produce the same hash code.
     205        @Override 
     206        public int hashCode() { 
     207          return MyStringCharsetFunnel.class.hashCode() ^ charset.hashCode(); 
     208        } 
     209 
                // Serialization hook: substitutes a SerializedForm that carries only the
                // charset's name. NOTE(review): presumably needed because
                // java.nio.charset.Charset is not itself Serializable -- confirm.
     210        Object writeReplace() { 
     211          return new SerializedForm(charset); 
     212        } 
     213 
                // Serializable stand-in for the funnel; holds charset.name() as a String.
     214        private static class SerializedForm implements Serializable { 
     215          private final String charsetCanonicalName; 
     216 
     217          SerializedForm(Charset charset) { 
     218            this.charsetCanonicalName = charset.name(); 
     219          } 
     220 
                  // Deserialization hook: looks the charset up again by name and rebuilds
                  // the funnel through stringFunnel(Charset).
     221          private Object readResolve() { 
     222            return stringFunnel(Charset.forName(charsetCanonicalName)); 
     223          } 
     224 
     225          private static final long serialVersionUID = 0; 
     226 
     227        } 
     228 
     229      } 
     230     
     231     
    156232}