Changeset 31215 for other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java
- Timestamp: 2016-12-12T17:12:56+13:00 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java
r31211 r31215 3 3 import java.io.BufferedInputStream; 4 4 import java.io.BufferedOutputStream; 5 import java.io.DataInputStream;6 import java.io.DataOutputStream;7 5 import java.io.File; 8 6 import java.io.FileInputStream; … … 11 9 import java.io.IOException; 12 10 import java.io.InputStream; 13 import java.io.OutputStream;14 11 import java.io.Serializable; 15 12 import java.nio.charset.Charset; … … 21 18 import javax.annotation.Nullable; 22 19 23 import static com.google.common.base.Preconditions.checkNotNull;24 25 20 import com.google.common.base.Preconditions; 26 21 import com.google.common.hash.BloomFilter; 27 //import com.google.common.hash.BloomFilterAdvanced;28 //import com.google.common.hash.BloomFilterStrategies.BitArray;29 22 import com.google.common.hash.Funnel; 30 23 import com.google.common.hash.Funnels; 31 24 import com.google.common.hash.PrimitiveSink; 32 import com.google.common.primitives.SignedBytes;33 import com.google.common.primitives.UnsignedBytes;34 25 35 26 public class WhitelistBloomFilter { … … 83 74 num_lines = countLines(dictionary_filename); 84 75 85 //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 86 Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8); 76 Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 87 77 _bloomFilter = BloomFilter.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE); 88 78 } … … 154 144 BufferedInputStream bfis = new BufferedInputStream(fis); 155 145 156 //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 157 Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8); 146 Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 158 147 bloomFilter = BloomFilter.readFrom(bfis,string_funnel); 159 148 … … 171 160 172 161 173 // Spark uses Guava 14.0, the following is future-ported from Guava 20.0174 // Added in here, rather then Funnel, and 
StringCharsetFunnel -> MyStringCharsetFunnel175 176 public static Funnel<CharSequence> stringFunnel(Charset charset) {177 162 178 return new MyStringCharsetFunnel(charset);179 180 }181 182 private static class MyStringCharsetFunnel implements Funnel<CharSequence>, Serializable {183 184 private static final long serialVersionUID = 1L;185 186 private final Charset charset;187 188 MyStringCharsetFunnel(Charset charset) {189 this.charset = Preconditions.checkNotNull(charset);190 }191 192 public void funnel(CharSequence from, PrimitiveSink into) {193 into.putString(from, charset);194 }195 196 @Override197 public String toString() {198 return "Funnels.stringFunnel(" + charset.name() + ")";199 }200 201 @Override202 public boolean equals(@Nullable Object o) {203 204 if (o instanceof MyStringCharsetFunnel) {205 MyStringCharsetFunnel funnel = (MyStringCharsetFunnel) o;206 return this.charset.equals(funnel.charset);207 }208 return false;209 }210 211 @Override212 public int hashCode() {213 return MyStringCharsetFunnel.class.hashCode() ^ charset.hashCode();214 }215 216 Object writeReplace() {217 return new SerializedForm(charset);218 }219 220 private static class SerializedForm implements Serializable {221 private final String charsetCanonicalName;222 223 SerializedForm(Charset charset) {224 this.charsetCanonicalName = charset.name();225 }226 227 private Object readResolve() {228 return stringFunnel(Charset.forName(charsetCanonicalName));229 }230 231 private static final long serialVersionUID = 0;232 233 }234 }235 163 236 164
Note: See TracChangeset for help on using the changeset viewer.