Changeset 31202 for other-projects/hathitrust
- Timestamp:
- 2016-12-12T13:53:06+13:00 (7 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
other-projects/hathitrust/wcsa/extracted-features-solr/trunk/solr-ingest/src/main/java/org/hathitrust/extractedfeatures/WhitelistBloomFilter.java
r31201 r31202 9 9 import java.io.IOException; 10 10 import java.io.InputStream; 11 import java.io.Serializable; 12 import java.nio.charset.Charset; 11 13 import java.nio.charset.StandardCharsets; 12 14 import java.nio.file.Files; … … 14 16 import java.util.stream.Stream; 15 17 18 import javax.annotation.Nullable; 19 20 import com.google.common.base.Preconditions; 16 21 import com.google.common.hash.BloomFilter; 17 22 import com.google.common.hash.Funnel; 18 23 import com.google.common.hash.Funnels; 24 import com.google.common.hash.PrimitiveSink; 19 25 20 26 public class WhitelistBloomFilter { 27 28 21 29 22 30 protected BloomFilter<CharSequence> _bloomFilter; … … 68 76 num_lines = countLines(dictionary_filename); 69 77 78 79 70 80 Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 71 81 _bloomFilter = BloomFilter.create(string_funnel, num_lines,FALSE_POSITIVE_PERCENTAGE); … … 138 148 BufferedInputStream bfis = new BufferedInputStream(fis); 139 149 140 Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 150 //Funnel<CharSequence> string_funnel = Funnels.stringFunnel(StandardCharsets.UTF_8); 151 Funnel<CharSequence> string_funnel = stringFunnel(StandardCharsets.UTF_8); 141 152 bloomFilter = BloomFilter.readFrom(bfis,string_funnel); 142 153 … … 154 165 155 166 167 // Spark uses Guava 14.0, the following is future-ported from Guava 20.0 168 // Added in here, rather than Funnels, and StringCharsetFunnel -> MyStringCharsetFunnel 169 170 public static Funnel<CharSequence> stringFunnel(Charset charset) { 171 172 return new MyStringCharsetFunnel(charset); 173 174 } 175 176 private static class MyStringCharsetFunnel implements Funnel<CharSequence>, Serializable { 177 178 private static final long serialVersionUID = 1L; 179 180 private final Charset charset; 181 182 MyStringCharsetFunnel(Charset charset) { 183 this.charset = Preconditions.checkNotNull(charset); 184 } 185 186 public void funnel(CharSequence from, 
PrimitiveSink into) { 187 into.putString(from, charset); 188 } 189 190 @Override 191 public String toString() { 192 return "Funnels.stringFunnel(" + charset.name() + ")"; 193 } 194 195 @Override 196 public boolean equals(@Nullable Object o) { 197 198 if (o instanceof MyStringCharsetFunnel) { 199 MyStringCharsetFunnel funnel = (MyStringCharsetFunnel) o; 200 return this.charset.equals(funnel.charset); 201 } 202 return false; 203 } 204 205 @Override 206 public int hashCode() { 207 return MyStringCharsetFunnel.class.hashCode() ^ charset.hashCode(); 208 } 209 210 Object writeReplace() { 211 return new SerializedForm(charset); 212 } 213 214 private static class SerializedForm implements Serializable { 215 private final String charsetCanonicalName; 216 217 SerializedForm(Charset charset) { 218 this.charsetCanonicalName = charset.name(); 219 } 220 221 private Object readResolve() { 222 return stringFunnel(Charset.forName(charsetCanonicalName)); 223 } 224 225 private static final long serialVersionUID = 0; 226 227 } 228 229 } 230 231 156 232 }
Note:
See TracChangeset
for help on using the changeset viewer.