/* * Copyright (C) 2011 The Guava Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package com.google.common.hash; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; import com.google.common.annotations.Beta; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Objects; import com.google.common.base.Predicate; import com.google.common.hash.BloomFilterAdvancedStrategies.BitArray; import com.google.common.primitives.SignedBytes; import com.google.common.primitives.UnsignedBytes; //import com.google.errorprone.annotations.CanIgnoreReturnValue; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; import javax.annotation.Nullable; //import com.google.common.hash.BloomFilter; /** * A Bloom filter for instances of {@code T}. A Bloom filter offers an approximate containment test * with one-sided error: if it claims that an element is contained in it, this might be in error, * but if it claims that an element is not contained in it, then this is definitely true. * *

If you are unfamiliar with Bloom filters, this nice * tutorial may help you understand * how they work. * *

The false positive probability ({@code FPP}) of a bloom filter is defined as the probability * that {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that * has not actually been put in the {@code BloomFilter}. * *

Bloom filters are serializable. They also support a more compact serial representation via the * {@link #writeTo} and {@link #readFrom} methods. Both serialized forms will continue to be * supported by future versions of this library. However, serial forms generated by newer versions * of the code may not be readable by older versions of the code (e.g., a serialized bloom filter * generated today may not be readable by a binary that was compiled 6 months ago). * * @param the type of instances that the {@code BloomFilter} accepts * @author Dimitris Andreou * @author Kevin Bourrillion * @since 11.0 */ @Beta public final class BloomFilterAdvanced implements Predicate, Serializable { /** * A strategy to translate T instances, to {@code numHashFunctions} bit indexes. * *

Implementations should be collections of pure functions (i.e. stateless). */ interface Strategy extends java.io.Serializable { /** * Sets {@code numHashFunctions} bits of the given bit array, by hashing a user element. * *

Returns whether any bits changed as a result of this operation. */ boolean put(T object, Funnel funnel, int numHashFunctions, BitArray bits); /** * Queries {@code numHashFunctions} bits of the given bit array, by hashing a user element; * returns {@code true} if and only if all selected bits are set. */ boolean mightContain( T object, Funnel funnel, int numHashFunctions, BitArray bits); /** * Identifier used to encode this strategy, when marshalled as part of a BloomFilter. Only * values in the [-128, 127] range are valid for the compact serial form. Non-negative values * are reserved for enums defined in BloomFilterStrategies; negative values are reserved for any * custom, stateful strategy we may define (e.g. any kind of strategy that would depend on user * input). */ int ordinal(); } /** The bit set of the BloomFilter (not necessarily power of 2!) */ private final BitArray bits; /** Number of hashes per element */ private final int numHashFunctions; /** The funnel to translate Ts to bytes */ private final Funnel funnel; /** * The strategy we employ to map an element T to {@code numHashFunctions} bit indexes. */ private final Strategy strategy; /** * Creates a BloomFilter. */ private BloomFilterAdvanced( BitArray bits, int numHashFunctions, Funnel funnel, Strategy strategy) { checkArgument(numHashFunctions > 0, "numHashFunctions (%s) must be > 0", numHashFunctions); checkArgument( numHashFunctions <= 255, "numHashFunctions (%s) must be <= 255", numHashFunctions); this.bits = checkNotNull(bits); this.numHashFunctions = numHashFunctions; this.funnel = checkNotNull(funnel); this.strategy = checkNotNull(strategy); } /** * Creates a new {@code BloomFilter} that's a copy of this instance. The new instance is equal to * this instance but shares no mutable state. * * @since 12.0 */ public BloomFilterAdvanced copy() { return new BloomFilterAdvanced(bits.copy(), numHashFunctions, funnel, strategy); } /** * Returns {@code true} if the element might have been put in this Bloom filter, * {@code false} if this is definitely not the case. */ public boolean mightContain(T object) { return strategy.mightContain(object, funnel, numHashFunctions, bits); } /** * @deprecated Provided only to satisfy the {@link Predicate} interface; use {@link #mightContain} * instead. */ @Deprecated @Override public boolean apply(T input) { return mightContain(input); } /** * Puts an element into this {@code BloomFilter}. Ensures that subsequent invocations of * {@link #mightContain(Object)} with the same element will always return {@code true}. * * @return true if the bloom filter's bits changed as a result of this operation. If the bits * changed, this is definitely the first time {@code object} has been added to the * filter. If the bits haven't changed, this might be the first time {@code object} has * been added to the filter. Note that {@code put(t)} always returns the opposite * result to what {@code mightContain(t)} would have returned at the time it is called." * @since 12.0 (present in 11.0 with {@code void} return type}) */ //@CanIgnoreReturnValue public boolean put(T object) { return strategy.put(object, funnel, numHashFunctions, bits); } /** * Returns the probability that {@linkplain #mightContain(Object)} will erroneously return * {@code true} for an object that has not actually been put in the {@code BloomFilter}. * *

Ideally, this number should be close to the {@code fpp} parameter passed in * {@linkplain #create(Funnel, int, double)}, or smaller. If it is significantly higher, it is * usually the case that too many elements (more than expected) have been put in the * {@code BloomFilter}, degenerating it. * * @since 14.0 (since 11.0 as expectedFalsePositiveProbability()) */ public double expectedFpp() { // You down with FPP? (Yeah you know me!) Who's down with FPP? (Every last homie!) return Math.pow((double) bits.bitCount() / bitSize(), numHashFunctions); } /** * Returns the number of bits in the underlying bit array. */ @VisibleForTesting long bitSize() { return bits.bitSize(); } /** * Determines whether a given bloom filter is compatible with this bloom filter. For two bloom * filters to be compatible, they must: * *