1 | /*
|
---|
2 | * Copyright (C) 2011 The Guava Authors
|
---|
3 | *
|
---|
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
---|
5 | * in compliance with the License. You may obtain a copy of the License at
|
---|
6 | *
|
---|
7 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
8 | *
|
---|
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
|
---|
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
---|
11 | * or implied. See the License for the specific language governing permissions and limitations under
|
---|
12 | * the License.
|
---|
13 | */
|
---|
14 |
|
---|
15 | package com.google.common.hash;
|
---|
16 |
|
---|
17 | import static com.google.common.base.Preconditions.checkArgument;
|
---|
18 |
|
---|
19 | import com.google.common.math.LongMath;
|
---|
20 | import com.google.common.primitives.Ints;
|
---|
21 | import com.google.common.primitives.Longs;
|
---|
22 | import java.math.RoundingMode;
|
---|
23 | import java.util.Arrays;
|
---|
24 | import javax.annotation.Nullable;
|
---|
25 |
|
---|
26 | /**
|
---|
27 | * Collections of strategies of generating the k * log(M) bits required for an element to be mapped
|
---|
28 | * to a BloomFilter of M bits and k hash functions. These strategies are part of the serialized form
|
---|
29 | * of the Bloom filters that use them, thus they must be preserved as is (no updates allowed, only
|
---|
30 | * introduction of new versions).
|
---|
31 | *
|
---|
32 | * Important: the order of the constants cannot change, and they cannot be deleted - we depend on
|
---|
33 | * their ordinal for BloomFilter serialization.
|
---|
34 | *
|
---|
35 | * @author Dimitris Andreou
|
---|
36 | * @author Kurt Alfred Kluever
|
---|
37 | */
|
---|
38 | enum BloomFilterAdvancedStrategies implements BloomFilter.Strategy {
|
---|
39 | /**
|
---|
40 | * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael
|
---|
41 | * Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
|
---|
42 | * performance of a Bloom filter (yet only needs two 32bit hash functions).
|
---|
43 | */
|
---|
44 | MURMUR128_MITZ_32() {
|
---|
45 | @Override
|
---|
46 | public <T> boolean put(
|
---|
47 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterStrategies.BitArray bits) {
|
---|
48 | long bitSize = bits.bitSize();
|
---|
49 | long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
|
---|
50 | int hash1 = (int) hash64;
|
---|
51 | int hash2 = (int) (hash64 >>> 32);
|
---|
52 |
|
---|
53 | boolean bitsChanged = false;
|
---|
54 | for (int i = 1; i <= numHashFunctions; i++) {
|
---|
55 | int combinedHash = hash1 + (i * hash2);
|
---|
56 | // Flip all the bits if it's negative (guaranteed positive number)
|
---|
57 | if (combinedHash < 0) {
|
---|
58 | combinedHash = ~combinedHash;
|
---|
59 | }
|
---|
60 | bitsChanged |= bits.set(combinedHash % bitSize);
|
---|
61 | }
|
---|
62 | return bitsChanged;
|
---|
63 | }
|
---|
64 |
|
---|
65 | @Override
|
---|
66 | public <T> boolean mightContain(
|
---|
67 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterStrategies.BitArray bits) {
|
---|
68 | long bitSize = bits.bitSize();
|
---|
69 | long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
|
---|
70 | int hash1 = (int) hash64;
|
---|
71 | int hash2 = (int) (hash64 >>> 32);
|
---|
72 |
|
---|
73 | for (int i = 1; i <= numHashFunctions; i++) {
|
---|
74 | int combinedHash = hash1 + (i * hash2);
|
---|
75 | // Flip all the bits if it's negative (guaranteed positive number)
|
---|
76 | if (combinedHash < 0) {
|
---|
77 | combinedHash = ~combinedHash;
|
---|
78 | }
|
---|
79 | if (!bits.get(combinedHash % bitSize)) {
|
---|
80 | return false;
|
---|
81 | }
|
---|
82 | }
|
---|
83 | return true;
|
---|
84 | }
|
---|
85 | },
|
---|
86 | /**
|
---|
87 | * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks different
|
---|
88 | * than the implementation in MURMUR128_MITZ_32 because we're avoiding the multiplication in the
|
---|
89 | * loop and doing a (much simpler) += hash2. We're also changing the index to a positive number by
|
---|
90 | * AND'ing with Long.MAX_VALUE instead of flipping the bits.
|
---|
91 | */
|
---|
92 | MURMUR128_MITZ_64() {
|
---|
93 | @Override
|
---|
94 | public <T> boolean put(
|
---|
95 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterStrategies.BitArray bits) {
|
---|
96 | long bitSize = bits.bitSize();
|
---|
97 | byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
|
---|
98 | long hash1 = lowerEight(bytes);
|
---|
99 | long hash2 = upperEight(bytes);
|
---|
100 |
|
---|
101 | boolean bitsChanged = false;
|
---|
102 | long combinedHash = hash1;
|
---|
103 | for (int i = 0; i < numHashFunctions; i++) {
|
---|
104 | // Make the combined hash positive and indexable
|
---|
105 | bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize);
|
---|
106 | combinedHash += hash2;
|
---|
107 | }
|
---|
108 | return bitsChanged;
|
---|
109 | }
|
---|
110 |
|
---|
111 | @Override
|
---|
112 | public <T> boolean mightContain(
|
---|
113 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterStrategies.BitArray bits) {
|
---|
114 | long bitSize = bits.bitSize();
|
---|
115 | byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
|
---|
116 | long hash1 = lowerEight(bytes);
|
---|
117 | long hash2 = upperEight(bytes);
|
---|
118 |
|
---|
119 | long combinedHash = hash1;
|
---|
120 | for (int i = 0; i < numHashFunctions; i++) {
|
---|
121 | // Make the combined hash positive and indexable
|
---|
122 | if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) {
|
---|
123 | return false;
|
---|
124 | }
|
---|
125 | combinedHash += hash2;
|
---|
126 | }
|
---|
127 | return true;
|
---|
128 | }
|
---|
129 |
|
---|
130 | private /* static */ long lowerEight(byte[] bytes) {
|
---|
131 | return Longs.fromBytes(
|
---|
132 | bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
|
---|
133 | }
|
---|
134 |
|
---|
135 | private /* static */ long upperEight(byte[] bytes) {
|
---|
136 | return Longs.fromBytes(
|
---|
137 | bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]);
|
---|
138 | }
|
---|
139 | };
|
---|
140 |
|
---|
141 | // Note: We use this instead of java.util.BitSet because we need access to the long[] data field
|
---|
142 | static final class BitArray {
|
---|
143 | final long[] data;
|
---|
144 | long bitCount;
|
---|
145 |
|
---|
146 | BitArray(long bits) {
|
---|
147 | this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]);
|
---|
148 | }
|
---|
149 |
|
---|
150 | // Used by serialization
|
---|
151 | BitArray(long[] data) {
|
---|
152 | checkArgument(data.length > 0, "data length is zero!");
|
---|
153 | this.data = data;
|
---|
154 | long bitCount = 0;
|
---|
155 | for (long value : data) {
|
---|
156 | bitCount += Long.bitCount(value);
|
---|
157 | }
|
---|
158 | this.bitCount = bitCount;
|
---|
159 | }
|
---|
160 |
|
---|
161 | /** Returns true if the bit changed value. */
|
---|
162 | boolean set(long index) {
|
---|
163 | if (!get(index)) {
|
---|
164 | data[(int) (index >>> 6)] |= (1L << index);
|
---|
165 | bitCount++;
|
---|
166 | return true;
|
---|
167 | }
|
---|
168 | return false;
|
---|
169 | }
|
---|
170 |
|
---|
171 | boolean get(long index) {
|
---|
172 | return (data[(int) (index >>> 6)] & (1L << index)) != 0;
|
---|
173 | }
|
---|
174 |
|
---|
175 | /** Number of bits */
|
---|
176 | long bitSize() {
|
---|
177 | return (long) data.length * Long.SIZE;
|
---|
178 | }
|
---|
179 |
|
---|
180 | /** Number of set bits (1s) */
|
---|
181 | long bitCount() {
|
---|
182 | return bitCount;
|
---|
183 | }
|
---|
184 |
|
---|
185 | BitArray copy() {
|
---|
186 | return new BitArray(data.clone());
|
---|
187 | }
|
---|
188 |
|
---|
189 | /** Combines the two BitArrays using bitwise OR. */
|
---|
190 | void putAll(BitArray array) {
|
---|
191 | checkArgument(
|
---|
192 | data.length == array.data.length,
|
---|
193 | "BitArrays must be of equal length (%s != %s)",
|
---|
194 | data.length,
|
---|
195 | array.data.length);
|
---|
196 | bitCount = 0;
|
---|
197 | for (int i = 0; i < data.length; i++) {
|
---|
198 | data[i] |= array.data[i];
|
---|
199 | bitCount += Long.bitCount(data[i]);
|
---|
200 | }
|
---|
201 | }
|
---|
202 |
|
---|
203 | @Override
|
---|
204 | public boolean equals(@Nullable Object o) {
|
---|
205 | if (o instanceof BitArray) {
|
---|
206 | BitArray bitArray = (BitArray) o;
|
---|
207 | return Arrays.equals(data, bitArray.data);
|
---|
208 | }
|
---|
209 | return false;
|
---|
210 | }
|
---|
211 |
|
---|
212 | @Override
|
---|
213 | public int hashCode() {
|
---|
214 | return Arrays.hashCode(data);
|
---|
215 | }
|
---|
216 | }
|
---|
217 | } |
---|