1 | /*
|
---|
2 | * Copyright (C) 2011 The Guava Authors
|
---|
3 | *
|
---|
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
---|
5 | * in compliance with the License. You may obtain a copy of the License at
|
---|
6 | *
|
---|
7 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
8 | *
|
---|
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
|
---|
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
---|
11 | * or implied. See the License for the specific language governing permissions and limitations under
|
---|
12 | * the License.
|
---|
13 | */
|
---|
14 |
|
---|
15 | package com.google.common.hash;
|
---|
16 |
|
---|
17 | //import static com.google.common.base.Preconditions.checkArgument;
|
---|
18 |
|
---|
19 | import com.google.common.math.LongMath;
|
---|
20 | import com.google.common.primitives.Ints;
|
---|
21 | import com.google.common.primitives.Longs;
|
---|
22 | import java.math.RoundingMode;
|
---|
23 | import java.util.Arrays;
|
---|
24 | import javax.annotation.Nullable;
|
---|
25 |
|
---|
26 |
|
---|
27 | /**
|
---|
28 | * Collections of strategies of generating the k * log(M) bits required for an element to be mapped
|
---|
29 | * to a BloomFilter of M bits and k hash functions. These strategies are part of the serialized form
|
---|
30 | * of the Bloom filters that use them, thus they must be preserved as is (no updates allowed, only
|
---|
31 | * introduction of new versions).
|
---|
32 | *
|
---|
33 | * Important: the order of the constants cannot change, and they cannot be deleted - we depend on
|
---|
34 | * their ordinal for BloomFilter serialization.
|
---|
35 | *
|
---|
36 | * @author Dimitris Andreou
|
---|
37 | * @author Kurt Alfred Kluever
|
---|
38 | */
|
---|
39 | enum BloomFilterAdvancedStrategies implements BloomFilterAdvanced.Strategy {
|
---|
40 | /**
|
---|
41 | * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael
|
---|
42 | * Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
|
---|
43 | * performance of a Bloom filter (yet only needs two 32bit hash functions).
|
---|
44 | */
|
---|
45 | MURMUR128_MITZ_32() {
|
---|
46 | @Override
|
---|
47 | public <T> boolean put(
|
---|
48 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterAdvancedStrategies.BitArray bits) {
|
---|
49 | long bitSize = bits.bitSize();
|
---|
50 | long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
|
---|
51 | int hash1 = (int) hash64;
|
---|
52 | int hash2 = (int) (hash64 >>> 32);
|
---|
53 |
|
---|
54 | boolean bitsChanged = false;
|
---|
55 | for (int i = 1; i <= numHashFunctions; i++) {
|
---|
56 | int combinedHash = hash1 + (i * hash2);
|
---|
57 | // Flip all the bits if it's negative (guaranteed positive number)
|
---|
58 | if (combinedHash < 0) {
|
---|
59 | combinedHash = ~combinedHash;
|
---|
60 | }
|
---|
61 | bitsChanged |= bits.set(combinedHash % bitSize);
|
---|
62 | }
|
---|
63 | return bitsChanged;
|
---|
64 | }
|
---|
65 |
|
---|
66 | @Override
|
---|
67 | public <T> boolean mightContain(
|
---|
68 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterAdvancedStrategies.BitArray bits) {
|
---|
69 | long bitSize = bits.bitSize();
|
---|
70 | long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
|
---|
71 | int hash1 = (int) hash64;
|
---|
72 | int hash2 = (int) (hash64 >>> 32);
|
---|
73 |
|
---|
74 | for (int i = 1; i <= numHashFunctions; i++) {
|
---|
75 | int combinedHash = hash1 + (i * hash2);
|
---|
76 | // Flip all the bits if it's negative (guaranteed positive number)
|
---|
77 | if (combinedHash < 0) {
|
---|
78 | combinedHash = ~combinedHash;
|
---|
79 | }
|
---|
80 | if (!bits.get(combinedHash % bitSize)) {
|
---|
81 | return false;
|
---|
82 | }
|
---|
83 | }
|
---|
84 | return true;
|
---|
85 | }
|
---|
86 | },
|
---|
87 | /**
|
---|
88 | * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks different
|
---|
89 | * than the implementation in MURMUR128_MITZ_32 because we're avoiding the multiplication in the
|
---|
90 | * loop and doing a (much simpler) += hash2. We're also changing the index to a positive number by
|
---|
91 | * AND'ing with Long.MAX_VALUE instead of flipping the bits.
|
---|
92 | */
|
---|
93 | MURMUR128_MITZ_64() {
|
---|
94 | @Override
|
---|
95 | public <T> boolean put(
|
---|
96 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterAdvancedStrategies.BitArray bits) {
|
---|
97 | long bitSize = bits.bitSize();
|
---|
98 | byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
|
---|
99 | long hash1 = lowerEight(bytes);
|
---|
100 | long hash2 = upperEight(bytes);
|
---|
101 |
|
---|
102 | boolean bitsChanged = false;
|
---|
103 | long combinedHash = hash1;
|
---|
104 | for (int i = 0; i < numHashFunctions; i++) {
|
---|
105 | // Make the combined hash positive and indexable
|
---|
106 | bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize);
|
---|
107 | combinedHash += hash2;
|
---|
108 | }
|
---|
109 | return bitsChanged;
|
---|
110 | }
|
---|
111 |
|
---|
112 | @Override
|
---|
113 | public <T> boolean mightContain(
|
---|
114 | T object, Funnel<? super T> funnel, int numHashFunctions, BloomFilterAdvancedStrategies.BitArray bits) {
|
---|
115 | long bitSize = bits.bitSize();
|
---|
116 | byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
|
---|
117 | long hash1 = lowerEight(bytes);
|
---|
118 | long hash2 = upperEight(bytes);
|
---|
119 |
|
---|
120 | long combinedHash = hash1;
|
---|
121 | for (int i = 0; i < numHashFunctions; i++) {
|
---|
122 | // Make the combined hash positive and indexable
|
---|
123 | if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) {
|
---|
124 | return false;
|
---|
125 | }
|
---|
126 | combinedHash += hash2;
|
---|
127 | }
|
---|
128 | return true;
|
---|
129 | }
|
---|
130 |
|
---|
131 | private /* static */ long lowerEight(byte[] bytes) {
|
---|
132 | return Longs.fromBytes(
|
---|
133 | bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
|
---|
134 | }
|
---|
135 |
|
---|
136 | private /* static */ long upperEight(byte[] bytes) {
|
---|
137 | return Longs.fromBytes(
|
---|
138 | bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]);
|
---|
139 | }
|
---|
140 | };
|
---|
141 |
|
---|
142 | // Note: We use this instead of java.util.BitSet because we need access to the long[] data field
|
---|
143 | static final class BitArray {
|
---|
144 | final long[] data;
|
---|
145 | long bitCount;
|
---|
146 |
|
---|
147 | BitArray(long bits) {
|
---|
148 | this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]);
|
---|
149 | }
|
---|
150 |
|
---|
151 | // Used by serialization
|
---|
152 | BitArray(long[] data) {
|
---|
153 | checkArgument(data.length > 0, "data length is zero!");
|
---|
154 | this.data = data;
|
---|
155 | long bitCount = 0;
|
---|
156 | for (long value : data) {
|
---|
157 | bitCount += Long.bitCount(value);
|
---|
158 | }
|
---|
159 | this.bitCount = bitCount;
|
---|
160 | }
|
---|
161 |
|
---|
162 | /** Returns true if the bit changed value. */
|
---|
163 | boolean set(long index) {
|
---|
164 | if (!get(index)) {
|
---|
165 | data[(int) (index >>> 6)] |= (1L << index);
|
---|
166 | bitCount++;
|
---|
167 | return true;
|
---|
168 | }
|
---|
169 | return false;
|
---|
170 | }
|
---|
171 |
|
---|
172 | boolean get(long index) {
|
---|
173 | return (data[(int) (index >>> 6)] & (1L << index)) != 0;
|
---|
174 | }
|
---|
175 |
|
---|
176 | /** Number of bits */
|
---|
177 | long bitSize() {
|
---|
178 | return (long) data.length * Long.SIZE;
|
---|
179 | }
|
---|
180 |
|
---|
181 | /** Number of set bits (1s) */
|
---|
182 | long bitCount() {
|
---|
183 | return bitCount;
|
---|
184 | }
|
---|
185 |
|
---|
186 | BitArray copy() {
|
---|
187 | return new BitArray(data.clone());
|
---|
188 | }
|
---|
189 |
|
---|
190 | /** Combines the two BitArrays using bitwise OR. */
|
---|
191 | void putAll(BitArray array) {
|
---|
192 | checkArgument(
|
---|
193 | data.length == array.data.length,
|
---|
194 | "BitArrays must be of equal length (%s != %s)",
|
---|
195 | data.length,
|
---|
196 | array.data.length);
|
---|
197 | bitCount = 0;
|
---|
198 | for (int i = 0; i < data.length; i++) {
|
---|
199 | data[i] |= array.data[i];
|
---|
200 | bitCount += Long.bitCount(data[i]);
|
---|
201 | }
|
---|
202 | }
|
---|
203 |
|
---|
204 | @Override
|
---|
205 | public boolean equals(@Nullable Object o) {
|
---|
206 | if (o instanceof BitArray) {
|
---|
207 | BitArray bitArray = (BitArray) o;
|
---|
208 | return Arrays.equals(data, bitArray.data);
|
---|
209 | }
|
---|
210 | return false;
|
---|
211 | }
|
---|
212 |
|
---|
213 | @Override
|
---|
214 | public int hashCode() {
|
---|
215 | return Arrays.hashCode(data);
|
---|
216 | }
|
---|
217 |
|
---|
218 | public static void checkArgument(boolean expression, @Nullable Object errorMessage) {
|
---|
219 | if (!expression) {
|
---|
220 | throw new IllegalArgumentException(String.valueOf(errorMessage));
|
---|
221 | }
|
---|
222 | }
|
---|
223 | public static void checkArgument(
|
---|
224 | boolean b, @Nullable String errorMessageTemplate, @Nullable Object p1, @Nullable Object p2) {
|
---|
225 | if (!b) {
|
---|
226 | throw new IllegalArgumentException(String.format(errorMessageTemplate, p1, p2));
|
---|
227 | }
|
---|
228 | }
|
---|
229 |
|
---|
230 | }
|
---|
231 | } |
---|