Redis 实现布隆过滤器
昨天听马士兵教育张福刚讲公开课,里面讲解了布隆过滤器,今天无聊没事干,整理了一下笔记。关于布隆过滤器是什么东西,有什么应用场景就不做讨论了,网上有很多,大家可以自行了解,只记录实现:
1. pom 依赖
<dependency> <groupId>redis.clients</groupId> <artifactId>jedis</artifactId> <version>3.3.0</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> <version>18.0</version> </dependency>
2. 具体实现
package cn.bridgeli.demo; import com.google.common.hash.Funnels; import com.google.common.hash.Hashing; import org.junit.Before; import org.junit.Test; import redis.clients.jedis.Jedis; import redis.clients.jedis.JedisPool; import redis.clients.jedis.Pipeline; import java.nio.charset.StandardCharsets; /** * @author BridgeLi * @date 2020/6/6 16:38 */ public class BloomFilter { private Jedis jedis = null; /** * 预估的数据量 */ private static long n = 10000; /** * 容忍的错误率 */ private static double fpp = 0.01; private static long numBits = optimalNumOfBits(n, fpp); private static int numHashFunctions = optimalNumOfHashFunctions(n, numBits); /** * 根据预估数据量 n 和允许的错误率 fpp 计算需要的 bit 数组的长度 * * @param n * @param fpp * @return */ private static long optimalNumOfBits(long n, double fpp) { if (0 == fpp) { fpp = Double.MIN_VALUE; } return (long) (-n * Math.log(fpp) / (Math.log(2) * Math.log(2))); } /** * 根据预估的数据量和计算出来的需要的 bit 数组的长度,计算所需要的 hash 函数的个数 * * @param n * @param numBits * @return */ private static int optimalNumOfHashFunctions(long n, long numBits) { return Math.max(1, (int) Math.round((double) numBits / n * Math.log(2))); } /** * 预热数据 */ @Before public void testBloomFilterBefore() { BloomFilter bloomFilter = new BloomFilter(); bloomFilter.init(); for (int i = 0; i < n; i++) { bloomFilter.put("bf", String.valueOf(i + 100)); } } /** * 过滤数据 */ @Test public void testBloomFilter() { BloomFilter bloomFilter = new BloomFilter(); bloomFilter.init(); int ex_count = 0; int ne_count = 0; for (int i = 0; i < 2 * n; i++) { boolean exist = bloomFilter.isExist("bf", String.valueOf(i + 100)); if (exist) { ex_count++; } else { ne_count++; } } System.out.println("ex_count: " + ex_count + ", ne_count: " + ne_count); } private void init() { JedisPool jedisPool = new JedisPool("127.0.0.1", 6379); jedis = jedisPool.getResource(); } public boolean isExist(String where, String key) { long[] indexs = getIndexs(key); boolean result = false; try (Pipeline pipeline = jedis.pipelined()) { for (long index : indexs) { pipeline.getbit(where, index); } // 只要有一个位置为 false,即代表该数据不存在 result = !pipeline.syncAndReturnAll().contains(false); } catch (Exception e) { } return result; } public void put(String where, String key) { long[] indexs = getIndexs(key); try (Pipeline pipeline = jedis.pipelined()) { for (long index : indexs) { pipeline.setbit(where, index, true); } pipeline.sync(); } catch (Exception e) { } } private long[] getIndexs(String key) { long hash1 = Hashing.murmur3_128().hashObject(key, Funnels.stringFunnel(StandardCharsets.UTF_8)).asLong(); long hash2 = hash1 >>> 16; long[] result = new long[numHashFunctions]; for (int i = 0; i < numHashFunctions; i++) { long combinedHash = hash1 + i * hash2; if (combinedHash < 0) { combinedHash = ~combinedHash; } result[i] = combinedHash % numBits; } return result; } }
全文完,如果本文对您有所帮助,请花 1 秒钟帮忙点击一下广告,谢谢。
作 者: BridgeLi,https://www.bridgeli.cn
原文链接:http://www.bridgeli.cn/archives/676
版权声明:非特殊声明均为本站原创作品,转载时请注明作者和原文链接。
作 者: BridgeLi,https://www.bridgeli.cn
原文链接:http://www.bridgeli.cn/archives/676
版权声明:非特殊声明均为本站原创作品,转载时请注明作者和原文链接。
近期评论