zoukankan      html  css  js  c++  java
  • 布隆过滤器的代码

    # -*- coding: utf-8 -*-
    import redis
    from hashlib import md5
    
    class SimpleHash(object):
        """Seeded polynomial-style string hash masked to ``cap - 1``.

        The final ``&`` with ``cap - 1`` only yields a uniform range of
        ``[0, cap)`` when ``cap`` is a power of two.
        """

        def __init__(self, cap, seed):
            """Store the mask base *cap* and the multiplier *seed*."""
            self.seed = seed
            self.cap = cap

        def hash(self, value):
            """Return the masked hash of the character sequence *value*.

            An empty *value* hashes to 0.
            """
            acc = 0
            for ch in value:
                # ``+=`` (not ``=``) makes the effective multiplier seed + 1;
                # kept as-is so previously stored bit positions stay valid.
                acc += self.seed * acc + ord(ch)
            return acc & (self.cap - 1)
    
    
    class BloomFilter(object):
        """Bloom filter for strings whose bit arrays live in Redis strings.

        Every input is reduced to its MD5 hex digest. The first two hex digits
        of the digest select one of ``blockNum`` Redis keys, and the digest is
        run through one ``SimpleHash`` per seed to obtain the bit offsets that
        represent the input inside that key.
        """

        def __init__(self, host='localhost', port=6379, db=0, password=None, blockNum=1, key='bloomfilter'):
            """
            :param host: the host of Redis
            :param port: the port of Redis
            :param db: which db in Redis
            :param password: the Redis password, or None for no authentication
            :param blockNum: one blockNum for about 90,000,000; if you have more strings for filtering, increase it.
                The block is chosen from two hex digits (0-255), so values
                above 256 add no further blocks.
            :param key: the key's name in Redis (the block index is appended)
            """
            self.server = redis.Redis(host=host, port=port, db=db, password=password)
            # Redis strings max out at 512 MB; 1 << 31 bits uses 256 MB per key.
            self.bit_size = 1 << 31
            self.seeds = [5, 7, 11, 13, 31, 37, 61]
            self.key = key
            self.blockNum = blockNum
            self.hashfunc = [SimpleHash(self.bit_size, seed) for seed in self.seeds]

        def _locate(self, str_input):
            """Return ``(redis_key, bit_offsets)`` identifying *str_input*."""
            digest = md5(str_input.encode('utf-8')).hexdigest()
            # First two hex digits (0-255) pick the block this input lives in.
            name = self.key + str(int(digest[0:2], 16) % self.blockNum)
            return name, [f.hash(digest) for f in self.hashfunc]

        def isContains(self, str_input):
            """Return True if *str_input* may have been inserted, else False.

            Empty or None input is never considered present. As with any
            Bloom filter, True can be a false positive; False is definitive.
            """
            if not str_input:
                return False
            name, offsets = self._locate(str_input)
            # all() stops at the first unset bit, saving Redis round trips.
            return all(self.server.getbit(name, loc) for loc in offsets)

        def insert(self, str_input):
            """Set the bits for *str_input* in Redis.

            Empty or None input is ignored, mirroring ``isContains``, which
            never reports such input as present.
            """
            if not str_input:
                return
            name, offsets = self._locate(str_input)
            for loc in offsets:
                self.server.setbit(name, loc, 1)
    
    
    if __name__ == '__main__':
        # Demo against a local Redis: report whether the URL has been seen,
        # and record it on first sight.
        target_url = 'http://www.baidu.com'

        bf = BloomFilter()
        if not bf.isContains(target_url):  # check whether the string is already known
            print('not exists!')
            # Not present yet, so add it to the filter.
            bf.insert(target_url)
        else:
            print('exists!')
    以上内容为课堂笔记,如有雷同,请与我联系
  • 相关阅读:
    2017寒假练习赛总结(实时更新)
    NOIP
    挖坑--总结
    BZOJ3709: [PA2014]Bohater
    BZOJ3714: [PA2014]Kuglarz
    BZOJ2276: [Poi2011]Temperature
    VIJOS P1543极值问题
    BZOJ2749: [HAOI2012]外星人
    BZOJ2173: 整数的lqp拆分
    BZOJ1100: [POI2007]对称轴osi
  • 原文地址:https://www.cnblogs.com/ArtisticMonk/p/10255658.html
Copyright © 2011-2022 走看看