zoukankan      html  css  js  c++  java
  • Bloom Filter布隆过滤器原理和实现(2)

    这一篇参考 leveldb 的实现,给出一个简化版的布隆过滤器。

    #include <cstdint>
    #include <cstdlib>
    #include <cstring>
    #include <iostream>
    #include <string>
    
    using namespace std;
    
    #ifndef FALLTHROUGH_INTENDED
    // Marks a deliberate fall-through from one switch case into the next.
    // Expands to an empty statement, so the use site still needs its own ';'.
    // Every line of the definition except the last must end in a line
    // continuation, otherwise the do/while lands at file scope as stray code.
    #define FALLTHROUGH_INTENDED \
      do {                       \
      } while (0)
    #endif
    
    // Reports the host byte order: returns 1 on a little-endian machine and 0 on
    // a big-endian one.
    int LittleEndian() {
        const int probe = 1;
        // The lowest-addressed byte of the integer 1 holds the value 1 only when
        // the least significant byte is stored first.
        const char* const firstByte = reinterpret_cast<const char*>(&probe);
        return firstByte[0];
    }
    
    // Decodes a 32-bit unsigned integer stored in little-endian byte order.
    //
    // ptr must point to at least four readable bytes; ptr[0] is the least
    // significant byte of the result and ptr[3] the most significant.
    //
    // The value is assembled with shifts and ORs, which yields the same result
    // on every host regardless of its native byte order, so no endianness
    // check is needed here.
    inline uint32_t DecodeFixed32(const char* ptr) {
        // Go through uint8_t so bytes >= 0x80 are not sign-extended when widened.
        const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);

        return (static_cast<uint32_t>(buffer[0])) |
               (static_cast<uint32_t>(buffer[1]) << 8) |
               (static_cast<uint32_t>(buffer[2]) << 16) |
               (static_cast<uint32_t>(buffer[3]) << 24);
    }
    
    // Murmur-like 32-bit hash of the n bytes starting at data, mixed with seed.
    // Whole 4-byte words are consumed first, then the 0-3 leftover bytes.
    uint32_t Hash(const char* data, size_t n, uint32_t seed) {
      const uint32_t kMultiplier = 0xc6a4a793;
      const uint32_t kTailShift = 24;
      const char* const end = data + n;
      uint32_t hash = seed ^ (n * kMultiplier);

      // Main loop: fold in one little-endian 32-bit word per iteration.
      for (; end - data >= 4; data += 4) {
        hash += DecodeFixed32(data);
        hash *= kMultiplier;
        hash ^= (hash >> 16);
      }

      // Tail: at most three bytes remain. The highest leftover byte is added
      // first, and the final multiply/shift mix runs once if any byte remains.
      const auto tail = end - data;
      if (tail == 3) {
        hash += static_cast<uint8_t>(data[2]) << 16;
      }
      if (tail >= 2) {
        hash += static_cast<uint8_t>(data[1]) << 8;
      }
      if (tail >= 1) {
        hash += static_cast<uint8_t>(data[0]);
        hash *= kMultiplier;
        hash ^= (hash >> kTailShift);
      }
      return hash;
    }
    
    // Bit array packed eight bits per byte, with all bits initially cleared.
    class Bitmap {
    public:
        // Creates a bitmap able to hold at least `size` bits. One byte more than
        // size / 8 is allocated so the trailing partial byte is addressable.
        Bitmap(size_t size) : size_(size) {
            bits.resize((size_ >> 3) + 1, 0);
        }

        // Sets bit `val`. Throws std::out_of_range (raised by std::string::at)
        // when `val` lies beyond the allocated storage instead of writing out of
        // bounds.
        void bitmapSet(size_t val) {
            char& cell = bits.at(val >> 3);  // val >> 3 == val / 8: byte index
            // Do the OR on unsigned char so bit 7 never involves a signed value.
            cell = static_cast<char>(static_cast<unsigned char>(cell) | maskFor(val));
        }

        // Returns true when bit `val` is set. Throws std::out_of_range (raised by
        // std::string::at) when `val` lies beyond the allocated storage.
        bool bitmapGet(size_t val) const {
            const unsigned char cell = static_cast<unsigned char>(bits.at(val >> 3));
            return (cell & maskFor(val)) != 0;
        }

    private:
        // Single-bit mask selecting bit (val % 8) within its byte.
        static unsigned char maskFor(size_t val) {
            return static_cast<unsigned char>(1u << (val & 7));
        }

        size_t size_;      // Number of bits requested at construction.
        std::string bits;  // Packed storage, (size_ / 8) + 1 bytes.
    };
    
    class BloomFilter {
    private:
        static uint32_t BloomHash(const std::string& key) {
            return Hash(key.data(), key.size(), 0xbc9f1d34);
        }
        
        enum { defaultSize = 100000000 * 16 };  //16亿
    
    public:
        BloomFilter() : k_(8) {
            bitmap_ = new Bitmap(defaultSize);
        }
        ~BloomFilter() {
            delete bitmap_;
        }
        void Add(const string& s) {
            uint32_t h = BloomHash(s);
            const uint32_t delta = (h >> 17) | (h << 15); 
            for (size_t i = 0; i < k_; ++i) {
                uint32_t bitpos = h % defaultSize;
                bitmap_->bitmapSet(bitpos);
                h += delta;
            }
        }
        bool Contain(const string& s) {
            bool ret = true;
            uint32_t h = BloomHash(s);
            const uint32_t delta = (h >> 17) | (h << 15); 
            for (size_t i = 0; i < k_; ++i) {
                uint32_t bitpos = h % defaultSize;
                ret = ret && bitmap_->bitmapGet(bitpos);
                h += delta;
            }
            return ret;
        }
    
    private:
        int k_;  // hash的个数
        Bitmap* bitmap_;
    };
    
    void bloomFilterTest() {
        std::string email = "1293173298@qq.com";
        BloomFilter bf;
        bf.Add(email);
        bool ret1 = bf.Contain(email);       // true
        bool ret2 = bf.Contain("even.com");  // false
    }
    
    // Program entry point: runs the Bloom filter smoke test and exits with 0.
    int main() {
        bloomFilterTest();

    #ifdef _WIN32
        // "pause" is a Windows command-interpreter built-in that keeps the console
        // window open. Other platforms have no such command, so the call is
        // compiled only on Windows to avoid a shell error there.
        std::system("pause");
    #endif
        return 0;
    }
  • 相关阅读:
    linux-常用命令
    linux
    测试基础
    链家笔试1
    链家笔试2
    链家笔试3
    MySql优化
    Http1.1和Http2.0
    Charles学习
    链表中倒数第k个结点
  • 原文地址:https://www.cnblogs.com/evenleee/p/12020186.html
Copyright © 2011-2022 走看看