zoukankan      html  css  js  c++  java
  • A Brief Bloom Filter(英文标题唬人罢了)

           控制台字体颜色参考了 https://blog.csdn.net/key_point/article/details/52667273 这篇博文。代码使用位图(BitMap)配合5个字符串哈希函数实现了一个简易布隆过滤器,过滤器中存储了C++的所有关键字用于查询操作!详解待后续文章,先上代码!

           头文件BitMap.h:

     1 /*BitMap.h*/
     2 
     3 #ifndef BITMAP_H_INCLUDED
     4 #define BITMAP_H_INCLUDED
     5 
     6 #include <stdio.h>
     7 #include <assert.h>
     8 #include <stdlib.h>
     9 #include <windows.h>
    10 
    /*
     * Bitmap state shared with the Bloom filter.
     * The routines that operate on it (BitMapInit, BitMapSet, BitMapTest,
     * BitMapDestroy) are called from BloomFilter.c.
     */
    struct BitMap
    {
        size_t *_bits;   /* NOTE(review): presumably the backing word array - confirm in the BitMap implementation */
        size_t  _range;  /* BloomFilter.c reduces every hash value modulo this field */
    };

    typedef struct BitMap BitMap;
    16 
    17 #endif // BITMAP_H_INCLUDED

           头文件BloomFilter.h:

     1 /*BloomFilter.h*/
     2 
     3 #ifndef BLOOMFILTER_H_INCLUDED
     4 #define BLOOMFILTER_H_INCLUDED
     5 
     6 #include "BitMap.h"
     7 
     8 typedef size_t(*HASH_FUNC)(const char* str);
     9 
    10 typedef struct                     /*5个哈希函数*/
    11 {
    12     BitMap _bm;
    13     HASH_FUNC hashfunc1;
    14     HASH_FUNC hashfunc2;
    15     HASH_FUNC hashfunc3;
    16     HASH_FUNC hashfunc4;
    17     HASH_FUNC hashfunc5;
    18 } BloomFilter;
    19 
    20 void BloomFilterInit(BloomFilter* bf, size_t range);
    21 void BloomFilterSet(BloomFilter* bf, const char* x);
    22 void BloomFilterReset(BloomFilter* bf, const char* x);
    23 void BloomFilterTest();
    24 void BloomFilterDestory(BloomFilter* bf);
    25 
    26 
    27 
    28 #endif // BLOOMFILTER_H_INCLUDED

           源文件BloomFilter.c:

      1 /*BloomFilter.c*/
      2 
      3 #include "BloomFilter.h"
      4 
      /*
       * BKDR string hash with multiplier 131.
       * Folds every character of str into the accumulator and returns the
       * result masked to its low 31 bits; the empty string hashes to 0.
       */
      static size_t BKDRHash(const char* str)
      {
          const size_t multiplier = 131;  /* other usual BKDR seeds: 31, 1313, 13131, 131313 */
          size_t acc = 0;
          size_t i;

          for (i = 0; str[i] != '\0'; ++i)
          {
              /* A negative plain char wraps to a large size_t, exactly as the
                 implicit conversion in a mixed char/size_t sum would. */
              acc = acc * multiplier + (size_t)str[i];
          }
          return acc & 0x7FFFFFFF;
      }
     15 
     /*
      * SDBM string hash: each character is folded in as acc = 65599 * acc + c.
      * Returns 0 for the empty string. Arithmetic wraps in size_t.
      */
     size_t SDBMHash(const char* str)
     {
         size_t acc = 0;
         const char *cursor;

         for (cursor = str; *cursor != '\0'; ++cursor)
         {
             acc = 65599 * acc + (size_t)*cursor;
         }
         return acc;
     }
     27 
     /*
      * RS string hash: acc = acc * factor + c for every character, where the
      * factor starts at 63689 and is itself multiplied by 378551 after each
      * character. Returns 0 for the empty string. Arithmetic wraps in size_t.
      */
     size_t RSHash(const char* str)
     {
         size_t factor = 63689;
         size_t acc = 0;

         for (; *str != '\0'; ++str)
         {
             acc = acc * factor + (size_t)*str;
             factor *= 378551;
         }
         return acc;
     }
     40 
     /*
      * Same scheme as RSHash with different constants: the factor starts at
      * 61456 and is multiplied by 45616 after each character.
      * Returns 0 for the empty string. Arithmetic wraps in size_t.
      */
     size_t FHash(const char *str)
     {
         size_t acc = 0;
         size_t factor = 61456;
         size_t pos = 0;

         while (str[pos] != '\0')
         {
             acc = acc * factor + (size_t)str[pos];
             factor *= 45616;
             ++pos;
         }
         return acc;
     }
     53 
     /*
      * Fifth hash of the filter, same scheme as RSHash: the factor starts at
      * 60000 and is multiplied by 111 after each character.
      * Returns 0 for the empty string. Arithmetic wraps in size_t.
      */
     size_t FiHash(const char *str)
     {
         size_t factor = 60000;
         size_t acc = 0;
         const char *cursor = str;

         for (;;)
         {
             const size_t c = (size_t)*cursor++;
             if (c == 0)
             {
                 break;
             }
             acc = acc * factor + c;
             factor *= 111;
         }
         return acc;
     }
     66 
     67 void BloomFilterInit(BloomFilter* bf, size_t range)
     68 {
     69     assert(bf);
     70     BitMapInit(&bf->_bm, range);
     71     bf->hashfunc1 = BKDRHash;
     72     bf->hashfunc2 = SDBMHash;
     73     bf->hashfunc3 = RSHash;
     74     bf->hashfunc4 = FHash;
     75     bf->hashfunc5 = FiHash;
     76 }
     77 
     78 void BloomFilterSet(BloomFilter* bf, const char* x)
     79 {
     80     size_t hash1, hash2, hash3, hash4, hash5;
     81     hash1 = bf->hashfunc1(x) % bf->_bm._range;
     82     hash2 = bf->hashfunc2(x) % bf->_bm._range;
     83     hash3 = bf->hashfunc3(x) % bf->_bm._range;
     84     hash4 = bf->hashfunc4(x) % bf->_bm._range;
     85     hash5 = bf->hashfunc5(x) % bf->_bm._range;
     86     BitMapSet(&bf->_bm, hash1);
     87     BitMapSet(&bf->_bm, hash2);
     88     BitMapSet(&bf->_bm, hash3);
     89     BitMapSet(&bf->_bm, hash4);
     90     BitMapSet(&bf->_bm, hash5);
     91 }
     92 int BloomFilterFind(BloomFilter* bf, const char* x)
     93 {
     94 
     95     size_t hash1, hash2, hash3, hash4, hash5;
     96     hash1 = bf->hashfunc1(x) % bf->_bm._range;
     97     hash2 = bf->hashfunc2(x) % bf->_bm._range;
     98     hash3 = bf->hashfunc3(x) % bf->_bm._range;
     99     hash4 = bf->hashfunc4(x) % bf->_bm._range;
    100     hash5 = bf->hashfunc5(x) % bf->_bm._range;
    101     if (BitMapTest(&bf->_bm, hash1) == -1)
    102         return -1;
    103     if (BitMapTest(&bf->_bm, hash2) == -1)
    104         return -1;
    105     if (BitMapTest(&bf->_bm, hash3) == -1)
    106         return -1;
    107     if (BitMapTest(&bf->_bm, hash4) == -1)
    108         return -1;
    109     if (BitMapTest(&bf->_bm, hash5) == -1)
    110         return -1;
    111     return 0;
    112 }
    113 
    114 void BloomFilterDestory(BloomFilter* bf)
    115 {
    116     assert(bf);
    117     BitMapDestroy(&bf->_bm);
    118 }

           源文件test.c:

     1 /*test.c*/
     2 
     3 #include "BloomFilter.h"
     4 
     5 int main()
     6 {
     7     int n;
     8     BloomFilter bf;
     9     char str[50000];
    10     FILE *out, *in;
    11     out = fopen("关键字.txt", "r");
    12     BloomFilterInit(&bf, 10000);
    13     while (!feof(out))
    14     {
    15 
    16         fscanf(out, "%s", str);
    17         /*  printf("%s
    ",str);*/
    18         BloomFilterSet(&bf, str);
    19     }
    20     printf("                      ****************请输入你要执行的操作************************
    ");
    21     printf("                      ****************1.查询是否存在该关键字***********************
    ");
    22     printf("                      ****************2.贮存关键字*********************************
    ");
    23     printf("                      ****************3.结束此次操作*******************************
    ");
    24     while (scanf("%d", &n) != EOF)
    25     {
    26         if (n == 3)
    27             break;
    28         if (n == 1)
    29         {
    30             printf("请输入你要查询的代码,按#回到主界面
    ");
    31             while (1)
    32             {
    33 
    34                 scanf("%s", str);
    35                 if (str[0] == '#')
    36                     break;
    37 
    38                 if (BloomFilterFind(&bf, str) == 0)
    39                 {
    40                     printf("该关键字(");
    41                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
    42                              FOREGROUND_BLUE |FOREGROUND_INTENSITY );
    43                     printf("%s", str);
    44                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
    45                              FOREGROUND_RED |FOREGROUND_GREEN | FOREGROUND_BLUE);
    46                     printf(")在表中!
    ");
    47                 }
    48                 else
    49                 {
    50                     printf("该关键字(");
    51                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
    52                              FOREGROUND_RED |FOREGROUND_INTENSITY );
    53                     printf("%s",str);
    54                     SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE),
    55                              FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE);
    56                     printf(")不在表中,请检查是否拼写错误!或者将其加入表中!
    ");
    57                 }
    58             }
    59         }
    60         if (n == 2)
    61         {
    62             while (1)
    63             {
    64                 printf("请输入你要保存的关键字
    按0退出
    ");
    65                 scanf("%s", str);
    66                 if (str[0] == '0')
    67                     break;
    68                 in = fopen("关键字.txt", "a");
    69                 fprintf(in, "
    %s", str);
    70             }
    71         }
    72     }
    73     return 0;
    74 }

           程序运行截图如下:

           

  • 相关阅读:
    unittest框架assert断言
    python logging 总结
    好文章推荐 数据库mysql
    python 日志模块 日志格式
    django admin 后台总结(转载)
    xpath解析html标签
    lua 排序table.sort()用法
    lua 随机数 math.random()和math.randomseed()用法
    三种方法获取 lua时间戳
    crontab调用python脚本新思路
  • 原文地址:https://www.cnblogs.com/25th-engineer/p/9484315.html
Copyright © 2011-2022 走看看