zoukankan      html  css  js  c++  java
  • 海量字符串查找——bloom filter,c

    对于海量字符串的查找,一般有两种方法:一种是建树,另一种是 BF 算法,即布隆过滤器(Bloom filter)。布隆过滤器从原理上讲比较简单,也易于实现,主要就是根据哈希算法来实现。
    /*
     * Returns the number of characters in the NUL-terminated string `ch`,
     * not counting the terminator.
     *
     * ch  must point to a valid NUL-terminated string (not NULL).
     */
    int len(char *ch)
    {
            int m = 0;
            /* Walk forward until the terminating NUL byte is reached. */
            while (ch[m] != '\0') {
                m++;
            }
            return m;
    }
    
    
    /*
     * Bloom-filter membership test: checks whether every bit selected for `ch`
     * by the eleven hash functions is set in the bit array `vertor`.
     *
     * vertor  bit array, read through GETBIT
     * ch      NUL-terminated string to look up
     *
     * Returns false as soon as one probed bit is clear (the string was never
     * inserted). Returns true when all probed bits are set, which for a Bloom
     * filter means "probably present" -- false positives are possible.
     */
    bool judge(char *vertor,char ch[]){
        /* Measure the string once instead of re-scanning it for every probe. */
        int slen = len(ch);

        if (GETBIT(vertor, RSHash(ch, slen)) == 0)   return false;
        if (GETBIT(vertor, JSHash(ch, slen)) == 0)   return false;
        if (GETBIT(vertor, PJWHash(ch, slen)) == 0)  return false;
        if (GETBIT(vertor, ELFHash(ch, slen)) == 0)  return false;
        if (GETBIT(vertor, BKDRHash(ch, slen)) == 0) return false;
        if (GETBIT(vertor, SDBMHash(ch, slen)) == 0) return false;
        if (GETBIT(vertor, DJBHash(ch, slen)) == 0)  return false;
        if (GETBIT(vertor, DEKHash(ch, slen)) == 0)  return false;
        if (GETBIT(vertor, BPHash(ch, slen)) == 0)   return false;
        if (GETBIT(vertor, FNVHash(ch, slen)) == 0)  return false;
        if (GETBIT(vertor, APHash(ch, slen)) == 0)   return false;

        return true;
    }
    
    /*
     * Builds a Bloom filter from the whitespace-separated strings in a "pool"
     * file, then tests every string of a "check" file against it, writing one
     * "yes" or "no" line per checked string to a result file.
     *
     * Usage: prog [pool_file check_file result_file]
     *   When three path arguments are given they are used; otherwise the
     *   built-in default paths below are used.
     *
     * Standard output receives a running match counter (one line per hit),
     * then "<inserted> <checked>", then the elapsed CPU time in seconds.
     *
     * Returns 0 on success, 1 if a file cannot be opened, memory cannot be
     * allocated, or the result file cannot be flushed.
     */
    int main(int argc,char *argv[]){
        /* Defaults; never written into argv, which may have fewer than 4 slots. */
        const char *pool_path   = "/Users/emaillist.dat";
        const char *check_path  = "/Users/checklist.dat";
        const char *result_path = "/Users/result2222.dat";
        if (argc >= 4) {
            pool_path   = argv[1];
            check_path  = argv[2];
            result_path = argv[3];
        }

        int rc = 1;
        int pos = 1, k = 0, j = 0;
        FILE *fp_strpool = NULL, *fp_checkedstr = NULL, *fp_result = NULL;
        char *vertor = NULL;
        char ch[ARRAY_SIZE];
        char fmt[32];

        clock_t a = clock();

        /*
         * Bound every token read to the buffer size ("%<N-1>s") so an overlong
         * token cannot overflow `ch`; such a token is read as several pieces.
         * NOTE(review): assumes ARRAY_SIZE >= 2 -- confirm.
         */
        snprintf(fmt, sizeof fmt, "%%%zus", sizeof ch - 1);

        /* Open the three files. */
        fp_strpool = fopen(pool_path, "r");
        if (fp_strpool == NULL) {
            perror(pool_path);
            goto cleanup;
        }
        fp_checkedstr = fopen(check_path, "r");
        if (fp_checkedstr == NULL) {
            perror(check_path);
            goto cleanup;
        }
        fp_result = fopen(result_path, "w");
        if (fp_result == NULL) {
            perror(result_path);
            goto cleanup;
        }

        /*
         * Allocate the bit array; calloc already zero-fills it.
         * NOTE(review): hash values are presumably reduced to the array's bit
         * range inside SETBIT/GETBIT or the hash functions -- confirm.
         */
        vertor = (char *)calloc(SIZE, sizeof *vertor);
        if (vertor == NULL) {
            perror("calloc");
            goto cleanup;
        }

        /* Insertion phase: set the bit chosen by each hash for every pool string. */
        while (fscanf(fp_strpool, fmt, ch) == 1) {
            int slen = len(ch);
            SETBIT(vertor, RSHash(ch, slen));
            SETBIT(vertor, JSHash(ch, slen));
            SETBIT(vertor, PJWHash(ch, slen));
            SETBIT(vertor, ELFHash(ch, slen));
            SETBIT(vertor, BKDRHash(ch, slen));
            SETBIT(vertor, SDBMHash(ch, slen));
            SETBIT(vertor, DJBHash(ch, slen));
            SETBIT(vertor, DEKHash(ch, slen));
            SETBIT(vertor, BPHash(ch, slen));
            SETBIT(vertor, FNVHash(ch, slen));
            SETBIT(vertor, APHash(ch, slen));
            j++;
        }

        /* Query phase: one verdict line per checked string. */
        while (fscanf(fp_checkedstr, fmt, ch) == 1) {
            k++;
            if (judge(vertor, ch)) {
                printf("%d\n", pos);
                pos++;
                fputs("yes\n", fp_result);
            } else {
                fputs("no\n", fp_result);
            }
        }
        printf("%d %d\n", j, k);
        rc = 0;

    cleanup:
        free(vertor);
        /* Close the files; a failed close on the output means lost results. */
        if (fp_result != NULL && fclose(fp_result) != 0) {
            perror(result_path);
            rc = 1;
        }
        if (fp_checkedstr != NULL) {
            fclose(fp_checkedstr);
        }
        if (fp_strpool != NULL) {
            fclose(fp_strpool);
        }

        if (rc == 0) {
            clock_t b = clock();
            double duration = (double)(b - a) / CLOCKS_PER_SEC;
            printf("%f seconds\n", duration);
        }
        return rc;
    }
    
     
     
     
     
  • 相关阅读:
    VS工作目录,输出目录
    Google的C++开源代码项
    C++文件读写
    深拷贝浅拷贝
    Efficient Graph-Based Image Segmentation
    Graph Cut 简介
    Graph Cut
    "GrabCut" - Interactive Foreground Extraction using Iter
    EM算法
    Python图像处理库(2)
  • 原文地址:https://www.cnblogs.com/xiaoba1203/p/5581467.html
Copyright © 2011-2022 走看看