zoukankan      html  css  js  c++  java
  • Hash

    概念

    通过一个hash函数H,将一组数据(包括字符串,较大的数等)转化成能够用变量表示或直接可以作为下标的数,可以通过hash函数转化得到的数值成为hash值,hash可以实现快速查找和匹配,常用的有字符串hash哈希表

    字符串hash

    题目

    给定一个字符串 (A) 和一个字符串 (B),求在 (B) 中的出现次数。 (A)(B)中的字符均为英语大写字母或小写字母。

    (A) 中不同位置出现的 (B) 可重叠

    我们选取两个合适的互质的数 (b)(h) (b < h)假设字符串(C = c_1c_2c_3……c_m)

    (H(C) = (c_1b^{m-1} + c_2b^{m-2} + c_mb^{0})~mod~h;)

    (b) 代表的是基数,相当于把字符串看做 b 进制数——《一本通提高篇》(很诡异的东西

    (H(C,K)) 为前 (K) 个字符组成的字符串的哈希值

    (H(C,K + 1) = H(C,K)*b + C_{k + 1})

    举个栗子:

    如果字符串(C = “ACDA”)(令A表示1,B表示2)则

    (H(C,1) = 1;)

    (H(C,2) = 1*b + 3;)

    (H(C,3) = 1*b^2 + 3*b + 4;)

    (H(C,4) = 1*b^3 + 3*b^2 + 4 * b + 1;)

    判断主串的一个字符和另一个字符是够匹配

    即判断字符串(C = c_1c_2……c_m)从位置(k+1)开始的长度为(n)的子串(C^, = c_{k + 1}c_{k + 2}……c_{k + n})的哈希值与另一个匹串(S = s_1s_2……s_n)是否相等

    (H(C') = H(C, k + n) - H(C, K)*b^n)

    因此可得出求字符串区间hash值(l为左边界,r为右边界)

    (H(C') = H(C, r) - H(C, l)*b^(r - l + 1))

    int get(int l,int r){ 
       return hs[r] - hs[l - 1] * pre[r - l + 1];
    }
    

    字符串区间删去一个字符后的hash值类比可以得到

    int del(int l,int r,int pos){
    	return get(l, pos - 1) * pre[r-pos] + get(pos + 1, r);
    }
    

    然后就是习题了= =

    T1 子串查找

    模板题

    #include <iostream>
    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <algorithm>
    using namespace std;
    const int M = 1e6 + 10;
    typedef unsigned long long ull;
    int read(){
      int x = 0,f = 1;char c = getchar();
      while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
      while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
      return x * f;
    }
    char sa[M],sb[M];
    ull base = 155,sum[M],pow[M],falg;
    int ans;
    int main(){
       pow[0] = 1;
       scanf("%s%s",sa + 1,sb + 1);
       
       int lena = strlen(sa + 1),lenb = strlen(sb + 1);
       
       for(int i = 1;i < 1000000; i++)
       	   pow[i] = pow[i - 1]*base;//处理进位
       	   
       sum[0] = 0;
        
       for(int i = 1;i <= lena; i++){
       	  sum[i] = sum[i - 1] * base + (ull)(sa[i] - 'A' + 1);
       }
       for(int i = 1; i <= lenb; i++){
       	   falg = falg * base + (ull)(sb[i] - 'A' + 1);
       }
       for(int i = 0;i <= lena - lenb; i++){
       	 if(falg == sum[i + lenb] - sum[i] * pow[lenb]) ans++; 
       }
       cout<<ans;
    }
    
    

    T2 图书管理

    #include <iostream>
    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <algorithm>
    using namespace std;
    typedef unsigned long long ull;
    const int mod1 = 1e7 + 7;
    const int mod2 = 1e7 + 9;
    const int base = 1e9;
    const int M = 1e7;
    int n;
    char flag[5], S[M];
    bool A[M], B[M];
    int main() {
        cin>>n;
    
        for (int i = 1; i <= n; i++) {
            cin >> flag;
            ull sum1 = 1, sum2 = 1;
            gets(S);
    
            for (int j = 0; j < strlen(S); j++)
                sum1 = (sum1 * base % mod1 + S[j]) % mod1, sum2 = (sum2 * base % mod2 + S[j]) % mod2;
    
            if (flag[0] == 'a')
               A[sum1] = 1, B[sum2] = 1;
    
            if (flag[0] == 'f'){
                if (A[sum1] && B[sum2])
                    puts("yes");
                else
                    puts("no");
            }
        }
    
        return 0;
    }
    
    

    T3 Power Strings

    #include <iostream>
    #include <cstdio>
    #include <queue>
    #include <cstring>
    #include <vector>
    #include <cmath>
    #include <algorithm>
    using namespace std;
    const int A = 1e3 + 2;
    const int B = 1e4 + 2;
    const int C = 1e5 + 2;
    const int D = 1e6 + 2;
    const int inf = 0x3f3f3f3f;
    typedef unsigned long long ull;
    int read(){
    	int x = 0,f = 1;char c = getchar();
    	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
    	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
    	return x*f;
    }
    char c[D];
    ull hs[D],power[D],base = 37;
    int main(){
    	power[0] = 1; 
    	for(int i = 1;i <= D; i++)
    	  power[i] = power[i - 1]*base;
    	  
    	while (scanf("%s", c)){
    	  	
    	  if (!strcmp(c, "."))break;
    	  
    		ull ans = 0;int len = strlen(c);
    		hs[len] = 0;
    		
    		for (int i = len - 1; i >= 0; i--){
    			
    			hs[i] = hs[i + 1] * base + c[i] - 'a' + 1;
    		}
    		for (int k = 1; k <= len; k++)
    		{
    			if (len % k != 0)
    				continue;
    			 
    			 ull tomp = hs[0] - hs[k] * power[k];
    			 int j = 0;
    			 for(j = k;j < len; j = j + k){
    			 	
    			 	 if(tomp != hs[j] - hs[k + j]*power[k]) break;
    			 	 else tomp = hs[j] - hs[k + j]*power[k];
    			 }
    			 if(j == len){
    			 	ans = len / k;break;
    			 }	
    		}
    		cout<<ans<<"
    ";	
    	}
    	return 0;
    }
    
    

    T4 Seek the Name, Seek the Fame

    #include <iostream>
    #include <cstring>
    #include <cstdio>
    #include <queue>
    #include <vector>
    #include <cmath>
    #include <algorithm>
    using namespace std;
    const int A = 1e3 + 2;
    const int B = 1e4 + 2;
    const int C = 1e5 + 2;
    const int D = 1e6 + 2;
    const int inf = 0x3f3f3f3f;
    const int mod = 1e9 + 7;
    typedef unsigned long long ull;
    int read(){
    	int x = 0,f = 1;char c = getchar();
    	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
    	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
    	return x*f;
    }
    char s[D];
    ull base = 34,power[D],hs[D];
    int main(){
    	
       power[0] = 1;
       
       for(int i = 1;i <= D; i++) power[i] = power[i - 1] * base;
       
       while(scanf ("%s", s + 1) != EOF){
       	   int len = strlen(s + 1);
       	   hs[0] = 0;
       	   for(int i = 1; i <= len; i++){
       	        hs[i] = hs[i - 1] * base + s[i] - 'a' + 1; 	  
    	   }
    	   for(int i = 1;i <= len; i++){
    	   	   if(hs[i] == hs[len] - hs[len - i] * power[i]){
    	   	   	      printf("%d ",i);
    			}
    	   }
    	   printf("
    ");
       }
    }
    

    T5 「BalticOI 2014 Day 1」三个朋友

    求区间hash和删去字符后的hash

    #include <iostream>
    #include <cstdio>
    #include <queue>
    #include <cstring>
    #include <string>
    #include <cmath>
    #include <map>
    #define int unsigned long long
    using namespace std;
    const int A = 1e3 + 2;
    const int B = 1e4 + 2;
    const int C = 1e5 + 2;
    const int D = 2e6 + 5;
    const int inf = 0x3f3f3f3f;
    const int mod = 99984198447;
    int read(){
    	int x = 0,f = 1;char c = getchar();
    	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
    	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
    	return x*f;
    }
    
    char s[D];
    int pre[D],base = 999983,hs[D],ans,ll,rr,flag,n,mid,mark;;
    map<unsigned long long, int> vis;
    int get(int l,int r){
    	return hs[r] - hs[l - 1] * pre[r - l + 1];
    }
    
    int del(int l,int r,int pos){
    	return get(l, pos - 1) * pre[r-pos] + get(pos + 1, r);
    }
    
    bool check(int pos){
    
    	if(pos == mid){
    		ll = get(1, pos - 1);
    		rr = get(pos + 1, n);
    		return ll == rr;
    	}
    
    	else if(pos < mid){
    		ll = del(1, mid, pos);
    		rr = get(mid + 1, n);
    		return ll == rr;
    	}
    
    	else{
    		ll = get(1, mid - 1);
    		rr = del(mid, n, pos);
    		return ll == rr;
    	}
    
    }
    void itit(){
       pre[0]=1;
      for(int i = 1;i <= n; i++){
    		pre[i] = pre[i - 1] * base;hs[i] = hs[i - 1] * base + s[i];
    	}
    }
    signed main(){
    	
    	cin >> n >> (s + 1);
    	
    	mid = (n + 1) >> 1; //取字符串的中点下标
    	itit();
    		
    	for(int i = 1;i <= n;i++){
    		
    		if(check(i) == 1){ //删掉下标i的元素之后,能够得到俩个一样的子串
    		
                mark = i;
                
    			if(mark <= mid)
    				flag = rr;
    			
    	    	else{
    	    		flag = ll;	
    			}
    			
    			if(vis[flag] > 0) continue;
    			vis[flag] = 1;
    			ans++; 
    			if(ans > 1){
    				cout<<"NOT UNIQUE"<<endl;return 0;
    			}
            }
    	}
    
    	if(!ans){
    		cout<<"NOT POSSIBLE"<<endl;
    	}
    	else{
    	   if(mark <= mid){
    	   	 for(int i = mid + 1;i <= n; i++)cout<<s[i];
    	   	 printf("
    ");
    	   }
    	   else{
    	   	 for(int i = 1;i <= mid - 1; i++)cout<<s[i];
    	   	  printf("
    ");
    	   }
    	}
    	return 0;
    }
    

    T6 A Horrible Poem

    被困一上午,只因进制没取质数

    暴力枚举循环节,如果循环节长度不能被区间整除,直接跳过,不过显然T了

    正解是数论??

    假设最短循环节长度为len

    则原串长度显然为len*k。若只考虑k,

    并且将k的质因数依次分解,每次试除k,

    则得到的k。和len的乘积仍是循环节,

    利用这个性质。依次用质因数 i 试除n,

    若除去后仍是循环节,说明i属于k,将其除去,结果就留下了len

    #include <iostream>
    #include <cstdio>
    #include <queue>
    #include <cstring>
    #include <string>
    #include <cmath>
    #include <map>
    #define int unsigned long long
    using namespace std;
    const int A = 1e3 + 2;
    const int B = 1e4 + 2;
    const int C = 5e5 + 2;
    const int D = 5e5 + 10;
    const int inf = 0x3f3f3f3f;
    const int mod = 99984198447;
    int read(){
    	int x = 0,f = 1;char c = getchar();
    	while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
    	while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
    	return x*f;
    }
    int  q, n, pre[D], base = 63, hs[D],prim[D],ans,len;
    bool vis[D],flag;
    char s[D];
    void calc(){
        for (int i = 2; i <= n; ++i) {
            if (vis[i]) continue;
            for (int j = 1; i * j <= n; ++j) {
                int t = i * j;
                if (vis[t]) continue;
                vis[t] = 1;
                prim[t] = i;
            }
        }
    }
    
    signed main(){
    	n = read();calc();
        scanf("%s", s + 1);
    	pre[0] = 1;
    	for(int i = 1;i <= n; i++){
    		 pre[i] = pre[i - 1] * base;hs[i] = hs[i - 1] * base + s[i];
    	}
    		
    	int q = read();
    	while(q--){
    		int l = read(), r = read();
    	   len = ans = r - l + 1;
    		while(len > 1){
    			int k = ans / prim[len];
    			len /= prim[len];
    			if(hs[r - k] - hs[l - 1] * pre[r - k - l + 1] == hs[r] - hs[l - 1 + k] * pre[r - k - l + 1])
    				ans = k;
    		}
    		printf("%d
    ", ans);
    	}
    }
    
  • 相关阅读:
    HIVE Group by、join、distinct等实现原理
    【1.3】shell基础——调试shell(sh -x)
    sql server无法显示请求的对话框,检索数据失败
    sql server索引操作
    sql server中的alter
    tempdb无法收缩。使用 DBCC FREESYSTEMCACHE 解决
    在从该备份集进行读取时,RESTORE 检测到在数据库 "CISDB" 中的页(0:0)上存在错误。系统断定检查已失败
    【1.2】shell基础——stty erase解决按backspace出现^H的情况
    【1.1】shell基本实践——密码输入三次错误则结束
    (5.3.7)数据库迁移——sql server备份文件的加密解密
  • 原文地址:https://www.cnblogs.com/Arielzz/p/14258362.html
Copyright © 2011-2022 走看看