zoukankan      html  css  js  c++  java
  • 字符串hash

    似乎没写过多少字符串hash
    今天补一补

    字符串hash重要思想就是把字符串看做一个N进制大整数,进行取模后直接比较
    这样子做的优劣很直观:很快很简单,也有取模后蜜汁碰撞的风险

    对于i位置的hash值,可以这样求:

        for (int i = 1; i <= n; i++) H[i] = H[i - 1] * p + s[i];
    

    我们要取出子串[l,r]的hash值时,显然就是(H[r] - H[l - 1] * p^{r - l + 1})

    来道【正解SAM】的例题:
    最长公共子串
    当然对串a建SAM,用串b在上边匹配就可以了

    SAM太深奥了,我们来看看简单暴力的字符串hash
    我们二分长度len,对A串所有位置上长度为len的子串hash排序,再拿B串所有位置上长度为len的子串hash去查找
    复杂度O(n log^2 n)【似乎SAM接近O(n)?】

    #include<iostream>
    #include<cstdio>
    #include<cmath>
    #include<cstring>
    #include<algorithm>
    #define LL long long int
    #define Redge(u) for (int k = h[u]; k; k = ed[k].nxt)
    #define REP(i,n) for (int i = 1; i <= (n); i++)
    #define ULL unsigned long long int
    using namespace std;
    const int maxn = 200005,maxm = 100005,INF = 1000000000;
    /**
     * Reads the next decimal integer from stdin.
     *
     * Skips every character that is not a digit; if a '-' is seen while
     * skipping, the result is negated. Consumes the digit run plus the single
     * character that terminates it.
     *
     * @return the parsed value, or 0 if end of input is reached before any digit.
     */
    inline int read(){
        // getchar() returns int; keeping it as int is what lets EOF be told
        // apart from real characters (a char can never compare equal to it reliably).
        int c = getchar();
        int sign = 1;
        while (c != EOF && (c < '0' || c > '9')){
            if (c == '-') sign = -1;
            c = getchar();
        }
        int value = 0;
        // EOF is negative, so it also terminates this loop.
        while (c >= '0' && c <= '9'){
            value = value * 10 + (c - '0');
            c = getchar();
        }
        return value * sign;
    }
    // Input strings, stored 1-indexed (main reads them into A + 1 and B + 1).
    char A[maxn],B[maxn];
    // Lengths of A and B; n is the number of window hashes currently stored in b.
    int lena,lenb,n;
    // Prefix hashes: Ha[i] / Hb[i] is the base-27 polynomial hash of the first
    // i characters of A / B, computed with unsigned wraparound.
    ULL Ha[maxn],Hb[maxn];
    // Scratch buffer that check() fills with the hashes of A's windows, in b[1..n].
    ULL b[maxn];
    bool check(int len){
    	n = 0;
    	ULL P = 1;
    	for (int i = 1; i <= len; i++) P *= 27;
    	for (int i = len; i <= lena; i++) b[++n] = Ha[i] - Ha[i - len] * P;
    	sort(b + 1,b + 1 + n);
    	for (int i = len; i <= lenb; i++){
    		ULL temp = Hb[i] - Hb[i - len] * P;
    		if (b[lower_bound(b + 1,b + 1 + n,temp) - b] == temp) return true;
    	}
    	return false;
    }
    /**
     * Reads two whitespace-delimited strings from stdin and prints the length of
     * their longest common substring, found by binary searching on the length
     * and testing each candidate with check().
     *
     * NOTE(review): the reads are unbounded "%s"; each input token must fit in
     * the maxn-sized buffers (offset by one for 1-based indexing).
     */
    int main(){
        scanf("%s",A + 1); lena = static_cast<int>(strlen(A + 1));
        scanf("%s",B + 1); lenb = static_cast<int>(strlen(B + 1));

        // Base-27 polynomial prefix hashes with unsigned wraparound.
        for (int i = 1; i <= lena; i++) Ha[i] = Ha[i - 1] * 27 + A[i];
        for (int i = 1; i <= lenb; i++) Hb[i] = Hb[i - 1] * 27 + B[i];

        // Largest len in [0, min(lena, lenb)] for which check(len) holds.
        // The midpoint is rounded up so that "l = mid" always makes progress.
        int l = 0,r = std::min(lena,lenb);
        while (l < r){
            const int mid = (l + r + 1) >> 1;
            if (check(mid)) l = mid;
            else r = mid - 1;
        }

        // The format string must contain the escape sequence \n; a raw line
        // break inside the literal does not compile.
        printf("%d\n",l);
        return 0;
    }
    
    

    BZOJ3207
    此题K很小,我们用上hash之后,每个位置就对应一个hash值,问题就转化为了一个区间内是否存在某个值,用可持久化线段树就可以了

    #include<iostream>
    #include<cmath>
    #include<cstdio>
    #include<cstring>
    #include<algorithm>
    #define LL long long int
    #define REP(i,n) for (int i = 1; i <= (n); i++)
    #define Redge(u) for (int k = h[u],to; k; k = ed[k].nxt)
    #define BUG(s,n) for (int i = 1; i <= (n); i++) cout<<s[i]<<' '; puts("");
    #define inf 18446744073709551615UL
    #define uLL unsigned long long int
    using namespace std;
    const int maxn = 100010,maxm = 8000005;
    /**
     * Reads the next decimal integer from stdin.
     *
     * Skips every character that is not a digit; if a '-' is seen while
     * skipping, the result is negated. Consumes the digit run plus the single
     * character that terminates it.
     *
     * @return the parsed value, or 0 if end of input is reached before any digit.
     */
    inline int read(){
        // getchar() returns int; keeping it as int is what lets EOF be told
        // apart from real characters (a char can never compare equal to it reliably).
        int c = getchar();
        int sign = 1;
        while (c != EOF && (c < '0' || c > '9')){
            if (c == '-') sign = -1;
            c = getchar();
        }
        int value = 0;
        // EOF is negative, so it also terminates this loop.
        while (c >= '0' && c <= '9'){
            value = value * 10 + (c - '0');
            c = getchar();
        }
        return value * sign;
    }
    // Segment tree node pools indexed by node id: left child, right child and
    // stored count. rt[i] is the root written by the modify() call for position i.
    int ls[maxm],rs[maxm],sum[maxm],rt[maxn];
    // n, m, K are read from input in main; cnt is the number of nodes allocated so far.
    int n,m,K,cnt;
    // A holds the input sequence (1-indexed); T is not referenced in the code shown here.
    int A[maxn],T[maxn];
    // H[i] is the base-107 polynomial prefix hash of A[1..i], with unsigned wraparound.
    uLL H[maxn];
    void modify(int& u,int pre,uLL l,uLL r,uLL pos){
    	u = ++cnt; sum[u] = sum[pre] + 1; ls[u] = ls[pre]; rs[u] = rs[pre];
    	if (l == r) return;
    	uLL mid = l / 2 + r / 2;
    	if (mid >= pos) modify(ls[u],ls[pre],l,mid,pos);
    	else modify(rs[u],rs[pre],mid + 1,r,pos);
    }
    int query(int u,int v,uLL l,uLL r,uLL pos){
    	if (l == r) return sum[u] - sum[v];
    	uLL mid = l / 2 + r / 2;
    	if (mid >= pos) return query(ls[u],ls[v],l,mid,pos);
    	else return query(rs[u],rs[v],mid + 1,r,pos);
    }
    int main(){
    	n = read(); m = read(); K = read();
    	REP(i,n) A[i] = read();
    	REP(i,n) H[i] = H[i - 1] * 107 + A[i];
    	uLL P = 1; REP(i,K) P *= 107;
    	for (int i = K; i <= n; i++)
    		modify(rt[i],rt[i - 1],0,inf,H[i] - H[i - K] * P);
    	while (m--){
    		int l = read() + K - 1,r = read();
    		uLL val = 0;
    		for (int i = 1; i <= K; i++) val = val * 107 + read();
    		if (query(rt[r],rt[l - 1],0,inf,val)) puts("No");
    		else puts("Yes");
    	}
    	return 0;
    }
    
    
  • 相关阅读:
    点击文本选中checkbox
    建立FTP服务器(FTP服务器名要与创建的用户名一致)
    asp概述
    windows server安装oracle
    1.Oracle数据库查看用户锁表和对表解锁的sql语句
    System.IO.File类和System.IO.FileInfo类
    13.JavaScript 类
    12.HTML DOM 允许 JavaScript 改变 HTML 元素的内容。
    [Python源码剖析]字符缓冲池intern机制
    [Python源码剖析]获取Python小整数集合范围
  • 原文地址:https://www.cnblogs.com/Mychael/p/8504433.html
Copyright © 2011-2022 走看看