概念
通过一个hash函数H,将一组数据(包括字符串,较大的数等)转化成能够用变量表示或直接可以作为下标的数,可以通过hash函数转化得到的数值成为hash值,hash可以实现快速查找和匹配,常用的有字符串hash 和 哈希表
字符串hash
题目
给定一个字符串 (A) 和一个字符串 (B),求在 (B) 中的出现次数。 (A)和(B)中的字符均为英语大写字母或小写字母。
(A) 中不同位置出现的 (B) 可重叠
我们选取两个合适的互质的数 (b) 和 (h) (b < h)假设字符串(C = c_1c_2c_3……c_m)
(H(C) = (c_1b^{m-1} + c_2b^{m-2} + c_mb^{0})~mod~h;)
(b) 代表的是基数,相当于把字符串看做 b 进制数——《一本通提高篇》(很诡异的东西)
设 (H(C,K)) 为前 (K) 个字符组成的字符串的哈希值
(H(C,K + 1) = H(C,K)*b + C_{k + 1})
举个栗子:
如果字符串(C = “ACDA”)(令A表示1,B表示2)则
(H(C,1) = 1;)
(H(C,2) = 1*b + 3;)
(H(C,3) = 1*b^2 + 3*b + 4;)
(H(C,4) = 1*b^3 + 3*b^2 + 4 * b + 1;)
判断主串的一个字符和另一个字符是够匹配
即判断字符串(C = c_1c_2……c_m)从位置(k+1)开始的长度为(n)的子串(C^, = c_{k + 1}c_{k + 2}……c_{k + n})的哈希值与另一个匹串(S = s_1s_2……s_n)是否相等
(H(C') = H(C, k + n) - H(C, K)*b^n)
因此可得出求字符串区间hash值(l为左边界,r为右边界)
(H(C') = H(C, r) - H(C, l)*b^(r - l + 1))
int get(int l,int r){
return hs[r] - hs[l - 1] * pre[r - l + 1];
}
字符串区间删去一个字符后的hash值类比可以得到
int del(int l,int r,int pos){
return get(l, pos - 1) * pre[r-pos] + get(pos + 1, r);
}
然后就是习题了= =
T1 子串查找
模板题
#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
using namespace std;
const int M = 1e6 + 10;
typedef unsigned long long ull;
int read(){
int x = 0,f = 1;char c = getchar();
while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
return x * f;
}
char sa[M],sb[M];
ull base = 155,sum[M],pow[M],falg;
int ans;
int main(){
pow[0] = 1;
scanf("%s%s",sa + 1,sb + 1);
int lena = strlen(sa + 1),lenb = strlen(sb + 1);
for(int i = 1;i < 1000000; i++)
pow[i] = pow[i - 1]*base;//处理进位
sum[0] = 0;
for(int i = 1;i <= lena; i++){
sum[i] = sum[i - 1] * base + (ull)(sa[i] - 'A' + 1);
}
for(int i = 1; i <= lenb; i++){
falg = falg * base + (ull)(sb[i] - 'A' + 1);
}
for(int i = 0;i <= lena - lenb; i++){
if(falg == sum[i + lenb] - sum[i] * pow[lenb]) ans++;
}
cout<<ans;
}
T2 图书管理
#include <iostream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>
using namespace std;
typedef unsigned long long ull;
const int mod1 = 1e7 + 7;
const int mod2 = 1e7 + 9;
const int base = 1e9;
const int M = 1e7;
int n;
char flag[5], S[M];
bool A[M], B[M];
int main() {
cin>>n;
for (int i = 1; i <= n; i++) {
cin >> flag;
ull sum1 = 1, sum2 = 1;
gets(S);
for (int j = 0; j < strlen(S); j++)
sum1 = (sum1 * base % mod1 + S[j]) % mod1, sum2 = (sum2 * base % mod2 + S[j]) % mod2;
if (flag[0] == 'a')
A[sum1] = 1, B[sum2] = 1;
if (flag[0] == 'f'){
if (A[sum1] && B[sum2])
puts("yes");
else
puts("no");
}
}
return 0;
}
T3 Power Strings
#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
const int D = 1e6 + 2;
const int inf = 0x3f3f3f3f;
typedef unsigned long long ull;
int read(){
int x = 0,f = 1;char c = getchar();
while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
return x*f;
}
char c[D];
ull hs[D],power[D],base = 37;
int main(){
power[0] = 1;
for(int i = 1;i <= D; i++)
power[i] = power[i - 1]*base;
while (scanf("%s", c)){
if (!strcmp(c, "."))break;
ull ans = 0;int len = strlen(c);
hs[len] = 0;
for (int i = len - 1; i >= 0; i--){
hs[i] = hs[i + 1] * base + c[i] - 'a' + 1;
}
for (int k = 1; k <= len; k++)
{
if (len % k != 0)
continue;
ull tomp = hs[0] - hs[k] * power[k];
int j = 0;
for(j = k;j < len; j = j + k){
if(tomp != hs[j] - hs[k + j]*power[k]) break;
else tomp = hs[j] - hs[k + j]*power[k];
}
if(j == len){
ans = len / k;break;
}
}
cout<<ans<<"
";
}
return 0;
}
T4 Seek the Name, Seek the Fame
#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
const int D = 1e6 + 2;
const int inf = 0x3f3f3f3f;
const int mod = 1e9 + 7;
typedef unsigned long long ull;
int read(){
int x = 0,f = 1;char c = getchar();
while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
return x*f;
}
char s[D];
ull base = 34,power[D],hs[D];
int main(){
power[0] = 1;
for(int i = 1;i <= D; i++) power[i] = power[i - 1] * base;
while(scanf ("%s", s + 1) != EOF){
int len = strlen(s + 1);
hs[0] = 0;
for(int i = 1; i <= len; i++){
hs[i] = hs[i - 1] * base + s[i] - 'a' + 1;
}
for(int i = 1;i <= len; i++){
if(hs[i] == hs[len] - hs[len - i] * power[i]){
printf("%d ",i);
}
}
printf("
");
}
}
T5 「BalticOI 2014 Day 1」三个朋友
求区间hash和删去字符后的hash
#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <string>
#include <cmath>
#include <map>
#define int unsigned long long
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 1e5 + 2;
const int D = 2e6 + 5;
const int inf = 0x3f3f3f3f;
const int mod = 99984198447;
int read(){
int x = 0,f = 1;char c = getchar();
while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
return x*f;
}
char s[D];
int pre[D],base = 999983,hs[D],ans,ll,rr,flag,n,mid,mark;;
map<unsigned long long, int> vis;
int get(int l,int r){
return hs[r] - hs[l - 1] * pre[r - l + 1];
}
int del(int l,int r,int pos){
return get(l, pos - 1) * pre[r-pos] + get(pos + 1, r);
}
bool check(int pos){
if(pos == mid){
ll = get(1, pos - 1);
rr = get(pos + 1, n);
return ll == rr;
}
else if(pos < mid){
ll = del(1, mid, pos);
rr = get(mid + 1, n);
return ll == rr;
}
else{
ll = get(1, mid - 1);
rr = del(mid, n, pos);
return ll == rr;
}
}
void itit(){
pre[0]=1;
for(int i = 1;i <= n; i++){
pre[i] = pre[i - 1] * base;hs[i] = hs[i - 1] * base + s[i];
}
}
signed main(){
cin >> n >> (s + 1);
mid = (n + 1) >> 1; //取字符串的中点下标
itit();
for(int i = 1;i <= n;i++){
if(check(i) == 1){ //删掉下标i的元素之后,能够得到俩个一样的子串
mark = i;
if(mark <= mid)
flag = rr;
else{
flag = ll;
}
if(vis[flag] > 0) continue;
vis[flag] = 1;
ans++;
if(ans > 1){
cout<<"NOT UNIQUE"<<endl;return 0;
}
}
}
if(!ans){
cout<<"NOT POSSIBLE"<<endl;
}
else{
if(mark <= mid){
for(int i = mid + 1;i <= n; i++)cout<<s[i];
printf("
");
}
else{
for(int i = 1;i <= mid - 1; i++)cout<<s[i];
printf("
");
}
}
return 0;
}
T6 A Horrible Poem
被困一上午,只因进制没取质数
暴力枚举循环节,如果循环节长度不能被区间整除,直接跳过,不过显然T了
正解是数论??
假设最短循环节长度为len
则原串长度显然为len*k。若只考虑k,
并且将k的质因数依次分解,每次试除k,
则得到的k。和len的乘积仍是循环节,
利用这个性质。依次用质因数 i 试除n,
若除去后仍是循环节,说明i属于k,将其除去,结果就留下了len
#include <iostream>
#include <cstdio>
#include <queue>
#include <cstring>
#include <string>
#include <cmath>
#include <map>
#define int unsigned long long
using namespace std;
const int A = 1e3 + 2;
const int B = 1e4 + 2;
const int C = 5e5 + 2;
const int D = 5e5 + 10;
const int inf = 0x3f3f3f3f;
const int mod = 99984198447;
int read(){
int x = 0,f = 1;char c = getchar();
while(c < '0'||c > '9'){if(c == '-')f = -1;c = getchar();}
while(c >= '0'&&c <= '9'){x = x*10 + c - '0';c = getchar();}
return x*f;
}
int q, n, pre[D], base = 63, hs[D],prim[D],ans,len;
bool vis[D],flag;
char s[D];
void calc(){
for (int i = 2; i <= n; ++i) {
if (vis[i]) continue;
for (int j = 1; i * j <= n; ++j) {
int t = i * j;
if (vis[t]) continue;
vis[t] = 1;
prim[t] = i;
}
}
}
signed main(){
n = read();calc();
scanf("%s", s + 1);
pre[0] = 1;
for(int i = 1;i <= n; i++){
pre[i] = pre[i - 1] * base;hs[i] = hs[i - 1] * base + s[i];
}
int q = read();
while(q--){
int l = read(), r = read();
len = ans = r - l + 1;
while(len > 1){
int k = ans / prim[len];
len /= prim[len];
if(hs[r - k] - hs[l - 1] * pre[r - k - l + 1] == hs[r] - hs[l - 1 + k] * pre[r - k - l + 1])
ans = k;
}
printf("%d
", ans);
}
}