\(\frak{Description}\)
\(\frak{Solution}\)
\(\text{Sol 1.}\) 后缀数组
设 \(S_0\) 的长度为 \(l_1\),\(S\) 为 \(l_2\).
考虑到只允许失配 \(3\) 个碱基,我们考虑直接枚举 + 匹配。假设枚举的子串的开头为 \(i\),匹配了 \(j\) 位,我们现在要匹配 \(S_0\) 的 \(i+j\) 位与 \(S\) 的 \(j+1\) 位。
我们利用后缀数组跳跃。若两者的 \(\rm lcp\) 为 \(k\),那么第 \(i+j+k\) 与 \(j+1+k\) 一定是不匹配的。越过不匹配的点,将可以更改的次数减去一。复杂度是 \(\mathcal O(n\log n)\) 的。
\(\text{Sol 2.}\) \(\mathtt{FFT}\)
只有四种碱基,可以考虑枚举每一种碱基,假设枚举碱基 \(\rm C\),就令:
\[f(i)=[S_0(i)=\text{C}]\\
g(i)=[S(i)=\text{C}]
\]
那么对于从 \(p\) 开始匹配的字符串,碱基 \(\rm C\) 匹配的个数就是:
\[\sum_{i=0}^{m-1} f(p+i)\cdot g(i)
\]
不妨令 \(h(i)=g(m-i-1)\),就有:
\[\text{Ans}_p=\sum_{i+j=p+m-1} f(i)\cdot h(j)
\]
\(\frak{Code}\)
\(\text{Sol 1.}\) 后缀数组
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
using namespace std;
const int N = 2e5 + 3;
int lg[N], T, rmq[N][20], n, m, h[N], tax[N], SA[N], tp[N], rk[N], a[N];
int read() {
int x = 0, f = 1; char S;
while((S = getchar()) > '9' || S < '0') {
if(S == '-') f = -1;
if(S == EOF) exit(0);
}
while(S <= '9' && S >= '0') {
x = (x << 1) + (x << 3) + (S ^ 48);
S = getchar();
}
return x * f;
}
bool cmp(const int x, const int y, const int d) {return tp[x] == tp[y] && tp[x + d] == tp[y + d];}
void Sort() {
for(int i = 0; i <= m; ++ i) tax[i] = 0;
for(int i = 1; i <= n; ++ i) ++ tax[rk[tp[i]]];
for(int i = 1; i <= m; ++ i) tax[i] += tax[i - 1];
for(int i = n; i >= 1; -- i) SA[tax[rk[tp[i]]] --] = tp[i];
}
void Swap(int &x, int &y) {
x ^= y ^= x ^= y;
}
int Min(const int x, const int y) {
if(x < y) return x;
return y;
}
void Suffix() {
for(int i = 1; i <= n; ++ i) rk[i] = a[i], tp[i] = i;
m = 100; Sort();
for(int w = 1, p = 1, i; p < n; m = p, w <<= 1) {
for(p = 0, i = n - w + 1; i <= n; ++ i) tp[++ p] = i;
for(i = 1; i <= n; ++ i) if(SA[i] > w) tp[++ p] = SA[i] - w;
Sort(); swap(rk, tp); rk[SA[1]] = p = 1;
for(i = 2; i <= n; ++ i) rk[SA[i]] = cmp(SA[i], SA[i - 1], w) ? p : ++ p;
}
int j, k = 0;
for(int i = 1; i <= n; h[rk[i ++]] = k)
for(k = k ? k - 1 : k, j = SA[rk[i] - 1]; a[i + k] == a[j + k]; ++ k);
for(int i = 1; i <= n; ++ i) rmq[i][0] = h[i];
for(int j = 1; (1 << j) <= n; ++ j)
for(int i = 1; i + (1 << j) - 1 <= n; ++ i)
rmq[i][j] = Min(rmq[i][j - 1], rmq[i + (1 << j - 1)][j - 1]);
}
int lcp(const int x, const int y) {
int l = rk[x], r = rk[y];
if(l > r) Swap(l, r);
int t = lg[r - l];
return Min(rmq[l + 1][t], rmq[r - (1 << t) + 1][t]);
}
int main() {
char ch[N];
int l1, l2, ans;
for(int i = 2; i <= N - 1; ++ i) lg[i] = lg[i >> 1] + 1;
T = read();
while(T --) {
ans = 0;
scanf("%s", ch); l1 = strlen(ch);
for(int i = 1; i <= l1; ++ i) a[i] = ch[i - 1];
scanf("%s", ch); l2 = strlen(ch);
a[l1 + 1] = '$';
for(int i = l1 + 2; i <= l1 + l2 + 1; ++ i) a[i] = ch[i - l1 - 2];
n = l1 + l2 + 1;
Suffix();
for(int i = 1; i <= l1 - l2 + 1; ++ i) {
int j = 0;
for(int k = 0; j <= l2 && k <= 3; ++ j, ++ k) j += lcp(i + j, l1 + j + 2);
if(j > l2) ++ ans;
}
printf("%d\n", ans);
}
return 0;
}
\(\text{Sol 2.}\) \(\mathtt{FFT}\)
#include <cstdio>
#define print(x,y) write(x),putchar(y)
template <class T>
inline T read(const T sample) {
T x=0; char s; bool f=0;
while((s=getchar())>'9' || s<'0')
f |= (s=='-');
while(s>='0' && s<='9')
x = (x<<1)+(x<<3)+(s^48),
s = getchar();
return f?-x:x;
}
template <class T>
inline void write(T x) {
static int writ[50],w_tp=0;
if(x<0) putchar('-'),x=-x;
do writ[++w_tp]=x-x/10*10,x/=10; while(x);
while(putchar(writ[w_tp--]^48),w_tp);
}
#include <cmath>
#include <cstring>
#include <iostream>
using namespace std;
const int maxn = 4e5+5;
const double PI = acos(-1.0);
int n,m,lim,bit,mp[200];
int ans[maxn],rev[maxn];
char s[maxn],t[maxn];
struct cp {
double x,y;
cp() {}
cp(const double X,const double Y):x(X),y(Y) {}
cp operator + (const cp& t) const {
return cp(x+t.x,y+t.y);
}
cp operator - (const cp& t) const {
return cp(x-t.x,y-t.y);
}
cp operator * (const cp& t) const {
return cp(x*t.x-y*t.y,x*t.y+y*t.x);
}
} f[maxn],g[maxn],w,wn,tmp;
void preWork() {
lim=1, bit=0;
while(lim<n+m-1) lim<<=1, ++bit;
for(int i=0;i<lim;++i)
rev[i] = (rev[i>>1]>>1)|((i&1)<<bit-1);
}
void FFT(cp *f,int opt=1) {
for(int i=0;i<lim;++i)
if(i<rev[i]) swap(f[i],f[rev[i]]);
for(int mid=1;mid<lim;mid<<=1) {
wn = cp(cos(PI/mid),sin(PI/mid)*opt);
for(int i=0;i<lim;i+=(mid<<1)) {
w = cp(1,0);
for(int j=0;j<mid;++j,w=w*wn) {
tmp = f[i|j|mid]*w;
f[i|j|mid] = f[i|j]-tmp;
f[i|j] = f[i|j]+tmp;
}
}
}
}
int main() {
mp['A']=0, mp['G']=1;
mp['C']=2, mp['T']=3;
for(int T=read(9); T; --T) {
scanf("%s %s",s,t);
n=strlen(s), m=strlen(t);
preWork();
memset(ans,0,sizeof(int)*n);
for(int c=0;c<4;++c) {
for(int i=0;i<n;++i)
f[i].x = (mp[s[i]]==c),
f[i].y = 0;
for(int i=n;i<lim;++i)
f[i].x=f[i].y=0;
for(int i=0;i<m;++i)
g[i].x = (mp[t[m-i-1]]==c),
g[i].y = 0;
for(int i=m;i<lim;++i)
g[i].x=g[i].y=0;
FFT(f), FFT(g);
for(int i=0;i<lim;++i)
f[i] = f[i]*g[i];
FFT(f,-1);
for(int i=m-1;i<n;++i)
ans[i] += int(f[i].x/lim+0.5);
}
int ret=0;
for(int i=m-1;i<n;++i)
ret += (ans[i]>=m-3);
print(ret,'\n');
}
return 0;
}