zoukankan      html  css  js  c++  java
  • [TJOI 2017] DNA

    \(\frak{Description}\)

    \(\text{Link.}\)

    \(\frak{Solution}\)

    \(\text{Sol 1.}\) 后缀数组

    \(S_0\) 的长度为 \(l_1\)\(S\)\(l_2\).

    考虑到只允许失配 \(3\) 个碱基,我们考虑直接枚举 + 匹配。假设枚举的子串的开头为 \(i\),匹配了 \(j\) 位,我们现在要匹配 \(S_0\)\(i+j\) 位与 \(S\)\(j+1\) 位。

    我们利用后缀数组跳跃。若两者的 \(\rm lcp\)\(k\),那么第 \(i+j+k\)\(j+1+k\) 一定是不匹配的。越过不匹配的点,将可以更改的次数减去一。复杂度是 \(\mathcal O(n\log n)\) 的。

    \(\text{Sol 2.}\) \(\mathtt{FFT}\)

    只有四种碱基,可以考虑枚举每一种碱基,假设枚举碱基 \(\rm C\),就令:

    \[f(i)=[S_0(i)=\text{C}]\\ g(i)=[S(i)=\text{C}] \]

    那么对于从 \(p\) 开始匹配的字符串,碱基 \(\rm C\) 匹配的个数就是:

    \[\sum_{i=0}^{m-1} f(p+i)\cdot g(i) \]

    不妨令 \(h(i)=g(m-i-1)\),就有:

    \[\text{Ans}_p=\sum_{i+j=p+m-1} f(i)\cdot h(j) \]

    \(\frak{Code}\)

    \(\text{Sol 1.}\) 后缀数组

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <iostream>
    using namespace std;
    
    const int N = 2e5 + 3;
    
    int lg[N], T, rmq[N][20], n, m, h[N], tax[N], SA[N], tp[N], rk[N], a[N];
    
    int read() {
    	int x = 0, f = 1; char S;
    	while((S = getchar()) > '9' || S < '0') {
    		if(S == '-') f = -1;
    		if(S == EOF) exit(0);
    	}
    	while(S <= '9' && S >= '0') {
    		x = (x << 1) + (x << 3) + (S ^ 48);
    		S = getchar();
    	}
    	return x * f;
    }
    
    bool cmp(const int x, const int y, const int d) {return tp[x] == tp[y] && tp[x + d] == tp[y + d];}
    
    void Sort() {
        for(int i = 0; i <= m; ++ i) tax[i] = 0;
        for(int i = 1; i <= n; ++ i) ++ tax[rk[tp[i]]];
        for(int i = 1; i <= m; ++ i) tax[i] += tax[i - 1];
        for(int i = n; i >= 1; -- i) SA[tax[rk[tp[i]]] --] = tp[i];
    }
    
    void Swap(int &x, int &y) {
        x ^= y ^= x ^= y;
    }
    
    int Min(const int x, const int y) {
        if(x < y) return x;
        return y;
    }
    
    void Suffix() {
        for(int i = 1; i <= n; ++ i) rk[i] = a[i], tp[i] = i;
        m = 100; Sort();
        for(int w = 1, p = 1, i; p < n; m = p, w <<= 1) {
            for(p = 0, i = n - w + 1; i <= n; ++ i) tp[++ p] = i;
            for(i = 1; i <= n; ++ i) if(SA[i] > w) tp[++ p] = SA[i] - w;
            Sort(); swap(rk, tp); rk[SA[1]] = p = 1;
            for(i = 2; i <= n; ++ i) rk[SA[i]] = cmp(SA[i], SA[i - 1], w) ? p : ++ p;
        }
        int j, k = 0;
        for(int i = 1; i <= n; h[rk[i ++]] = k)
            for(k = k ? k - 1 : k, j = SA[rk[i] - 1]; a[i + k] == a[j + k]; ++ k);
        for(int i = 1; i <= n; ++ i) rmq[i][0] = h[i];
        for(int j = 1; (1 << j) <= n; ++ j)
            for(int i = 1; i + (1 << j) - 1 <= n; ++ i)
                rmq[i][j] = Min(rmq[i][j - 1], rmq[i + (1 << j - 1)][j - 1]);
    }
    
    int lcp(const int x, const int y) {
        int l = rk[x], r = rk[y];
        if(l > r) Swap(l, r);
        int t = lg[r - l];
        return Min(rmq[l + 1][t], rmq[r - (1 << t) + 1][t]);
    }
    
    int main() {
        char ch[N];
        int l1, l2, ans;
        for(int i = 2; i <= N - 1; ++ i) lg[i] = lg[i >> 1] + 1;
        T = read();
        while(T --) {
            ans = 0;
            scanf("%s", ch); l1 = strlen(ch);
            for(int i = 1; i <= l1; ++ i) a[i] = ch[i - 1];
            scanf("%s", ch); l2 = strlen(ch);
            a[l1 + 1] = '$';
            for(int i = l1 + 2; i <= l1 + l2 + 1; ++ i) a[i] = ch[i - l1 - 2];
            n = l1 + l2 + 1;
            Suffix();
            for(int i = 1; i <= l1 - l2 + 1; ++ i) {
                int j = 0;
                for(int k = 0; j <= l2 && k <= 3; ++ j, ++ k) j += lcp(i + j, l1 + j + 2);
                if(j > l2) ++ ans;
            }
            printf("%d\n", ans);
        }
        return 0;
    }
    
    

    \(\text{Sol 2.}\) \(\mathtt{FFT}\)

    #include <cstdio>
    #define print(x,y) write(x),putchar(y)
    
    template <class T>
    inline T read(const T sample) {
        T x=0; char s; bool f=0;
        while((s=getchar())>'9' || s<'0')
            f |= (s=='-');
        while(s>='0' && s<='9')
            x = (x<<1)+(x<<3)+(s^48),
            s = getchar();
        return f?-x:x;
    }
    
    template <class T>
    inline void write(T x) {
        static int writ[50],w_tp=0;
        if(x<0) putchar('-'),x=-x;
        do writ[++w_tp]=x-x/10*10,x/=10; while(x);
        while(putchar(writ[w_tp--]^48),w_tp);
    }
    
    #include <cmath>
    #include <cstring>
    #include <iostream>
    using namespace std;
    
    const int maxn = 4e5+5;
    const double PI = acos(-1.0);
    
    int n,m,lim,bit,mp[200];
    int ans[maxn],rev[maxn];
    char s[maxn],t[maxn];
    struct cp {
        double x,y;
        cp() {}
        cp(const double X,const double Y):x(X),y(Y) {}
    
        cp operator + (const cp& t) const {
            return cp(x+t.x,y+t.y);
        }
        cp operator - (const cp& t) const {
            return cp(x-t.x,y-t.y);
        }
        cp operator * (const cp& t) const {
            return cp(x*t.x-y*t.y,x*t.y+y*t.x);
        }
    
    } f[maxn],g[maxn],w,wn,tmp;
    
    void preWork() {
        lim=1, bit=0; 
        while(lim<n+m-1) lim<<=1, ++bit;
        for(int i=0;i<lim;++i)
            rev[i] = (rev[i>>1]>>1)|((i&1)<<bit-1);
    }
    
    void FFT(cp *f,int opt=1) {
        for(int i=0;i<lim;++i)
            if(i<rev[i]) swap(f[i],f[rev[i]]);
        for(int mid=1;mid<lim;mid<<=1) {
            wn = cp(cos(PI/mid),sin(PI/mid)*opt);
            for(int i=0;i<lim;i+=(mid<<1)) {
                w = cp(1,0);
                for(int j=0;j<mid;++j,w=w*wn) {
                    tmp = f[i|j|mid]*w;
                    f[i|j|mid] = f[i|j]-tmp;
                    f[i|j] = f[i|j]+tmp;
                }
            }
        }
    }
    
    int main() {
        mp['A']=0, mp['G']=1;
        mp['C']=2, mp['T']=3;
        for(int T=read(9); T; --T) {
            scanf("%s %s",s,t);
            n=strlen(s), m=strlen(t);
            preWork();
            memset(ans,0,sizeof(int)*n);
            for(int c=0;c<4;++c) {
                for(int i=0;i<n;++i)
                    f[i].x = (mp[s[i]]==c),
                    f[i].y = 0;
                for(int i=n;i<lim;++i) 
                    f[i].x=f[i].y=0;
                for(int i=0;i<m;++i)
                    g[i].x = (mp[t[m-i-1]]==c),
                    g[i].y = 0;
                for(int i=m;i<lim;++i) 
                    g[i].x=g[i].y=0;
                FFT(f), FFT(g);
                for(int i=0;i<lim;++i)
                    f[i] = f[i]*g[i];
                FFT(f,-1);
                for(int i=m-1;i<n;++i)
                    ans[i] += int(f[i].x/lim+0.5);
            }
            int ret=0;
            for(int i=m-1;i<n;++i)
                ret += (ans[i]>=m-3);
            print(ret,'\n');
        }
        return 0;
    }
    
  • 相关阅读:
    Asp.net文章内容分页
    JQuery文字不间断滚动
    .Net Core利用反射动态加载DLL类库的方法(解决类库不包含Nuget依赖包的问题)
    【Bug】远程登录导致WPF应用程序中的UserControl控件Loaded事件重复触发
    【原创】WPF TreeView带连接线样式的优化(WinFrom风格)
    DataGrid 字体垂直居中
    Elasticsearch.Net
    利用数学归纳法指导编写递归程序
    多种图像格式相互转换工具的开发(附源代码)
    油气大数据分析 第一章 软计算基础(第四、五、六节)
  • 原文地址:https://www.cnblogs.com/AWhiteWall/p/12390859.html
Copyright © 2011-2022 走看看