zoukankan      html  css  js  c++  java
  • hdu 5769 Substring 后缀数组 + KMP

    http://acm.hdu.edu.cn/showproblem.php?pid=5769

    题意:在S串中找出X串出现的不同子串的数目? 其中1 <= |S| < $10^5$

    官方题解: 处理出后缀数组中的sa[]数组和height[]数组。在不考虑包含字符X的情况下,不同子串的个数为

    sum_{1}^{length}length−left(sa[i]+height[i]
ight)

    如果要求字符X,只需要记录距离sa[i]最近的字符X的位置(用nxt[sa[i]]表示)即可,个数

    sum_{1}^{length}length-maxleft(nxtleft[saleft[i
ight]
ight],saleft[i
ight]+heightleft[i
ight]
ight)

    理解:后缀数组height[i]就是sa[i]与sa[i-1]的LCP,在后缀数组中求解全部的不同子串(之前只写过SAM处理所有不同子串..)还是比较好理解的,在一定要含有子串x时,需要先kmp求出所有匹配的位置,在处理到第i个后缀时,取max即可表示一定含有X,并且是不同的子串;

    #pragma comment(linker, "/STACK:1024000000,1024000000")
    #include<bits/stdc++.h>
    using namespace std;
    #define rep0(i,l,r) for(int i = (l);i < (r);i++)
    #define rep1(i,l,r) for(int i = (l);i <= (r);i++)
    #define rep_0(i,r,l) for(int i = (r);i > (l);i--)
    #define rep_1(i,r,l) for(int i = (r);i >= (l);i--)
    #define MS0(a) memset(a,0,sizeof(a))
    #define MS1(a) memset(a,-1,sizeof(a))
    #define MSi(a) memset(a,0x3f,sizeof(a))
    #define inf 0x3f3f3f3f
    #define pb push_back
    #define MK make_pair
    #define A first
    #define B second
    #define eps 1e-8
    #define zero(x) (((x)>0?(x):-(x))<eps)
    #define bitnum(a) __builtin_popcount(a)
    #define lowbit(x) (x&(-x))
    #define clear0 (0xFFFFFFFE)
    #define mod 1000000007
    #define K(x) ((x)*(x))
    typedef pair<int,int> PII;
    typedef long long ll;
    typedef unsigned long long ull;
    template<typename T>
    void read1(T &m)
    {
        T x = 0,f = 1;char ch = getchar();
        while(ch <'0' || ch >'9'){ if(ch == '-') f = -1;ch=getchar(); }
        while(ch >= '0' && ch <= '9'){ x = x*10 + ch - '0';ch = getchar(); }
        m = x*f;
    }
    template<typename T>
    void read2(T &a,T &b){read1(a);read1(b);}
    template<typename T>
    void read3(T &a,T &b,T &c){read1(a);read1(b);read1(c);}
    template<typename T>
    void out(T a)
    {
        if(a>9) out(a/10);
        putchar(a%10+'0');
    }
    inline ll gcd(ll a,ll b){ return b == 0? a: gcd(b,a%b); }
    template<class T1, class T2> inline void gmax(T1& a, T2 b){ if(a < b) a = b;}
    template<class T1, class T2> inline void gmin(T1& a, T2 b){ if(a > b) a = b;}
    const int maxn = 100001;
    int sa[maxn], t[maxn],t2[maxn],c[maxn],w[maxn];
    int cmp(int *r,int a,int b,int l)
    {
        return r[a] == r[b] && r[a+l] == r[b+l];
    }
    void build_sa(char *r,int n,int m)
    {
        int i, j, p, *x = t, *y = t2;
        for(i = 0; i < m;i++) c[i] = 0;
        for(i = 0; i < n;i++) c[x[i] = r[i]]++;
        for(i = 1; i < m;i++) c[i] += c[i-1];
        for(i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
    
        for(j = 1, p = 1; p < n; j <<= 1, m = p){
            for(p = 0, i = n - j; i < n;i++) y[p++] = i;
            for(i = 0; i < n; i++) if(sa[i] >= j) y[p++] = sa[i]-j;
            for(i = 0; i < n; i++) w[i] = x[y[i]];
            for(i = 0; i < m; i++) c[i] = 0;
            for(i = 0; i < n; i++) c[w[i]]++;
            for(i = 0; i < m; i++) c[i] += c[i-1];
            for(i = n-1; i >= 0; i--) sa[--c[w[i]]] = y[i];
            swap(x,y);
            for(p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
                x[sa[i]] = cmp(y, sa[i-1], sa[i], j)? p-1: p++;
            if(p >= n) break;
            m = p;
        }
    }
    char S[maxn], X[maxn];
    int  height[maxn], rk[maxn];
    void getHeight(int n)
    {
        for(int i = 1; i<= n; i++) rk[sa[i]] = i;
        for(int i = 0, j, k = 0; i < n; height[rk[i++]] = k)
            for(k? k--:0, j = sa[rk[i] - 1]; S[i+k] == S[j+k]; k++);
    }
    
    int f[maxn];
    void getfail(char *p)
    {
        f[0] = f[1] = 0;
        int n = strlen(p);
        for(int i = 1;i < n;i++){
            int j = f[i];
            if(j && p[i] != p[j]) j = f[j];
            f[i+1] = (p[i] == p[j] ?j+1:0);// i+1会递推到第n位
        }
    }
    vector<int> vec;
    void Find(char *T, char *p)
    {
        vec.clear();
        ll j = 0,n = strlen(T),m = strlen(p);
        for(int i = 0;i < n;i++){
            while(j && T[i] != p[j]) j = f[j];
            if(T[i] == p[j]) j++;
            if(j == m){
                vec.pb(i);
                j = 0;
                i -= m-1;
            }
        }
        sort(vec.begin(), vec.end());
    }
    
    int main()
    {
        //freopen("data.txt","r",stdin);
        //freopen("out.txt","w",stdout);
        int T, kase = 1;
        scanf("%d",&T);
        while(T--){
            scanf("%s%s", X, S);
            int len = strlen(S), m = strlen(X);
            S[len] = '#';S[len+1] = 0;
            build_sa(S,len+1,'z'+1);
            getHeight(len);
    
            getfail(X);
            Find(S,X);
            vec.pb(len);
            ll ans = 0;
            rep1(i,1,len){
                if(sa[i]+m-1 > len) continue;
                int nxt = lower_bound(vec.begin(), vec.end(), sa[i]+m-1) - vec.begin();
                ans += len - max(vec[nxt],sa[i] + height[i]);
            }
            printf("Case #%d: %lld
    ", kase++, ans);
        }
        return 0;
    }
  • 相关阅读:
    网络性能测试工具iperf详细使用图文教程zz
    linux时间和定时器zz
    sleep
    linux调度器的配置参数zz
    zz升级Mininet自带的OpenvSwitch & 编译OpenvSwitch
    minnet sample
    电信新势力,TIP/CORD能颠覆电信设备商吗?
    【TS】534- TypeScript 可辨识联合
    【拓展】一张图看懂字节跳动8年创业史,太励志了吧
    【CSS】533- CSS 渲染原理以及优化策略
  • 原文地址:https://www.cnblogs.com/hxer/p/5724311.html
Copyright © 2011-2022 走看看