zoukankan      html  css  js  c++  java
  • 后缀数组---New Distinct Substrings

    Description

    Given a string, we need to find the total number of its distinct substrings.

    Input

    T- number of test cases. T<=20; Each test case consists of one string, whose length is <= 50000

    Output

    For each test case output one number saying the number of distinct substrings.

    Example

    Input:
    2
    CCCCC
    ABABA
    
    Output:
    5
    9

    题意:给一个字符串长小于50000,求这个字符串的不同子串的个数;

    思路:使用后缀数组算法先求出sa[],sa[i]表示排第i的后缀的开始位置下标,然后求出height[]数组,height[i]表示排名第i的后缀和排名第i-1的后缀的最大公共前缀的长度,容易知道排名i和i-1的串的子串重复个数为height[i]个,而原始串的子串个数为sum=n*(n+1)/2,故用sum减去所有的henght[]即为结果;
    #include <iostream>
    #include <algorithm>
    #include <cstdio>
    #include <cstring>
    #define rep(i,n) for(int i = 0;i < n; i++)
    using namespace std;
    const int size=50005,INF=1<<30;
    int rk[size],sa[size],height[size],w[size],wa[size],res[size];
    void getSa (int len,int up) {
        int *k = rk,*id = height,*r = res, *cnt = wa;
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[k[i] = w[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[k[i]]] = i;
        }
        int d = 1,p = 0;
        while(p < len){
            for(int i = len - d; i < len; i++) id[p++] = i;
            rep(i,len)  if(sa[i] >= d) id[p++] = sa[i] - d;
            rep(i,len) r[i] = k[id[i]];
            rep(i,up) cnt[i] = 0;
            rep(i,len) cnt[r[i]]++;
            rep(i,up) cnt[i+1] += cnt[i];
            for(int i = len - 1; i >= 0; i--) {
                sa[--cnt[r[i]]] = id[i];
            }
            swap(k,r);
            p = 0;
            k[sa[0]] = p++;
            rep(i,len-1) {
                if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                    k[sa[i+1]] = p - 1;
                else k[sa[i+1]] = p++;
            }
            if(p >= len) return ;
            d *= 2,up = p, p = 0;
        }
    }
    
    void getHeight(int len) {
        rep(i,len) rk[sa[i]] = i;
        height[0] =  0;
        for(int i = 0,p = 0; i < len - 1; i++) {
            int j = sa[rk[i]-1];
            while(i+p < len&& j+p < len&& w[i+p] == w[j+p]) {
                p++;
            }
            height[rk[i]] = p;
            p = max(0,p - 1);
        }
    }
    
    int getSuffix(char s[]) {
        int len = strlen(s),up = 0;
        for(int i = 0; i < len; i++) {
            w[i] = s[i];
            up = max(up,w[i]);
        }
        w[len++] = 0;
        getSa(len,up+1);
        getHeight(len);
        return len;
    }
    
    int main()
    {
        int T;
        long long sum;
        char s[size];
        scanf("%d",&T);
        while(T--)
        {
            scanf("%s",s);
            sum=0;
            getSuffix(s);
            long long len=strlen(s);
            for(int i=0;i<=len;i++)
              sum+=height[i];
              sum=len*(len+1)/2-sum;
            printf("%lld
    ",sum);
        }
    }
  • 相关阅读:
    解决方案 git@github.com出现Permission denied (publickey)
    github设置添加SSH
    base64是啥原理
    PHP面试题:HTTP中POST、GET、PUT、DELETE方式的区别
    PHP中put和post区别
    常用的微信编辑器
    局域网内一台电脑的ip地址自己会变,怎样让它不变
    Trendalyzer is an information visualization software
    FineReport报表和水晶报表的比较
    x
  • 原文地址:https://www.cnblogs.com/chen9510/p/5471342.html
Copyright © 2011-2022 走看看