zoukankan      html  css  js  c++  java
  • SPOJ705 Distinct Substrings (后缀自动机&后缀数组)

    Given a string, we need to find the total number of its distinct substrings.

    Input

    T- number of test cases. T<=20;
    Each test case consists of one string, whose length is <= 1000

    Output

    For each test case output one number saying the number of distinct substrings.

    Example

    Sample Input:
    2
    CCCCC
    ABABA

    Sample Output:
    5
    9

    Explanation for the testcase with string ABABA: 
    len=1 : A,B
    len=2 : AB,BA
    len=3 : ABA,BAB
    len=4 : ABAB,BABA
    len=5 : ABABA
    Thus, total number of distinct substrings is 9.

    题意:

    求出由大写字母组成的字符串中不同子串的个数。先默写了一遍后缀自动机;今天主要是练习后缀数组。

     注意:

    • 注意是大写还是小写;
    • 注意 init 初始化时没有一次性 memset 整个数组,所以每新建一个节点都要单独 memset 该节点的转移数组(克隆节点由 memcpy 覆盖),不要忘记。

    后缀自动机:

    #include<cstdio>
    #include<cstdlib>
    #include<iostream>
    #include<cstring>
    #include<algorithm>
    using namespace std;
    const int maxn=10000;
    /*
     * Suffix automaton over a 26-symbol alphabet (symbols are passed to add()
     * as integers 0..25).
     *
     * States are numbered from 1; state 1 is the root and index 0 doubles as
     * the "no state" marker in both ch[][] and fa[].
     *   ch[s][c]  - transition of state s on symbol c (0 = absent)
     *   fa[s]     - suffix link of state s
     *   maxlen[s] - length of the longest string accepted by state s
     *   Last      - state reached after the whole text inserted so far
     *   sz        - index of the most recently allocated state
     */
    struct SAM
    {
        int ch[maxn][26],fa[maxn],maxlen[maxn],Last,sz;

        /* Reset to an automaton holding only the root. Only the root's
         * transition row is cleared here; add() clears or overwrites the
         * row of every state it allocates afterwards. */
        void init()
        {
            Last=1;
            sz=1;
            maxlen[1]=0;
            fa[1]=0;
            memset(ch[1],0,sizeof(ch[1]));
        }

        /* Append symbol x (0..25) to the text represented by the automaton. */
        void add(int x)
        {
            const int cur=++sz;
            int walk=Last;
            Last=cur;
            memset(ch[cur],0,sizeof(ch[cur]));
            maxlen[cur]=maxlen[walk]+1;

            /* Give every suffix state that lacks an x-transition one to cur. */
            for(;walk&&!ch[walk][x];walk=fa[walk]) ch[walk][x]=cur;

            if(!walk){
                /* Fell off the root: x did not occur before. */
                fa[cur]=1;
                return;
            }

            const int target=ch[walk][x];
            if(maxlen[walk]+1==maxlen[target]){
                /* The existing transition is already "tight". */
                fa[cur]=target;
                return;
            }

            /* Split target: the clone keeps its transitions and suffix link
             * but represents only the strings of length <= maxlen[walk]+1. */
            const int clone=++sz;
            memcpy(ch[clone],ch[target],sizeof(ch[target]));
            maxlen[clone]=maxlen[walk]+1;
            fa[clone]=fa[target];
            fa[target]=clone;
            fa[cur]=clone;

            /* Redirect the x-transitions that pointed at target to the clone. */
            for(;walk&&ch[walk][x]==target;walk=fa[walk]) ch[walk][x]=clone;
        }
    };
    SAM Sam;
    /*
     * Reads T test cases, each a single whitespace-delimited word, and prints
     * the number of distinct substrings of each word on its own line.
     *
     * The count is the sum over all automaton states s of
     * maxlen[s] - maxlen[fa[s]].
     *
     * Every character is mapped with `c - 'A'` into the automaton's 26-column
     * transition table, so the input must consist of the letters 'A'..'Z'.
     */
    int main()
    {
        char chr[maxn];
        int T,ans,i,L;
        if(scanf("%d",&T)!=1) return 0;          /* no test count: nothing to do */
        while(T--){
            /* Field width is maxn-1 (= 9999) so the read cannot overrun chr. */
            if(scanf("%9999s",chr)!=1) break;    /* input ended early */
            Sam.init();ans=0;
            L=static_cast<int>(strlen(chr));
            for(i=0;i<L;i++) Sam.add(chr[i]-'A');
            for(i=1;i<=Sam.sz;i++) ans+=Sam.maxlen[i]-Sam.maxlen[Sam.fa[i]];
            printf("%d\n",ans);
        }
        return 0;
    }
    View Code

    后缀数组:

    #include<cstdio>
    #include<cstdlib>
    #include<cstring>
    #include<iostream>
    #include<algorithm>
    using namespace std;
    const int maxn=10000;
    char ch[maxn];   /* text, 1-indexed: ch[1..L], with ch[L+1] == '\0' */
    int L;           /* length of the text currently stored in ch */
    /*
     * Suffix array (prefix doubling with counting sorts) plus LCP array for
     * the global text ch[1..L]. All arrays are 1-indexed.
     *   sa[r]    - start position of the suffix with rank r (1 = smallest)
     *   rank[p]  - rank of the suffix starting at position p
     *   ht[r]    - length of the common prefix of suffixes sa[r-1] and sa[r];
     *              ht[1] is 0
     *   cntA/cntB, A/B, tsa - scratch space for the counting sorts
     */
    struct SA
    {
        int cntA[maxn],cntB[maxn],A[maxn],B[maxn];
        int rank[maxn],sa[maxn],tsa[maxn],ht[maxn];

        /* Bucket index of a text byte. Going through unsigned char keeps the
         * index in 0..255 even where plain char is signed and the byte is
         * >= 0x80 (a plain char index would be negative there). */
        static int bucket(char c)
        {
            return static_cast<unsigned char>(c);
        }

        /* Builds sa[] and rank[] for ch[1..L]. */
        void sort()
        {
             if (L <= 0) return;   /* empty text: nothing to order */

             /* Round 0: order suffixes by their first byte. */
             for (int i = 0; i < 256; i ++) cntA[i] = 0;
             for (int i = 1; i <= L; i ++) cntA[bucket(ch[i])] ++;
             for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
             for (int i = L; i; i --) sa[cntA[bucket(ch[i])] --] = i;
             rank[sa[1]] = 1;
             for (int i = 2; i <= L; i ++){
                  rank[sa[i]] = rank[sa[i - 1]];
                  if (ch[sa[i]] != ch[sa[i - 1]]) rank[sa[i]] ++;
             }

             /* Doubling rounds: sort by the pair (rank[i], rank[i + l]) until
              * every suffix has its own rank. */
             for (int l = 1; rank[sa[L]] < L; l <<= 1){
                  for (int i = 0; i <= L; i ++) cntA[i] = 0;
                  for (int i = 0; i <= L; i ++) cntB[i] = 0;
                  for (int i = 1; i <= L; i ++){
                      cntA[A[i] = rank[i]] ++;
                      /* A second key past the end of the text sorts first (0). */
                      cntB[B[i] = (i + l <= L) ? rank[i + l] : 0] ++;
                  }
                  /* Counting sort on the second key, then on the first key;
                   * iterating backwards keeps the second pass stable. */
                  for (int i = 1; i <= L; i ++) cntB[i] += cntB[i - 1];
                  for (int i = L; i; i --) tsa[cntB[B[i]] --] = i;
                  for (int i = 1; i <= L; i ++) cntA[i] += cntA[i - 1];
                  for (int i = L; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
                  rank[sa[1]] = 1;
                  for (int i = 2; i <= L; i ++){
                       rank[sa[i]] = rank[sa[i - 1]];
                       if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rank[sa[i]] ++;
                  }
             }
        }

        /* Fills ht[] from sa[]/rank[]; call after sort(). Positions are
         * visited in text order so the previous match length, minus one, can
         * be reused as the starting point for the next suffix. */
        void getheight()
        {
             for (int i = 1, j = 0; i <= L; i ++){
                  if (rank[i] == 1){
                       /* The smallest suffix has no predecessor in sa[]. */
                       ht[1] = 0;
                       j = 0;
                       continue;
                  }
                  if (j) j --;
                  const int prev = sa[rank[i] - 1];
                  /* Bounds are checked explicitly, so the scan does not
                   * depend on the terminator at ch[L+1]. */
                  while (i + j <= L && prev + j <= L && ch[i + j] == ch[prev + j]) j ++;
                  ht[rank[i]] = j;
             }
        }
    };
    SA Sa;
    /*
     * Reads T test cases, each a single whitespace-delimited word, and prints
     * the number of distinct substrings of each word on its own line.
     *
     * The suffix starting at sa[i] has L - sa[i] + 1 prefixes, of which ht[i]
     * are shared with the preceding suffix in sorted order; the remainder are
     * the substrings counted for rank i.
     */
    int main()
    {
        int T,ans,i;
        if(scanf("%d",&T)!=1) return 0;            /* no test count: nothing to do */
        while(T--){
            ans=0;
            /* The text is stored from ch[1], leaving maxn-1 bytes; a field
             * width of 9998 plus the terminator cannot overrun the buffer. */
            if(scanf("%9998s",ch+1)!=1) break;     /* input ended early */
            L=static_cast<int>(strlen(ch+1));
            Sa.sort();
            Sa.getheight();
            for(i=1;i<=L;i++) ans+=L-Sa.sa[i]+1-Sa.ht[i];
            printf("%d\n",ans);
        }
        return 0;
    }
  • 相关阅读:
    函数要多小才够好——谈小函数之道
    vb.net 打字练习
    vb.net 打字练习
    vb.net 打字练习
    unsigned int 与 unsigned long 一样吗?
    epoll使用详解(精髓)
    论epoll的使用 高调coding,低调做人 C++博客
    学习使用epoll The time is passing ITeye技术网站
    ubuntu下sed命令详解 Dicky 开源中国社区
    分享:jquery遍历之children()与find()的区别
  • 原文地址:https://www.cnblogs.com/hua-dong/p/8016222.html
Copyright © 2011-2022 走看看