zoukankan      html  css  js  c++  java
  • 后缀数组

    两篇论文:《许智磊后缀数组.pdf》、《后缀数组——处理字符串的有力工具.pdf》

    贴两模版:

    DA:

    /*
    	* Suffix array built with the prefix-doubling algorithm, O(n log n).
    	* sa[i]: starting position of the i-th smallest suffix, i.e. suffix sa[i]
    	* rank[i]: index of suffix i inside sa, i.e. suffix i is the rank[i]-th smallest
    	* height[i]: LCP (suffix (sa[i-1]), suffix (sa[i]))
    */
    // N is not defined in this snippet; it must be at least the largest n (and the
    // largest m) ever passed to DA -- TODO confirm against the surrounding file.
    int sa[N], rank[N], height[N];
    // ws: counting-sort buckets; wa / wb: the two work buffers DA uses as x and y.
    int ws[N], wa[N], wb[N];
    
    // Returns true when positions a and b carry the same pair of keys:
    // r[a] == r[b] and r[a+l] == r[b+l]. The second pair is only read when
    // the first pair matches.
    bool cmp(int *r, int a, int b, int l) {
        if (r[a] != r[b]) {
            return false;
        }
        const int second_a = r[a + l];
        const int second_b = r[b + l];
        return second_a == second_b;
    }
    // Builds the suffix array of r into the global sa[] by prefix doubling.
    //   r: the input text; every character is used as its initial sort key.
    //   n: number of characters to sort. Callers pass strlen (s) + 1 so that the
    //      terminating '\0' is sorted along with the text.
    //   m: exclusive upper bound on the key values (max key + 1). 128 is enough
    //      for ASCII text; pass 256 when r may hold arbitrary bytes.
    // Uses the globals ws (buckets) and wa / wb (rank buffers) as scratch space.
    void DA(char *r, int n, int m = 128) {
        int i, j, p, *x = wa, *y = wb;
        // Round 0: counting sort of all positions by their first character.
        // Characters are read as unsigned char so that bytes >= 0x80 cannot
        // become negative bucket indices on platforms where char is signed.
        for (i=0; i<m; ++i) ws[i] = 0;
        for (i=0; i<n; ++i) ws[x[i] = static_cast<unsigned char>(r[i])]++;
        for (i=1; i<m; ++i) ws[i] += ws[i-1];
        for (i=n-1; i>=0; --i) sa[--ws[x[i]]] = i;
        // Each round sorts by pairs (x[i], x[i+j]) and doubles j. p ends every
        // round as the number of distinct ranks; the loop stops once p reaches n.
        for (j=1, p=1; p<n; j<<=1, m=p) {
            // y = positions ordered by their second key: the last j positions
            // have no second key and go first, the rest follow the order of sa.
            for (p=0, i=n-j; i<n; ++i) y[p++] = i;
            for (i=0; i<n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;
            // Stable counting sort of y by the first key x[y[i]] gives the new sa.
            for (i=0; i<m; ++i) ws[i] = 0;
            for (i=0; i<n; ++i) ws[x[y[i]]]++;
            for (i=1; i<m; ++i) ws[i] += ws[i-1];
            for (i=n-1; i>=0; --i) sa[--ws[x[y[i]]]] = y[i];
            // y now holds the previous ranks; x receives the new ones. Adjacent
            // entries of sa share a rank when both of their old keys are equal.
            std::swap (x, y);
            for (p = 1, x[sa[0]] = 0, i=1; i<n; ++i) {
                x[sa[i]] = cmp (y, sa[i-1], sa[i], j) ? p - 1 : p++;
            }
        }
    }
    // Fills the global rank[] and height[] from a finished suffix array.
    //   r:  the text the suffix array was built from.
    //   sa: the suffix array; entries sa[1..n] are read for rank, sa[rank-1] for
    //       the predecessor. Per the original author's note, n here is the text
    //       length and sa[0] == n is the suffix made of the terminating '\0'.
    //   n:  number of suffixes to process (positions 0..n-1).
    void calc_height(char *r, int *sa, int n) {
        // rank is the inverse of sa over the slots 1..n.
        for (int slot = 1; slot <= n; ++slot) {
            rank[sa[slot]] = slot;
        }
        // Walk the suffixes in text order; the match length carried over from
        // the previous suffix shrinks by at most one.
        int lcp = 0;
        for (int start = 0; start < n; ++start) {
            if (lcp > 0) {
                --lcp;
            }
            const int prev = sa[rank[start] - 1];  // suffix just before it in sa
            while (r[start + lcp] == r[prev + lcp]) {
                ++lcp;
            }
            height[rank[start]] = lcp;
        }
    }
    /*
    	* LCP (suffix (i), suffix (j)) = min (height[l] to height[r]); answer it with RMQ
    	* l = rank[i], r = rank[j]; if (l > r) swap (l, r); l++;
    */

    DC3:

    /*
        * Suffix array built with the DC3 algorithm, O(n).
    */
    // wa / wb: work arrays of positions; wv: keys gathered by sort and ranks read
    // by c12; ws: counting-sort buckets.
    int wa[N],wb[N],wv[N],ws[N];
    // rank / height: filled by calc_height.
    int rank[N],height[N];   
    // NOTE(review): DC3 writes r[n], r[n+1] and works at r + n and sa + n, so r and
    // sa must hold more than n entries -- confirm N leaves that headroom.
    int sa[N],r[N];
    
    // Returns 1 when the three consecutive keys starting at a equal the three
    // starting at b, otherwise 0. Comparison stops at the first mismatch.
    int c0(int *y,int a,int b) {
        for (int offset = 0; offset < 3; ++offset) {
            if (y[a + offset] != y[b + offset]) {
                return 0;
            }
        }
        return 1;
    }
    // Ordering test used when merging in DC3: returns non-zero when position a
    // sorts before position b.
    //   k == 2: compare y[a] with y[b], then fall back to the k == 1 rule on
    //           a+1 / b+1.
    //   otherwise: compare y[a] with y[b], then the values stored in the global
    //           wv at a+1 and b+1.
    int c12(int k,int *y,int a,int b) {
        if (y[a] != y[b]) {
            return y[a] < y[b];
        }
        if (k == 2) {
            return c12(1, y, a + 1, b + 1);
        }
        return wv[a + 1] < wv[b + 1];
    }
    // Stable counting sort: writes the n entries of a into b, ordered by the key
    // r[a[i]]. Keys must lie in [0, m). Uses the globals wv (gathered keys) and
    // ws (buckets) as scratch space.
    void sort(int *r,int *a,int *b,int n,int m) {
        // Gather the key of every entry.
        for (int idx = 0; idx < n; ++idx) {
            wv[idx] = r[a[idx]];
        }
        // Histogram of the keys.
        for (int key = 0; key < m; ++key) {
            ws[key] = 0;
        }
        for (int idx = 0; idx < n; ++idx) {
            ++ws[wv[idx]];
        }
        // Prefix sums turn the counts into end offsets.
        for (int key = 1; key < m; ++key) {
            ws[key] += ws[key - 1];
        }
        // Scatter from the back so entries with equal keys keep their order.
        for (int idx = n - 1; idx >= 0; --idx) {
            const int entry = a[idx];
            b[--ws[wv[idx]]] = entry;
        }
    }
    // Index of text position x (x % 3 != 0) inside the reduced string: positions
    // with x % 3 == 1 come first, positions with x % 3 == 2 are shifted by tb.
    static int dc3_reduced_index(int x, int tb) {
        return x / 3 + (x % 3 == 1 ? 0 : tb);
    }
    // Inverse of dc3_reduced_index: text position of reduced-string index x.
    static int dc3_text_position(int x, int tb) {
        return x < tb ? x * 3 + 1 : (x - tb) * 3 + 2;
    }
    // Builds the suffix array of r[0..n-1] into sa[0..n-1] with the DC3 algorithm.
    //   r:  integer keys in [0, m). r[n] and r[n+1] are overwritten with 0, and the
    //       memory from r + n onwards holds the reduced string for the recursion.
    //   sa: output; the memory from sa + n onwards is used by the recursion.
    //   n:  number of keys; m: exclusive upper bound on the keys.
    // Uses the globals wa, wb, wv, ws as scratch through sort() and c12().
    void DC3(int *r,int *sa,int n,int m) {
        int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
        // Zero padding so the triple comparisons below read defined values.
        r[n]=r[n+1]=0;
        // Collect the positions with i % 3 != 0 and radix sort them by their
        // three leading keys (least significant key first).
        for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i;
        sort(r+2,wa,wb,tbc,m);
        sort(r+1,wb,wa,tbc,m);
        sort(r,wa,wb,tbc,m);
        // Name the triples: equal triples share a rank, p counts distinct ranks.
        // Nothing is written when there are no such positions (tbc == 0).
        p=1;
        if(tbc>0) rn[dc3_reduced_index(wb[0],tb)]=0;
        for(i=1;i<tbc;i++)
            rn[dc3_reduced_index(wb[i],tb)]=c0(r,wb[i-1],wb[i])?p-1:p++;
        // Ranks not unique yet: recurse on the reduced string; otherwise the
        // ranks already are the inverse of its suffix array.
        if(p<tbc) DC3(rn,san,tbc,p);
        else for(i=0;i<tbc;i++) san[rn[i]]=i;
        // Positions with i % 3 == 0, taken in the order of their successor
        // i + 1 and then sorted stably by their own key.
        for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3;
        if(n%3==1) wb[ta++]=n-1;
        sort(r,wb,wa,ta,m);
        // wb = the i % 3 != 0 positions in sorted order, wv = their ranks.
        for(i=0;i<tbc;i++) {
            wb[i]=dc3_text_position(san[i],tb);
            wv[wb[i]]=i;
        }
        // Merge the two sorted groups into sa.
        for(i=0,j=0,p=0;i<ta && j<tbc;p++)
            sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
        for(;i<ta;p++) sa[p]=wa[i++];
        for(;j<tbc;p++) sa[p]=wb[j++];
    }
    // Fills the global rank[] and height[] from a finished suffix array over
    // integer keys.
    //   r:  the key sequence the suffix array was built from.
    //   sa: the suffix array; sa[1..n] are read for rank, sa[rank-1] for the
    //       predecessor of each suffix.
    //   n:  number of suffixes to process (positions 0..n-1).
    void calc_height(int *r,int *sa,int n) {
        int slot = 1;
        while (slot <= n) {
            rank[sa[slot]] = slot;
            ++slot;
        }
        // The match length found for one suffix is reused, minus one, as the
        // starting point for the next suffix in text order.
        int matched = 0;
        for (int suffix = 0; suffix < n; ++suffix) {
            if (matched != 0) {
                matched--;
            }
            const int neighbour = sa[rank[suffix] - 1];
            for (; r[suffix + matched] == r[neighbour + matched]; ++matched) {
            }
            height[rank[suffix]] = matched;
        }
    }
    

      

  • 相关阅读:
    脚本——基础命令
    ELK 安装
    keepalived+lvs
    zabbix安装
    lnmp
    nagios安装
    nginx-tomcat动静分离
    课时五、boost与adaboost
    课时四、决策树和随机森林
    课时三、回归实践
  • 原文地址:https://www.cnblogs.com/Running-Time/p/5450483.html
Copyright © 2011-2022 走看看