zoukankan      html  css  js  c++  java
  • 后缀数组小结

    后缀数组又被称为字符串处理神器;

    http://blog.csdn.net/xymscau/article/details/8798046 这里讲的非常好

    实现rank排名时用到了倍增法和计数排序(基数排序),时间复杂度是 O(n log n)

    height[i]存放的是排名第i的后缀与排名第i-1的后缀的最长公共前缀的长度,

    sa[i]存的是排名第i的后缀是第几位开头的

    rk[i]存放第i个位置开头的后缀的字典序排名

    Poj 2774,Poj1743,Poj3294,Poj3261,Poj2758

    1.poj2774(后缀数组水题)

    题意:给你两串字符,要你找出在这两串字符中都出现过的最长子串.........

    思路:先用个分隔符将两个字符串连接起来,再用后缀数组求出height数组的值,找出一个height值最大并且i与i-1的sa值分别在两串字符中就好.....

    正确性证明:对于任意一个后缀i,与它拥有最长公共前缀的后缀j,在排名上一定与i相邻。

    那么我们将两个字符串用空格连接起来之后,如果答案是在i和j这两个位置,而这两个位置的rank不相邻,那么设它们中间有一个排名为k的后缀,显然i与k,或者j与k也是一个同样优(或更优)的解。

    #include<iostream>
    #include<string.h>
    #include<stdio.h>
    using namespace std;
    
    #define rep(i,n) for(int i = 0;i < n; i++)
    using namespace std;
    // Capacity of every suffix-array work buffer, and a generic "infinity" value.
    const int size  = 200005,INF = 1<<30;
    // rk: rank of each suffix, sa: suffix array, height: LCP of rank-adjacent
    // suffixes, w: input sequence, wa/res: scratch buffers used by getSa.
    // The bound is written as ::size because, with `using namespace std;` in
    // effect, an unqualified `size` is ambiguous with std::size under C++17.
    int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
    // Builds the suffix array `sa` for the sequence w[0..len) by prefix doubling,
    // running a counting sort in every round.
    //   len - number of entries of `w` to process (getSuffix passes the length
    //         including the trailing 0 sentinel it appends).
    //   up  - exclusive upper bound on the values in w[0..len); it is the number
    //         of counting-sort buckets for the first round.
    // Scratch usage: `height` is borrowed as the temporary order buffer `id`, and
    // `rk`/`res` alternate as current/previous rank arrays, so callers should not
    // rely on the contents of `rk` or `height` afterwards (getHeight rebuilds both).
    void getSa (int len,int up) {
        int *k = rk,*id = height,*r = res, *cnt = wa;
        // Round 0: counting sort of the suffixes by their first value.
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[k[i] = w[i]]++;
        // Prefix sums; this also writes cnt[up], one slot past the buckets in use.
        rep(i,up) cnt[i+1] += cnt[i];
        // Scan backwards so that equal keys keep their relative order.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[k[i]]] = i;
        }
        // d is the length already sorted on; p counts entries / distinct ranks.
        int d = 1,p = 0;
        while(p < len){
            // Order by the second half: suffixes starting in the last d positions
            // have no second half and are placed first ...
            for(int i = len - d; i < len; i++) id[p++] = i;
            // ... then every other suffix, in the order its second half (sa[i])
            // appears in the current suffix array.
            rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
            // r[i] is the first-half key of the i-th suffix in second-half order.
            rep(i,len) r[i] = k[id[i]];
            // Counting sort by first-half key; the backward scan preserves the
            // second-half order among equal keys.
            rep(i,up) cnt[i] = 0;
            rep(i,len) cnt[r[i]]++;
            rep(i,up) cnt[i+1] += cnt[i];
            for(int i = len - 1; i >= 0; i--) {
                sa[--cnt[r[i]]] = id[i];
            }
            // After the swap `r` holds the previous round's ranks and `k`
            // receives the new ones.
            swap(k,r);
            p = 0;
            k[sa[0]] = p++;
            rep(i,len-1) {
                // Neighbours share a rank only if both halves had equal ranks in
                // the previous round (and both second halves exist).
                if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                    k[sa[i+1]] = p - 1;
                else k[sa[i+1]] = p++;
            }
            // Every suffix has its own rank: the order is final.
            if(p >= len) return ;
            // Double the compared length; ranks now lie in [0, p).
            d *= 2,up = p, p = 0;
        }
    }
    // Recomputes rk as the inverse of sa and fills height[] so that height[r] is
    // the length of the common prefix of the suffixes ranked r and r-1 in w[0..len).
    // height[0] is set to 0. The suffix starting at len-1 is skipped, and every
    // other suffix is assumed to have rank >= 1 (true when the caller appended a
    // unique smallest sentinel); otherwise sa[-1] would be read.
    void getHeight(int len) {
        for (int pos = 0; pos < len; ++pos) {
            rk[sa[pos]] = pos;
        }
        height[0] = 0;

        int lcp = 0;  // carried over: the next suffix shares at least lcp-1 values
        for (int start = 0; start + 1 < len; ++start) {
            const int prev = sa[rk[start] - 1];  // suffix ranked just before `start`
            while (start + lcp < len && prev + lcp < len && w[start + lcp] == w[prev + lcp]) {
                ++lcp;
            }
            height[rk[start]] = lcp;
            if (lcp > 0) {
                --lcp;
            }
        }
    }
    // Copies the NUL-terminated string `s` into w[], appends a 0 sentinel, and
    // builds sa / rk / height for it.
    // Returns the processed length, i.e. strlen(s) + 1 (sentinel included), so
    // valid ranks are 0 .. return value - 1.
    int getSuffix(char s[]) {
        const int n = static_cast<int>(strlen(s));
        int up = 0;
        for(int i = 0; i < n; i++) {
            // Go through unsigned char: a plain char may be negative, which would
            // index the counting-sort buckets in getSa out of bounds.
            w[i] = static_cast<unsigned char>(s[i]);
            if(w[i] > up) up = w[i];
        }
        w[n] = 0;                 // sentinel, smaller than any byte of a C string
        const int len = n + 1;
        getSa(len,up+1);
        getHeight(len);
        return len;
    }
    // main stores "<first> <second>\0" in str. With two inputs of up to 100000
    // characters (the limit implied by the original constant - TODO confirm
    // against the problem statement) that needs 200002 bytes, one more than the
    // previous 100000*2+1, so the buffer gets the same +5 slack used by the other
    // programs on this page. It still fits the 200005-entry work arrays.
    const int maxa = 100000*2+5;
    char str[maxa];
    // Reads pairs of strings until input ends and prints, for each pair, the
    // length of the longest substring that occurs in both.
    int main(){
        while(scanf("%s", str) == 1){
            const int l = strlen(str);       // index of the separator
            str[l] = ' ';                    // join the two strings with a space
            if(scanf("%s", str+l+1) != 1) break;   // second string missing
            // total = joined length + 1 (sentinel), so ranks run 0 .. total-1.
            const int total = getSuffix(str);
            int ans = 0;
            // Visit every rank-adjacent pair, including the last rank; stopping
            // one rank early misses answers such as the inputs "b" and "b".
            for(int i = 1; i < total; i++){
                // Only pairs with one suffix on each side of the separator count.
                if((sa[i-1] < l && sa[i] > l) || (sa[i-1] > l && sa[i] < l)){
                    ans = max(ans, height[i]);
                }
            }
            printf("%d\n", ans);
        }
        return 0;
    }
    /*
    abcde
    bcde
    */
    View Code

    2.poj1743

    题意:给一串数字,求变化相同,且不重叠的最长字符串

    变化相同就是将字符串s[i]变成s[i]-s[i-1]

    那么再求后缀数组的话height[i]代表的是两个长度是height[i]+1变化相等,而如果s[i]与s[j]间距是n的话那么他们在实际字符串中的间距也是n,所以如果两个地方的height最小值是n的话他们的间距应该是n+1才行。

    二分答案的方法这里讲的很好http://blog.sina.com.cn/s/blog_6635898a0102e0me.html

    #include<iostream>
    #include<string.h>
    #include<stdio.h>
    using namespace std;
    
    #define rep(i,n) for(int i = 0;i < n; i++)
    using namespace std;
    // Capacity of every suffix-array work buffer, and a generic "infinity" value.
    const int size  = 200005,INF = 1<<30;
    // rk: rank of each suffix, sa: suffix array, height: LCP of rank-adjacent
    // suffixes, w: input sequence, wa/res: scratch buffers used by getSa.
    // The bound is written as ::size because, with `using namespace std;` in
    // effect, an unqualified `size` is ambiguous with std::size under C++17.
    int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
    // Builds the suffix array `sa` for the sequence w[0..len) by prefix doubling,
    // running a counting sort in every round.
    //   len - number of entries of `w` to process (getSuffix passes the length
    //         including the trailing 0 sentinel it appends).
    //   up  - exclusive upper bound on the values in w[0..len); it is the number
    //         of counting-sort buckets for the first round.
    // Scratch usage: `height` is borrowed as the temporary order buffer `id`, and
    // `rk`/`res` alternate as current/previous rank arrays, so callers should not
    // rely on the contents of `rk` or `height` afterwards (getHeight rebuilds both).
    void getSa (int len,int up) {
        int *k = rk,*id = height,*r = res, *cnt = wa;
        // Round 0: counting sort of the suffixes by their first value.
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[k[i] = w[i]]++;
        // Prefix sums; this also writes cnt[up], one slot past the buckets in use.
        rep(i,up) cnt[i+1] += cnt[i];
        // Scan backwards so that equal keys keep their relative order.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[k[i]]] = i;
        }
        // d is the length already sorted on; p counts entries / distinct ranks.
        int d = 1,p = 0;
        while(p < len){
            // Order by the second half: suffixes starting in the last d positions
            // have no second half and are placed first ...
            for(int i = len - d; i < len; i++) id[p++] = i;
            // ... then every other suffix, in the order its second half (sa[i])
            // appears in the current suffix array.
            rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
            // r[i] is the first-half key of the i-th suffix in second-half order.
            rep(i,len) r[i] = k[id[i]];
            // Counting sort by first-half key; the backward scan preserves the
            // second-half order among equal keys.
            rep(i,up) cnt[i] = 0;
            rep(i,len) cnt[r[i]]++;
            rep(i,up) cnt[i+1] += cnt[i];
            for(int i = len - 1; i >= 0; i--) {
                sa[--cnt[r[i]]] = id[i];
            }
            // After the swap `r` holds the previous round's ranks and `k`
            // receives the new ones.
            swap(k,r);
            p = 0;
            k[sa[0]] = p++;
            rep(i,len-1) {
                // Neighbours share a rank only if both halves had equal ranks in
                // the previous round (and both second halves exist).
                if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                    k[sa[i+1]] = p - 1;
                else k[sa[i+1]] = p++;
            }
            // Every suffix has its own rank: the order is final.
            if(p >= len) return ;
            // Double the compared length; ranks now lie in [0, p).
            d *= 2,up = p, p = 0;
        }
    }
    // Recomputes rk as the inverse of sa and fills height[] so that height[r] is
    // the length of the common prefix of the suffixes ranked r and r-1 in w[0..len).
    // height[0] is set to 0. The suffix starting at len-1 is skipped, and every
    // other suffix is assumed to have rank >= 1 (true when the caller appended a
    // unique smallest sentinel); otherwise sa[-1] would be read.
    void getHeight(int len) {
        for (int pos = 0; pos < len; ++pos) {
            rk[sa[pos]] = pos;
        }
        height[0] = 0;

        int lcp = 0;  // carried over: the next suffix shares at least lcp-1 values
        for (int start = 0; start + 1 < len; ++start) {
            const int prev = sa[rk[start] - 1];  // suffix ranked just before `start`
            while (start + lcp < len && prev + lcp < len && w[start + lcp] == w[prev + lcp]) {
                ++lcp;
            }
            height[rk[start]] = lcp;
            if (lcp > 0) {
                --lcp;
            }
        }
    }
    // Copies s[0..n) into w[], appends a 0 sentinel and builds sa / rk / height.
    // Returns n + 1, the processed length including the sentinel.
    int getSuffix(int s[], int n) {
        int largest = 0;
        for (int idx = 0; idx < n; ++idx) {
            w[idx] = s[idx];
            if (w[idx] > largest) {
                largest = w[idx];
            }
        }
        w[n] = 0;                       // sentinel entry
        const int total = n + 1;
        getSa(total, largest + 1);      // bucket count = largest value + 1
        getHeight(total);
        return total;
    }
    // Capacity of the input buffers below.
    const int maxa = 100000*2+1;
    // str: sequence handed to getSuffix (main fills it with shifted differences).
    int str[maxa];
    // a: raw input values read by main.
    int a[maxa];
    // Feasibility check for the binary search in main.
    // Walks ranks 0..n, grouping consecutive ranks whose height is >= ans, and
    // tracks the smallest and largest start position seen in the current group.
    // Returns 1 as soon as one group spans start positions more than `ans` apart
    // (two occurrences that do not touch), otherwise 0.
    int judge(int ans, int n){
        int lo = sa[0];
        int hi = sa[0];
        for (int idx = 0; idx <= n; ++idx) {
            if (height[idx] < ans) {
                // Common prefix too short: a new group starts at this suffix.
                lo = hi = sa[idx];
                continue;
            }
            if (sa[idx] < lo) lo = sa[idx];
            if (sa[idx] > hi) hi = sa[idx];
            if (hi - lo > ans) {
                return 1;
            }
        }
        return 0;
    }
    // Reads test cases (n followed by n values) until n == 0 or input ends.
    // For each case it builds the suffix array of the shifted difference sequence
    // and binary-searches the answer with judge(); results below 5 print as 0.
    int main(){
        int n;
        while(scanf("%d", &n) == 1){
            if(n == 0)return 0;
            for(int i = 0; i < n; i++){
                scanf("%d", &a[i]);
            }
            // Work on consecutive differences; +100 keeps every value positive
            // so the 0 sentinel stays the smallest entry.
            for(int i = 0; i < n-1; i++){
                str[i] = a[i+1] - a[i] + 100;
            }
            str[n-1] = 0;
            getSuffix(str, n-1);
            // Find the smallest length for which judge() fails.
            int l = 0, r = n-1;
            while(l < r){
                int mid = (l+r) / 2;
                if(judge(mid, n-1)) l = mid+1;
                else r = mid ;
            }
            if(l < 5){
                printf("0\n");
            }else{
                printf("%d\n", l);
            }
        }
        return 0;
    }
    /*
    abcde
    bcde
    */
    View Code

    3.poj3261

    题意:找出一个字符串中出现次数不小于K的最长子串:

    依旧是二分:

    #include<iostream>
    #include<string.h>
    #include<vector>
    #include<map>
    #include<set>
    #include<stdio.h>
    #include<algorithm>
    using namespace std;
    
    #define rep(i,n) for(int i = 0;i < n; i++)
    using namespace std;
    // Capacity of every suffix-array work buffer, and a generic "infinity" value.
    const int size  = 200005,INF = 1<<30;
    // rk: rank of each suffix, sa: suffix array, height: LCP of rank-adjacent
    // suffixes, w: input sequence, wa/res: scratch buffers used by getSa.
    // The bound is written as ::size because, with `using namespace std;` in
    // effect, an unqualified `size` is ambiguous with std::size under C++17.
    int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
    // Builds the suffix array `sa` for the sequence w[0..len) by prefix doubling,
    // running a counting sort in every round.
    //   len - number of entries of `w` to process (getSuffix passes the length
    //         including the trailing 0 sentinel it appends).
    //   up  - exclusive upper bound on the values in w[0..len); it is the number
    //         of counting-sort buckets for the first round.
    // Scratch usage: `height` is borrowed as the temporary order buffer `id`, and
    // `rk`/`res` alternate as current/previous rank arrays, so callers should not
    // rely on the contents of `rk` or `height` afterwards (getHeight rebuilds both).
    void getSa (int len,int up) {
        int *k = rk,*id = height,*r = res, *cnt = wa;
        // Round 0: counting sort of the suffixes by their first value.
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[k[i] = w[i]]++;
        // Prefix sums; this also writes cnt[up], one slot past the buckets in use.
        rep(i,up) cnt[i+1] += cnt[i];
        // Scan backwards so that equal keys keep their relative order.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[k[i]]] = i;
        }
        // d is the length already sorted on; p counts entries / distinct ranks.
        int d = 1,p = 0;
        while(p < len){
            // Order by the second half: suffixes starting in the last d positions
            // have no second half and are placed first ...
            for(int i = len - d; i < len; i++) id[p++] = i;
            // ... then every other suffix, in the order its second half (sa[i])
            // appears in the current suffix array.
            rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
            // r[i] is the first-half key of the i-th suffix in second-half order.
            rep(i,len) r[i] = k[id[i]];
            // Counting sort by first-half key; the backward scan preserves the
            // second-half order among equal keys.
            rep(i,up) cnt[i] = 0;
            rep(i,len) cnt[r[i]]++;
            rep(i,up) cnt[i+1] += cnt[i];
            for(int i = len - 1; i >= 0; i--) {
                sa[--cnt[r[i]]] = id[i];
            }
            // After the swap `r` holds the previous round's ranks and `k`
            // receives the new ones.
            swap(k,r);
            p = 0;
            k[sa[0]] = p++;
            rep(i,len-1) {
                // Neighbours share a rank only if both halves had equal ranks in
                // the previous round (and both second halves exist).
                if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                    k[sa[i+1]] = p - 1;
                else k[sa[i+1]] = p++;
            }
            // Every suffix has its own rank: the order is final.
            if(p >= len) return ;
            // Double the compared length; ranks now lie in [0, p).
            d *= 2,up = p, p = 0;
        }
    }
    // Recomputes rk as the inverse of sa and fills height[] so that height[r] is
    // the length of the common prefix of the suffixes ranked r and r-1 in w[0..len).
    // height[0] is set to 0. The suffix starting at len-1 is skipped, and every
    // other suffix is assumed to have rank >= 1 (true when the caller appended a
    // unique smallest sentinel); otherwise sa[-1] would be read.
    void getHeight(int len) {
        for (int pos = 0; pos < len; ++pos) {
            rk[sa[pos]] = pos;
        }
        height[0] = 0;

        int lcp = 0;  // carried over: the next suffix shares at least lcp-1 values
        for (int start = 0; start + 1 < len; ++start) {
            const int prev = sa[rk[start] - 1];  // suffix ranked just before `start`
            while (start + lcp < len && prev + lcp < len && w[start + lcp] == w[prev + lcp]) {
                ++lcp;
            }
            height[rk[start]] = lcp;
            if (lcp > 0) {
                --lcp;
            }
        }
    }
    // Copies s[0..n) into w[], appends a 0 sentinel and builds sa / rk / height.
    // Returns n + 1, the processed length including the sentinel.
    int getSuffix(int s[], int n) {
        int largest = 0;
        for (int idx = 0; idx < n; ++idx) {
            w[idx] = s[idx];
            if (w[idx] > largest) {
                largest = w[idx];
            }
        }
        w[n] = 0;                       // sentinel entry
        const int total = n + 1;
        getSa(total, largest + 1);      // bucket count = largest value + 1
        getHeight(total);
        return total;
    }
    // Capacity of the per-program buffers below.
    const int maxa = 100000*2+5;
    // Input sequence read by main; main overwrites it with compressed ids before
    // handing it to getSuffix.
    int num[maxa];
    // The remaining buffers (str, mp, vis, que, qq, o) are not referenced by this
    // program's judge() or main().
    char str[maxa];
    int mp[maxa];
    int vis[200];
    int que[maxa], qq[maxa];
    int o;
    // Feasibility check for the binary search in main.
    // Walks ranks 0..n and measures runs of rank-adjacent suffixes whose common
    // prefix is at least `mid` long. A run starts at size 1 (the suffix that
    // opened it) and grows by one for every following height[i] >= mid.
    // Returns 1 if some run reaches K suffixes, otherwise 0.
    int judge(int n, int mid, int K){
        int siz = 0;
        for(int i = 0; i <= n; i++){
            if(height[i] >= mid){
                siz++;
            }else{
                // Run ended: report it if it was large enough, then start a new
                // run containing only the current suffix.
                if(siz >= K) return 1;
                siz = 1;
            }
        }
        // The last run is not closed inside the loop.
        return siz >= K ? 1 : 0;
    }
    // Sorted copy of the input values (filled and sorted by main).
    int snum[maxa];
    // Maps each distinct input value to a small id.
    map<int,int>mp1;
    // Reads test cases "n K" followed by n values until input ends and prints,
    // for each case, the largest length whose judge() check succeeds.
    int main(){
        int n, K;
        while(scanf("%d%d", &n, &K) == 2){
            mp1.clear();
            for(int i = 0; i < n; i++){
                scanf("%d", &num[i]);
                snum[i] = num[i];
            }
            // NOTE(review): the sorted copy is not consulted below; ids are taken
            // from input positions instead.
            sort(snum, snum+n);
            // Compress values to ids in [1, n]: equal values share the id of their
            // last occurrence. Only equality matters for repeated substrings, and
            // ids >= 1 keep the 0 sentinel unique.
            for(int i = 0; i < n; i++){
                mp1[num[i]] = i+1;
            }
            for(int i = 0; i < n; i++){
                num[i] = mp1[num[i]];
            }
            getSuffix(num, n);
            // Binary search for the first length that fails; the answer is one less.
            int high = n + 10;
            int low = 0;
            while(low < high){
                int mid = (low + high) / 2;
                if(judge(n, mid, K)) low = mid+1;
                else high = mid;
            }
            printf("%d\n", low -1);
        }
        return 0;
    }
    View Code

    4.poj3294

    题意:给n个字符串,找出出现在大于一半字符串中的最长子串,如果有多个按字典序输出

    没看到按字典序输出卡了两天啊啊啊啊啊啊啊啊啊啊啊

    用一些不同的字符去连接所有字串,二分答案,找出连续的height大于K的所有位置,如果分别属于不同字串就成立

    #include<iostream>
    #include<string.h>
    #include<vector>
    #include<set>
    #include<stdio.h>
    using namespace std;
    
    #define rep(i,n) for(int i = 0;i < n; i++)
    using namespace std;
    // Capacity of every suffix-array work buffer, and a generic "infinity" value.
    const int size  = 200005,INF = 1<<30;
    // rk: rank of each suffix, sa: suffix array, height: LCP of rank-adjacent
    // suffixes, w: input sequence, wa/res: scratch buffers used by getSa.
    // The bound is written as ::size because, with `using namespace std;` in
    // effect, an unqualified `size` is ambiguous with std::size under C++17.
    int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
    // Builds the suffix array `sa` for the sequence w[0..len) by prefix doubling,
    // running a counting sort in every round.
    //   len - number of entries of `w` to process (getSuffix passes the length
    //         including the trailing 0 sentinel it appends).
    //   up  - exclusive upper bound on the values in w[0..len); it is the number
    //         of counting-sort buckets for the first round.
    // Scratch usage: `height` is borrowed as the temporary order buffer `id`, and
    // `rk`/`res` alternate as current/previous rank arrays, so callers should not
    // rely on the contents of `rk` or `height` afterwards (getHeight rebuilds both).
    void getSa (int len,int up) {
        int *k = rk,*id = height,*r = res, *cnt = wa;
        // Round 0: counting sort of the suffixes by their first value.
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[k[i] = w[i]]++;
        // Prefix sums; this also writes cnt[up], one slot past the buckets in use.
        rep(i,up) cnt[i+1] += cnt[i];
        // Scan backwards so that equal keys keep their relative order.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[k[i]]] = i;
        }
        // d is the length already sorted on; p counts entries / distinct ranks.
        int d = 1,p = 0;
        while(p < len){
            // Order by the second half: suffixes starting in the last d positions
            // have no second half and are placed first ...
            for(int i = len - d; i < len; i++) id[p++] = i;
            // ... then every other suffix, in the order its second half (sa[i])
            // appears in the current suffix array.
            rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
            // r[i] is the first-half key of the i-th suffix in second-half order.
            rep(i,len) r[i] = k[id[i]];
            // Counting sort by first-half key; the backward scan preserves the
            // second-half order among equal keys.
            rep(i,up) cnt[i] = 0;
            rep(i,len) cnt[r[i]]++;
            rep(i,up) cnt[i+1] += cnt[i];
            for(int i = len - 1; i >= 0; i--) {
                sa[--cnt[r[i]]] = id[i];
            }
            // After the swap `r` holds the previous round's ranks and `k`
            // receives the new ones.
            swap(k,r);
            p = 0;
            k[sa[0]] = p++;
            rep(i,len-1) {
                // Neighbours share a rank only if both halves had equal ranks in
                // the previous round (and both second halves exist).
                if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                    k[sa[i+1]] = p - 1;
                else k[sa[i+1]] = p++;
            }
            // Every suffix has its own rank: the order is final.
            if(p >= len) return ;
            // Double the compared length; ranks now lie in [0, p).
            d *= 2,up = p, p = 0;
        }
    }
    // Recomputes rk as the inverse of sa and fills height[] so that height[r] is
    // the length of the common prefix of the suffixes ranked r and r-1 in w[0..len).
    // height[0] is set to 0. The suffix starting at len-1 is skipped, and every
    // other suffix is assumed to have rank >= 1 (true when the caller appended a
    // unique smallest sentinel); otherwise sa[-1] would be read.
    void getHeight(int len) {
        for (int pos = 0; pos < len; ++pos) {
            rk[sa[pos]] = pos;
        }
        height[0] = 0;

        int lcp = 0;  // carried over: the next suffix shares at least lcp-1 values
        for (int start = 0; start + 1 < len; ++start) {
            const int prev = sa[rk[start] - 1];  // suffix ranked just before `start`
            while (start + lcp < len && prev + lcp < len && w[start + lcp] == w[prev + lcp]) {
                ++lcp;
            }
            height[rk[start]] = lcp;
            if (lcp > 0) {
                --lcp;
            }
        }
    }
    // Copies s[0..n) into w[], appends a 0 sentinel and builds sa / rk / height.
    // Returns n + 1, the processed length including the sentinel.
    int getSuffix(int s[], int n) {
        int largest = 0;
        for (int idx = 0; idx < n; ++idx) {
            w[idx] = s[idx];
            if (w[idx] > largest) {
                largest = w[idx];
            }
        }
        w[n] = 0;                       // sentinel entry
        const int total = n + 1;
        getSa(total, largest + 1);      // bucket count = largest value + 1
        getHeight(total);
        return total;
    }
    // Capacity of the per-program buffers below.
    const int maxa = 100000*2+5;
    // Concatenation of all input strings; main writes the separator value 500+i
    // after the i-th string.
    int num[maxa];
    // Read buffer for one input string at a time.
    char str[maxa];
    // mp[pos] is the index of the input string that position `pos` of num
    // belongs to (separators included).
    int mp[maxa];
    // Per-string "already counted in this group" flags used by judge().
    int vis[200];
    // qq: start positions collected during the current judge() call;
    // que: copy of qq from the most recent judge() call that returned 1.
    int que[maxa], qq[maxa];
    // Number of valid entries in que.
    int o;
    // Feasibility check for the binary search in main.
    // Walks ranks 0..n and groups consecutive suffixes whose height is >= mid.
    // Inside a group it counts how many different input strings (via mp[]) the
    // suffixes come from. Every group covering more than K strings contributes
    // one start position to qq.
    // Returns 1 if at least one group qualified, after copying the positions to
    // que[0..o); returns 0 otherwise and leaves que / o untouched.
    int judge(int n, int mid, int K){
        int siz = 0;          // distinct strings seen in the current group
        int oo = 0;           // number of positions collected in qq
        int last = sa[0];     // start position of the latest newly counted suffix
        memset(vis, 0, sizeof(vis));
        for(int i = 0; i <= n; i++){
            if(height[i] >= mid){
                // Same group: count this suffix only if its string is new here.
                if(vis[mp[sa[i]]] == 0){
                    vis[mp[sa[i]]] = 1;
                    siz++;
                    last = sa[i];
                }
            }else{
                // Group ended: record it if it covers enough strings, then open a
                // new group that contains only the current suffix.
                if(siz > K){
                    qq[oo++] = last;
                }
                memset(vis, 0, sizeof(vis));
                vis[mp[sa[i]]] = 1;
                siz = 1;
            }
        }
        // The final group is not closed inside the loop.
        if(siz > K){
            qq[oo++] = last;
        }

        if(oo == 0) return 0;
        o = oo;
        for(int i = 0; i < oo; i++){
            que[i] = qq[i];
        }
        return 1;
    }
    // Reads test cases (n followed by n strings) until n == 0 or input ends.
    // For each case it prints every longest substring found by judge() to occur
    // in more than n/2 of the strings, or "?" if there is none. Cases are
    // separated by a blank line.
    int main(){
        int n;
        int first = 0;
        // Stop on EOF or a failed read as well as on the terminating 0; the comma
        // form would keep looping on a stale n.
        while(scanf("%d", &n) == 1 && n){
            int l = 0;
            for(int i = 0; i < n; i++){
                scanf("%s", str);
                for(int k = 0; str[k]; k++){
                    num[l] = str[k];
                    mp[l] = i;
                    l++;
                }
                // Unique separator per string so that no common prefix can run
                // across a string boundary.
                mp[l] = i;
                num[l++] = 500+i;
            }
            getSuffix(num, l);
            // Binary search for the first length that fails; low-1 is the answer
            // and que/o hold the positions from the last successful check.
            int high = 1005;
            int low = 0;
            while(low < high){
                int mid = (low + high) / 2;
                if(judge(l, mid, n/2)) low = mid+1;
                else high = mid;
            }
            if(first == 0)first = 1;
            else puts("");
            if(n == 1){
                // A single string is its own answer.
                printf("%s\n", str);
                continue;
            }
            if(low > 1){
                for(int i = 0; i < o; i++){
                    for(int k = que[i]; k < que[i]+low-1; k++){
                        printf("%c", num[k]);
                    }puts("");
                }
            }else
                puts("?");
        }
        return 0;
    }
    View Code
  • 相关阅读:
    Ubuntu Linux下的Wireshark使用drcom_2011.lua分析drcom协议
    Keil提示premature end of file错误 无法生成HEX文件
    Linux和win7(win10)双系统时间错误问题 时间相差8小时
    Wireshark使用drcom_2011.lua插件协助分析drcom协议
    Keil报错failed to execute 'd:KeilC51BINC51.EXE'
    第一篇博文
    LG 7078 贪吃蛇
    LG 1791 人员雇佣
    洛谷 2698 Flowerpot
    HDU 5965 扫雷
  • 原文地址:https://www.cnblogs.com/icodefive/p/4782060.html
Copyright © 2011-2022 走看看