zoukankan      html  css  js  c++  java
  • 后缀数组练习

    pku-1743 Musical Theme

    解题思路

    本质是一个寻找最长不重叠相同子串长度的题目
    下面是求最长不重叠相同子串长度的思路:
    二分枚举+height数组分组。这道题的思想很巧妙,后面要仔细推敲。先二分答案,把题目变成判定性问题:判断是否存在两个长度为k的子串是相同的,且不重叠。解决这个问题的关键还是利用height数组。把排序后的后缀分成若干组,其中每组的后缀之间的height值都不小于k。例如,字符串为“aabaaaab”,当k=2时,后缀分成了4组,如图所示。
    此处输入图片的描述
    容易看出,有希望成为最长公共前缀不小于k的两个后缀一定在同一组。然后对于每组后缀,只须判断每个后缀的sa值的最大值和最小值之差是否不小于k。如果有一组满足,则说明存在,否则不存在。整个做法的时间复杂度为O(nlogn)。
    下面给出两份后缀数组的写法:一份倍增法,一份DC3。
    Musical Theme

    代码

    #include <algorithm>
    #include <cstdio>
    #include <cmath>
    #include <cstring>
    #include <iostream>
    #include <cstdlib>
    #include <set>
    #include <vector>
    #include <cctype>
    #include <iomanip>
    #include <sstream>
    #include <climits>
    #include <queue>
    #include <stack>
    using namespace std;
    /*    freopen("k.in", "r", stdin);
        freopen("k.out", "w", stdout); */
    //clock_t c1 = clock();
    //std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
    //#pragma comment(linker, "/STACK:1024000000,1024000000")
    #define de(a) cout << #a << " = " << a << endl
    #define rep(i, a, n) for (int i = a; i <= n; i++)
    #define per(i, a, n) for (int i = n; i >= a; i--)
    typedef long long ll;
    typedef unsigned long long ull;
    typedef pair<int, int> PII;
    typedef pair<double, double> PDD;
    // Was vector<int, int>, which names `int` as the allocator type and cannot
    // be instantiated; a vector of int pairs matches the PII/PDD naming.
    typedef vector<PII> VII;
    #define inf 0x3f3f3f3f
    const ll INF = 0x3f3f3f3f3f3f3f3f;
    const ll MAXN = 1e6 + 7;
    const ll MAXM = 1e6 + 7;
    const ll MOD = 1e9 + 7;
    const double eps = 1e-6;
    const double pi = acos(-1.0);
    int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
    int rk[MAXN];     // rk[p]: rank of the suffix starting at p (inverse of sa)
    int tp[MAXN];     // radix-sort second key: tp[i] is the start of the suffix ranked i-th by second key
    int tax[MAXN];    // counting-sort buckets: how many times each key value occurs
    int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1
    /* i.e. lcp(sa[i], sa[i-1]) */
    int n, m;         // n: sequence length; m: current number of distinct key values
    int s[MAXN];      // input sequence, 1-indexed
    /* void Debug()
    {
        printf("*****************\n");
        printf("下标");
        for (int i = 1; i <= n; i++)
            printf("%d ", i);
        printf("\n");
        printf("sa  ");
        for (int i = 1; i <= n; i++)
            printf("%d ", sa[i]);
        printf("\n");
        printf("rak ");
        for (int i = 1; i <= n; i++)
            printf("%d ", rk[i]);
        printf("\n");
        printf("tp  ");
        for (int i = 1; i <= n; i++)
            printf("%d ", tp[i]);
        printf("\n");
    } */
    /**
     * Stable counting sort used by SuffixSort.
     *
     * Visits the positions tp[1..n] in order and places them into sa[1..n]
     * sorted by their first key rk[position]; keys lie in [0, m]. Positions
     * with equal keys keep their relative order from tp.
     */
    void Qsort()
    {
        // Empty one bucket per possible key value.
        int key = 0;
        while (key <= m)
            tax[key++] = 0;

        // Histogram of the first keys.
        for (int pos = 1; pos <= n; ++pos)
            ++tax[rk[pos]];

        // Prefix sums: tax[v] becomes the last output slot of bucket v.
        for (key = 1; key <= m; ++key)
            tax[key] = tax[key] + tax[key - 1];

        // Fill each bucket from its end while walking tp backwards, so that
        // ties end up in tp order.
        for (int idx = n; idx > 0; --idx)
        {
            const int start = tp[idx];
            int &slot = tax[rk[start]];
            sa[slot] = start;
            --slot;
        }
    }
    /**
     * Builds the suffix array of s[1..n] by prefix doubling with radix sort.
     *
     * On return sa[1..n] lists the suffix start positions in sorted order and
     * rk[1..n] is its inverse. tp and tax are scratch space; m is left equal to
     * the final number of distinct ranks. Values of s must lie in [0, 200].
     *
     * Two deliberate details:
     *  - only the live range [1..n] of tp/rk is exchanged each round, rather
     *    than swapping the two full MAXN-sized arrays;
     *  - a rank past the end of the sequence is read as 0 instead of being
     *    taken from tp[n + 1..], which may still hold data from an earlier,
     *    longer input when this function is called more than once.
     */
    void SuffixSort()
    {
        m = 200;
        for (int i = 1; i <= n; i++)
            rk[i] = s[i], tp[i] = i;
        Qsort();
        // w: length already ranked; each round extends the ranking to 2*w.
        // p: number of distinct ranks; all suffixes differ, so p == n ends the loop.
        for (int w = 1, p = 0; p < n; m = p, w <<= 1)
        {
            p = 0; // reused as a plain counter while tp is rebuilt
            // Suffixes shorter than w have an empty second half: they come first.
            for (int i = 1; i <= w; i++)
                tp[++p] = n - w + i;
            // The rest are ordered by the rank of the suffix w positions later.
            for (int i = 1; i <= n; i++)
                if (sa[i] > w)
                    tp[++p] = sa[i] - w;
            Qsort(); // second keys are in tp; sort by last round's rk into sa

            // Keep last round's ranks in tp while rk is rebuilt.
            for (int i = 1; i <= n; i++)
                swap(tp[i], rk[i]);

            rk[sa[1]] = p = 1;
            for (int i = 2; i <= n; i++)
            {
                const int prev = sa[i - 1];
                const int cur = sa[i];
                const int prevSecond = prev + w <= n ? tp[prev + w] : 0;
                const int curSecond = cur + w <= n ? tp[cur + w] : 0;
                // Equal on both halves means equal rank this round.
                rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
            }
        }
    }
    /**
     * Fills Height[1..n], where Height[i] is the length of the longest common
     * prefix of the suffixes ranked i and i-1 (Height[1] is 0).
     *
     * Requires sa[1..n] and rk[1..n] to be consistent with s[1..n]. Positions
     * are processed in text order so the previous match length minus one can
     * be reused as the starting point.
     *
     * The comparison is bounded by n and the rank-1 suffix is handled
     * explicitly, so the result does not depend on what s[0] or s[n + 1]
     * happen to contain (s[n + 1] may be left over from a longer input).
     */
    void GetHeight()
    {
        int k = 0;
        for (int i = 1; i <= n; i++)
        {
            if (rk[i] == 1)
            {
                // The smallest suffix has no predecessor in sorted order.
                Height[1] = 0;
                k = 0;
                continue;
            }
            if (k)
                k--;
            const int j = sa[rk[i] - 1];
            while (i + k <= n && j + k <= n && s[i + k] == s[j + k])
                k++;
            Height[rk[i]] = k;
        }
    }
    /**
     * Decision step of the binary search.
     *
     * Scans the suffixes in rank order, cutting them into runs in which every
     * adjacent pair has Height >= len - 1, and tracks the smallest and largest
     * start position inside the current run.
     *
     * @param len candidate answer
     * @return true as soon as some run contains two start positions that are
     *         at least len apart, false if no run does
     */
    bool check(int len)
    {
        int lowest = sa[1];
        int highest = lowest;
        int rank = 2;
        while (rank <= n)
        {
            const int pos = sa[rank];
            if (Height[rank] < len - 1)
            {
                // Common prefix too short: a new run starts here.
                lowest = highest = pos;
            }
            else
            {
                if (pos > highest)
                    highest = pos;
                if (pos < lowest)
                    lowest = pos;
            }
            if (highest - lowest >= len)
                return true;
            ++rank;
        }
        return false;
    }
    int ans = 0;
    /**
     * POJ 1743 driver: for each test case reads n notes, converts them to
     * consecutive differences (shifted by +100 to stay positive), and binary
     * searches the longest theme that occurs twice without overlapping.
     * Prints the theme length, or 0 when it is shorter than 5 notes.
     * Input ends with n == 0 or end of file.
     */
    int main()
    {
        while (scanf("%d", &n) == 1 && n)
        {
            ans = 0;
            for (int i = 1; i <= n; i++)
                scanf("%d", &s[i]);
            // Going backwards keeps s[i - 1] as the original note when read.
            for (int i = n; i >= 2; i--)
                s[i] -= s[i - 1] - 100;
            // The first note has no predecessor. Leaving it as note + 100 lets it
            // coincide with a real difference and fake a match, so give it a value
            // no difference can take: assuming notes are in 1..88 (TODO confirm
            // against the problem statement), differences map to 13..187, and 199
            // still fits SuffixSort's initial alphabet of 200.
            s[1] = 199;
            // Clear the slot after the data so nothing from a longer previous
            // test case is visible past the end.
            s[n + 1] = 0;
            SuffixSort();
            GetHeight();
            // Search the half-open range [l, r): a theme uses at most n / 2 notes.
            int l = 1, r = (n >> 1) + 1;
            while (l < r)
            {
                int mid = (l + r) >> 1;
                if (check(mid))
                {
                    l = mid + 1;
                    ans = mid;
                }
                else
                    r = mid;
            }
            if (ans < 5)
                printf("0\n");
            else
                printf("%d\n", ans);
        }
        return 0;
    }
    
    //--------------------DC3
    #include <cstdio>
    #include <algorithm>
    #include <queue>
    #include <iostream>
    #include <cmath>
    #include <cstring>
    using namespace std;
    // Index mapping between a position x (with x % 3 != 0) and its slot in the
    // reduced problem used by dc3(). Both macros expand to code that reads a
    // variable named `tb`, so they only work where one is in scope (dc3 declares
    // it). G undoes F: positions with x % 3 == 1 land in [0, tb), the others are
    // offset by tb.
    #define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
    #define G(x) ((x) < tb ? (x)*3 + 1 : ((x)-tb) * 3 + 2)
    const int MAXN = 200000 + 100; // n*10 (author's sizing note; presumably n <= 20000 — verify)
    int sa[MAXN];     // suffix array filled by dc3()
    int rk[MAXN];     // rk[sa[i]] = i, filled by calheight()
    int height[MAXN]; // height[rk[i]]: match length computed by calheight()
    int n;            // number of notes in the current test case
    int s[MAXN];      // input sequence, 0-indexed
    int r[MAXN];      // copy of s handed to dc3(); dc3 also uses the space past its input
    int wa[MAXN], wb[MAXN], wv[MAXN]; // scratch buffers shared by sort(), c12() and dc3()
    int wws[MAXN];    // counting-sort buckets used by sort()
    /**
     * One stable counting-sort pass.
     *
     * @param r key table; the key of element a[i] is r[a[i]]
     * @param a input indices, n of them
     * @param b output: the same indices ordered by key, ties kept in input order
     * @param n number of indices
     * @param m number of buckets; every key must be in [0, m)
     *
     * Uses the global buffers wv (keys) and wws (bucket counters).
     */
    void sort(int *r, int *a, int *b, int n, int m)
    {
        // Cache the key of every element.
        for (int idx = 0; idx < n; ++idx)
            wv[idx] = r[a[idx]];

        // Count keys, then turn the counts into end-of-bucket offsets.
        for (int bucket = 0; bucket < m; ++bucket)
            wws[bucket] = 0;
        for (int idx = 0; idx < n; ++idx)
            ++wws[wv[idx]];
        for (int bucket = 1; bucket < m; ++bucket)
            wws[bucket] += wws[bucket - 1];

        // Place from the back so equal keys keep their input order.
        for (int idx = n - 1; idx >= 0; --idx)
        {
            const int slot = --wws[wv[idx]];
            b[slot] = a[idx];
        }
    }
    /**
     * Tells whether the three values starting at r[a] equal the three values
     * starting at r[b].
     *
     * @return 1 when all three pairs match, 0 otherwise; comparison stops at
     *         the first mismatch
     */
    int c0(int *r, int a, int b)
    {
        for (int off = 0; off < 3; ++off)
        {
            if (r[a + off] != r[b + off])
                return 0;
        }
        return 1;
    }
    /**
     * Merge comparator used by dc3().
     *
     * Compares r[a] with r[b]; on a tie it falls back to the ranks stored in
     * the global wv one position later, after comparing one more character
     * first when k == 2.
     *
     * @param k selects the tie-break depth (2: one extra character, then wv;
     *          anything else: wv immediately)
     * @return 1 if the suffix at a orders before the suffix at b, else 0
     */
    int c12(int k, int *r, int a, int b)
    {
        if (r[a] != r[b])
            return r[a] < r[b];
        if (k == 2)
            return c12(1, r, a + 1, b + 1);
        return wv[a + 1] < wv[b + 1];
    }
    
    /**
     * DC3 (difference cover modulo 3) suffix array construction.
     *
     * @param r  input sequence of length n; r[n] and r[n + 1] are overwritten
     *           with 0, and the area starting at r + n is used for the reduced
     *           sequence of the recursive call
     * @param sa output of length n; the area starting at sa + n is used for the
     *           recursive result
     * @param n  number of elements to sort
     * @param m  bucket count passed to sort(); every r[i] must be in [0, m)
     *
     * Uses the global scratch buffers wa, wb, wv (and wws through sort()).
     */
    void dc3(int *r, int *sa, int n, int m)
    {
        // ta: number of positions with i % 3 == 0 collected so far
        // tb: number of positions with i % 3 == 1 (also read by the F/G macros)
        // tbc: number of positions with i % 3 != 0
        int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
        // Padding so that the three-character keys below can read past the end.
        r[n] = r[n + 1] = 0;
        // Collect every position that is not a multiple of 3.
        for (i = 0; i < n; i++)
            if (i % 3 != 0)
                wa[tbc++] = i;
        // Three stable passes, last character first, order those positions by
        // the triple (r[i], r[i + 1], r[i + 2]).
        sort(r + 2, wa, wb, tbc, m);
        sort(r + 1, wb, wa, tbc, m);
        sort(r, wa, wb, tbc, m);
        // Name the triples: equal triples share a name; p counts distinct names.
        for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
            rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
        // Names not yet unique: recurse on the reduced sequence. Otherwise the
        // names already are ranks and san is simply their inverse.
        if (p < tbc)
            dc3(rn, san, tbc, p);
        else
            for (i = 0; i < tbc; i++)
                san[rn[i]] = i;
        // Entries below tb stand for positions x*3 + 1; the position just before
        // each (x*3, a multiple of 3) is emitted in that order.
        for (i = 0; i < tbc; i++)
            if (san[i] < tb)
                wb[ta++] = san[i] * 3;
        // When n % 3 == 1 the last position n - 1 is a multiple of 3 with no
        // following position inside the input, so it is appended separately.
        if (n % 3 == 1)
            wb[ta++] = n - 1;
        // One stable pass by first character finishes sorting the multiples of 3.
        sort(r, wb, wa, ta, m);
        // Translate reduced indices back to positions and record each one's rank
        // in wv for c12().
        for (i = 0; i < tbc; i++)
            wv[wb[i] = G(san[i])] = i;
        // Merge the two sorted lists (wa: multiples of 3, wb: the others).
        for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
            sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
        for (; i < ta; p++)
            sa[p] = wa[i++];
        for (; j < tbc; p++)
            sa[p] = wb[j++];
        return;
    }
    void calheight(int *r, int *sa, int n)
    {
        int i, j, k = 0;
        for (i = 1; i <= n; ++i)
            rk[sa[i]] = i;
        for (i = 0; i < n; height[rk[i++]] = k)
            for (k ? k-- : 0, j = sa[rk[i] - 1]; r[i + k] == r[j + k]; ++k)
                ;
        return;
    }
    /**
     * Decision step of the binary search.
     *
     * Walks ranks 2..n, splitting them into runs whose adjacent height values
     * are all >= len - 1, and keeps the extreme start positions of the current
     * run.
     *
     * @param len candidate answer
     * @return true once a run holds two start positions at least len apart
     */
    bool check(int len)
    {
        int lowest = sa[1];
        int highest = lowest;
        int rank = 2;
        while (rank <= n)
        {
            const int pos = sa[rank];
            if (height[rank] < len - 1)
            {
                // Shared prefix too short: the run restarts at this suffix.
                lowest = highest = pos;
            }
            else
            {
                if (pos > highest)
                    highest = pos;
                if (pos < lowest)
                    lowest = pos;
            }
            if (highest - lowest >= len)
                return true;
            ++rank;
        }
        return false;
    }
    int ans;
    /**
     * POJ 1743 driver, DC3 variant: for each test case reads n notes, turns
     * them into consecutive differences shifted by +100, builds the suffix
     * array with dc3() and binary searches the longest non-overlapping repeated
     * theme. Prints its length, or 0 when it is shorter than 5 notes.
     * Input ends with n == 0 or end of file.
     */
    int main()
    {
        while (scanf("%d", &n) == 1 && n)
        {
            ans = 0;
            for (int i = 0; i < n; i++)
                scanf("%d", &s[i]);
            // Stop at i == 1: the first note has no predecessor, and s[-1] is
            // outside the array.
            for (int i = n - 1; i >= 1; i--)
                s[i] -= s[i - 1] - 100;
            // Give the first slot a value no difference can take so it never
            // takes part in a match: assuming notes are in 1..88 (TODO confirm
            // against the problem statement), differences map to 13..187.
            s[0] = 199;
            int Max = -1;
            for (int i = 0; i < n; i++)
            {
                r[i] = s[i];
                if (r[i] > Max)
                    Max = r[i];
            }
            // Terminator smaller than every real value; it is sorted along with
            // the data, hence the length n + 1.
            r[n] = 0;
            dc3(r, sa, n + 1, Max + 1);
            calheight(r, sa, n);
            // Search the half-open range [lo, hi). Named lo/hi so the global
            // array r is not shadowed.
            int lo = 1, hi = (n >> 1) + 1;
            while (lo < hi)
            {
                int mid = (lo + hi) >> 1;
                if (check(mid))
                {
                    lo = mid + 1;
                    ans = mid;
                }
                else
                    hi = mid;
            }
            if (ans < 5)
                printf("0\n");
            else
                printf("%d\n", ans);
        }
        return 0;
    }
    

    HDU-4622 Reincarnation

    Reincarnation

    题意

    区间内不同子串个数

    #include <bits/stdc++.h>
    using namespace std;
    /*    freopen("k.in", "r", stdin);
        freopen("k.out", "w", stdout); */
    //clock_t c1 = clock();
    //std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
    //#pragma comment(linker, "/STACK:1024000000,1024000000")
    #define de(a) cout << #a << " = " << a << endl
    #define rep(i, a, n) for (int i = a; i <= n; i++)
    #define per(i, a, n) for (int i = n; i >= a; i--)
    typedef long long ll;
    typedef unsigned long long ull;
    typedef pair<int, int> PII;
    typedef pair<double, double> PDD;
    // Was vector<int, int>, which names `int` as the allocator type and cannot
    // be instantiated; a vector of int pairs matches the PII/PDD naming.
    typedef vector<PII> VII;
    #define inf 0x3f3f3f3f
    const ll INF = 0x3f3f3f3f3f3f3f3f;
    const ll MAXN = 1e6 + 7;
    const ll MAXM = 1e6 + 7;
    const ll MOD = 1e9 + 7;
    const double eps = 1e-6;
    const double pi = acos(-1.0);
    int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
    int rk[MAXN];     // rk[p]: rank of the suffix starting at p (inverse of sa)
    int tp[MAXN];     // radix-sort second key: tp[i] is the start of the suffix ranked i-th by second key
    int tax[MAXN];    // counting-sort buckets: how many times each key value occurs
    int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1
    /* i.e. lcp(sa[i], sa[i-1]) */
    int n, m;         // n: string length; m: current number of distinct key values
    char s[MAXN];     // input string, stored from index 1
    /* void Debug()
    {
        printf("*****************\n");
        printf("下标");
        for (int i = 1; i <= n; i++)
            printf("%d ", i);
        printf("\n");
        printf("sa  ");
        for (int i = 1; i <= n; i++)
            printf("%d ", sa[i]);
        printf("\n");
        printf("rak ");
        for (int i = 1; i <= n; i++)
            printf("%d ", rk[i]);
        printf("\n");
        printf("tp  ");
        for (int i = 1; i <= n; i++)
            printf("%d ", tp[i]);
        printf("\n");
    } */
    /**
     * Stable counting sort used by SuffixSort.
     *
     * Takes the positions tp[1..n] in order and writes them to sa[1..n] sorted
     * by rk[position], whose values lie in [0, m]. Equal keys keep the order
     * they had in tp.
     */
    void Qsort()
    {
        // Clear the buckets.
        int key = 0;
        while (key <= m)
            tax[key++] = 0;

        // Count how often each key occurs.
        for (int pos = 1; pos <= n; ++pos)
            ++tax[rk[pos]];

        // Running totals give the last slot of each bucket.
        for (key = 1; key <= m; ++key)
            tax[key] = tax[key] + tax[key - 1];

        // Back-to-front placement preserves tp order among equal keys.
        for (int idx = n; idx > 0; --idx)
        {
            const int start = tp[idx];
            int &slot = tax[rk[start]];
            sa[slot] = start;
            --slot;
        }
    }
    /**
     * Builds the suffix array of the string s[1..n] by prefix doubling with
     * radix sort.
     *
     * On return sa[1..n] lists the suffix start positions in sorted order and
     * rk[1..n] is its inverse. tp and tax are scratch space; m is left equal to
     * the final number of distinct ranks.
     *
     * Deliberate details:
     *  - the initial key is the character's unsigned byte value, so any
     *    non-NUL character is a valid bucket index (an offset from '0' would go
     *    negative for characters below '0'); the relative order of characters,
     *    and therefore the result, is the same;
     *  - only the live range [1..n] of tp/rk is exchanged each round, rather
     *    than swapping the two full MAXN-sized arrays;
     *  - a rank past the end of the string is read as 0 instead of being taken
     *    from tp[n + 1..], which may hold data from an earlier, longer string.
     */
    void SuffixSort()
    {
        m = 255;
        for (int i = 1; i <= n; i++)
            rk[i] = static_cast<unsigned char>(s[i]), tp[i] = i;
        Qsort();
        // w: length already ranked; each round extends the ranking to 2*w.
        // p: number of distinct ranks; all suffixes differ, so p == n ends the loop.
        for (int w = 1, p = 0; p < n; m = p, w <<= 1)
        {
            p = 0; // reused as a plain counter while tp is rebuilt
            // Suffixes shorter than w have an empty second half: they come first.
            for (int i = 1; i <= w; i++)
                tp[++p] = n - w + i;
            // The rest are ordered by the rank of the suffix w positions later.
            for (int i = 1; i <= n; i++)
                if (sa[i] > w)
                    tp[++p] = sa[i] - w;
            Qsort(); // second keys are in tp; sort by last round's rk into sa

            // Keep last round's ranks in tp while rk is rebuilt.
            for (int i = 1; i <= n; i++)
                swap(tp[i], rk[i]);

            rk[sa[1]] = p = 1;
            for (int i = 2; i <= n; i++)
            {
                const int prev = sa[i - 1];
                const int cur = sa[i];
                const int prevSecond = prev + w <= n ? tp[prev + w] : 0;
                const int curSecond = cur + w <= n ? tp[cur + w] : 0;
                // Equal on both halves means equal rank this round.
                rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
            }
        }
    }
    /**
     * Fills Height[1..n], where Height[i] is the length of the longest common
     * prefix of the suffixes ranked i and i-1 (Height[1] is 0).
     *
     * Requires sa[1..n] and rk[1..n] to be consistent with s[1..n]. Positions
     * are processed in text order so the previous match length minus one can
     * be reused as the starting point.
     *
     * The comparison is bounded by n and the rank-1 suffix is handled
     * explicitly, so the result does not depend on the contents of s[0] or of
     * anything after s[n].
     */
    void GetHeight()
    {
        int k = 0;
        for (int i = 1; i <= n; i++)
        {
            if (rk[i] == 1)
            {
                // The smallest suffix has no predecessor in sorted order.
                Height[1] = 0;
                k = 0;
                continue;
            }
            if (k)
                k--;
            const int j = sa[rk[i] - 1];
            while (i + k <= n && j + k <= n && s[i + k] == s[j + k])
                k++;
            Height[rk[i]] = k;
        }
    }
    // Sparse table: st[i][k] holds the minimum of 2^k consecutive entries
    // starting at index i (level 0 is filled by the caller).
    int st[MAXN][21];
    /**
     * Range-minimum query over the sparse table.
     *
     * @param l first index of the range (inclusive)
     * @param r last index of the range (inclusive); callers pass l <= r
     * @return the smaller of the two power-of-two blocks that together cover
     *         [l, r]
     */
    int Query(int l, int r)
    {
        const int span = r - l + 1;
        const int k = log2(span); // largest power of two not exceeding span
        const int leftBlock = st[l][k];
        const int rightBlock = st[r - (1 << k) + 1][k];
        return rightBlock < leftBlock ? rightBlock : leftBlock;
    }
    /**
     * HDU 4622 driver: for each test string builds the suffix array, the
     * Height table and a sparse table over Height, then answers q queries
     * asking for the number of distinct substrings of s[l..r].
     *
     * Each query starts from the total number of substrings of the interval
     * and subtracts, while walking the suffixes that start inside [l, r] in
     * rank order, the prefix length each one shares with the reference suffix
     * `pre` (clipped to the interval).
     */
    int main()
    {
        int t = 0;
        if (scanf("%d", &t) != 1)
            return 0;
        while (t--)
        {
            scanf(" %s", s + 1);
            n = strlen(s + 1);
            SuffixSort();
            GetHeight();
            for (int i = 0; i <= n; i++)
                st[i][0] = Height[i];
            // Only build levels whose block length fits in n; this also keeps the
            // level index inside the table's second dimension.
            for (int i = 1; (1 << i) <= n; i++)
                for (int j = 1; j + (1 << i) - 1 <= n; j++)
                    st[j][i] = min(st[j][i - 1], st[j + (1 << (i - 1))][i - 1]);
            // st now answers lcp(suffix ranked a, suffix ranked b) as the minimum
            // of Height over ranks a+1..b.
            int q = 0;
            scanf("%d", &q);
            while (q--)
            {
                int l, r;
                scanf("%d%d", &l, &r);
                int ans = (r - l + 1) * (r - l + 2) / 2;
                int cnt = 0;  // suffixes of the interval seen so far
                int pre = -1; // rank of the reference suffix, -1 until the first one
                for (int i = 1; i <= n; i++)
                {
                    if (cnt == r - l + 1)
                        break;
                    if (sa[i] < l || sa[i] > r)
                        continue;
                    cnt++;
                    if (pre == -1)
                    {
                        pre = i;
                        continue;
                    }
                    // pre always comes from an earlier iteration, so pre < i.
                    int lcp = Query(pre + 1, i);
                    int la = r - sa[pre] + 1; // clipped length of the reference suffix
                    int lb = r - sa[i] + 1;   // clipped length of the current suffix
                    // Keep the old reference only when the current clipped suffix
                    // is a proper prefix of it.
                    if (!(la > lb && lcp >= lb))
                        pre = i;
                    ans -= min(lcp, min(la, lb));
                }
                printf("%d\n", ans);
            }
        }
        return 0;
    }
    

    牛客 CSL的密码

    题目链接

    题意:

    长度不小于k的本质不同子串数量

    #include <bits/stdc++.h>
    using namespace std;
    /*    freopen("k.in", "r", stdin);
        freopen("k.out", "w", stdout); */
    //clock_t c1 = clock();
    //std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
    //#pragma comment(linker, "/STACK:1024000000,1024000000")
    #define de(a) cout << #a << " = " << a << endl
    #define rep(i, a, n) for (int i = a; i <= n; i++)
    #define per(i, a, n) for (int i = n; i >= a; i--)
    typedef long long ll;
    typedef unsigned long long ull;
    typedef pair<int, int> PII;
    typedef pair<double, double> PDD;
    // Was vector<int, int>, which names `int` as the allocator type and cannot
    // be instantiated; a vector of int pairs matches the PII/PDD naming.
    typedef vector<PII> VII;
    #define inf 0x3f3f3f3f
    const ll INF = 0x3f3f3f3f3f3f3f3f;
    const ll MAXN = 1e6 + 7;
    const ll MAXM = 1e6 + 7;
    const ll MOD = 1e9 + 7;
    const double eps = 1e-6;
    const double pi = acos(-1.0);
    int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
    int rk[MAXN];     // rk[p]: rank of the suffix starting at p (inverse of sa)
    int tp[MAXN];     // radix-sort second key: tp[i] is the start of the suffix ranked i-th by second key
    int tax[MAXN];    // counting-sort buckets: how many times each key value occurs
    int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1
    /* i.e. lcp(sa[i], sa[i-1]) */
    int n, m; // n: string length; m: alphabet size (number of distinct keys)
    char s[MAXN], t[MAXN];
    /* void Debug()
    {
        printf("*****************\n");
        printf("下标");
        for (int i = 1; i <= n; i++)
            printf("%d ", i);
        printf("\n");
        printf("sa  ");
        for (int i = 1; i <= n; i++)
            printf("%d ", sa[i]);
        printf("\n");
        printf("rak ");
        for (int i = 1; i <= n; i++)
            printf("%d ", rk[i]);
        printf("\n");
        printf("tp  ");
        for (int i = 1; i <= n; i++)
            printf("%d ", tp[i]);
        printf("\n");
    } */
    /**
     * Stable counting sort used by SuffixSort.
     *
     * Reads the positions tp[1..n] in order and stores them in sa[1..n] sorted
     * by rk[position] (values in [0, m]); positions with the same key keep
     * their tp order.
     */
    void Qsort()
    {
        // Start from empty buckets.
        int key = 0;
        while (key <= m)
            tax[key++] = 0;

        // Tally every first key.
        for (int pos = 1; pos <= n; ++pos)
            ++tax[rk[pos]];

        // Cumulative counts mark where each bucket ends.
        for (key = 1; key <= m; ++key)
            tax[key] = tax[key] + tax[key - 1];

        // Scanning tp from the back keeps the sort stable.
        for (int idx = n; idx > 0; --idx)
        {
            const int start = tp[idx];
            int &slot = tax[rk[start]];
            sa[slot] = start;
            --slot;
        }
    }
    /**
     * Builds the suffix array of the string s[1..n] by prefix doubling with
     * radix sort.
     *
     * On return sa[1..n] lists the suffix start positions in sorted order and
     * rk[1..n] is its inverse. tp and tax are scratch space; m is left equal to
     * the final number of distinct ranks.
     *
     * Deliberate details:
     *  - the initial key is the character's unsigned byte value, so any
     *    non-NUL character is a valid bucket index (an offset from '0' would go
     *    negative for characters below '0'); the relative order of characters,
     *    and therefore the result, is the same;
     *  - only the live range [1..n] of tp/rk is exchanged each round, rather
     *    than swapping the two full MAXN-sized arrays;
     *  - a rank past the end of the string is read as 0 instead of being taken
     *    from tp[n + 1..], which may hold data from an earlier, longer string.
     */
    void SuffixSort()
    {
        m = 255;
        for (int i = 1; i <= n; i++)
            rk[i] = static_cast<unsigned char>(s[i]), tp[i] = i;
        Qsort();
        // w: length already ranked; each round extends the ranking to 2*w.
        // p: number of distinct ranks; all suffixes differ, so p == n ends the loop.
        for (int w = 1, p = 0; p < n; m = p, w <<= 1)
        {
            p = 0; // reused as a plain counter while tp is rebuilt
            // Suffixes shorter than w have an empty second half: they come first.
            for (int i = 1; i <= w; i++)
                tp[++p] = n - w + i;
            // The rest are ordered by the rank of the suffix w positions later.
            for (int i = 1; i <= n; i++)
                if (sa[i] > w)
                    tp[++p] = sa[i] - w;
            Qsort(); // second keys are in tp; sort by last round's rk into sa

            // Keep last round's ranks in tp while rk is rebuilt.
            for (int i = 1; i <= n; i++)
                swap(tp[i], rk[i]);

            rk[sa[1]] = p = 1;
            for (int i = 2; i <= n; i++)
            {
                const int prev = sa[i - 1];
                const int cur = sa[i];
                const int prevSecond = prev + w <= n ? tp[prev + w] : 0;
                const int curSecond = cur + w <= n ? tp[cur + w] : 0;
                // Equal on both halves means equal rank this round.
                rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
            }
        }
    }
    /**
     * Fills Height[1..n], where Height[i] is the length of the longest common
     * prefix of the suffixes ranked i and i-1 (Height[1] is 0).
     *
     * Requires sa[1..n] and rk[1..n] to be consistent with s[1..n]. Positions
     * are processed in text order so the previous match length minus one can
     * be reused as the starting point.
     *
     * The comparison is bounded by n and the rank-1 suffix is handled
     * explicitly, so the result does not depend on the contents of s[0] or of
     * anything after s[n].
     */
    void GetHeight()
    {
        int k = 0;
        for (int i = 1; i <= n; i++)
        {
            if (rk[i] == 1)
            {
                // The smallest suffix has no predecessor in sorted order.
                Height[1] = 0;
                k = 0;
                continue;
            }
            if (k)
                k--;
            const int j = sa[rk[i] - 1];
            while (i + k <= n && j + k <= n && s[i + k] == s[j + k])
                k++;
            Height[rk[i]] = k;
        }
    }
    /* Number of distinct substrings:
       walk over the suffixes in rank order; the suffix ranked i contributes
       n - sa[i] + 1 - Height[i] new substrings. */
    /* Here: number of distinct substrings whose length is at least k. */
    /**
     * Driver: each test case gives two integers (the first is replaced by the
     * actual string length) and a string. The suffix ranked i has length
     * len = n - sa[i] + 1 and contributes the lengths that are both new
     * (> Height[i]) and long enough (>= k), i.e. len - max(k - 1, Height[i]),
     * clamped at 0. Reads until the input no longer yields a full test case.
     */
    int main()
    {
        int k;
        // Require both integers; a partial read must not start a test case.
        while (scanf("%d%d", &n, &k) == 2)
        {
            if (scanf(" %s", s + 1) != 1)
                break;
            n = strlen(s + 1);
            SuffixSort();
            GetHeight();
            ll ans = 0;
            for (int i = 1; i <= n; i++)
            {
                const int len = n - sa[i] + 1;
                ans += len - min(max(k - 1, Height[i]), len);
            }
            printf("%lld\n", ans);
        }
        return 0;
    }
    
  • 相关阅读:
    google搜索技巧
    sqlite,mysql,access对比
    【转】python技术博客
    2013待阅读书目
    【转】larbin的代码实现逻辑概述
    【转】python遍历文件夹和文件
    【转】正则表达式高级讲解
    Atitit.mybatis的测试  以及spring与mybatis在本项目中的集成配置说明
    Atitit.100% 多个子元素自适应布局属性
    atitti.atiNav 手机导航组件的设计
  • 原文地址:https://www.cnblogs.com/graytido/p/11578989.html
Copyright © 2011-2022 走看看