zoukankan      html  css  js  c++  java
  • 后缀自动机(SAM)解题记录

    求循环串在原串中出现次数

    解题思路:

    本题要求循环同构串在母串中的出现次数。用母串构建SAM,把给出的串(比如abc)扩展成abcab,然后把扩展后的串按位在SAM上沿trans转移:能转移就转移,此时所在状态表示以当前位置u结尾的最长匹配串;如果不能通过trans转移,就沿link回跳到第一个能转移的结点,这样就得到了以u结尾的最长公共子串(LCS)及其长度。以u结尾的LCS的出现次数就是所到达状态的|endpos|,可以在link边上做拓扑排序求出。有两种特殊情况需要处理:第一,如果原串形如'aa',不同起点的循环同构串会相同,直接累加会重复计数,所以只在lcs长度>=原串长度时统计,并给已统计的状态打上标记以免重复计算;第二,当lcs长度>=原串长度时,要把匹配串从左端缩短到原串长度,长度恰为原串长度的那个后缀可能落在u沿后缀链接向上的某个状态里,因此沿link上跳,找到长度区间包含原串长度的状态即可

    题目链接

    #include <bits/stdc++.h>
    using namespace std;
    /* freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
    // clock_t c1 = clock();
    // std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
    //#pragma comment(linker, "/STACK:1024000000,1024000000")
    // Debug helper: prints "<expression> = <value>" to cout.
    #define de(a) cout << #a << " = " << a << endl
    // Inclusive ascending / descending int loops over [a, n].
    #define rep(i, a, n) for (int i = a; i <= n; i++)
    #define per(i, a, n) for (int i = n; i >= a; i--)
    // 2x and 2x+1 of whatever variable is named x at the expansion site.
    #define ls ((x) << 1)
    #define rs ((x) << 1 | 1)
    typedef long long ll;
    typedef unsigned long long ull;
    typedef pair<int, int> PII;
    typedef pair<double, double> PDD;
    typedef pair<ll, ll> PLL;
    // Previously vector<int, int>, which names int as the allocator and cannot
    // be instantiated; a vector of int pairs matches the PII naming above.
    typedef vector<PII> VII;
    #define inf 0x3f3f3f3f
    const ll INF = 0x3f3f3f3f3f3f3f3f;
    // MAXN is used as the base size of the automaton's arrays in this program.
    const ll MAXN = 1e6 + 7;
    const ll MAXM = 1e5 + 7;
    const ll MOD = 1e9 + 7;
    const double eps = 1e-6;
    const double pi = acos(-1.0);
    struct Suffix_Automaton
    {
        int cnt, root, last, link[MAXN << 1], trans[MAXN << 1][30], mx[MAXN << 1];
        int ed_size[MAXN << 1], in[MAXN << 1];
        vector<int> vec[MAXN << 1];
        int vis[MAXN << 1];
        void init()
        {
            root = last = cnt = 1;
            mx[cnt] = link[cnt] = 0;
    
            memset(in, 0, sizeof(in));
            memset(ed_size, 0, sizeof(ed_size));
            memset(vis, 0, sizeof(vis));
        }
        void extend(int c)
        {
            int np = ++cnt, p = last; //np表示新的母串
            mx[np] = mx[p] + 1;
            ed_size[np] = 1;
            for (; p && !trans[p][c]; p = link[p])
                trans[p][c] = np; //将last的后缀连接路径上没有字符c出边的p连向np
            if (!p)               //如果p跳到了0 需要把np连向parent树的根
                link[np] = root;
            else
            {
                int q = trans[p][c];
                if (mx[q] == mx[p] + 1)
                    link[np] = q; //把u 连接到trans(v,c)
                else
                {                                                  //需要新建节点
                    int nq = ++cnt;                                //nq是new q是old
                    memcpy(trans[nq], trans[q], sizeof(trans[q])); //复制出边到新节点
                    mx[nq] = mx[p] + 1;
                    link[nq] = link[q];      //nq的后缀链接指向q的后缀连接
                    link[q] = link[np] = nq; //q和np的后缀链接指向nq
                    for (; p && trans[p][c] == q; p = link[p])
                        trans[p][c] = nq; //把路径上原来有转移的q的节点改成指向nq
                }
            }
            last = np; //替换整个母串
        }
        void toposort()
        {
            for (int i = 1; i <= cnt; i++)
                vec[i].clear();
            queue<int> q;
            for (int i = 1; i <= cnt; i++)
            {
                vec[i].push_back(link[i]);
                in[link[i]]++;
            }
            for (int i = 1; i <= cnt; i++)
                if (!in[i])                                                                                                                       
                    q.push(i);
            while (!q.empty())
            {
                int temp = q.front();
                q.pop();
                for (auto i : vec[temp])
                {
                    ed_size[i] += ed_size[temp];
                    if (!(--in[i]))
                        q.push(i);
                }
            }
        }
        int query(string str, int num, int sz)
        {
            int ans = 0;
            int u = 1, lcs = 0;
            for (auto i : str)
            {
                int id = i - 'a';
                if (trans[u][id])
                {
                    u = trans[u][id];
                    lcs++;
                }
                else
                {
                    //如果没有匹配函数
                    //根据后缀树进行回溯 找到最大后缀满足匹配条件的
                    for (; u && !trans[u][id]; u = link[u])
                        ;
                    if (!u) //没了
                    {
                        u = 1;
                        lcs = 0;
                    }
                    else
                    {
                        lcs = mx[u] + 1;
                        u = trans[u][id];
                    }
                }
                if (lcs >= sz)
                {
                    while (mx[link[u]] >= sz)
                        u = link[u], lcs = mx[u];
                }
                if (lcs >= sz && vis[u] != num)
                {
                    vis[u] = num;
                    ans += ed_size[u];
                }
            }
            return ans;
        }
    } SAM;
    /**
     * For each text on stdin: builds the automaton, reads the number of
     * patterns, and prints one count per pattern.
     */
    int main()
    {
        string str;
        while (cin >> str)
        {
            SAM.init();
            for (auto i : str)
                SAM.extend(i - 'a');
            SAM.toposort();
            int n;
            cin >> n;
            for (int i = 1; i <= n; i++)
            {
                cin >> str;
                int temp = str.size();
                // Append the first |str| - 1 characters so every rotation
                // shows up as a window of length temp.
                str += str.substr(0, str.size() - 1);
                // The format string had been split across two source lines,
                // leaving an unterminated literal.
                printf("%d\n", SAM.query(str, i, temp));
            }
        }
        return 0;
    }
    

    统计所有本质不同子串权值和

    解题思路:

    统计所有本质不同子串权值和,因为给出的是多个串,可以用广义后缀自动机解决,在串之间加入ASCII码为'0'+10的字符(即':')作为连接符
    那么在后缀自动机上,每个状态的值等于所有能通过trans转移到该状态的前驱状态的贡献之和,即 sum(前驱状态的权值和*10 + 转移边上的数字*前驱状态的合法子串个数)
    因为':'是为了连接多个串而加入的,含有':'的子串属于不合法子串,所以统计每个状态的合法子串时可以从初始状态s开始拓扑排序,转移边为':'时不做上述转移

    题目链接

    #include <bits/stdc++.h>
    using namespace std;
    /* freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
    // clock_t c1 = clock();
    // std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
    //#pragma comment(linker, "/STACK:1024000000,1024000000")
    // Debug helper: prints "<expression> = <value>" to cout.
    #define de(a) cout << #a << " = " << a << endl
    // Inclusive ascending / descending int loops over [a, n].
    #define rep(i, a, n) for (int i = a; i <= n; i++)
    #define per(i, a, n) for (int i = n; i >= a; i--)
    // 2x and 2x+1 of whatever variable is named x at the expansion site.
    #define ls ((x) << 1)
    #define rs ((x) << 1 | 1)
    typedef long long ll;
    typedef unsigned long long ull;
    typedef pair<int, int> PII;
    typedef pair<double, double> PDD;
    typedef pair<ll, ll> PLL;
    // Previously vector<int, int>, which names int as the allocator and cannot
    // be instantiated; a vector of int pairs matches the PII naming above.
    typedef vector<PII> VII;
    #define inf 0x3f3f3f3f
    const ll INF = 0x3f3f3f3f3f3f3f3f;
    // MAXN is used as the base size of the automaton's arrays in this program.
    const ll MAXN = 1e6 + 7;
    const ll MAXM = 1e5 + 7;
    // Modulus applied to the sums computed in this program.
    const ll MOD = 1e9 + 7;
    const double eps = 1e-6;
    const double pi = acos(-1.0);
    /**
     * Suffix automaton over the symbols 0..10: digits 0..9 plus symbol 10,
     * which the caller inserts between the joined input strings. toposort()
     * sums, modulo MOD, the decimal values of the strings spelled by paths
     * from the root that never take a symbol-10 edge.
     *
     * State 1 is the root and index 0 stands for "no state".
     */
    struct Suffix_Automaton
    {
        // cnt: number of states in use; last: state reached by the whole text so far.
        // link: suffix link; trans: outgoing edge per symbol; mx: longest length of a state.
        int cnt, root, last, link[MAXN << 1], trans[MAXN << 1][30], mx[MAXN << 1];
        int in[MAXN << 1]; // remaining in-degree on the transition graph
        // val: sum of the values of the separator-free strings that reach a state.
        // vaild_size: how many such strings reach it. Both are kept modulo MOD.
        ll val[MAXN << 1], vaild_size[MAXN << 1];

        /** Resets the automaton to a lone root so that a new text can be inserted. */
        void init()
        {
            root = last = cnt = 1;
            mx[cnt] = link[cnt] = 0;
            // The root must not keep edges from an earlier build. Every other
            // state clears its own row when extend() allocates it.
            memset(trans[root], 0, sizeof(trans[root]));

            memset(in, 0, sizeof(in));
            memset(val, 0, sizeof(val));
            memset(vaild_size, 0, sizeof(vaild_size));
        }

        /** Appends symbol c to the text. */
        void extend(int c)
        {
            int np = ++cnt, p = last; // np: state for the text including the new symbol
            // A state index can be reused after init(); wipe whatever edges the
            // previous build left in this row before linking anything to it.
            memset(trans[np], 0, sizeof(trans[np]));
            mx[np] = mx[p] + 1;
            // Give every suffix state that lacks a c-edge an edge to np.
            for (; p && !trans[p][c]; p = link[p])
                trans[p][c] = np;
            if (!p) // walked past the root: np hangs directly under the root
                link[np] = root;
            else
            {
                int q = trans[p][c];
                if (mx[q] == mx[p] + 1)
                    link[np] = q; // q already has exactly the required length
                else
                {
                    // q holds strings longer than mx[p] + 1: split off a clone nq
                    // that keeps q's edges and takes the shorter lengths.
                    int nq = ++cnt;
                    memcpy(trans[nq], trans[q], sizeof(trans[q]));
                    mx[nq] = mx[p] + 1;
                    link[nq] = link[q];
                    link[q] = link[np] = nq;
                    // Redirect the c-edges that pointed at q from p's suffix chain.
                    for (; p && trans[p][c] == q; p = link[p])
                        trans[p][c] = nq;
                }
            }
            last = np;
        }

        /**
         * Walks the transition graph in topological order and pushes counts
         * and value sums along digit edges.
         *
         * @return sum of val over all states, modulo MOD
         */
        ll toposort()
        {
            ll ans = 0;
            for (int i = 1; i <= cnt; i++)
            {
                for (int j = 0; j <= 10; j++)
                {
                    int to = trans[i][j];
                    if (to)
                        in[to]++;
                }
            }
            queue<int> q;
            // States without incoming edges start with the single empty string.
            for (int i = 1; i <= cnt; i++)
                if (!in[i])
                {
                    q.push(i);
                    vaild_size[i] = 1;
                    val[i] = 0;
                }
            while (!q.empty())
            {
                int temp = q.front();
                q.pop();
                for (int i = 0; i <= 10; i++)
                {
                    int to = trans[temp][i];
                    if (!to)
                        continue;
                    // Separator edges (symbol 10) carry no strings forward, but
                    // still count towards the in-degree bookkeeping below.
                    if (i != 10)
                    {
                        (vaild_size[to] += vaild_size[temp]) %= MOD;
                        // Appending digit i maps each value v to v * 10 + i.
                        (val[to] += val[temp] * 10 + i * vaild_size[temp]) %= MOD;
                    }
                    if (!(--in[to]))
                        q.push(to);
                }
            }
            for (int i = 1; i <= cnt; i++)
                (ans += val[i]) %= MOD;
            return ans;
        }

    } SAM;
    /**
     * For each test case: reads n digit strings, joins them with the
     * separator character '0' + 10, builds the automaton over the joined
     * text and prints the result of SAM.toposort().
     */
    int main()
    {
        int n;
        while (~scanf("%d", &n))
        {
            string str = "";
            for (int i = 0; i < n; i++)
            {
                if (i)
                    str += static_cast<char>('0' + 10); // separator, becomes symbol 10
                string temp;
                cin >> temp;
                str += temp;
            }
            SAM.init();
            for (auto i : str)
                SAM.extend(i - '0');
            // The format string had been split across two source lines,
            // leaving an unterminated literal.
            printf("%lld\n", SAM.toposort());
        }
        return 0;
    }
    

    HDU -3518

    题目链接

    解题思路:

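    在建立后缀自动机的时候维护每个状态endpos集合的最左和最右位置。对于出现次数>=2的状态,长度不超过(最右位置-最左位置)的子串可以不重叠地出现两次,所以该状态的贡献是 max(0, min(mx, 最右位置-最左位置) - mx[link]),把所有状态的贡献加起来就是答案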

    #include <bits/stdc++.h>
    using namespace std;
    /* freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
    // clock_t c1 = clock();
    // std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
    //#pragma comment(linker, "/STACK:1024000000,1024000000")
    // Debug helper: prints "<expression> = <value>" to cout.
    #define de(a) cout << #a << " = " << a << endl
    // Inclusive ascending / descending int loops over [a, n].
    #define rep(i, a, n) for (int i = a; i <= n; i++)
    #define per(i, a, n) for (int i = n; i >= a; i--)
    // 2x and 2x+1 of whatever variable is named x at the expansion site.
    #define ls ((x) << 1)
    #define rs ((x) << 1 | 1)
    typedef long long ll;
    typedef unsigned long long ull;
    typedef pair<int, int> PII;
    typedef pair<double, double> PDD;
    typedef pair<ll, ll> PLL;
    // Previously vector<int, int>, which names int as the allocator and cannot
    // be instantiated; a vector of int pairs matches the PII naming above.
    typedef vector<PII> VII;
    // Also used as the "no position yet" sentinel for endpos bounds.
    #define inf 0x3f3f3f3f
    const ll INF = 0x3f3f3f3f3f3f3f3f;
    // MAXN is used as the base size of the automaton's arrays in this program.
    const ll MAXN = 1e3 + 7;
    const ll MAXM = 1e5 + 7;
    const ll MOD = 1e9 + 7;
    const double eps = 1e-6;
    const double pi = acos(-1.0);
    /**
     * Suffix automaton over the letters 'a'..'z' (passed in as 0-based codes)
     * that also tracks, per state, the occurrence count and the leftmost and
     * rightmost end position. build() uses these to count the distinct
     * substrings that occur at least twice without overlapping.
     *
     * State 1 is the root and index 0 stands for "no state".
     */
    struct Suffix_Automaton
    {
        // cnt: number of states in use; last: state reached by the whole text so far.
        // link: suffix link; trans: outgoing edge per letter; mx: longest length of a state.
        int cnt, root, last, link[MAXN << 1], trans[MAXN << 1][30], mx[MAXN << 1];
        vector<int> vec[MAXN << 1]; // link edges, stored child -> parent
        int val[MAXN << 1];         // occurrence count per state once build() has run
        int in[MAXN << 1];          // remaining in-degree on the link tree

        // (leftmost, rightmost) end position per state; {inf, -inf} means "none yet".
        pair<int, int> sit[MAXN << 1];

        /** Clears all per-state data and resets the automaton to a lone root. */
        void init()
        {
            memset(link, 0, sizeof(link));
            memset(mx, 0, sizeof(mx));
            root = last = cnt = 1;
            mx[cnt] = link[cnt] = 0;
            memset(trans, 0, sizeof(trans));
            memset(val, 0, sizeof(val));
            memset(in, 0, sizeof(in));
            for (int i = 1; i < (MAXN << 1); i++)
                sit[i] = {inf, -inf};
        }

        /**
         * Appends letter code c to the text.
         *
         * @param c    letter code of the new character
         * @param _pos end position of the new character, as numbered by the caller
         */
        void extend(int c, int _pos)
        {
            int np = ++cnt, p = last; // np: state for the text including the new letter
            // np ends exactly at _pos. Ancestors on the link tree receive this
            // position when build() merges the bounds upwards, so there is no
            // need to walk the whole suffix-link chain on every extension.
            sit[np] = {_pos, _pos};
            val[np] = 1;
            mx[np] = mx[p] + 1;
            // Give every suffix state that lacks a c-edge an edge to np.
            for (; p && !trans[p][c]; p = link[p])
                trans[p][c] = np;
            if (!p) // walked past the root: np hangs directly under the root
                link[np] = root;
            else
            {
                int q = trans[p][c];
                if (mx[q] == mx[p] + 1)
                    link[np] = q; // q already has exactly the required length
                else
                {
                    // q holds strings longer than mx[p] + 1: split off a clone nq
                    // that keeps q's edges and takes the shorter lengths.
                    int nq = ++cnt;
                    // The clone has no end position of its own (the original
                    // reset sit[np] here by mistake); it collects the bounds
                    // of q and np in build().
                    sit[nq] = {inf, -inf};
                    memcpy(trans[nq], trans[q], sizeof(trans[q]));
                    mx[nq] = mx[p] + 1;
                    link[nq] = link[q];
                    link[q] = link[np] = nq;
                    // Redirect the c-edges that pointed at q from p's suffix chain.
                    for (; p && trans[p][c] == q; p = link[p])
                        trans[p][c] = nq;
                }
            }
            last = np;
        }

        /**
         * Merges occurrence counts and end-position bounds from children into
         * parents along the suffix links, then adds up each state's share.
         *
         * A state with at least two occurrences contributes every length in
         * (mx[link], mx] that does not exceed rightmost - leftmost.
         *
         * @return total over all states
         */
        ll build()
        {
            for (int i = 1; i <= cnt; i++)
                vec[i].clear();
            for (int i = 1; i <= cnt; i++)
                vec[i].push_back(link[i]), in[link[i]]++;
            queue<int> q;
            for (int i = 1; i <= cnt; i++)
                if (!in[i])
                    q.push(i);
            while (!q.empty())
            {
                int temp = q.front();
                q.pop();
                for (auto i : vec[temp])
                {
                    val[i] += val[temp];
                    sit[i].first = min(sit[i].first, sit[temp].first);
                    sit[i].second = max(sit[i].second, sit[temp].second);
                    in[i]--;
                    if (!in[i])
                        q.push(i);
                }
            }
            ll res = 0;
            for (int i = 1; i <= cnt; i++)
            {
                if (val[i] >= 2)
                    res += max(0, min(mx[i], sit[i].second - sit[i].first) - mx[link[i]]);
            }
            return res;
        }

    } SAM;
    /**
     * Reads strings until a lone "#"; for each one builds the automaton with
     * 1-based end positions and prints the result of SAM.build().
     */
    int main()
    {
        string str;
        while (cin >> str && str != "#")
        {
            SAM.init();
            const int len = static_cast<int>(str.size());
            for (int i = 0; i < len; i++)
                SAM.extend(str[i] - 'a', i + 1);
            // The format string had been split across two source lines,
            // leaving an unterminated literal.
            printf("%lld\n", SAM.build());
        }
        return 0;
    }
    
  • 相关阅读:
    varnish4 配置文件整理
    简单谈谈数据库DML、DDL和DCL的区别
    使用mysqlbinlog恢复数据
    zabbix自定义监控项
    管理python虚拟环境的工具virtuelenvwrapper
    vim的使用
    python的虚拟环境virtualenv
    编译安装python
    Linux基础(二)
    Linux基础(一)
  • 原文地址:https://www.cnblogs.com/graytido/p/13221573.html
Copyright © 2011-2022 走看看