zoukankan      html  css  js  c++  java
  • 暑假集训 || AC自动机

    HDU 2222

    题意:给n个模式串和一个字符串,求有多少个模式串在这个字符串中出现

    思路:裸题,注意数组开的大小

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    // Capacity of the per-node tables below; trie node ids are used as indices.
    const int SZ = 500100;
    // Input buffer for a single pattern string.
    char keystr[55];
    // Id of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]: failure link of node v.
    // ch[v][c]: transition of node v on letter c ('a' + c); 0 means "no child" until getfail() fills it in.
    // sum[v]: number of inserted patterns that end at node v; match() overwrites entries it has counted.
    int fail[SZ], ch[SZ][33], sum[SZ];
    // Input buffer for the text that is scanned for patterns.
    char str[1000005];
    void insert(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'a';
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
        }
        sum[p]++;
    }
    // Work queue for the breadth-first traversal in getfail(); empty between calls.
    std::queue<int> q;
    // Computes failure links breadth-first from the root and replaces every
    // missing transition ch[u][c] with the transition of u's failure target,
    // so that the table can be followed without explicit fallback.
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while (!q.empty())
        {
            const int node = q.front();
            q.pop();
            for (int letter = 0; letter < 26; ++letter)
            {
                const int child = ch[node][letter];
                if (child == 0)
                {
                    // No real child: borrow the transition of the failure target.
                    ch[node][letter] = ch[fail[node]][letter];
                    continue;
                }
                int link = 0;   // children of the root always fail to the root
                if (node != 0)
                {
                    int probe = fail[node];
                    while (probe != 0 && ch[probe][letter] == 0)
                        probe = fail[probe];
                    link = ch[probe][letter];
                }
                fail[child] = link;
                q.push(child);
            }
        }
    }
    
    // Scans the text s and returns how many inserted patterns occur in it at
    // least once (a pattern inserted k times contributes k).
    //
    // Each node's count is consumed the first time it is reached: sum[v] is set
    // to -1 as a "visited" marker, so calling match() twice without re-running
    // init()/insert() does not count a pattern again.
    int match(char s[])
    {
        int ans = 0, p = 0;
        for (int i = 0; s[i] != '\0'; i++)
        {
            int c = s[i] - 'a';
            if (c < 0 || c >= 26)
            {
                // A character outside a-z cannot be part of any pattern:
                // restart from the root instead of indexing out of range.
                p = 0;
                continue;
            }
            while (p && ch[p][c] == 0) p = fail[p];
            p = ch[p][c];
            // Walk the whole failure chain, not just the prefix of it that has
            // patterns: a node without a pattern may still have a suffix that
            // is one. Stopping at an already visited node is safe because its
            // chain was fully walked when it was marked.
            for (int v = p; v && sum[v] != -1; v = fail[v])
            {
                ans += sum[v];
                sum[v] = -1;
            }
        }
        return ans;
    }
    // Resets the automaton for a new test case.
    //
    // Only rows 0..tot were touched by the previous case (insert, getfail and
    // match never index past the last allocated node), so clearing just those
    // rows is enough and avoids wiping the whole multi-megabyte tables for
    // every test case. tot must therefore still hold the previous node count
    // on entry; it is reset last.
    void init()
    {
        const int used = tot + 1;
        memset(sum, 0, sizeof(sum[0]) * used);
        memset(ch, 0, sizeof(ch[0]) * used);
        memset(fail, 0, sizeof(fail[0]) * used);
        tot = 0;
    }
    int main()
    {
        int T;
        scanf("%d", &T);
        while(T--)
        {
            int n;
            scanf("%d", &n);
            init();
            for(int i = 0; i < n; i++)
            {
                scanf("%s", keystr);
                insert(keystr);
            }
            getfail();
            scanf("%s", str);
            printf("%d
    ", match(str));
        }
        return 0;
    }
    View Code

    HDU 3065

    题意:给n个模式串,一个字符串,输出在这个字符串中出现的模式串的出现次数

    思路:裸题,记录idx[p] = id;//以p结尾的是第id个模式串

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    typedef long double LD;
    // Capacity of the per-node tables below; trie node ids are used as indices.
    const int SZ = 100100;
    // keystr[i]: the i-th pattern (1-based), kept so it can be printed with its count.
    char keystr[1010][55];
    // Id of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]: failure link of node v.
    int fail[SZ];
    // ch[v][c]: transition of node v on letter 'A' + c; match() uses column 26 for
    // any other character, and that column is never written, so it stays 0.
    // idx[v]: id of the pattern ending at node v (0 = none).
    // cnt[id]: number of occurrences found for pattern id.
    int ch[SZ][30], idx[SZ], cnt[SZ];
    // Input buffer for the text that is scanned for patterns.
    char str[2000005];
    void insert(char s[], int id)
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'A';
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
        }
        idx[p] = id;//以p结尾的是第id个模式串
    }
    // Work queue for the breadth-first traversal in getfail(); empty between calls.
    std::queue<int> q;
    // Computes failure links breadth-first from the root and fills each missing
    // transition for letters 0..25 with the transition of the failure target.
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while (!q.empty())
        {
            const int node = q.front();
            q.pop();
            for (int letter = 0; letter < 26; ++letter)
            {
                const int child = ch[node][letter];
                if (child == 0)
                {
                    // No real child: borrow the transition of the failure target.
                    ch[node][letter] = ch[fail[node]][letter];
                    continue;
                }
                int link = 0;   // children of the root always fail to the root
                if (node != 0)
                {
                    int probe = fail[node];
                    while (probe != 0 && ch[probe][letter] == 0)
                        probe = fail[probe];
                    link = ch[probe][letter];
                }
                fail[child] = link;
                q.push(child);
            }
        }
    }
    
    void match(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'A';
            if(s[i] < 'A' || s[i] > 'Z') c = 26;
            while(p && ch[p][c] == 0) p = fail[p];
            p = ch[p][c];
            int v = p;
            while(v)
            {
                if(idx[v] > 0) cnt[idx[v]]++;
                v = fail[v];
            }
        }
    }
    
    // Resets the node counter and zero-fills every automaton table so that a
    // new test case starts from an empty trie.
    void init()
    {
        memset(idx, 0, sizeof idx);
        memset(fail, 0, sizeof fail);
        memset(ch, 0, sizeof ch);
        memset(cnt, 0, sizeof cnt);
        tot = 0;
    }
    // Processes test cases until input ends: n patterns followed by one text.
    // For every pattern that occurs at least once, prints "pattern: count" in
    // input order.
    int main()
    {
        int n;
        // Compare against 1 rather than using ~scanf(): a return value of 0
        // (non-numeric input) would otherwise loop forever.
        while (scanf("%d", &n) == 1)
        {
            init();
            for (int i = 1; i <= n; i++)
            {
                // Field width keeps the read inside keystr[i][55].
                if (scanf("%54s", keystr[i]) != 1) break;
                insert(keystr[i], i);
            }
            getfail();
            // Field width keeps the read inside str[2000005].
            if (scanf("%2000004s", str) != 1) str[0] = '\0';
            match(str);
            for (int i = 1; i <= n; i++)
                if (cnt[i]) printf("%s: %d\n", keystr[i], cnt[i]);
        }
        return 0;
    }
    View Code

    ZOJ 3228

    题意:给n个模式串,一个字符串,分别求每个模式串在字符串中出现的次数,其中输入中0表示可以覆盖着出现,1表示不能

    思路:如果没有类型1(不允许重叠)的情况就是裸题;对于类型1的情况,做法如下:

    用last[i]记录Trie节点i在上一次匹配时所对应的字符在文本串中的位置。

    用pos[i]记录Trie节点i所对应的字符在模式串中的位置。

    没有重叠的判断 —— 当前字符位置 - last[当前节点] >= pos[当前节点]。

    abababac - aba

    pos[1] = 1  pos[2] = 2  pos[3] = 3

    i = 4时,last[3] = 2 pos[3] = 3 而i - last[3] < pos[3] 所以不行

    3再往前到1,last[1] = 2 pos[1] = 1 可以了,然后last[1] = 4,节点1匹配到了文本串中的第4位

    太高端了。。。。

    卡数组大小,记得开n*len的

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    typedef long double LD;
    // Capacity of the per-node tables below; trie node ids are used as indices.
    const int SZ = 600100;
    // Input buffer for a single pattern string.
    char keystr[10];
    // Id of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]: failure link of node v.
    int fail[SZ];
    // ch[v][c]: transition of node v on letter 'a' + c.
    // idx[i]: trie node at which the i-th query pattern ends.
    // cnt[0][v]: occurrences of node v's string counting overlaps;
    // cnt[1][v]: occurrences counted only when they do not overlap the previously counted one.
    int ch[SZ][30], idx[SZ], cnt[2][SZ];
    // last[v]: text index where node v was last counted in cnt[1] (-1 = never).
    // pos[v]: depth of node v, i.e. the length of the string it represents.
    // typ[i]: type of the i-th query as read from input; used as the first index into cnt.
    int last[SZ], pos[SZ], typ[SZ];
    // Input buffer for the text that is scanned for patterns.
    char str[100005];
    void insert(char s[], int id)
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'a';
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
            pos[p] = i+1;
        }
        idx[id] = p;
    }
    // Work queue for the breadth-first traversal in getfail(); empty between calls.
    std::queue<int> q;
    // Computes failure links breadth-first from the root and fills each missing
    // transition with the transition of the failure target, which lets match()
    // step through ch[][] directly.
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while (!q.empty())
        {
            const int node = q.front();
            q.pop();
            for (int letter = 0; letter < 26; ++letter)
            {
                const int child = ch[node][letter];
                if (child == 0)
                {
                    // No real child: borrow the transition of the failure target.
                    ch[node][letter] = ch[fail[node]][letter];
                    continue;
                }
                int link = 0;   // children of the root always fail to the root
                if (node != 0)
                {
                    int probe = fail[node];
                    while (probe != 0 && ch[probe][letter] == 0)
                        probe = fail[probe];
                    link = ch[probe][letter];
                }
                fail[child] = link;
                q.push(child);
            }
        }
    }
    
    void match(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'a';
            p = ch[p][c];
            int v = p;
            while(v)
            {
                cnt[0][v]++;
                if(i - last[v] >= pos[v])
                {
                    cnt[1][v]++;
                    last[v] = i;
                }
                v = fail[v];
            }
        }
    }
    
    // Resets the automaton for a new test case.
    //
    // Only rows 0..tot were touched by the previous case (insert, getfail and
    // match never index past the last allocated node), so clearing just those
    // rows is enough and avoids wiping the whole multi-megabyte tables for
    // every test case. tot must therefore still hold the previous node count
    // on entry; it is reset last. fail[] is not cleared because getfail()
    // assigns it for every node before it is read.
    void init()
    {
        const int used = tot + 1;
        memset(cnt[0], 0, sizeof(cnt[0][0]) * used);
        memset(cnt[1], 0, sizeof(cnt[1][0]) * used);
        memset(ch, 0, sizeof(ch[0]) * used);
        memset(last, -1, sizeof(last[0]) * used);   // -1 = never counted
        memset(pos, 0, sizeof(pos[0]) * used);
        tot = 0;
    }
    int main()
    {
        int n, tt = 0;
        while(~scanf("%s", str))
        {
            init();
            scanf("%d", &n);
            for(int i = 0; i < n; i++)
            {
                scanf("%d %s", &typ[i], keystr);
                insert(keystr, i);
            }
            getfail();
            match(str);
            printf("Case %d
    ", ++tt);
            for(int i = 0; i < n; i++)
                printf("%d
    ", cnt[typ[i]][idx[i]]);
            printf("
    ");
        }
        return 0;
    }
    View Code

    HDU 2457 AC自动机+DP

    题意:给n种病毒序列(只含ACGT),给一串基因,问最少修改多少个碱基能使得其中不含病毒,如果不能做到则输出-1

    思路:考虑神仙DP

    f[i][j] 表示文本串的前 i 个字符,处于 Trie 图的节点 j 且不经过终止节点(危险节点)的最少修改个数

    每个点是否是终止节点:如果它或它顺着 fail 函数能达到的点中有一个是终止节点,那么它也是终止节点

    这个讲的蛮明白的:https://blog.csdn.net/human_ck/article/details/6577142

    转移方程:dp[i+1][ch[p][j]] = min(dp[i+1][ch[p][j]], dp[i][p] + (j == c ? 0 : 1)); //j表示枚举把这个非危险节点的点修改成什么,如果和原来相同则操作数不变,如果不同则+1

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    // Capacity of the per-node tables below; trie node ids are used as indices.
    const int SZ = 1010;
    // Sentinel for "state not reachable" in dp.
    const int INF = 1000000100;
    // Input buffer for a single virus pattern.
    char keystr[22];
    // Id of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]: failure link of node v.
    int fail[SZ];
    // ch[v][c]: transition of node v on base code c (1..4, see tran()).
    // dp[i][v]: minimum number of changed characters among the first i text
    // characters such that the automaton is in node v without entering a flagged node.
    int ch[SZ][6], dp[1010][SZ];
    // Input buffer for the gene text.
    char str[1005];
    // flag[v]: true when node v ends a pattern or its failure target is flagged.
    bool flag[SZ];
    // Maps a base letter to its transition-table column: A=1, G=2, C=3, T=4.
    // Any other character yields 0, a column the automaton never traverses,
    // instead of falling off the end of the function (undefined behavior).
    int tran(char c)
    {
        switch (c)
        {
            case 'A': return 1;
            case 'G': return 2;
            case 'C': return 3;
            case 'T': return 4;
            default:  return 0;
        }
    }
    void insert(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = tran(s[i]);
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
        }
        flag[p] = true;
    }
    // Work queue for the breadth-first traversal in getfail(); empty between calls.
    std::queue<int> q;
    // Computes failure links breadth-first from the root, fills each missing
    // transition (columns 1..4) with that of the failure target, and marks a
    // node as forbidden when its failure target is forbidden.
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while (!q.empty())
        {
            const int node = q.front();
            q.pop();
            for (int base = 1; base <= 4; ++base)
            {
                const int child = ch[node][base];
                if (child == 0)
                {
                    // No real child: borrow the transition of the failure target.
                    ch[node][base] = ch[fail[node]][base];
                    continue;
                }
                int link = 0;   // children of the root always fail to the root
                if (node != 0)
                {
                    int probe = fail[node];
                    while (probe != 0 && ch[probe][base] == 0)
                        probe = fail[probe];
                    link = ch[probe][base];
                }
                fail[child] = link;
                // A forbidden suffix makes this node forbidden as well.
                if (flag[link])
                    flag[child] = true;
                q.push(child);
            }
        }
    }
    
    void match(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i <= len; i++)
            for(int j = 0; j <= tot; j++)
                dp[i][j] = INF;
        dp[0][0] = 0;
        for(int i = 0; i < len; i++)
        {
            int c = tran(s[i]);
            for(p = 0; p <= tot; p++)
            {
                if(dp[i][p] == INF) continue;
                for(int j = 1; j <= 4; j++)
                {
                    if(flag[ch[p][j]]) continue;
                    dp[i+1][ch[p][j]] =  min(dp[i+1][ch[p][j]], dp[i][p] + (j == c ? 0 : 1));
                }
            }
        }
    }
    
    // Resets the node counter and clears the transition, failure and
    // forbidden-node tables before a new test case is read.
    void init()
    {
        memset(flag, false, sizeof flag);
        memset(fail, 0, sizeof fail);
        memset(ch, 0, sizeof ch);
        tot = 0;
    }
    // Processes test cases until n == 0 or input ends: n virus patterns followed
    // by the gene text. Prints "Case k: m" where m is the minimum number of
    // changes that removes every virus, or -1 when that is impossible.
    int main()
    {
        int n, tt = 0;
        // Require a successful read: scanf returns EOF (-1, which is truthy) at
        // end of input, so testing the bare return value would loop forever.
        while (scanf("%d", &n) == 1 && n)
        {
            init();
            for (int i = 1; i <= n; i++)
            {
                // Field width keeps the read inside keystr[22].
                if (scanf("%21s", keystr) != 1) break;
                insert(keystr);
            }
            getfail();
            // Field width keeps the read inside str[1005].
            if (scanf("%1004s", str) != 1) str[0] = '\0';
            int len = strlen(str);
            match(str);
            int minn = INF;
            for (int i = 0; i <= tot; i++)
                if (!flag[i]) minn = std::min(minn, dp[len][i]);
            if (minn == INF) minn = -1;
            printf("Case %d: %d\n", ++tt, minn);
        }
        return 0;
    }
    View Code

    OTZ

  • 相关阅读:
    Unity 3(一):简介与示例
    MongoDB以Windows Service运行
    动态SQL中变量赋值
    网站发布IIS后堆栈追踪无法获取出错的行号
    GridView Postback后出错Operation is not valid due to the current state of the object.
    Visual Studio 2010 SP1 在线安装后,找到缓存在本地的临时文件以便下次离线安装
    SQL Server 问题之 排序规则(collation)冲突
    IIS 问题集锦
    linux下安装mysql(ubuntu0.16.04.1)
    apt-get update 系列作用
  • 原文地址:https://www.cnblogs.com/pinkglightning/p/9550772.html
Copyright © 2011-2022 走看看