zoukankan      html  css  js  c++  java
  • AC自动机

    AC自动机

    AC自动机是KMP和Trie的结合,主要处理多模板串匹配问题。下面推荐一个博客,有助于学习AC自动机。

    这里还有一个Kuangbin开的比赛,大家也可以做一下,加深对算法的理解。

    下面是比赛中的题目,采用了notonlysuccess的模板。
    HDU 2222 Keywords Search
    题意:最裸的模板题,给定一些模板串以及一个文本串,要在文本串中找有多少个模板串。

    /*
    ID: wuqi9395@126.com
    PROG:
    LANG: C++
    */
    #include<map>
    #include<set>
    #include<queue>
    #include<stack>
    #include<cmath>
    #include<cstdio>
    #include<vector>
    #include<string>
    #include<fstream>
    #include<cstring>
    #include<ctype.h>
    #include<iostream>
    #include<algorithm>
    #define INF (1<<30)
    #define PI acos(-1.0)
    #define mem(a, b) memset(a, b, sizeof(a))
    #define rep(i, n) for (int i = 0; i < n; i++)
    #define debug puts("===============")
    typedef long long ll;
    using namespace std;
    const int maxnode = 500100;  // capacity of the node pool
    const int charset = 26;      // alphabet size ('a'..'z')
    /*
     * Array-backed Aho-Corasick automaton over the lowercase alphabet.
     *
     * Usage: init() once, then for every pattern set: reset(), Insert() each
     * pattern, Construct(), and finally query() the text.
     */
    struct ACAutomaton {
        int ch[maxnode][charset];  // ch[u][c]: node reached from u on symbol c (0 = root / none)
        int fail[maxnode];         // failure link of every node
        int Q[maxnode];            // storage for the BFS queue used by Construct()
        int val[maxnode];          // summed weight of the patterns ending at a node
        int sz;                    // number of nodes currently in use
        int ID[128];               // character code -> symbol index

        // One-time setup: root's failure link and the 'a'->0 ... 'z'->25 mapping.
        // Only the table entries for the lowercase letters are written here.
        void init() {
            fail[0] = 0;
            int code = 'a';
            for (int symbol = 0; symbol < charset; ++symbol, ++code) {
                ID[code] = symbol;
            }
        }

        // Must be called before building a new trie: clears the root's
        // children and shrinks the pool back to the root alone.
        void reset() {
            memset(ch[0], 0, sizeof(ch[0]));
            sz = 1;
        }

        // Adds the string s to the trie and adds `key` to the weight of the
        // node where s ends (so inserting a string twice accumulates).
        void Insert(char *s, int key) {
            int node = 0;
            while (*s) {
                int &next = ch[node][ID[*s]];
                ++s;
                if (next == 0) {
                    // Take the next free slot and wipe whatever an earlier
                    // trie left behind in it.
                    memset(ch[sz], 0, sizeof(ch[sz]));
                    val[sz] = 0;
                    next = sz;
                    sz += 1;
                }
                node = next;
            }
            val[node] += key;
        }

        // Builds the automaton: a breadth-first pass that assigns failure
        // links and replaces every missing child by the transition of the
        // failure node, so ch[u][c] is usable for any node and symbol.
        void Construct () {
            int head = 0, tail = 0;
            // Children of the root fall back to the root itself.
            for (int c = 0; c < charset; ++c) {
                const int child = ch[0][c];
                if (child == 0) continue;
                fail[child] = 0;
                Q[tail++] = child;
            }
            while (head < tail) {
                const int cur = Q[head++];
                const int fallback = fail[cur];
                for (int c = 0; c < charset; ++c) {
                    const int viaFail = ch[fallback][c];
                    const int child = ch[cur][c];
                    if (child != 0) {
                        Q[tail++] = child;
                        fail[child] = viaFail;
                    } else {
                        ch[cur][c] = viaFail;
                    }
                }
            }
        }

        // Basic query: total weight of the patterns occurring in s. Every
        // weight is zeroed once counted, so each pattern contributes at most
        // once and a repeated query on the same automaton finds nothing new.
        int query(char *s) {
            int total = 0;
            int state = 0;
            for (const char *p = s; *p != '\0'; ++p) {
                state = ch[state][ID[*p]];
                for (int hit = state; hit != 0; hit = fail[hit]) {
                    total += val[hit];
                    val[hit] = 0;
                }
            }
            return total;
        }
    }AC;
    char str[1000100];  // shared buffer: holds each pattern, then the text
    /*
     * HDU 2222 driver. Input: number of test cases; per case the number of
     * patterns, the patterns themselves, then one text. Output: one line per
     * case with the value returned by AC.query() for the text.
     *
     * Reading stops quietly on truncated or malformed input, and every %s
     * read is capped to the size of `str`.
     */
    int main() {
        AC.init();
        int t, n;
        if (scanf("%d", &t) != 1) return 0;
        while(t--) {
            if (scanf("%d", &n) != 1) break;
            AC.reset();
            for (int i = 0; i < n; i++) {
                if (scanf("%1000099s", str) != 1) return 0;
                AC.Insert(str, 1);
            }
            AC.Construct();
            if (scanf("%1000099s", str) != 1) break;
            printf("%d\n", AC.query(str));
        }
        return 0;
    }
    
    

    HDU 2896 病毒侵袭
    题意:有N个病毒,M个文本串,问每一个文本串出现了多少个病毒,分别是哪些?一共有多少个文本串出现了病毒?
    思路:这道题的病毒可以包含所有可见ASCII码字符

    /*
    ID: wuqi9395@126.com
    PROG:
    LANG: C++
    */
    #include<map>
    #include<set>
    #include<queue>
    #include<stack>
    #include<cmath>
    #include<cstdio>
    #include<vector>
    #include<string>
    #include<fstream>
    #include<cstring>
    #include<ctype.h>
    #include<iostream>
    #include<algorithm>
    #define INF (1<<30)
    #define PI acos(-1.0)
    #define mem(a, b) memset(a, b, sizeof(a))
    #define rep(i, n) for (int i = 0; i < n; i++)
    #define debug puts("===============")
    typedef long long ll;
    using namespace std;
    const int maxnode = 100100;  // capacity of the node pool
    const int charset = 128;     // one symbol per 7-bit character code
    /*
     * Array-backed Aho-Corasick automaton over 7-bit characters, where each
     * pattern carries an integer id.
     *
     * Usage: init() once, reset() before each pattern set, Insert() every
     * pattern, Construct(), then query() each text.
     */
    struct ACAutomaton {
        int ch[maxnode][charset];  // ch[u][c]: node reached from u on symbol c (0 = root / none)
        int fail[maxnode];         // failure link of every node
        int Q[maxnode];            // storage for the BFS queue used by Construct()
        int val[maxnode];          // id of the pattern ending at a node, 0 if none
        int sz;                    // number of nodes currently in use
        int ID[128];               // character code -> symbol index (identity here)

        // One-time setup: root's failure link and the identity symbol table.
        void init() {
            fail[0] = 0;
            for (int i = 0; i < charset; i++) ID[i] = i;
        }

        // Must be called before building a new trie.
        void reset() {
            sz = 1;
            memset(ch[0], 0, sizeof(ch[0]));
        }

        // Adds the string s to the trie and stores `key` at its final node,
        // replacing any id stored there before. `key` should be non-zero,
        // because 0 is what query() treats as "no pattern ends here".
        void Insert(char *s, int key) {
            int u = 0;
            for ( ; *s; s++) {
                int c = ID[*s];
                if (!ch[u][c]) {
                    // Recycle the next free slot, clearing stale data.
                    memset(ch[sz], 0, sizeof(ch[sz]));
                    val[sz] = 0;
                    ch[u][c] = sz++;
                }
                u = ch[u][c];
            }
            val[u] = key;
        }

        // Breadth-first pass that assigns failure links and replaces every
        // missing child by the transition of the failure node.
        void Construct () {
            int *s = Q, *e = Q;
            for (int i = 0; i < charset; i++) {
                if (ch[0][i]) {
                    *e++ = ch[0][i];
                    fail[ch[0][i]] = 0;
                }
            }
            while(s != e) {
                int u = *s++;
                for (int i = 0; i < charset; i++) {
                    int &v = ch[u][i];
                    if (v) {
                        *e++ = v;
                        fail[v] = ch[fail[u]][i];
                    } else {
                        v = ch[fail[u]][i];
                    }
                }
            }
        }

        // Scans the text s and collects the ids of all patterns found in it.
        // If there is at least one, prints "web <id>:" followed by the
        // pattern ids in increasing order and a newline, and increments
        // `tot`. Prints nothing and leaves `tot` alone otherwise.
        void query(char *s, int &tot, int id) {
            int u = 0;
            std::set<int> S;  // ordered and duplicate-free, as the output needs
            for (; *s; s++) {
                int c = ID[*s];
                u = ch[u][c];
                // Every node on the failure chain is a suffix of the text
                // read so far, so each one may end a pattern.
                for (int tmp = u; tmp; tmp = fail[tmp]) {
                    if (val[tmp]) S.insert(val[tmp]);
                }
            }
            if (!S.empty()) {
                printf("web %d:", id);
                for (std::set<int>::iterator it = S.begin(); it != S.end(); ++it) printf(" %d", *it);
                putchar('\n');
                tot++;
            }
        }
    }AC;
    char buf[210], str[10100];  // buf: one pattern, str: one text
    /*
     * HDU 2896 driver. Reads n patterns (numbered from 1), builds the
     * automaton, then reads m texts (numbered from 1) and lets AC.query()
     * report the patterns found in each. Finishes with "total: <count>",
     * the number of texts that contained at least one pattern.
     *
     * Every %s read is capped to the size of its buffer.
     */
    int main () {
        int n, m, tot = 0;
        if (scanf("%d", &n) != 1) return 0;
        AC.init();
        AC.reset();
        for (int i = 0; i < n; i++) {
            if (scanf("%209s", buf) != 1) return 0;
            AC.Insert(buf, i + 1);
        }
        AC.Construct();
        if (scanf("%d", &m) != 1) return 0;
        for (int i = 0; i < m; i++) {
            if (scanf("%10099s", str) != 1) break;
            AC.query(str, tot, i + 1);
        }
        printf("total: %d\n", tot);
        return 0;
    }
    

    HDU 3065 病毒侵袭持续中
    题意:有N个病毒,一个文本串,问文本串中每一个病毒出现了多少次
    思路:也是基础的模板,是多case。。

    /*
    ID: wuqi9395@126.com
    PROG:
    LANG: C++
    */
    #include<map>
    #include<set>
    #include<queue>
    #include<stack>
    #include<cmath>
    #include<cstdio>
    #include<vector>
    #include<string>
    #include<fstream>
    #include<cstring>
    #include<ctype.h>
    #include<iostream>
    #include<algorithm>
    #define INF (1<<30)
    #define PI acos(-1.0)
    #define mem(a, b) memset(a, b, sizeof(a))
    #define rep(i, n) for (int i = 0; i < n; i++)
    #define debug puts("===============")
    typedef long long ll;
    using namespace std;
    const int maxnode = 50010;  // capacity of the node pool
    const int charset = 128;    // one symbol per 7-bit character code
    int cnt[1100];              // cnt[id]: occurrences counted for pattern id
    /*
     * Array-backed Aho-Corasick automaton over 7-bit characters that counts,
     * in the global `cnt`, how often each pattern id occurs in a text.
     *
     * Usage: init() once, reset() before each pattern set, Insert() every
     * pattern, Construct(), then query() the text.
     */
    struct ACAutomaton {
        int ch[maxnode][charset];  // ch[u][c]: node reached from u on symbol c (0 = root / none)
        int fail[maxnode];         // failure link of every node
        int Q[maxnode];            // storage for the BFS queue used by Construct()
        int val[maxnode];          // id of the pattern ending at a node, 0 if none
        int sz;                    // number of nodes currently in use
        int ID[128];               // character code -> symbol index (identity here)

        // One-time setup: root's failure link and the identity symbol table.
        void init() {
            fail[0] = 0;
            for (int code = 0; code < charset; ++code) {
                ID[code] = code;
            }
        }

        // Must be called before building a new trie.
        void reset() {
            sz = 1;
            memset(ch[0], 0, sizeof(ch[0]));
        }

        // Adds the string s to the trie and stores `key` at its final node,
        // replacing any id stored there before.
        void Insert(char *s, int key) {
            int node = 0;
            while (*s) {
                int &next = ch[node][ID[*s]];
                ++s;
                if (next == 0) {
                    // Take the next free slot and clear stale data in it.
                    memset(ch[sz], 0, sizeof(ch[sz]));
                    val[sz] = 0;
                    next = sz;
                    sz += 1;
                }
                node = next;
            }
            val[node] = key;
        }

        // Breadth-first pass that assigns failure links and replaces every
        // missing child by the transition of the failure node.
        void Construct () {
            int head = 0, tail = 0;
            for (int c = 0; c < charset; ++c) {
                const int child = ch[0][c];
                if (child == 0) continue;
                Q[tail++] = child;
                fail[child] = 0;
            }
            while (head < tail) {
                const int cur = Q[head++];
                const int fallback = fail[cur];
                for (int c = 0; c < charset; ++c) {
                    const int viaFail = ch[fallback][c];
                    const int child = ch[cur][c];
                    if (child != 0) {
                        Q[tail++] = child;
                        fail[child] = viaFail;
                    } else {
                        ch[cur][c] = viaFail;
                    }
                }
            }
        }

        // Scans the text s; for every position, walks the failure chain and
        // bumps cnt[id] for each pattern id found on it. Overlapping
        // occurrences are all counted.
        void query(char *s) {
            int state = 0;
            for (const char *p = s; *p != '\0'; ++p) {
                state = ch[state][ID[*p]];
                for (int hit = state; hit != 0; hit = fail[hit]) {
                    const int id = val[hit];
                    if (id != 0) {
                        cnt[id] += 1;
                    }
                }
            }
        }
    } AC;
    char buf[1100][55], str[2000100];  // buf[i]: pattern i + 1, str: the text
    /*
     * HDU 3065 driver, repeated until the input ends. Per case: reads n
     * patterns (ids 1..n), builds the automaton, reads one text, and prints
     * "<pattern>: <count>" for every pattern that occurs at least once, in
     * input order.
     *
     * The loop condition requires scanf to actually convert a number; the
     * previous `~scanf(...)` test only stopped on EOF and would spin forever
     * on a non-numeric token. Every %s read is capped to its buffer.
     */
    int main () {
        int n;
        AC.init();
        while(scanf("%d", &n) == 1) {
            AC.reset();
            for (int i = 0; i < n; i++) {
                if (scanf("%54s", buf[i]) != 1) return 0;
                AC.Insert(buf[i], i + 1);
                cnt[i + 1] = 0;  // clear the counter left by the previous case
            }
            AC.Construct();
            if (scanf("%2000099s", str) != 1) return 0;
            AC.query(str);
            for (int i = 1; i <= n; i++) {
                if (cnt[i]) printf("%s: %d\n", buf[i - 1], cnt[i]);
            }
        }
        return 0;
    }
    

    ZOJ 3430 Detect the Virus
    题意:有一种编码方式,将输进来的字符转化为二进制,然后6个为一组,不足补零,得到一个新的数字,每一个数字对应一个字符(见题面)。现在给你已经编码过的n个病毒,和m个编码过的文本串,问每一个文本串各包含多少种病毒。
    思路:这里解码的时候,会发现可能有256种状态,所以不能用字符串表示。解码之后就是裸的AC自动机。
    /*
    ID: wuqi9395@126.com
    PROG:
    LANG: C++
    */
    #include<map>
    #include<set>
    #include<queue>
    #include<stack>
    #include<cmath>
    #include<cstdio>
    #include<vector>
    #include<string>
    #include<fstream>
    #include<cstring>
    #include<ctype.h>
    #include<iostream>
    #include<algorithm>
    #define INF (1<<30)
    #define PI acos(-1.0)
    #define mem(a, b) memset(a, b, sizeof(a))
    #define rep(i, n) for (int i = 0; i < n; i++)
    #define debug puts("===============")
    typedef long long ll;
    using namespace std;
    const int maxnode = 510 * 64;  // capacity of the node pool
    const int charset = 256;       // one symbol per byte value
    /*
     * Array-backed Aho-Corasick automaton over raw bytes. Strings are passed
     * as (pointer, length) pairs so that they may contain zero bytes.
     *
     * Usage: init() once, reset() before each pattern set, Insert() every
     * pattern, Construct(), then query() each text.
     */
    struct ACAutomaton {
        int ch[maxnode][charset];  // ch[u][c]: node reached from u on byte c (0 = root / none)
        int fail[maxnode];         // failure link of every node
        int Q[maxnode];            // storage for the BFS queue used by Construct()
        int val[maxnode];          // id of the pattern ending at a node, 0 if none
        int sz;                    // number of nodes currently in use
        int ID[256];               // not used by this variant: bytes index ch directly

        // One-time setup: the root's failure link is the root itself.
        void init() {
            fail[0] = 0;
        }

        // Must be called before building a new trie.
        void reset() {
            sz = 1;
            memset(ch[0], 0, sizeof(ch[0]));
        }

        // Adds the first `len` bytes of s to the trie and stores `key` at the
        // final node, replacing any id stored there before. query() indexes
        // a 520-entry table by this id, so keys must lie in 1..519.
        void Insert(unsigned char s[], int key, int len) {
            int u = 0;
            for (int i = 0; i < len; i++) {
                int c = s[i];
                if (!ch[u][c]) {
                    // Recycle the next free slot, clearing stale data.
                    memset(ch[sz], 0, sizeof(ch[sz]));
                    val[sz] = 0;
                    ch[u][c] = sz++;
                }
                u = ch[u][c];
            }
            val[u] = key;
        }

        // Breadth-first pass that assigns failure links and replaces every
        // missing child by the transition of the failure node.
        void Construct () {
            int *s = Q, *e = Q;
            for (int i = 0; i < charset; i++) {
                if (ch[0][i]) {
                    *e++ = ch[0][i];
                    fail[ch[0][i]] = 0;
                }
            }
            while(s != e) {
                int u = *s++;
                for (int i = 0; i < charset; i++) {
                    int &v = ch[u][i];
                    if (v) {
                        *e++ = v;
                        fail[v] = ch[fail[u]][i];
                    } else {
                        v = ch[fail[u]][i];
                    }
                }
            }
        }

        // Scans the first `len` bytes of s and prints, on its own line, how
        // many distinct pattern ids occur in it.
        void query(unsigned char s[], int len) {
            int u = 0, ans = 0;
            bool vis[520] = {0};  // vis[id]: pattern id already counted for this text
            for (int i = 0; i < len; i++) {
                int c = s[i];
                u = ch[u][c];
                for (int tmp = u; tmp; tmp = fail[tmp]) {
                    if (val[tmp] && !vis[val[tmp]]) {
                        ans++;
                        vis[val[tmp]] = 1;
                    }
                }
            }
            printf("%d\n", ans);
        }
    } AC;
    char s[4000];            // one encoded input token
    unsigned char g[4000];   // 6-bit values produced by get()
    unsigned char now[4000]; // decoded bytes produced by change()

    // Maps one encoded character to its 6-bit value: 'A'..'Z' -> 0..25,
    // 'a'..'z' -> 26..51, '0'..'9' -> 52..61, '+' -> 62, anything else -> 63.
    static unsigned char sextetOf(char c) {
        if ('A' <= c && c <= 'Z') return c - 'A';
        if ('a' <= c && c <= 'z') return c - 'a' + 26;
        if ('0' <= c && c <= '9') return c - '0' + 52;
        return c == '+' ? 62 : 63;
    }

    // Translates the first `len` characters of s into 6-bit values stored in
    // the global g, and writes a 0 right after them so that change() can
    // safely look one value past the end.
    void get(char *s, int len) {
        for (int k = 0; k < len; ++k) {
            g[k] = sextetOf(s[k]);
        }
        g[len] = 0;
    }

    // Packs the `len` 6-bit values in g into bytes stored in the global now,
    // four values (three bytes) per group, and returns the number of bytes
    // written. A trailing group of two or three values yields one or two
    // bytes respectively.
    int change(unsigned char g[], int len) {
        int produced = 0;
        int pos = 0;
        while (pos < len) {
            const unsigned char *quad = g + pos;
            now[produced++] = (quad[0] << 2) | (quad[1] >> 4);
            if (pos + 2 < len) {
                now[produced++] = (quad[1] << 4) | (quad[2] >> 2);
            }
            if (pos + 3 < len) {
                now[produced++] = (quad[2] << 6) | quad[3];
            }
            pos += 4;
        }
        return produced;
    }
    // Reads one encoded token into s, strips its trailing '=' padding,
    // decodes it into now, and returns the decoded length; returns -1 when no
    // token could be read. The padding loop is bounded by len > 0 so a token
    // made only of '=' cannot index before the start of the buffer.
    static int readDecoded() {
        if (scanf("%3999s", s) != 1) return -1;
        int len = strlen(s);
        while (len > 0 && s[len - 1] == '=') len--;
        get(s, len);
        return change(g, len);
    }
    /*
     * ZOJ 3430 driver, repeated until the input ends. Per case: reads n
     * encoded patterns (ids 1..n), builds the automaton, then reads m encoded
     * texts and lets AC.query() print the number of distinct patterns found
     * in each. A blank line is printed after every case.
     *
     * The loop condition requires scanf to actually convert a number; the
     * previous `~scanf(...)` test only stopped on EOF and would spin forever
     * on a non-numeric token.
     */
    int main () {
        int n, m;
        AC.init();
        while(scanf("%d", &n) == 1) {
            AC.reset();
            for (int i = 0; i < n; i++) {
                int cnt = readDecoded();
                if (cnt < 0) return 0;
                AC.Insert(now, i + 1, cnt);
            }
            AC.Construct();
            if (scanf("%d", &m) != 1) return 0;
            while(m--) {
                int cnt = readDecoded();
                if (cnt < 0) return 0;
                AC.query(now, cnt);
            }
            putchar('\n');
        }
        return 0;
    }
    

    POJ 2778 DNA Sequence
    题意:DNA的序列由ACTG四个字母组成,现在给定m个不可行的序列。问随机构成的长度为n的序列中,有多少种序列是可行的(只要包含一个不可行序列便不可行)。个数非常大,对100000取模。
    思路:AC自动机 + DP 解题报告

    HDU 2243 考研路茫茫――单词情结
    题意:给定一些词根,如果一个单词包含词根,则认为是有效的。现在问长度不超过L的单词里面,有多少有效的单词?

    ZOJ 2619 Generator
    题意:给定一个数N,代表可以选前N个字母。然后给定一个仅由前N个字母组成的字符串,问从空串开始构造,每次可以在已有基础上从前N个字母中挑选一个加在后面,问构造的字符串的长度期望是多少?


    持续更新中

  • 相关阅读:
    python读取csv文件、excel文件并封装成dict类型的list,直接看代码
    利用Python获取cookie的方法,相比java代码简便不少
    关于appium操作真机打开app之后无法定位页面元素的问题的解决办法
    关于做移动端ui自动化测试使用PC代理网络会出现的问题
    接口测试面试问题总结-转载
    接口测试3-参数关联接口(从上一个接口中获取数据,访问幼儿园服务器接口无session)
    接口测试2-接口测试 get post请求
    HTTP协议
    接口测试1-概论
    python视频学习笔记8(函数返回值和参数进阶)
  • 原文地址:https://www.cnblogs.com/hrhguanli/p/4067205.html
Copyright © 2011-2022 走看看