AC自动机
AC自动机是KMP和Trie的结合,主要处理多模板串匹配问题。下面推荐一个博客,有助于学习AC自动机。
这里还有一个Kuangbin开的比赛,大家也可以做一下,加深对算法的理解。
下面是比赛中的题目,采用了notonlysuccess的模板。
HDU 2222 Keywords Search
题意:最裸的模板题,给定一些模板串以及一个文本串,要在文本串中找有多少个模板串。
/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;

const int maxnode = 500100;  // capacity of the trie (number of nodes)
const int charset = 26;      // alphabet size: 'a'..'z'

// Aho-Corasick automaton over lowercase letters, stored in fixed-size arrays.
// Node 0 is the root; nodes are allocated sequentially through `sz`.
struct ACAutomaton {
    int ch[maxnode][charset];  // ch[u][c]: child / goto transition of node u on letter c
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // array-backed BFS queue used by Construct()
    int val[maxnode];          // accumulated weight of the patterns ending at each node
    int sz;                    // number of nodes currently in use
    int ID[128];               // maps a character code to its child index

    // One-time setup: root fails to itself and 'a'..'z' map to 0..25.
    void init() {
        fail[0] = 0;
        for (int i = 0; i < charset; i++) ID[i + 'a'] = i;
    }

    // Must be called before building a new trie: clears the root and
    // restarts node allocation. Other nodes are cleared lazily in Insert().
    void reset() {
        memset(ch[0], 0, sizeof(ch[0]));
        sz = 1;
    }

    // Inserts the NUL-terminated string s into the trie and adds `key` to the
    // weight of its terminal node. s is expected to contain only 'a'..'z'.
    void Insert(char *s, int key) {
        int u = 0;
        for ( ; *s; s++) {
            int c = ID[*s];
            if (!ch[u][c]) {
                // Fresh node: wipe whatever a previous test case left behind.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] += key;
    }

    // Builds the automaton: a BFS over the trie that computes the failure
    // links and fills every missing child with the transition of the failure
    // state, so that ch[u][c] is defined for every node and letter.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                fail[ch[0][i]] = 0;
                *e ++ = ch[0][i];
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (ch[u][i]) {
                    *e ++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Basic query: returns the total weight of the patterns occurring in s.
    // Each node's weight is zeroed once collected, so a pattern contributes
    // only once and the automaton must be rebuilt before another query.
    int query(char *s) {
        int ans = 0, u = 0;
        for ( ; *s; s++) {
            int c = ID[*s];
            u = ch[u][c];
            // Walk the failure chain to pick up every pattern ending here.
            int tmp = u;
            while(tmp) {
                ans += val[tmp];
                val[tmp] = 0;
                tmp = fail[tmp];
            }
        }
        return ans;
    }
}AC;

char str[1000100];

// Reads t test cases; each has n patterns followed by one text, and prints
// the number of patterns found in the text, one answer per line.
int main() {
    AC.init();
    int t, n;
    scanf("%d", &t);
    while(t--) {
        scanf("%d", &n);
        AC.reset();
        for (int i = 0; i < n; i++) {
            scanf("%s", str);
            AC.Insert(str, 1);
        }
        AC.Construct();
        scanf("%s", str);
        // Terminate each answer with a newline so the per-case results do
        // not run together on a single line.
        printf("%d\n", AC.query(str));
    }
    return 0;
}
HDU 2896 病毒侵袭
题意:有N个病毒,M个文本串,问每一个文本串出现了多少个病毒,分别是哪些?一共有多少个文本串出现了病毒?
思路:这道题的病毒可以包含所有可见ASCII字符,所以字符集大小取128。
/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;

const int maxnode = 100100;  // capacity of the trie (number of nodes)
const int charset = 128;     // alphabet size: character codes 0..127

// Aho-Corasick automaton over 7-bit character codes, stored in fixed-size
// arrays. Node 0 is the root; val[u] holds the 1-based id of the pattern
// ending at node u (0 when none does).
struct ACAutomaton {
    int ch[maxnode][charset];  // ch[u][c]: child / goto transition of node u on code c
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // array-backed BFS queue used by Construct()
    int val[maxnode];          // pattern id stored at each node
    int sz;                    // number of nodes currently in use
    int ID[128];               // identity map from character code to child index

    // One-time setup: root fails to itself, ID is the identity mapping.
    void init() {
        fail[0] = 0;
        for (int i = 0; i < charset; i++) ID[i] = i;
    }

    // Starts a new trie: clears the root and restarts node allocation.
    void reset() {
        sz = 1;
        memset(ch[0], 0, sizeof(ch[0]));
    }

    // Inserts the NUL-terminated string s and stores `key` at its terminal
    // node (a later insertion of the same string overwrites the id).
    // Characters are expected to have codes in 0..127.
    void Insert(char *s, int key) {
        int u = 0;
        for ( ; *s; s++) {
            int c = ID[*s];
            if (!ch[u][c]) {
                // Fresh node: clear its children and stored id before use.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] = key;
    }

    // BFS over the trie: computes failure links and fills every missing
    // child with the transition of the failure state.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                *e++ = ch[0][i];
                fail[ch[0][i]] = 0;
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (v) {
                    *e++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Scans text s; if at least one pattern occurs, prints
    // "web <id>:" followed by the matched pattern ids in ascending order on
    // one line and increments tot. Prints nothing when there is no match.
    void query(char *s, int &tot, int id) {
        int u = 0;
        set<int> S;  // distinct ids of the patterns found, kept sorted
        for (; *s; s++) {
            int c = ID[*s];
            u = ch[u][c];
            // Walk the failure chain to pick up every pattern ending here.
            int tmp = u;
            while(tmp) {
                if (val[tmp]) S.insert(val[tmp]);
                tmp = fail[tmp];
            }
        }
        if (!S.empty()) {
            printf("web %d:", id);
            for (set<int>::iterator it = S.begin(); it != S.end(); ++it) printf(" %d", *it);
            // End the report line so each text's result is on its own line.
            putchar('\n');
            tot++;
        }
    }
}AC;

char buf[210], str[10100];

// Reads n patterns, then m texts; reports the patterns found in each text
// and finally the number of texts that contained at least one pattern.
int main () {
    int n, m, tot = 0;
    scanf("%d", &n);
    AC.init();
    AC.reset();
    for (int i = 0; i < n; i++) {
        scanf("%s", buf);
        AC.Insert(buf, i + 1);
    }
    AC.Construct();
    scanf("%d", &m);
    for (int i = 0; i < m; i++) {
        scanf("%s", str);
        AC.query(str, tot, i + 1);
    }
    printf("total: %d\n", tot);
    return 0;
}
题意:有N个病毒,一个文本串,问文本串中每一个病毒出现了多少次
思路:也是基础的模板题,注意是多组数据(多case)。
/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;

const int maxnode = 50010;  // capacity of the trie (number of nodes)
const int charset = 128;    // alphabet size: character codes 0..127

int cnt[1100];  // cnt[k]: occurrences of pattern k (1-based) in the current text

// Aho-Corasick automaton over 7-bit character codes, stored in fixed-size
// arrays. Node 0 is the root; val[u] holds the 1-based id of the pattern
// ending at node u (0 when none does).
struct ACAutomaton {
    int ch[maxnode][charset];  // ch[u][c]: child / goto transition of node u on code c
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // array-backed BFS queue used by Construct()
    int val[maxnode];          // pattern id stored at each node
    int sz;                    // number of nodes currently in use
    int ID[128];               // identity map from character code to child index

    // One-time setup: root fails to itself, ID is the identity mapping.
    void init() {
        fail[0] = 0;
        for (int i = 0; i < charset; i++) ID[i] = i;
    }

    // Starts a new trie: clears the root and restarts node allocation.
    void reset() {
        sz = 1;
        memset(ch[0], 0, sizeof(ch[0]));
    }

    // Inserts the NUL-terminated string s and stores `key` at its terminal
    // node. Characters are expected to have codes in 0..127.
    void Insert(char *s, int key) {
        int u = 0;
        for ( ; *s; s++) {
            int c = ID[*s];
            if (!ch[u][c]) {
                // Fresh node: clear its children and stored id before use.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] = key;
    }

    // BFS over the trie: computes failure links and fills every missing
    // child with the transition of the failure state.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                *e++ = ch[0][i];
                fail[ch[0][i]] = 0;
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (v) {
                    *e++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Scans text s and increments the global cnt[] entry of every pattern
    // occurrence (overlapping occurrences are all counted).
    void query(char *s) {
        int u = 0;
        for (; *s; s++) {
            int c = ID[*s];
            u = ch[u][c];
            // Walk the failure chain to pick up every pattern ending here.
            int tmp = u;
            while(tmp) {
                if (val[tmp]) cnt[val[tmp]]++;
                tmp = fail[tmp];
            }
        }
    }
} AC;

char buf[1100][55], str[2000100];

// Processes test cases until EOF: reads n patterns and one text, then prints
// "<pattern>: <count>" on its own line for every pattern that occurs.
int main () {
    int n;
    AC.init();
    while(~scanf("%d", &n)) {
        AC.reset();
        for (int i = 0; i < n; i++) {
            scanf("%s", buf[i]);
            AC.Insert(buf[i], i + 1);
            cnt[i + 1] = 0;  // reset the counter left over from the previous case
        }
        AC.Construct();
        scanf("%s", str);
        AC.query(str);
        for (int i = 1; i <= n; i++)
            if (cnt[i]) printf("%s: %d\n", buf[i - 1], cnt[i]);
    }
    return 0;
}
ZOJ 3430 Detect the Virus
题意:有一种编码方式,将输进来的字符转化为二进制,然后6个为一组,不足补零,得到一个新的数字,每一个数字对应一个字符(见题面)。现在给你已经编码过的n个病毒,和m个编码过的文本串,问每一个文本串各包含多少种病毒。
思路:这里解码的时候,会发现每个字节可能有256种取值(包括0),所以不能用以'\0'结尾的字符串表示,需要用unsigned char数组并另外记录长度。解码之后就是裸的AC自动机。
/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;

const int maxnode = 510 * 64;  // capacity of the trie (number of nodes)
const int charset = 256;       // alphabet size: every possible byte value

// Aho-Corasick automaton over raw bytes, stored in fixed-size arrays.
// Strings are passed as (buffer, length) pairs because decoded data may
// contain zero bytes. Node 0 is the root; val[u] holds the 1-based id of
// the pattern ending at node u (0 when none does).
struct ACAutomaton {
    int ch[maxnode][charset];  // ch[u][c]: child / goto transition of node u on byte c
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // array-backed BFS queue used by Construct()
    int val[maxnode];          // pattern id stored at each node
    int sz;                    // number of nodes currently in use
    int ID[256];               // not used in this program: bytes index ch[] directly

    // One-time setup: the root fails to itself.
    void init() {
        fail[0] = 0;
    }

    // Starts a new trie: clears the root and restarts node allocation.
    void reset() {
        sz = 1;
        memset(ch[0], 0, sizeof(ch[0]));
    }

    // Inserts the first len bytes of s and stores `key` at the terminal node.
    void Insert(unsigned char s[], int key, int len) {
        int u = 0;
        for (int i = 0; i < len; i++) {
            int c = s[i];
            if (!ch[u][c]) {
                // Fresh node: clear its children and stored id before use.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] = key;
    }

    // BFS over the trie: computes failure links and fills every missing
    // child with the transition of the failure state.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                *e++ = ch[0][i];
                fail[ch[0][i]] = 0;
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (v) {
                    *e++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Scans the first len bytes of s and prints, on its own line, how many
    // distinct pattern ids occur in it. Pattern ids must be below 520.
    void query(unsigned char s[], int len) {
        int u = 0, ans = 0;
        bool vis[520] = {0};  // vis[k]: pattern k already counted for this text
        for (int i = 0; i < len; i++) {
            int c = s[i];
            u = ch[u][c];
            // Walk the failure chain to pick up every pattern ending here.
            int tmp = u;
            while(tmp) {
                if (val[tmp] && !vis[val[tmp]]) {
                    ans++, vis[val[tmp]] = 1;
                }
                tmp = fail[tmp];
            }
        }
        printf("%d\n", ans);
    }
} AC;

char s[4000];            // one encoded input token
unsigned char g[4000];   // 6-bit values of the encoded characters
unsigned char now[4000]; // decoded bytes

// Converts the first len encoded characters of s into their 6-bit values in
// g: 'A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62 and
// anything else -> 63. g[len] is set to 0 so that change() may safely read
// one element past the end.
void get(char *s, int len) {
    for (int i = 0; i < len; i++) {
        if (s[i] >= 'A' && s[i] <= 'Z') g[i] = s[i] - 'A';
        else if (s[i] >= 'a' && s[i] <= 'z') g[i] = s[i] - 'a' + 26;
        else if (s[i] >= '0' && s[i] <= '9') g[i] = s[i] - '0' + 52;
        else if (s[i] == '+') g[i] = 62;
        else g[i] = 63;
    }
    g[len] = 0;
}

// Packs len 6-bit values from g into bytes in now[], four values into up to
// three bytes, and returns the number of bytes written.
int change(unsigned char g[], int len) {
    int cnt = 0;
    for (int i = 0; i < len; i += 4) {
        // g[i + 1] may be the zero terminator written by get().
        now[cnt++] = (g[i] << 2) | (g[i + 1] >> 4);
        if (i + 2 < len) now[cnt++] = (g[i + 1] << 4) | (g[i + 2] >> 2);
        if (i + 3 < len) now[cnt++] = (g[i + 2] << 6) | g[i + 3];
    }
    return cnt;
}

// Decodes one encoded token into now[] and returns the decoded length.
// Trailing '=' padding is stripped first; the len > 0 guard keeps an empty
// or all-padding token from reading before the start of the buffer.
static int decode(char *line) {
    int len = (int)strlen(line);
    while (len > 0 && line[len - 1] == '=') len--;
    get(line, len);
    return change(g, len);
}

// Processes test cases until EOF: reads n encoded patterns and m encoded
// texts, prints the number of distinct patterns found in each text, and a
// blank line after every test case.
int main () {
    int n, m;
    AC.init();
    while(~scanf("%d", &n)) {
        AC.reset();
        for (int i = 0; i < n; i++) {
            scanf("%s", s);
            int cnt = decode(s);
            AC.Insert(now, i + 1, cnt);
        }
        AC.Construct();
        scanf("%d", &m);
        while(m--) {
            scanf("%s", s);
            int cnt = decode(s);
            AC.query(now, cnt);
        }
        putchar('\n');
    }
    return 0;
}
POJ 2778 DNA Sequence
题意:DNA的序列由ACTG四个字母组成,现在给定m个不可行的序列。问随机构成的长度为n的序列中,有多少种序列是可行的(只要包含一个不可行序列便不可行)。个数非常大,对100000取模。 思路:AC自动机 + DP 解题报告
HDU 2243 考研路茫茫――单词情结
题意:给定一些词根,如果一个单词包含词根,则认为是有效的。现在问长度不超过L的单词里面,有多少有效的单词?
ZOJ 2619 Generator
题意:给定一个数N,代表可以选前N个字母。然后给定一个仅由前N个字母组成的字符串,问从空串开始构造,每次可以在已有基础上从前N个字母中挑选一个加在后面,问构造的字符串的长度期望是多少?
持续更新中