zoukankan      html  css  js  c++  java
  • AC自动机学习笔记

    建议先学习 KMP, 阅读本篇前请至少先阅读 KMP学习笔记 的前言


    \[Upd \ in \ 2019.10.10 \]

    增加一些新的理解,顺便把博客搬过来。

    AC 自动机的 nxt 与 KMP 的并不一样,前者是指向一个与自己等价的地方,而 KMP 则是指向 与前一个等价的地方的后一个 ,所以才是失配后去找的地方。

    还有注意只有 $ ch $ 需要开满,其他的数组只用开到 数量 * 长度 即可。


    #include <iostream>
    #include <cstdio>
    #include <cstring>
    #include <queue>
    #define reg register
    #define ll long long
    using namespace std;
    template <typename _Typer>
    inline _Typer read() {
    	reg char ch = getchar();
    	reg _Typer init = 0, base = 1;
    	while ((ch < '0' || ch > '9') && ch != '-') ch = getchar();
    	if (ch == '-') base = -1;
    	else init = ch - '0';
    	ch = getchar();
    	while (ch >= '0' && ch <= '9')
    		init = (init<<3) + (init<<1) + ch - '0', ch = getchar();
    	return init * base;
    }
    const ll N = 1000005, INF = 1e9;
    const int Sigma_Size = 26;
    const char Cmod = 'a';
    
    int ch[N][Sigma_Size];
    int val[N], sz;
    int ans;
    struct Trie {
    	Trie () { sz = 1; /*memset(ch, 0, sizeof(ch)); memset(val, 0, sizeof(val));*/}
    	int idx(char c) { return c - Cmod;}
    	void insert(char *s, int v) {
    		int u = 0, len = strlen(s);
    		for (reg int i = 0; i < len; ++i) {
    			int c = idx(s[i]);
    			if (!ch[u][c]) {
    //				memset(ch[sz], 0, sizeof(ch[sz]));
    				val[sz] = 0;
    				ch[u][c] = sz;
    				++sz;
    			}
    			u = ch[u][c];
    		}
    		val[u] += v;
    	}
    	int query(char *s) {
    		int u = 0, len = strlen(s);
    		for (reg int i = 0; i < len; ++i) {
    			int c = idx(s[i]);
    			if (!ch[u][c]) return -1;
    			u = ch[u][c];
    		}
    		return val[u];
    	}
    }trie;
    
    int nex[N*Sigma_Size], lst[N*Sigma_Size];
    // nex[x]=y, only when char[x]=char[y] and 0...y=somewhere...x
    inline void getfail() {
    	queue <int> q;
    	nex[0] = 0;
    	for (reg int i = 0; i < Sigma_Size; ++i) {
    		int u = ch[0][i];
    		if (u) { nex[u] = 0; q.push(u); lst[u] = 0;}
    	}
    	while (!q.empty()) {
    		int r = q.front(); q.pop();
    		for (reg int c = 0; c < Sigma_Size; ++c) {
    			int u = ch[r][c];
    			// when r,i fail -> nex[r],i -> 0-nex[r]=x-r -> r=nex[r]
    			// because haven't ch[r][c](u) -> r.c=nex[r].c (@1)
    			if (!u) { ch[r][c] = ch[nex[r]][c]; continue;}
    			q.push(u);
    			int v = nex[r];
    			// 1.ch[v][c]>0 -> now.c=v.c -> nex[u(ch[r][c])]=sz(v.c)=ch[v][c]
    			// 2.v=0 -> no one same of now(r.c) -> now.c=0 -> nex[u]=0(ch[v][c])
    			nex[u] = ch[v][c];
    			// 1.nex[u].val>0 -> nex[u] is a word
    			// 2.nex[u].val=0 -> nex[u] isn't a word ->
    			//   1).lst[nex[u]].val>0 -> lst[nex[u]] is a word
    			//   2).lst[nex[u]].val=0 -> it's not a word ->
    			//		.
    			//		.
    			//		.
    			// so, lst[x] mean the lastest word
    			// (if lst[x] is 0, the reason is that no word is finish with x(lst[x])) (@2)
    			lst[u] = val[nex[u]] ? nex[u] : lst[nex[u]];
    		}
    	}
    }
    inline void update(int j) {
    	for ( ; j && val[j]; j = lst[j]) ans += val[j], val[j] = 0;
    }
    inline void find(char *T) {
    	getfail();
    	int j = 0, len = strlen(T);
    	for (reg int i = 0; i < len; ++i) {
    		int c = trie.idx(T[i]);
    		// try to compare T[i](c),ch[j][c]
    		// if j have son is c -> some word's P[j].next is c
    		// else -> no word's P[j].next is c -> fail -> compare T[i],P[nex[j]]
    		// because j haven't son is c, ch[j][c](first) is 0 -> @1 -> ch[j][c]=ch[nex[j][c]
    		j = ch[j][c];
    		// if val[j]>0 a word have compared successful -> update
    		if (val[j]) update(j);
    		// else may be when we compare in now, some word have compared successful
    		// but in this node, the val is 0
    		// and now, if lst[j]>0, it means some word is finish with j(ch[pre[j]][c(T[i])])
    		// (because @2)
    		else if (lst[j]) update(lst[j]);
    	}
    }
    
    char Ts[N];
    inline void scan();
    inline int _main() {
    	scan();
    	find(Ts);
    	printf("%d\n", ans);
    	return 0;
    }
    
    int main() {
    	#define offline1
    	#ifdef offline
    	freopen("Aho-Corasick.in", "r", stdin);
    	freopen("Aho-Corasick.out", "w", stdout);
    	_main();
    	fclose(stdin); fclose(stdout);
    	#else
    	_main();
    	#endif
    	return 0;
    }
    
    inline void scan() {
    	int n = read<int>();
    	char Ps[N];
    	for (reg int i = 1; i <= n; ++i) {
    		scanf("%s", Ps);
    		trie.insert(Ps, 1);
    	}
    	scanf("%s", Ts);
    }
    
    
  • 相关阅读:
    [网络技术][转]网卡的offload概念
    [网络技术][转]路由表查找过程(ip_route_input_slow)
    [linux-内核][转]内核日志及printk结构浅析
    [DPDK][转]DPDK编程开发(4)—lcore
    [技术新闻]相关网站
    [计算机、网络相关历史]unix简史
    [编辑器]sublime使用入门
    [CDN]CDN的系统架构
    [windows操作系统]windows管理
    [安卓]创建一个视图类
  • 原文地址:https://www.cnblogs.com/Ameiyo/p/11648332.html
Copyright © 2011-2022 走看看