zoukankan html css js c++ java

HDU 2222 Keywords Search AC自己主动机入门题

单词统计的题目，给出一些单词，统计有多少单词在一个文本中出现，最经典的入门题了。

AC自己主动机的基础：
1 Trie。以这个数据结构为基础的，只是添加一个fail指针和构造fail的函数

2 KMP，不是直接运用KMP。而是须要KMP的思想。KMP思想都没有的话，理解这个算法会更加吃力的。

注意本题的单词会有反复出现的，一个单词仅仅能统计一次。

搜索了一下网上的题解。发现好多代码都是一大抄的啊。⊙﹏⊙b汗。

本博客的乃是原创代码。代码风格也是几乎相同固定的，转载请注明出处：http://blog.csdn.net/kenden23。不少所谓的IT站点转载我的文章，不但链接没给出，连作者也没有，还好意思说自己是IT站点吗？

请尊重作者。假设觉得这些算法代码那么好敲的，能够自己去敲去。

#include <cstdio>

const int ARR_SIZE = 26;
const int MAX_N = 10001;
const int MAX_M = 1000001;
const int MAX_KEY_LEN = 51;

struct Node
{
	Node *arr[ARR_SIZE];
	Node *fail;
	int n;
};

void clearNode(Node *rt)
{
	for (int i = 0; i < ARR_SIZE; i++)
	{
		rt->arr[i] = NULL;
	}
	rt->n = 0;
	rt->fail = NULL;
}

Node *q[MAX_KEY_LEN*MAX_N], pool[MAX_KEY_LEN*MAX_N], *Trie;
int head, tail, poolID;

void insert(char *str)
{
	Node *pCrawl = Trie;
	for ( ; *str; str++)
	{
		int id = *str - 'a';
		if (!pCrawl->arr[id])
		{
			pCrawl->arr[id] = &pool[poolID++];
			clearNode(pCrawl->arr[id]);
		}
		pCrawl = pCrawl->arr[id];
	}
	pCrawl->n++;
}

void buildFail()
{
	Node *pCrawl;
	head = tail = 0;
	q[tail++] = Trie;
	while (head < tail)
	{
		pCrawl = q[head++];
		for (int i = 0; i < ARR_SIZE; i++)
		{
			if (pCrawl->arr[i] == NULL) continue;
			pCrawl->arr[i]->fail = Trie;//initialize all to Trie
			Node *fail = pCrawl->fail;
			while (fail)
			{
				if (fail->arr[i])//find the first next up level match
				{//which make it the longest match and the best.
					pCrawl->arr[i]->fail = fail->arr[i];
					break;
				}
				fail = fail->fail;
			}//whi (p != NULL)
			q[tail++] = pCrawl->arr[i];
		}//for (int i = 0; i < kind; i++)
	}//while (head < tail)
}

int searchWordsInText(char *text)
{
	Node *pCrawl = Trie;
	int i = 0, ans = 0;
	while (text[i])
	{
		int id = text[i++] - 'a';
		//find the longest prefix match
		while (!pCrawl->arr[id] && pCrawl != Trie) pCrawl = pCrawl->fail;
		if (pCrawl->arr[id]) pCrawl = pCrawl->arr[id];
		else continue;

		Node *tmp = pCrawl;
		while (tmp && tmp->n != -1)
		{//If one word apprear multiply times, only count as one time.
			ans += tmp->n;
			tmp->n = -1;
			tmp = tmp->fail;
		}//traval through all words that end with text[i], add them to result
	}
	return ans;
}

int main()
{
	int T, n;
	char keyWord[MAX_KEY_LEN], text[MAX_M];
	scanf("%d", &T);
	while (T--)
	{
		Trie = &pool[0];
		clearNode(Trie);
		poolID = 1;

		scanf("%d", &n);
		getchar();
		while (n--)
		{
			gets(keyWord);
			insert(keyWord);
		}		
		gets(text);
		buildFail();
		printf("%d
", searchWordsInText(text));
	}
	return 0;
}

查看全文

相关阅读:
WAF与IPS的区别总结
 web后门排查与高效分析web日志技巧
 如何做一名好的web安全工程师？
从“黑掉Github”学Web安全开发
 DNS劫持
 万网上如何将IP和申请的域名绑定
 如何申请网站域名
 什么是域名？什么网站名？什么是URL？
myeclipse svn 插件去除已经保存的密码方法
 SVN中检出和导出的区别

原文地址：https://www.cnblogs.com/zhchoutai/p/7006996.html