多模式匹配算法:给定几个特定的单词和一篇文章,查看文章中出现特定单词的次数;
多模式匹配算法通常由以下几个步骤组成:
1、通过几个给定的单词建立字典树
2、对字典树建立匹配失败后的指针
3、对给定的文章进行匹配,可以在线性时间内完成
下面给出相应代码:
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <limits>
#include <queue>
#include <string>
#include <vector>
using namespace std ;
// One node of the keyword trie used by the multi-pattern matcher.
// The members are left uninitialised by the type itself; new_Node() is the
// function that hands out fully initialised nodes.
struct Node
{
    Node *next[26] ;    // child links, one slot per letter (indexed by letter - 'a')
    Node *fail ;        // failure link of the automaton
    bool is_over ;      // true on the node where an inserted keyword ends
    int len ;           // length of the keyword that ends at this node
} ;
// Allocate a fresh trie node with no children, not marking the end of any
// keyword, a stored length of 0, and a failure link pointing at the node
// itself. The caller owns the returned node.
Node* new_Node() {
    Node *node = new Node ;
    node->len = 0 ;
    node->is_over = false ;
    // Clear all 26 child slots.
    Node **slot = node->next ;
    Node **const last = node->next + 26 ;
    while(slot != last)
        *slot++ = NULL ;
    node->fail = node ;
    return node ;
}
// Insert one keyword into the trie rooted at `root`.
//
//   root : root node of the trie; a null root is ignored.
//   s    : NUL-terminated keyword; the string is only read, never modified.
//
// The trie has edges for 'a'..'z' only, so indexing it with any other
// character would run off the `next` array. The keyword is therefore checked
// first: if it is empty or contains anything but ASCII letters it is rejected
// and the trie is left untouched. Upper-case letters are folded to lower case.
// On success the terminal node is flagged and records the keyword length.
void build_tree(Node *root ,char *s) {
    if(root == NULL || s == NULL)
        return ;

    // Validate before touching the trie so a rejected keyword leaves no
    // half-built path behind.
    int len = 0 ;
    for( ; s[len] != '\0' ; len++) {
        const char ch = s[len] ;
        const bool lower = ch >= 'a' && ch <= 'z' ;
        const bool upper = ch >= 'A' && ch <= 'Z' ;
        if(!lower && !upper)
            return ;
    }
    if(len == 0)
        return ;    // an empty keyword would flag the root itself as a match

    for(int i = 0 ; i < len ; i++) {
        char ch = s[i] ;
        if(ch >= 'A' && ch <= 'Z')
            ch = static_cast<char>(ch - 'A' + 'a') ;
        const int slot = ch - 'a' ;
        if(root->next[slot] == NULL)
            root->next[slot] = new_Node() ;
        root = root->next[slot] ;
    }
    root->len = len ;
    root->is_over = true ;
}
// Compute the failure link of every node in the trie rooted at `root`.
//
// The failure link of a node points at the node spelling the longest proper
// suffix of its own path that is also a path from the root; when no such
// suffix exists it points at the root. Nodes are visited breadth-first, so
// the links of all shallower nodes are final by the time they are consulted.
//
//   root : root of a trie built with build_tree(); a null root is ignored.
void build_Fail(Node *root) {
    if(root == NULL)
        return ;

    std::queue<Node *> pending ;
    root->fail = root ;
    pending.push(root) ;

    while(!pending.empty()) {
        Node *parent = pending.front() ;
        pending.pop() ;

        for(int i = 0 ; i < 26 ; i++) {
            Node *child = parent->next[i] ;
            if(child == NULL)
                continue ;

            if(parent == root) {
                // A one-letter path has only the empty proper suffix.
                child->fail = root ;
            } else {
                // Restart the search from the parent's own failure link for
                // every child: reusing the position reached for a previous
                // letter would start from a suffix that is too short.
                Node *probe = parent->fail ;
                while(probe != root && probe->next[i] == NULL)
                    probe = probe->fail ;
                child->fail = (probe->next[i] != NULL) ? probe->next[i] : root ;
            }
            pending.push(child) ;
        }
    }
}
// True for the ASCII letters, the only characters that form words here.
static bool ac_is_letter(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ;
}

// Count the whole-word keyword occurrences in `s`.
//
//   root : root of a trie whose failure links were set by build_Fail().
//   s    : NUL-terminated text; it is only read. Letters are compared
//          case-insensitively against the lower-case trie.
//
// Returns the number of positions where a keyword ends and the match is
// delimited on both sides by a non-letter, the start of the text or the end
// of the text. Returns 0 when either argument is null.
int Auto_AC(Node* root , char *s) {
    if(root == NULL || s == NULL)
        return 0 ;

    int count = 0 ;
    Node *state = root ;
    for(int i = 0 ; s[i] != '\0' ; i++) {
        char ch = s[i] ;
        if(!ac_is_letter(ch)) {
            state = root ;      // a non-letter ends the current word
            continue ;
        }
        if(ch >= 'A' && ch <= 'Z')
            ch = static_cast<char>(ch - 'A' + 'a') ;
        const int slot = ch - 'a' ;

        // Follow failure links until some state can consume this letter,
        // then consume it, so the letter is never silently dropped.
        while(state != root && state->next[slot] == NULL)
            state = state->fail ;
        if(state->next[slot] != NULL)
            state = state->next[slot] ;

        if(!state->is_over)
            continue ;

        // Index of the character just before the match; -1 means the match
        // starts at the beginning of the text, which counts as a boundary
        // and must not be dereferenced.
        const int before = i - state->len ;
        const bool starts_word = before < 0 || !ac_is_letter(s[before]) ;
        // s[i + 1] is at worst the terminating NUL, which is a boundary.
        const bool ends_word = !ac_is_letter(s[i + 1]) ;
        if(starts_word && ends_word)
            count++ ;
    }
    return count ;
}
//主函数
int main() {
int m , n , t = 1 ;
while(cin >> m >> n) {
// getchar() ;
Node *root = new_Node() ;
Node *r = root ;
while(m--) {
char s[30] ;
cin >> s ;
getchar() ;
// cout << s << endl ;
build_tree(root,s) ;
root = r ;
}
root = r ;
build_Fail(root) ;
root = r ;
int count[100] = {0} ;
int ma = 0 ;
char str[30][300] , str1[30][300] ;
for(int i = 0 ; i < n ; i++) {
gets(str[i]) ;
strcpy(str1[i],str[i]) ;
int len = strlen(str[i]) ;
for(int k = 0 ; k < len ; k++)
if(isupper(str[i][k]))
str[i][k] += 32 ;
count[i] = Auto_AC(root,str[i]) ;
ma = max(ma,count[i]) ;
}
cout << "Excuse Set #" << t++ << endl;
for(int k = 0 ; k < n ; k++)
if(count[k] == ma)
cout << str1[k] << endl ;
cout << endl ;
}
return 0 ;
}
下面介绍一种暴力求解方法,通过调用strstr函数实现
下面给出相应代码:
#include
#include
#include
using namespace std;
// Capacity shared by all buffers below: at most N lines of each kind, each
// holding at most N - 1 characters plus the terminating NUL. A typed constant
// is used instead of a macro so that it cannot rewrite unrelated identifiers
// that happen to be called N.
const int N = 111;

char str[N][N];   // keywords, lower-cased in place after reading
char tem[N][N];   // excuse lines exactly as read, used for output
char pop[N][N];   // lower-cased copies of the excuse lines, used for searching
int num[N];       // keyword hits counted for each excuse line
// Convert the NUL-terminated string `f` to lower case in place.
// Only the bytes 'A'..'Z' are changed (each is moved up by 32); every other
// byte is left as it is.
void strdx(char f[])
{
    for (char *p = f; *p != '\0'; ++p)
    {
        const bool is_upper = !(*p < 'A' || *p > 'Z');
        if (is_upper)
            *p += 32;
    }
}
int main()
{
int n, m;
int t = 1;
while (cin >> n >> m)
{
getchar();
// Init.
memset(str, 0, sizeof(str));
memset(tem, 0, sizeof(tem));
memset(num, 0, sizeof(num));
memset(pop, 0, sizeof(pop));
// Read.
for (int i = 0; i < n; i++)
{
gets(str[i]);
strdx(str[i]);
}
for (int i = 0; i < m; i++)
{
gets(tem[i]);
strcpy(pop[i], tem[i]);
strdx(pop[i]);
}
// Count.
for (int i = 0; i < m; i++)
{
char *move = NULL;
for (int j = 0; j < n; j++)
{
move = strstr(pop[i], str[j]);
if (move == NULL)
continue;
int k = strlen(str[j]);
if (*(move + k) >= 'a' && *(move + k) <= 'z')
continue;
num[i]++;
}
}
int max = 0;
for (int i = 0; i < m; i++)
if (num[i] > max)
max = num[i];
cout << "Excuse Set #" << t++ << endl;
for (int i = 0; i < m; i++)
if (max == num[i])
puts(tem[i]);
cout << endl;
}
return 0;
}