鉴于校园招聘笔试题,有个字符串模式匹配的问题,99+%都是暴力,偶尔一两个写KMP,但是明显是知其表不知其里。期待的 BM算法 或者 Sunday 没有出现!
鉴于网友的回复,特此声明:我的代码假定字符串中的字符都在ASCII范围内
想了解Sunday,可以查作者原著,不难找。
By the way,国内有好多 Paper 是对Sunday的改进,我本人是忽略不计, 国内的Paper擅长这个。
头文件定义:
/* Sunday.h */
class Sunday
{
public:
Sunday();
~Sunday();
public:
int find(const char* pattern, const char* text);
private:
void preCompute(const char* pattern);
private:
//Let's assume all characters are all ASCII
static const int ASSIZE = 128;
int _td[ASSIZE] ;
int _patLength;
int _textLength;
};
源文件
/* Sunday.cpp */
Sunday::Sunday()
{
}
Sunday::~Sunday()
{
}
void Sunday::preCompute(const char* pattern)
{
for(int i = 0; i < ASSIZE; i++ )
_td[i] = _patLength + 1;
const char* p;
for ( p = pattern; *p; p++)
_td[*p] = _patLength - (p - pattern);
}
int Sunday::find(const char* pattern, const char* text)
{
_patLength = strlen( pattern );
_textLength = strlen( text );
if ( _patLength <= 0 || _textLength <= 0)
return -1;
preCompute( pattern );
const char *t, *p, *tx = text;
while (tx + _patLength <= text + _textLength)
{
for (p = pattern, t = tx; *p; ++p, ++t)
{
if (*p != *t)
break;
}
if (*p == 0)
return tx-text;
tx += _td[tx[_patLength]];
}
return -1;
}
简单测试下:
int main()
{
char* text = "blog.csdn,blog.net";
char* pattern = "csdn,blog" ;
Sunday sunday;
printf("The First Occurence at: %d
",sunday.find(pattern,text));
return 1;
}