在常见的字符串匹配算法中,KMP 算法是比较难懂的一个,其思想和实现方法也有很多种。下面介绍一种适用范围较广的实现:
#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Builds the KMP "partial match" (failure) table for `pattern`.
//
// jump[i] is the index of the LAST character of the longest proper prefix of
// pattern[0..i] that is also a suffix of pattern[0..i], or -1 when no such
// prefix exists. (This is the usual "border length" minus one.)
// An empty pattern yields an empty table.
static vector<int> BuildJumpTable(const string &pattern) {
    const int pLength = static_cast<int>(pattern.length());
    // Every entry starts at -1 ("no border"); only entries that do have a
    // border are overwritten below. jump[0] is always -1.
    vector<int> jump(pLength, -1);

    for (int i = 1; i < pLength; ++i) {
        int index = jump[i - 1];
        // Fall back to ever shorter borders until one can be extended by
        // pattern[i]. The index >= 0 guard is required: without it the loop
        // would read jump[-1].
        while (index >= 0 && pattern[i] != pattern[index + 1]) {
            index = jump[index];
        }
        if (pattern[i] == pattern[index + 1]) {
            jump[i] = index + 1;
        }
    }
    return jump;
}

// Finds the first occurrence of `pattern` in `target` using the
// Knuth-Morris-Pratt algorithm.
//
// Parameters:
//   target  - the text to search in.
//   pattern - the text to search for.
// Returns:
//   The zero-based offset of the first match in `target`, or -1 if `pattern`
//   does not occur. An empty `pattern` matches at offset 0.
// Side effects:
//   Prints the jump table to std::cout (space-separated, followed by a
//   newline) before searching.
// Note:
//   Offsets are handled as int, so inputs longer than INT_MAX characters are
//   not supported.
int KMP_Match(const string &target, const string &pattern) {
    const vector<int> jump = BuildJumpTable(pattern);
    const int pLength = static_cast<int>(pattern.length());
    const int tLength = static_cast<int>(target.length());

    // Dump the partial match table.
    for (vector<int>::const_iterator it = jump.begin(); it != jump.end(); ++it) {
        cout << *it << " ";
    }
    cout << '\n';

    // Matching phase.
    int pattern_index = 0;
    int target_index = 0;
    while (pattern_index < pLength && target_index < tLength) {
        if (target[target_index] == pattern[pattern_index]) {
            ++target_index;
            ++pattern_index;
        } else if (pattern_index == 0) {
            // Must be checked before the fallback branch: the pattern is
            // already at its first character and cannot move back any
            // further, so advance in the target instead.
            ++target_index;
        } else {
            // pattern_index is the position of the mismatching character, so
            // the border of the matched part is jump[pattern_index - 1].
            // pattern[jump[pattern_index - 1]] equals target[target_index - 1],
            // hence the comparison resumes one character after it (+1).
            pattern_index = jump[pattern_index - 1] + 1;
        }
    }

    if (pattern_index == pLength) {
        return target_index - pLength;
    }
    return -1;
}

// Demo: searches a sample text for a sample pattern and prints the offset.
int main() {
    const string pattern = "abaabcac";
    const string target = "acabaabaabcacaabc";
    const int index = KMP_Match(target, pattern);
    cout << index << endl;
    return 0;  // 0 signals success to the caller
}