具体算法和推导,看链接。
参考:http://www.cnblogs.com/ider/p/longest-common-substring-problem-optimization.html
1、暴力法
#include <iostream> #include <string> #include <vector> using namespace std; //brute force方法 int longestCommonSubstring1(const string &s1, const string &s2) { string::size_type size1 = s1.size(); string::size_type size2 = s2.size(); if(size1 == 0 || size2 == 0) return 0; int length = 0; int start1 = -1; int start2 = -1; int longest = 0; for(int i = 0; i < size1; ++i) { for(int j = 0; j < size2; ++j) { length = 0; //while循环计算S1的i~size1的后缀和S2的j~size2的后缀 是不是最长子串 int m = i; int n = j; while(m < size1 && n < size2) { //只要子串中有一个数不想等,立即退出 if(s1[m] != s2[n]) { break; } else { //如果两个后缀子串中,从开始到现在的字符想等,则++ ++m; ++n; ++length; } } //S1和S2的一个子串,结束后,看length的长度 if(length > longest) { longest = length; start1 = i; start2 = j; } } } return longest; //cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl; }
2、动态规划法
(1)第一行和第一列先求,作为动态规划的初始化状态;
状态转移:如果两个字符相等,table[i][j] = table[i-1][j-1] + 1;
int longestCommonSubstring2(const string &s1, const string &s2) { string::size_type size1 = s1.size(); string::size_type size2 = s2.size(); if(size1 == 0 || size2 == 0) return 0; int start1 = -1; int start2 = -1; int longest = 0; int length; //动态生成二维数组 vector<vector<int>> table(size1, vector<int>(size2, 0)); //第一行table赋值,第一行与S1的第一个字符比较 //即动态规划的初始状态 for(int j = 0; j < size2; ++j) { table[0][j] = (s1[0] == s2[j] ? 1 : 0); } //从第二行开始,从左到右依次遍历字符串 for(int i = 1; i < size1; ++i) { //第一列的值,相当于初始状态 table[i][0] = (s1[i] == s2[0] ? 1 : 0); for(int j = 1; j < size2; ++j) { if(s1[i] == s2[j]) { //如果相等,那么+1;不想等,则table为0(在创建table时已经赋值了0) table[i][j] = table[i-1][j-1] + 1; } } } //找出table中的最大值 for(int i = 0; i < size1; ++i) { for(int j = 0; j < size2; ++j) { if( longest < table[i][j]) { longest = table[i][j]; start1 = i - longest + 1; start2 = j - longest + 1; } } } cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl; }
(2)优化(求最大长度的循环进行优化,放到前面的循环中)
int longestCommonSubstring3(const string &s1, const string &s2) { string::size_type size1 = s1.size(); string::size_type size2 = s2.size(); if(size1 == 0 || size2 == 0) return 0; int start1 = -1; int start2 = -1; int longest = 0; int length; vector<vector<int>> table(size1, vector<int>(size2, 0)); for(int j = 0; j < size2; ++j) { table[0][j] = (s1[0] == s2[j] ? 1 : 0); if(table[0][j] > longest) { longest = table[0][j]; start1 = 0; start2 = j; } } for(int i = 1; i < size1; ++i) { if(s1[i] == s2[0]) { table[i][0] = 1; if(longest == 0) { longest = table[i][0]; start1 = i; start2 = 0; } } for(int j = 1; j < size2; ++j) { if(s1[i] == s2[j]) { table[i][j] = table[i-1][j-1] + 1; if( longest < table[i][j]) { longest = table[i][j]; start1 = i - longest + 1; start2 = j - longest + 1; } } } } cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl; }
(3)优化空间复杂度
上面(1)(2)是按行求的,现在按斜对角线求。
int longestCommonSubstring4(const string &s1, const string &s2) { string::size_type size1 = s1.size(); string::size_type size2 = s2.size(); if(size1 == 0 || size2 == 0) return 0; int start1 = -1; int start2 = -1; int longest = 0; int length; vector<vector<int>> table(size1, vector<int>(size2, 0)); for(int i = 0; i < size1; ++i) { int m = i; int n = 0; int length = 0; while(m < size1 && n < size2) { if(s1[m] != s2[n]) { //如果对角线上两个字符不相等,则后面继续计算最大公共子串时,要从0开始 length = 0; } else { ++length; //在对角线计算时,子串到一定长度,后面不相等。 //如果在if外更新longest的值,是错误的,是在字符相等时,更新值。 if(longest < length) { longest = length; start1 = m - longest + 1; start2 = n - longest + 1; } } ++m; ++n; } } for(int j = 1; j < size2; ++j) { int m = 0; int n = j; while(m < size1 && n < size2) { if(s1[m] != s2[n]) { length = 0; } else { ++length; if(length > longest) { longest = length; start1 = m - longest + 1; start2 = n - longest + 1; } } ++m; ++n; } } cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl; }