Given two words word1 and word2, find the minimum number of operations required to convert word1 to word2.
You have the following 3 operations permitted on a word:
- Insert a character
- Delete a character
- Replace a character
Example 1:
Input: word1 = "horse", word2 = "ros"
Output: 3
Explanation:
horse -> rorse (replace 'h' with 'r')
rorse -> rose (remove 'r')
rose -> ros (remove 'e')
Example 2:
Input: word1 = "intention", word2 = "execution"
Output: 5
Explanation:
intention -> inention (remove 't')
inention -> enention (replace 'i' with 'e')
enention -> exention (replace 'n' with 'x')
exention -> exection (replace 'n' with 'c')
exection -> execution (insert 'u')
题目大意:将字符串word1转化为字符串word2需要的最小操作数,对每个字符你可以进行三种操作:插入/删除/替换。
思路一:word1的长度为len1, word2的长度为len2.
定义函数minDistance(i, j)返回的是word1[i, ..., len1)变换到word2[j, ..., len2)的最小操作数。
1)当i >= len1, j >= len2时,返回0
2)当i >= len1, 而 j < len2, 此时只能通过在word1后插入字符来变换成word2[j, ..., len2), 需要插入 len2 - j 个字符. 返回 len2 - j.
3) 当i < len1, 而 j >= len2, 此时只能通过删除word1后的字符, 需要删除 len1 - i 个字符. 返回 len1 - i.
4) 一般情况下
a. word1[i] == word2[j], 那么minDistance(i, j) = minDistance(i + 1, j + 1)
b. word1[i] != word2[j], 我们有三种选择:
替换,将word1[i]替换为word2[j], minDistance(i, j) = 1 + minDistance(i + 1, j + 1)
删除,将word1[i]删除,让word1[i + 1, ..., len1)完成变换成word2[j, ..., len2)的任务。 minDistance(i, j) = 1 + minDistance(i + 1, j)
插入,在word1的第i位插入与word2[j]相等的字符,让原来word1[i, ..., len1)完成变换成word2[j + 1, ..., len2)的任务。 minDistance(i, j) = 1 + minDistance(i, j + 1)
minDistance(i, j) 取这三种选择(替换、删除、插入)中操作数最小的一个。
递归代码如下:
/**
 * Edit distance between two words, computed by plain top-down recursion.
 *
 * This version intentionally keeps no cache of sub-results, so the same
 * (i, j) suffix pair is recomputed every time it is reached. It is only
 * practical for short inputs.
 */
class Solution {
public:
    /**
     * @param word1 the word to transform
     * @param word2 the target word
     * @return the minimum number of single-character insertions, deletions
     *         and replacements that turn word1 into word2
     */
    int minDistance(std::string word1, std::string word2) {
        const int len1 = static_cast<int>(word1.length());
        const int len2 = static_cast<int>(word2.length());
        return minDistance(word1, word2, 0, 0, len1, len2);
    }

private:
    /**
     * Minimum number of operations that turn word1[i, len1) into
     * word2[j, len2).
     *
     * Both words are taken by const reference: the recursion only reads
     * them, and copying them on every call would dominate the running time.
     */
    int minDistance(const std::string& word1, const std::string& word2,
                    int i, int j, int len1, int len2) {
        // Both suffixes are empty: nothing left to do.
        if (i >= len1 && j >= len2)
            return 0;
        // word1 is exhausted: insert every remaining character of word2.
        if (i >= len1)
            return len2 - j;
        // word2 is exhausted: delete every remaining character of word1.
        if (j >= len2)
            return len1 - i;

        // Matching heads cost nothing; move on to the shorter suffixes.
        if (word1[i] == word2[j])
            return minDistance(word1, word2, i + 1, j + 1, len1, len2);

        // Replace word1[i] with word2[j].
        const int replaceCost = 1 + minDistance(word1, word2, i + 1, j + 1, len1, len2);
        // Insert word2[j] in front of word1[i].
        const int insertCost = 1 + minDistance(word1, word2, i, j + 1, len1, len2);
        // Delete word1[i].
        const int deleteCost = 1 + minDistance(word1, word2, i + 1, j, len1, len2);
        return std::min(replaceCost, std::min(insertCost, deleteCost));
    }
};
思路二:从上面递归代码可以看出,其实会有很多重复的计算。以下是记忆化的递归代码:
1 class Solution { 2 public: 3 int minDistance(string word1, string word2) { 4 int len1 = word1.length(), len2 = word2.length(); 5 vector<vector<int> > dp(len1, vector<int>(len2, INT_MAX)); 6 return minDistance(word1, word2, 0, 0, len1, len2, dp); 7 } 8 private: 9 int minDistance(const string word1, const string word2, int i, int j, int len1, int len2, vector<vector<int> > &dp) { 10 if (i >= len1 && j >= len2) 11 return 0; 12 if (i >= len1) 13 return len2 - j; 14 if (j >= len2) 15 return len1 - i; 16 if (dp[i][j] != INT_MAX) 17 return dp[i][j]; 18 if (word1[i] == word2[j]) 19 dp[i][j] = minDistance(word1, word2, i + 1, j + 1, len1, len2, dp); 20 else { 21 int RepDistance = 1 + minDistance(word1, word2, i + 1, j + 1, len1, len2, dp); 22 int InsertDistance = 1 + minDistance(word1, word2, i, j + 1, len1, len2, dp); 23 int DelDistance = 1 + minDistance(word1, word2, i + 1, j, len1, len2, dp); 24 dp[i][j] = min(RepDistance, min(InsertDistance, DelDistance)); 25 } 26 return dp[i][j]; 27 } 28 };
思路三:动态规划。
我们假设dp[i][j]表示word1[0, ..., i) 转化成 word2[0, ..., j)需要的最小操作数,即word1长度为i的前缀转化成word2长度为j的前缀需要的最小操作数。
划分子问题:
1) word1[i - 1] == word2[j - 1] (两个前缀的最后一个字符相同), dp[i][j] = dp[i - 1][j - 1]
2) word1[i - 1] != word2[j - 1], 取以下三种操作中的最小值:
替换:将word1[i - 1]替换为与word2[j - 1]相等的字符。dp[i][j] = 1 + dp[i - 1][j - 1]
删除:将word1[i - 1]删除。 dp[i][j] = 1 + dp[i - 1][j].
插入:在word1[0, ..., i)的末尾插入与word2[j - 1]相等的字符, 此时word1[0, ..., i)要完成的任务是转化成word2[0, ..., j - 1). dp[i][j] = 1 + dp[i][j - 1]
3)考虑边界条件:
dp[i][0]: 只要进行删除操作,次数:i
dp[0][j]: 只要进行插入操作,次数:j
/**
 * Edit distance between two words, computed bottom-up over a full
 * (len1 + 1) x (len2 + 1) table.
 */
class Solution {
public:
    /**
     * @param word1 the word to transform
     * @param word2 the target word
     * @return the minimum number of single-character insertions, deletions
     *         and replacements that turn word1 into word2
     */
    int minDistance(std::string word1, std::string word2) {
        const int rows = static_cast<int>(word1.length());
        const int cols = static_cast<int>(word2.length());

        // cost[r][c]: operations needed to turn the first r characters of
        // word1 into the first c characters of word2.
        std::vector<std::vector<int> > cost(rows + 1, std::vector<int>(cols + 1, 0));

        // Empty word1 prefix: insert all c characters.
        for (int c = 0; c <= cols; ++c)
            cost[0][c] = c;

        for (int r = 1; r <= rows; ++r) {
            // Empty word2 prefix: delete all r characters.
            cost[r][0] = r;

            for (int c = 1; c <= cols; ++c) {
                const int diagonal = cost[r - 1][c - 1];

                // Same last character: it is carried over for free.
                if (word1[r - 1] == word2[c - 1]) {
                    cost[r][c] = diagonal;
                    continue;
                }

                const int viaInsert = cost[r][c - 1];
                const int viaDelete = cost[r - 1][c];
                // One extra operation on top of the cheapest neighbour
                // (replace = diagonal, insert = left, delete = above).
                cost[r][c] = 1 + std::min(std::min(viaInsert, viaDelete), diagonal);
            }
        }
        return cost[rows][cols];
    }
};
思路四:考虑思路三, 我们实际上是在一个二维数组上进行操作。进一步考虑空间复杂度优化。
i \ j | 0 | 1 | 2 | 3 | 4 |
0 | dp[0][0] | dp[0][1] | dp[0][2] | dp[0][3] | dp[0][4] |
1 | dp[1][0] | dp[1][1] | dp[1][2] | dp[1][3] | dp[1][4] |
2 | dp[2][0] | dp[2][1] | dp[2][2] | dp[2][3] | dp[2][4] |
我们考虑dp[2][2], 实际上只跟dp[1][1], dp[1][2], dp[2][1]有关,我们可以进行降维,只利用一个一维数组。
我们定义dp[j]:当外层循环处理到word1的前i个字符时,dp[j]表示word1[0, ..., i)转化为word2[0, ..., j)需要的最小操作数(即二维数组中第i行的dp[i][j])。
初始化边界条件:
当word1的长度为0时,dp[j] = j (为了好迭代,初始化第一行)
当j = 0时,dp[0] = i 表示需要将word1的每一个字符都删除。
迭代两轮:
i = 0, dp[j] = j ( 0 <= j <= len(word2))
i = 1: dp[0]表示长度为1的word1转化为长度为0的word2需要的最小操作数, dp[0] = 1 (i = 0时,dp[0] = 0)。计算此时的dp[1]实际上需要dp[0] (i = 0), dp[1] (i = 0) 和dp[0] (i = 1)。我们会发现处理边界情况时, i = 0时的dp[0]会被覆盖, 所以在覆盖之前另外用一个变量保存: prev = dp[0];
dp[1]表示长度为1的word1转化为长度为1的word2需要的最小操作数, dp[1] = a (i = 0时,dp[1] = b)。计算此时的dp[2]实际上需要dp[1] (i = 0), dp[2] (i = 0) 和dp[1] (i = 1)。我们会发现前面计算dp[1]时会把i = 0的dp[1]覆盖,因此在覆盖之前先把i = 0的dp[1]保存下来(代码中的temp), 计算完dp[1]之后再赋值给prev。
/**
 * Edit distance between two words, computed bottom-up while keeping only a
 * single row of the DP table (len2 + 1 integers).
 */
class Solution {
public:
    /**
     * @param word1 the word to transform
     * @param word2 the target word
     * @return the minimum number of single-character insertions, deletions
     *         and replacements that turn word1 into word2
     */
    int minDistance(std::string word1, std::string word2) {
        const int srcLen = static_cast<int>(word1.length());
        const int dstLen = static_cast<int>(word2.length());

        // row[col] starts as the cost of building the first col characters
        // of word2 from an empty word1: col insertions.
        std::vector<int> row(dstLen + 1, 0);
        for (int col = 0; col <= dstLen; ++col)
            row[col] = col;

        // taken = number of leading characters of word1 considered so far.
        for (int taken = 1; taken <= srcLen; ++taken) {
            // diag holds the previous row's value one column to the left,
            // which is about to be overwritten in place.
            int diag = row[0];
            row[0] = taken;  // delete all `taken` characters

            for (int col = 1; col <= dstLen; ++col) {
                // Previous row's value in this column; saved before the
                // overwrite because it becomes diag for the next column.
                const int above = row[col];

                row[col] = (word1[taken - 1] == word2[col - 1])
                               ? diag
                               : 1 + std::min(std::min(diag, above), row[col - 1]);

                diag = above;
            }
        }
        return row[dstLen];
    }
};