目标:
找出两个 字符串 X = 'CGATAATTGAGA' 和 Y = 'GTTCCTAATA' 的最长公共子序列。用动态规划求解(解不唯一?)。
首先建立一个表 table = table[row,col] = table[12+1,10+1];为什么多一格呢?因为第0行和第0列表示其中一个字符串取空前缀的情况(此时LCS长度为0),这样递推式里的 row-1、col-1 就不会越界。
先初始化每个单元格的值为0;
然后,遍历行和列:
当X[row] = Y[col]时,table[row,col] = table[row-1,col-1]+1;
当X[row] <> Y[col]时,table[row,col] = Math.max(table[row,col-1],table[row-1,col]);
这样就构建出一个下面的表:
G T T C C T A A T A
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
C [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
G [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
A [0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]
T [0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3]
A [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4]
A [0, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4]
T [0, 1, 2, 3, 3, 3, 3, 3, 4, 5, 5]
T [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]
G [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]
A [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]
G [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]
A [0, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6]
通过这个表来找出LCS,LCS由表的下标给出,将这些下标放入一个数组L
从表的最后开始回溯:
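row = 12, col = 10;(表共13行11列,下标从0开始,所以最后一格是 table[12,10])
1、若 table[row,col] 对应的行和列的字符相等(例如 table[12,10],X 和 Y 的最后一个字符都是 'A'),将 (row,col) 放入L,row-=1,col-=1;
2、若 table[row,col] 对应的行和列的字符不相等(例如 table[11,9],'G' 与 'T'),比较 table[row-1,col] 和 table[row,col-1]:
2.1 若 table[row-1,col] > table[row,col-1],row -= 1;
2.2 否则 col -= 1;
重复1、2,直到 row 或 col 为 0。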
实现:
/**
 * Build a (row + 1) x (col + 1) table with every cell set to 0.
 * The extra row and column (index 0) stand for the empty prefix of each
 * sequence, so the recurrence in diff() can read [k - 1] / [l - 1] safely.
 *
 * @param {number} row - length of the first sequence
 * @param {number} col - length of the second sequence
 * @returns {number[][]} zero-filled table
 */
function initTable(row, col) {
  var table = [];
  for (var i = 0; i < row + 1; i++) {
    table[i] = [];
    for (var j = 0; j < col + 1; j++) {
      table[i][j] = 0;
    }
  }
  return table;
}

/**
 * Backtrack through a filled LCS length table and collect the matching
 * positions.
 *
 * @param {string|Array} s1 - first sequence
 * @param {string|Array} s2 - second sequence
 * @param {number[][]} table - LCS length table as filled in by diff()
 * @returns {number[][]} list of [i, j] pairs (1-based positions in s1 and s2),
 *   ordered from the end of the sequences towards the start
 */
function computeLCSIndex(s1, s2, table) {
  var i = s1.length;
  var j = s2.length;
  var L1 = [];

  while (i > 0 && j > 0) {
    // Strict equality, matching the comparison used to fill the table;
    // a loose comparison could report matches the table never counted.
    if (s1[i - 1] === s2[j - 1]) {
      L1.push([i, j]);
      i -= 1;
      j -= 1;
    } else {
      // Best effort: stop if the table has no row above the current one.
      if (!table[i - 1]) {
        break;
      }
      // Move towards the neighbour holding the longer subsequence;
      // ties go left (j decreases).
      if (table[i - 1][j] > table[i][j - 1]) {
        i -= 1;
      } else {
        j -= 1;
      }
    }
  }
  return L1;
}

/**
 * Build the LCS as a string from the index pairs produced by
 * computeLCSIndex(). Elements are read from s2 and concatenated with +=.
 * L1 is read back-to-front and is NOT modified, so the function can be
 * called repeatedly with the same index list.
 *
 * @param {string|Array} s1 - first sequence (not read; kept for the signature)
 * @param {string|Array} s2 - second sequence
 * @param {number[][]} L1 - [i, j] pairs ordered from end to start
 * @returns {string} concatenation of the matched elements in forward order
 */
function LCS(s1, s2, L1) {
  var result = "";
  for (var n = L1.length - 1; n >= 0; n--) {
    result += s2[L1[n][1] - 1];
  }
  return result;
}

/**
 * Fill the LCS length table for s1 and s2, then return the matching
 * index pairs found by backtracking.
 *
 * @param {string|Array} s1 - first sequence
 * @param {string|Array} s2 - second sequence
 * @returns {number[][]} [i, j] pairs as returned by computeLCSIndex()
 */
function diff(s1, s2) {
  var len1 = s1.length;
  var len2 = s2.length;
  var table = initTable(len1, len2);

  // Loop counters are declared locally so they do not leak as globals
  // (and do not throw in strict mode / ES modules).
  for (var k = 1; k < len1 + 1; k++) {
    for (var l = 1; l < len2 + 1; l++) {
      if (s1[k - 1] === s2[l - 1]) {
        table[k][l] = table[k - 1][l - 1] + 1;
      } else {
        table[k][l] = Math.max(table[k][l - 1], table[k - 1][l]);
      }
    }
  }

  return computeLCSIndex(s1, s2, table);
}

// Demo: once with plain strings, once with arrays of multi-character elements.
var debug = true;
if (debug) {
  var A = 'CGATAATTGAGA',
      B = 'GTTCCTAATA';
  var L = diff(A, B);
  var s = LCS(A, B, L);
  console.log(s);

  var X = ["Cqdf", "G23", "A", "T", "A", "A", "T", "T", "G", "A", "G", "A"],
      Y = ["G2", "Tsfj", "T", "C", "C", "T", "A", "A", "T", "A"];
  var L2 = diff(X, Y);
  var s2 = LCS(X, Y, L2);
  console.log(s2);
}
通常是以单个字符作为比较的“元”,若以多个字符作为比较的元,那么输入的参数为数组即可: