zoukankan      html  css  js  c++  java
  • JavaScript 实现最长子字符串(LCS)

    目标:

    找出两个 字符串 X = 'CGATAATTGAGA' 和 Y = 'GTTCCTAATA' 的最长公共子序列。用动态规划求解(解不唯一?)。

    首先建立一个表 table = table[row,col] = table[12+1,10+1];为什么多一格呢? 

    先初始化每个单元格的值为0;

    然后,遍历行和列:

    当X[row] = Y[col]时,table[row,col] = table[row-1,col-1]+1;

    当X[row] <> Y[col]时,table[row,col] = Math.max(table[row,col-1],table[row-1,col]);

    这样就构建出一个下面的表:

            G  T  T  C  C  T  A  A  T  A       

       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]    

    C [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]    

    G [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]    

    A [0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]    

    T [0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3]    

    A [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4]    

    A [0, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4]    

    T [0, 1, 2, 3, 3, 3, 3, 3, 4, 5, 5]    

    T [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]    

    G [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]    

    A [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]    

    G [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]    

    A [0, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6]

    通过这个表来找出LCS,LCS由表的下标给出,将这些下标放入一个数组L

    从表的最后开始回溯:

    row = 13, col = 11;

    1、table[13,11]对应的行和列的字符相等,将(13,11)放入L,row-=1,col-=1;

    2、table[12,10]对应的行和列的字符不相等,比较table[11,10]和table[12,9]

      2.1 table[11,10] > table[12,9],i -= 1;

      2.2 否则 j -= 1;

    重复1、2

    实现:

    /*
      initial a row*col table
      return a table
    */
    function initTable(row,col){
        var table = [];
        
        for(var i=0;i<row+1;i++){
    	if(!table[i])table[i] = [];
    	for(var j=0;j<col+1;j++){
    	    if(!table[i][j])table[i][j] = [];
    	    table[i][j] = 0;
    	}
        }
        return table;
    }
    /*
      compute the path index of the table
      return array contain the index of s1 and s2
    */
    function computeLCSIndex(s1,s2,table){
        var i = s1.length,
        j = s2.length,
        L1=[];
        
        while(i>0 && j>0){
    	//console.log(i,j);
    	if(s1[i-1] == s2[j-1]){
    	    //console.log(i,j);
    	    L1.push([i,j]);
    	    j -=1;
    	    i -=1;
    	}else{
    	    if(!table[i-1]){
    		console.log(i);	    
    		break;
    	    }
    
    	    if(table[i-1][j] > table[i][j-1]){
    		i -= 1;
    	    }else{
    		j -= 1;
    	    }
    	}
        }
    
        return L1;
    }
    /*
      get LCS from s1,or s2 from LCS index
     */
    function LCS(s1,s2,L1){
        //console.log(L1 = L1.reverse());
        L1 = L1.reverse();
        var LCS = "";
        for(var j=0;j<L1.length;j++){
    	LCS+=(s2[L1[j][1]-1]);
        }
        //console.log(LCS);
        return LCS;
    }
    
    function diff(s1,s2){
        var len1 = s1.length,
        len2 = s2.length,
        table,
        L1;//LCS index of s1
        
        table  = initTable(len1,len2);
    
        for(k=1;k<len1+1;k++){
    	for(l=1;l<len2+1;l++){
    	    if(s1[k-1] === s2[l-1]){
    		//console.log("k:"+k+",l:"+l+table[k-1][l-1]);
    		table[k][l] = table[k-1][l-1] + 1;
    	    }else{
    		//console.log("k:"+k,"l:"+l);
    		table[k][l] = Math.max(table[k][l-1],table[k-1][l]);
    	    }
    	}
        }
        /*
        table.forEach(function(i){
    	console.log(i);
        });
        */
    
        L1 = computeLCSIndex(s1,s2,table);
        return L1;
    }
    
    var debug = true;
    if(debug){
        var A = 'CGATAATTGAGA',
        B = 'GTTCCTAATA';
        var L = diff(A,B);
        
        var s = LCS(A,B,L);
        console.log(s);
    
        var X = ["Cqdf", "G23", "A", "T", "A", "A", "T", "T", "G", "A", "G", "A"],
        Y = ["G2", "Tsfj", "T", "C", "C", "T", "A", "A", "T", "A"];
    
        var L2 = diff(X,Y);
        var s2 = LCS(X,Y,L2);
    
        console.log(s2);
    }
    

    通常是以单个字符作为比较的“元”,若以多个字符作为比较的元,那么输入的参数为数组即可:

  • 相关阅读:
    每天一道LeetCode--141.Linked List Cycle(链表环问题)
    每天一道LeetCode--119.Pascal's Triangle II(杨辉三角)
    每天一道LeetCode--118. Pascal's Triangle(杨辉三角)
    CF1277D Let's Play the Words?
    CF1281B Azamon Web Services
    CF1197D Yet Another Subarray Problem
    CF1237D Balanced Playlist
    CF1239A Ivan the Fool and the Probability Theory
    CF1223D Sequence Sorting
    CF1228D Complete Tripartite
  • 原文地址:https://www.cnblogs.com/wewe/p/2059858.html
Copyright © 2011-2022 走看看