zoukankan      html  css  js  c++  java
  • [算法] 最大公共子串

    具体算法和推导,看链接。

    参考:http://www.cnblogs.com/ider/p/longest-common-substring-problem-optimization.html

    1、暴力法

    #include <iostream>
    #include <string>
    #include <vector>
    using namespace std;
    
    //brute force方法
    int longestCommonSubstring1(const string &s1, const string &s2)
    {
        string::size_type size1 = s1.size();
        string::size_type size2 = s2.size();
        if(size1 == 0 || size2 == 0)
            return 0;
        
        int length = 0;
        int start1 = -1;
        int start2 = -1;
        int longest = 0;
        for(int i = 0; i < size1; ++i)
        {
            for(int j = 0; j < size2; ++j)
            {
                length = 0;
                //while循环计算S1的i~size1的后缀和S2的j~size2的后缀 是不是最长子串
                int m = i;
                int n = j;
                while(m < size1 && n < size2)
                {
                    //只要子串中有一个数不想等,立即退出
                    if(s1[m] != s2[n])
                    {
                        break;
                    }
                    else
                    {
                    //如果两个后缀子串中,从开始到现在的字符想等,则++
                        ++m;
                        ++n;
                        ++length;
                    }
                }
                //S1和S2的一个子串,结束后,看length的长度
                if(length > longest)
                {
                    longest = length;
                    start1 = i;
                    start2 = j;
                }
            
            }
        }
        return longest;
        //cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
    }

    2、动态规划法

    (1)第一行和第一列先求,作为动态规划的初始化状态;

    状态转移:如果两个字符相等,table[i][j] = table[i-1][j-1] + 1;

    int longestCommonSubstring2(const string &s1, const string &s2)
    {
        string::size_type size1 = s1.size();
        string::size_type size2 = s2.size();
        if(size1 == 0 || size2 == 0)
            return 0;
        int start1 = -1;
        int start2 = -1;
        int longest = 0;
        int length;
    
        //动态生成二维数组
        vector<vector<int>> table(size1, vector<int>(size2, 0));
        //第一行table赋值,第一行与S1的第一个字符比较
        //即动态规划的初始状态
        for(int j = 0; j < size2; ++j)
        {
            table[0][j] = (s1[0] == s2[j] ? 1 : 0);
        }
        //从第二行开始,从左到右依次遍历字符串
        for(int i = 1; i < size1; ++i)
        {
            //第一列的值,相当于初始状态
            table[i][0] = (s1[i] == s2[0] ? 1 : 0);
            for(int j = 1; j < size2; ++j)
            {
                if(s1[i] == s2[j])
                {
                    //如果相等,那么+1;不想等,则table为0(在创建table时已经赋值了0)
                    table[i][j] = table[i-1][j-1] + 1;
                }
            }
        }
        //找出table中的最大值
        for(int i = 0; i < size1; ++i)
        {
            for(int j = 0; j < size2; ++j)
            {
                if( longest < table[i][j])
                {
                    longest = table[i][j];
                    start1 = i - longest + 1;
                    start2 = j - longest + 1;
                }
            }
        }
    
        cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
    }

    (2)优化(求最大长度的循环进行优化,放到前面的循环中)

    int longestCommonSubstring3(const string &s1, const string &s2)
    {
        string::size_type size1 = s1.size();
        string::size_type size2 = s2.size();
        if(size1 == 0 || size2 == 0)
            return 0;
        int start1 = -1;
        int start2 = -1;
        int longest = 0;
        int length;
        vector<vector<int>> table(size1, vector<int>(size2, 0));
        for(int j = 0; j < size2; ++j)
        {
            table[0][j] = (s1[0] == s2[j] ? 1 : 0);
            if(table[0][j] > longest)
            {
                longest = table[0][j];
                start1 = 0;
                start2 = j;
            }
        }
        for(int i = 1; i < size1; ++i)
        {
            if(s1[i] == s2[0])
            {
                table[i][0] = 1;
                if(longest == 0)
                {
                    longest = table[i][0];
                    start1 = i;
                    start2 = 0;
                }
            }
            for(int j = 1; j < size2; ++j)
            {
                if(s1[i] == s2[j])
                {
                    table[i][j] = table[i-1][j-1] + 1;
                    if( longest < table[i][j])
                    {
                        longest = table[i][j];
                        start1 = i - longest + 1;
                        start2 = j - longest + 1;
                    }
                }
                
            }
        }
    
        cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
    }

    (3)优化空间复杂度

    上面(1)(2)是按行求的,现在按斜对角线求。

    int longestCommonSubstring4(const string &s1, const string &s2)
    {
        string::size_type size1 = s1.size();
        string::size_type size2 = s2.size();
        if(size1 == 0 || size2 == 0)
            return 0;
        int start1 = -1;
        int start2 = -1;
        int longest = 0;
        int length;
        vector<vector<int>> table(size1, vector<int>(size2, 0));
        for(int i = 0; i < size1; ++i)
        {
            int m = i;
            int n = 0;
            int length = 0;
            while(m < size1 && n < size2)
            {
                if(s1[m] != s2[n])
                {
                    //如果对角线上两个字符不相等,则后面继续计算最大公共子串时,要从0开始
                    length = 0;
                }
                else
                {
                    ++length;
                    //在对角线计算时,子串到一定长度,后面不相等。
                    //如果在if外更新longest的值,是错误的,是在字符相等时,更新值。
                    if(longest < length)
                    {
                        longest = length;
                        start1 = m - longest + 1;
                        start2 = n - longest + 1;
                    }
                }
                ++m;
                ++n;
            }
            
        }
        for(int j = 1; j < size2; ++j)
        {
            int m = 0;
            int n = j;
            while(m < size1 && n < size2)
            {
                if(s1[m] != s2[n])
                {
                    length = 0;
                }
                else
                {
                    ++length;
                    if(length > longest)
                    {
                        longest = length;
                        start1 = m - longest + 1;
                        start2 = n - longest + 1;
                    }
                }
            ++m;
            ++n;
            }
            
        }
        cout << "(start1, start2, longest) = " << "(" << start1 << "," << start2 << "," << longest << ")" << endl;
    }
  • 相关阅读:
    Ubuntu10下MySQL搭建Amoeba_分片
    MySQL多Text字段报8126错误(解决过程)
    SQL Server 维护计划实现数据库备份(Step by Step)
    Ubuntu10下MySQL搭建Master Slave
    Ubuntu10下安装JAVA JDK
    Windows下安装MySQL最佳实践
    Ubuntu12下重新挂载硬盘
    Windows 下使用Git管理Github项目
    解决Windows Server2008 R2中IE开网页时弹出阻止框
    Win Server 2008中开启Areo特效
  • 原文地址:https://www.cnblogs.com/pukaifei/p/5543849.html
Copyright © 2011-2022 走看看