zoukankan      html  css  js  c++  java
  • 字符串相似度的算法(SQL Server 和 C# 实现)

    -- Similarity of two words, as an integer percentage:
    --     (length of the longest substring common to both words) * 100
    --         / (length of the longer word)          -- integer division
    --
    -- Parameters:
    --     @word1, @word2  the words to compare (at most 50 characters each).
    --                     Declared nvarchar so non-ASCII text survives in any
    --                     database collation; varchar arguments convert implicitly.
    -- Returns:
    --     The percentage (0-100), implicitly converted from int to nvarchar.
    --     0 when there is no common substring of at least 2 characters, or when
    --     either argument is NULL or empty.
    --
    -- Notes:
    --     * Only common substrings of 2 or more characters are counted.
    --     * String equality follows the collation in effect for the comparison.
    CREATE FUNCTION get_semblance_By_2words
    (
        @word1 nvarchar(50),
        @word2 nvarchar(50)
    )
    RETURNS nvarchar(4000)
    AS
    BEGIN
        DECLARE @re int;
        DECLARE @maxLenth int;
        DECLARE @i int, @l int;
        DECLARE @tb1 TABLE (child nvarchar(50));
        DECLARE @tb2 TABLE (child nvarchar(50));

        -- The denominator is the length of the longer word.
        SET @maxLenth = LEN(@word1);
        IF LEN(@word1) < LEN(@word2)
            SET @maxLenth = LEN(@word2);

        -- Collect every substring of @word1 that is at least 2 characters long.
        SET @l = 2;
        WHILE @l <= LEN(@word1)
        BEGIN
            SET @i = 1;
            -- The last valid start for a substring of length @l is LEN - @l + 1.
            -- Stopping earlier would drop the final 2-character substring.
            WHILE @i <= LEN(@word1) - @l + 1
            BEGIN
                INSERT INTO @tb1 (child) VALUES (SUBSTRING(@word1, @i, @l));
                SET @i = @i + 1;
            END;
            SET @l = @l + 1;
        END;

        -- Same enumeration for @word2.
        SET @l = 2;
        WHILE @l <= LEN(@word2)
        BEGIN
            SET @i = 1;
            WHILE @i <= LEN(@word2) - @l + 1
            BEGIN
                INSERT INTO @tb2 (child) VALUES (SUBSTRING(@word2, @i, @l));
                SET @i = @i + 1;
            END;
            SET @l = @l + 1;
        END;

        -- The longest substring present in both sets decides the score.
        -- MAX over an empty join is NULL, which COALESCE maps to 0.
        SELECT @re = COALESCE(MAX(LEN(a.child) * 100 / @maxLenth), 0)
        FROM @tb1 AS a
        INNER JOIN @tb2 AS b
            ON a.child = b.child;

        RETURN @re;
    END
    GO
     
    --测试
    --select dbo.get_semblance_By_2words('我是谁','我是谁啊') 
    --75

    --相似度 

    c#------------------------------------------------------

    using System;
    using System.Collections.Generic;
    using System.Text;

    namespace ConsoleApplication6
    {
        /// <summary>
        /// Computes a simple similarity score between two strings, based on the
        /// length of their longest common substring.
        /// </summary>
        class semblance
        {

            /// <summary>
            /// Demo entry point: prints the similarity of two sample strings and
            /// waits for a line of input before exiting.
            /// </summary>
            static void Main(string[] args)
            {
                string re= get_semblance_By_2words("我是谁", "我是谁啊");
                Console.WriteLine(re);
                Console.ReadLine();
            }

            /// <summary>
            /// Returns the similarity of two words as a percentage string such as "75%".
            /// The score is (length of the longest common substring) * 100
            /// / (length of the longer word), using integer division.
            /// </summary>
            /// <param name="word1">First word; null or empty yields "0%".</param>
            /// <param name="word2">Second word; null or empty yields "0%".</param>
            /// <returns>The integer percentage followed by "%".</returns>
            public static string get_semblance_By_2words(string word1, string word2)
            {
                // Nothing can match an absent word; this also avoids dividing by zero.
                if (string.IsNullOrEmpty(word1) || string.IsNullOrEmpty(word2))
                    return "0%";

                int maxLength = Math.Max(word1.Length, word2.Length);
                int longest = LongestCommonSubstringLength(word1, word2);
                int re = longest * 100 / maxLength;
                return re.ToString() + "%";
            }

            /// <summary>
            /// Length of the longest run of consecutive characters that occurs in
            /// both strings, compared char by char (ordinal).
            /// </summary>
            private static int LongestCommonSubstringLength(string a, string b)
            {
                // previous[j] / current[j] hold the length of the common run that
                // ends at a[i - 2] / a[i - 1] and b[j - 1]; index 0 stays 0 as a sentinel.
                int[] previous = new int[b.Length + 1];
                int[] current = new int[b.Length + 1];
                int best = 0;

                for (int i = 1; i <= a.Length; i++)
                {
                    for (int j = 1; j <= b.Length; j++)
                    {
                        if (a[i - 1] == b[j - 1])
                        {
                            current[j] = previous[j - 1] + 1;
                            if (current[j] > best)
                                best = current[j];
                        }
                        else
                        {
                            current[j] = 0;
                        }
                    }

                    // Reuse the two rows; every cell from index 1 is overwritten next pass.
                    int[] swap = previous;
                    previous = current;
                    current = swap;
                }

                return best;
            }
        }
    }


  • 相关阅读:
    twoSum
    getIntersectionNode
    MinStack
    hasCycle
    tomcat和mybatis源码导入eclipse
    这不就是多线程ThreadPoolExecutor和阻塞队列吗
    Linux IO模式及 select、poll、epoll详解
    spring5源码如何导入eclipse
    好东西一定是时间沉淀的产物!!!
    java线程真的太难了!!!
  • 原文地址:https://www.cnblogs.com/bayonetxxx/p/1408752.html
Copyright © 2011-2022 走看看