zoukankan      html  css  js  c++  java
  • similarity 字符串编辑距离相似度匹配

    Step Description
    1 Set n to be the length of s.
      Set m to be the length of t.
      If n = 0, return m and exit.
      If m = 0, return n and exit.
      Construct a matrix containing 0 .. m rows and 0 .. n columns.
    2 Initialize the first row to 0 .. n.
      Initialize the first column to 0 .. m.
    3 Examine each character of s (i from 1 to n).
    4 Examine each character of t (j from 1 to m).
    5 If s [i] equals t [j], the cost is 0.
      If s [i] doesn't equal t [j], the cost is 1.
    6 Set cell d [i, j] of the matrix equal to the minimum of:
      a. The cell immediately above plus 1: d [i-1, j] + 1.
      b. The cell immediately to the left plus 1: d [i, j-1] + 1.
      c. The cell diagonally above and to the left plus the cost: d [i-1, j-1] + cost.
    7 After the iteration steps (3, 4, 5, 6) are complete, the distance is found in cell d [n, m].





    /// <summary>  
    /// 编辑距离(Levenshtein Distance)  
    /// </summary>  
    /// <param name="source">源串</param>  
    /// <param name="target">目标串</param>  
    /// <param name="similarity">输出:相似度,值在0~1</param>  
    /// <param name="isCaseSensitive">是否大小写敏感</param>  
    /// <returns>源串和目标串之间的编辑距离</returns>  
    public static Int32 LevenshteinDistance(String source, String target, out Double similarity, Boolean isCaseSensitive = false)  
    {  
        if (String.IsNullOrEmpty(source))  
        {  
            if (String.IsNullOrEmpty(target))  
            {  
                similarity = 1;  
                return 0;  
            }  
            else  
            {  
                similarity = 0;  
                return target.Length;  
            }  
        }  
        else if (String.IsNullOrEmpty(target))  
        {  
            similarity = 0;  
            return source.Length;  
        }  
      
        String From, To;  
        if (isCaseSensitive)  
        {   // 大小写敏感  
            From = source;  
            To = target;  
        }  
        else  
        {   // 大小写无关  
            From = source.ToLower();  
            To = target.ToLower();  
        }  
      
        // 初始化  
        Int32 m = From.Length;  
        Int32 n = To.Length;  
        Int32[,] H = new Int32[m + 1, n + 1];  
        for (Int32 i = 0; i <= m; i++) H[i, 0] = i;  // 注意:初始化[0,0]  
        for (Int32 j = 1; j <= n; j++) H[0, j] = j;  
      
        // 迭代  
        for (Int32 i = 1; i <= m; i++)  
        {  
            Char SI = From[i - 1];  
            for (Int32 j = 1; j <= n; j++)  
            {   // 删除(deletion) 插入(insertion) 替换(substitution)  
                if (SI == To[j - 1])  
                    H[i, j] = H[i-1, j-1];                      
                else  
                    H[i, j] = Math.Min(H[i-1, j-1], Math.Min(H[i-1, j], H[i, j-1])) + 1;                      
            }  
        }  
      
        // 计算相似度  
        Int32 MaxLength = Math.Max(m, n);   // 两字符串的最大长度  
        similarity = ((Double)(MaxLength - H[m, n])) / MaxLength;  
      
        return H[m, n];    // 编辑距离  
    }  
     
    1. public static void CheckEditDistance()  
    2. {  
    3.     Int32 Distance;  
    4.     Double Similarity;  
    5.   
    6.     while (true)  
    7.     {  
    8.         Console.WriteLine("------------------------------------");  
    9.         Console.Write("源串 = ");  
    10.         String Source = Console.ReadLine();  
    11.         if (Source == "q") break;  
    12.   
    13.         Console.Write("目标串 = ");  
    14.         String Target = Console.ReadLine();  
    15.   
    16.         Distance = LevenshteinDistance(Source, Target, out Similarity, true);  
    17.         Console.WriteLine("编辑距离 = " + Distance.ToString());  
    18.         Console.WriteLine("相似度 = " + Similarity.ToString("0.####"));  
    19.     }  
    20. }  
    
    
    
    
     
    改变自己,成长自己
  • 相关阅读:
    比较Maven和Ant
    解决浏览器缓存
    Servlet--HttpServletResponse的2个操作流的方法
    Servlet--j2e中文乱码解决
    java乱码详解(java中byte与char的转换)
    linux中操作java进程
    Servlet--超链接,表单提交,重定向,转发4种情况的路径
    物理路径,相对路径,绝对路径以及根目录
    Servlet--转发和重定向
    Servlet--传参和接参
  • 原文地址:https://www.cnblogs.com/xxh-2014/p/8397914.html
Copyright © 2011-2022 走看看