zoukankan      html  css  js  c++  java
  • 文本相似度分析

    https://blog.csdn.net/wenyusuran/article/details/85761020   //计算
    
    https://cloud.tencent.com/developer/article/1088325   //不适用于地址分析
    
    /*调用*/
           /// <summary>
           /// Demo entry point: prints two sample address strings and the
           /// similarity percentage reported by <c>LevenshteinDistance</c>.
           /// </summary>
           /// <param name="args">Command-line arguments (not used).</param>
           static void Main(string[] args)
           {
               string first = "河南省洛阳市涧西区洛阳职业技术学院";
               string second = "河北省洛阳市涧西区洛阳职业技术学院南校";

               Console.WriteLine("字符串1 {0}", first);
               Console.WriteLine("字符串2 {0}", second);

               // The calculator returns a ratio; scale it by 100 to print a percentage.
               LevenshteinDistance calculator = new LevenshteinDistance();
               decimal similarityPercent = calculator.LevenshteinDistancePercent(first, second) * 100;
               Console.WriteLine("相似度 {0} %", similarityPercent);

               // Block until a line is read from standard input before returning.
               Console.ReadLine();
           }
    
    
    /*文本相似度*/
    /// <summary>
    /// Computes a similarity ratio between two strings based on their
    /// Levenshtein (edit) distance.
    /// </summary>
    /// <remarks>
    /// Strings are compared one <see cref="char"/> (UTF-16 code unit) at a time,
    /// so a character encoded as a surrogate pair counts as two units.
    /// </remarks>
    public class LevenshteinDistance
    {
        /// <summary>
        /// Returns the smallest of three integers.
        /// </summary>
        /// <param name="first">First candidate.</param>
        /// <param name="second">Second candidate.</param>
        /// <param name="third">Third candidate.</param>
        /// <returns>The minimum of the three values.</returns>
        private static int LowerOfThree(int first, int second, int third)
        {
            return Math.Min(Math.Min(first, second), third);
        }

        /// <summary>
        /// Computes the Levenshtein distance: the minimum number of single-char
        /// insertions, deletions and substitutions that turn
        /// <paramref name="str1"/> into <paramref name="str2"/>.
        /// </summary>
        /// <param name="str1">Source string; must not be null.</param>
        /// <param name="str2">Target string; must not be null.</param>
        /// <returns>The edit distance (0 when the strings are equal).</returns>
        private static int Levenshtein_Distance(string str1, string str2)
        {
            int n = str1.Length;
            int m = str2.Length;

            // Turning an empty string into the other one takes one insertion per char.
            if (n == 0)
            {
                return m;
            }
            if (m == 0)
            {
                return n;
            }

            // Each cell only depends on the row above and the cell to its left,
            // so two rows are enough instead of the full (n + 1) x (m + 1) matrix.
            int[] previousRow = new int[m + 1];
            int[] currentRow = new int[m + 1];

            // Row 0: distance from the empty prefix of str1 to each prefix of str2.
            for (int j = 0; j <= m; j++)
            {
                previousRow[j] = j;
            }

            for (int i = 1; i <= n; i++)
            {
                char ch1 = str1[i - 1];

                // Column 0: distance from the first i chars of str1 to the empty string.
                currentRow[0] = i;

                for (int j = 1; j <= m; j++)
                {
                    int substitutionCost = ch1 == str2[j - 1] ? 0 : 1;
                    currentRow[j] = LowerOfThree(
                        previousRow[j] + 1,                      // deletion
                        currentRow[j - 1] + 1,                   // insertion
                        previousRow[j - 1] + substitutionCost);  // match / substitution
                }

                // The row just filled becomes the "previous" row of the next iteration.
                int[] swap = previousRow;
                previousRow = currentRow;
                currentRow = swap;
            }

            // After the final swap the last computed row lives in previousRow.
            return previousRow[m];
        }

        /// <summary>
        /// Computes the similarity of two strings as
        /// <c>1 - distance / max(length1, length2)</c>.
        /// </summary>
        /// <param name="str1">First string; must not be null.</param>
        /// <param name="str2">Second string; must not be null.</param>
        /// <returns>
        /// A value between 0 (nothing in common) and 1 (identical). Two empty
        /// strings are identical and yield 1.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="str1"/> or <paramref name="str2"/> is null.
        /// </exception>
        public decimal LevenshteinDistancePercent(string str1, string str2)
        {
            if (str1 == null)
            {
                throw new ArgumentNullException(nameof(str1));
            }
            if (str2 == null)
            {
                throw new ArgumentNullException(nameof(str2));
            }

            int maxLength = Math.Max(str1.Length, str2.Length);

            // Both strings empty: avoid dividing by zero and report a full match.
            if (maxLength == 0)
            {
                return 1m;
            }

            int distance = Levenshtein_Distance(str1, str2);
            return 1 - (decimal)distance / maxLength;
        }
    }
  • 相关阅读:
    今日小结 5.7
    今日小结 5.2
    今日小结 4.30
    今日小结 4.29
    设计模式 笔记1
    第一次找实习
    Java入门 任务表
    今日小结 4.24
    今日小结 4.18
    今日小结 4.17
  • 原文地址:https://www.cnblogs.com/tianranhui/p/10662874.html
Copyright © 2011-2022 走看看