zoukankan      html  css  js  c++  java
  • 简单的字符串相似度计算

    先计算两个字符串的 Levenshtein 距离（编辑距离），再用"(较长字符串的长度 − 距离) / 较长字符串的长度"得到 0 到 1 之间的相似度。

    代码
    /// <summary>
    /// String extension methods based on the Levenshtein (edit) distance.
    /// </summary>
    static class StringExt
    {
        /// <summary>
        /// Computes the Levenshtein distance between two strings: the minimum number of
        /// single-character insertions, deletions and substitutions needed to turn
        /// <paramref name="source"/> into <paramref name="target"/>.
        /// Characters are compared one <see cref="char"/> at a time with <c>==</c>.
        /// </summary>
        /// <param name="source">The source string.</param>
        /// <param name="target">The target string.</param>
        /// <returns>The edit distance; 0 when both strings are equal.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is null.
        /// </exception>
        public static int CalcDistance(this string source, string target)
        {
            // Extension methods can be invoked on a null receiver, so validate both arguments.
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            int n = source.Length;
            int m = target.Length;

            // Turning a string into (or out of) the empty string costs one edit per character.
            if (m == 0)
            {
                return n;
            }
            if (n == 0)
            {
                return m;
            }

            // Only the previous row of the dynamic-programming table is ever read,
            // so two rolling rows of m + 1 cells replace the full (n + 1) x (m + 1) matrix.
            int[] previous = new int[m + 1];
            int[] current = new int[m + 1];

            // Row 0: distance from the empty prefix of source to each prefix of target.
            for (int j = 0; j <= m; j++)
            {
                previous[j] = j;
            }

            for (int i = 1; i <= n; i++)
            {
                char si = source[i - 1];

                // Column 0: distance from the first i characters of source to the empty string.
                current[0] = i;

                for (int j = 1; j <= m; j++)
                {
                    int cost = si == target[j - 1] ? 0 : 1;

                    int above = previous[j] + 1;        // drop source[i - 1]
                    int left = current[j - 1] + 1;      // insert target[j - 1]
                    int diag = previous[j - 1] + cost;  // substitute, or keep when equal

                    current[j] = Math.Min(above, Math.Min(left, diag));
                }

                // The row just filled becomes the "previous" row of the next iteration.
                int[] swap = previous;
                previous = current;
                current = swap;
            }

            // After the final swap the last computed row lives in 'previous'.
            return previous[m];
        }

        /// <summary>
        /// Computes a similarity ratio between two strings as
        /// <c>(max - distance) / max</c>, where <c>max</c> is the length of the longer
        /// string and <c>distance</c> is the result of <see cref="CalcDistance"/>.
        /// </summary>
        /// <param name="source">The source string.</param>
        /// <param name="target">The target string.</param>
        /// <returns>
        /// A value from 0 to 1: 1 for equal strings (including two empty strings),
        /// 0 when exactly one string is empty or every position needs an edit.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is null.
        /// </exception>
        public static double CalcSimilarity(this string source, string target)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            int max = Math.Max(source.Length, target.Length);

            // Two empty strings are identical; returning early also avoids dividing by zero.
            if (max == 0)
            {
                return 1.0;
            }

            // When only one string is empty the distance equals max, so the ratio is 0.
            int distance = source.CalcDistance(target);
            return (double)(max - distance) / max;
        }
    }

  • 相关阅读:
    EUI组件之DataGroup
    EUI组件之CheckBox
    EUI组件之Button
    EUI组件之BitmapLabel 位图字体
    微信小游戏 egret.getDefinitionByName获取不到
    微信小游戏横屏设置
    本地 win7 与虚拟机Centos7 ping互通和Centos7 上网设置
    Centos7没有ETH0网卡
    Parallels Desktop Centos 设置IP
    2.用Python套用Excel模板,一键完成原亮样式
  • 原文地址:https://www.cnblogs.com/heros/p/1910562.html
Copyright © 2011-2022 走看看