zoukankan      html  css  js  c++  java
  • 高效比对,返回最短编辑距离算法匹配度最高的数据

            #region 高效比对返回匹配度最高的数据
            /// <summary>
            /// For every distinct key in <paramref name="sourceList"/>, finds the entry of
            /// <paramref name="targetList"/> whose key has the smallest Levenshtein distance to it.
            /// </summary>
            /// <param name="sourceList">Entries whose <c>key</c> is to be matched.</param>
            /// <param name="targetList">Candidate entries; the <c>value</c> of the closest one is returned.</param>
            /// <returns>
            /// One <see cref="MapToData"/> per distinct source key, holding that key, the <c>value</c> of
            /// the closest target and the edit distance in <c>match</c> (a lower distance means a closer
            /// match). Results are ordered from closest to furthest match. The list is empty when either
            /// input list is empty.
            /// </returns>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="sourceList"/> or <paramref name="targetList"/> is null.
            /// </exception>
            public static List<MapToData> GetAutoMapData(List<MapToData> sourceList, List<MapToData> targetList)
            {
                if (sourceList == null)
                {
                    throw new ArgumentNullException("sourceList");
                }
                if (targetList == null)
                {
                    throw new ArgumentNullException("targetList");
                }
                if (sourceList.Count == 0 || targetList.Count == 0)
                {
                    return new List<MapToData>();
                }

                // One slot per source item: every parallel iteration writes only its own index,
                // so no shared collection is mutated concurrently (List<T>.Add is not thread-safe).
                MapToData[] bestPerSource = new MapToData[sourceList.Count];
                Parallel.For(0, sourceList.Count, i =>
                {
                    string sourceKey = sourceList[i].key;
                    MapToData closest = null;
                    int closestDistance = int.MaxValue;

                    foreach (MapToData candidate in targetList)
                    {
                        int distance = LevenshteinDistance(sourceKey, candidate.key);

                        // Strict '<' keeps the earliest target on ties, so the result is deterministic.
                        if (closest == null || distance < closestDistance)
                        {
                            closest = candidate;
                            closestDistance = distance;
                        }
                    }

                    bestPerSource[i] = new MapToData { key = sourceKey, value = closest.value, match = closestDistance };
                });

                // Collapse duplicate source keys to a single entry (they all share the same best match),
                // then list the closest matches first. OrderBy is stable, so ties keep source order.
                return bestPerSource
                    .GroupBy(entry => entry.key)
                    .Select(sameKey => sameKey.First())
                    .OrderBy(entry => entry.match)
                    .ToList();
            }
            #endregion
    
            #region LD最短编辑距离算法
    
            /// <summary>
            /// Computes the Levenshtein (minimum edit) distance between two strings: the smallest number
            /// of single-character insertions, deletions and substitutions that turns
            /// <paramref name="source"/> into <paramref name="target"/>.
            /// </summary>
            /// <param name="source">The source string.</param>
            /// <param name="target">The target string.</param>
            /// <returns>
            /// The edit distance; 0 when the strings are equal, and the length of the other string when
            /// one of them is empty.
            /// </returns>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="source"/> or <paramref name="target"/> is null.
            /// </exception>
            public static int LevenshteinDistance(string source, string target)
            {
                if (source == null)
                {
                    throw new ArgumentNullException("source");
                }
                if (target == null)
                {
                    throw new ArgumentNullException("target");
                }

                int sourceLength = source.Length;
                int targetLength = target.Length;
                if (sourceLength == 0)
                {
                    return targetLength;
                }
                if (targetLength == 0)
                {
                    return sourceLength;
                }

                // Each cell only depends on the current and the previous row, so two rolling rows
                // are enough instead of the full (targetLength + 1) x (sourceLength + 1) matrix.
                int[] previousRow = new int[sourceLength + 1];
                int[] currentRow = new int[sourceLength + 1];

                // Row 0: turning a prefix of source into the empty string costs one deletion per char.
                for (int column = 0; column <= sourceLength; column++)
                {
                    previousRow[column] = column;
                }

                for (int row = 1; row <= targetLength; row++)
                {
                    // Column 0: building a prefix of target from the empty string costs one insertion per char.
                    currentRow[0] = row;
                    char targetChar = target[row - 1];

                    for (int column = 1; column <= sourceLength; column++)
                    {
                        int substitutionCost = source[column - 1] == targetChar ? 0 : 1;
                        int substitute = previousRow[column - 1] + substitutionCost;
                        int insert = currentRow[column - 1] + 1;
                        int delete = previousRow[column] + 1;
                        currentRow[column] = Math.Min(Math.Min(substitute, insert), delete);
                    }

                    // The row just filled becomes the "previous" row of the next iteration.
                    int[] swap = previousRow;
                    previousRow = currentRow;
                    currentRow = swap;
                }

                // After the final swap the last computed row lives in previousRow.
                return previousRow[sourceLength];
            }
            #endregion

        public class MapToData
        {
            /// <summary>
            /// The string to be matched. Defaults to the empty string.
            /// </summary>
            public string key = string.Empty;

            /// <summary>
            /// The matched result. Defaults to a fresh placeholder object, never null.
            /// </summary>
            public object value = new object();

            /// <summary>
            /// The match score. Defaults to 0.
            /// </summary>
            public int match;
        }
  • 相关阅读:
    inputstream和outputstream读写数据模板代码
    如何显示包的上一层包
    我的cnblogs设置代码
    myeclipse ctrl+shift+F失效
    数据包加密解密
    用VisualSVN Server创建版本库,以及TortoiseSVN的使用
    权限验证MVC
    Asp.net MVC23 使用Areas功能的常见错误
    MVC基础知识
    最全的Resharper快捷键汇总
  • 原文地址:https://www.cnblogs.com/smartsmile/p/6234064.html
Copyright © 2011-2022 走看看