zoukankan      html  css  js  c++  java
  • 高效比对,返回最短编辑距离算法匹配度最高的数据

            #region 高效比对返回匹配度最高的数据
            /// <summary>
            /// For every entry in <paramref name="sourceList"/>, finds the entry in
            /// <paramref name="targetList"/> whose key is closest to it by Levenshtein
            /// (edit) distance.
            /// </summary>
            /// <param name="sourceList">Entries whose <c>key</c> is to be matched.</param>
            /// <param name="targetList">
            /// Candidate entries. Their <c>key</c> is compared against each source key and
            /// the <c>value</c> of the closest candidate is copied into the result.
            /// </param>
            /// <returns>
            /// One <see cref="MapToData"/> per distinct source key, holding that key, the
            /// <c>value</c> of the closest target and the edit distance in <c>match</c>
            /// (smaller means closer). Results are ordered by ascending distance. When
            /// several targets tie, the one appearing first in <paramref name="targetList"/>
            /// wins. The list is empty when either input list is empty.
            /// </returns>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="sourceList"/> or <paramref name="targetList"/> is null.
            /// </exception>
            public static List<MapToData> GetAutoMapData(List<MapToData> sourceList, List<MapToData> targetList)
            {
                if (sourceList == null)
                {
                    throw new ArgumentNullException(nameof(sourceList));
                }
                if (targetList == null)
                {
                    throw new ArgumentNullException(nameof(targetList));
                }

                // One slot per source index. Each parallel iteration writes only its own
                // slot, so no lock is needed (List<T>.Add is not safe for concurrent writers).
                var closestPerSource = new MapToData[sourceList.Count];

                Parallel.For(0, sourceList.Count, i =>
                {
                    string sourceValue = sourceList[i].key;
                    bool found = false;
                    int bestDistance = 0;
                    object bestValue = null;

                    foreach (var item in targetList)
                    {
                        int distance = LevenshteinDistance(sourceValue, item.key);

                        // Strict '<' keeps the earliest target on ties, which makes the
                        // result deterministic regardless of thread scheduling.
                        if (!found || distance < bestDistance)
                        {
                            found = true;
                            bestDistance = distance;
                            bestValue = item.value;
                        }
                    }

                    if (found)
                    {
                        closestPerSource[i] = new MapToData { key = sourceValue, value = bestValue, match = bestDistance };
                    }
                });

                // The smallest edit distance is the best match, so order ascending.
                // OrderBy is stable and GroupBy keeps first-seen order, so duplicate
                // source keys collapse to a single entry.
                return closestPerSource
                    .Where(p => p != null)
                    .OrderBy(p => p.match)
                    .GroupBy(p => p.key)
                    .Select(g => g.First())
                    .ToList();
            }
            #endregion
    
            #region LD最短编辑距离算法
    
            /// <summary>
            /// Computes the Levenshtein (minimum edit) distance between two strings: the
            /// smallest number of single-character insertions, deletions and substitutions
            /// needed to turn <paramref name="source"/> into <paramref name="target"/>.
            /// Characters are compared as UTF-16 code units, case-sensitively.
            /// </summary>
            /// <param name="source">The source string.</param>
            /// <param name="target">The target string.</param>
            /// <returns>
            /// The edit distance; 0 when the strings are equal, and the length of the
            /// other string when one of them is empty.
            /// </returns>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="source"/> or <paramref name="target"/> is null.
            /// </exception>
            public static int LevenshteinDistance(string source, string target)
            {
                if (source == null)
                {
                    throw new ArgumentNullException(nameof(source));
                }
                if (target == null)
                {
                    throw new ArgumentNullException(nameof(target));
                }

                int sourceLength = source.Length;
                int targetLength = target.Length;
                if (sourceLength == 0)
                {
                    return targetLength;
                }
                if (targetLength == 0)
                {
                    return sourceLength;
                }

                // Only the previous and current rows of the DP table are ever read, so two
                // rolling rows replace the full (targetLength + 1) x (sourceLength + 1) matrix.
                // previous[c] = distance between the first (r - 1) chars of target and the
                // first c chars of source.
                int[] previous = new int[sourceLength + 1];
                int[] current = new int[sourceLength + 1];
                for (int c = 0; c <= sourceLength; c++)
                {
                    previous[c] = c;
                }

                for (int r = 1; r <= targetLength; r++)
                {
                    current[0] = r;
                    char targetChar = target[r - 1];

                    for (int c = 1; c <= sourceLength; c++)
                    {
                        int substitutionCost = source[c - 1] == targetChar ? 0 : 1;
                        int substitution = previous[c - 1] + substitutionCost;
                        int insertion = current[c - 1] + 1;
                        int deletion = previous[c] + 1;
                        current[c] = Math.Min(Math.Min(substitution, insertion), deletion);
                    }

                    // The row just filled becomes "previous" for the next iteration.
                    int[] swap = previous;
                    previous = current;
                    current = swap;
                }

                return previous[sourceLength];
            }
            #endregion

        /// <summary>
        /// Carries one string-matching entry: the text to match, an associated payload,
        /// and a numeric match score.
        /// </summary>
        public class MapToData
        {
            /// <summary>
            /// The string to be matched. Defaults to the empty string.
            /// </summary>
            public string key = string.Empty;

            /// <summary>
            /// The payload associated with the match result. Defaults to a fresh,
            /// non-null placeholder object.
            /// </summary>
            public object value = new object();

            /// <summary>
            /// The match score. GetAutoMapData stores the Levenshtein distance between
            /// the compared keys here. Defaults to 0.
            /// </summary>
            public int match;
        }
  • 相关阅读:
    自定义组件要加@click方法
    绑定样式
    647. Palindromic Substrings
    215. Kth Largest Element in an Array
    448. Find All Numbers Disappeared in an Array
    287. Find the Duplicate Number
    283. Move Zeroes
    234. Palindrome Linked List
    202. Happy Number
    217. Contains Duplicate
  • 原文地址:https://www.cnblogs.com/smartsmile/p/6234064.html
Copyright © 2011-2022 走看看