zoukankan      html  css  js  c++  java
  • 【转载】计算汉字相似程度

    package com.thunisoft.in4.cm.associate.util;
    
    import java.util.List;
    import java.util.Map;
    
    /**
     * 编辑距离算法
     * 
     * @author fanxf
     * 
     */
    public class HanziParse {
        
        /**
         * 计算矢量距离 Levenshtein Distance(LD)
         * 
         * @param str1
         *            str1
         * @param str2
         *            str2
         * @return ld; 0:完全相等,非零代表相似距離
         */
        private static int distance(String str1, String str2) {
    
            int n = str1.length();
            int m = str2.length();
            if (n == 0) {
                return m;
            }
            if (m == 0) {
                return n;
            }
            /** 二维数组,存放中间距离 */
            int[][] distance = new int[n + 1][m + 1];
            /** 初始化数据 */
            int i = 0;
            for (i = 0; i <= n; i++) {
                distance[i][0] = i;
            }
            int j = 0;
            for (j = 0; j <= m; j++) {
                distance[0][j] = j;
            }
            /** 计算中间数据 */
            int cost = 0;
            for (i = 1; i <= n; i++) {
                char ch1 = str1.charAt(i - 1);
                for (j = 1; j <= m; j++) {
                    char ch2 = str2.charAt(j - 1);
                    if (ch1 == ch2) {
                        cost = 0;
                    } else {
                        cost = 1;
                    }
                    /** 获取最小值 */
                    distance[i][j] = min(distance[i - 1][j] + 1,
                            distance[i][j - 1] + 1, distance[i - 1][j - 1] + cost);
                }
            }
            return distance[n][m];
        }
    
        /**
         * 计算最小值
         * 
         * @param one
         * @param two
         * @param three
         * @return 返回最小值
         */
        private static int min(int one, int two, int three) {
            int min = one;
            if (two < min) {
                min = two;
            }
            if (three < min) {
                min = three;
            }
            return min;
        }
    
        /**
         * 计算相似度
         * 
         * @param str1
         *            str1 待匹配数据
         * @param str2
         *            str2 待匹配数据
         * @return sim,匹配相似度
         */
        public static float similarity(String str1, String str2) {
            int ld = distance(str1, str2);
            return 1 - (float) ld / Math.max(str1.length(), str2.length());
        }
    
    
        /**
         * 测试
         * 
         * @param args
         */
        public static void main(String[] args) {
             HanziParse ld = new HanziParse();
             float num = ld.similarity("vvv", "vvv");
             System.out.println(num);
    
        }
    }
  • 相关阅读:
    121. Best Time to Buy and Sell Stock
    玩转算法2.3常见的算法复杂度分析
    数组中的逆序对
    一些基本的代码模板
    230. Kth Smallest Element in a BST
    42. Trapping Rain Water
    api token
    仿百度查询
    baidu jsonp
    How to fix Error: laravel.log could not be opened?
  • 原文地址:https://www.cnblogs.com/zhangxsh/p/3494535.html
Copyright © 2011-2022 走看看