zoukankan      html  css  js  c++  java
  • 【模糊搜索排序算法】基于KMP和Levenshtein的模糊搜索实现

    KMP:判断字符串之间是否匹配

    Levenshtein:字符串之间的转换(编辑)距离

    先贴代码,再补别的

    /**
     * Utility methods for computing the Levenshtein (edit) distance between two
     * strings and a similarity score derived from it.
     *
     * <p>Created 18:47 2018/7/25.
     *
     * @author liuxs
     */
    public class LevenshteinDistanceUtil {

        /**
         * Computes the Levenshtein distance between two strings: the minimum number
         * of single-character insertions, deletions and substitutions needed to turn
         * {@code s1} into {@code s2}. Comparison is per UTF-16 {@code char}.
         *
         * @param s1 first string, must not be {@code null}
         * @param s2 second string, must not be {@code null}
         * @return the edit distance, {@code 0} when the strings are equal
         * @throws NullPointerException if either argument is {@code null}
         */
        public static int getStringDistance(String s1, String s2) {
            int firstLength = s1.length();
            int secondLength = s2.length();

            // Turning an empty string into the other one takes one insertion per char.
            if (firstLength == 0) {
                return secondLength;
            }
            if (secondLength == 0) {
                return firstLength;
            }

            // Only the previous and the current row of the distance table are ever
            // read, so two rolling rows replace the full (m+1) x (n+1) matrix.
            int[] previousRow = new int[secondLength + 1];
            int[] currentRow = new int[secondLength + 1];

            // Row 0: distance from the empty prefix of s1 to each prefix of s2.
            for (int j = 0; j <= secondLength; j++) {
                previousRow[j] = j;
            }

            for (int i = 1; i <= firstLength; i++) {
                char fromFirst = s1.charAt(i - 1);
                // Column 0: distance from a prefix of s1 to the empty prefix of s2.
                currentRow[0] = i;
                for (int j = 1; j <= secondLength; j++) {
                    if (fromFirst == s2.charAt(j - 1)) {
                        // Equal characters cost nothing: reuse the diagonal value.
                        currentRow[j] = previousRow[j - 1];
                    } else {
                        // Otherwise pay 1 on top of the cheapest of
                        // deletion (up), insertion (left) and substitution (diagonal).
                        currentRow[j] = getMin(previousRow[j], currentRow[j - 1], previousRow[j - 1]) + 1;
                    }
                }
                int[] recycled = previousRow;
                previousRow = currentRow;
                currentRow = recycled;
            }
            // After the final swap the last computed row is in previousRow.
            return previousRow[secondLength];
        }

        /** Returns the smallest of the three values. */
        private static int getMin(int a, int b, int c) {
            return Math.min(a, Math.min(b, c));
        }

        /**
         * Computes a similarity score as
         * {@code 1 - distance / max(length(s1), length(s2))}, rounded to two decimals.
         *
         * @param s1 first string, must not be {@code null}
         * @param s2 second string, must not be {@code null}
         * @return {@code 1.0} for identical strings (including two empty strings),
         *         {@code 0.0} when every position has to be edited
         * @throws NullPointerException if either argument is {@code null}
         */
        public static float calculateProximity(String s1, String s2) {
            int longest = Math.max(s1.length(), s2.length());
            if (longest == 0) {
                // Both strings are empty and therefore identical. Without this guard
                // the division below is 0/0 = NaN, which Math.round maps to 0.
                return 1.0f;
            }
            float editDistance = getStringDistance(s1, s2);
            float proximity = 1 - editDistance / longest;
            return (float) (Math.round(proximity * 100)) / 100;
        }

    }
    /**
     * String matching helpers.
     *
     * <p>Despite the class name, {@link #index(String, String)} is a
     * right-to-left window comparison with a bad-character shift
     * (Boyer-Moore-Horspool style), and {@link #kmpMatch(String, String)} is an
     * in-order subsequence test; neither builds a KMP failure table.
     *
     * <p>Created 18:47 2018/7/25.
     *
     * @author liuxs
     */
    public class KMPMatchUtil {

        /**
         * Computes how far the search window may slide to the right, given the
         * text character currently aligned with the last pattern position.
         *
         * @param c       text character aligned with the last character of the pattern
         * @param pattern pattern characters, length at least 1
         * @return the distance from the rightmost occurrence of {@code c} in
         *         {@code pattern[0 .. n-2]} to the last pattern position, or the
         *         pattern length when {@code c} does not occur there; always >= 1
         */
        private static int dist(char c, char[] pattern) {
            int n = pattern.length;
            // The last pattern position is deliberately excluded: an occurrence
            // there would give a shift of 0 and the search would never advance.
            for (int i = n - 1; i >= 1; i--) {
                if (pattern[i - 1] == c) {
                    return n - i;
                }
            }
            return n;
        }

        /**
         * Finds the first occurrence of {@code p_t} inside {@code p_s}.
         *
         * @param p_s text to search in
         * @param p_t pattern to search for
         * @return {@code -2} if either argument is {@code null}; {@code -1} if the
         *         pattern does not occur; otherwise the 0-based start index of the
         *         first occurrence, in {@code [0, p_s.length() - p_t.length()]}
         *         (an empty pattern matches at {@code 0})
         */
        public static int index(final String p_s, final String p_t) {
            if (p_s == null || p_t == null) {
                return -2;
            }
            char[] text = p_s.toCharArray();
            char[] pattern = p_t.toCharArray();
            int textLength = text.length;
            int patternLength = pattern.length;

            if (textLength < patternLength) {
                return -1;
            }
            if (patternLength == 0) {
                return 0;
            }

            // windowEnd is the exclusive end of the current alignment, i.e. the
            // pattern is compared against text[windowEnd - patternLength .. windowEnd - 1].
            int windowEnd = patternLength;
            while (windowEnd <= textLength) {
                int textPos = windowEnd;
                int patternPos = patternLength;
                // Compare right to left until a mismatch or the pattern is exhausted.
                while (patternPos > 0 && text[textPos - 1] == pattern[patternPos - 1]) {
                    textPos--;
                    patternPos--;
                }
                if (patternPos == 0) {
                    // Whole pattern matched; textPos is now the start of the match.
                    return textPos;
                }
                // Shift from the window end using the character at the window end.
                // Shifting from the (already decremented) mismatch position, as the
                // previous version did, could move the window backwards forever or
                // jump over a real match.
                windowEnd += dist(text[windowEnd - 1], pattern);
            }

            return -1;
        }

        /**
         * Tests whether every character of {@code target} appears in {@code source}
         * in the same order, not necessarily contiguously (a subsequence test).
         *
         * @param source string to search in
         * @param target characters to look for, in order
         * @return {@code false} if either argument is {@code null} or blank after
         *         {@code trim()}, or if {@code target} is not a subsequence of
         *         {@code source}; {@code true} otherwise
         */
        public static boolean kmpMatch(String source, String target) {
            if (null == source || null == target || "".equals(source.trim()) || "".equals(target.trim())) {
                return false;
            }

            int sourceLength = source.length();
            int targetLength = target.length();

            // Walk the source once, consuming one target character per hit.
            int matched = 0;
            for (int pos = 0; pos < sourceLength && matched < targetLength; pos++) {
                if (source.charAt(pos) == target.charAt(matched)) {
                    matched++;
                }
            }
            return matched == targetLength;
        }

    }
    /**
     * Fuzzy search over bank branch records by branch name.
     *
     * <p>Candidates are selected with {@code KMPMatchUtil.kmpMatch}, ranked by
     * Levenshtein distance to the search word, and truncated to the closest
     * {@value #MAX_RESULTS} entries.
     *
     * <p>Created 11:38 2018/7/26.
     *
     * @author liuxs
     */
    @Service
    public class FuzzyBankBranchService {

        private static final Logger logger = LoggerFactory.getLogger(FuzzyBankBranchService.class);

        /** Upper bound on the number of branches returned by one search. */
        private static final int MAX_RESULTS = 15;

        // Field positions inside the comma-separated record handled by stringToObject.
        // NOTE(review): presumably BankBranch.objectToString emits the fields in
        // exactly this order - confirm against BankBranch.
        private static final int FIELD_RELATE_CODE = 0;
        private static final int FIELD_BANK_NAME = 1;
        private static final int FIELD_BRANCH_NAME = 2;
        private static final int FIELD_PROVINCE_NAME = 3;
        private static final int FIELD_CITY_NAME = 4;

        @Autowired
        private BankBranchService bankBranchService;

        /**
         * Returns the branches whose name fuzzily matches the branch name carried by
         * the query, closest match first, at most {@value #MAX_RESULTS} entries.
         * Only the first branch seen for each relate code is kept.
         *
         * @param queryBankBean query whose {@code getBranchName()} is the search word
         * @return newly built branch objects with their proximity set; empty when
         *         nothing matches
         */
        public List<BankBranch> fuzzyFindByBranchName(QueryBankBean queryBankBean) {
            final String fuzzyWord = queryBankBean.getBranchName();
            List<BankBranch> allBranches = bankBranchService.findAll();

            List<ScoredRecord> matches = new ArrayList<>();
            // Fully qualified so that this block needs no additional file-level imports.
            java.util.Set<String> seenRelateCodes = new java.util.HashSet<>();
            for (BankBranch branch : allBranches) {
                if (!KMPMatchUtil.kmpMatch(branch.getBranchName(), fuzzyWord)) {
                    continue;
                }
                // add() returns false for a code that was already recorded. The
                // previous version checked a list that was never filled, so
                // duplicates were not actually filtered.
                if (!seenRelateCodes.add(branch.getRelateCode())) {
                    continue;
                }
                String record = branch.objectToString(branch);
                // Compute the distance once per record instead of once per comparison.
                int distance = LevenshteinDistanceUtil.getStringDistance(
                        splitFields(record)[FIELD_BRANCH_NAME], fuzzyWord);
                matches.add(new ScoredRecord(record, distance));
            }

            if (!matches.isEmpty()) {
                logger.info("通过支行名称:{},匹配到{}记录。", fuzzyWord, matches.size());
                // List.sort is stable, so equally distant records keep source order.
                matches.sort(Comparator.comparingInt(scored -> scored.distance));
            }

            // Keep only the closest MAX_RESULTS records.
            int limit = Math.min(matches.size(), MAX_RESULTS);
            List<String> resultStr = new ArrayList<>(limit);
            for (ScoredRecord scored : matches.subList(0, limit)) {
                resultStr.add(scored.record);
            }
            return convertBranch(resultStr, fuzzyWord);
        }

        /**
         * Rebuilds branch objects from their serialized records and tags each one
         * with its proximity to the search word.
         *
         * @param resultStr serialized branch records, already ordered
         * @param fuzzyWord the search word the proximity is measured against
         * @return one branch per record, in the same order
         */
        private List<BankBranch> convertBranch(List<String> resultStr, String fuzzyWord) {
            List<BankBranch> results = new ArrayList<>(resultStr.size());
            for (String str : resultStr) {
                BankBranch branch = stringToObject(str);
                branch.setProximity(LevenshteinDistanceUtil.calculateProximity(branch.getBranchName(), fuzzyWord));
                results.add(branch);
            }
            return results;
        }

        /**
         * Builds a branch from a comma-separated record.
         *
         * @param str record with at least five comma-separated fields
         * @return a new branch populated from the first five fields
         */
        private BankBranch stringToObject(String str) {
            // Split once instead of once per field.
            String[] fields = splitFields(str);
            BankBranch bankBranch = new BankBranch();
            bankBranch.setRelateCode(fields[FIELD_RELATE_CODE]);
            bankBranch.setBankName(fields[FIELD_BANK_NAME]);
            bankBranch.setBranchName(fields[FIELD_BRANCH_NAME]);
            bankBranch.setProvinceName(fields[FIELD_PROVINCE_NAME]);
            bankBranch.setCityName(fields[FIELD_CITY_NAME]);
            return bankBranch;
        }

        /**
         * Splits a serialized record into its fields.
         *
         * <p>The negative limit keeps trailing empty fields; plain {@code split(",")}
         * drops them, which made a record with an empty last field too short to index.
         * NOTE(review): a field value that itself contains a comma still shifts the
         * following fields - confirm whether the data can contain commas.
         */
        private static String[] splitFields(String record) {
            return record.split(",", -1);
        }

        /** A serialized branch record paired with its edit distance to the search word. */
        private static final class ScoredRecord {
            private final String record;
            private final int distance;

            private ScoredRecord(String record, int distance) {
                this.record = record;
                this.distance = distance;
            }
        }
    }
  • 相关阅读:
    使用指针的误区之指针未初始化
    实验室react项目名词解释
    生活感悟之大学
    git 快速入门
    口才锻炼
    narcissus
    crest value &minimum
    factorial
    Str_turn
    array_x
  • 原文地址:https://www.cnblogs.com/liuxs13/p/9367744.html
Copyright © 2011-2022 走看看