zoukankan      html  css  js  c++  java
  • 多字符串查找算法:kmp与step

    1,计算kmp失败回退函数:

    	/**
    	 * Builds the KMP failure (fallback) table for {@code pattern}.
    	 *
    	 * <p>{@code fail[j]} is the index of the last character of the longest proper
    	 * prefix of {@code pattern[0..j]} that is also a suffix of it, or {@code -1}
    	 * when no such prefix exists. {@code fail[0]} is always {@code -1}.
    	 *
    	 * @param pattern the pattern to analyse
    	 * @return an array with one entry per pattern character
    	 * @throws IllegalArgumentException when {@code hasLength(pattern)} is false
    	 *         (that helper is defined outside this snippet; judging by the message
    	 *         it rejects null and empty strings)
    	 */
    	public static int[] getKmpFail(String pattern) {
    		if (!hasLength(pattern)) {
    			throw new IllegalArgumentException("null or empty pattern is not allowed to get kmp fail array.");
    		}

    		final int size = pattern.length();
    		final int[] fail = new int[size];
    		fail[0] = -1;
    		for (int pos = 1; pos < size; pos++) {
    			final char current = pattern.charAt(pos);
    			// Start from the border of the previous prefix and shrink it until it
    			// can be extended by the current character (or no border is left).
    			int border = fail[pos - 1];
    			while (border >= 0 && current != pattern.charAt(border + 1)) {
    				border = fail[border];
    			}
    			fail[pos] = (current == pattern.charAt(border + 1)) ? border + 1 : -1;
    		}
    		return fail;
    	}

    2,查找子串:

    	/**
    	 * Searches {@code source} for the first occurrence of {@code pattern} that lies
    	 * entirely inside the window {@code [from, to)}.
    	 *
    	 * @param source  text to scan
    	 * @param pattern text to look for
    	 * @param from    index of the first source character to examine
    	 * @param fail    failure table for {@code pattern}, where {@code fail[k]} is the
    	 *                last index of the longest border of {@code pattern[0..k]} or -1
    	 * @param to      exclusive end of the scan window
    	 * @return start index of the match, or {@code -1} when the window holds no match
    	 */
    	public static int kmpIndexOf(String source, String pattern, int from, int[] fail, int to) {
    		final int patternLen = pattern.length();
    		int srcPos = from;
    		int matched = 0;
    		while (srcPos < to && matched < patternLen) {
    			if (source.charAt(srcPos) == pattern.charAt(matched)) {
    				srcPos++;
    				matched++;
    				continue;
    			}
    			if (matched > 0) {
    				// Fall back to the longest border of what has been matched so far
    				// and retry the same source character.
    				matched = fail[matched - 1] + 1;
    			} else {
    				// Nothing matched yet: simply move on in the source.
    				srcPos++;
    			}
    		}
    		if (matched != patternLen) {
    			return -1;
    		}
    		return srcPos - patternLen;
    	}

    3,kmp多字符串查找:

    	/**
    	 * Finds the earliest occurrence of any of {@code patterns} in {@code source},
    	 * scanning from {@code from}. The source is scanned once per pattern.
    	 *
    	 * <p>When several patterns match at the same index, the one that comes first
    	 * in {@code patterns} wins.
    	 *
    	 * @param source   text to scan
    	 * @param patterns candidate patterns
    	 * @param from     index of the first source character to examine
    	 * @param fails    {@code fails[i]} is the failure table for {@code patterns[i]},
    	 *                 in the format expected by {@code kmpIndexOf}
    	 * @return a two-element array {@code {index, length}} describing the earliest
    	 *         match, or {@code {-1, -1}} when no pattern occurs
    	 */
    	public static int[] kmpFirstMatch(String source, String[] patterns, int from, int[][] fails) {
    		int[] indices = { -1, -1 };
    		final int sourceLen = source.length();
    		for (int i = 0; i < patterns.length; i++) {
    			final int patternLen = patterns[i].length();
    			// Only a match that starts strictly before the current best is of
    			// interest. Such a match may still END after the best index when this
    			// pattern is longer, so the window end must be derived from this
    			// pattern's own length rather than from the best index alone.
    			int to = sourceLen;
    			if (indices[0] != -1) {
    				to = Math.min(sourceLen, indices[0] - 1 + patternLen);
    			}
    			// The window is too small to hold this pattern: nothing to search.
    			if (to - from < patternLen) continue;
    			int index = kmpIndexOf(source, patterns[i], from, fails[i], to);
    			if (index != -1 && (index < indices[0] || indices[0] == -1)) {
    				indices[0] = index;       // start of the best match so far
    				indices[1] = patternLen;  // length of the pattern that matched
    			}
    		}
    		return indices;
    	}

    4,step多字符串查找:

    	/**
    	 * Finds the earliest occurrence of any pattern in {@code chars} inside
    	 * {@code sources} by trying every pattern at every position from {@code from}
    	 * onwards, and returns as soon as one matches.
    	 *
    	 * <p>When several patterns match at the same position, the one that comes
    	 * first in {@code chars} wins.
    	 *
    	 * @param sources text to scan
    	 * @param from    index of the first position to try
    	 * @param chars   candidate patterns; each must contain at least one character
    	 * @return {@code {index, length}} of the first match, or {@code {-1, -1}}
    	 */
    	public static int[] stepFirstMatch(char[] sources, int from, char[][] chars) {
    		final int total = sources.length;
    		for (int start = from; start < total; start++) {
    			for (char[] candidate : chars) {
    				// Cheap first-character filter before comparing the rest.
    				if (sources[start] != candidate[0]) {
    					continue;
    				}
    				int matched = 1;
    				while (matched < candidate.length
    						&& start + matched < total
    						&& sources[start + matched] == candidate[matched]) {
    					matched++;
    				}
    				if (matched == candidate.length) {
    					return new int[] { start, matched };
    				}
    			}
    		}
    		return new int[] { -1, -1 };
    	}
    step比kmp方式要快一点,大概是基本类型上占些优势。

    找到stepFirstMatch的一个合适用法:生成随机充值码时排除相似字符串!

    // Look-alike character pairs that must not appear next to each other in a
    // generated random code; pass the generated string to
    // checkNoSimilarPairs(randomString) to verify it.
    	private static String[] similarPairsString = {"0O", "1I", "2Z", "VY"};
    	// Every pair in both orders ("0O" and "O0", ...), as char arrays.
    	private static char[][] similarPairs = null;
    	static {
    		final int pairCount = similarPairsString.length;
    		char[][] table = new char[pairCount * 2][];
    		int slot = 0;
    		for (String pair : similarPairsString) {
    			table[slot++] = pair.toCharArray();
    			table[slot++] = new StringBuilder(pair).reverse().toString().toCharArray();
    		}
    		similarPairs = table;
    	}
    	/**
    	 * Tells whether {@code source} is free of every sequence listed in
    	 * {@code similarPairs}.
    	 *
    	 * @param source the string to inspect
    	 * @return {@code true} when {@code stepFirstMatch} finds none of the listed
    	 *         sequences in {@code source}
    	 */
    	private boolean checkNoSimilarPairs(String source) {
    		final char[] candidate = source.toCharArray();
    		final int[] firstHit = stepFirstMatch(candidate, 0, similarPairs);
    		return firstHit[0] == -1;
    	}
  • 相关阅读:
    机器学习笔记
    python学习笔记-day8
    python学习笔记-day7
    python学习笔记-day6
    python学习笔记-day5
    python习题
    単語
    bat批处理----copy和xcopy区别
    C#
    VB
  • 原文地址:https://www.cnblogs.com/xingqi/p/2039225.html
Copyright © 2011-2022 走看看