next:
/*! * Description: * author scictor <scictor@gmail.com> * date 2018/7/4 */ #include <stdio.h> #include <stdlib.h> #include <string.h> // https://tekmarathon.com/2013/05/14/algorithm-to-find-substring-in-a-string-kmp-algorithm/ /*What is Partial Match Table? It is an array of size (pattern_length+1) where, for each position of i in pattern p, b[i] is defined such that it takes the ‘length of the longest proper suffix of P[1…i-1]’ that matches with the ‘prefix of P’. What is longest prefix/suffix match??? Proper prefix is a prefix which is not same as the substring. Recall proper set which is a subset of a set but is not same as the set. Why a prefix should match suffix of the pattern? its because when we shift the pattern its the prefix of P which comes towards the suffix. And also the key idea is that if we have successfully matched prefix P[1…i-1] of the pattern with the substring T[j-(i-1)…j-1] of the input string and P(i)!=T(j), then we dont need to reprocess any of the suffix T[j-(i-1)…j-1] since we know this portion of the text string is the prefix of the pattern that we have just matched. */ //"ababacb" /** * Pre processes the pattern array based on proper prefixes and proper * suffixes at every position of the array * * @param ptrn * word that is to be searched in the search string * @return partial match table which indicates */ void kmp_next(const char *pattern, int patternLen, int *next) { int i = 0, j = -1; next[i] = j; // default next[0] = -1 while (i < patternLen) { while (j >= 0 && pattern[i] != pattern[j]) { // if there is mismatch consider the next widest border // The borders to be examined are obtained in decreasing order from // the values b[i], b[b[i]] etc. j = next[j]; } i++; j++; next[i] = j; } for(int index = 0; index < patternLen; ++index) printf("%d ", next[index]); return; } /** * Based on the pre processed array, search for the pattern in the text * * @param text * text over which search happens * @param ptrn * pattern that is to be searched */ //int matchIndex[128] = {0}; int kmp_search(const char *text, int textLen, const char *pattern, int patternLen) { int i = 0, j = 0; // initialize new array and preprocess the pattern int next[patternLen + 1]; memset(next, 0x00, sizeof(next)); // int idx = 0; // memset(matchIndex, 0x00, sizeof(matchIndex)); kmp_next(pattern, patternLen, next); while (i < textLen) { while (j >= 0 && text[i] != pattern[j]) { j = next[j]; } i++; j++; // a match is found // if (j == patternLen) { // printf("found substring at index:" + (i - patternLen)); // j = next[j]; // } if (j == patternLen) { printf("found substring at index:%d", (i - patternLen)); //j = next[j]; //matchIndex[idx++] = i - patternLen; return (i - patternLen); } } // for(int k = 0; k < idx; ++k) // { // printf("%d ", matchIndex[k]); // } return -1; } /* Index 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 Text(T) a b c a b d a b c a b d a b c a b d a b d a b c Pattern(P) a b c a b d a b c PMT (next[i]) -1 0 0 0 1 2 0 1 2 3 */
// 4ms int kmp(const char *text, int textLen, const char *pattern, int patternLen) { int *T; int i, j; if (pattern[0] == '