zoukankan      html  css  js  c++  java
  • C语言实现KMP模式匹配算法

    next:

    /*!
     * Description:
     * author scictor <scictor@gmail.com>
     * date 2018/7/4
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    // https://tekmarathon.com/2013/05/14/algorithm-to-find-substring-in-a-string-kmp-algorithm/
    /*What is Partial Match Table?
    It is an array of size (pattern_length+1) where, for each position of i in pattern p, b[i] is defined such that it takes the ‘length of the longest proper suffix of P[1…i-1]’ that matches with the ‘prefix of P’.
    What is longest prefix/suffix match??? Proper prefix is a prefix which is not same as the substring. Recall proper set which is a subset of a set but is not same as the set.
    Why a prefix should match suffix of the pattern? its because when we shift the pattern its the prefix of P which comes towards the suffix. And also the key idea is that if we have successfully matched prefix P[1…i-1] of the pattern with the substring T[j-(i-1)…j-1] of the input string and P(i)!=T(j), then we dont need to reprocess any of the suffix T[j-(i-1)…j-1] since we know this portion of the text string is the prefix of the pattern that we have just matched.
    */
    //"ababacb"
    /**
     * Pre processes the pattern array based on proper prefixes and proper
     * suffixes at every position of the array
     *
     * @param ptrn
     *            word that is to be searched in the search string
     * @return partial match table which indicates
     */
    void kmp_next(const char *pattern, int patternLen, int *next) {
        int i = 0, j = -1;
    
        next[i] = j; // default next[0] = -1
        while (i < patternLen) {
            while (j >= 0 && pattern[i] != pattern[j]) {
                // if there is mismatch consider the next widest border
                // The borders to be examined are obtained in decreasing order from
                //  the values b[i], b[b[i]] etc.
                j = next[j];
            }
            i++;
            j++;
            next[i] = j;
        }
        for(int index = 0; index < patternLen; ++index) printf("%d ", next[index]);
        return;
    }
    
    /**
         * Based on the pre processed array, search for the pattern in the text
         *
         * @param text
         *            text over which search happens
         * @param ptrn
         *            pattern that is to be searched
         */
    
    //int matchIndex[128] = {0};
    
    int kmp_search(const char *text, int textLen, const char *pattern, int patternLen) {
        int i = 0, j = 0;
    
        // initialize new array and preprocess the pattern
        int next[patternLen + 1];
        memset(next, 0x00, sizeof(next));
    
    //    int idx = 0;
    //    memset(matchIndex, 0x00, sizeof(matchIndex));
    
        kmp_next(pattern, patternLen, next);
    
        while (i < textLen) {
            while (j >= 0 && text[i] != pattern[j]) {
                j = next[j];
            }
            i++;
            j++;
    
            // a match is found
            //        if (j == patternLen) {
            //            printf("found substring at index:" + (i - patternLen));
            //            j = next[j];
            //        }
            if (j == patternLen) {
                printf("found substring at index:%d", (i - patternLen));
                //j = next[j];
                //matchIndex[idx++] = i - patternLen;
                return (i - patternLen);
            }
        }
    
    //    for(int k = 0; k < idx; ++k)
    //    {
    //        printf("%d ", matchIndex[k]);
    //    }
        return -1;
    }
    
    /*
    Index         0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16  17  18  19  20  21  22  23
    Text(T)       a b c a b d a b c a b  d  a  b  c  a  b   d   a   b   d   a   b   c
    Pattern(P)    a b c a b d a b c
    PMT (next[i])   -1 0 0 0 1 2 0 1 2 3
     */
    // 4ms
    int kmp(const char *text, int textLen, const char *pattern, int patternLen)
    {
        int *T;
        int i, j;
    
        if (pattern[0] == '')
            return 0;
    
        // Construct the lookup table
        T = (int*) malloc( (patternLen + 1) * sizeof(int) );
        T[0] = -1;
        for (i=0; pattern[i] != ''; i++) {
            T[i+1] = T[i] + 1;
            while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])
                T[i+1] = T[T[i+1]-1] + 1;
        }
    
        for(int k = 0; k < patternLen; ++k) printf("%d ", T[k]);
    
        // Perform the search
        for (i=j=0; text[i] != ''; ) {
            if (j < 0 || text[i] == pattern[j]) {
                ++i, ++j;
                if (pattern[j] == '') {
                    return (i-j);
                }
            }
            else j = T[j];
        }
    
        free(T);
        return -1;
    }
    
    /*const char *kmp_search(const char *text, const char *pattern)
    {
        int *T;
        int i, j;
        const char *result = NULL;
    
        if (pattern[0] == '')
            return text;
    
        // Construct the lookup table
        T = (int*) malloc((strlen(pattern)+1) * sizeof(int) );
        T[0] = -1;
        for (i=0; pattern[i] != ''; i++) {
            T[i+1] = T[i] + 1;
            while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])
                T[i+1] = T[T[i+1]-1] + 1;
        }
    
        // Perform the search
        for (i=j=0; text[i] != ''; ) {
            if (j < 0 || text[i] == pattern[j]) {
                ++i, ++j;
                if (pattern[j] == '') {
                    result = text+i-j;
                    break;
                }
            }
            else j = T[j];
        }
    
        free(T);
        return result;
    }
    */

    nextval:

    void preKmp(char *x, int m, int kmpNext[]) {
       int i, j;
    
       i = 0;
       j = kmpNext[0] = -1;
       while (i < m) {
          while (j > -1 && x[i] != x[j])
             j = kmpNext[j];
          i++;
          j++;
          if (x[i] == x[j])
             kmpNext[i] = kmpNext[j];
          else
             kmpNext[i] = j;
       }
    }
    
    
    // text:y, len: n, match parttern:x, len: m
    int KMP(char *y, int n, char *x, int m) {
       int i, j, kmpNext[m];
    
       /* Preprocessing */
       preKmp(x, m, kmpNext);
    
       /* Searching */
       i = j = 0;
       while (j < n) {
          while (i > -1 && x[i] != y[j])
             i = kmpNext[i];
          i++;
          j++;
          // if (i >= m) {
          // printf("j-i=%d
    ",j - i);
          // i = kmpNext[i];
          // }
           if (i == m) {
                //printf("j-i=%d
    ",j - i);
                return j - i;
                //i = kmpNext[i];
          }
       }
        return -1;
    }
    
    int strStr(char* haystack, char* needle) {
        int needleLen = strlen(needle);
        if(needleLen == 0) return 0;
        int hayLen = strlen(haystack);
        if(hayLen == 0) return -1;
        
        return KMP(haystack, hayLen, needle, needleLen);
    }
  • 相关阅读:
    使用freemarker生成word,步骤详解并奉上源代码
    汉诺塔问题
    java interview
    java 反射
    java 匿名内部类
    java 内部类(转)
    MYSQL和ORACLE的一些区别
    Hibernate总结(转)
    Hibernate的使用
    Arduino LM35温度检测
  • 原文地址:https://www.cnblogs.com/guxuanqing/p/9266333.html
Copyright © 2011-2022 走看看