zoukankan      html  css  js  c++  java
  • KMP模式匹配算法程序(Python,C++,C)

    代码来自维基教科书:Knuth-Morris-Pratt pattern matcher

    Python程序如下:

    # Knuth-Morris-Pratt string matching
    # David Eppstein, UC Irvine, 1 Mar 2002
    
    #from http://code.activestate.com/recipes/117214/
    def KnuthMorrisPratt(text, pattern):
        """Yield the start index of every occurrence of pattern in text.

        Calling conventions are similar to str.find, with these differences:
        both arguments may be lists or arbitrary iterables, not just strings;
        all matches are produced (overlapping ones included), not just the
        first; and text is consumed one item at a time, so it never has to
        be held in memory as a whole. Whenever a position is yielded, text
        has been read exactly up to and including the last item of that
        match.

        Args:
            text: iterable of items to search through (read once, in order).
            pattern: iterable of items to look for; it is copied into a list.

        Yields:
            Zero-based start positions of matches, in increasing order.

        Note:
            For an empty pattern a position is produced after each item read,
            i.e. 1, 2, ..., n for a text of n items (position 0 is not
            reported).
        """
        # allow indexing into pattern and protect against change during yield
        pattern = list(pattern)
        patternLen = len(pattern)

        # build table of shift amounts: shifts[k] is how far the pattern must
        # slide forward once k items have matched and the next step fails
        shifts = [1] * (patternLen + 1)
        shift = 1
        for pos in range(patternLen):
            while shift <= pos and pattern[pos] != pattern[pos - shift]:
                shift += shifts[pos - shift]
            shifts[pos + 1] = shift

        # do the actual search
        startPos = 0   # text index at which the current partial match begins
        matchLen = 0   # number of pattern items matched so far (-1: skip item)
        for c in text:
            # The condition spans two lines, so it is wrapped in parentheses;
            # a bare line break after `or` is a syntax error.
            while matchLen == patternLen or (
                    matchLen >= 0 and pattern[matchLen] != c):
                startPos += shifts[matchLen]
                matchLen -= shifts[matchLen]
            matchLen += 1
            if matchLen == patternLen:
                yield startPos

    C++程序如下:

    #include <iostream>
    #include <vector>
    using namespace std;
    
    //----------------------------
    // Knuth-Morris-Pratt search for the string K inside the string S.
    //  Returns the zero-based start index of every occurrence, in
    //  increasing order. Overlapping occurrences are all reported.
    //  An empty K produces the single index 0.
    //----------------------------
    std::vector<int> KMP(std::string S, std::string K)
    {
        std::vector<int> matches;
        const int patLen = static_cast<int>(K.size());
        const int textLen = static_cast<int>(S.size());

        if (patLen == 0)
        {
            matches.push_back(0);
            return matches;
        }

        // fail[i] is the pattern index to resume from after i characters
        // matched and the next comparison failed; -1 means "move on to the
        // next text character with nothing matched".
        std::vector<int> fail(patLen + 1, -1);
        for (int i = 1; i <= patLen; ++i)
        {
            int cand = fail[i - 1];
            while (cand != -1 && K[cand] != K[i - 1])
                cand = fail[cand];
            fail[i] = cand + 1;
        }

        int matched = 0;  // number of pattern characters currently matched
        for (int textPos = 0; textPos < textLen; ++textPos)
        {
            // Fall back while the pattern is exhausted or the next character
            // disagrees with the text.
            while (matched != -1 && (matched == patLen || K[matched] != S[textPos]))
                matched = fail[matched];
            ++matched;
            if (matched == patLen)
                matches.push_back(textPos + 1 - patLen);
        }

        return matches;
    }

    C程序如下:

    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
     
    void computeLPSArray(char *pat, int M, int *lps);
     
    /*
     * Prints "Found pattern at index N" for every occurrence of pat in txt,
     * using the Knuth-Morris-Pratt algorithm. Overlapping occurrences are
     * reported as well.
     *
     * pat: NUL-terminated pattern to look for. An empty pattern reports nothing.
     * txt: NUL-terminated text to search in.
     *
     * If the working table cannot be allocated, a message is written to
     * stderr and the function returns without searching.
     */
    void KMPSearch(char *pat, char *txt)
    {
        int M = strlen(pat);
        int N = strlen(txt);

        // An empty pattern would index lps[-1] below and make
        // computeLPSArray write past a zero-length buffer.
        if (M == 0)
            return;

        // create lps[] that will hold the longest prefix suffix values for pattern
        int *lps = (int *)malloc(sizeof(int) * M);
        if (lps == NULL)
        {
            fprintf(stderr, "KMPSearch: out of memory\n");
            return;
        }

        // Preprocess the pattern (calculate lps[] array)
        computeLPSArray(pat, M, lps);

        int i = 0;  // index for txt[]
        int j = 0;  // index for pat[]
        while (i < N)
        {
            if (pat[j] == txt[i])
            {
                j++;
                i++;
            }

            if (j == M)
            {
                // Whole pattern matched, ending just before txt[i].
                printf("Found pattern at index %d \n", i - j);
                j = lps[j - 1];
            }
            // mismatch after j matches (txt[N] is the terminating NUL, so
            // reading txt[i] here is still in bounds when i == N)
            else if (pat[j] != txt[i])
            {
                // Do not match lps[0..lps[j-1]] characters,
                // they will match anyway
                if (j != 0)
                    j = lps[j - 1];
                else
                    i = i + 1;
            }
        }
        free(lps); // to avoid memory leak
    }
     
    /*
     * Fills lps[0..M-1] for the pattern pat of length M, where lps[i] is the
     * length of the longest proper prefix of pat[0..i] that is also a suffix
     * of pat[0..i].
     *
     * pat: pattern characters (at least M of them are read).
     * M:   pattern length; nothing is written when M <= 0.
     * lps: output array with room for M ints.
     */
    void computeLPSArray(char *pat, int M, int *lps)
    {
        // With no pattern characters there is no lps[0] to write.
        if (M <= 0)
            return;

        int len = 0;  // length of the previous longest prefix suffix
        int i = 1;

        lps[0] = 0; // lps[0] is always 0

        // the loop calculates lps[i] for i = 1 to M-1
        while (i < M)
        {
            if (pat[i] == pat[len])
            {
                len++;
                lps[i] = len;
                i++;
            }
            else if (len != 0)
            {
                // Fall back to the next shorter candidate border and retry
                // the same i. Consider the example AAACAAAA and i = 7.
                len = lps[len - 1];
            }
            else
            {
                // No border can be extended: pat[0..i] has none.
                lps[i] = 0;
                i++;
            }
        }
    }
     
    // Demo entry point: searches a fixed sample text for a fixed pattern
    // and lets KMPSearch print every position where it occurs.
    int main()
    {
        char pat[] = "loves";
        char txt[] = "apurba mandal loves ayoshi loves";

        KMPSearch(pat, txt);
        return 0;
    }


  • 相关阅读:
    Dedecms5.7修改文章,不改变发布时间的方法
    dedecms列表页如何调用栏目关键词和描述
    DEDE内容页调用栏目的SEO标题、描述、关键字的方法
    织梦安装过后出现"...www/include/templets/default/index.htm Not Found!"
    DEDE无简略标题时显示完整标题
    Dede调用简略标题_简略标题标签(短标题)
    修改dede提示信息
    交叉栏目实现织梦首页分页
    织梦添加和调用自定义字段的方法
    织梦列表页和内容页调用缩略图的方法
  • 原文地址:https://www.cnblogs.com/tigerisland/p/7564848.html
Copyright © 2011-2022 走看看