zoukankan      html  css  js  c++  java
  • [DataStructure] AC 自动机(Aho–Corasick Automata)

    KMP(Knuth-Morris-Pratt)算法

    • 通常用来解决单个字符串快速匹配问题(时间复杂度为(O(N+M)))

    • 通过初始化fail数组使得匹配失败时按fail数组回退即可无需重复匹配

    • 扩展:

      • 扩展kmp算法(Extended KMP)

    AC自动机(Aho–Corasick Automata)

    • 多个字符串快速匹配,是trie(字典树)和kmp的结合
    • 其主要流程如下:
      1. 选取待匹配字符串构建trie
      2. 利用BFS构建fail指针(即匹配失败的回退点)

    472. 连接词 - 力扣(LeetCode) (leetcode-cn.com)

    class TrieNode{
        public:
            TrieNode(int len,char value,int originIndex):fail(nullptr),value(value),len(len),originIndex(originIndex){
            }
            unordered_map<char,TrieNode*> children;
            int len=0;
            char value;
            int originIndex;
            TrieNode * fail;
    };
    
    class Trie{
        private:
            TrieNode * root;
        public:
            Trie():root(new TrieNode(0,' ',-1)){
            }
            void emplace(const string & s,int index){
                auto p = root;
                auto ptr = 0;
                while (ptr<s.size()){
                    if (p->children.find(s[ptr])==p->children.end()){
                        p->children[s[ptr]] = new TrieNode(-(ptr+1),s[ptr],-1);
                    }
                    p=p->children[s[ptr++]];
                }
                p->len = s.size();
                p->originIndex = index;
            }
            void buildACAutomata(){
                queue<TrieNode *> q;
                for (auto const &[key,value]:root->children){
                    q.emplace(value);
                    value->fail = root;
                }
                while (!q.empty()){
                    auto cur = q.front();
                    q.pop();
                    for (auto const &[key,value]:cur->children){
                        auto failto = cur->fail;
                        while (true){
                            if (failto==nullptr){
                                value->fail = root;
                                break;
                            }else if (failto->children.find(key)!=failto->children.end() && value->value==key){
                                value->fail = failto->children[key];
                                break;
                            }else{
                                failto=failto->fail;
                            }
                        };
                        q.emplace(value);
                    }   
                }
            }
            bool match(const string &s,int index){
                if (s.size()==0) return false;
                vector<int> matchSize(s.size());
                auto p = root;
                for (auto i=0;i<s.size();++i){
                    // 下一个节点不匹配,则需要回退
                    if (p->children.find(s[i])==p->children.end() || p->children[s[i]]->originIndex==index){
                        if (p->fail==nullptr){ // 无法退回,返回false
                            return false;
                        }else{
                            p=p->fail; // 回退一个节点
                            while (p!=nullptr && (p->children.find(s[i])==p->children.end() || matchSize[i-abs(p->len)-1]==0)){ 
                                //判断回退后是否可匹配,否则继续回退
                                p=p->fail;
                            }
                            if (p==nullptr) return false;
                        }
                    };
                    p=p->children[s[i]];
                    if (p==nullptr) return false;
                    if (p->len>0){
                        matchSize[i] = p->len;
                    }
                    auto fa = p->fail;
                    while (fa!=root){
                        if (fa->len>0 && matchSize[i-fa->len]>0){
                            matchSize[i] = fa->len;
                            break;
                        }
                        fa=fa->fail;
                    }
                }
                auto last = matchSize.size()-1;
                return matchSize[last]>0;
            }
    };
    
    class Solution {
    public:
        vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {
            Trie trie;
            for (auto i=0;i<words.size();++i){
                trie.emplace(words[i],i);
            }
            trie.buildACAutomata();
            vector<string> ans;
            for (auto i=0;i<words.size();++i){
                if (trie.match(words[i],i)){
                    ans.emplace_back(words[i]);
                }
            }
            return ans;
        }
    };
    
  • 相关阅读:
    [CodeForces
    [CodeChef]RIN(最小割)
    [Bzoj3894]文理分科(最小割)
    [Poj3469]Dual Core CPU(最小割)
    MySQL- 锁(3)
    MySQL- 锁(1)
    MySQL- 锁(2)
    MySQL-中文全文检索
    Solr
    多线程编程-之并发编程:同步容器
  • 原文地址:https://www.cnblogs.com/minskiter/p/14699677.html
Copyright © 2011-2022 走看看