zoukankan      html  css  js  c++  java
  • 【模版】(旧)AC自动机

    AC自动机

    (注:此博不是讲解,只是备忘用的)

    多模式串匹配的好东西~

    以trie树为保存字符串的载体,在树上建立失配边来进行模式串间的转移,以达到快速匹配的目的。

    以 HDU:2222  Keywords Search 为例:

    (但有些部分代码设计不太合理,可根据题微调)

    (此处采用了建trie图的思想加速,但实际还可以用last数组再度加速)

    #include <queue>
    #include <cstdio>
    #include <cstring>
    #include <iostream>
    // Capacity of the Tr node pool; also used as the size of the text buffer in main().
    #define MSZ (1000000)
    // Size of the per-pattern input buffer in main().
    #define SZ (50)
    // Number of child slots per trie node (index() maps 'a' to slot 0).
    #define SGSZ (26)
    using namespace std;
    
    // * One node of the trie / Aho-Corasick automaton, stored by index in Tr[].
    struct TRIE
    {
        // Character on the edge leading into this node (written by AddPTr).
        char cc;
        // Index of the failure-link target (0 is the root).
        int fail;
        // Index of a pattern-end node to visit next when collecting matches.
        int last;
        // Non-zero once FindAc has already visited this node.
        int get;
        // Number of inserted patterns that end exactly at this node.
        int cnt;
        // Per-letter transition targets; 0 means "no child" until FailAc fills it.
        int next[SGSZ];
    };
    
    // Index of the most recently allocated node in Tr (AddPTr pre-increments it).
    int cTr;
    // Node pool; index 0 is the root that AddSTr and FindAc start from.
    TRIE Tr[MSZ];
    // * Accessor macros for node fields; each expands to an lvalue, so they are
    // * used for both reading and assignment.
    // np: transition of node p on letter slot id.
    #define np(p, id) (Tr[p].next[id])
    // fp: failure link of node p.
    #define fp(p) (Tr[p].fail)
    // lp: "last" link of node p.
    #define lp(p) (Tr[p].last)
    // ct: count of patterns ending at node p.
    #define ct(p) (Tr[p].cnt)
    // gt: visited flag of node p.
    #define gt(p) (Tr[p].get)
    
    // T: number of test cases; N: number of patterns in the current case
    // (both are read from stdin in main()).
    int T, N;
    
    // * Map a letter to its child-slot number: 'a' -> 0, 'b' -> 1, ...
    // * The result is simply the distance from 'a'; no range check is done.
    inline int index(char cc)
    {
        const int base = 'a';
        const int code = cc;
        return code - base;
    }
    
    // * Inverse of index(): turn a slot number back into its letter (0 -> 'a').
    inline char opdex(int id)
    {
        const int code = 'a' + id;
        return code;
    }
    
    // * Reset the trie to the empty state (root only).
    // * Zeroes every node allocated so far (indices 0..cTr) and then rewinds the
    // * allocation counter, so the next test case reuses the pool from index 1.
    // * Without the rewind each test case would keep allocating past the previous
    // * one and eventually run off the end of Tr.
    void ClearTr()
    {
        for (int i = 0; i <= cTr; i++)
            Tr[i] = TRIE(); // value-initialisation: every field becomes 0
        cTr = 0;
    }
    
    // * Allocate the next unused node in Tr for letter slot `id`.
    // * Records the corresponding character in the node and returns its index.
    int AddPTr(int id)
    {
        const int fresh = cTr + 1;
        cTr = fresh;
        Tr[fresh].cc = opdex(id);
        return fresh;
    }
    
    // * add a string into Trie
    void AddSTr(char s[])
    {
        int p = 0, l = strlen(s);
        for (int i = 0; i < l; i++)
        {
            int id = index(s[i]);
            if (!np(p, id))
                np(p, id) = AddPTr(id);
            p = np(p, id); // * Magic part , careful !
        }
        ct(p)++, lp(p) = p;
        return;
    }
    
    void FailAc()
    {
        queue<int> Q;
        for (int id = 0; id < SGSZ; id++)
            if (np(0, id))
                Q.push(np(0, id));
        while (!Q.empty())
        {
            int p = Q.front();
            Q.pop();
            for (int id = 0; id < SGSZ; id++)
            {
                int tp = fp(p);
                while (tp && !np(tp, id))
                    tp = fp(tp);
                if (np(p, id))
                {
                    Q.push(np(p, id));
                    fp(np(p, id)) = np(tp, id);
                    lp(np(p, id)) = ct(np(tp, id)) ? np(tp, id) : lp(np(tp, id));
                }
                else
                    np(p, id) = np(tp, id);
            }
        }
        return;
    }
    
    int FindAc(char ms[])
    {
        int p = 0, l = strlen(ms), sum = 0;
        for (int i = 0; i < l; i++)
        {
            int id = index(ms[i]);
            p = np(p, id);
            int tp = p;
            while (tp)
            {
                if (gt(tp))
                    break;
                gt(tp) = 1;
                if (ct(tp))
                    sum += ct(tp);
                tp = lp(tp);
            }
        }
        return sum;
    }
    
    // * Program entry. Reads T test cases from stdin; each case consists of N
    // * patterns followed by one text. For each case the trie is rebuilt, the
    // * automaton links are computed, and the value returned by FindAc is printed
    // * on its own line. Returns 0; stops early if the input ends or is malformed.
    int main()
    {
        // Static storage keeps the large text buffer off the call stack, and the
        // extra byte leaves room for the terminating NUL of a maximum-length token.
        static char s[SZ + 1], ms[MSZ + 1];

        // Build bounded "%<width>s" formats from the size macros so scanf can
        // never write past the end of the buffers.
        char wordFmt[16], textFmt[16];
        snprintf(wordFmt, sizeof(wordFmt), "%%%ds", SZ);
        snprintf(textFmt, sizeof(textFmt), "%%%ds", MSZ);

        if (scanf("%d", &T) != 1)
            return 0;
        while (T--)
        {
            if (scanf("%d", &N) != 1)
                return 0;
            ClearTr();
            for (int i = 1; i <= N; i++)
            {
                if (scanf(wordFmt, s) != 1)
                    return 0;
                AddSTr(s);
            }
            FailAc();
            if (scanf(textFmt, ms) != 1)
                return 0;
            // One answer per line so consecutive test cases do not run together.
            printf("%d\n", FindAc(ms));
        }
        return 0;
    }

     寻找失配边的fail函数:

     ...

    void FailAc()
    {
        queue<int> Q;
        for (int id = 0; id < SGSZ; id++)
            if (np(0, id))
                Q.push(np(0, id));
        while (!Q.empty())
        {
            int p = Q.front();
            Q.pop();
            for (int id = 0; id < SGSZ; id++)
            {
                int tp = fp(p);
                while (tp && !np(tp, id))
                    tp = fp(tp);
                if (np(p, id))
                {
                    Q.push(np(p, id));
                    fp(np(p, id)) = np(tp, id);
                    lp(np(p, id)) = ct(np(tp, id)) ? np(tp, id) : lp(np(tp, id));
                }
                else
                    np(p, id) = np(tp, id);
            }
        }
        return;
    }

     寻找匹配串数量的find函数:

    包括fail函数,一定要记住一些明显的优化,比如trie图转化,get标记,last记录上个单词节点。

    int FindAc(char ms[])
    {
        int p = 0, l = strlen(ms), sum = 0;
        for (int i = 0; i < l; i++)
        {
            int id = index(ms[i]);
            p = np(p, id);
            int tp = p;
            while (tp)
            {
                if (gt(tp))
                    break;
                gt(tp) = 1;
                if (ct(tp))
                    sum += ct(tp);
                tp = lp(tp);
            }
        }
        return sum;
    }

     清空和加入节点:(以下是Trie树操作)

     这是一种很蠢的写法,实际中节点可以直接删除(如用memset清空,不过可能会卡)。

    // * Allocate the next unused node in Tr for letter slot `id`.
    // * Records the corresponding character in the node and returns its index.
    int AddPTr(int id)
    {
        const int fresh = cTr + 1;
        cTr = fresh;
        Tr[fresh].cc = opdex(id);
        return fresh;
    }
    
    // * add a string into Trie
    void AddSTr(char s[])
    {
        int p = 0, l = strlen(s);
        for (int i = 0; i < l; i++)
        {
            int id = index(s[i]);
            if (!np(p, id))
                np(p, id) = AddPTr(id);
            p = np(p, id); // * Magic part , careful !
        }
        ct(p)++, lp(p) = p;
        return;
    }

     将字符转化成编号和反转化的函数:

     这是为那些写trie树却不会指针的同学们准备的,将每个字符转化成唯一对应的下标。

    // * Map a letter to its child-slot number: 'a' -> 0, 'b' -> 1, ...
    // * The result is simply the distance from 'a'; no range check is done.
    inline int index(char cc)
    {
        const int base = 'a';
        const int code = cc;
        return code - base;
    }
    
    // * Inverse of index(): turn a slot number back into its letter (0 -> 'a').
    inline char opdex(int id)
    {
        const int code = 'a' + id;
        return code;
    }

    宏黑魔法++。

     

     

  • 相关阅读:
    SqlServer 查看数据库中所有存储过程
    SqlServer 查看数据库中所有视图
    SqlServer 查询表的详细信息
    SqlServer 遍历修改字段长度
    net core 操作Redis
    Tuning SharePoint Workflow Engine
    Open With Explorer
    Download language packs for SharePoint 2013
    Change Maximum Size For SharePoint List Template when Saving
    Six ways to store settings in SharePoint
  • 原文地址:https://www.cnblogs.com/Ztraveler/p/6933512.html
Copyright © 2011-2022 走看看