zoukankan      html  css  js  c++  java
  • BZOJ3172 单词 Fail树

    题目大意:求一篇论文中每个单词分别在论文中出现多少次。

    本题直接用AC自动机匹配太慢,应该使用Fail树:将AC自动机中所有的Fail指针反向,得到一棵新树,这就是Fail树。对长度为x的字符串a和长度为y的字符串b,如果a是b的子串,则a必然是b的某个前缀b[0..x-1], b[0..x], ..., b[0..y-1]的后缀。根据Fail指针的定义,从a所在的节点出发沿着反向Fail边走,走到的节点所代表的字符串(某个单词的前缀)必然以a为后缀。因此只需一遍DFS,令每个节点的Cnt值加上其所有子节点的Cnt值即可。注意,Trie树中有些节点被多个字符串共用,因此每次向Trie树插入字符串时,都要对路径上每个节点的Cnt++,以表示有多个字符串经过该节点。

    #include <cstdio>
    #include <cstring>
    #include <cassert>
    #include <algorithm>
    #include <cmath>
    #include <queue>
    #include <vector>
    using namespace std;
    
    const int MAX_CHAR = 26, MAX_LEN = 1e6 + 10, MAX_STR = 210;
    
    // Trie of the inserted words plus the tree obtained by reversing every fail
    // pointer (the "fail tree").
    //
    // Usage: Insert() every word, call SetFail() once, then Dfs(Root). Afterwards
    // Tail[i]->Cnt holds the accumulated count for the i-th inserted word.
    //
    // Nodes and edges are heap-allocated and are not released by this struct.
    struct FailTree
    {
    // The trie root: always the first node that was allocated.
    #define Root _nodes[0]
    // Child-slot index of a letter. Fully parenthesised so the macro stays correct
    // when it is used inside a larger expression.
    #define Org(x) ((x) - 'a')
    
        struct Node;
        struct Edge;
    
        struct Node
        {
            Node *Next[MAX_CHAR], *Fail;
            int Cnt;            // number of inserted words passing through this node
            Edge *Head;         // first outgoing fail-tree edge (singly linked list)
            // Members are initialised in declaration order; every child slot starts null.
            Node() :Fail(NULL), Cnt(0), Head(NULL)
            {
                for (int i = 0; i < MAX_CHAR; i++)
                    Next[i] = NULL;
            }
        };
        // _nodes: every allocated node, root first.
        // Tail:   Tail[i] is the node at which the i-th inserted word ends.
        std::vector<Node*> _nodes, Tail;
    
        // One fail-tree edge, stored in the adjacency list of its source node.
        struct Edge
        {
            Node *To;
            Edge *Next;
            Edge(Node *to, Edge *next):To(to),Next(next){}
        };
        std::vector<Edge*> _edges;
    
        FailTree()
        {
            _nodes.push_back(new Node());
        }
    
        // Prepends the fail-tree edge from -> to onto from's adjacency list.
        void AddEdge(Node *from, Node *to)
        {
            Edge *e = new Edge(to, from->Head);
            from->Head = e;
            _edges.push_back(e);
        }
    
        // Walks/creates the trie path for s, incrementing Cnt on every node of the
        // path, and returns the node where s ends. s must consist of 'a'..'z' only.
        Node *BuildTrie(const char *s)
        {
            Node *cur = Root;
            for (const char *p = s; *p != '\0'; ++p)
            {
                // Anything outside 'a'..'z' would index outside Next[].
                assert(*p >= 'a' && *p <= 'z');
                const int slot = Org(*p);
                if (!cur->Next[slot])
                    _nodes.push_back(cur->Next[slot] = new Node());
                cur = cur->Next[slot];
                cur->Cnt++;
            }
            return cur;
        }
    
        // Adds word s to the trie and records its end node in Tail.
        void Insert(const char *s)
        {
            Tail.push_back(BuildTrie(s));
        }
    
        // Computes every fail pointer breadth-first and, for each node, adds the
        // reversed fail link (fail target -> node) as a fail-tree edge.
        void SetFail()
        {
            std::queue<Node*> q;
            q.push(Root);
            while (!q.empty())
            {
                Node *cur = q.front();
                q.pop();
                for (int i = 0; i < MAX_CHAR; i++)
                {
                    Node *child = cur->Next[i];
                    if (!child)
                        continue;
    
                    // Climb the parent's fail chain until some node has an i-child.
                    Node *temp = cur->Fail;
                    while (temp && !temp->Next[i])
                        temp = temp->Fail;
    
                    // No such node (always the case for the root's children): fail to root.
                    Node *target = temp ? temp->Next[i] : Root;
                    child->Fail = target;
                    AddEdge(target, child);
                    q.push(child);
                }
            }
        }
    
        // Adds to each node in the fail-subtree of u the Cnt of all of its fail-tree
        // descendants and returns u's resulting Cnt.
        //
        // Implemented with an explicit stack: a fail chain can be as long as the
        // longest word, so a recursive walk could exhaust the call stack.
        int Dfs(Node *u)
        {
            if (!u)
                return 0;
    
            std::vector<Node*> path;     // nodes on the current root-to-node path
            std::vector<Edge*> pending;  // next unvisited edge for each node on the path
            path.push_back(u);
            pending.push_back(u->Head);
    
            while (!path.empty())
            {
                Edge *e = pending.back();
                if (e)
                {
                    // Descend into the next child, remembering where to resume.
                    pending.back() = e->Next;
                    path.push_back(e->To);
                    pending.push_back(e->To->Head);
                }
                else
                {
                    // All children done: fold this node's total into its parent.
                    Node *done = path.back();
                    path.pop_back();
                    pending.pop_back();
                    if (!path.empty())
                        path.back()->Cnt += done->Cnt;
                }
            }
            return u->Cnt;
        }
    }g;
    
    // Reads the word count followed by the words, builds the trie and fail tree
    // in g, and prints one accumulated count per word in input order.
    int main()
    {
    #ifdef _DEBUG
        // Backslashes are escaped; unescaped, "\n" would turn into a newline character.
        freopen("c:\\noi\\source\\input.txt", "r", stdin);
    #endif
        int tot;
        // static: roughly 1 MB is too large to place safely on the stack.
        static char s[MAX_LEN];
        if (scanf("%d", &tot) != 1)
            return 0;
        for (int i = 0; i < tot; i++)
        {
            if (scanf("%s", s) != 1)
            {
                // Input ended early: only report the words that were actually read,
                // so the output loop never indexes past the end of g.Tail.
                tot = i;
                break;
            }
            g.Insert(s);
        }
        g.SetFail();
        g.Dfs(g.Root);
        for (int i = 0; i < tot; i++)
            printf("%d\n", g.Tail[i]->Cnt);
        return 0;
    }
    View Code

     或者不用反向Fail指针也可以,站在后缀上去找其所包含的前缀。这样编程复杂度低一些。

    #include <cstdio>
    #include <cstring>
    #include <vector>
    #include <queue>
    #include <cassert>
    #include <cmath>
    #include <algorithm>
    using namespace std;
    
    // Alphabet size, capacity of the static node pool, and size of the input buffer.
    // NOTE(review): MAX_NODE is smaller than MAX_LEN, so a single word can in
    // principle need more nodes than the pool holds - confirm against the input limits.
    const int MAX_CHAR = 26;
    const int MAX_NODE = 500001;
    const int MAX_LEN = 1000001;
    
    // One trie node of the automaton.
    struct Node
    {
        int Sum;                // incremented once for every word that ends at this node
        int Id;                 // id passed to the most recent BuildTrie call ending here
        int Cnt;                // result accumulator filled in by Dfs1
        Node *Fail;             // fail pointer, assigned by SetFail
        Node *Next[MAX_CHAR];   // child pointers, one slot per letter
    };
    
    // Statically allocated node pool; slot 1 is the root, slot 0 is never handed out.
    Node Nodes[MAX_NODE];
    // Index of the most recently allocated slot (starts at the root's slot).
    int Nodes_Cnt = 1;
    // Buffer that receives each input word.
    char P[MAX_LEN];
    // WordNode[id] is the node at which the word with that id ends.
    Node *WordNode[MAX_NODE];
    
    // Returns the distance of c from 'a' (so 'a' -> 0, 'b' -> 1, ...).
    int Ord(char c)
    {
        const int offset = c - 'a';
        return offset;
    }
    
    // Hands out the next unused slot of the static Nodes pool.
    Node *NewNode()
    {
        // The pool has a fixed size; stop here rather than write past its end.
        assert(Nodes_Cnt + 1 < MAX_NODE);
        ++Nodes_Cnt;
        return Nodes + Nodes_Cnt;
    }
    
    // The trie root lives in slot 1 of the node pool.
    Node *Root()
    {
        return &Nodes[1];
    }
    
    void BuildTrie(char *s, int id)
    {
        Node *cur = Root();
        int len = strlen(s);
        for (int i = 0; i < len; i++)
        {
            if (cur->Next[Ord(s[i])])
                cur = cur->Next[Ord(s[i])];
            else
                cur = cur->Next[Ord(s[i])] = NewNode();
        }
        cur->Sum++;
        cur->Id = id;
        WordNode[id] = cur;
    }
    
    void SetFail()
    {
        queue<Node*> q;
        q.push(Root());
        while (!q.empty())
        {
            Node *cur = q.front();
            q.pop();
            for (int i = 0; i < MAX_CHAR; i++)
            {
                if (cur->Next[i])
                {
                    Node *temp = cur->Fail;
                    while (temp)
                    {
                        if (temp->Next[i])
                        {
                            cur->Next[i]->Fail = temp->Next[i];
                            break;
                        }
                        temp = temp->Fail;
                    }
                    if (!temp)
                    {
                        cur->Next[i]->Fail = Root();
                    }
                    q.push(cur->Next[i]);
                }
            }
        }
    }
    
    // For every node v in the trie subtree of cur (the root itself excluded), adds
    // the number of words ending in v's trie subtree to the Cnt of each word-end
    // node on v's fail chain, v included. Returns the number of words ending in
    // cur's trie subtree. Requires SetFail() to have been called.
    //
    // Implemented as an iterative, linear pass: recursing once per trie level and
    // re-walking a whole fail chain for every node is quadratic (and very deep)
    // on inputs such as a long run of one repeated letter.
    int Dfs1(Node *cur)
    {
        if (!cur)
            return 0;
    
        // Breadth-first order over the whole trie. A parent always precedes its
        // children, and a fail target (strictly shallower) precedes its source.
        std::vector<Node*> order;
        std::vector<Node*> parentOf;
        order.push_back(Root());
        parentOf.push_back(NULL);
        for (std::vector<Node*>::size_type head = 0; head < order.size(); head++)
        {
            Node *node = order[head];
            for (int i = 0; i < MAX_CHAR; i++)
                if (node->Next[i])
                {
                    order.push_back(node->Next[i]);
                    parentOf.push_back(node);
                }
        }
    
        // All per-node scratch arrays are indexed by the node's slot in Nodes.
        const int slots = Nodes_Cnt + 1;
        std::vector<int> words(slots, 0);    // words ending in the node's trie subtree
        std::vector<char> inside(slots, 0);  // 1 if the node lies in cur's trie subtree
        std::vector<int> carry(slots, 0);    // amount still to travel up the fail chain
    
        const int total = static_cast<int>(order.size());
        for (int k = 0; k < total; k++)
        {
            Node *node = order[k];
            words[node - Nodes] = node->Sum;
            if (node == cur || (parentOf[k] && inside[parentOf[k] - Nodes]))
                inside[node - Nodes] = 1;
        }
        // Children before parents: fold subtree word counts upwards.
        for (int k = total - 1; k > 0; k--)
            words[parentOf[k] - Nodes] += words[order[k] - Nodes];
    
        // Each node of cur's subtree contributes its subtree word count.
        for (int k = 1; k < total; k++)
            if (inside[order[k] - Nodes])
                carry[order[k] - Nodes] = words[order[k] - Nodes];
    
        // Deepest nodes first, so a node has received every contribution from
        // deeper nodes before it forwards its own total to its fail target.
        for (int k = total - 1; k > 0; k--)
        {
            Node *node = order[k];
            const int amount = carry[node - Nodes];
            if (amount == 0)
                continue;
            if (node->Sum)
                node->Cnt += amount;
            Node *up = node->Fail;
            if (up && up != Root())
                carry[up - Nodes] += amount;
        }
    
        return words[cur - Nodes];
    }
    
    int main()
    {
        //freopen("c:\noi\source\input.txt", "r", stdin);
        int totP;
        scanf("%d", &totP);
        for (int i = 0; i < totP; i++)
        {
            scanf("%s", P);
            BuildTrie(P, i);
        }
        SetFail();
        Dfs1(Root());
        for (int i = 0; i < totP; i++)
            printf("%d
    ", WordNode[i]->Cnt);
        return 0;
    }
    View Code
  • 相关阅读:
    centos出现“FirewallD is not running”怎么办
    百度编辑器(Ueditor)最新版(1.4.3.3)插入锚点失败原因分析及BUG修复
    centos rm -rf 恢复删除的文件
    php实现粘贴截图并完成上传功能
    微信网页授权java实现
    JAVA使用POI读取EXCEL文件的简单model
    java读取excel文件数据
    java文件操作(读流)
    oracle 10g正则表达式 REGEXP_LIKE 用法
    Oracle正则表达式函数:regexp_like、regexp_substr、regexp_instr、regexp_replace
  • 原文地址:https://www.cnblogs.com/headboy2002/p/8453926.html
Copyright © 2011-2022 走看看