zoukankan      html  css  js  c++  java
  • HDU 1277 全文检索

    全文检索

    Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 65536/32768 K (Java/Others)
    Total Submission(s): 1048    Accepted Submission(s): 324


    Problem Description
    我们经常用 Google 检索信息,但是编写检索信息的程序是很困难的;现在请你编写一个简单的全文检索程序。
    问题的描述是这样的:给定一个信息流文件,信息完全由数字组成,数字个数不超过60000个,但也不少于60个;再给定一个关键字集合,其中关键字个数不超过10000个,每个关键字的信息数字不超过60个,但也不少于5个;两个不同的关键字的前4个数字是不相同的;由于流文件太长,已经把它分成多行;请你编写一个程序检索出有哪些关键字在文件中出现过。
     
    Input
    第一行是两个整数M,N;M表示数字信息的行数,N表示关键字的个数;接着是M行信息数字,然后是一个空行;再接着是N行关键字;每个关键字的形式是:[Key No. 1] 84336606737854833158。
     
    Output
    输出只有一行,如果检索到有关键字出现,则依次输出,但不能重复,中间有空格,形式如:Found key: [Key No. 9] [Key No. 5];如果没找到,则输出形如:No key can be found !。
     
    Sample Input
    20 10
    646371829920732613433350295911348731863560763634906583816269
    637943246892596447991938395877747771811648872332524287543417
    420073458038799863383943942530626367011418831418830378814827
    679789991249141417051280978492595526784382732523080941390128
    848936060512743730770176538411912533308591624872304820548423
    057714962038959390276719431970894771269272915078424294911604
    285668850536322870175463184619212279227080486085232196545993
    274120348544992476883699966392847818898765000210113407285843
    826588950728649155284642040381621412034311030525211673826615
    398392584951483398200573382259746978916038978673319211750951
    759887080899375947416778162964542298155439321112519055818097
    642777682095251801728347934613082147096788006630252328830397
    651057159088107635467760822355648170303701893489665828841446
    069075452303785944262412169703756833446978261465128188378490
    310770144518810438159567647733036073099159346768788307780542
    503526691711872185060586699672220882332373316019934540754940
    773329948050821544112511169610221737386427076709247489217919
    035158663949436676762790541915664544880091332011868983231199
    331629190771638894322709719381139120258155869538381417179544
    000361739177065479939154438487026200359760114591903421347697
    [Key No. 1] 934134543994403697353070375063
    [Key No. 2] 261985859328131064098820791211
    [Key No. 3] 306654944587896551585198958148
    [Key No. 4]338705582224622197932744664740
    [Key No. 5] 619212279227080486085232196545
    [Key No. 6]333721611669515948347341113196
    [Key No. 7] 558413268297940936497001402385
    [Key No. 8] 212078302886403292548019629313
    [Key No. 9] 877747771811648872332524287543
    [Key No. 10] 488616113330539801137218227609
     
    Sample Output
    Found key: [Key No. 9] [Key No. 5]
    题目大意:给定一段长数字串和一组短的数字串,问哪些短串在长串中出现过。
    解题方法:AC自动机。
    #include <iostream>
    #include <stdio.h>
    #include <string.h>
    #include <queue>
    #include <algorithm>
    #include <stdlib.h>
    using namespace std;
    
    // One node of the digit trie used by the Aho-Corasick automaton.
    //   id   - 0 on construction; Insert() stores the key id on the node that
    //          ends a key, and Query() overwrites it with -1 once reported.
    //   fail - failure link, filled in by BuildAC(); NULL until then.
    //   next - child pointer for each decimal digit 0..9, NULL when absent.
    typedef struct node
    {
        int id;
        node *fail;
        node *next[10];

        // Creates a childless node with no id and no failure link.
        node() : id(0), fail(NULL)
        {
            for (int digit = 0; digit < 10; ++digit)
            {
                next[digit] = NULL;
            }
        }
    }TreeNode;
    
    // Ids of the keys found in the stream, appended by Query() in the order
    // they are first detected.
    int res[10005];
    // Number of valid entries currently stored in res.
    int nCount = 0;
    // Set to true by Query() as soon as at least one key has been found.
    bool flag = false;
    
    // Adds one key to the digit trie rooted at pRoot.
    //
    //   pRoot  - root of the trie; missing nodes along the key's path are
    //            allocated with new.
    //   Substr - NUL-terminated key; must consist only of the characters
    //            '0'..'9'.
    //   id     - value stored in the node that ends the key. Query() only
    //            reports nodes whose id is greater than 0.
    //
    // A key that is NULL, empty, or contains any non-digit character is
    // ignored: every character is used as an index into the 10-entry next[]
    // array, so a non-digit would otherwise access memory out of bounds. The
    // key is validated up front so that a rejected key leaves no partial path
    // in the trie.
    void Insert(TreeNode *pRoot, char Substr[], int id)
    {
        if (pRoot == NULL || Substr == NULL || Substr[0] == '\0')
        {
            return;
        }
        for (const char *c = Substr; *c != '\0'; ++c)
        {
            if (*c < '0' || *c > '9')
            {
                return;
            }
        }

        TreeNode *p = pRoot;
        for (const char *c = Substr; *c != '\0'; ++c)
        {
            const int index = *c - '0';
            if (p->next[index] == NULL)
            {
                p->next[index] = new TreeNode;
            }
            p = p->next[index];
        }
        p->id = id;
    }
    
    // Fills in the fail pointer of every non-root node of the trie rooted at
    // pRoot, visiting nodes breadth-first.
    //
    // Children of the root fail to the root. For any other child reached by
    // digit d, the parent's fail chain is followed until a node with a child
    // for d is found; that child becomes the fail target. If the chain runs out
    // (reaches NULL) the fail target is the root.
    void BuildAC(TreeNode *pRoot)
    {
        std::queue<TreeNode*> pending;
        pending.push(pRoot);
        while (!pending.empty())
        {
            TreeNode *parent = pending.front();
            pending.pop();
            for (int digit = 0; digit < 10; ++digit)
            {
                TreeNode *child = parent->next[digit];
                if (child == NULL)
                {
                    continue;
                }

                // Root is the fallback whenever no better suffix node exists.
                TreeNode *target = pRoot;
                if (parent != pRoot)
                {
                    for (TreeNode *walker = parent->fail; walker != NULL; walker = walker->fail)
                    {
                        if (walker->next[digit] != NULL)
                        {
                            target = walker->next[digit];
                            break;
                        }
                    }
                }
                child->fail = target;
                pending.push(child);
            }
        }
    }
    
    // Scans the NUL-terminated text str through the automaton rooted at pRoot
    // (fail links must already be set by BuildAC) and records every key that
    // occurs in it.
    //
    // For each key found for the first time, its id is appended to the global
    // res array, nCount is incremented, flag is set to true, and the node's id
    // is overwritten with -1 so the same key is never reported twice.
    //
    // Characters outside '0'..'9' cannot be part of any key and would index
    // past the 10-entry next[] array, so they simply reset the automaton to the
    // root. Writes to res are bounded by its capacity; matches beyond that are
    // still marked as seen but not stored.
    void Query(TreeNode *pRoot, char str[])
    {
        if (pRoot == NULL || str == NULL)
        {
            return;
        }
        const int capacity = (int)(sizeof(res) / sizeof(res[0]));
        TreeNode *p = pRoot;
        for (const char *c = str; *c != '\0'; ++c)
        {
            if (*c < '0' || *c > '9')
            {
                p = pRoot;
                continue;
            }
            const int index = *c - '0';

            // Fall back along fail links until the digit can be consumed.
            while (p != pRoot && p->next[index] == NULL)
            {
                p = p->fail;
            }
            p = (p->next[index] != NULL) ? p->next[index] : pRoot;

            // Every node on the fail chain is a suffix of the text read so far;
            // report the ones that end a key. The walk stops at the root or at
            // a node already marked as reported (-1).
            for (TreeNode *temp = p; temp != pRoot && temp->id != -1; temp = temp->fail)
            {
                if (temp->id > 0)
                {
                    if (nCount < capacity)
                    {
                        res[nCount++] = temp->id;
                    }
                    temp->id = -1;
                    flag = true;
                }
            }
        }
    }
    
    // Recursively frees the subtree rooted at pRoot: all ten child subtrees
    // first, then the node itself. A NULL pointer is accepted and ignored.
    void DeleteNode(TreeNode *pRoot)
    {
        if (pRoot == NULL)
        {
            return;
        }
        for (int slot = 0; slot < 10; ++slot)
        {
            DeleteNode(pRoot->next[slot]);
        }
        delete pRoot;
    }
    
    int main()
    {
        int m, n;
        scanf("%d%d", &m, &n);
        char temp[105];
        char str[60001];
        memset(str, 0, sizeof(str));
        TreeNode *pRoot = new TreeNode;
        for (int i = 0; i < m; i++)
        {
            scanf("%s", temp);
            strcat(str, temp);
        }
        int num;
        for (int i = 0; i < n; i++)
        {
            while(1)
            {
                char ch = getchar();
                if (ch == ']')
                {
                    getchar();
                    break;
                }
            }
            scanf("%s", temp);
            Insert(pRoot, temp, i + 1);
        }
        BuildAC(pRoot);
        Query(pRoot, str);
        if (flag)
        {
            printf("Found key: ");
            for (int i = 0; i < nCount; i++)
            {
                printf(i == nCount - 1 ? "[Key No. %d]
    " : "[Key No. %d] ", res[i]);
            }
        }
        else
        {
            printf( "No key can be found !
    " );     
        }
        DeleteNode(pRoot);
        return 0;
    }
  • 相关阅读:
    20.12.2 leetcode7
    20.12.1 leetcode34
    20.11.30 leetcode767
    20.11.29 leetcode976
    Codeforces632E 选择/小偷与商店 背包DP
    魔法少女 DP NG放的水
    逆反的01串 模拟 NG放的水
    最大数maxnumber bzoj1012 JSOI2008 单调队列
    组合数问题 vijos2006 NOIP2016 D2T1 杨辉三角 排列组合 前缀和
    信息传递 vijos1979 NOIP2015D1T2 强连通分量 tarjan模版题
  • 原文地址:https://www.cnblogs.com/lzmfywz/p/3184924.html
Copyright © 2011-2022 走看看