zoukankan      html  css  js  c++  java
  • hdu 1277 全文检索 (直接映射查找 || 自动机)

    Problem - 1277

      无聊做水题的时候发现的一道题目。这道题第一反应可以用自动机来解决。当然,条件是各种限制,从而导致可以用直接映射标记的方法来搜索。具体的做法就像RK算法一样,将字符串hash成一个数,这里每一个关键字前四位都是不同的,这样就有利于hash搜索了。当前四位匹配的时候,就可以搜索整个串是否完全匹配。这整个的复杂度大概是O(n*m),n是全文测长度,m是关键字的长度。

    自动机代码:

      1 #include <cstdio>
      2 #include <iostream>
      3 #include <algorithm>
      4 #include <cstring>
      5 #include <string>
      6 #include <queue>
      7 #include <set>
      8 
      9 using namespace std;
     10 
     11 const int kind = 10;
     12 struct Node {
     13     Node *next[kind];
     14     Node *fail;
     15     string label;
     16     Node() {
     17         memset(next, 0, sizeof(next));
     18         fail = NULL;
     19         label = "";
     20     }
     21     Node(char *tmp) {
     22         memset(next, 0, sizeof(next));
     23         fail = NULL;
     24         label = tmp;
     25     }
     26 } ;
     27 
     28 struct Trie {
     29     Node *Root;
     30     Trie() { Root = new Node();}
     31 
     32     void insert(char *s, char *label) {
     33         Node *p = Root;
     34         while (*s) {
     35             int idx = *s - '0';
     36             if (!p->next[idx]) {
     37                 p->next[idx] = new Node();
     38                 p->next[idx]->fail = p;
     39             }
     40             p = p->next[idx];
     41             s++;
     42         }
     43         p->label = label;
     44     }
     45     void build() {
     46         queue<Node *> Q;
     47         while (!Q.empty()) Q.pop();
     48         Q.push(Root);
     49         Root->fail = NULL;
     50         Node *cur, *p;
     51         while (!Q.empty()) {
     52             cur = Q.front();
     53             Q.pop();
     54             for (int i = 0; i < kind; i++) {
     55 //                cout << cur->next[i] << ' ';
     56                 if (cur->next[i]) {
     57 //                    cout << i << ' ';
     58                     Q.push(cur->next[i]);
     59                     p = cur->fail;
     60                     while (p) {
     61                         if (p->next[i]) {
     62                             cur->next[i]->fail = p->next[i];
     63                             break;
     64                         }
     65                         p = p->fail;
     66                     }
     67                     if (!p) cur->next[i]->fail = Root;
     68                 }
     69             }
     70         }
     71     }
     72     void query(char *s) {
     73         set<string> vis;
     74         bool pnt = false;
     75         Node *cur = Root;
     76         while (*s) {
     77             int idx = *(s++) - '0';
     78             while (cur && !cur->next[idx]) cur = cur->fail;
     79             cur = cur ? cur->next[idx] : Root;
     80             if (vis.find(cur->label) != vis.end()) continue;
     81             if (cur->label.length()) {
     82                 if (!pnt) {
     83                     printf("Found key:");
     84                     pnt = true;
     85                 }
     86                 cout << ' ' << cur->label;
     87                 vis.insert(cur->label);
     88             }
     89         }
     90         if (!pnt) puts("No key can be found !");
     91         cout << endl;
     92     }
     93 } trie;
     94 
     95 char str[66666], buf[111];
     96 
     97 int main() {
     98 //    freopen("in", "r", stdin);
     99     int n, m;
    100     while (cin >> n >> m) {
    101         trie = Trie();
    102         gets(buf);
    103         str[0] = 0;
    104         while (n-- && gets(buf)) strcat(str, buf);
    105         gets(buf);
    106         while (m-- && gets(buf)) {
    107             char *p = buf;
    108             while (*(p++) != ']') ;
    109             *(p++) = 0;
    110             while (*p == ' ') p++;
    111             trie.insert(p, buf);
    112 //            cout << p << ' ' << buf << " inserted!"<< endl;
    113         }
    114         trie.build();
    115 //        cout << "built!!" << endl;
    116         trie.query(str);
    117     }
    118     return 0;
    119 }
    View Code

    UPD:

     1 #include <cstdio>
     2 #include <iostream>
     3 #include <algorithm>
     4 #include <cstring>
     5 #include <string>
     6 
     7 using namespace std;
     8 
     9 const int HASHLEN = 5;
    10 const int M = 111111;
    11 const int N = 11111;
    12 string label[N], key[N];
    13 bool vis[N];
    14 int id[M];
    15 char str[6 * N], buf[N];
    16 
    17 bool check(int t, char *p) {
    18     int len = key[t].length();
    19     for (int i = 0; i < len; i++) {
    20         if (key[t][i] != p[i]) return false;
    21     }
    22     return true;
    23 }
    24 
    25 void work(char *p) {
    26     bool pnt = false;
    27     char *q = p;
    28     int cur = 0, ep = 10;
    29     for (int i = 0; i < HASHLEN - 1; i++) cur = cur * 10 + *(q++) - '0', ep *= 10;
    30     while (*q && *p) {
    31 //        cout << cur << endl;
    32         cur = cur * 10 + *q - '0';
    33         cur %= ep;
    34         int t = id[cur];
    35         if (t != -1 && !vis[t]) {
    36             if (check(t, p)) {
    37                 if (!pnt) printf("Found key:");
    38                 pnt = vis[t] = true;
    39                 cout << ' ' << label[t];
    40             }
    41         }
    42         p++, q++;
    43     }
    44     if (!pnt) printf("No key can be found !");
    45     cout << endl;
    46 }
    47 
    48 int main() {
    49 //    freopen("in", "r", stdin);
    50     int n, m;
    51     while (cin >> n >> m) {
    52         memset(id, -1, sizeof(id));
    53         memset(vis, 0, sizeof(vis));
    54         gets(buf);
    55         str[0] = 0;
    56         while (n-- && gets(buf)) strcat(str, buf);
    57         gets(buf);
    58         for (int i = 0; i < m; i++) {
    59             gets(buf);
    60             char *p = buf;
    61             while (*p && *p != ']') p++;
    62             *(++p) = 0;
    63             p++;
    64             while (*p && *p == ' ') p++;
    65             label[i] = buf;
    66             key[i] = p;
    67 //            cout << label[i] << ' ' << key[i] << endl;
    68             int cid = 0;
    69             for (int i = 0; i < HASHLEN; i++) cid = cid * 10 + p[i] - '0';
    70 //            cout << cid << endl;
    71             if (id[cid] != -1) { puts("shit!"); while (1) ;}
    72             id[cid] = i;
    73         }
    74         work(str);
    75     }
    76     return 0;
    77 }
    View Code

      不知道为什么这个代码能过G++但是C++过不了,应该是代码里面有bug。另外,数据似乎跟描述的有点出入,说好的不同key值的前四位不会相同,可是写了句来试它的数据时就发现被骗了,所以后来用了5位,过了!

    ——written by Lyon

  • 相关阅读:
    贝叶斯思想的实质之我见
    强化学习基础概念理解
    Thinkpad x200用户只能放弃生化危机5(PC版), 希望能全速运行星际争霸2!
    This is it
    今天自己掏腰包去买联通iPhone有几位?
    今天是我的生日:)
    2009已经到来 / 2009 Just the Beginning
    好评如潮的PS3游戏《抵抗2 Resistance2》你玩了吗?
    生化危机5 / BIOHAZARD5 简直就是一款完美的印钞机?(+2009.4.9)
    一部好电影《第九区 District 9》
  • 原文地址:https://www.cnblogs.com/LyonLys/p/hdu_1277_Lyon.html
Copyright © 2011-2022 走看看