zoukankan      html  css  js  c++  java
  • POJ3294--Life Forms 后缀数组+二分答案 大于k个字符串的最长公共子串

                                                                              Life Forms
    Time Limit: 5000MS   Memory Limit: 65536K
    Total Submissions: 10800   Accepted: 2967

    Description

    You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

    The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

    Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

    Input

    Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

    Output

    For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

    Sample Input

    3
    abcdefg
    bcdefgh
    cdefghi
    3
    xxx
    yyy
    zzz
    0

    Sample Output

    bcdefg
    cdefgh
    
    ?

    题意:给出 n 个字符串,求在超过 n/2 个字符串中都出现的最长子串;如果有多个,按字典序全部输出。

    大致思路:首先把所有字符串用不相同的一个字符隔开(用同一个字符隔开wa了好久), 这里我是用数字来隔开的。
    然后依次求 sa 和 lcp。我们可以二分答案的长度:对于长度 x,把后缀按 sa 的顺序分组(遇到 lcp[i] < x 就断开),然后对每一组统计其中的后缀来自多少个不同的字符串,如果个数大于 n/2,说明长度 x 可行。至于字典序,不需要额外排序,因为我们是按照 sa 数组的顺序遍历 lcp 的,所以直接得到的答案就是按字典序从小到大排列的。
      1 #include <set>
      2 #include <map>
      3 #include <cmath>
      4 #include <ctime>
      5 #include <queue>
      6 #include <stack>
      7 #include <cstdio>
      8 #include <string>
      9 #include <vector>
     10 #include <cstdlib>
     11 #include <cstring>
     12 #include <iostream>
     13 #include <algorithm>
     14 using namespace std;
     15 typedef unsigned long long ull; // not referenced by the code shown in this listing
     16 typedef long long ll; // not referenced by the code shown in this listing
     17 const int inf = 0x3f3f3f3f; // large "infinity" sentinel for int minima
     18 const double eps = 1e-8; // floating-point tolerance; not referenced by the code shown in this listing
     19 const int M = 2e6+10; // capacity of the working arrays; main also uses it as the base for separator values (M+del) and as the binary-search upper bound
     20 int s[M]; // concatenated input text, one int per symbol: the letters of every string, each string followed by its own separator value
     21 int sa[M], tmp[M], rank[M], lcp[M], k, len; // sa: suffix array (len+1 entries, including the empty suffix at position len); tmp: scratch ranks for build_sa; rank: rank of the suffix starting at each position; lcp[r]: common-prefix length of suffixes sa[r-1] and sa[r]; k: prefix width compared by cmp in the current doubling round; len: number of symbols in s. NOTE(review): under "using namespace std" the name rank may be ambiguous with std::rank on C++11-and-later toolchains -- confirm with the target compiler
     22 bool cmp(int i, int j)
     23 {
     24     if (rank[i] != rank[j])
     25         return rank[i] < rank[j];
     26     else
     27     {
     28         int x = i+k <= len ? rank[i+k] : -1;
     29         int y = j+k <= len ? rank[j+k] : -1;
     30         return x < y;
     31     }
     32 }
     33 void build_sa()
     34 {
     35     for (int i = 0; i <= len; i++)
     36     {
     37         sa[i] = i;
     38         rank[i] = i < len ? s[i] : -1;
     39     }
     40     for (k = 1; k <= len; k *= 2)
     41     {
     42         sort (sa, sa+len+1, cmp);
     43         tmp[sa[0]] = 0;
     44         for (int i = 1; i <= len; i++)
     45         {
     46             tmp[sa[i]] = tmp[sa[i-1]] + (cmp(sa[i-1], sa[i]) ? 1 : 0);
     47         }
     48         for (int i = 0; i <= len; i++)
     49         {
     50             rank[i] = tmp[i];
     51         }
     52     }
     53 }
     54 void Get_Lcp()
     55 {
     56     for (int i = 0; i < len; i++)
     57     {
     58         rank[sa[i]] = i;
     59     }
     60     int h = 0;
     61     lcp[0] = 0;
     62     for (int i = 0; i <  len; i++)
     63     {
     64         int j = sa[rank[i]-1];
     65         if (h > 0)
     66             h--;
     67         for (; i+h < len && j+h < len; h++)
     68             if (s[i+h] != s[j+h])
     69                 break;
     70         lcp[rank[i]] = h;
     71     }
     72 }
     73 int vis[110], pos[M]; // vis[id]: string id already counted in the suffix group being scanned by solve; pos[i]: 1-based index of the input string that owns position i of s, -1 at separator positions (filled in main)
     74 int ans[M], tot; // ans[0..tot): start positions in s of the substrings recorded by the last successful solve call
     75 int Stack[M], top; // not referenced by the code shown in this listing
     76 bool solve (int x, int n)
     77 {
     78     int minv = inf;
     79     int cnt = 0;
     80     bool flag = false;
     81     for (int i = 0; i <= len+1; i++)
     82     {
     83         if (lcp[i] < x)
     84         {
     85 
     86             if ( cnt+ (!vis[pos[sa[i-1]]]) > n/2 && (minv != inf && minv >= x))
     87             {
     88                 if (!flag )
     89                     tot = 0;
     90                 flag = true;
     91                 ans[tot++] = sa[i-1];
     92             }
     93             minv = inf;
     94             cnt = 0;
     95             memset(vis, 0, sizeof (vis));
     96             continue;
     97         }
     98         if ( vis[pos[sa[i-1]]]==0)
     99         {
    100             cnt++;
    101 
    102         }
    103         vis[pos[sa[i-1]]] = 1;
    104         minv = min(minv, lcp[i]);
    105 
    106     }
    107     return tot > 0 && flag;
    108 }
    109 int string_len[110], c1; // string_len[j]: index in s of the separator that follows input string j (0-based), as built in main; c1: number of strings appended so far
    110 void init()
    111 {
    112     c1 = tot = 0;
    113     memset(vis, 0, sizeof (vis));
    114     memset(string_len, 0, sizeof (string_len));
    115 }
    116 char cacaca[1100]; // input buffer for one DNA string, filled by scanf in main
    117 int main()
    118 {
    119 #ifndef ONLINE_JUDGE
    120     freopen("in.txt","r",stdin);
    121    // freopen("wa.txt","w",stdout);
    122 #endif
    123     int n, cas = 1;
    124     while ( scanf ("%d", &n), n)
    125     {
    126         if (cas != 1)
    127             printf("
    ");
    128         cas++;
    129         init();
    130         len = 0;
    131         int del = 1;
    132         for (int i = 0; i < n; i++)
    133         {
    134             scanf ("%s", cacaca);
    135             int sub_len = strlen(cacaca);
    136             for (int j = 0; j < sub_len; j++)
    137             {
    138                 s[len++] = cacaca[j];
    139             }
    140             s[len++] = M+del;
    141             del++;
    142             string_len[c1] = sub_len + string_len[c1-1];
    143             if (c1)
    144                 string_len[c1]++;
    145             c1++;
    146         }
    147         if (n == 1)
    148         {
    149             for (int i = 0; i < len-1; i++)
    150             {
    151                 printf("%c", s[i]);
    152             }
    153             continue;
    154         }
    155         for (int i = 0, j = 0; i < len; i++)
    156         {
    157             if (i >= string_len[j])
    158             {
    159                 pos[i] = -1;
    160                 j++;
    161                 continue;
    162             }
    163             pos[i] = j+1;
    164         }
    165         build_sa();
    166         Get_Lcp();
    167 
    168         int ua = 0, ub = M;
    169         while (ua + 1 < ub)
    170         {
    171             int mid = (ua + ub) >> 1;
    172             if (mid&&solve(mid, n) == true)
    173             {
    174 
    175                 ua = mid;
    176             }
    177             else
    178                 ub = mid;
    179         }
    180         if (tot == 0)
    181             printf("?
    ");
    182         else
    183         {
    184             if (ua == 0)
    185             {
    186                 printf("?
    ");
    187                 continue;
    188             }
    189             for (int i = 0; i < tot; i++)
    190             {
    191                 for (int j = ans[i]; j < ans[i]+ua; j++)
    192                 {
    193                     printf("%c", s[j]);
    194                 }
    195                 printf("
    ");
    196             }
    197         }
    198     }
    199     return 0;
    200 }
     
  • 相关阅读:
    关于UI设计的文章汇总
    Linq 中不爽之处
    难题autoconf、automake、libtool
    静态构造函数线程安全的几个版本[转载]
    Window Live Writer
    Guid、Int、BigInt编号的速度和存储空间的比较
    MVP模式中的P和V关系
    LR 剖析器
    快速软件开发 学习笔记 之七
    快速软件开发 学习笔记 之六
  • 原文地址:https://www.cnblogs.com/oneshot/p/4448746.html
Copyright © 2011-2022 走看看