zoukankan      html  css  js  c++  java
  • UVA 11107 Life Forms——(多字符串的最长公共子串,后缀数组+LCP)

    题意: 输入n个序列,求出一个最大长度的字符串,使得它在超过一半的DNA序列中连续出现。如果有多解,按照字典序从小到大输出所有解。

    分析:这道题的关键是将多个字符串连接成一个串,方法是用互不相同的分隔符把所有原串拼接起来。接下来,就可以求这个新串的后缀数组和 height 数组,然后二分答案,每次只需判断是否有一个长度为p的串在超过一半的原串中出现过。判断方法是扫描一遍height数组,把它分成若干段:每当height[i] < p时,开辟一个新段,并判断之前的段是否包含了来自超过 n/2 个不同原串的后缀,如果是,那么当前的p值满足条件(注意n = 1时要特判)。

    详见代码:

    #include <iostream>
    #include <cstdio>
    #include <algorithm>
    #include <cstring>
    #include <map>
    #include <vector>
    using namespace std;
    
    // Sizing constants: every text-length array below holds maxn * maxm
    // elements.
    const int maxn = 104;
    const int maxm = 1005;

    // s  : the text to index.
    // sa : suffix array produced by build_sa().
    // t, t2 : rank / second-key scratch buffers used by build_sa().
    // c  : counting-sort buckets used by build_sa().
    char s[maxn*maxm];
    int sa[maxn*maxm], t[maxn*maxm], t2[maxn*maxm], c[maxn*maxm];

    // Number of characters of s that are indexed.
    int N;

    // Builds the suffix array of s[0..N-1] into sa[0..N-1] by prefix doubling,
    // sorting with a counting sort in every round.
    //
    // m: exclusive upper bound on the character codes in s[0..N-1]; every code
    //    must lie in [0, m) because codes are used directly as bucket indices.
    //
    // On return sa[r] is the start index of the r-th smallest suffix.
    void build_sa(int m) {
        int* x = t, *y = t2;

        // Round 0: counting sort of the suffixes by their first character.
        for (int i = 0; i < m; i++) c[i] = 0;
        for (int i = 0; i < N; i++) c[x[i] = s[i]]++;
        for (int i = 1; i < m; i++) c[i] += c[i-1];
        for (int i = N-1; i >= 0; i--) sa[--c[x[i]]] = i;

        for (int k = 1; k <= N; k <<= 1) {
            // y lists the suffixes ordered by their second half (k characters
            // starting at offset k). Suffixes too short to have a second half
            // come first.
            int p = 0;
            for (int i = N-k; i < N; i++) y[p++] = i;
            for (int i = 0; i < N; i++) if (sa[i] >= k) y[p++] = sa[i] - k;

            // Stable counting sort of that list by the first-half rank.
            for (int i = 0; i < m; i++) c[i] = 0;
            for (int i = 0; i < N; i++) c[x[y[i]]]++;
            for (int i = 1; i < m; i++) c[i] += c[i-1];
            for (int i = N-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

            // Re-rank: after the swap y holds the previous ranks and x
            // receives the new ones.
            std::swap(x, y);
            p = 1;
            x[sa[0]] = 0;
            for (int i = 1; i < N; i++) {
                const int a = sa[i-1];
                const int b = sa[i];
                // A suffix with fewer than k characters left has no second
                // half. Represent that as rank -1 (below every real rank)
                // instead of reading y[] at or beyond N, which would pick up
                // ranks left over from earlier rounds or earlier texts.
                const int a2 = (a + k < N) ? y[a+k] : -1;
                const int b2 = (b + k < N) ? y[b+k] : -1;
                x[b] = (y[a] == y[b] && a2 == b2) ? p-1 : p++;
            }
            if (p >= N) break;  // all ranks distinct: sa is final
            m = p;              // ranks are now the alphabet of the next round
        }
    }
    // rnk[i]    : position of the suffix starting at i inside sa.
    // height[r] : length of the common prefix of the suffixes sa[r-1] and
    //             sa[r]; get_height() never writes height[0].
    int rnk[maxn*maxm], height[maxn*maxm];

    // Fills rnk[] as the inverse of sa[] and then height[] for every rank
    // r >= 1, walking the text positions in increasing order and carrying
    // the previous match length (minus one) over to the next position.
    //
    // The character comparison has no explicit bound: it stops at the first
    // mismatch, so it depends on what the caller placed at the end of s.
    void get_height() {
        for (int r = 0; r < N; ++r) {
            rnk[sa[r]] = r;
        }

        int lcp = 0;
        for (int pos = 0; pos < N; ++pos) {
            const int r = rnk[pos];
            if (r != 0) {
                // Start from the previous match length minus one rather
                // than from zero.
                if (lcp > 0) --lcp;
                const int prev = sa[r - 1];
                while (s[pos + lcp] == s[prev + lcp]) ++lcp;
                height[r] = lcp;
            }
        }
    }
    
    // n      : number of input strings in the current test case.
    // s2     : buffer for one input string as read by main().
    // sign[i]: index in s of the separator appended after input string i.
    // mlen   : longest length accepted so far by the binary search in main().
    // A      : start positions (in s) of the answers for mlen.
    // flag[t]: index into sa at which a suffix of string t was last seen by
    //          find().
    // Map    : separator characters handed out by gen_sign().
    int n;
    char s2[maxm];
    int sign[maxn];
    int mlen;
    vector<int> A;
    int flag[maxn];
    map<char, int> Map;

    // Checks whether length p is feasible: scans sa in rank order, splits it
    // into groups of consecutive suffixes whose height is >= p, and counts per
    // group how many different input strings contribute a suffix that does
    // not start with a separator.
    //
    // p: candidate length.
    // A: output; for every group that covers more than n/2 strings, the text
    //    position sa[first index of the group] is appended.
    //
    // Returns true when at least one group qualified.
    //
    // NOTE: flag[] starts out as all zeros and a string is counted only when
    // flag[t] < (group start), so inside the group that begins at index 0
    // nothing beyond its first suffix is ever counted.
    bool find(int p, vector<int> &A) {
        memset(flag, 0, sizeof flag);

        bool feasible = false;
        int groupStart = 0;
        int distinct = 0;

        // Open the first group with the suffix at index 0 of sa.
        {
            const int owner = lower_bound(sign, sign+n, sa[0]) - sign;
            if (Map.count(s[sa[0]]) == 0) distinct = 1;
            flag[owner] = 0;
        }

        for (int i = 1; i < N; ++i) {
            // Input string that the suffix sa[i] starts in: the first
            // separator at or after that position.
            const int owner = lower_bound(sign, sign+n, sa[i]) - sign;
            const bool startsWithSeparator = Map.count(s[sa[i]]) != 0;

            if (height[i] < p) {
                // The group ends just before i: report it, then start a new
                // group at i.
                if (distinct > n/2) {
                    feasible = true;
                    A.push_back(sa[groupStart]);
                }
                groupStart = i;
                distinct = startsWithSeparator ? 0 : 1;
                flag[owner] = i;
                continue;
            }

            // Same group: count the owner only the first time it shows up.
            if (!startsWithSeparator && flag[owner] < groupStart) {
                ++distinct;
            }
            flag[owner] = i;

            // The last group has no following boundary, so report it here.
            if (i == N-1 && distinct > n/2) {
                feasible = true;
                A.push_back(sa[groupStart]);
            }
        }
        return feasible;
    }
    // Number of separators handed out so far; main() resets it per test case.
    int cnt;

    // Picks the smallest code in [1, 128) that is not a lowercase letter and
    // is not yet a key of Map, stores it in Map with the next ordinal and
    // returns it as the separator character.
    // If the search finds no free code, the value 128 is used as is.
    char gen_sign() {
        int code = 1;
        while (code < 128) {
            const bool lowercase = ('a' <= code && code <= 'z');
            if (!lowercase && Map.count(code) == 0) break;
            ++code;
        }
        Map[code] = ++cnt;
        return code;
    }
    // Reads test cases until the string count is 0 or input ends. For each
    // case it prints every longest substring accepted by find(), one per
    // line, or "?" when no length >= 1 is accepted. Consecutive cases are
    // separated by a blank line.
    int main() {
        int tt = 0;
        while (scanf("%d", &n) == 1 && n) {
            if (tt++) puts("");  // blank line between consecutive cases

            // A single string is its own answer.
            if (n == 1) {
                scanf("%s", s);
                printf("%s\n", s);
                continue;
            }

            // Concatenate all strings into s, each followed by its own
            // separator, and remember where every separator sits.
            cnt = 0;
            Map.clear();
            N = 0;
            for (int i = 0; i < n; i++) {
                scanf("%s", s2);
                strcpy(s+N, s2);
                N += strlen(s2);
                s[N++] = gen_sign();
                sign[i] = N-1;
            }
            s[N] = '\0';

            // 128 buckets: gen_sign() may return any code below 128.
            build_sa(128);
            get_height();

            // Binary search for the largest length that find() accepts,
            // keeping the answer positions of the best length seen so far.
            mlen = 0;
            int L = 0, R = N-1;
            A.clear();
            vector<int> B;
            while (R >= L) {
                int M = L + (R-L+1)/2;
                B.clear();
                if (find(M, B)) {
                    mlen = M;
                    A = B;
                    L = M+1;
                }
                else R = M-1;
            }

            if (A.empty()) printf("?\n");
            // find() appends positions while walking sa in rank order, so the
            // answers are printed in that order.
            for (size_t i = 0; i < A.size(); i++) {
                printf("%.*s\n", mlen, s + A[i]);
            }
        }
        return 0;
    }
  • 相关阅读:
    比较全的屏幕信息
    使用div实现progress进度条
    选项卡效果的菜单栏
    javascript写的轮播图
    centos6.5 命令行配置无线上网
    CentOS 6.5 BCM43142 80211无线网卡驱动安装
    [数据库] windows server 2003下mysql出现10048错误的解决办法 Can't connect to MySQL server on '127.0.0.1' (10048)
    桥接模式-多台虚拟机配置(重要)
    VMware虚拟机中如何配置静态IP
    MySQL5.7 mysql.user创建用户
  • 原文地址:https://www.cnblogs.com/Kiraa/p/6151259.html
Copyright © 2011-2022 走看看