zoukankan      html  css  js  c++  java
  • Common Substrings POJ

    A substring of a string T is defined as:

    T(i, k)=TiTi+1...Ti+k-1, 1≤i≤i+k-1≤|T|.
    Given two strings A, B and one integer K, we define S, a set of triples (i, j, k):

    S = {(i, j, k) | k≥K, A(i, k)=B(j, k)}.
    You are to give the value of |S| for specific A, B and K.

    Input
    The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.

    1 ≤ |A|, |B| ≤ 105
    1 ≤ K ≤ min{|A|, |B|}
    Characters of A and B are all Latin letters.

    Output
    For each case, output an integer |S|.

    Sample Input
    2
    aababaa
    abaabaa
    1
    xx
    xx
    0
    Sample Output
    22
    5

    题意:求串1和串2中所有长度 (geq k) 的相同字串个数。
    思路:首先要把两个字符串连接起来,并用一个没出现的字符分隔, 假设 Sa[j] 属于串1,Sa[i ~ (j - 1)] 属于串2, 且 Height[i + 1] 到 Height[j] 单调递减且 Height[j] (geq k)
    根据后缀数组的性质, Sa[i ~ (j - 1)] 和 Sa[j] 的贡献为 (j - i) * (Height[i] - k + 1), 所以我们可以维护一个单调递增的栈, 将所有连续递减的Height[i]压缩成一个块,
    记录这个块的最小 Height 和属于串2的后缀的个数,当 Sa[i] 属于串1时,加上前面所有块的贡献,为了快速计算贡献和,要维护一个前缀和。
    所以我们for两次, 一次求串1的后缀和该后缀前面所有串2后缀的贡献,一次求串2的后缀和该后缀前面所有串1后缀的贡献。

    #include <cstdio>
    #include <algorithm>
    #include <queue>
    #include <stack>
    #include <string>
    #include <math.h>
    #include <string.h>
    #include <map>
    #include <iostream>
    using namespace std;
    const int maxn = 5e5 + 50;
    const int mod = 20090717;
    int INF = 1e9;
    typedef long long LL;
    typedef pair<int, int> pii;
    #define fi first
    #define se second
    int Sa[maxn], Height[maxn], Tax[maxn], Rank[maxn], tp[maxn], a[maxn], n, m, minLen;
    char str[maxn];
    
    void Rsort(){
        for(int i = 0; i <= m; i++) Tax[i] = 0;
        for(int i = 1; i <= n; i++) Tax[Rank[tp[i]]]++;
        for(int i = 1; i <= m; i++) Tax[i] += Tax[i - 1];
        for(int i = n; i >= 1; i--) Sa[Tax[Rank[tp[i]]]--] = tp[i];
    }
    
    int cmp(int *f, int x, int y, int w){
        if(x + w > n || y + w > n) return 0; // 注意防止越界,多组输入的时候这条必须有
        return f[x] == f[y] && f[x + w] == f[y + w];
    }
    
    void Suffix(){
        for(int i = 1; i <= n; i++) Rank[i] = a[i], tp[i] = i;
        m = 200, Rsort();
        for(int w = 1, p = 1, i; p < n; w += w, m = p){
            for(p = 0, i = n - w + 1; i <= n; i++) tp[++p] = i;
            for(i = 1; i <= n; i++) if(Sa[i] > w) tp[++p] = Sa[i] - w;
            Rsort(), swap(Rank, tp), Rank[Sa[1]] = p = 1;
            for(int i = 2; i <= n; i++)  Rank[Sa[i]] = cmp(tp, Sa[i], Sa[i - 1], w) ? p : ++p;
        }
        int j, k = 0;
        for(int i = 1; i <= n; Height[Rank[i++]] = k){
            for(k = k ? k - 1 : k, j = Sa[Rank[i] - 1]; a[i + k] == a[j + k]; ++k);
        }
    }
    
    int dpmi[maxn][60];
    void RMQ(){
        for(int i = 1; i <= n; i++){
            dpmi[i][0] = Height[i];
        }
        for(int j = 1; (1 << j) <= n; j++){
            for(int i = 1; i + (1 << j) - 1 <= n; i++){
                dpmi[i][j] = min(dpmi[i][j - 1], dpmi[i + (1 << (j - 1))][j - 1]);
            }
        }
    }
    
    int QueryMin(int l, int r){
        int k = log2(r - l + 1);
        return min(dpmi[l][k], dpmi[r - (1 << k) + 1][k]);
    } 
    
    int QueryLcp(int i, int j){
        if(i > j) swap(i, j);
        i++;
        return QueryMin(i, j);
    }
    
    
    int Find(int i){
        int le = i, ri = n;
        int res = 0;
        while(le <= ri){
            int mid = (le + ri) >> 1;
            if(QueryLcp(i, mid) >= minLen){
                le = mid + 1;
                res = max(res, mid);
            } else {
                ri = mid - 1;
            }
        }
        return res;
    }
    struct qnode
    {
        int cnt, h;
        LL sum;
    } stk[maxn];
    int main(int arg, char const *argv[])
    {
        while(1){
            scanf("%d", &minLen);
            if(!minLen) break;
            scanf("%s", str + 1);
            int len = strlen(str + 1) + 1;
            str[len] = '0';
            scanf("%s", str + len + 1);
            n = strlen(str + 1);
            for(int i = 1; i <= n; i++) a[i] = str[i];
            Suffix();
            LL ans = 0;
            int top = 0;
            for(int i = 2; i <= n; i++){
                int cnt = 0;
                if(Height[i] < minLen) {
                    top = 0;
                    continue;
                }
                while(top && Height[i] <= stk[top].h) {
                    cnt += stk[top--].cnt;
                }
                if(Sa[i - 1] > len) cnt++;
                stk[++top].cnt = cnt;
                stk[top].h = Height[i];
                stk[top].sum = stk[top - 1].sum + (stk[top].h - minLen + 1) * stk[top].cnt;
                if(Sa[i] <= len) ans += stk[top].sum;
            }
    
            top = 0;
            for(int i = 2; i <= n; i++){
                int cnt = 0;
                if(Height[i] < minLen) { // 这步必须有
                    top = 0;
                    continue;
                }
                while(top && Height[i] <= stk[top].h) {
                    cnt += stk[top--].cnt;
                }
                if(Sa[i - 1] <= len) cnt++;
                stk[++top].cnt = cnt;
                stk[top].h = Height[i];
                stk[top].sum = stk[top - 1].sum + (stk[top].h - minLen + 1) * stk[top].cnt;
                if(Sa[i] > len) ans += stk[top].sum;
            }
            printf("%I64d
    ", ans);
        }
        return 0;
    }
    
    
  • 相关阅读:
    sed 删除最后几行 和删除指定行 awk使用
    nagios监控
    nginx服务启动脚本
    Linux修改环境变量的4种方法
    源码编译apache报错的解决方法
    apache源码安装及启动脚本添加
    haproxy配置
    awk命令2
    awk命令1
    通配符及变量
  • 原文地址:https://www.cnblogs.com/PCCCCC/p/13282920.html
Copyright © 2011-2022 走看看