zoukankan      html  css  js  c++  java
  • poj 3693 Maximum repetition substring 重复次数最多的连续子串

    题目链接

    题意

    对于任意的字符串,定义它的 重复次数 为:它最多可被划分成的完全相同的子串个数。例如:ababab 的重复次数为3,ababa 的重复次数为1.

    现给定一字符串,求它的一个子串,其重复次数取到最大值,且字典序取到最小值。

    思路

    参考 hzwer.

    首先,重复次数显然至少为 \(1\),所以下面只考虑重复次数 \(\geq 2\) 的情况。

    首先枚举重复部分的长度 \(L\)。若一个子串由长度为 \(L\) 的部分重复至少 \(2\) 次构成,则它必然覆盖 \(s[0], s[L], s[2L], \dots\) 中相邻的两个位置。

    所以只需枚举 \(i\),看 \(s[i \cdot L]\) 与 \(s[(i+1) \cdot L]\) 向左(\(le\))、向右(\(ri\))最多能匹配到多远,记总长度为 \(k = le + ri\),则重复次数 \(= \lfloor k/L \rfloor + 1\)。

    至于开头位置,则落在 \(i \cdot L - le\) 与 \(i \cdot L - le + (le + ri) \bmod L\) 之间,取字典序最小者。

    对于所有重复次数相同的子串,要使其字典序取到最小值,即取 \(rank\) 值最小者。

    上述步骤中,看最长匹配多远(即看最长公共前缀)与取 \(rank\) 最小值均借助 ST 表实现。

    Code

    #include <stdio.h>
    #include <iostream>
    #include <string.h>
    #define maxn 100010
    using namespace std;
    typedef long long LL;
    // File-level working storage, all sized by maxn.
    //   wa, wb   : the two key arrays that init() swaps between rounds
    //   wv       : first keys gathered in second-key order inside init()
    //   wt       : counting-sort buckets used by init()
    //   r1, r2   : the input string and its reverse as ints, each followed by a 0
    //   h1, h2   : height arrays produced by init() for r1 / r2
    //   rk1, rk2 : rank arrays produced by init() for r1 / r2
    //   sa       : suffix array written by init(); overwritten by every call
    //   kas      : number of test cases printed so far
    //   bin, Log : bin[k] = 2^k and Log[v] = floor(log2(v)), filled by rmqInit()
    // NOTE(review): the global n is not referenced by the code visible here;
    // every function that uses n takes it as a parameter.
    int wa[maxn], wb[maxn], wv[maxn], wt[maxn], r1[maxn], r2[maxn],
        h1[maxn], h2[maxn], rk1[maxn], rk2[maxn], sa[maxn], n, kas, bin[20], Log[maxn];
    // Sparse-table cell that keeps a value together with the 1-based index it came from.
    struct rmqNode { int val, p; } mn[maxn][20];
    // Best answer so far: repetition count, start position in s, substring length.
    struct node { int val, pos, len; };
    // Plain-minimum sparse tables built over h1 and h2 respectively.
    int mn1[maxn][20], mn2[maxn][20];
    // Current input string.
    char s[maxn];
    // Precomputes the lookup tables shared by the sparse-table routines:
    // bin[k] = 2^k for k in [0, 20) and Log[v] = floor(log2(v)) for v in [1, n].
    void rmqInit(int n) {
        bin[0] = 1;
        for (int k = 1; k < 20; ++k) {
            bin[k] = bin[k-1] * 2;
        }
        Log[0] = -1;  // seed value so that Log[1] = Log[0] + 1 = 0
        for (int v = 1; v <= n; ++v) {
            Log[v] = Log[v / 2] + 1;
        }
    }
    // Returns true when positions a and b carry the same key pair, i.e.
    // r[a] == r[b] and r[a+l] == r[b+l]. The second comparison is only
    // evaluated when the first one holds.
    bool cmp(int* r, int a, int b, int l) {
        if (r[a] != r[b]) return false;
        return r[a+l] == r[b+l];
    }
    // Builds the suffix array, rank array and height array of the integer
    // sequence r[0..n-1], whose values must lie in [0, m).
    //   r  : input sequence; work() passes a string followed by a single 0
    //   rk : output, rk[i] = index of suffix i inside sa
    //   h  : output, h[rk[i]] = length of the common prefix of suffix i and the
    //        suffix ranked immediately before it
    //   n  : number of elements in r (including the trailing 0)
    //   m  : exclusive upper bound on the values in r
    // The global sa receives the suffix array and is overwritten on every call;
    // wa, wb, wv and wt are used as scratch space.
    // NOTE(review): the height loop reads sa[rk[i] - 1] for every i < n-1, so it
    // presumably relies on r[n-1] being the unique smallest value (rank 0) —
    // confirm before calling with other inputs.
    void init(int* r, int* rk, int* h, int n, int m) {
        int* x=wa, *y=wb, *t, i, j, p;
        // Counting sort on the single values: x[i] is the key of suffix i, and sa
        // ends up listing the positions in non-decreasing key order.
        for (i = 0; i < m; ++i) wt[i] = 0;
        for (i = 0; i < n; ++i) ++wt[x[i] = r[i]];
        for (i = 1; i < m; ++i) wt[i] += wt[i - 1];
        for (i = n-1; i >= 0; --i) sa[--wt[x[i]]] = i;
    
        // Each round doubles the compared length j. p is the number of distinct
        // keys assigned in the round; the loop stops once p reaches n, and m is
        // shrunk to p so the next round's buckets cover exactly the keys in use.
        for (j = 1, p = 1; p < n; j <<= 1, m = p) {
            // y = positions ordered by their second key: the last j positions have
            // no second half and go first, then sa[i] - j in the current sa order.
            for (p = 0, i = n-j; i < n; ++i) y[p++] = i;
            for (i = 0; i < n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;
    
            // First keys, listed in second-key order.
            for (i = 0; i < n; ++i) wv[i] = x[y[i]];
    
            // Counting sort by first key; filling from the back keeps the
            // second-key order among equal first keys.
            for (i = 0; i < m; ++i) wt[i] = 0;
            for (i = 0; i < n; ++i) ++wt[wv[i]];
            for (i = 1; i < m; ++i) wt[i] += wt[i - 1];
            for (i = n-1; i >= 0; --i) sa[--wt[wv[i]]] = y[i];
    
            // Swap the key arrays (y now holds the old keys) and assign new keys in
            // sa order: neighbours with equal (key, key at +j) pairs share a key.
            t = x, x = y, y = t, x[sa[0]] = 0;
            for (p = 1, i = 1; i < n; ++i) x[sa[i]] = cmp(y, sa[i], sa[i-1], j) ? p - 1 : p++;
        }
    
        // rk is the inverse permutation of sa.
        for (i = 0; i < n; ++i) rk[sa[i]] = i;
        int k = 0;
        // Height array for positions 0..n-2: start from the previous match length
        // minus one (never below 0), then extend while the two suffixes agree.
        for (i = 0; i < n - 1; h[rk[i++]] = k) {
            for (k = k ? --k : 0, j = sa[rk[i] - 1]; r[i+k] == r[j+k]; ++k);
        }
    }
    // Fills the minimum sparse table mn over a[1..n]: level 0 copies a, and each
    // higher level takes the smaller of the two level-(j-1) cells that start at
    // i and at i + bin[j-1].
    void rmq1(int* a, int (*mn)[20], int n) {
        for (int pos = 1; pos <= n; ++pos) {
            mn[pos][0] = a[pos];
        }
        for (int lvl = 1; bin[lvl] <= n; ++lvl) {
            const int half = bin[lvl-1];
            for (int pos = 1; pos + half - 1 <= n; ++pos) {
                const int left = mn[pos][lvl-1];
                const int right = mn[pos+half][lvl-1];
                mn[pos][lvl] = right < left ? right : left;
            }
        }
    }
    void rmq2(int* a, rmqNode (*mn)[20], int n) {
        for (int i = 1; i <= n; ++i) mn[i][0] = {a[i-1], i};
        for (int j = 1; bin[j] <= n; ++j) {
            for (int i = 1; i+bin[j-1]-1 <= n; ++i) {
                if (mn[i][j-1].val <= mn[i+bin[j-1]][j-1].val) mn[i][j] = mn[i][j-1];
                else mn[i][j] = mn[i+bin[j-1]][j-1];
            }
        }
    }
    // Returns the minimum stored in the sparse table mn over the index range
    // [l, r], using the two overlapping power-of-two blocks that cover it.
    int query1(int (*mn)[20], int l, int r) {
        const int lvl = Log[r-l+1];
        const int head = mn[l][lvl];
        const int tail = mn[r-bin[lvl]+1][lvl];
        return tail < head ? tail : head;
    }
    // Returns the stored index (field p) of a minimum-valued cell over the range
    // [l, r]. When the two covering blocks tie, the right block's index is used.
    int query2(rmqNode (*mn)[20], int l, int r) {
        const int lvl = Log[r-l+1];
        const rmqNode& head = mn[l][lvl];
        const rmqNode& tail = mn[r-bin[lvl]+1][lvl];
        if (head.val < tail.val) return head.p;
        return tail.p;
    }
    // Returns the minimum of the table mn over the ranks strictly above the
    // lower and up to the higher of rk[p] and rk[p+len]. With mn built over the
    // height array that belongs to rk, this is the length of the longest common
    // prefix of the suffixes starting at p and at p+len.
    int match(int* rk, int (*mn)[20], int p, int len) {
        int lo = rk[p];
        int hi = rk[p+len];
        if (lo > hi) {
            const int tmp = lo;
            lo = hi;
            hi = tmp;
        }
        return query1(mn, lo+1, hi);
    }
    // Solves one test case for the string held in the global s and prints
    // "Case <k>: <answer>" followed by a newline.
    //
    // Steps:
    //   1. Copy s (forward into r1, reversed into r2), each followed by a 0, and
    //      build rank/height arrays plus sparse tables for both.
    //   2. For every unit length l and every pair of adjacent anchors i*l and
    //      (i+1)*l, measure how far the two anchors match to the right (ri, on
    //      the forward string) and to the left (le, on the reversed string).
    //      A total match length k = le + ri gives k/l + 1 repetitions.
    //   3. Among the candidate start positions [l1, l2], pick the one whose suffix
    //      has the smallest rank, and keep the best (count, then rank) overall.
    //   4. If no repetition count above 1 was found, print the smallest character
    //      of s; otherwise print the chosen substring.
    //
    // Side effects: overwrites the global work arrays, increments kas and, when a
    // repeated substring is printed, truncates s in place.
    void work() {
        int tot1=0, tot2=0, m=0, len=strlen(s);
        // m tracks the largest character code so init() knows the key range.
        for (int i = 0; i < len; ++i) m = max(r1[tot1++] = s[i], m);
        r1[tot1++] = 0;
        for (int i = len-1; i >= 0; --i) r2[tot2++] = s[i];
        r2[tot2++] = 0;
        rmqInit(len);
        init(r1, rk1, h1, tot1, ++m);
        init(r2, rk2, h2, tot2, m);
        rmq1(h1, mn1, len);
        rmq1(h2, mn2, len);
        rmq2(rk1, mn, len);

        node ans = {1,0,0};
        for (int l = 1; l <= len; ++l) {
            // A unit of length l fits at most lim times into s, so longer units
            // cannot beat an answer that already exceeds lim + 1.
            int lim = len / l, upp;
            if (ans.val > lim+1) break;
            // upp = number of anchor pairs (i*l, (i+1)*l) with both inside s.
            if (len % l) upp = lim; else upp = lim-1;
            for (int i = 0; i < upp; ++i) {
                // ri: match length to the right of the anchors (forward string).
                // le: match length to the left of the anchors, taken on the
                //     reversed string; there is nothing to the left when i == 0.
                int ri = match(rk1, mn1, i*l, l),
                    le = i ? match(rk2, mn2, len-(i+1)*l, l) : 0,
                    k = le + ri;
                int cnt = k/l + 1;
                if (cnt >= ans.val) {
                    // Candidate starts are l1..l2; mn is 1-based, hence the +1
                    // on the query bounds and the -1 on the returned index.
                    int l1 = i*l-le, l2 = l1+k%l,
                        p = query2(mn, l1+1, l2+1)-1;
                    if (cnt > ans.val || rk1[p]<rk1[ans.pos]) ans = {cnt, p, cnt*l};
                }
            }
        }
        printf("Case %d: ", ++kas);
        if (ans.val == 1) {
            // No substring repeats: the answer is the smallest single character.
            char ch='z'+1;
            for (int i = 0; s[i]; ++i) ch = min(ch, s[i]);
            putchar(ch); puts("");
        }
        else {
            // Terminate the string right after the chosen substring. The original
            // had an empty character literal here, which does not compile.
            s[ans.pos+ans.len] = '\0';
            puts(s+ans.pos);
        }
    }
    // Reads whitespace-separated strings into s and solves each one with work(),
    // stopping at end of input or at the first string that starts with '#'.
    int main() {
        for (;;) {
            if (scanf("%s", s) == EOF) break;
            if (s[0] == '#') break;
            work();
        }
        return 0;
    }
    
    
  • 相关阅读:
    CF1109D Sasha and Interesting Fact from Graph Theory 组合数
    和与或 数位dp
    G
    E. String Multiplication dp
    Obtain a Permutation 乱搞
    CF1061E Politics 费用流
    mysql连接报错
    编译安装nginx
    SQL四种语言:DDL,DML,DCL,TCL
    Linux 常用管理命令
  • 原文地址:https://www.cnblogs.com/kkkkahlua/p/8443887.html
Copyright © 2011-2022 走看看