pku-1743 Musical Theme
解题思路
本质是一个寻找最长不重叠相同子串长度的题目
下面是求最长不重叠相同子串长度的思路:
二分枚举+height数组分组。这道题的思想很巧妙,后面要仔细推敲。先二分答案,把题目变成判定性问题:判断是否存在两个长度为k的子串是相同的,且不重叠。解决这个问题的关键还是利用height数组。把排序后的后缀分成若干组,其中每组的后缀之间的height值都不小于k。例如,字符串为“aabaaaab”,当k=2时,后缀分成了4组,如图所示。
容易看出,有希望成为最长公共前缀不小于k的两个后缀一定在同一组。然后对于每组后缀,只须判断每个后缀的sa值的最大值和最小值之差是否不小于k。如果有一组满足,则说明存在,否则不存在。整个做法的时间复杂度为O(nlogn)。
下面给出两份写法:先是一份倍增法后缀数组,然后是一份DC3。
Musical Theme
代码
#include <algorithm>
#include <cstdio>
#include <cmath>
#include <cstring>
#include <iostream>
#include <cstdlib>
#include <set>
#include <vector>
#include <cctype>
#include <iomanip>
#include <sstream>
#include <climits>
#include <queue>
#include <stack>
using namespace std;
/* freopen("k.in", "r", stdin);
freopen("k.out", "w", stdout); */
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")

// Debug helper: prints "<expression> = <value>" on its own line.
#define de(a) cout << #a << " = " << a << endl
// Inclusive integer loops over [a, n], ascending (rep) and descending (per).
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// Previously `vector<int, int>`, which passes `int` as the allocator type and
// fails to compile as soon as the alias is instantiated.
typedef vector<PII> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
const ll MAXN = 1e6 + 7; // capacity of the file-level work arrays
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN]; // sa[r]: start position of the suffix whose rank is r
int rk[MAXN]; // rk[p]: rank of the suffix starting at p (inverse mapping of sa)
int tp[MAXN]; // second key of the radix sort: tp[r] is the start position of the suffix ranked r by second key
int tax[MAXN]; // tax[v]: how many times key v occurs; counting buckets for the radix sort
int Height[MAXN]; // Height[r]: longest common prefix of the suffixes ranked r and r-1
/* lcp(sa[i],sa[i-1])*/
int n, m; // n: number of elements in s (1-based); m: largest key value used by Qsort
int s[MAXN]; // input sequence, stored in s[1..n]
/* void Debug()
{
printf("*****************
");
printf("下标");
for (int i = 1; i <= n; i++)
printf("%d ", i);
printf("
");
printf("sa ");
for (int i = 1; i <= n; i++)
printf("%d ", sa[i]);
printf("
");
printf("rak ");
for (int i = 1; i <= n; i++)
printf("%d ", rk[i]);
printf("
");
printf("tp ");
for (int i = 1; i <= n; i++)
printf("%d ", tp[i]);
printf("
");
} */
// One stable counting-sort pass.
// Reads:  rk[1..n] (first key, values in 0..m) and tp[1..n] (positions listed
//         in ascending second-key order).
// Writes: sa[1..n], the positions ordered by (first key, second key).
void Qsort()
{
    int key = 0;
    while (key <= m)
        tax[key++] = 0;

    for (int pos = 1; pos <= n; ++pos)
        ++tax[rk[pos]];

    // Prefix sums: tax[v] becomes the last output slot for key v.
    for (key = 1; key <= m; ++key)
        tax[key] = tax[key] + tax[key - 1];

    // Walk tp backwards so equal first keys keep their second-key order.
    for (int idx = n; idx > 0; --idx)
    {
        const int suffix = tp[idx];
        int &slot = tax[rk[suffix]];
        sa[slot] = suffix;
        --slot;
    }
}
// Builds the suffix array of s[1..n] by prefix doubling.
// Output: sa[1..n] (positions in suffix order) and rk[1..n] (rank of each
// position). Every s[i] must lie in [0, 200], the initial bucket range.
//
// Changes from the previous version:
//  * The second key of a suffix shorter than w is taken as 0 explicitly.
//    The old code read tp[pos + w] beyond n, which holds ranks left over from
//    an earlier, longer test case and could merge two different suffixes.
//  * Only the n live ranks are copied into tp. swap(tp, rk) on two built-in
//    arrays exchanged all MAXN elements in every round.
void SuffixSort()
{
    m = 200;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i], tp[i] = i;
    Qsort();
    // w: prefix length already ranked; each round produces ranks for length 2w.
    // p: number of distinct ranks; all suffixes differ, so p == n means done.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused as a plain write cursor while filling tp
        // The last w suffixes have an empty second half and therefore sort first.
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // Remaining positions follow the order of their second halves.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // sort by (previous rank, second key)
        for (int i = 1; i <= n; i++)
            tp[i] = rk[i]; // keep the previous ranks for the comparison below
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            const int prevSecond = (prev + w <= n) ? tp[prev + w] : 0;
            const int curSecond = (cur + w <= n) ? tp[cur + w] : 0;
            // Same rank only if both halves carried the same rank last round.
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
// Fills Height[2..n] with the longest common prefix of the suffixes ranked
// r and r-1, and sets Height[1] = 0. Requires sa[] and rk[] from SuffixSort().
//
// The comparison is now bounded by n. The previous loop relied on s[n + 1]
// differing from every element, but s is an int array reused across test
// cases, so a leftover value there could extend a match past the end.
void GetHeight()
{
    int k = 0;
    for (int i = 1; i <= n; i++)
    {
        if (rk[i] == 1)
        {
            // The smallest suffix has no predecessor.
            Height[1] = 0;
            k = 0;
            continue;
        }
        if (k)
            k--; // the LCP of the next position shrinks by at most one
        const int j = sa[rk[i] - 1];
        while (i + k <= n && j + k <= n && s[i + k] == s[j + k])
            k++;
        Height[rk[i]] = k;
    }
}
// Returns true when some run of consecutive ranks whose Height values are all
// at least len - 1 contains two start positions that are at least len apart.
bool check(int len)
{
    const int need = len - 1;
    int lo = sa[1];
    int hi = sa[1];
    int rank = 2;
    while (rank <= n)
    {
        const int pos = sa[rank];
        if (Height[rank] < need)
        {
            // Common prefix too short: a new group starts here.
            lo = pos;
            hi = pos;
        }
        else
        {
            if (pos > hi)
                hi = pos;
            if (pos < lo)
                lo = pos;
        }
        if (hi - lo >= len)
            return true;
        ++rank;
    }
    return false;
}
int ans = 0;
// Reads test cases until n == 0. For each one, prints the longest theme length
// (in notes) that occurs twice without overlap, or 0 if it is shorter than 5.
//
// s[i] (i >= 2) holds note[i] - note[i-1] + 100, so transposed themes compare
// equal. s[1] has no predecessor and is set to a marker that no difference can
// take. The old code stored note[1] + 100 there, which can coincide with a
// real difference and report a theme one note too long (notes 1..9,20 gave 5
// instead of 0).
// Assumes notes are in 1..88, so differences + 100 fall in 13..187 — confirm
// against the problem statement.
int main()
{
    const int kShift = 100;    // keeps every difference positive
    const int kFirstMark = 1;  // outside the range of shifted differences
    while (scanf("%d", &n) == 1 && n)
    {
        ans = 0;
        for (int i = 1; i <= n; i++)
            if (scanf("%d", &s[i]) != 1)
                return 0;
        // Go backwards so s[i - 1] is still the original note.
        for (int i = n; i >= 2; i--)
            s[i] -= s[i - 1] - kShift;
        s[1] = kFirstMark;
        s[n + 1] = 0; // clear data left behind by a longer earlier case
        SuffixSort();
        GetHeight();
        // Binary search on the theme length; two disjoint copies need 2*len <= n.
        int l = 1, r = (n >> 1) + 1;
        while (l < r)
        {
            int mid = (l + r) >> 1;
            if (check(mid))
            {
                l = mid + 1;
                ans = mid;
            }
            else
                r = mid;
        }
        if (ans < 5)
            printf("0\n");
        else
            printf("%d\n", ans);
    }
    return 0;
}
//--------------------DC3
#include <cstdio>
#include <algorithm>
#include <queue>
#include <iostream>
#include <cmath>
#include <cstring>
using namespace std;
// Index mappings used inside dc3(). Both expand to expressions that read a
// variable named `tb`, which must be in scope at the point of use.
// F: index x of the original sequence -> index in the reduced sequence
//    (x % 3 == 1 maps to x / 3, everything else to x / 3 + tb).
#define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
// G: index in the reduced sequence -> index in the original sequence
//    (below tb maps to 3x + 1, otherwise to 3(x - tb) + 2).
#define G(x) ((x) < tb ? (x)*3 + 1 : ((x)-tb) * 3 + 2)
const int MAXN = 200000 + 100; // array capacity; original note: n*10
int sa[MAXN];     // suffix array written by dc3()
int rk[MAXN];     // rank of each start position, filled by calheight()
int height[MAXN]; // LCP of the suffixes ranked i and i-1, filled by calheight()
int n;            // number of notes in the current test case
int s[MAXN];      // input notes, turned into shifted differences in main()
int r[MAXN];      // key sequence handed to dc3()
int wa[MAXN], wb[MAXN], wv[MAXN]; // scratch arrays shared by sort(), c12() and dc3()
int wws[MAXN];    // counting buckets used by sort()
// Stable counting sort.
//   r : key array; the key of element x is r[x]
//   a : input order (n indices)
//   b : output order (n indices), sorted by key, ties kept in input order
//   m : exclusive upper bound of the key values
// Uses the file-level scratch arrays wv (keys) and wws (buckets).
void sort(int *r, int *a, int *b, int n, int m)
{
    for (int idx = 0; idx < n; ++idx)
        wv[idx] = r[a[idx]];

    for (int key = 0; key < m; ++key)
        wws[key] = 0;
    for (int idx = 0; idx < n; ++idx)
        ++wws[wv[idx]];
    for (int key = 1; key < m; ++key)
        wws[key] = wws[key] + wws[key - 1];

    // Fill from the back so equal keys keep their relative order.
    for (int idx = n; idx-- > 0;)
    {
        int &slot = wws[wv[idx]];
        --slot;
        b[slot] = a[idx];
    }
}
// Returns 1 when the three consecutive keys starting at a equal the three
// starting at b, otherwise 0. Stops reading at the first mismatch.
int c0(int *r, int a, int b)
{
    for (int off = 0; off < 3; ++off)
    {
        if (r[a + off] != r[b + off])
            return 0;
    }
    return 1;
}
// Merge comparator used by dc3(): returns 1 when the suffix at a sorts before
// the suffix at b, otherwise 0.
//   k == 2 : compare one key, then fall back to the k == 1 rule one step on.
//   else   : compare one key, then the values stored in wv[] for a + 1 and b + 1.
int c12(int k, int *r, int a, int b)
{
    if (r[a] != r[b])
        return r[a] < r[b];
    if (k == 2)
        return c12(1, r, a + 1, b + 1);
    return wv[a + 1] < wv[b + 1];
}
// DC3 suffix-array construction (recursive).
//   r  : keys r[0..n-1]. The function writes r[n] and r[n + 1], and uses the
//        area starting at r + n (rn) as the key array of the recursive call.
//   sa : receives the sorted order in sa[0..n-1]; the area starting at sa + n
//        (san) is workspace for the recursive call.
//   n  : number of keys.
//   m  : exclusive upper bound of the key values (bucket count for sort()).
// Uses the file-level scratch arrays wa, wb, wv and the F/G macros, which read
// the local variable tb.
void dc3(int *r, int *sa, int n, int m)
{
int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
// Two zero keys after the end so triples near the end can be read.
r[n] = r[n + 1] = 0;
// Collect every index that is not a multiple of 3; tbc counts them.
for (i = 0; i < n; i++)
if (i % 3 != 0)
wa[tbc++] = i;
// Three counting-sort passes order those indices by their key triples
// (third key first, first key last).
sort(r + 2, wa, wb, tbc, m);
sort(r + 1, wb, wa, tbc, m);
sort(r, wa, wb, tbc, m);
// Name the sorted triples: equal neighbours (c0) share a name; p ends up as
// the number of distinct names.
for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
// Names not unique yet: recurse on the renamed sequence. Otherwise the names
// already form a permutation, so san is simply its inverse.
if (p < tbc)
dc3(rn, san, tbc, p);
else
for (i = 0; i < tbc; i++)
san[rn[i]] = i;
// Gather the indices that are multiples of 3, in the order given by san.
for (i = 0; i < tbc; i++)
if (san[i] < tb)
wb[ta++] = san[i] * 3;
if (n % 3 == 1)
wb[ta++] = n - 1;
// One more pass by first key puts the multiples of 3 in order (into wa).
sort(r, wb, wa, ta, m);
// Map san back to original indices (into wb) and store each one's order in wv.
for (i = 0; i < tbc; i++)
wv[wb[i] = G(san[i])] = i;
// Merge the two sorted groups into sa using c12 as the comparator.
for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
for (; i < ta; p++)
sa[p] = wa[i++];
for (; j < tbc; p++)
sa[p] = wb[j++];
return;
}
void calheight(int *r, int *sa, int n)
{
int i, j, k = 0;
for (i = 1; i <= n; ++i)
rk[sa[i]] = i;
for (i = 0; i < n; height[rk[i++]] = k)
for (k ? k-- : 0, j = sa[rk[i] - 1]; r[i + k] == r[j + k]; ++k)
;
return;
}
// Returns true when some run of consecutive ranks whose height values are all
// at least len - 1 contains two start positions that are at least len apart.
bool check(int len)
{
    int smallest = sa[1];
    int largest = sa[1];
    for (int rank = 2; rank <= n; ++rank)
    {
        const int pos = sa[rank];
        const bool sameGroup = height[rank] >= len - 1;
        // A too-short common prefix starts a new group at this position.
        largest = sameGroup ? max(largest, pos) : pos;
        smallest = sameGroup ? min(smallest, pos) : pos;
        if (largest - smallest >= len)
            return true;
    }
    return false;
}
int ans;
// Reads test cases until n == 0. For each one, prints the longest theme length
// (in notes) that occurs twice without overlap, or 0 if it is shorter than 5.
//
// Fixes over the previous version:
//  * The differencing loop stopped at i == 0 and read s[-1]; it now stops at 1.
//  * s[0] has no predecessor and is set to a marker that no shifted difference
//    can take, so it can never match a real difference.
//  * The binary-search bounds no longer shadow the global array r.
// Assumes notes are in 1..88, so differences + 100 fall in 13..187 — confirm
// against the problem statement.
int main()
{
    const int kShift = 100;    // keeps every difference positive
    const int kFirstMark = 1;  // outside the range of shifted differences
    while (scanf("%d", &n) == 1 && n)
    {
        ans = 0;
        for (int i = 0; i < n; i++)
            if (scanf("%d", &s[i]) != 1)
                return 0;
        // Go backwards so s[i - 1] is still the original note.
        for (int i = n - 1; i >= 1; i--)
            s[i] -= s[i - 1] - kShift;
        s[0] = kFirstMark;
        int Max = -1;
        for (int i = 0; i < n; i++)
        {
            r[i] = s[i];
            if (r[i] > Max)
                Max = r[i];
        }
        r[n] = 0; // terminator smaller than every real key
        dc3(r, sa, n + 1, Max + 1);
        calheight(r, sa, n);
        // Binary search on the theme length; two disjoint copies need 2*len <= n.
        int lo = 1, hi = (n >> 1) + 1;
        while (lo < hi)
        {
            int mid = (lo + hi) >> 1;
            if (check(mid))
            {
                lo = mid + 1;
                ans = mid;
            }
            else
                hi = mid;
        }
        if (ans < 5)
            printf("0\n");
        else
            printf("%d\n", ans);
    }
    return 0;
}
HDU-4622 Reincarnation
题意
区间内不同子串个数
#include <bits/stdc++.h>
using namespace std;
/* freopen("k.in", "r", stdin);
freopen("k.out", "w", stdout); */
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")

// Debug helper: prints "<expression> = <value>" on its own line.
#define de(a) cout << #a << " = " << a << endl
// Inclusive integer loops over [a, n], ascending (rep) and descending (per).
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// Previously `vector<int, int>`, which passes `int` as the allocator type and
// fails to compile as soon as the alias is instantiated.
typedef vector<PII> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
const ll MAXN = 1e6 + 7; // capacity of the file-level work arrays
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN]; // sa[r]: start position of the suffix whose rank is r
int rk[MAXN]; // rk[p]: rank of the suffix starting at p (inverse mapping of sa)
int tp[MAXN]; // second key of the radix sort: tp[r] is the start position of the suffix ranked r by second key
int tax[MAXN]; // tax[v]: how many times key v occurs; counting buckets for the radix sort
int Height[MAXN]; // Height[r]: longest common prefix of the suffixes ranked r and r-1
/* lcp(sa[i],sa[i-1])*/
int n, m; // n: length of the string in s + 1; m: largest key value used by Qsort
char s[MAXN]; // input string, stored from s[1]
/* void Debug()
{
printf("*****************
");
printf("下标");
for (int i = 1; i <= n; i++)
printf("%d ", i);
printf("
");
printf("sa ");
for (int i = 1; i <= n; i++)
printf("%d ", sa[i]);
printf("
");
printf("rak ");
for (int i = 1; i <= n; i++)
printf("%d ", rk[i]);
printf("
");
printf("tp ");
for (int i = 1; i <= n; i++)
printf("%d ", tp[i]);
printf("
");
} */
// One stable counting-sort pass.
// Reads:  rk[1..n] (first key, values in 0..m) and tp[1..n] (positions listed
//         in ascending second-key order).
// Writes: sa[1..n], the positions ordered by (first key, second key).
void Qsort()
{
    for (int bucket = m; bucket >= 0; --bucket)
        tax[bucket] = 0;

    int pos = 1;
    while (pos <= n)
    {
        tax[rk[pos]] += 1;
        ++pos;
    }

    // Prefix sums: tax[v] becomes the last output slot for key v.
    int bucket = 1;
    while (bucket <= m)
    {
        tax[bucket] += tax[bucket - 1];
        ++bucket;
    }

    // Walk tp backwards so equal first keys keep their second-key order.
    for (int idx = n; idx >= 1; --idx)
    {
        const int suffix = tp[idx];
        const int slot = tax[rk[suffix]];
        sa[slot] = suffix;
        tax[rk[suffix]] = slot - 1;
    }
}
// Builds the suffix array of s[1..n] by prefix doubling.
// Output: sa[1..n] (positions in suffix order) and rk[1..n] (rank of each
// position). The initial key is s[i] - '0' + 1 and must lie in [0, 75], which
// in ASCII covers the characters '0' through 'z'.
//
// Changes from the previous version:
//  * The second key of a suffix shorter than w is taken as 0 explicitly.
//    The old code read tp[pos + w] beyond n, which holds ranks left over from
//    an earlier, longer test case and could merge two different suffixes.
//  * Only the n live ranks are copied into tp. swap(tp, rk) on two built-in
//    arrays exchanged all MAXN elements in every round.
void SuffixSort()
{
    m = 75;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i] - '0' + 1, tp[i] = i;
    Qsort();
    // w: prefix length already ranked; each round produces ranks for length 2w.
    // p: number of distinct ranks; all suffixes differ, so p == n means done.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused as a plain write cursor while filling tp
        // The last w suffixes have an empty second half and therefore sort first.
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // Remaining positions follow the order of their second halves.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // sort by (previous rank, second key)
        for (int i = 1; i <= n; i++)
            tp[i] = rk[i]; // keep the previous ranks for the comparison below
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            const int prevSecond = (prev + w <= n) ? tp[prev + w] : 0;
            const int curSecond = (cur + w <= n) ? tp[cur + w] : 0;
            // Same rank only if both halves carried the same rank last round.
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
void GetHeight()
{
int j, k = 0;
for (int i = 1; i <= n; i++)
{
if (k)
k--;
j = sa[rk[i] - 1];
while (s[i + k] == s[j + k])
k++;
Height[rk[i]] = k;
}
}
// Sparse table: st[i][k] is the minimum of Height[i .. i + 2^k - 1].
int st[MAXN][21];
// Minimum of Height over the inclusive rank range [l, r]; requires l <= r.
int Query(int l, int r)
{
    const int span = r - l + 1;
    const int k = log2(span); // largest power of two that fits in the span
    const int tail = r - (1 << k) + 1;
    // Two blocks of length 2^k, one at each end, cover the whole range.
    return min(st[l][k], st[tail][k]);
}
// For each test case: reads a string, builds its suffix array and a sparse
// table over Height, then answers q queries "number of distinct substrings of
// s[l..r]" (1-based, inclusive).
//
// Fixes over the previous version:
//  * The sparse-table level loop ran to i == 21, one past the last column of
//    st[][21]; it now stops at level 20 and at the last level that fits in n.
//  * Removed `if (pre > i) swap(pre, i)`: pre is always an earlier rank than i,
//    and the swap would have overwritten the loop variable.
//  * scanf results are checked instead of assumed.
int main()
{
    int t;
    if (scanf("%d", &t) != 1)
        return 0;
    while (t--)
    {
        if (scanf(" %s", s + 1) != 1)
            break;
        n = strlen(s + 1);
        SuffixSort();
        GetHeight();
        for (int i = 0; i <= n; i++)
            st[i][0] = Height[i];
        for (int i = 1; i < 21 && (1 << i) <= n; i++)
            for (int j = 1; j + (1 << i) - 1 <= n; j++)
                st[j][i] = min(st[j][i - 1], st[j + (1 << (i - 1))][i - 1]);
        // The table now answers lcp(sa[a], sa[b]) as Query(a + 1, b).
        int q;
        if (scanf("%d", &q) != 1)
            break;
        while (q--)
        {
            int l, r;
            if (scanf("%d%d", &l, &r) != 2)
                return 0;
            const int len = r - l + 1;
            // Start from all substrings of s[l..r], then remove duplicates.
            int ans = len * (len + 1) / 2;
            int cnt = 0;  // suffixes inside [l, r] seen so far
            int pre = -1; // rank of the suffix the next one is compared with
            for (int i = 1; i <= n; i++)
            {
                if (cnt == len)
                    break;
                if (sa[i] < l || sa[i] > r)
                    continue;
                cnt++;
                if (pre == -1)
                {
                    pre = i;
                    continue;
                }
                const int lcp = Query(pre + 1, i);
                const int la = r - sa[pre] + 1; // length of pre's suffix cut at r
                const int lb = r - sa[i] + 1;   // length of i's suffix cut at r
                const int shared = min(lcp, min(la, lb));
                // If the cut suffix at i is a prefix of the longer cut suffix
                // at pre, keep pre as the reference for the next comparison.
                if (!(la > lb && lcp >= lb))
                    pre = i;
                ans -= shared;
            }
            printf("%d\n", ans);
        }
    }
    return 0;
}
牛客 CSL的密码
题意:
长度不小于k的本质不同子串数量
#include <bits/stdc++.h>
using namespace std;
/* freopen("k.in", "r", stdin);
freopen("k.out", "w", stdout); */
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")

// Debug helper: prints "<expression> = <value>" on its own line.
#define de(a) cout << #a << " = " << a << endl
// Inclusive integer loops over [a, n], ascending (rep) and descending (per).
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// Previously `vector<int, int>`, which passes `int` as the allocator type and
// fails to compile as soon as the alias is instantiated.
typedef vector<PII> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
const ll MAXN = 1e6 + 7; // capacity of the file-level work arrays
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN]; // sa[r]: start position of the suffix whose rank is r
int rk[MAXN]; // rk[p]: rank of the suffix starting at p (inverse mapping of sa)
int tp[MAXN]; // second key of the radix sort: tp[r] is the start position of the suffix ranked r by second key
int tax[MAXN]; // tax[v]: how many times key v occurs; counting buckets for the radix sort
int Height[MAXN]; // Height[r]: longest common prefix of the suffixes ranked r and r-1
/* lcp(sa[i],sa[i-1]) */
int n, m; // n: string length; m: alphabet size (largest key value)
char s[MAXN], t[MAXN];
/* void Debug()
{
printf("*****************
");
printf("下标");
for (int i = 1; i <= n; i++)
printf("%d ", i);
printf("
");
printf("sa ");
for (int i = 1; i <= n; i++)
printf("%d ", sa[i]);
printf("
");
printf("rak ");
for (int i = 1; i <= n; i++)
printf("%d ", rk[i]);
printf("
");
printf("tp ");
for (int i = 1; i <= n; i++)
printf("%d ", tp[i]);
printf("
");
} */
// One stable counting-sort pass.
// Reads:  rk[1..n] (first key, values in 0..m) and tp[1..n] (positions listed
//         in ascending second-key order).
// Writes: sa[1..n], the positions ordered by (first key, second key).
void Qsort()
{
    int *const bucketsEnd = tax + m + 1;
    for (int *cell = tax; cell != bucketsEnd; ++cell)
        *cell = 0;

    for (int pos = 1; pos <= n; ++pos)
        ++tax[rk[pos]];

    // Prefix sums: tax[v] becomes the last output slot for key v.
    for (int *cell = tax + 1; cell != bucketsEnd; ++cell)
        *cell += *(cell - 1);

    // Walk tp backwards so equal first keys keep their second-key order.
    int idx = n;
    while (idx >= 1)
    {
        const int suffix = tp[idx];
        sa[tax[rk[suffix]]] = suffix;
        --tax[rk[suffix]];
        --idx;
    }
}
// Builds the suffix array of s[1..n] by prefix doubling.
// Output: sa[1..n] (positions in suffix order) and rk[1..n] (rank of each
// position). The initial key is s[i] - '0' + 1 and must lie in [0, 75], which
// in ASCII covers the characters '0' through 'z'.
//
// Changes from the previous version:
//  * The second key of a suffix shorter than w is taken as 0 explicitly.
//    The old code read tp[pos + w] beyond n, which holds ranks left over from
//    an earlier, longer test case and could merge two different suffixes.
//  * Only the n live ranks are copied into tp. swap(tp, rk) on two built-in
//    arrays exchanged all MAXN elements in every round.
void SuffixSort()
{
    m = 75;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i] - '0' + 1, tp[i] = i;
    Qsort();
    // w: prefix length already ranked; each round produces ranks for length 2w.
    // p: number of distinct ranks; all suffixes differ, so p == n means done.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused as a plain write cursor while filling tp
        // The last w suffixes have an empty second half and therefore sort first.
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // Remaining positions follow the order of their second halves.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // sort by (previous rank, second key)
        for (int i = 1; i <= n; i++)
            tp[i] = rk[i]; // keep the previous ranks for the comparison below
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            const int prevSecond = (prev + w <= n) ? tp[prev + w] : 0;
            const int curSecond = (cur + w <= n) ? tp[cur + w] : 0;
            // Same rank only if both halves carried the same rank last round.
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
void GetHeight()
{
int j, k = 0;
for (int i = 1; i <= n; i++)
{
if (k)
k--;
j = sa[rk[i] - 1];
while (s[i + k] == s[j + k])
k++;
Height[rk[i]] = k;
}
}
/* 本质不同的子串的数量
枚举每一个后缀,第i个后缀对答案的贡献为n-sa[i]+1-Height[i]*/
/* 长度不小于k的不同本质子串数量 */
// For each test case (two integers, then a string): prints the number of
// distinct substrings whose length is at least k. The first integer is read
// into n but the length actually used is that of the string.
//
// Fixes over the previous version:
//  * The loop test `~scanf(...)` also accepted a partial read, leaving k
//    uninitialised; it now requires both integers.
//  * The string read is checked before the string is used.
int main()
{
    int k;
    while (scanf("%d%d", &n, &k) == 2)
    {
        if (scanf(" %s", s + 1) != 1)
            break;
        n = strlen(s + 1);
        SuffixSort();
        GetHeight();
        ll ans = 0;
        for (int i = 1; i <= n; i++)
        {
            const int len = n - sa[i] + 1; // length of the suffix ranked i
            // Prefixes up to Height[i] already appeared at the previous rank,
            // and prefixes shorter than k do not count; skip the larger set.
            const int skipped = min(max(k - 1, Height[i]), len);
            ans += len - skipped;
        }
        printf("%lld\n", ans);
    }
    return 0;
}