从这里开始
毒瘤千万条,HNOI第一条。
对拍不规范,爆零泪两行。
每日一吹 scoi 2019
Day 1
Problem A 鱼
不难证明 BC 垂直平分 AD,考虑枚举 AD,然后你要计算严格在其右侧的 E, F 的对数,以及 B, C 的对数,前者直接极角序扫描线即可,后者考虑以直线的位置和斜率为第一关键字,BC 中点位置为第二关键字对所有中垂线排序,比较位置是相对位置,可以用点积或者叉积判断,询问的时候二分。
计算几何点坐标出正负 1e9 的都是毒瘤。
Code
#include <bits/stdc++.h>
using namespace std;

typedef bool boolean;
const int N = 1005;
#define ll long long

// Euclidean gcd on the raw operands. The result can be negative when the
// operands are; Point::normalize() fixes the sign after dividing.
int gcd(int a, int b) {
    if (b == 0) return a;
    return gcd(b, a % b);
}

// Integer point / vector in the plane.
struct Point {
    int x, y;

    Point(int x = 0, int y = 0) : x(x), y(y) { }

    // Half-plane tag used by compare_angle(): 1 when y > 0, or y == 0 and
    // x < 0; 0 otherwise.
    int where() const {
        if (y > 0) return 1;
        return (y == 0 && x < 0) ? 1 : 0;
    }

    // Squared Euclidean length, computed in 64 bits.
    ll length2() const {
        return 1ll * x * x + 1ll * y * y;
    }

    // The vector (-y, x).
    Point rotate90() const {
        return Point(-y, x);
    }

    // Divides out the gcd and flips the sign so that x > 0, or x == 0 and
    // y >= 0. Assumes the vector is non-zero (a zero gcd would divide by zero).
    void normalize() {
        const int d = gcd(x, y);
        x /= d;
        y /= d;
        const bool flip = (x < 0) || (x == 0 && y < 0);
        if (flip) {
            x = -x;
            y = -y;
        }
    }

    // Lexicographic order on (x, y).
    boolean operator < (Point b) const {
        if (x != b.x) return x < b.x;
        return y < b.y;
    }

    boolean operator != (Point b) const {
        return !(x == b.x && y == b.y);
    }

    void read() {
        scanf("%d%d", &x, &y);
    }
};

Point operator + (Point a, Point b) {
    return Point(a.x + b.x, a.y + b.y);
}

Point operator - (Point a, Point b) {
    return Point(a.x - b.x, a.y - b.y);
}

Point operator - (Point a) {
    return Point(-a.x, -a.y);
}

ll dot(Point a, Point b) {
    return 1ll * a.x * b.x + 1ll * a.y * b.y;
}

ll cross(Point a, Point b) {
    return 1ll * a.x * b.y - 1ll * a.y * b.x;
}

// Angular comparator: vectors are ordered first by their where() tag and,
// within the same tag, by the sign of the cross product.
boolean compare_angle(const Point& a, const Point& b) {
    const int ha = a.where();
    const int hb = b.where();
    if (ha != hb) return ha < hb;
    return cross(a, b) > 0;
}

// True when a and b are parallel and point the same way.
boolean same_direct(const Point& a, const Point& b) {
    return cross(a, b) == 0 && dot(a, b) > 0;
}

// A line stored as an anchor point `a` and a normalized direction `v`.
struct Line {
    Point a, v;

    Line() { }

    Line(Point a, Point v) : a(a), v(v) {
        this->v.normalize();
    }

    // Order by direction, then by cross(a, v) (equal for anchors on the same
    // line of that direction), then by the anchor itself.
    boolean operator < (Line b) const {
        if (v != b.v) return v < b.v;
        const ll lhs = cross(a, v);
        const ll rhs = cross(b.a, b.v);
        if (lhs != rhs) return lhs < rhs;
        return a < b.a;
    }
};

// A sweep event: a direction plus an operation code (-1, 0 or 1).
struct Event {
    int op;
    Point dir;

    Event() { }

    Event(int op, Point dir) : op(op), dir(dir) { }

    // Events are ordered by angle; events sharing a direction are ordered by
    // op, so -1 comes before 0, which comes before 1.
    boolean operator < (Event b) const {
        if (same_direct(dir, b.dir)) return op < b.op;
        return compare_angle(dir, b.dir);
    }
};

ostream& operator << (ostream& os, Point p) {
    os << "(" << p.x << ", " << p.y << ")";
    return os;
}

int n;
vector<Point> P;
vector<Line> Ls;

int main() {
    scanf("%d", &n);
    P.resize(n);
    for (int i = 0; i < n; i++) {
        P[i].read();
    }

    // One line per unordered pair: anchored at the doubled midpoint
    // P[i] + P[j], directed perpendicular to P[j] - P[i], i.e. the pair's
    // perpendicular bisector scaled by two to stay in integers.
    for (int i = 0; i < n; i++) {
        for (int j = i + 1; j < n; j++) {
            const Point mid2 = P[i] + P[j];
            const Point perp = (P[j] - P[i]).rotate90();
            Ls.emplace_back(mid2, perp);
        }
    }
    sort(Ls.begin(), Ls.end());

    ll ans = 0;
    vector<Event> events;
    unordered_map<ll, int> active;  // squared distance -> number of active points

    for (int i = 0; i < n; i++) {
        int cnt_ef = 0;  // pairs of active points at equal distance from P[i]
        events.clear();
        active.clear();

        for (int j = 0; j < n; j++) {
            if (i == j) continue;
            Point d = P[j] - P[i];
            // Points with d.x > 0, or d.x == 0 and d.y > 0, start out active.
            if (d.x > 0 || (d.x == 0 && d.y > 0)) {
                int& slot = active[d.length2()];
                cnt_ef += slot;
                ++slot;
            }
            // Insertion order (0, then 1, then -1) is kept as in the original.
            events.emplace_back(0, d);
            const Point turned = d.rotate90();
            events.emplace_back(1, turned);
            events.emplace_back(-1, -turned);
        }
        sort(events.begin(), events.end());

        for (const Event& e : events) {
            if (e.op == 1) {
                int& slot = active[e.dir.length2()];
                cnt_ef += slot;
                ++slot;
                continue;
            }
            if (e.op == -1) {
                int& slot = active[e.dir.length2()];
                --slot;
                cnt_ef -= slot;
                continue;
            }
            if (!cnt_ef) continue;

            // e.dir is P[j] - P[i]. foo and bar lie on the same line, anchored
            // at 2 * P[i] and 2 * P[j]; bisectors strictly between them in Ls
            // are the candidate (B, C) pairs.
            Line foo(P[i] + P[i], e.dir);
            Line bar(P[i] + e.dir + P[i] + e.dir, e.dir);
            if (bar < foo) swap(foo, bar);
            const auto hi = lower_bound(Ls.begin(), Ls.end(), bar);
            const auto lo = upper_bound(Ls.begin(), Ls.end(), foo);
            int cnt_bc = hi - lo;
            if (cnt_bc > 0) {
                ans += 1ll * cnt_bc * cnt_ef;
            }
        }
    }

    // The accumulated count is printed multiplied by 4.
    printf("%lld ", ans * 4ll);
    return 0;
}
Problem B JOJO
考虑优化暴力 KMP,考虑 border 形成 log 个等差数列,对于一个等差数列只用判断最长的一项以及剩下任意一项是否能添加当前字符。
由于每次加多个字符,可能需要认真分析一下复杂度,注意到对于每一组 border 在加字符的过程中,要么最长的被扔掉,要么剩下的被扔掉,要么整组被扔掉,因此,每次暴力跳 fail 的过程只会进行 $O(\log L)$ 次。
注意一下如果 fail 在第一段,那么这时候不是周期不变,是 fail 不变,简单特判一下。
好像把重复次数作为一种字符也能进行 kmp,然后能优化到一个 log,我先咕咕咕。
Code
// Problem B (JOJO): KMP over a run-length encoded string with versioning.
// Versions form a tree (an append creates a child, a rollback points back to
// an earlier node); a DFS over that tree maintains border information and
// accumulates an answer per node.
#include <bits/stdc++.h>
using namespace std;
typedef bool boolean;
#define ll long long
#define pii pair<int, int>
const int N = 1e5 + 5;
const int Mod = 998244353;

// n * (n - 1) / 2.
ll C2(int n) {
    return 1ll * n * (n - 1) / 2;
}

// l + (l + 1) + ... + r, written as C2(r + 1) - C2(l).
ll sum(int l, int r) {
    return C2(r + 1) - C2(l);
}

// Edge of the version tree: child node `ed`, appended character code `c`,
// and repeat count `len`.
typedef class Edge {
    public:
        int ed, c, len;
        Edge(int ed, int c, int len) : ed(ed), c(c), len(len) { }
} Edge;

// One stretch of prefix positions sharing the same border description.
//   R     - last prefix position covered by this entry
//   ch    - character stored for the covered positions
//   dif   - if !fixed: fail(p) = p - dif for covered p; if fixed: fail(p) = dif
//   id    - index into `s` of the run these positions belong to
//   fixed - selects which of the two fail formulas applies
typedef class Data {
    public:
        boolean fixed;
        int ch;
        int dif;
        int R;
        int id;
        Data() { }
        Data(int ch, int dif, int R, int id, boolean fixed = false) : fixed(fixed), ch(ch), dif(dif), R(R), id(id) { }
} Data;

int n;
int p[N];           // p[i]: tree node representing the string after operation i
ll ans[N];          // per-node answer, reduced modulo Mod only when printed
vector<Edge> G[N];  // children of each version-tree node
vector<pii> s;      // runs on the current root path: (character, end position)
vector<Data> br;    // border entries for the current root path, R increasing

// Binary search for the first entry of `br` whose R is >= p.
// No bounds check: assumes such an entry exists.
Data& get(int p) {
    int l = 0, r = (signed) br.size() - 1, mid;
    while (l <= r) {
        mid = (l + r) >> 1;
        if (br[mid].R >= p) {
            r = mid - 1;
        } else {
            l = mid + 1;
        }
    }
    return br[r + 1];
}

// Failure value of prefix length p, decoded from its Data entry.
int fail(int p) {
    Data& d = get(p);
    return d.fixed ? d.dif : p - d.dif;
}

// Distance from position p to the end of the run containing it.
int rest(int p) {
    return s[get(p).id].second - p;
}

// Starting from border length `len`, follows failure links until the
// character after the border equals c; returns the extended length, or 0.
// `flag` remembers that the previous jump had f >= len / 2; on the next
// mismatch the loop then either takes f directly (f <= len / 2) or removes a
// multiple of the period len - f in one step. Per the accompanying write-up
// this uses the fact that borders form O(log) arithmetic progressions.
int extend(int len, int c) {
    boolean flag = false;
    while (len && get(len + 1).ch != c) {
        int f = fail(len);
        if (flag) {
            if (f <= (len >> 1)) {
                len = f;
            } else {
                int half = len >> 1;
                f = len - f;             // f is now the period
                len -= half / f * f;     // skip as many whole periods as fit in half
            }
            flag = false;
        } else {
            flag = f >= (len >> 1);
            len = f;
        }
    }
    return (get(len + 1).ch == c) ? (len + 1) : 0;
}

// DFS over the version tree. `p` is the current node and `dep` the length of
// its string. For every child edge the new run is pushed, its border entries
// and answer are computed, the child is visited, and the pushes are undone.
void dfs(int p, int dep) {
    for (auto _ : G[p]) {
        int e = _.ed;
        int ch = _.c;
        int l = _.len;                   // characters of the new run still to place
        s.emplace_back(ch, dep + l);
        ll& ans = ::ans[e];
        ans = ::ans[p];
        int oldsz = br.size();           // remembered so the entries can be popped afterwards
        int curid = (signed) s.size() - 1;
        if (!p) {
            // First run of the string: one entry with dif = 1, answer C2(l).
            br.emplace_back(ch, 1, l, 0);
            ans = C2(l);
        } else {
            int f = fail(dep);
            int len = dep + 1;
            // While the extended border reaches past the first run, advance
            // by whole stretches: nl further characters keep the same dif.
            while (l && (f = extend(f, ch)) > s[0].second) {
                int nl = min(l - 1, rest(f));
                br.emplace_back(ch, len - f, len + nl, curid);
                ans += sum(f, f + nl);
                f += nl;
                l -= nl + 1;
                len += nl + 1;
            };
            // Border ended inside the first run: one more stretch, after
            // which the border is pinned to the first run's length.
            if (l && f) {
                int nl = min(l - 1, rest(f));
                br.emplace_back(ch, len - f, len + nl, curid);
                ans += sum(f, f + nl);
                f = s[0].second;
                l -= nl + 1;
                len += nl + 1;
            }
            // Remaining characters all share the same fail value f (fixed
            // entry); the write-up's special case for a fail inside the first run.
            if (l) {
                br.emplace_back(ch, f, len + l - 1, curid, true);
                ans += 1ll * f * l;
            }
            assert(br.back().R == dep + _.len);
        }
        dfs(e, dep + _.len);
        // Undo this edge's contribution before trying the next child.
        while ((signed) br.size() > oldsz)
            br.pop_back();
        s.pop_back();
    }
}

// Reads n operations. op == 1: append x copies of a character, creating node
// i under the previous version's node. Otherwise: version i aliases the node
// of version x. Prints ans[node] % Mod for every version.
int main() {
    static char str[5];
    scanf("%d", &n);
    for (int i = 1, op, x; i <= n; i++) {
        scanf("%d%d", &op, &x);
        if (op == 1) {
            scanf("%s", str);
            int c = str[0] - 'a';
            int y = p[i - 1];
            p[i] = i;
            G[y].emplace_back(i, c, x);
        } else {
            p[i] = p[x];
        }
    }
    dfs(0, 0);
    for (int i = 1; i <= n; i++) {
        int res = ans[p[i]] % Mod;
        printf("%d ", res);
    }
    return 0;
}
Problem C 多边形
不难用归纳法证明,最终一定是 $i (1 < i < n - 1)$ 向 $n$ 连边。考虑答案的下界是不是这些边的条数,方案数大概是一棵树的拓扑序。这个就序列长度的阶乘再乘上每个 $\frac{1}{sz_i}$。手玩一下发现修改大概是做了一个类似单旋的操作。
Code
#include <bits/stdc++.h>
using namespace std;

typedef bool boolean;
#define ll long long

// Extended Euclid: on return a * x + b * y equals gcd(a, b).
void exgcd(int a, int b, int& x, int& y) {
    if (b == 0) {
        x = 1;
        y = 0;
        return;
    }
    exgcd(b, a % b, y, x);
    y -= (a / b) * x;
}

// Modular inverse of a modulo n, shifted into [0, n) when negative.
int inv(int a, int n) {
    int x, y;
    exgcd(a, n, x, y);
    if (x < 0) return x + n;
    return x;
}

const int Mod = 1e9 + 7;

// Residue modulo `Mod`. The int constructor stores its argument unreduced;
// the long long constructor reduces it.
template <const int Mod = :: Mod>
class Z {
    public:
        int v;

        Z() : v(0) { }
        Z(int x) : v(x) { }
        Z(ll x) : v(x % Mod) { }

        friend Z operator + (const Z& a, const Z& b) {
            int s = a.v + b.v;
            if (s >= Mod) s -= Mod;
            return Z(s);
        }
        friend Z operator - (const Z& a, const Z& b) {
            int s = a.v - b.v;
            if (s < 0) s += Mod;
            return Z(s);
        }
        friend Z operator * (const Z& a, const Z& b) {
            return Z(a.v * 1ll * b.v);
        }
        friend Z operator ~ (const Z& a) {
            return inv(a.v, Mod);
        }
        friend Z operator - (const Z& a) {
            return Z(0) - a;
        }
        Z& operator += (Z b) {
            return *this = *this + b;
        }
        Z& operator -= (Z b) {
            return *this = *this - b;
        }
        Z& operator *= (Z b) {
            return *this = *this * b;
        }
        friend boolean operator == (const Z& a, const Z& b) {
            return a.v == b.v;
        }
};

// Binary exponentiation on residues.
Z<> qpow(Z<> a, int p) {
    Z<> rt = Z<>(1), pa = a;
    while (p) {
        if (p & 1) {
            rt = rt * pa;
        }
        p >>= 1;
        pa = pa * pa;
    }
    return rt;
}

typedef Z<> Zi;

const int N = 1e5 + 5;
#define pii pair<int, int>

int W;
int n, m;
Zi fac[N], Inv[N];

// Fills fac[0..n] with factorials and Inv[0..n] with modular inverses using
// the recurrence Inv[i] = -Inv[Mod % i] * (Mod / i).
void prepare(int n) {
    fac[0] = 1;
    for (int i = 1; i <= n; ++i) {
        fac[i] = fac[i - 1] * i;
    }
    Inv[0] = Inv[1] = 1;
    for (int i = 2; i <= n; ++i) {
        const int q = Mod / i;
        const int r = Mod % i;
        Inv[i] = -Inv[r] * q;
    }
}

set<int> S[N];     // S[u]: vertices joined to u by a polygon side or diagonal
map<pii, int> sz;  // (l, r) -> subtree size computed by solve(l, r)

// Prints the step count and, when W is non-zero, the count ansv * fac[ans].
void putans(int ans, Zi ansv) {
    if (!W) {
        printf("%d ", ans);
        return;
    }
    printf("%d %d ", ans, (ansv * fac[ans]).v);
}

int ans = 0;
Zi ansv = 1;

// Size of the subtree for the chord (l, r): 0 when l and r are adjacent,
// otherwise 1 plus both sides split at t, the largest neighbour of l below r.
// Records the size in `sz` and multiplies ansv by its inverse.
int solve(int l, int r) {
    if (l + 1 == r) return 0;
    auto it = S[l].lower_bound(r);
    --it;
    const int t = *it;
    const int x = solve(l, t) + solve(t, r) + 1;
    sz[pii(l, r)] = x;
    ansv *= Inv[x];
    return x;
}

int main() {
    scanf("%d", &W);
    scanf("%d", &n);
    ans = n - 3;

    // Polygon sides.
    for (int i = 1; i < n; i++) {
        S[i].insert(i + 1);
        S[i + 1].insert(i);
    }
    S[n].insert(1);
    S[1].insert(n);

    // Diagonals; each one already touching vertex n needs no step.
    for (int i = 1, u, v; i <= n - 3; i++) {
        scanf("%d%d", &u, &v);
        S[u].insert(v);
        S[v].insert(u);
        if (max(u, v) == n) {
            ans--;
        }
    }

    prepare(n);

    // Process each gap between consecutive neighbours of vertex n.
    int prev = -1;
    for (auto cur : S[n]) {
        if (prev != -1) {
            solve(prev, cur);
        }
        prev = cur;
    }
    putans(ans, ansv);

    scanf("%d", &m);
    while (m--) {
        int a, b, c, d;
        scanf("%d%d", &a, &c);
        // b and d are the neighbours of a just below and just above c.
        auto below = S[a].lower_bound(c);
        --below;
        b = *below;
        d = *S[a].upper_bound(c);

        int x = sz[pii(a, b)];
        int y = sz[pii(b, c)];
        int z = sz[pii(c, d)];

        const Zi undone = ansv * (x + y + 1);
        if (d == n) {
            putans(ans - 1, undone);
        } else {
            putans(ans, undone * Inv[y + z + 1]);
        }
    }
    return 0;
}
Day 2
Problem A 校园旅行
$O(m^2)$ 的 dp 相信大家都会,考虑怎么优化。
70 分的话把这个转移分成两个阶段,这样一次只用枚举一边。第一次知道 $O(nm)$ 能跑 $2 \times 3 \times 10^3 \times 5 \times 10^4$.
把边分成两类:连接同色顶点和异色顶点的。
考虑路径两端在走的过程中关心的无非是位置和长度的奇偶性 (因为可以左右横跳来等待)。
这个保留生成树可以完成第一项,如果连通块不是二分图的话添加一个自环就行了。
这样边数只有 $O(n)$ 了。
Code
#include <bits/stdc++.h>
using namespace std;

typedef bool boolean;
#define digit(_x) ((_x) >= '0' && (_x) <= '9')

// Reads one unsigned decimal integer from stdin into u, skipping any leading
// non-digit characters.
//
// getchar() returns an int, so the result is kept in an int and compared with
// EOF. Storing it in a char makes end-of-input detection implementation-defined:
// it never fires where char is unsigned, and byte 0xFF looks like EOF where
// char is signed. If input ends before any digit is seen, u is set to 0.
template <typename T>
void readuint(T& u) {
    int ch = getchar();
    while (ch != EOF && !digit(ch)) {
        ch = getchar();
    }
    u = 0;
    for (; ch != EOF && digit(ch); ch = getchar()) {
        u = u * 10 + (ch - '0');
    }
}

// Reads several unsigned integers in order.
template <typename T, typename ...K>
void readuint(T& x, K& ...args) {
    readuint(x);
    readuint(args...);
}

const int N = 5e3 + 5, M = 5e5 + 5;
#define pii pair<int, int>

int n, m, q;
char s[N];            // vertex labels, 1-indexed
bitset<N> f[N];       // f[x][y] with x <= y: the pair (x, y) has been reached
vector<int> Gt[N];    // reduced graph: spanning-forest edges plus self-loops
vector<int> G[2][N];  // G[1]: edges joining equal labels, G[0]: different labels

boolean flag;         // set by dfs() when the current component has an odd cycle
vector<int> *_G;      // adjacency list that dfs() currently walks
bitset<N> vis, col;

// DFS over _G from p. Tree edges are copied into Gt, vertices are 2-coloured
// along the way, and `flag` is raised when an already-visited neighbour has
// the same colour (the component is not bipartite).
void dfs(int p) {
    vis.set(p);
    for (auto e : _G[p]) {
        if (vis.test(e)) {
            flag = flag || (col.test(e) == col.test(p));
        } else {
            col[e] = col[p] ^ 1;
            Gt[e].push_back(p);
            Gt[p].push_back(e);
            dfs(e);
        }
    }
}

int main() {
    readuint(n, m, q);
    scanf("%s", s + 1);
    for (int i = 1, u, v; i <= m; i++) {
        readuint(u, v);
        const int same = (s[u] == s[v]);
        G[same][u].push_back(v);
        G[same][v].push_back(u);
    }

    // For each edge class keep only a spanning forest. A non-bipartite
    // component also gets a self-loop on its DFS root, so that walks of
    // either parity remain possible inside it.
    for (int _ = 0; _ < 2; _++) {
        _G = G[_];
        vis.reset();
        for (int i = 1; i <= n; i++) {
            if (!vis.test(i)) {
                flag = false;
                dfs(i);
                if (flag) {
                    Gt[i].push_back(i);
                }
            }
        }
    }

    // BFS over unordered vertex pairs, stored with the smaller index first.
    queue<pii> Q;
    auto visit = [&] (int x, int y) {
        if (x > y) swap(x, y);
        if (!f[x].test(y)) {
            f[x].set(y);
            Q.push(pii(x, y));
        }
    };

    // Seeds: every single vertex, and every Gt edge joining equal labels.
    for (int i = 1; i <= n; i++) {
        visit(i, i);
        for (auto e : Gt[i]) {
            if (s[i] == s[e]) {
                visit(i, e);
            }
        }
    }

    // Extend both ends at once through Gt neighbours with equal labels.
    while (!Q.empty()) {
        pii cur = Q.front();
        Q.pop();
        int u = cur.first, v = cur.second;
        for (auto x : Gt[u]) {
            for (auto y : Gt[v]) {
                if (s[x] == s[y]) {
                    visit(x, y);
                }
            }
        }
    }

    int x, y;
    while (q--) {
        readuint(x, y);
        if (x > y) swap(x, y);
        if (f[x].test(y)) {
            puts("YES");
        } else {
            puts("NO");
        }
    }
    return 0;
}
Problem B 白兔之舞
考虑每走一步相当于是乘 $x$。考虑对这个多项式在模 $x^k - 1$ 意义下进行插值。代入 $x = \omega_k^i$ 用矩阵快速幂计算即可。
然后你要做一个任意长度 FFT 的东西,把 IDFT 的式子写出来,然后发现贴一个 goodbye jihai 的 D 就完事了。然后贴一个 goodbye jihai 的 D 和 三模数 NTT 就行了。
Code
#include <bits/stdc++.h>
using namespace std;

typedef bool boolean;
#define ll long long

int n, K, L, x, y, Mod;

// (a + b) mod Mod for operands already in [0, Mod).
template <typename T>
T add(T a, T b, T Mod = ::Mod) {
    a += b;
    if (a >= Mod) {
        a -= Mod;
    }
    return a;
}

// (a - b) mod Mod for operands already in [0, Mod).
int sub(int a, int b, int Mod = ::Mod) {
    a -= b;
    if (a < 0) {
        a += Mod;
    }
    return a;
}

int mul(int a, int b, int Mod = ::Mod) {
    return 1ll * a * b % Mod;
}

// Binary exponentiation: a^p modulo Mod.
int qpow(int a, int p, int Mod = ::Mod) {
    int rt = 1;
    while (p) {
        if (p & 1) {
            rt = mul(rt, a, Mod);
        }
        p >>= 1;
        a = mul(a, a, Mod);
    }
    return rt;
}

// Extended Euclid: on return a * x + b * y equals gcd(a, b).
void exgcd(int a, int b, int& x, int& y) {
    if (b == 0) {
        x = 1;
        y = 0;
        return;
    }
    exgcd(b, a % b, y, x);
    y -= (a / b) * x;
}

// Modular inverse through exgcd, shifted into [0, Mod) when negative.
int inv(int a, int Mod = ::Mod) {
    int x, y;
    exgcd(a, Mod, x, y);
    if (x < 0) return x + Mod;
    return x;
}

int g, Phi;
vector<int> P;  // distinct prime factors of Phi

// True when g^(Phi / p) != 1 for every prime factor p of Phi.
boolean check_root() {
    for (auto p : P) {
        if (qpow(g, Phi / p) == 1) {
            return false;
        }
    }
    return true;
}

// Factorises Phi = Mod - 1 by trial division and picks the smallest g >= 2
// that passes check_root().
void prepare_g() {
    Phi = Mod - 1;
    int rem = Phi;
    for (int d = 2; d * d <= rem; d++) {
        if (rem % d) continue;
        P.push_back(d);
        while (!(rem % d)) {
            rem /= d;
        }
    }
    if (rem > 1) {
        P.push_back(rem);
    }
    g = 2;
    while (!check_root()) {
        g++;
    }
}

// Matrix of at most 3 x 3 entries; only the leading n x n block is used.
struct Matrix {
    int a[3][3];

    Matrix operator * (Matrix b) {
        Matrix rt;
        memset(rt.a, 0, sizeof(rt.a));
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                for (int k = 0; k < n; k++) {
                    int& cell = rt[i][k];
                    cell = add(cell, mul(a[i][j], b[j][k]));
                }
            }
        }
        return rt;
    }

    int* operator [] (int p) {
        return a[p];
    }
};

// Number-theoretic transform modulo the compile-time prime Mod with
// generator g. gn[i] / _gn[i] hold a 2^i-th root of unity and its inverse
// for i = 1..18.
template <const int Mod, const int g>
class NTT {
    public:
        int gn[20], _gn[20];

        NTT() {
            int Phi = Mod - 1;
            for (int i = 1; i <= 18; i++) {
                gn[i] = qpow(g, (Phi >> i), Mod);
                _gn[i] = inv(gn[i], Mod);
            }
        }

        // In-place transform of f[0..len), len a power of two. sgn > 0 is the
        // forward transform; otherwise the inverse roots are used, and for
        // sgn < 0 the result is also scaled by 1 / len.
        void operator () (int* f, int len, int sgn) {
            // Bit-reversal permutation.
            for (int i = 1, j = len >> 1, k; i < len - 1; i++, j += k) {
                if (i < j) swap(f[i], f[j]);
                k = len >> 1;
                while (j >= k) {
                    j -= k;
                    k >>= 1;
                }
            }

            const int* roots = (sgn > 0) ? gn : _gn;
            int level = 1;
            for (int l = 2; l <= len; l <<= 1, ++level) {
                const int hl = l >> 1;
                const int step = roots[level];
                for (int i = 0; i < len; i += l) {
                    int w = 1;
                    for (int j = 0; j < hl; j++) {
                        int a = f[i + j];
                        int b = 1ll * f[i + j + hl] * w % Mod;
                        f[i + j] = add(a, b, Mod);
                        f[i + j + hl] = sub(a, b, Mod);
                        w = 1ll * w * step % Mod;
                    }
                }
            }

            if (sgn < 0) {
                const int scale = inv(len, Mod);
                for (int i = 0; i < len; i++) {
                    f[i] = mul(f[i], scale, Mod);
                }
            }
        }
};

const int Mod1 = 998244353, Mod2 = 469762049, Mod3 = 1004535809;
NTT<Mod1, 3> ntt1;
NTT<Mod2, 3> ntt2;
NTT<Mod3, 3> ntt3;

int Wk, _Wk;
Matrix a;

// Entry [x][y] of the global matrix `a` raised to the power L.
int solve() {
    Matrix rt;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            rt[i][j] = (i == j);
        }
    }
    Matrix b = a;
    int p = L;
    while (p) {
        if (p & 1) {
            rt = rt * b;
        }
        p >>= 1;
        b = b * b;
    }
    return rt[x][y];
}

int main() {
    ios::sync_with_stdio(false);
    cin.tie(0), cout.tie(0);
    cin >> n >> K >> L >> x >> y >> Mod;
    --x, --y;

    prepare_g();
    Wk = qpow(g, Phi / K);  // g^(Phi / K); a K-th root of unity when K divides Phi
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            cin >> a[i][j];
        }
    }

    // Transform length: a power of two not below 2 * K.
    int t = 1;
    while (t < K) {
        t <<= 1;
    }
    t <<= 1;
    _Wk = inv(Wk);

    vector<int> f(t, 0), kern(t, 0);

    // Iteration i sees `a` scaled by Wk^i and evaluates ((I + a)^L)[x][y],
    // then pre-multiplies the sample by Wk^(-i * i).
    for (int i = 0; i < K; i++) {
        for (int j = 0; j < n; j++) {
            a[j][j] = add(a[j][j], 1);
        }
        f[i] = solve();
        for (int j = 0; j < n; j++) {
            a[j][j] = sub(a[j][j], 1);
        }
        for (int j = 0; j < n; j++) {
            for (int k = 0; k < n; k++) {
                a[j][k] = mul(a[j][k], Wk);
            }
        }
        f[i] = mul(f[i], qpow(_Wk, 1ll * i * i % Phi));
    }

    // Kernel h[d] = Wk^(d * (d - 1) / 2) for d in (-K, K), stored with an
    // offset of K - 1; the samples get the same factor at their own index.
    int* h = kern.data() + K - 1;
    for (int i = -K + 1; i < 0; i++) {
        h[i] = qpow(Wk, (1ll * i * (i - 1) / 2) % Phi);
    }
    for (int i = 0; i < K; i++) {
        int tmp = qpow(Wk, 1ll * i * (i - 1) / 2 % Phi);
        h[i] = tmp;
        f[i] = mul(f[i], tmp);
    }

    // The convolution is taken modulo three NTT primes and recombined below.
    vector<int> f1 = f, f2 = f, f3 = f;
    vector<int> g1 = kern, g2 = kern, g3 = kern;
    auto fix = [&] (vector<int>& x, int Mod) {
        for (auto& y : x) {
            y %= Mod;
        }
    };
    fix(f1, Mod1), fix(f2, Mod2), fix(f3, Mod3);
    fix(g1, Mod1), fix(g2, Mod2), fix(g3, Mod3);

    ntt1(f1.data(), t, 1);
    ntt2(f2.data(), t, 1);
    ntt3(f3.data(), t, 1);
    ntt1(g1.data(), t, 1);
    ntt2(g2.data(), t, 1);
    ntt3(g3.data(), t, 1);
    for (int i = 0; i < t; i++) {
        f1[i] = 1ll * f1[i] * g1[i] % Mod1;
        f2[i] = 1ll * f2[i] * g2[i] % Mod2;
        f3[i] = 1ll * f3[i] * g3[i] % Mod3;
    }
    ntt1(f1.data(), t, -1);
    ntt2(f2.data(), t, -1);
    ntt3(f3.data(), t, -1);

    // CRT: merge residues 1 and 2 into `merged` modulo M = Mod1 * Mod2, then
    // lift with residue 3 and reduce modulo the run-time modulus.
    constexpr ll M = 1ll * Mod1 * Mod2;
    int inv1 = inv(Mod2 % Mod1, Mod1), inv2 = inv(Mod1 % Mod2, Mod2), inv3 = inv(M % Mod3, Mod3);
    for (int i = 0; i < t; i++) {
        int r1 = f1[i];
        int r2 = f2[i];
        int r3 = f3[i];
        ll merged = (1ll * r1 * inv1) % Mod1 * Mod2 + (1ll * r2 * inv2) % Mod2 * Mod1;
        if (merged >= M) {
            merged -= M;
        }
        int k = 1ll * (r3 - (merged % Mod3) + Mod3) * inv3 % Mod3;
        f[i] = (1ll * (M % Mod) * k + merged) % Mod;
    }

    // Output i sits at convolution index K - 1 + i; remove the remaining
    // factor Wk^(i * (i - 1) / 2) and divide by K.
    h = f.data() + K - 1;
    int inv4 = inv(K);
    for (int i = 0; i < K; i++) {
        h[i] = mul(h[i], qpow(_Wk, 1ll * i * (i - 1) / 2 % Phi));
        h[i] = mul(h[i], inv4);
        cout << h[i] << ' ';
    }
    return 0;
}
Problem C 序列
区分会不会保序回归的选手。
先考虑一下 $m = 0$ 怎么做,如果若干个数它们最后相同,显然当它们都等于它们的平均数的时候它们的代价最小。
求答案是维护一个单调栈,如果当前段的平均数比栈顶小,就把这一段和栈顶合并,最后每一段内的数赋值为它们的平均值。
什么?证明?感性理解就好了。
然后考虑有修改的情况,大概是你要合并两个奇怪的单调栈,考虑枚举 $x$ 所在段的右端点 $r$ 在哪,可以发现这个右端点一定是右侧单调栈中一段的前一个位置。考虑求出最靠右的一个左端点满足 $x$ 所在的这一段是 $[l, r]$ 这一段,并且比前一段的平均值大。发现如果 $l$ 满足那么前一段的后一个位置也满足,因此可以二分来求。发现 $r$ 也满足类似的性质,因此也可以二分求解。
Code
// Problem C (序列): isotonic regression with single-point modifications.
// A monotone stack of segments is kept for the prefix (stkL) and for the
// suffix (stkR); each query is answered offline by finding, with nested
// binary searches, the segment that absorbs the modified position.
#include <bits/stdc++.h>
using namespace std;
typedef bool boolean;
const int N = 1e5 + 5;
const int Mod = 998244353;

// Extended Euclid: on return a * x + b * y equals gcd(a, b).
void exgcd(int a, int b, int& x, int& y) {
    if (!b) {
        x = 1, y = 0;
    } else {
        exgcd(b, a % b, y, x);
        y -= (a / b) * x;
    }
}

// Modular inverse of a modulo Mod, shifted into [0, Mod) when negative.
int inv(int a) {
    int x, y;
    exgcd(a, Mod, x, y);
    return (x < 0) ? (x + Mod) : (x);
}

#define ll long long

// (a + b) mod Mod for operands in [0, Mod).
int add(int a, int b) {
    return ((a += b) >= Mod) ? (a - Mod) : a;
}

// (a - b) mod Mod for operands in [0, Mod).
int sub(int a, int b) {
    return ((a -= b) < 0) ? (a + Mod) : a;
}

// Offline query: position x is given value v; id is the output slot.
typedef class Query {
    public:
        int x, v, id;
        Query(int x, int v, int id) : x(x), v(v), id(id) { }
} Query;

// One segment of a monotone stack.
//   sum  - exact sum of the values in the segment
//   cnt  - number of elements
//   sum2 - sum of squares modulo Mod
//   pos  - boundary index assigned by the caller (right end in stkL, left end in stkR)
//   psum - running total of val() over this segment and those below it on the
//          stack (not reduced modulo Mod)
// The three-argument constructor and operator+ leave pos and psum unset.
typedef class Data {
    public:
        ll sum;
        int cnt;
        int sum2;
        int pos;
        ll psum;
        Data() { }
        Data(ll sum, int cnt, int sum2) : sum(sum), cnt(cnt), sum2(sum2) { }
        // Concatenation of two segments.
        friend Data operator + (Data a, Data b) {
            return Data(a.sum + b.sum, a.cnt + b.cnt, add(a.sum2, b.sum2));
        }
        // sum2 - sum^2 / cnt modulo Mod: the squared-deviation cost of setting
        // every element of the segment to the segment mean.
        int val() {
            int sm = sum % Mod;
            return sub(sum2, 1ll * sm * sm % Mod * inv(cnt) % Mod);
        }
        // Compares averages (sum / cnt) by cross-multiplication in double.
        boolean operator < (Data b) const {
            return 1.0 * sum * b.cnt < 1.0 * b.sum * cnt;
        }
} Data;

int n, m;
int a[N];
ll sum[N];                // prefix sums of a
int sum2[N];              // prefix sums of a[i]^2 modulo Mod
int oldtp[N];             // stkR height before position i was pushed
Data oldv[N];             // stkR slot overwritten when position i was pushed
Data stkL[N], stkR[N];    // monotone stacks for the prefix / the suffix
int ans[N];
vector<Query> Q;

int main() {
    scanf("%d%d", &n, &m);
    for (int i = 1; i <= n; i++) {
        scanf("%d", a + i);
        sum[i] = sum[i - 1] + a[i];
        sum2[i] = add(sum2[i - 1], 1ll * a[i] * a[i] % Mod);
    }
    // Query 0 rewrites a[1] with its own value, i.e. the unmodified answer.
    Q.emplace_back(1, a[1], 0);
    for (int i = 1, x, v; i <= m; i++) {
        scanf("%d%d", &x, &v);
        Q.emplace_back(x, v, i);
    }
    // Queries are processed in order of position.
    sort(Q.begin(), Q.end(), [&] (const Query& a, const Query& b) {
        return a.x < b.x;
    });

    // Build the suffix stack right to left, merging while the stack top has a
    // smaller average than the new segment. oldtp / oldv record what is
    // needed to undo each push later.
    int tpr = 0;
    stkR[0].pos = n + 1;      // sentinel: the empty suffix starts at n + 1
    for (int i = n; i; i--) {
        oldtp[i] = tpr;
        Data cur (a[i], 1, 1ll * a[i] * a[i] % Mod);
        while (tpr && stkR[tpr] < cur)
            cur = cur + stkR[tpr--];
        cur.psum = cur.val() + stkR[tpr].psum;
        cur.pos = i;
        oldv[i] = stkR[++tpr];
        stkR[tpr] = cur;
    }

    int tpl = 0;
    auto it = Q.begin(), _it = Q.end();
    for (int i = 1; i <= n && it != _it; i++) {
        // Undo the push of position i: stkR now describes i + 1 .. n, while
        // stkL describes 1 .. i - 1.
        stkR[tpr] = oldv[i];
        tpr = oldtp[i];
        while (it != _it && (it->x) == i) {
            // Data for [l, r] with a[it->x] replaced by it->v. Every call made
            // below uses a range that contains it->x.
            auto query_d = [&] (int l, int r) {
                Data d = Data(sum[r] - sum[l - 1], r - l + 1, sub(sum2[r], sum2[l - 1]));
                d.sum += -a[it->x] + it->v;
                d.sum2 = sub(d.sum2, 1ll * a[it->x] * a[it->x] % Mod);
                d.sum2 = add(d.sum2, 1ll * it->v * it->v % Mod);
                return d;
            };
            // For a fixed right end R: the number of prefix segments that stay
            // intact, i.e. the last stkL index whose average is below that of
            // the merged segment starting right after it.
            auto query_l = [&] (int R) {
                int l = 1, r = tpl, mid;
                while (l <= r) {
                    mid = (l + r) >> 1;
                    if (stkL[mid] < query_d(stkL[mid].pos + 1, R)) {
                        l = mid + 1;
                    } else {
                        r = mid - 1;
                    }
                }
                return l - 1;
            };
            // Binary search over suffix segments for the right end of the
            // merged segment; the write-up argues the predicate is monotone.
            int l = 1, r = tpr, mid;
            while (l <= r) {
                mid = (l + r) >> 1;
                int R = stkR[mid].pos - 1;
                int L = stkL[query_l(R)].pos + 1;
                if (query_d(L, R) < stkR[mid]) {
                    l = mid + 1;
                } else {
                    r = mid - 1;
                }
            }
            int R = stkR[--l].pos - 1;
            int Lid = query_l(R);
            int L = stkL[Lid].pos + 1;
            // Untouched prefix cost + untouched suffix cost + merged segment.
            ans[it->id] = (stkL[Lid].psum + stkR[l].psum + query_d(L, R).val()) % Mod;
            it++;
        }
        // Push position i onto the prefix stack, merging while the new
        // segment's average is below that of the stack top.
        Data cur (a[i], 1, 1ll * a[i] * a[i] % Mod);
        while (tpl && cur < stkL[tpl])
            cur = cur + stkL[tpl--];
        cur.psum = stkL[tpl].psum + cur.val();
        cur.pos = i;
        stkL[++tpl] = cur;
    }
    for (int i = 0; i <= m; i++) {
        printf("%d ", ans[i]);
    }
    return 0;
}