「解题报告」 [UOJ#62] 怎样跑得更快 (莫比乌斯反演)
题意
给定 (n,c,d), 有 (q) 个询问, 每个询问给定 (b_1,cdots, b_n), 满足
[sum_{j=1}^ngcd(i,j)^ccdot{
m lcm}(i,j)^dcdot x_j equiv b_i pmod p
]
对每个询问, 解出 (x_1,cdots,x_n) 的值.
思路
目标 : 把原式转化为 (f(x)=sum_{d|x} g(d)) 的形式, 然后用莫比乌斯反演求出 (g(x)).
下面的运算都是在 (mod p) 意义下的运算.
[egin{align}
b_i &= sum_{j=1}^ngcd(i,j)^ccdot{
m lcm}(i,j)^dcdot x_j \
&= sum_{j=1}^n gcd(i,j)^{c-d} cdot (ij)^d cdot x_j \
&= sum_{u|i} u^{c-d} sum_{u|j}^n [gcd(i,j)=u] cdot (ij)^d cdot x_j \
&= sum_{u|i} x^{c-d} sum_{u|j}^n [frac{gcd(i,j)}{u}=1] cdot (ij)^d cdot x_j \
&= sum_{u|i} u^{c-d} sum_{u|j}^n (ij)^d cdot x_j sum_{v|frac{gcd(i,j)}{u}} mu(v)\
&= sum_{u|i} u^{c-d} sum_{u|j}^n (ij)^d cdot x_j sum_{uv|gcd(i,j)} mu(v)\
end{align}
]
设 (T=uv) (最关键的一步)
[egin{align}
b_i &= sum_{u|i} u^{c-d} sum_{u|j}^n (ij)^d cdot x_j sum_{uv|gcd(i,j)} mu(v) \
&= sum_{T|i}sum_{T|j}^n (ij)^dx_j sum_{u|T} u^{c-d}muleft(frac{T}{u}
ight)
end{align}
]
其中 (sum_{u|T} u^{c-d}muleft(frac{T}{u} ight)) 是两个积性函数的狄利克雷卷积, 可以线性筛出来, 设为 (h(T)), 则
[egin{align}
b_i &= sum_{T|i}sum_{T|j}^n (ij)^dx_j sum_{u|T} u^{c-d}muleft(frac{T}{u}
ight) \
&= i^d sum_{T|i}sum_{T|j}^n j^d x_j h(T)
end{align}
]
设 (sum_{T|j}^n j^d x_j h(T)) 为 (g(T)), (frac{b_i}{i^d}) 为 (f(i)), 则
[egin{align}
f(i)=sum_{T|i} g(T) \
end{align}
]
利用莫比乌斯反演便可 (O(nsqrt n)) 求出 (g(x)). 而
[egin{align}
g(T)&=sum_{T|j}^n j^d x_j h(T) \
&Downarrow \
frac{g(T)}{h(T)} &=sum_{T|j}^n j^d x_j \
&Downarrow \
x_T&=frac{1}{T_d}left( sum_{T|j}^n muleft(frac{j}{T}
ight) frac{g(j)}{h(j)}
ight)
end{align}
]
便可在 (O(nlog n)) 内求出 (x_n).
总复杂度为 (O(n+q(n+nsqrt n+nlog n))).
需要注意优化一下常数, 比如预处理 (x^d) 以及 (x^{c-d}).
代码
#include<bits/stdc++.h>
typedef long long ll;
using namespace std;
const int _=1e5+7;
const ll mod=998244353;
int n,T,pri[_],p[_];
ll C,D,mu[_],pw[2][_],f[_],g[_],h[_],a[_],b[_];
bool v[_],flag;
ll gi(){
ll x=0,f=1; char c=getchar();
while(!isdigit(c)){ if(c=='-') f=-1; c=getchar(); }
while(isdigit(c)){ x=(x<<3)+(x<<1)+c-'0'; c=getchar(); }
return x*f;
}
ll _pw(ll a,ll p){
if(p<0) return _pw(_pw(a,-p),mod-2);
ll res=1;
while(p){
if(p&1) res=res*a%mod;
a=a*a%mod; p>>=1;
}
return res;
}
ll _inv(ll x){ return _pw(x,mod-2); }
void _pls(ll &x,ll y){ x=((x+y)%mod+mod)%mod; }
void _pre(){
mu[1]=pw[0][1]=pw[1][1]=h[1]=p[1]=1;
for(int i=2;i<=n;i++){
if(!v[i]){
v[i]=i; pri[++pri[0]]=i;
mu[i]=-1;
pw[1][i]=_pw(i,D);
pw[0][i]=_pw(i,C)*_inv(pw[1][i])%mod;
h[i]=((mu[i]+pw[0][i])%mod+mod)%mod;
p[i]=i;
}
for(int j=1;j<=pri[0]&&i*pri[j]<=n;j++){
v[i*pri[j]]=pri[j];
pw[0][i*pri[j]]=pw[0][i]*pw[0][pri[j]]%mod;
pw[1][i*pri[j]]=pw[1][i]*pw[1][pri[j]]%mod;
if(!(i%pri[j])){
mu[i*pri[j]]=0;
p[i*pri[j]]=p[i]*pri[j];
_pls(h[i*pri[j]],h[i/p[i]]*pw[0][p[i]]%mod*mu[pri[j]]%mod);
_pls(h[i*pri[j]],h[i/p[i]]*pw[0][p[i*pri[j]]]%mod);
break;
}
mu[i*pri[j]]=-mu[i];
p[i*pri[j]]=pri[j];
h[i*pri[j]]=h[i]*h[pri[j]]%mod;
}
}
for(int i=1;i<=n;i++){
h[i]=_inv(h[i]);
pw[1][i]=_inv(pw[1][i]);
}
}
void _run(){
flag=0;
for(int i=1;i<=n;i++){
b[i]=gi(); f[i]=b[i]*pw[1][i]%mod;
if(b[i]&&!pw[1][i]) flag=1;
}
for(int i=1;i<=n;i++){
g[i]=0;
for(int j=1;j*j<=i;j++)
if(!(i%j)){
_pls(g[i],f[j]*mu[i/j]%mod);
if(j*j!=i) _pls(g[i],f[i/j]*mu[j]%mod);
}
if(g[i]&&!h[i]){ flag=1; break; }
}
if(flag){ puts("-1"); return; }
for(int i=1;i<=n;i++){
ll a=0;
for(int j=i;j<=n;j+=i)
_pls(a,mu[j/i]*g[j]%mod*h[j]%mod);
a=a*pw[1][i]%mod;
printf("%lld ",a);
}
putchar('
');
}
int main(){
#ifndef ONLINE_JUDGE
freopen("2.in","r",stdin);
freopen("x.out","w",stdout);
#endif
n=gi(); C=gi(); D=gi(); T=gi();
_pre();
while(T--)
_run();
return 0;
}