写完帕秋莉的超级多项式于是正好贴个模板大汇总(带优化的那种...)
//by Judge
#include<bits/stdc++.h>
#define Rg register
#define fp(i,a,b) for(Rg int i=(a),I=(b)+1;i<I;++i)
#define fd(i,a,b) for(Rg int i=(a),I=(b)-1;i>I;--i)
#define ll long long
using namespace std;
const int mod=998244353;
const int iG=332748118;
const int M=3e5+3;
typedef int arr[M];
#ifndef Judge
#define getchar() (p1==p2&&(p2=(p1=buf)+fread(buf,1,1<<21,stdin),p1==p2)?EOF:*p1++)
#endif
char buf[1<<21],*p1=buf,*p2=buf;
inline int inc(int x,int y){return (x+y)%mod;}
inline int dec(int x,int y){return (x-y+mod)%mod;}
inline int Dec(int x,int y){return x<y?x-y+mod:x-y;}
inline int Inc(int x,int y){return x+y>=mod?x+y-mod:x+y;}
inline int mul(int x,int y){return 1ll*x*y%mod;}
inline int read(){ int x=0,f=1; char c=getchar();
for(;!isdigit(c);c=getchar()) if(c=='-') f=-1;
for(;isdigit(c);c=getchar()) x=x*10+c-'0'; return x*f;
} char sr[1<<21],z[20];int CCF=-1,Z;
inline void Ot(){fwrite(sr,1,CCF+1,stdout),CCF=-1;}
inline void print(int x,char chr=' '){
if(CCF>1<<20)Ot();if(x<0)sr[++CCF]=45,x=-x;
while(z[++Z]=x%10+48,x/=10);
while(sr[++CCF]=z[Z],--Z);sr[++CCF]=chr;
} int n,k,d,limit; arr a,b,r[21],lg,inv,G[2];
inline int qpow(Rg int x,Rg int p=mod-2,Rg int s=1){
for(;p;p>>=1,x=mul(x,x)) if(p&1) s=mul(s,x); return s;
}
inline void prep(int n){
inv[1]=1; for(limit=1;limit<=n;limit<<=1);
fp(i,2,limit) inv[i]=mul(mod-mod/i,inv[mod%i]);
fp(d,1,19){ lg[1<<d]=d; fp(i,0,(1<<d)-1)
r[d][i]=(r[d][i>>1]>>1)|((i&1)<<(d-1));
}
for(Rg int t=(mod-1)>>1,i=1,x,y;i<262144;i<<=1,t>>=1){
x=qpow(3,t),y=qpow(iG,t),G[0][i]=G[1][i]=1;
fp(k,1,i-1) G[1][i+k]=mul(G[1][i+k-1],x),G[0][i+k]=mul(G[0][i+k-1],y);
}
}
inline void NTT(int* a,int tp){
fp(i,0,limit-1) if(i<r[d][i]) swap(a[i],a[r[d][i]]);
for(Rg int mid=1;mid<limit;mid<<=1){ int I=mid<<1;
for(Rg int j=0,x,y;j<limit;j+=I) fp(k,0,mid-1)
x=a[j+k],y=mul(G[tp][mid+k],a[j+k+mid]),
a[j+k]=(x+y)%mod,a[j+k+mid]=(x-y+mod)%mod;
}
if(tp) return ; fp(i,0,limit-1) a[i]=mul(a[i],inv[limit]);
}
inline void init(Rg int n){ d=0;
for(limit=1;limit<=n;limit<<=1)++d;
}
void Inv(int* a,int* b,int n){
static arr C,D; if(n==1) return b[0]=qpow(a[0]),void();
Inv(a,b,n>>1),init(n); fp(i,0,n-1) C[i]=a[i],D[i]=b[i];
fp(i,n,limit-1) C[i]=D[i]=0; NTT(C,1),NTT(D,1);
fp(i,0,limit-1) C[i]=mul(C[i],mul(D[i],D[i])); NTT(C,0);
fp(i,0,n-1) b[i]=dec(inc(b[i],b[i]),C[i]); fp(i,n,limit-1) b[i]=0;
}
void Sqrt(int* a,int* b,int n){
static arr D,F; if(n==1) return b[0]=sqrt(a[0]),void();
Sqrt(a,b,n>>1); fp(i,0,n<<1) F[i]=0;
Inv(b,F,n),init(n); fp(i,0,n-1) D[i]=a[i];
fp(i,n,limit-1) D[i]=0; NTT(D,1),NTT(b,1),NTT(F,1);
fp(i,0,limit-1) b[i]=mul(inc(b[i],mul(D[i],F[i])),inv[2]);
NTT(b,0); fp(i,n,limit-1) b[i]=0;
memset(D,0,limit<<2),memset(F,0,limit<<2);
}
inline void Direv(int* a,int* b,int n){
fp(i,1,n-1) b[i-1]=mul(a[i],i); b[n-1]=b[n]=0;
}
inline void Inter(int* a,int* b,int n){
fp(i,1,n-1) b[i]=mul(a[i-1],inv[i]); b[0]=0;
}
inline void Ln(int* a,int* b,int n){
static arr A,B; Direv(a,A,n),Inv(a,B,n);
init(n),NTT(A,1),NTT(B,1);
fp(i,0,limit-1) A[i]=mul(A[i],B[i]);
NTT(A,0),Inter(A,b,limit);
memset(A,0,limit<<2),memset(B,0,limit<<2);
}
inline void Exp(int* a,int* b,int n){
static arr F; if(n==1) return b[0]=1,void();
Exp(a,b,n>>1),Ln(b,F,n),init(n);
F[0]=dec(a[0]+1,F[0]); fp(i,1,n-1) F[i]=dec(a[i],F[i]);
NTT(F,1),NTT(b,1); fp(i,0,limit-1) b[i]=mul(b[i],F[i]);
NTT(b,0); fp(i,n,limit-1) b[i]=0; memset(F,0,limit<<2);
}
inline void Pow(int* a,int* b,int n,int k){
static arr F; memset(F,0,n<<2),Ln(a,F,n);
fp(i,0,n-1) F[i]=mul(F[i],k); Exp(F,b,n);
}
int main(){ prep(1<<19);
return 0;
}