zoukankan      html  css  js  c++  java
  • 特殊整除分块的常数优化

    整除分块十分naive,但是卡常之后就不清真了。

    常数优化也是一门技术啊!

    如果需要计算$$\sum_{i=1}^{n}\left\lfloor \frac{n}{i} \right\rfloor$$
    有一个naive的做法就是

    // Naive block enumeration: all indices in [i, la] share the quotient n/i,
    // so each block adds (n/i) * (number of indices in the block) to ans.
    // n and ans are declared by the surrounding program (not shown here).
    for (long long i=1,la; i<=n; i=la+1){
        // la is the end of the current block: the last index whose quotient equals n/i.
        la=n/(n/i);
        // Quotient times block length; together with the line above this is 3 divisions per block.
        ans+=(n/i)*(la-i+1);
    }
    

    但是,这样不仅根号有2的常数,瓶颈上还有3次除法(可优化至2次),如果n是一个较大的数,跑起来很慢。
    今天突然看到了松1自己的提交,于是兴冲冲地又复习了一下优越的算法。
    首先推式子
    要求$$\sum_{i=1}^{n} \sum_{j=1}^{n} [ij \leq n]$$
    可拆为$$\sum_{i=1}^{\lfloor \sqrt{n} \rfloor} \sum_{j=1}^{n} [ij \leq n] + \sum_{i=\lfloor \sqrt{n} \rfloor + 1}^{n} \sum_{j=1}^{n} [ij \leq n]$$
    变换边界条件$$\sum_{i=1}^{\lfloor \sqrt{n} \rfloor} \sum_{j=1}^{n} [ij \leq n] + \sum_{i=\lfloor \sqrt{n} \rfloor + 1}^{n} \sum_{j=1}^{\lfloor \sqrt{n} \rfloor} [ij \leq n]$$
    再把前后两项变得一样

    $$\sum_{i=1}^{\lfloor \sqrt{n} \rfloor} \sum_{j=1}^{n} [ij \leq n] + \sum_{i=1}^{n} \sum_{j=1}^{\lfloor \sqrt{n} \rfloor} [ij \leq n] - \sum_{i=1}^{\lfloor \sqrt{n} \rfloor} \sum_{j=1}^{\lfloor \sqrt{n} \rfloor} [ij \leq n]$$

    合并一下

    $$2 \sum_{i=1}^{\lfloor \sqrt{n} \rfloor} \sum_{j=1}^{n} [ij \leq n] - \lfloor \sqrt{n} \rfloor^{2}$$

    换一种表示

    $$2 \sum_{i=1}^{\lfloor \sqrt{n} \rfloor} \left\lfloor \frac{n}{i} \right\rfloor - \lfloor \sqrt{n} \rfloor^{2}$$

    就可以快速计算啦!

    %:pragma GCC target("avx")
    %:pragma GCC optimize(3)
    %:pragma GCC optimize("Ofast")
    %:pragma GCC optimize("inline")
    %:pragma GCC optimize("-fgcse")
    %:pragma GCC optimize("-fgcse-lm")
    %:pragma GCC optimize("-fipa-sra")
    %:pragma GCC optimize("-ftree-pre")
    %:pragma GCC optimize("-ftree-vrp")
    %:pragma GCC optimize("-fpeephole2")
    %:pragma GCC optimize("-ffast-math")
    %:pragma GCC optimize("-fsched-spec")
    %:pragma GCC optimize("unroll-loops")
    %:pragma GCC optimize("-falign-jumps")
    %:pragma GCC optimize("-falign-loops")
    %:pragma GCC optimize("-falign-labels")
    %:pragma GCC optimize("-fdevirtualize")
    %:pragma GCC optimize("-fcaller-saves")
    %:pragma GCC optimize("-fcrossjumping")
    %:pragma GCC optimize("-fthread-jumps")
    %:pragma GCC optimize("-funroll-loops")
    %:pragma GCC optimize("-fwhole-program")
    %:pragma GCC optimize("-freorder-blocks")
    %:pragma GCC optimize("-fschedule-insns")
    %:pragma GCC optimize("inline-functions")
    %:pragma GCC optimize("-ftree-tail-merge")
    %:pragma GCC optimize("-fschedule-insns2")
    %:pragma GCC optimize("-fstrict-aliasing")
    %:pragma GCC optimize("-fstrict-overflow")
    %:pragma GCC optimize("-falign-functions")
    %:pragma GCC optimize("-fcse-skip-blocks")
    %:pragma GCC optimize("-fcse-follow-jumps")
    %:pragma GCC optimize("-fsched-interblock")
    %:pragma GCC optimize("-fpartial-inlining")
    %:pragma GCC optimize("no-stack-protector")
    %:pragma GCC optimize("-freorder-functions")
    %:pragma GCC optimize("-findirect-inlining")
    %:pragma GCC optimize("-frerun-cse-after-loop")
    %:pragma GCC optimize("inline-small-functions")
    %:pragma GCC optimize("-finline-small-functions")
    %:pragma GCC optimize("-ftree-switch-conversion")
    %:pragma GCC optimize("-foptimize-sibling-calls")
    %:pragma GCC optimize("-fexpensive-optimizations")
    %:pragma GCC optimize("-funsafe-loop-optimizations")
    %:pragma GCC optimize("inline-functions-called-once")
    %:pragma GCC optimize("-fdelete-null-pointer-checks")
    #include <iostream>
    #include <cmath>
    using namespace std;
    typedef unsigned long long ll;

    // Returns floor(sqrt(n)) exactly.
    // The double-precision estimate can be off by one once n exceeds 2^53,
    // so it is corrected with integer comparisons. The comparisons use
    // division (r > n/r  <=>  r*r > n) so that nothing overflows near 2^64.
    static ll floor_sqrt(ll n){
        ll r=static_cast<ll>(std::sqrt(static_cast<double>(n)));
        while (r>0 && r>n/r) --r;
        while (r+1<=n/(r+1)) ++r;
        return r;
    }

    // Reads n from stdin and prints sum_{i=1..n} floor(n/i), computed as
    //     2 * sum_{i=1..p} floor(n/i) - p*p   with p = floor(sqrt(n)).
    // The identity only holds when p is the exact floor square root, which is
    // why p is not taken straight from the floating-point sqrt.
    int main(){
        ll n; std::cin>>n;
        const ll p=floor_sqrt(n);
        ll ans=0;
        for (ll i=p; i; --i) ans+=n/i;
        ans=ans*2-p*p;
        std::cout<<ans<<'\n';
    }
    

    还不够快?
    利用$$\left\lfloor \frac{n}{2i} \right\rfloor = \left\lfloor \frac{\lfloor n/i \rfloor}{2} \right\rfloor$$
    可以优化

    %:pragma GCC target("avx")
    %:pragma GCC optimize(3)
    %:pragma GCC optimize("Ofast")
    %:pragma GCC optimize("inline")
    %:pragma GCC optimize("-fgcse")
    %:pragma GCC optimize("-fgcse-lm")
    %:pragma GCC optimize("-fipa-sra")
    %:pragma GCC optimize("-ftree-pre")
    %:pragma GCC optimize("-ftree-vrp")
    %:pragma GCC optimize("-fpeephole2")
    %:pragma GCC optimize("-ffast-math")
    %:pragma GCC optimize("-fsched-spec")
    %:pragma GCC optimize("unroll-loops")
    %:pragma GCC optimize("-falign-jumps")
    %:pragma GCC optimize("-falign-loops")
    %:pragma GCC optimize("-falign-labels")
    %:pragma GCC optimize("-fdevirtualize")
    %:pragma GCC optimize("-fcaller-saves")
    %:pragma GCC optimize("-fcrossjumping")
    %:pragma GCC optimize("-fthread-jumps")
    %:pragma GCC optimize("-funroll-loops")
    %:pragma GCC optimize("-fwhole-program")
    %:pragma GCC optimize("-freorder-blocks")
    %:pragma GCC optimize("-fschedule-insns")
    %:pragma GCC optimize("inline-functions")
    %:pragma GCC optimize("-ftree-tail-merge")
    %:pragma GCC optimize("-fschedule-insns2")
    %:pragma GCC optimize("-fstrict-aliasing")
    %:pragma GCC optimize("-fstrict-overflow")
    %:pragma GCC optimize("-falign-functions")
    %:pragma GCC optimize("-fcse-skip-blocks")
    %:pragma GCC optimize("-fcse-follow-jumps")
    %:pragma GCC optimize("-fsched-interblock")
    %:pragma GCC optimize("-fpartial-inlining")
    %:pragma GCC optimize("no-stack-protector")
    %:pragma GCC optimize("-freorder-functions")
    %:pragma GCC optimize("-findirect-inlining")
    %:pragma GCC optimize("-frerun-cse-after-loop")
    %:pragma GCC optimize("inline-small-functions")
    %:pragma GCC optimize("-finline-small-functions")
    %:pragma GCC optimize("-ftree-switch-conversion")
    %:pragma GCC optimize("-foptimize-sibling-calls")
    %:pragma GCC optimize("-fexpensive-optimizations")
    %:pragma GCC optimize("-funsafe-loop-optimizations")
    %:pragma GCC optimize("inline-functions-called-once")
    %:pragma GCC optimize("-fdelete-null-pointer-checks")
    %:pragma GCC target("sse2,sse3,ssse3,sse4")
    #include <iostream>
    #include <cmath>
    using namespace std;
    typedef long long ll;

    // Returns floor(sqrt(n)) exactly for n >= 0.
    // The double-precision estimate can be off by one once n exceeds 2^53,
    // so it is corrected with integer comparisons (r > n/r  <=>  r*r > n).
    static ll floor_sqrt(ll n){
        ll r=static_cast<ll>(std::sqrt(static_cast<double>(n)));
        while (r>0 && r>n/r) --r;
        while (r+1<=n/(r+1)) ++r;
        return r;
    }

    // Reads n from stdin and prints sum_{i=1..n} floor(n/i), computed as
    //     2 * sum_{m=1..p} floor(n/m) - p*p   with p = floor(sqrt(n)).
    // Only odd i are divided; the terms for m = i*2, i*4, ... are obtained by
    // halving, using floor(n/(2m)) = floor(floor(n/m)/2).
    int main(){
        ll n; std::cin>>n;
        // The sum is empty for n <= 0; this also avoids the division by p == 0.
        if (n<=0){ std::cout<<0; return 0; }
        const ll p=floor_sqrt(n);
        // z = floor(n/p) is the smallest quotient belonging to an index m <= p;
        // any quotient below z belongs to an index beyond p and must be skipped.
        const ll z=n/p;
        ll ans=0;
        for (ll i=1; i<=p; i+=2){
            for (ll t=n/i; t>=z; t>>=1) ans+=t;
        }
        ans=ans*2-p*p;
        std::cout<<ans;
    }
    

    还不够快,减少一次判断?

    #pragma GCC target("avx")
    #pragma GCC optimize(3)
    #pragma GCC optimize("Ofast")
    #pragma GCC optimize("inline")
    #pragma GCC optimize("-fgcse")
    #pragma GCC optimize("-fgcse-lm")
    #pragma GCC optimize("-fipa-sra")
    #pragma GCC optimize("-ftree-pre")
    #pragma GCC optimize("-ftree-vrp")
    #pragma GCC optimize("-fpeephole2")
    #pragma GCC optimize("-ffast-math")
    #pragma GCC optimize("-fsched-spec")
    #pragma GCC optimize("unroll-loops")
    #pragma GCC optimize("-falign-jumps")
    #pragma GCC optimize("-falign-loops")
    #pragma GCC optimize("-falign-labels")
    #pragma GCC optimize("-fdevirtualize")
    #pragma GCC optimize("-fcaller-saves")
    #pragma GCC optimize("-fcrossjumping")
    #pragma GCC optimize("-fthread-jumps")
    #pragma GCC optimize("-funroll-loops")
    #pragma GCC optimize("-fwhole-program")
    #pragma GCC optimize("-freorder-blocks")
    #pragma GCC optimize("-fschedule-insns")
    #pragma GCC optimize("inline-functions")
    #pragma GCC optimize("-ftree-tail-merge")
    #pragma GCC optimize("-fschedule-insns2")
    #pragma GCC optimize("-fstrict-aliasing")
    #pragma GCC optimize("-fstrict-overflow")
    #pragma GCC optimize("-falign-functions")
    #pragma GCC optimize("-fcse-skip-blocks")
    #pragma GCC optimize("-fcse-follow-jumps")
    #pragma GCC optimize("-fsched-interblock")
    #pragma GCC optimize("-fpartial-inlining")
    #pragma GCC optimize("no-stack-protector")
    #pragma GCC optimize("-freorder-functions")
    #pragma GCC optimize("-findirect-inlining")
    #pragma GCC optimize("-frerun-cse-after-loop")
    #pragma GCC optimize("inline-small-functions")
    #pragma GCC optimize("-finline-small-functions")
    #pragma GCC optimize("-ftree-switch-conversion")
    #pragma GCC optimize("-foptimize-sibling-calls")
    #pragma GCC optimize("-fexpensive-optimizations")
    #pragma GCC optimize("-funsafe-loop-optimizations")
    #pragma GCC optimize("inline-functions-called-once")
    #pragma GCC optimize("-fdelete-null-pointer-checks")
    #pragma GCC target("sse2,sse3,ssse3,sse4")
    #include <iostream>
    #include <cmath>
    using namespace std;
    typedef long long ll;

    // Returns floor(sqrt(n)) exactly for n >= 0.
    // The double-precision estimate can be off by one once n exceeds 2^53,
    // so it is corrected with integer comparisons (r > n/r  <=>  r*r > n).
    static ll floor_sqrt(ll n){
        ll r=static_cast<ll>(std::sqrt(static_cast<double>(n)));
        while (r>0 && r>n/r) --r;
        while (r+1<=n/(r+1)) ++r;
        return r;
    }

    // Reads n from stdin and prints sum_{i=1..n} floor(n/i), computed as
    //     2 * sum_{m=1..p} floor(n/m) - p*p   with p = floor(sqrt(n)).
    // Odd i are divided; the terms for i*2, i*4, ... come from halving.
    int main(){
        ll n; std::cin>>n;
        // The sum is empty for n <= 0; this also avoids the division by p == 0.
        if (n<=0){ std::cout<<0; return 0; }
        const ll p=floor_sqrt(n);
        // Smallest quotient that still belongs to an index m <= p.
        const ll z=n/p;
        ll ans=0;
        // For every odd i <= p we have t = n/i >= n/p = z, so the first
        // addition needs no check; do/while saves one comparison per i.
        for (ll i=1,t=n; i<=p; t=n/(i+=2))
            do{
                ans+=t;
            }while ((t>>=1)>=z);
        ans=ans*2-p*p;
        std::cout<<ans;
    }
    

    显然这还是没有到极致,不过我觉得已经挺快了。
    1e16在机房的普通台式机上只需0.5s
    还不够快?优化除法次数吧!

    #pragma GCC target("avx")
    #pragma GCC optimize(3)
    #pragma GCC optimize("Ofast")
    #pragma GCC optimize("inline")
    #pragma GCC optimize("-fgcse")
    #pragma GCC optimize("-fgcse-lm")
    #pragma GCC optimize("-fipa-sra")
    #pragma GCC optimize("-ftree-pre")
    #pragma GCC optimize("-ftree-vrp")
    #pragma GCC optimize("-fpeephole2")
    #pragma GCC optimize("-ffast-math")
    #pragma GCC optimize("-fsched-spec")
    #pragma GCC optimize("unroll-loops")
    #pragma GCC optimize("-falign-jumps")
    #pragma GCC optimize("-falign-loops")
    #pragma GCC optimize("-falign-labels")
    #pragma GCC optimize("-fdevirtualize")
    #pragma GCC optimize("-fcaller-saves")
    #pragma GCC optimize("-fcrossjumping")
    #pragma GCC optimize("-fthread-jumps")
    #pragma GCC optimize("-funroll-loops")
    #pragma GCC optimize("-fwhole-program")
    #pragma GCC optimize("-freorder-blocks")
    #pragma GCC optimize("-fschedule-insns")
    #pragma GCC optimize("inline-functions")
    #pragma GCC optimize("-ftree-tail-merge")
    #pragma GCC optimize("-fschedule-insns2")
    #pragma GCC optimize("-fstrict-aliasing")
    #pragma GCC optimize("-fstrict-overflow")
    #pragma GCC optimize("-falign-functions")
    #pragma GCC optimize("-fcse-skip-blocks")
    #pragma GCC optimize("-fcse-follow-jumps")
    #pragma GCC optimize("-fsched-interblock")
    #pragma GCC optimize("-fpartial-inlining")
    #pragma GCC optimize("no-stack-protector")
    #pragma GCC optimize("-freorder-functions")
    #pragma GCC optimize("-findirect-inlining")
    #pragma GCC optimize("-frerun-cse-after-loop")
    #pragma GCC optimize("inline-small-functions")
    #pragma GCC optimize("-finline-small-functions")
    #pragma GCC optimize("-ftree-switch-conversion")
    #pragma GCC optimize("-foptimize-sibling-calls")
    #pragma GCC optimize("-fexpensive-optimizations")
    #pragma GCC optimize("-funsafe-loop-optimizations")
    #pragma GCC optimize("inline-functions-called-once")
    #pragma GCC optimize("-fdelete-null-pointer-checks")
    #pragma GCC target("sse2,sse3,ssse3,sse4")
    #include <iostream>
    #include <cmath>
    using namespace std;
    typedef long long ll;

    // When the previous two quotients differed by less than C, the next
    // quotient is found by stepping down instead of by a division.
    static const ll C=13;

    // Returns floor(sqrt(n)) exactly for n >= 0.
    // The double-precision estimate can be off by one once n exceeds 2^53,
    // so it is corrected with integer comparisons (r > n/r  <=>  r*r > n).
    static ll floor_sqrt(ll n){
        ll r=static_cast<ll>(std::sqrt(static_cast<double>(n)));
        while (r>0 && r>n/r) --r;
        while (r+1<=n/(r+1)) ++r;
        return r;
    }

    // Reads n from stdin and prints sum_{i=1..n} floor(n/i), computed as
    //     2 * sum_{m=1..p} floor(n/m) - p*p   with p = floor(sqrt(n)).
    // Odd i carry a quotient t = floor(n/i); the terms for i*2, i*4, ... come
    // from halving, and t itself is updated incrementally where that is cheap.
    int main(){
        ll n; std::cin>>n;
        // The sum is empty for n <= 0; this also avoids the division by p == 0.
        if (n<=0){ std::cout<<0; return 0; }
        const ll p=floor_sqrt(n);
        // Smallest quotient that still belongs to an index m <= p.
        const ll z=n/p;
        ll ans=0;
        // la holds the quotient of the previous odd i. It starts C above t so
        // that the first step takes the division branch.
        for (ll i=1,t=n,la=n+C; i<=p;){
            // Add floor(n/i), floor(n/2i), floor(n/4i), ... while still >= z.
            ll tmp=t;
            do{
                ans+=tmp;
            }while ((tmp>>=1)>=z);
            if (la-t<C){
                // Small gap: start from t-1 and lower t until i*t <= n.
                // While the new i is <= p, consecutive quotients differ by
                // more than 1, so t-1 is a valid upper bound to start from.
                la=t;
                i+=2;
                ll g=i*(--t);
                while (g>n){
                    --t;
                    g-=i;
                }
            }
            else{
                // Large gap: a single division is cheaper than stepping down.
                la=t;
                t=n/(i+=2);
            }
        }
        ans=ans*2-p*p;
        std::cout<<ans;
    }
    

    现在1e16只需0.38s左右

    这东西貌似有一个(log)做法,先咕着。

  • 相关阅读:
    noip模拟赛 花
    noip模拟赛 柜(暴力)
    noip模拟赛 读
    Java基础知识强化47:StringBuffer类之StringBuffer的三个面试题
    Java基础知识强化46:StringBuffer类之判断一个字符串是否对称案例
    Java基础知识强化45:StringBuffer类之字符串反转的案例
    Java基础知识强化44:StringBuffer类之把数组拼接成指定格式的字符串的案例
    Java基础知识强化43:StringBuffer类之StringBuffer和String的相互转化
    Java基础知识强化42:StringBuffer类之StringBuffer的截取功能
    Java基础知识强化41:StringBuffer类之StringBuffer的反转功能
  • 原文地址:https://www.cnblogs.com/Yuhuger/p/9940189.html
Copyright © 2011-2022 走看看