zoukankan      html  css  js  c++  java
  • sqrti128

    对 GCC 的 __uint128_t 类型求平方根并向下取整,即 floor(sqrt(x))。

    在 i7-7700K @ 4.35 GHz 上约 45.5 ns/op,即通常少于 200 个时钟周期/op。

    The listing also includes a u128 random-input generator, a timing harness, and a validation pass.

    #include <cmath>
    #include <cstdio>
    #include <random>
    #include <chrono>
    typedef __uint128_t u128;
    typedef unsigned long long u64;
    // Number of test values; sizes the global arrays and bounds the benchmark loops.
    const int count=10000000;
    // Refines the truncated double-precision square root of x with one integer
    // Newton step: (a + x/a) / 2.
    // Precondition: x != 0. For x == 0 the initial estimate is 0 and the
    // division below would divide by zero.
    // By the AM-GM inequality the result is never smaller than floor(sqrt(x)).
    u64 sqrt_approx(u64 x){
    	u64 approx=std::sqrt(double(x));
    	return (approx+x/approx)>>1;
    }
    // Returns floor(sqrt(x)) for any 64-bit unsigned x, including 0.
    u64 sqrt(u64 x){
    	// Guard the division inside sqrt_approx.
    	if(x==0)return 0;
    	u64 apt=sqrt_approx(x);
    	// The Newton step can only overshoot, never undershoot, so the single
    	// possible correction is downward by one. An upward correction is never
    	// needed; testing for one with a loose bound would wrongly bump values
    	// such as 7 (root 2) up to 3.
    	if(apt*apt>x)return apt-1;
    	return apt;
    }
    // Integer square root (rounded down) of a 128-bit unsigned value.
    // Inputs that fit in 64 bits are handed to the u64 overload. Larger inputs
    // get an initial estimate from their leading bits, one 128-bit Newton step,
    // and a final downward correction of at most one.
    u128 sqrt(u128 r){
    	// High 64 bits are zero: the 64-bit routine is sufficient.
    	if(!(r>>64))return sqrt(u64(r));
    	// cnt = bit length of the high 64-bit word rounded up to an even number
    	// (2..64), so r>>cnt fits in a u64 and cnt/2 is an exact half shift.
    	int cnt=(((64-__builtin_clzll(u64(r>>64)))+1)|1)^1;
    	// Approximate root of the leading bits, scaled back up by 2^(cnt/2).
    	u128 approx=u128(sqrt_approx(u64(r>>cnt)))<<(cnt/2);
    	// One Newton iteration carried out in full 128-bit arithmetic.
    	approx=(approx+r/approx)>>1;
    	// Square of the low 64 bits of approx; the arithmetic is modulo 2^128.
    	u128 apt=u128(u64(approx))*u128(u64(approx));
    //	if(r-apt>=2*approx-1)return approx+1;
    	// Bit 127 of the wrapping difference r-apt acts as a sign bit: when it is
    	// set, approx is treated as one too large and is decremented.
    	return approx-((r-apt)>>127);
    }
    // Benchmark inputs, one 128-bit value per slot.
    u128 rand_arr[count];
    // Square root computed for the input at the same index.
    u128 root_arr[count];
    
    // Signature of a benchmark stage: no arguments, no result.
    using func=void(*)();
    
    // Runs fn once and reports how long it took.
    //   str   - label printed in front of the timing line
    //   fn    - stage to execute and measure
    //   multi - number of operations fn performs; when non-zero the average
    //           time per operation is printed as well
    void Time(const char*str,func fn,int multi=count){
    	using hrc=std::chrono::high_resolution_clock;
    	const hrc::time_point start=hrc::now();
    	fn();
    	const hrc::duration dur=hrc::now()-start;
    	// count() returns the duration's representation type, which need not be
    	// unsigned long long; convert explicitly so it matches %llu.
    	const unsigned long long us=static_cast<unsigned long long>(
    		std::chrono::duration_cast<std::chrono::microseconds>(dur).count());
    	printf("%s Finished in %llu us . \n",str,us);
    	if(multi){
    		const double ns=double(std::chrono::duration_cast<std::chrono::nanoseconds>(dur).count());
    		printf("Average %.3lfns per op.\n",ns/multi);
    	}
    }
    
    // Benchmark stage: stores the square root of every rand_arr entry in the
    // matching root_arr slot.
    void Root(){
    	int idx=0;
    	while(idx<count){
    		root_arr[idx]=sqrt(rand_arr[idx]);
    		++idx;
    	}
    }
    
    std::mt19937_64 rng;
    
    void Gen(){
    	for(int i=0;i<count;++i)
    		rand_arr[i]=(u128(rng())<<64)|rng();
    }
    
    int Validate(){
    	for(int i=0;i<count;++i){
    		u128 ax=root_arr[i];
    		u128 bx=(ax+1)*(ax+1);
    		ax=ax*ax;
    		if(ax>rand_arr[i])
    			return i+1;
    		if(bx<=rand_arr[i])
    			return i+1;
    	}return 0;
    }
    
    // Scratch buffer shared by spp/hexo. Each expansion overwrites it, so use
    // at most one spp/hexo result per output statement.
    char pp[300];
    // spp(fmt, ...): printf-style formatting into pp (truncated to fit) that
    // evaluates to pp. Requires at least the format argument.
    #define spp(...) (snprintf(pp,sizeof(pp),__VA_ARGS__),pp)
    // hexo(x): formats a 128-bit value as "0x" + high 64 bits in hex + low
    // 64 bits as 16 zero-padded hex digits. The argument is parenthesized so
    // that expressions such as hexo(a+b) shift the whole value.
    #define hexo(x) (spp("0x%llx%016llx",u64((x)>>64),u64(x)))
    
    int main(){
    	printf("Count=%d
    ",count);
    	Time("Generation",Gen);
    	Time("Square root",Root);
    	int val=Validate();
    	printf("Validation %s
    ",val?spp("Fail at %d",val):"Passed");
    	if(val){
    		--val;
    		printf("Rand %s
    ",hexo(rand_arr[val]));
    		printf("Root %s
    ",hexo(root_arr[val]));
    	}
    	return 0;
    }
    
  • 相关阅读:
    vim设置字体
    mplayer error opening/initializing the selected video_out (vo) device
    ubuntn MySQL安装指南
    man linux
    ubuntu中无法启用桌面效果(3D效果)几种解决方案
    man c 函数 安装 使用
    Ubuntu使用桌面小工具
    Josephus 排列问题
    Adding the PPA to Ubuntu
    Ubuntu的root密码是什么
  • 原文地址:https://www.cnblogs.com/tmzbot/p/9393723.html
Copyright © 2011-2022 走看看