zoukankan html css js c++ java

memcpy/memmove?快速乘?

memcpy?memmove?

//#pragma GCC optimize(2)
#include<bits/stdc++.h>
using namespace std;
// Number of ints copied by every benchmarked strategy.
const int n = 50000000;

// All buffers are indexed from 1; main only touches indices 1..n.
int a[n + 10];  // source data, filled with rand() values by main
int b[n + 10];  // destination of the memcpy run
int c[n + 10];  // destination of the memmove run
int d[n + 10];  // destination of the plain element-wise loop
int e[n + 10];  // destination of the 4x unrolled loop
int f[n + 10];  // destination of the 8x unrolled loop
/**
 * Benchmarks five ways of copying a[1..n] into a separate destination array.
 *
 * For each strategy one line is printed: the label, the elapsed clock() ticks,
 * and the memcmp result against the source (0 means the copy is identical).
 *
 *   time1: memcpy                -> b
 *   time2: memmove               -> c
 *   time3: element-wise loop     -> d
 *   time4: loop unrolled 4 times -> e
 *   time5: loop unrolled 8 times -> f
 */
int main()
{
    int i;
    std::srand(2395);  // fixed seed: every run copies the same data
    for(i=1;i<=n;i++)    a[i]=std::rand();

    // 1) memcpy
    std::clock_t st1=std::clock();
    std::memcpy(b+1,a+1,sizeof(int)*n);
    std::clock_t ed1=std::clock();

    // 2) memmove
    std::clock_t st2=std::clock();
    std::memmove(c+1,a+1,sizeof(int)*n);
    std::clock_t ed2=std::clock();

    // 3) one assignment per iteration
    std::clock_t st3=std::clock();
    for(i=1;i<=n;i++)    d[i]=a[i];
    std::clock_t ed3=std::clock();

    // 4) four assignments per iteration
    std::clock_t st4=std::clock();
    for(i=1;i<=n-3;i+=4)
    {
        e[i]=a[i];
        e[i+1]=a[i+1];
        e[i+2]=a[i+2];
        e[i+3]=a[i+3];
    }
    // Copy the (at most 3) elements the unrolled loop did not reach.
    for(;i<=n;i++)    e[i]=a[i];
    std::clock_t ed4=std::clock();

    // 5) eight assignments per iteration
    std::clock_t st5=std::clock();
    for(i=1;i<=n-7;i+=8)
    {
        f[i]=a[i];
        f[i+1]=a[i+1];
        f[i+2]=a[i+2];
        f[i+3]=a[i+3];
        f[i+4]=a[i+4];
        f[i+5]=a[i+5];
        f[i+6]=a[i+6];
        f[i+7]=a[i+7];
    }
    // Copy the (at most 7) elements the unrolled loop did not reach.
    for(;i<=n;i++)    f[i]=a[i];
    std::clock_t ed5=std::clock();

    // Report ticks plus a correctness check for every destination buffer.
    std::cout<<"time1:"<<ed1-st1<<' '<<std::memcmp(a+1,b+1,sizeof(int)*n)<<'\n';
    std::cout<<"time2:"<<ed2-st2<<' '<<std::memcmp(a+1,c+1,sizeof(int)*n)<<'\n';
    std::cout<<"time3:"<<ed3-st3<<' '<<std::memcmp(a+1,d+1,sizeof(int)*n)<<'\n';
    std::cout<<"time4:"<<ed4-st4<<' '<<std::memcmp(a+1,e+1,sizeof(int)*n)<<'\n';
    std::cout<<"time5:"<<ed5-st5<<' '<<std::memcmp(a+1,f+1,sizeof(int)*n)<<'\n';
    return 0;
}

不开优化：
time1:139254 0
time2:198093 0
time3:601853 0
time4:588247 0
time5:598584 0

O2：
time1:138256 0
time2:139235 0
time3:426570 0
time4:322532 0
time5:301933 0

Ofast：
time1:137893 0
time2:140585 0
time3:422154 0
time4:309306 0
time5:298620 0

很显然在大数据(n=50000000)下memcpy最快（开启O2/Ofast后memmove与memcpy基本持平），手写循环即使展开也要慢2倍以上

另外，在小数据（比如n=26）下，测试得到明显直接赋值(time3)最快

在较小数据（比如n=1000）下，测试得到memmove最快？

快速乘

测试对比程序：

#include<bits/stdc++.h>
using namespace std;
typedef long long ll;

/**
 * Builds a pseudo-random 64-bit value from two rand() draws: the first draw
 * supplies the low bits, the second is shifted up by 32 bits.
 *
 * The draws are made in separate statements on purpose. When both calls are
 * operands of a single `|` expression, their order is unspecified, so the
 * sequence produced for a fixed srand() seed could differ between compilers.
 *
 * The range of the result depends on RAND_MAX; with a 32-bit int the shifted
 * draw stays below 2^63, so the result is non-negative.
 */
ll rd()
{
    const ll low=std::rand();
    const ll high=std::rand();
    return low|(high<<32);
}
// Modulus read by mul0..mul3; main assigns a fresh rd() value before each comparison.
ll md;
/**
 * Floating-point "fast multiply": returns x*y reduced by the file-level
 * modulus md, without a 128-bit integer type.
 *
 * The quotient is estimated in long double as round((x/md)*y); the products
 * x*y and q*md are then subtracted modulo 2^64, which leaves the small
 * remainder when the estimate is close enough. A negative difference is
 * folded back by adding md once.
 *
 * The wrapping products are computed in unsigned arithmetic because signed
 * overflow is undefined behaviour. Converting the unsigned difference back
 * to ll assumes the usual two's-complement (modular) conversion.
 */
ll mul1(ll x,ll y)
{
    typedef unsigned long long ull;
    x%=md;y%=md;
    const ll q=ll((long double)x/md*y+0.5);     // estimated quotient: divide first, then multiply
    ll t=ll(ull(x)*ull(y)-ull(q)*ull(md));      // low 64 bits of x*y - q*md
    return t<0?t+md:t;
}
/**
 * Variant of the floating-point fast multiply that estimates the quotient as
 * round((x*y)/md), i.e. it multiplies in long double first and divides after.
 *
 * As in mul1, x*y and q*md are subtracted modulo 2^64 and a negative
 * difference is folded back by adding the file-level modulus md once.
 *
 * The wrapping products are computed in unsigned arithmetic because signed
 * overflow is undefined behaviour. Converting the unsigned difference back
 * to ll assumes the usual two's-complement (modular) conversion.
 */
ll mul2(ll x,ll y)
{
    typedef unsigned long long ull;
    x%=md;y%=md;
    const ll q=ll((long double)x*y/md+0.5);     // estimated quotient: multiply first, then divide
    ll t=ll(ull(x)*ull(y)-ull(q)*ull(md));      // low 64 bits of x*y - q*md
    return t<0?t+md:t;
}
/**
 * Variant of the floating-point fast multiply that adds only 1e-8 before
 * truncating, so the quotient estimate (x/md)*y is effectively floored
 * rather than rounded. The bias is kept as is: it is the property this
 * variant exists to compare against mul1 and mul2.
 *
 * As in mul1, x*y and q*md are subtracted modulo 2^64 and a negative
 * difference is folded back by adding the file-level modulus md once. A
 * difference of md or more (estimate too low) is not corrected.
 *
 * The wrapping products are computed in unsigned arithmetic because signed
 * overflow is undefined behaviour. Converting the unsigned difference back
 * to ll assumes the usual two's-complement (modular) conversion.
 */
ll mul3(ll x,ll y)
{
    typedef unsigned long long ull;
    x%=md;y%=md;
    const ll q=ll((long double)x/md*y+1e-8);    // estimated quotient, truncated with a tiny bias
    ll t=ll(ull(x)*ull(y)-ull(q)*ull(md));      // low 64 bits of x*y - q*md
    return t<0?t+md:t;
}
/**
 * Reference implementation: forms the full product of x and y in a 128-bit
 * integer and reduces it by the file-level modulus md. The stress test
 * compares the floating-point variants against this result.
 */
ll mul0(ll x,ll y)
{
    const __int128 wide=static_cast<__int128>(x)*static_cast<__int128>(y);
    const __int128 reduced=wide%static_cast<__int128>(md);
    return static_cast<ll>(reduced);
}
// Not used by main below, which declares its own locals named a and b.
ll a,b;

/**
 * Endless stress test: draws random operands and a random modulus, then
 * compares a floating-point fast multiply against the __int128 reference.
 *
 * On every mismatch it prints the 1-based case number and "test", then waits
 * for an integer on stdin before continuing. It never returns on its own;
 * stop the process by hand.
 */
int main()
{
    long long T=0;  // case counter; 64-bit so a long run cannot overflow it
    std::srand(3254244);
    while(1)
    {
        T++;
        ll a=rd(),b=rd();
        // Redraw a zero modulus: every mulN would otherwise compute `% 0`.
        do md=rd();/*%ll(1e18);*/ while(md==0);
        //std::cout<<a<<' '<<b<<' '<<md<<'\n';
        ll t1=mul1(a,b),t2=mul0(a,b);// mul1 may be swapped for mul2/mul3
        //std::cout<<t1<<' '<<t2<<'\n';
        if(t1!=t2)
        {
            std::printf("%lld\n",T);
            std::puts("test");
            int t;std::cin>>t;  // pause until the user enters a number
        }
        //int t;std::cin>>t;
    }
    return 0;
}

View Code

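经过一些测试，可以发现，mul3效果最差（在模数>=1e17时，100000组以内就拍出锅）；应该是1e-8不够：long double（x86上尾数64位）算出的商的绝对误差大约是 md·2^-64 量级，模数达到1e17时约为5e-3，远大于1e-8；此时商一旦被少算1，得到的 t 就会 >=md，而代码只修正了 t<0 的情况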

mul2效果没有mul1好（模数不设额外上限时，100000组以内出锅；上限1e18时，20秒不出锅）

mul1效果最好（模数不设额外上限时，20秒不出锅）

原因就不知道了。。。

查看全文

相关阅读:
[转]如何选购塑料水杯（塑料口杯、茶杯）
【转】在sqlserver下增加MYSQL的链接服务器，实现分布式数据库开发第一步
 MySql: 查看当前登录用户,当前数据库
 python import, from xx import yy
python class metaclass instance
git: fatal: Not a git repository (or any of the parent directories): .git
Python flask 基于 Flask 提供 RESTful Web 服务
 Python flask @app.route
MySql: log 位置
 MySql: 忘记root密码

原文地址：https://www.cnblogs.com/hehe54321/p/9781767.html