zoukankan      html  css  js  c++  java
  • 多媒体指令(AVX加速数组求和)

    #include <stdio.h>
    #if defined(_MSC_VER)
    #include <intrin.h>
    #endif
    #if defined(__AVX__) || defined(_MSC_VER)
    #include <immintrin.h>
    #endif
    #include <cstddef>
    #include <cstdlib>
    #include <ctime>
    #include <iostream>
    #include <vector>
    
    using namespace std;
    /**
     * Baseline benchmark: scalar element-wise addition.
     *
     * Writes re[i] = a[i] + b[i] for every i in [0, n), repeats the whole sweep
     * four times, and then prints the elapsed std::clock() ticks on one line of
     * standard output.
     *
     * @param a   first input array, at least n elements
     * @param b   second input array, at least n elements
     * @param re  output array, at least n elements
     * @param n   number of elements to process; defaults to the 100000000
     *            elements this benchmark was originally hard-coded to
     */
    void test1(double *a, double *b, double *re, std::size_t n = 100000000)
    {
        const std::clock_t start = std::clock();
        // The same sweep is repeated four times to lengthen the measured interval.
        for (int pass = 0; pass < 4; ++pass)
        {
            for (std::size_t i = 0; i < n; ++i)
            {
                re[i] = a[i] + b[i];
            }
        }
        const std::clock_t stop = std::clock();
        // Raw tick difference; divide by CLOCKS_PER_SEC to convert to seconds.
        std::cout << stop - start << std::endl;
    }
    
    /**
     * AVX benchmark: element-wise addition, four doubles per instruction.
     *
     * Produces the same result as the scalar version, re[i] = a[i] + b[i] for
     * every i in [0, n), repeats the whole sweep four times, and then prints the
     * elapsed std::clock() ticks on one line of standard output.
     *
     * The arrays need no particular alignment: unaligned load/store intrinsics
     * are used. Elements left over when n is not a multiple of 4 are handled by
     * a scalar loop. When the translation unit is built without AVX enabled
     * (e.g. GCC/Clang without -mavx), the vector loop is compiled out and the
     * scalar loop processes the whole range, so the program still builds.
     *
     * @param a   first input array, at least n elements
     * @param b   second input array, at least n elements
     * @param re  output array, at least n elements
     * @param n   number of elements to process; defaults to the 100000000
     *            elements this benchmark was originally hard-coded to
     */
    void test2(double *a, double *b, double *re, std::size_t n = 100000000)
    {
        const std::clock_t start = std::clock();
        // The same sweep is repeated four times to lengthen the measured interval.
        for (int pass = 0; pass < 4; ++pass)
        {
            std::size_t i = 0;
    #if defined(__AVX__) || defined(_MSC_VER)
            // Load 4 contiguous doubles from each input, add, and store the 4
            // sums contiguously. This avoids building the vector from scalars
            // and reading the result back through a compiler-specific member.
            for (; i + 4 <= n; i += 4)
            {
                const __m256d va = _mm256_loadu_pd(a + i);
                const __m256d vb = _mm256_loadu_pd(b + i);
                _mm256_storeu_pd(re + i, _mm256_add_pd(va, vb));
            }
    #endif
            // Scalar tail: the last n % 4 elements (or everything without AVX).
            for (; i < n; ++i)
            {
                re[i] = a[i] + b[i];
            }
        }
        const std::clock_t stop = std::clock();
        // Raw tick difference; divide by CLOCKS_PER_SEC to convert to seconds.
        std::cout << stop - start << std::endl;
    }
    
    /**
     * Benchmark driver.
     *
     * Allocates three arrays of 100000000 doubles, fills both inputs with
     * a[i] = b[i] = i, runs the scalar benchmark (test1) and then the AVX
     * benchmark (test2) on them, releases the arrays, and finally waits for a
     * key press via the shell's "pause" command.
     *
     * @param argc  unused
     * @param argv  unused
     * @return 0
     */
    int main(int argc, char* argv[])
    {
        // Element count processed by test1/test2 when called with three arguments.
        const std::size_t count = 100000000;

        {
            // std::vector owns the storage, so it is released when this scope
            // ends (before the pause below) and on any exception. The vectors
            // are value-initialised, so every element of re has been written
            // once before either timed function runs.
            std::vector<double> a(count);
            std::vector<double> b(count);
            std::vector<double> re(count);
            for (std::size_t i = 0; i < count; ++i)
            {
                a[i] = static_cast<double>(i);
                b[i] = static_cast<double>(i);
            }
            test1(a.data(), b.data(), re.data());
            test2(a.data(), b.data(), re.data());
        }

        // "pause" is a Windows shell command; it keeps the console window open.
        std::system("pause");
        return 0;
    }

    AVX 版本（test2）比普通循环版本（test1）大概能快 100 毫秒左右。

  • 相关阅读:
    P2494 [SDOI2011]保密 最小割
    P2765 魔术球问题
    [CTSC2008]祭祀river
    CF311E Biologist
    P4177 [CEOI2008]order
    函数的形参和实参
    python字符编码
    源码安装Vim并配置YCM自动补全插件
    Python基础练习之购物车
    Python字符串的所有操作
  • 原文地址:https://www.cnblogs.com/tiandsp/p/9050220.html
Copyright © 2011-2022 走看看