zoukankan      html  css  js  c++  java
  • 多媒体指令(AVX加速数组逐元素相加)

    #include <stdio.h>
    #ifdef _MSC_VER
    #include <intrin.h>
    #endif
    #include <immintrin.h>

    #include <cstddef>
    #include <cstdlib>
    #include <ctime>
    #include <iostream>
    #include <vector>
    
    using namespace std;
    /**
     * Scalar baseline: computes re[i] = a[i] + b[i] for every i in [0, n),
     * repeats the whole pass 4 times, then prints the elapsed clock() ticks
     * to std::cout.
     *
     * @param a   first input array, at least n elements
     * @param b   second input array, at least n elements
     * @param re  output array, at least n elements
     * @param n   number of elements to process; defaults to 100000000, the
     *            count this benchmark previously hard-coded
     */
    void test1(double *a, double *b, double *re, std::size_t n = 100000000)
    {
        constexpr int kRepeats = 4;  // same pass count as the AVX variant (test2)

        const std::clock_t start = std::clock();
        for (int pass = 0; pass < kRepeats; ++pass)
        {
            for (std::size_t i = 0; i < n; ++i)
            {
                re[i] = a[i] + b[i];
            }
        }
        const std::clock_t stop = std::clock();

        // Raw tick count (divide by CLOCKS_PER_SEC for seconds). std::endl is
        // kept on purpose so the figure is flushed as soon as it is known.
        std::cout << stop - start << std::endl;
    }
    
    /**
     * AVX variant: computes re[i] = a[i] + b[i] for every i in [0, n), four
     * doubles per 256-bit register, repeats the whole pass 4 times, then
     * prints the elapsed clock() ticks to std::cout.
     *
     * Unaligned load/store intrinsics are used, so the arrays need no special
     * alignment. Elements left over when n is not a multiple of 4 are handled
     * by a scalar tail loop, so nothing past index n - 1 is read or written.
     *
     * @param a   first input array, at least n elements
     * @param b   second input array, at least n elements
     * @param re  output array, at least n elements
     * @param n   number of elements to process; defaults to 100000000, the
     *            count this benchmark previously hard-coded
     */
    #if defined(__GNUC__) || defined(__clang__)
    __attribute__((target("avx")))  // allow AVX code here without a global -mavx
    #endif
    void test2(double *a, double *b, double *re, std::size_t n = 100000000)
    {
        constexpr int kRepeats = 4;        // same pass count as the scalar variant (test1)
        constexpr std::size_t kLanes = 4;  // doubles held by one __m256d

        const std::clock_t start = std::clock();
        for (int pass = 0; pass < kRepeats; ++pass)
        {
            std::size_t i = 0;

            // Vector body: load 4 contiguous doubles from each input, add,
            // and store 4 contiguous results. This replaces the per-element
            // _mm256_set_pd packing and lane-by-lane write-back, which moved
            // every value through scalar code and relied on the MSVC-only
            // .m256d_f64 member.
            for (; i + kLanes <= n; i += kLanes)
            {
                const __m256d va = _mm256_loadu_pd(a + i);
                const __m256d vb = _mm256_loadu_pd(b + i);
                _mm256_storeu_pd(re + i, _mm256_add_pd(va, vb));
            }

            // Scalar tail for the final n % 4 elements.
            for (; i < n; ++i)
            {
                re[i] = a[i] + b[i];
            }
        }
        const std::clock_t stop = std::clock();

        // Raw tick count (divide by CLOCKS_PER_SEC for seconds). std::endl is
        // kept on purpose so the figure is flushed as soon as it is known.
        std::cout << stop - start << std::endl;
    }
    
    /**
     * Benchmark driver: builds two input arrays where a[i] == b[i] == i,
     * runs the scalar version (test1) and then the AVX version (test2) on
     * them, and finally waits via system("pause") so the printed timings
     * stay visible.
     *
     * @param argc  unused
     * @param argv  unused
     * @return 0
     */
    int main(int argc, char* argv[])
    {
        // Must stay equal to the element count test1/test2 process by default.
        constexpr std::size_t kCount = 100000000;

        // std::vector owns the buffers, so they are released on every exit
        // path (including a std::bad_alloc from a later allocation). Its
        // value-initialization also writes every element of `re` before any
        // timing starts, so the first timed run is not charged for touching
        // fresh memory.
        std::vector<double> a(kCount);
        std::vector<double> b(kCount);
        std::vector<double> re(kCount);

        for (std::size_t i = 0; i < kCount; ++i)
        {
            const double value = static_cast<double>(i);
            a[i] = value;
            b[i] = value;
        }

        test1(a.data(), b.data(), re.data());
        test2(a.data(), b.data(), re.data());

        std::system("pause");
        return 0;
    }

    从输出的两个计时结果来看,AVX 版本(test2)比普通循环版本(test1)大概能快 100 毫秒左右。

  • 相关阅读:
    P1338 末日的传说
    P1364医院设置
    线程
    进程通信
    CentOS设置中文
    C++快读讲解
    迭代加深搜索
    P1118 [USACO06FEB]Backward Digit Sums G/S
    N皇后问题
    RMQ区间最值查询
  • 原文地址:https://www.cnblogs.com/tiandsp/p/9050220.html
Copyright © 2011-2022 走看看