zoukankan      html  css  js  c++  java
  • OpenACC 简单的原子操作

    ▶ OpenACC 的原子操作,用到了 C++ 的一个高精度计时器

    ● 代码,直接的原子操作

     #include <iostream>
     #include <cstdlib>
     #include <chrono>

     // Comment this macro out to build the variant without the atomic directive.
     #define ATOMIC

     using namespace std;
     using namespace std::chrono;

     // Increments a single shared counter `count` times inside an OpenACC
     // parallel loop, then prints the accumulated value and the elapsed
     // wall-clock time in seconds. Always returns 0.
     int main()
     {
         // High-resolution timer. It is started before the parallel region, so the
         // measured interval spans the whole region (presumably including device
         // start-up and data transfer -- confirm with a profiler).
         const high_resolution_clock::time_point t1 = high_resolution_clock::now();

         const int count = 1073741824;   // 2^30 iterations
         int sum = 0;

         // copy (not copyout): the host's initial 0 has to reach the device before
         // the first increment. copyout only transfers the value back on exit and
         // leaves the device copy uninitialized on entry.
     #pragma acc parallel loop copy(sum)
         for (int i = 0; i < count; i++)
         {
     #ifdef ATOMIC
         #pragma acc atomic update
     #endif
             sum++;
         }

         const high_resolution_clock::time_point t2 = high_resolution_clock::now();
         const duration<double> time = duration_cast<duration<double>>(t2 - t1);

         // Print the computed sum rather than the constant, so a wrong result is visible.
         cout << "\nCount = " << sum << ", duraion = " << time.count() << " s" << endl;
         return 0;
     }

    ● 输出结果,不知道为什么,win10中的 pgCC 不能用

    D:\Code\OpenACC\OpenACCProject\OpenACCProject>pgCC -acc -o acc_win10.exe main.cpp -Minfo
    pgCC-Warning-C++ compilation is not supported: main.cpp

    ● 输出结果,WSL 中

    // 不使用 OpenACC
    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ pgc++ -std=c++11 -o acc.exe main.cpp -Minfo
    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ ./acc.exe
    
    Count = 1073741824, duraion = 0.483907 s
    
    // 使用宏 ATOMIC,即使用原子操作
    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ pgc++ -acc -std=c++11 -o acc.exe main.cpp -Minfo
    main:
         15, Generating copyout(sum)
             Accelerator kernel generated
             Generating Tesla code
             18, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ ./acc.exe
    
    Count = 1073741824, duraion = 0.248377 s
    
    // 不用宏 ATOMIC,即不用原子操作
    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ pgc++ -acc -std=c++11 -o acc.exe main.cpp -Minfo
    main:
         15, Generating copyout(sum)
             Accelerator kernel generated
             Generating Tesla code
             18, #pragma acc loop seq
         23, Accelerator restriction: induction variable live-out from loop: sum    // 编译器发现 sum 在循环结束后仍被使用(live-out),无法并行化,因此该循环按串行(loop seq)执行
    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ ./acc.exe
    
    Count = 1073741824, duraion = 0.247399 s

    ● 优化一下,使用分段计数

     #include <iostream>
     #include <cstdlib>
     #include <chrono>

     using namespace std;
     using namespace std::chrono;

     // Counts to `count` in fixed-size segments: each outer iteration reduces one
     // segment into a private partial sum, which is then added to the shared total
     // with a single atomic update. Prints the total and the elapsed wall-clock
     // time in seconds. Always returns 0.
     int main()
     {
         // The timer is started before the parallel region, so the measured
         // interval spans the whole region.
         const high_resolution_clock::time_point t1 = high_resolution_clock::now();

         const int count = 1073741824;        // 2^30 elements to count
         const int segments = 32;             // number of segments
         const int length = count / segments; // length of each segment
         int sum = 0;

         // copy (not copyout): the host's initial 0 has to reach the device before
         // the first atomic update. copyout only transfers the value back on exit.
     #pragma acc parallel loop copy(sum)
         for (int start = 0; start < count; start += length)   // start is the first index of each segment
         {
             // End of this segment, clamped to count. Comparing the remaining
             // distance avoids computing start + length when it could overflow.
             const int end = (count - start > length) ? start + length : count;
             int subSum = 0;
     #pragma acc loop worker reduction(+:subSum)
             for (int j = start; j < end; j++)                 // count from start up to end
                 subSum++;

             // Fold this segment's reduction result into the shared total.
     #pragma acc atomic update
             sum += subSum;
         }

         const high_resolution_clock::time_point t2 = high_resolution_clock::now();
         const duration<double> time = duration_cast<duration<double>>(t2 - t1);

         cout << "\nCount = " << sum << ", duraion = " << time.count() << " s" << endl;
         return 0;
     }

    ● 输出结果,好像好一点点

    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ pgc++ -acc -std=c++11 -o acc.exe main.cpp -Minfo
    main:
         15, Generating copyout(sum)
             Accelerator kernel generated
             Generating Tesla code
             18, #pragma acc loop gang /* blockIdx.x */
             23, #pragma acc loop seq /* threadIdx.y */
                 Generating reduction(+:subSum)
         23, Loop is parallelizable
    cuan@CUAN:/mnt/d/Code/OpenACC/OpenACCProject/OpenACCProject$ ./acc.exe
    
    Count = 1073741824, duraion = 0.246488 s
  • 相关阅读:
    svn命令
    dos 批处理删除svn目录
    Libevent 的多线程操作
    Linux Daemon 类程序
    模板函数 使用的默认void 模板报错
    配置BUG-Linux系统下ssh登陆很慢的解决办法
    Centos apache + mysql + usvn 配置svn 服务器
    Centos 7U盘安装
    mysql 常用基础
    shell 的 md5 命令
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/9458900.html
Copyright © 2011-2022 走看看