zoukankan html css js c++ java

cuda基础－－流

1：运行时 API 实现

　　事件（event）既可以用来测量程序（尤其是 GPU 端操作）的运行时间，也可以用来协调 CPU 与 GPU 的并行执行（例如让 CPU 等待 GPU 上某个事件完成）。

 // Time a region of work with a pair of CUDA runtime events.
 float elapsedTime;                      // receives the time measured between the two events
 cudaEvent_t evStart, evStop;

 // Create the two events that bracket the measured region.
 cudaEventCreate(&evStart);
 cudaEventCreate(&evStop);

 cudaEventRecord(evStart, 0);            // record the first event in stream 0
 // do something
 cudaEventRecord(evStop, 0);             // record the second event in stream 0
 cudaEventSynchronize(evStop);           // wait on the host until the second event has completed
 cudaEventElapsedTime(&elapsedTime, evStart, evStop);

 // Release both events.
 cudaEventDestroy(evStop);
 cudaEventDestroy(evStart);

2：驱动 API 实现

 // Time a region of work with a pair of CUDA driver-API events.
 // NOTE(review): driver-API calls need cuInit() and a current context first;
 // that setup is not part of this snippet.
 CUevent start, stop;

 // cuEventCreate requires the event flags as its second argument.
 cuEventCreate(&start, CU_EVENT_DEFAULT);
 cuEventCreate(&stop, CU_EVENT_DEFAULT);

 cuEventRecord(start, 0);                // record the first event in stream 0
 // do something
 cuEventRecord(stop, 0);                 // record the second event in stream 0
 cuEventSynchronize(stop);               // wait on the host until the second event has completed
 float elapsedTime;                      // receives the time measured between the two events
 cuEventElapsedTime(&elapsedTime, start, stop);

 // Release both events.
 cuEventDestroy(start);
 cuEventDestroy(stop);

3：simpleStreams 示例

  1 #include <stdio.h>
  2 #include <cutil_inline.h>
  3 
  /*
   * Adds *factor to one element of g_data, num_iterations times.
   *
   * Each thread updates the element at index
   * blockIdx.x * blockDim.x + threadIdx.x. There is no bounds check, so the
   * launch must not create more threads than g_data has elements.
   *
   * g_data          array that is updated in place
   * factor          pointer to the increment; every thread reads the same value
   * num_iterations  how many times the increment is applied per element
   */
  __global__ void
  init_array(int *g_data, int *factor, int num_iterations)
  {
      int idx = blockIdx.x * blockDim.x + threadIdx.x;
      // The trip count is the num_iterations parameter (no `n` exists in this scope).
      for (int i = 0; i < num_iterations; i++)
          g_data[idx] += *factor;
  }
 11 
 /*
  * Checks that the first n elements of a are all equal to c.
  *
  * On the first mismatch it prints the index, the value found and the
  * expected value, then returns 0.
  * Returns 1 when every element matches (this includes n == 0).
  */
 int correct_data(int *a, const int n, const int c)
 {
     for (int i = 0; i < n; i++)
     {
         if (a[i] != c)
         {
             printf("%d:%d %d\n", i, a[i], c);
             return 0;
         }
     }
     // Success is reported only after the whole array has been inspected.
     return 1;
 }
 24 
 25 
 /* Evaluates a CUDA runtime call; on failure prints file, line and error string and exits. */
 #define CUDA_CHECK(call)                                                    \
     do {                                                                    \
         cudaError_t err_ = (call);                                          \
         if (err_ != cudaSuccess) {                                          \
             fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                     cudaGetErrorString(err_));                              \
             exit(EXIT_FAILURE);                                             \
         }                                                                   \
     } while (0)

 /*
  * Times a host-to-device copy, a single kernel launch in a stream, and a
  * reference loop of kernel launch + blocking device-to-host copy, then
  * verifies the data produced by the reference loop.
  *
  * argv[1] (optional): index of the CUDA device to use; defaults to 0.
  * Returns 0 when the verification passes, 1 otherwise.
  */
 int main(int argc, char *argv[])
 {
     int CUDA_device = 0;
     int nstreams = 4;                    // number of streams created below
     int nreps = 10;                      // number of repetitions of the reference loop
     int n = 16 * 1024 * 1024;            // number of array elements
     size_t nbytes = (size_t)n * sizeof(int);
     dim3 threads, blocks;
     float elapsed_time, time_memcpy, time_kernel;

     int niterations = 5;                 // number of iterations of the loop inside the kernel
     if (argc > 1)
     {
         CUDA_device = atoi(argv[1]);
     }

     // Query the available devices and validate the requested index.
     int num_devices = 0;
     CUDA_CHECK(cudaGetDeviceCount(&num_devices));
     if (0 == num_devices)
     {
         printf("no device\n");
         return 1;
     }
     if (CUDA_device < 0 || CUDA_device >= num_devices)
     {
         printf("CUDA_device between 0 and %d\n", num_devices - 1);
         return 1;
     }

     CUDA_CHECK(cudaSetDevice(CUDA_device));
     cudaDeviceProp device_properties;
     CUDA_CHECK(cudaGetDeviceProperties(&device_properties, CUDA_device));
     printf("running on:%s\n", device_properties.name);

     // Host memory allocation through the CUDA runtime (cudaMallocHost).
     int c = 5;
     int *a = 0;
     CUDA_CHECK(cudaMallocHost((void **)&a, nbytes));
     // Give the host buffer defined contents before it is uploaded.
     for (int i = 0; i < n; i++)
         a[i] = 0;

     // Device memory allocation.
     int *d_a = 0, *d_c = 0;
     CUDA_CHECK(cudaMalloc((void **)&d_a, nbytes));
     CUDA_CHECK(cudaMalloc((void **)&d_c, sizeof(int)));
     CUDA_CHECK(cudaMemcpy(d_c, &c, sizeof(int), cudaMemcpyHostToDevice));

     // Create and initialise the streams.
     cudaStream_t *streams = (cudaStream_t *)malloc(nstreams * sizeof(cudaStream_t));
     if (streams == NULL)
     {
         fprintf(stderr, "out of host memory\n");
         return 1;
     }
     for (int i = 0; i < nstreams; i++)
     {
         CUDA_CHECK(cudaStreamCreate(&streams[i]));
     }

     // Create the timing events.
     cudaEvent_t start_event, stop_event;
     CUDA_CHECK(cudaEventCreate(&start_event));
     CUDA_CHECK(cudaEventCreate(&stop_event));

     // Time the memory copy. The events are recorded in stream 0 so that all
     // previously issued CUDA calls have completed.
     CUDA_CHECK(cudaEventRecord(start_event, 0));
     CUDA_CHECK(cudaMemcpyAsync(d_a, a, nbytes, cudaMemcpyHostToDevice, streams[0]));
     CUDA_CHECK(cudaEventRecord(stop_event, 0));
     // The stop event must have completed before the elapsed time can be queried.
     CUDA_CHECK(cudaEventSynchronize(stop_event));
     CUDA_CHECK(cudaEventElapsedTime(&time_memcpy, start_event, stop_event));
     printf("memcopy: %.2f ms\n", time_memcpy);

     // Time one kernel launch issued into a stream.
     // n is a multiple of 512, so the grid has exactly one thread per element.
     threads = dim3(512, 1);
     blocks = dim3(n / threads.x, 1);
     CUDA_CHECK(cudaEventRecord(start_event, 0));
     init_array<<<blocks, threads, 0, streams[0]>>>(d_a, d_c, niterations);
     CUDA_CHECK(cudaGetLastError());      // surfaces launch-configuration errors
     CUDA_CHECK(cudaEventRecord(stop_event, 0));
     CUDA_CHECK(cudaEventSynchronize(stop_event));
     CUDA_CHECK(cudaEventElapsedTime(&time_kernel, start_event, stop_event));
     printf("kernel: %.2f ms\n", time_kernel);

     // Zero the device array so that the reference loop alone determines the
     // values checked below (the timed launch above already modified it).
     CUDA_CHECK(cudaMemset(d_a, 0, nbytes));

     // Time the reference loop that does not use streams; it reuses the
     // launch configuration computed above.
     CUDA_CHECK(cudaEventRecord(start_event, 0));
     for (int i = 0; i < nreps; i++)
     {
         init_array<<<blocks, threads>>>(d_a, d_c, niterations);
         CUDA_CHECK(cudaGetLastError());
         CUDA_CHECK(cudaMemcpy(a, d_a, nbytes, cudaMemcpyDeviceToHost));
     }
     CUDA_CHECK(cudaEventRecord(stop_event, 0));
     CUDA_CHECK(cudaEventSynchronize(stop_event));
     CUDA_CHECK(cudaEventElapsedTime(&elapsed_time, start_event, stop_event));
     printf("non-streamed: %.2f ms per repetition\n", elapsed_time / nreps);

     // Verify the result: nreps launches each added c to every element niterations times.
     int passed = correct_data(a, n, c * nreps * niterations);
     if (passed)
         printf("passed\n");

     // Release resources.
     for (int i = 0; i < nstreams; i++)
         CUDA_CHECK(cudaStreamDestroy(streams[i]));
     free(streams);
     CUDA_CHECK(cudaEventDestroy(start_event));
     CUDA_CHECK(cudaEventDestroy(stop_event));
     CUDA_CHECK(cudaFreeHost(a));
     CUDA_CHECK(cudaFree(d_a));
     CUDA_CHECK(cudaFree(d_c));

     // cudaDeviceReset replaces the deprecated cudaThreadExit.
     CUDA_CHECK(cudaDeviceReset());
     return passed ? 0 : 1;
 }

查看全文

相关阅读:
server.c:5170:31: error: ‘struct redisServer'
SpringBoot配置文件笔记：yaml语法，yaml如何写、注入配置文件类(2种方式根据业务场景选择)、多环境切换、配置文件加载优先级、指定位置加载配置文件
 微服务（Microservices）—— Martin Flower
利用原生子窗体解决悬浮窗口播放的问题及踩坑记录
 uniapp中nvue页面如何使用iconfont字体图标
 项目经验踩坑记录：跨平台业务影响时注意要考虑多个平台
 SQL基础知识笔记：概述(层状/网状/关系模型)、数据类型、操作数据库能力(DDL/DML/DQL)、关系模型(主键、联合主键、外键、外键约束-性能影响、一对一、一对多、多对多、索引、索引效率)、实用SQL语句、事务(四个特性、四种隔离级别)
Java的Maven基础知识笔记：Maven是什么、maven目录、pom.xml唯一ID、maven解决依赖管理、maven中央仓库与镜像、构建流程(声明周期、阶段、目标)、使用插件、模块管理、mvnw指定版本、如何发布自己的开源库
 Java里的IO基础知识笔记：IO流、字节流/字符流、File对象读取、输入流/输出流(使用过后及时关闭、缓冲区)、Filter模式、ZIP操作、读取classpath资源的意义、序列化/反序列化、Reader/Writer、使用Files工具类及其局限性
 推荐了解一个用于JavaScript的快速SQL数据库

原文地址：https://www.cnblogs.com/pengtangtang/p/13083913.html