数据传输测试:先将数据从主机传输到设备,再在设备内部进行拷贝,最后从设备传回主机。
H-->D
D-->D
D-->H
// moveArrays.cu
//
// Demonstrates the CUDA interface to data allocation on the device (GPU)
// and data movement between host (CPU) and device:
//   host -> device, device -> device, device -> host.

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Evaluates a CUDA runtime call exactly once. If it returns anything other
// than cudaSuccess, prints the source location and the runtime's error
// string to stderr and terminates the process with EXIT_FAILURE.
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t cuda_status_ = (call);                                \
        if (cuda_status_ != cudaSuccess) {                                \
            fprintf(stderr, "CUDA error at %s:%d: %s\n",                  \
                    __FILE__, __LINE__,                                   \
                    cudaGetErrorString(cuda_status_));                    \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)

// Copies an array host -> device -> device -> host and verifies that the
// data that comes back equals the data that was sent.
//
// Returns EXIT_SUCCESS when every element round-trips unchanged, and
// EXIT_FAILURE on a host allocation failure, a CUDA runtime error, or a
// data mismatch.
int main(void)
{
    float *a_h = NULL, *b_h = NULL;   // host pointers
    float *a_d = NULL, *b_d = NULL;   // device pointers

    const int N = 14;
    const size_t bytes = sizeof(float) * N;
    int i;
    int mismatches = 0;

    // Allocate host memory.
    a_h = (float *)malloc(bytes);
    b_h = (float *)malloc(bytes);
    if (a_h == NULL || b_h == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)bytes);
        free(a_h);   // free(NULL) is a no-op
        free(b_h);
        return EXIT_FAILURE;
    }

    // Allocate device memory.
    CUDA_CHECK(cudaMalloc((void **)&a_d, bytes));
    CUDA_CHECK(cudaMalloc((void **)&b_d, bytes));

    // Initialise host data: a_h holds the payload, b_h is zeroed so that
    // stale contents cannot make the final comparison pass by accident.
    for (i = 0; i < N; i++) {
        a_h[i] = 10.f + i;
        b_h[i] = 0.f;
    }

    // Host -> device: a_h --> a_d
    CUDA_CHECK(cudaMemcpy(a_d, a_h, bytes, cudaMemcpyHostToDevice));

    // Device -> device: a_d --> b_d
    CUDA_CHECK(cudaMemcpy(b_d, a_d, bytes, cudaMemcpyDeviceToDevice));

    // Device -> host: b_d --> b_h
    CUDA_CHECK(cudaMemcpy(b_h, b_d, bytes, cudaMemcpyDeviceToHost));

    // Verify the round trip. The data was only copied, never computed on,
    // so exact float equality is the right comparison here. An explicit
    // check is used instead of assert() so that the verification is not
    // compiled out when NDEBUG is defined.
    for (i = 0; i < N; i++) {
        if (a_h[i] != b_h[i]) {
            fprintf(stderr, "mismatch at index %d: sent %f, received %f\n",
                    i, a_h[i], b_h[i]);
            ++mismatches;
        }
    }

    // Release host memory.
    free(a_h);
    free(b_h);

    // Release device memory.
    CUDA_CHECK(cudaFree(a_d));
    CUDA_CHECK(cudaFree(b_d));

    return mismatches == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
测试环境:
Win7+VS2013+CUDA6.5