zoukankan      html  css  js  c++  java
  • 向量相加CUDA练习

     #include<math.h>
     #include<stdio.h>
     #include<stdlib.h>
     #include<string.h>
     #include<time.h>
     5 #define N 100
     6 
     /*
      * vecAdd: element-wise vector sum, C[i] = A[i] + B[i] for i in [0, N).
      *
      * A, B : input arrays, read on the device; each must hold at least N floats.
      * C    : output array, written on the device; must hold at least N floats.
      *
      * Launch layout: 1D grid of 1D blocks. Any grid/block size is valid because
      * each thread starts at its global index and advances by the total number
      * of launched threads until it passes N. The element count is the
      * file-level macro N.
      */
     __global__ void vecAdd(float* A,float* B,float* C){
         // Total threads in the launch; used as the step so that a grid smaller
         // than N still covers every element.
         int stride=gridDim.x*blockDim.x;
         // Start from the global thread index (not just threadIdx.x), otherwise
         // every block would redo elements [0, blockDim.x) and skip the rest.
         for(int i=blockIdx.x*blockDim.x+threadIdx.x;i<N;i+=stride)
             C[i]=A[i]+B[i];
     }
    12 
    13 
    14 
    15 
    /* Print a diagnostic and terminate if a CUDA runtime call did not succeed.
     * `what` is a short description of the operation, used only in the message. */
    static void checkCuda(cudaError_t status,const char* what){
        if(status!=cudaSuccess){
            fprintf(stderr,"CUDA error during %s: %s\n",what,cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    }

    /*
     * Vector-addition exercise driver.
     *
     * Fills two host arrays of N floats with random integers in [0, 99], copies
     * them to the device, launches vecAdd, copies the result back and prints
     * one "index:a+b=c" line per element.
     *
     * Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
     * CUDA call fails.
     */
    int main(int argc,char** argv){
        (void)argc;
        (void)argv;

        size_t size=N*sizeof(float);

        // Host buffers.
        float* h_A=(float*)malloc(size);
        float* h_B=(float*)malloc(size);
        float* h_C=(float*)malloc(size);
        if(h_A==NULL||h_B==NULL||h_C==NULL){
            fprintf(stderr,"host allocation of %zu bytes failed\n",size);
            free(h_A);
            free(h_B);
            free(h_C);
            return EXIT_FAILURE;
        }

        // Device buffers.
        float* d_A=NULL;
        float* d_B=NULL;
        float* d_C=NULL;
        checkCuda(cudaMalloc((void**)&d_A,size),"cudaMalloc d_A");
        checkCuda(cudaMalloc((void**)&d_B,size),"cudaMalloc d_B");
        checkCuda(cudaMalloc((void**)&d_C,size),"cudaMalloc d_C");

        // Random integer-valued inputs in [0, 99], seeded from the wall clock.
        srand((unsigned)time(NULL));
        for(int i=0;i<N;i++){
            h_A[i]=(float)(rand()%100);
            h_B[i]=(float)(rand()%100);
        }

        checkCuda(cudaMemcpy(d_A,h_A,size,cudaMemcpyHostToDevice),"copy A to device");
        checkCuda(cudaMemcpy(d_B,h_B,size,cudaMemcpyHostToDevice),"copy B to device");

        // Ceil-divide so the grid covers N even when N is not a multiple of the block size.
        int threadsPerBlock=256;
        int blocksPerGrid=(N+threadsPerBlock-1)/threadsPerBlock;
        vecAdd<<<blocksPerGrid,threadsPerBlock>>>(d_A,d_B,d_C);
        // A launch returns no status itself; bad launch configurations are reported here.
        checkCuda(cudaGetLastError(),"vecAdd launch");

        // Blocking copy: also the point where errors raised while the kernel ran are reported.
        checkCuda(cudaMemcpy(h_C,d_C,size,cudaMemcpyDeviceToHost),"copy C to host");

        for(int i=0;i<N;i++){
            // "%5d" rather than "%5.0d": precision 0 prints no digits for index 0.
            printf("%5d:%.0f+%.0f=%.0f\n",i,h_A[i],h_B[i],h_C[i]);
        }

        free(h_A);
        free(h_B);
        free(h_C);

        checkCuda(cudaFree(d_A),"cudaFree d_A");
        checkCuda(cudaFree(d_B),"cudaFree d_B");
        checkCuda(cudaFree(d_C),"cudaFree d_C");
        return 0;
    }
  • 相关阅读:
    absolute之后居中宽度自适应
    定位网页元素(5)
    浮动(4)
    Android的方法和属性(1)
    Activity步骤
    JSP的指令
    边框和边距(3)
    计算机快件键
    字体、文本、背景、列表样式和超链接(2)
    c/s和b/s的区别
  • 原文地址:https://www.cnblogs.com/zhangchengbing/p/5063278.html
Copyright © 2011-2022 走看看