zoukankan      html  css  js  c++  java
  • CUDA实战

    1.第一个程序,输出hello world,1个Block块中含有5个线程

     1 #include <stdio.h>
     2 #include "cuda_runtime.h"
     3 
     /**
      * Prints a greeting from the device.
      *
      * Every thread that executes this kernel calls printf once, so the number
      * of lines printed equals the number of threads in the launch.
      * Device-side printf output is buffered; it reaches the host's stdout
      * only after a synchronizing call on the host side.
      */
     __global__ void hello(void)
     {
         // The trailing "\n" must be an escape sequence inside the literal; a
         // raw line break inside a string literal does not compile.
         printf("hello world from GPU!\n");
     }
     /**
      * Host entry point: prints a greeting from the CPU, then launches hello
      * with 1 block of 5 threads.
      *
      * Returns 0 on success, 1 if the launch or the device execution fails.
      */
     int main()
     {
         printf("hello world from CPU!\n");

         hello<<<1, 5>>>();

         // A kernel launch has no return value; an invalid launch is only
         // reported through cudaGetLastError().
         cudaError_t err = cudaGetLastError();
         if (err != cudaSuccess) {
             fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
             return 1;
         }

         // Wait for the kernel to finish: this flushes the device printf
         // buffer and surfaces any error raised while the kernel was running.
         err = cudaDeviceSynchronize();
         if (err != cudaSuccess) {
             fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(err));
             return 1;
         }

         // Reset the device to release the resources held by this program.
         cudaDeviceReset();
         return 0;
     }
    View Code

    2.参数的传入:主机端的值通过核函数的参数按值传给GPU

     1 #include <stdio.h>
     2 #include "cuda_runtime.h"
     3 #include "device_launch_parameters.h"
     /**
      * Adds two integers passed by value and prints the sum from the device.
      *
      * @param i first addend
      * @param j second addend
      *
      * Every launched thread prints its own line, so a <<<1,1>>> launch prints
      * the sum exactly once.
      */
     __global__ void add(int i, int j)
     {
         const int count = i + j;
         // Both line breaks are "\n" escapes inside a single literal; raw line
         // breaks inside a string literal do not compile.
         printf("\n Sum is %d\n", count);
     }
    10 
    /**
     * Host entry point: launches add with a single thread to print 10 + 20.
     *
     * Returns 0 on success, 1 if the launch or the device execution fails.
     */
    int main()
    {
        add<<<1, 1>>>(10, 20);

        // Launch errors are not returned by the <<<>>> expression itself.
        cudaError_t err = cudaGetLastError();
        if (err == cudaSuccess) {
            // Blocks until the kernel is done, which also flushes its printf
            // output and reports errors raised during execution.
            err = cudaDeviceSynchronize();
        }
        if (err != cudaSuccess) {
            fprintf(stderr, "add kernel failed: %s\n", cudaGetErrorString(err));
            return 1;
        }

        // Reset the device to release the resources held by this program.
        cudaDeviceReset();
        return 0;
    }
    View Code

     3.数据的传入与传出:数据要先从主机内存拷贝(copy)到显存,计算完成后再从显存拷贝回主机内存

     1 #include <stdio.h>
     2 #include "cuda_runtime.h"
     3 #include "device_launch_parameters.h"
     4 
     /**
      * Subtracts b from a and stores the result in *c.
      *
      * @param a minuend, passed by value
      * @param b subtrahend, passed by value
      * @param c pointer the result is written through; the kernel dereferences
      *          it on the device, so it must be a device-accessible address
      *
      * Every launched thread writes the same location, so the kernel is meant
      * to be launched with a single thread.
      *
      * NOTE(review): the original body computed a + b, which contradicts the
      * kernel's name; it now subtracts to match the name. Confirm the intent.
      */
     __global__ void decrease(int a, int b, int *c)
     {
         *c = a - b;
     }
     /**
      * Host entry point: runs decrease(15, 20) on the GPU, copies the single
      * int result back to host memory and prints it.
      *
      * Returns 0 on success, 1 if a host or device operation fails.
      */
     int main()
     {
         int *c = 0;
         int *dev_c = 0;

         // Allocate the host-side result slot.
         c = (int*)malloc(sizeof(int));
         if (!c) {
             fprintf(stderr, "host malloc failed\n");
             return 1;
         }

         // Allocate the device-side result slot.
         cudaError_t err = cudaMalloc((void**)&dev_c, sizeof(int));
         if (err != cudaSuccess) {
             fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
             free(c);
             return 1;
         }

         // Launch the kernel; launch errors are only visible via cudaGetLastError().
         decrease<<<1, 1>>>(15, 20, dev_c);
         err = cudaGetLastError();

         // Wait until every device thread has finished.
         if (err == cudaSuccess) {
             err = cudaDeviceSynchronize();
         }

         // Copy the result from the device to the host.
         if (err == cudaSuccess) {
             err = cudaMemcpy(c, dev_c, sizeof(int), cudaMemcpyDeviceToHost);
         }

         if (err == cudaSuccess) {
             printf(" c = %d\n", *c);
         } else {
             fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
         }

         // Release device and host memory on every path.
         cudaFree(dev_c);
         free(c);
         return err == cudaSuccess ? 0 : 1;
     }
    View Code

     4.传入的值全改为指针类型

     1 #include <stdio.h>
     2 #include "cuda_runtime.h"
     3 #include "device_launch_parameters.h"
     4 
     /**
      * Adds the integers pointed to by a and b and stores the sum in *c.
      *
      * @param a pointer to the first addend
      * @param b pointer to the second addend
      * @param c pointer the sum is written through
      *
      * All three pointers are dereferenced on the device, so they must be
      * device-accessible addresses. Every launched thread writes the same
      * location, so the kernel is meant to be launched with a single thread.
      *
      * NOTE(review): the original body computed *a - *b, which contradicts
      * both this kernel's name and its wrapper addWithCuda; it now adds.
      */
     __global__ void addCuda(int* a, int* b, int* c)
     {
         *c = *a + *b;
     }
     9 
    /**
     * Runs addCuda on the GPU for one pair of host integers.
     *
     * @param c host pointer that receives the result
     * @param a host pointer to the first operand
     * @param b host pointer to the second operand
     *
     * The function returns void, so a failure cannot be reported to the
     * caller: the first failing CUDA call is logged to stderr, the remaining
     * steps are skipped, and *c must not be relied on in that case.
     */
    void addWithCuda(int *c, int *a, int *b)
    {
        int *dev_c = 0;
        int *dev_a = 0;
        int *dev_b = 0;

        // Allocate one int on the device for each operand and for the result.
        cudaError_t err = cudaMalloc((void**)&dev_c, sizeof(int));
        if (err == cudaSuccess) {
            err = cudaMalloc((void**)&dev_a, sizeof(int));
        }
        if (err == cudaSuccess) {
            err = cudaMalloc((void**)&dev_b, sizeof(int));
        }

        // Copy the operands from the host to the device.
        if (err == cudaSuccess) {
            err = cudaMemcpy(dev_a, a, sizeof(int), cudaMemcpyHostToDevice);
        }
        if (err == cudaSuccess) {
            err = cudaMemcpy(dev_b, b, sizeof(int), cudaMemcpyHostToDevice);
        }

        // Launch the kernel; launch errors are only visible via cudaGetLastError().
        if (err == cudaSuccess) {
            addCuda<<<1, 1>>>(dev_a, dev_b, dev_c);
            err = cudaGetLastError();
        }
        if (err == cudaSuccess) {
            err = cudaDeviceSynchronize();
        }

        // Copy the result back to the host.
        if (err == cudaSuccess) {
            err = cudaMemcpy(c, dev_c, sizeof(int), cudaMemcpyDeviceToHost);
        }

        if (err != cudaSuccess) {
            fprintf(stderr, "addWithCuda failed: %s\n", cudaGetErrorString(err));
        }

        // cudaFree on a null pointer performs no operation, so buffers that
        // were never allocated are safe to pass here.
        cudaFree(dev_c);
        cudaFree(dev_a);
        cudaFree(dev_b);
    }
    37 
    /**
     * Host entry point: passes the addresses of three stack integers to
     * addWithCuda and prints the value it leaves in c.
     */
    int main()
    {
        int a = 30;
        int b = 15;
        int c = 10;  // placeholder; addWithCuda stores its result here

        // Pass the variables by address.
        addWithCuda(&c, &a, &b);

        // Reset the device to release the resources held by this program.
        cudaDeviceReset();

        // The line break is a "\n" escape; a raw line break inside a string
        // literal does not compile.
        printf("Value is %d\n", c);

        return 0;
    }
    View Code

    5.传入的值全改为指针类型(主机端的变量也改为用malloc分配的指针)

     1 #include <stdio.h>
     2 #include "cuda_runtime.h"
     3 #include "device_launch_parameters.h"
     4 
     /**
      * Stores the difference of two integers: reads one int through a and one
      * through b, and writes (first - second) through c.
      *
      * The pointers are dereferenced inside the kernel, so they have to be
      * addresses the device can access. Each launched thread writes the same
      * single location.
      */
     __global__ void deCuda(int* a, int* b, int* c)
     {
         const int minuend = a[0];
         const int subtrahend = b[0];
         c[0] = minuend - subtrahend;
     }
     /**
      * Host entry point: heap-allocates the two operands (10 and 5) and the
      * result on the host, runs deCuda on device copies of them, and prints
      * the value copied back.
      *
      * Returns 0 on success, 1 if a host allocation or a CUDA call fails.
      */
     int main()
     {
         int *a = (int*)malloc(sizeof(int));
         int *b = (int*)malloc(sizeof(int));
         int *c = (int*)malloc(sizeof(int));
         if (!a || !b || !c) {
             fprintf(stderr, "host malloc failed\n");
             // free() accepts null pointers, so a partial allocation is fine.
             free(a);
             free(b);
             free(c);
             return 1;
         }
         *a = 10;
         *b = 5;
         *c = 0;

         int *dev_c = 0;
         int *dev_a = 0;
         int *dev_b = 0;

         // Request device memory for the operands and the result.
         cudaError_t err = cudaMalloc((void**)&dev_c, sizeof(int));
         if (err == cudaSuccess) {
             err = cudaMalloc((void**)&dev_a, sizeof(int));
         }
         if (err == cudaSuccess) {
             err = cudaMalloc((void**)&dev_b, sizeof(int));
         }

         // Copy the operands from the host to the device.
         if (err == cudaSuccess) {
             err = cudaMemcpy(dev_a, a, sizeof(int), cudaMemcpyHostToDevice);
         }
         if (err == cudaSuccess) {
             err = cudaMemcpy(dev_b, b, sizeof(int), cudaMemcpyHostToDevice);
         }

         // Launch the kernel; launch errors are only visible via cudaGetLastError().
         if (err == cudaSuccess) {
             deCuda<<<1, 1>>>(dev_a, dev_b, dev_c);
             err = cudaGetLastError();
         }

         // The blocking copy on the default stream runs after the kernel has
         // finished, so no separate synchronize call is needed before it.
         if (err == cudaSuccess) {
             err = cudaMemcpy(c, dev_c, sizeof(int), cudaMemcpyDeviceToHost);
         }

         if (err == cudaSuccess) {
             printf("Value is %d\n", *c);
         } else {
             fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
         }

         // Release device and host memory on every path.
         cudaFree(dev_c);
         cudaFree(dev_a);
         cudaFree(dev_b);
         free(a);
         free(b);
         free(c);

         // Reset the device to release the resources held by this program.
         cudaDeviceReset();
         return err == cudaSuccess ? 0 : 1;
     }
    View Code

     6. 程序实现向量的加法操作,一个block中含有512个线程

     1 #include <stdio.h>
     2 #include <cuda_runtime.h>
     /**
      * Element-wise vector addition: dev_c[k] = dev_a[k] + dev_b[k].
      *
      * The element index k is threadIdx.x alone (blockIdx is not used), so one
      * block of T threads covers elements 0..T-1. There is no bounds check:
      * every array must hold at least blockDim.x elements.
      */
     __global__ void add( int *dev_a, int *dev_b, int *dev_c)
     {
         const int k = threadIdx.x;
         const int lhs = dev_a[k];
         const int rhs = dev_b[k];
         dev_c[k] = lhs + rhs;
     }
     /**
      * Host entry point: adds two 512-element integer vectors on the GPU with
      * one block of 512 threads and prints every element of the result.
      *
      * Returns 0 on success, 1 if any CUDA call fails.
      */
     int main()
     {
         const int N = 512;
         int host_a[N], host_b[N], host_c[N];
         for (int i = 0; i < N; i++)
         {
             host_a[i] = i;
             host_b[i] = i << 1;
         }

         int *dev_a = 0, *dev_b = 0, *dev_c = 0;

         // Check each allocation before attempting the next one; reusing one
         // status variable without checking would hide the earlier failures.
         cudaError_t err = cudaMalloc((void**)&dev_a, sizeof(host_a));
         if (err == cudaSuccess) {
             err = cudaMalloc((void**)&dev_b, sizeof(host_b));
         }
         if (err == cudaSuccess) {
             err = cudaMalloc((void**)&dev_c, sizeof(host_c));
         }
         if (err != cudaSuccess)
         {
             fprintf(stderr, "cudaMalloc on GPU failed: %s\n", cudaGetErrorString(err));
             // Release whatever was allocated; cudaFree on a null pointer
             // performs no operation.
             cudaFree(dev_a);
             cudaFree(dev_b);
             cudaFree(dev_c);
             return 1;
         }

         printf("SUCCESS\n");

         // Copy the inputs from host to device.
         err = cudaMemcpy(dev_a, host_a, sizeof(host_a), cudaMemcpyHostToDevice);
         if (err == cudaSuccess) {
             err = cudaMemcpy(dev_b, host_b, sizeof(host_b), cudaMemcpyHostToDevice);
         }

         // Launch the kernel; launch errors are only visible via cudaGetLastError().
         if (err == cudaSuccess) {
             add<<<1, N>>>(dev_a, dev_b, dev_c);
             err = cudaGetLastError();
         }

         // The blocking copy on the default stream runs after the kernel has
         // finished, so it also reports errors raised during execution.
         if (err == cudaSuccess) {
             err = cudaMemcpy(host_c, dev_c, sizeof(host_c), cudaMemcpyDeviceToHost);
         }

         if (err == cudaSuccess) {
             for (int i = 0; i < N; i++)
             {
                 printf("host_a[%d] + host_b[%d] = %d + %d = %d\n",
                        i, i, host_a[i], host_b[i], host_c[i]);
             }
         } else {
             fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
         }

         // Release device memory on every path.
         cudaFree(dev_c);
         cudaFree(dev_b);
         cudaFree(dev_a);

         return err == cudaSuccess ? 0 : 1;
     }
    View Code

           

     

  • 相关阅读:
    堆栈学习
    需要阅读的书籍
    Rust Book Lang Ch.19 Fully Qualified Syntax, Supertraits, Newtype Pattern, type aliases, never type, dynamic sized type
    Rust Lang Book Ch.19 Placeholder type, Default generic type parameter, operator overloading
    Rust Lang Book Ch.19 Unsafe
    Rust Lang Book Ch.18 Patterns and Matching
    Rust Lang Book Ch.17 OOP
    Rust Lang Book Ch.16 Concurrency
    Rust Lang Book Ch.15 Smart Pointers
    HDU3966-Aragorn's Story-树链剖分-点权
  • 原文地址:https://www.cnblogs.com/lin1216/p/12672994.html
Copyright © 2011-2022 走看看