zoukankan      html  css  js  c++  java
  • 0_Simple__template

    简单的 CUDA 应用模板(随 CUDA Samples 附带的示例):把一个数组拷贝到 GPU,由核函数将每个元素乘以线程块大小,再拷回主机并与 CPU 参考结果比较。

    ▶ 源代码

    1 //template_cpu.cpp
    2 extern "C" void computeGold(float *, const unsigned int);
    3 
    // CPU reference implementation: scales every element of `idata` in place
    // by the element count.
    //
    //   idata  array of at least `len` floats, overwritten with the result
    //   len    number of elements to process; also the scale factor
    //          (converted to float once, before the loop)
    void computeGold(float *idata, const unsigned int len)
    {
        const float scale = static_cast<float>(len);
        float *cursor = idata;
        unsigned int remaining = len;

        // Walk the array front to back, one element per iteration.
        while (remaining != 0)
        {
            *cursor *= scale;
            ++cursor;
            --remaining;
        }
    }
     1 // template.cu
     2 #include <stdio.h>
     3 #include <cuda_runtime.h>
     4 #include "device_launch_parameters.h"
     5 #include <helper_functions.h>
     6 
     7 extern "C" void computeGold(float *, const unsigned int);
     8 
     // Multiplies each input element by the block size, staging the value
     // through dynamically sized shared memory.
     //
     // Launch requirements (derived from the indexing below):
     //   * Elements are addressed by threadIdx.x only, so the kernel is meant
     //     for a single 1-D block; additional blocks would all read and write
     //     the same elements.
     //   * The dynamic shared-memory size (third launch argument) must be at
     //     least blockDim.x * sizeof(float).
     //   * g_idata and g_odata must each hold at least blockDim.x floats.
     //
     //   g_idata  device input array
     //   g_odata  device output array; g_odata[i] = blockDim.x * g_idata[i]
     __global__ void testKernel(float *g_idata, float *g_odata)
     {
         extern __shared__ float sdata[];

         const unsigned int slot = threadIdx.x;
         const float factor = (float)blockDim.x;

         // Stage this thread's input element in shared memory.
         sdata[slot] = g_idata[slot];
         __syncthreads();

         // Scale the staged value by the number of threads in the block.
         sdata[slot] = factor * sdata[slot];
         __syncthreads();

         // Publish the result to global memory.
         g_odata[slot] = sdata[slot];
     }
    20 
    21 int main()
    22 {
    23     printf("
    	Start.
    ");
    24 
    25     cudaSetDevice(0);
    26     StopWatchInterface *timer = 0;
    27     sdkCreateTimer(&timer);
    28     sdkStartTimer(&timer);
    29 
    30     unsigned int num_threads = 32;
    31     unsigned int mem_size = sizeof(float) * num_threads;
    32     float *h_idata, *h_odata, *d_idata, *d_odata;
    33     h_idata = (float *)malloc(mem_size);
    34     h_odata = (float *)malloc(mem_size);
    35     cudaMalloc((void **) &d_idata, mem_size);    
    36     cudaMalloc((void **)&d_odata, mem_size);
    37     for (unsigned int i = 0; i < num_threads; ++i)
    38         h_idata[i] = (float)i;
    39     cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice);
    40     
    41     testKernel << < dim3(1, 1, 1), dim3(num_threads, 1, 1), mem_size >> > (d_idata, d_odata);
    42     //getLastCudaError("Kernel execution failed");// 检查内核调用的报错结果
    43     cudaMemcpy(h_odata, d_odata, sizeof(float) * num_threads, cudaMemcpyDeviceToHost);
    44     cudaDeviceSynchronize();
    45 
    46     sdkStopTimer(&timer);
    47     printf("
    	Processing time: %f ms
    ", sdkGetTimerValue(&timer));
    48     sdkDeleteTimer(&timer);
    49 
    50     computeGold(h_idata, num_threads);
    51     printf("
    	Finish, return %s.
    ", compareData(h_idata, h_odata, num_threads, 0.0f,0.0f) ? "Passed" : "Failed");
    52     
    53     free(h_idata);
    54     free(h_odata);
    55     cudaFree(d_idata);
    56     cudaFree(d_odata);
    57     getchar();
    58     return 0;
    59 }

    ▶ 输出结果:

        Start.
    
        Processing time: 101.169357 ms
    
        Finish, return Passed.

    ▶ 涨姿势:没有

  • 相关阅读:
    钱伟长的养生之道:每天步行三千步
    GBDT 深入理解
    整形数据的存储方式
    进制基础学习
    C语言运算符(注意事项)
    PHP文件锁
    gcc options选项的优化及选择
    Datenode无法启动
    如何使用WebUploader。
    thinkphp如何实现伪静态
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/8011176.html
Copyright © 2011-2022 走看看