zoukankan      html  css  js  c++  java
  • cuda中当元素个数超过线程个数时的处理案例

    项目打包下载

    当向量元素超过线程个数时的情况

    向量元素个数是线程总数的 (33 * 1024)/(128 * 128) ≈ 2.06 倍，因此每个线程需要处理 2 到 3 个元素

     1 /*
     2 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
     3 *
     4 * NVIDIA Corporation and its licensors retain all intellectual property and
     5 * proprietary rights in and to this software and related documentation.
     6 * Any use, reproduction, disclosure, or distribution of this software
     7 * and related documentation without an express license agreement from
     8 * NVIDIA Corporation is strictly prohibited.
     9 *
    10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
    11 * associated with this source code for terms and conditions that govern
    12 * your use of this NVIDIA software.
    13 *
    14 */
    15 
    16 
    17 #include "../common/book.h"
    18 #include "cuda.h"
    19 #include "cuda_runtime.h"
    20 #include "device_launch_parameters.h"
    21 
    22 #define N   (33 * 1024)
    23 
    /*
     * Element-wise integer vector addition: c[i] = a[i] + b[i] for i in [0, N).
     *
     * Each thread starts at its flat global index and then advances by the
     * total number of launched threads (blockDim.x * gridDim.x), so the
     * kernel covers all N elements even when N is larger than the number of
     * threads in the launch (grid-stride loop).
     *
     * a, b : input arrays, read at indices below N
     * c    : output array, written at indices below N
     *
     * Only the x dimension of the launch configuration is used.
     */
    __global__ void add(int *a, int *b, int *c) {
        // Distance between two consecutive elements handled by one thread.
        const unsigned int stride = blockDim.x * gridDim.x;

        for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < N; idx += stride) {
            c[idx] = a[idx] + b[idx];
        }
    }
    31 
    /*
     * Host driver for the vector-addition example.
     *
     * Allocates three N-element int arrays on the host and on the device,
     * fills a[i] = i and b[i] = 2 * i, copies a and b to the device, launches
     * add() with fewer threads than elements, copies c back and checks on the
     * CPU that c[i] == a[i] + b[i] for every i.
     *
     * Returns 0 after the check has run (mismatches are reported on stdout),
     * or 1 if a host allocation fails. CUDA API failures are passed to
     * HANDLE_ERROR.
     */
    int main(void) {
        // Launch configuration: 128 * 128 = 16384 threads in total.
        const int blocks = 128;
        const int threadsPerBlock = 128;

        int *a, *b, *c;
        int *dev_a, *dev_b, *dev_c;

        // allocate the memory on the CPU
        a = (int*)malloc(N * sizeof(int));
        b = (int*)malloc(N * sizeof(int));
        c = (int*)malloc(N * sizeof(int));
        if (a == NULL || b == NULL || c == NULL) {
            fprintf(stderr, "Host memory allocation failed\n");
            // free(NULL) is a no-op, so releasing all three is safe here.
            free(a);
            free(b);
            free(c);
            return 1;
        }

        // allocate the memory on the GPU
        HANDLE_ERROR(cudaMalloc((void**)&dev_a, N * sizeof(int)));
        HANDLE_ERROR(cudaMalloc((void**)&dev_b, N * sizeof(int)));
        HANDLE_ERROR(cudaMalloc((void**)&dev_c, N * sizeof(int)));

        // fill the arrays 'a' and 'b' on the CPU
        for (int i = 0; i < N; i++) {
            a[i] = i;
            b[i] = 2 * i;
        }

        // copy the arrays 'a' and 'b' to the GPU
        HANDLE_ERROR(cudaMemcpy(dev_a, a, N * sizeof(int),
            cudaMemcpyHostToDevice));
        HANDLE_ERROR(cudaMemcpy(dev_b, b, N * sizeof(int),
            cudaMemcpyHostToDevice));

        /*
         * Case where the vector has more elements than there are threads:
         * N / (128 * 128) = (33 * 1024) / 16384 ~= 2.06, so each thread has
         * to process more than one element.
         */
        add<<<blocks, threadsPerBlock>>>(dev_a, dev_b, dev_c);
        // A kernel launch returns no status itself; query it explicitly so an
        // invalid launch configuration is reported here.
        HANDLE_ERROR(cudaGetLastError());

        // copy the array 'c' back from the GPU to the CPU
        // (cudaMemcpy on the default stream waits for the kernel to finish)
        HANDLE_ERROR(cudaMemcpy(c, dev_c, N * sizeof(int),
            cudaMemcpyDeviceToHost));

        // verify that the GPU did the work we requested
        bool success = true;
        for (int i = 0; i < N; i++) {
            if ((a[i] + b[i]) != c[i]) {
                printf("Error:  %d + %d != %d\n", a[i], b[i], c[i]);
                success = false;
            }
        }
        if (success) {
            printf("We did it!\n");
        }

        // free the memory we allocated on the GPU
        HANDLE_ERROR(cudaFree(dev_a));
        HANDLE_ERROR(cudaFree(dev_b));
        HANDLE_ERROR(cudaFree(dev_c));

        // free the memory we allocated on the CPU
        free(a);
        free(b);
        free(c);

        return 0;
    }

     

  • 相关阅读:
    JS的IE和FF兼容性问题汇总
    解决flash挡住层的问题
    javascript 代码优化工具 UglifyJS
    理解面向对象
    js中的等号与非等号
    js 的数据类型转换
    js优化 ----js的有序加载
    各浏览器对页面外部资源加载的策略
    js 执行效率
    脚本的加载,解析,与执行
  • 原文地址:https://www.cnblogs.com/liangliangdetianxia/p/3985040.html
Copyright © 2011-2022 走看看