zoukankan      html  css  js  c++  java
  • CUDA实例练习(三):线程块索引

    题目:在长方形布局的方式中,每个线程块的X轴方向上开启了32个线程,Y轴方向上开启了4个线程。在线程网格上,X轴方向上有1个线程块,Y轴方向上有4个线程块。计算在X轴方向和Y轴方向上的线程块索引、线程索引以及网格和线程块的维度等信息。作为对比,下面的代码还用正方形布局(每个线程块16×8个线程,线程网格为2×2个线程块)再运行一次同一个内核,两种布局覆盖的都是32×16个线程。

      1 #include "cuda_runtime.h"
      2 #include "device_launch_parameters.h"
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 __global__ void what_is_my_id_2d_A(
      6     unsigned int * const block_x,
      7     unsigned int * const block_y,
      8     unsigned int * const thread,
      9     unsigned int * const calc_thread,
     10     unsigned int * const x_thread,
     11     unsigned int * const y_thread,
     12     unsigned int * const grid_dimx,
     13     unsigned int * const block_dimx,
     14     unsigned int * const grid_dimy,
     15     unsigned int * const block_dimy)
     16 {
     17     const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
     18     const unsigned int idy = blockIdx.y * blockDim.y + threadIdx.y;
     19     const unsigned int thread_idx = (gridDim.x * blockDim.x) * idy + idx;
     20     block_x[thread_idx] = blockIdx.x;
     21     block_y[thread_idx] = blockIdx.y;
     22     thread[thread_idx] = threadIdx.x;
     23     calc_thread[thread_idx] = thread_idx;
     24     x_thread[thread_idx] = idx;
     25     y_thread[thread_idx] = idy;
     26     grid_dimx[thread_idx] = gridDim.x;
     27     block_dimx[thread_idx] = blockDim.x;
     28     grid_dimy[thread_idx] = gridDim.y;
     29     block_dimy[thread_idx] = blockDim.y;
     30 }
     31 
     32 #define ARRAY_SIZE_X 32
     33 #define ARRAY_SIZE_Y 16
     34 #define ARRAY_SIZE_IN_BYTES ((ARRAY_SIZE_X) * (ARRAY_SIZE_Y) * sizeof(unsigned int))
     35 
     36 unsigned int cpu_block_x[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     37 unsigned int cpu_block_y[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     38 unsigned int cpu_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     39 unsigned int cpu_warp[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     40 unsigned int cpu_calc_thread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     41 unsigned int cpu_xthread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     42 unsigned int cpu_ythread[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     43 unsigned int cpu_grid_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     44 unsigned int cpu_block_dimx[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     45 unsigned int cpu_grid_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     46 unsigned int cpu_block_dimy[ARRAY_SIZE_Y][ARRAY_SIZE_X];
     47 
     48 int main(void){
     49     const dim3 threads_rect(32, 4);
     50     const dim3 blocks_rect(1, 4);
     51 
     52     const dim3 threads_square(16, 8);
     53     const dim3 blocks_square(2, 2);
     54 
     55     char ch;
     56 
     57     unsigned int * gpu_block_x;
     58     unsigned int * gpu_block_y;
     59     unsigned int * gpu_thread;
     60     unsigned int * gpu_warp;
     61     unsigned int * gpu_calc_thread;
     62     unsigned int * gpu_xthread;
     63     unsigned int * gpu_ythread;
     64     unsigned int * gpu_grid_dimx;
     65     unsigned int * gpu_block_dimx;
     66     unsigned int * gpu_grid_dimy;
     67     unsigned int * gpu_block_dimy;
     68 
     69     cudaMalloc((void **)&gpu_block_x, ARRAY_SIZE_IN_BYTES);
     70     cudaMalloc((void **)&gpu_block_y, ARRAY_SIZE_IN_BYTES);
     71     cudaMalloc((void **)&gpu_thread, ARRAY_SIZE_IN_BYTES);
     72     cudaMalloc((void **)&gpu_calc_thread, ARRAY_SIZE_IN_BYTES);
     73     cudaMalloc((void **)&gpu_xthread, ARRAY_SIZE_IN_BYTES);
     74     cudaMalloc((void **)&gpu_ythread, ARRAY_SIZE_IN_BYTES);
     75     cudaMalloc((void **)&gpu_grid_dimx, ARRAY_SIZE_IN_BYTES);
     76     cudaMalloc((void **)&gpu_block_dimx, ARRAY_SIZE_IN_BYTES);
     77     cudaMalloc((void **)&gpu_grid_dimy, ARRAY_SIZE_IN_BYTES);
     78     cudaMalloc((void **)&gpu_block_dimy, ARRAY_SIZE_IN_BYTES);
     79 
     80     for (int kernel = 0; kernel < 2; kernel++){
     81         switch (kernel)
     82         {
     83         case 0:
     84         {
     85             what_is_my_id_2d_A << <blocks_rect, threads_rect >> >(gpu_block_x, gpu_block_y,
     86                 gpu_thread, gpu_calc_thread, gpu_xthread, gpu_ythread, gpu_grid_dimx,
     87                 gpu_block_dimx, gpu_grid_dimy, gpu_block_dimy);
     88         } break;
     89 
     90         case 1:
     91         {
     92             what_is_my_id_2d_A << <blocks_square, threads_square >> >(gpu_block_x, gpu_block_y,
     93                 gpu_thread, gpu_calc_thread, gpu_xthread, gpu_ythread, gpu_grid_dimx,
     94                 gpu_block_dimx, gpu_grid_dimy, gpu_block_dimy);
     95         } break;
     96 
     97         default: exit(1); break;
     98         }
     99 
    100         cudaMemcpy(cpu_block_x, gpu_block_x, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    101         cudaMemcpy(cpu_block_y, gpu_block_y, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    102         cudaMemcpy(cpu_thread, gpu_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    103         cudaMemcpy(cpu_calc_thread, gpu_calc_thread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    104         cudaMemcpy(cpu_xthread, gpu_xthread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    105         cudaMemcpy(cpu_ythread, gpu_ythread, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    106         cudaMemcpy(cpu_grid_dimx, gpu_grid_dimx, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    107         cudaMemcpy(cpu_block_dimx, gpu_block_dimx, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    108         cudaMemcpy(cpu_grid_dimy, gpu_grid_dimy, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    109         cudaMemcpy(cpu_block_dimy, gpu_block_dimy, ARRAY_SIZE_IN_BYTES, cudaMemcpyDeviceToHost);
    110 
    111         printf("
    Kernel %d
    ", kernel);
    112         for (int y = 0; y < ARRAY_SIZE_Y; y++){
    113             for (int x = 0; x < ARRAY_SIZE_X; x++){
    114                 printf("CT:%2u BKX:%1u BKY:%1u TID:%2u YTID:%2u XTID:%2u GDX:%1u BDX:%1u GDY %1u BDY %1u
    ",
    115                     cpu_calc_thread[y][x], cpu_block_x[y][x], cpu_block_y[y][x], cpu_thread[y][x], cpu_ythread[y][x],
    116                     cpu_xthread[y][x], cpu_grid_dimx[y][x], cpu_block_dimx[y][x],
    117                     cpu_grid_dimy[y][x], cpu_block_dimy[y][x]);
    118                 ch = getchar();
    119             }
    120         }
    121         printf("Press any key to continue
    ");
    122         ch = getchar();
    123     }
    124 
    125     cudaFree(gpu_block_x);
    126     cudaFree(gpu_block_y);
    127     cudaFree(gpu_thread);
    128     cudaFree(gpu_calc_thread);
    129     cudaFree(gpu_xthread);
    130     cudaFree(gpu_ythread);
    131     cudaFree(gpu_grid_dimx);
    132     cudaFree(gpu_block_dimx);
    133     cudaFree(gpu_grid_dimy);
    134     cudaFree(gpu_block_dimy);
    135 }
  • 相关阅读:
    POJ 1953 World Cup Noise
    POJ 1995 Raising Modulo Numbers (快速幂取余)
    poj 1256 Anagram
    POJ 1218 THE DRUNK JAILER
    POJ 1316 Self Numbers
    POJ 1663 Number Steps
    POJ 1664 放苹果
    如何查看DIV被设置什么CSS样式
    独行DIV自适应宽度布局CSS实例与扩大应用范围
    python 从入门到精通教程一:[1]Hello,world!
  • 原文地址:https://www.cnblogs.com/zhangshuwen/p/7229844.html
Copyright © 2011-2022 走看看