zoukankan      html  css  js  c++  java
  • CUDA实例练习(三):线程块索引

    题目：在长方形布局的方式中，每个线程块的X轴方向上开启了32个线程，Y轴方向上开启了4个线程；在线程网格上，X轴方向上有1个线程块，Y轴方向上有4个线程块。下面的程序还演示了正方形布局（每个线程块为16×8个线程，线程网格为2×2个线程块）。计算在X轴方向和Y轴方向上的线程块索引与线程索引等信息。

      1 #include "cuda_runtime.h"
      2 #include "device_launch_parameters.h"
      3 #include <stdio.h>
      4 #include <stdlib.h>
      /*
       * Records, for every thread of a 2D launch, where that thread sits in the
       * grid and what the launch dimensions were.
       *
       * Each thread writes exactly one element, at the same offset, into every
       * output array. The offset is the thread's row-major position in the whole
       * grid: (global y) * (threads per grid row) + (global x).
       *
       * Preconditions (not checked here): every array must hold at least
       * gridDim.x * blockDim.x * gridDim.y * blockDim.y elements, because there
       * is no bounds guard on the stores.
       *
       *   block_x, block_y  - receive blockIdx.x / blockIdx.y
       *   thread            - receives threadIdx.x
       *   calc_thread       - receives the row-major offset described above
       *   x_thread          - receives the grid-wide x index
       *   y_thread          - receives the grid-wide y index
       *   grid_dimx/y       - receive gridDim.x / gridDim.y
       *   block_dimx/y      - receive blockDim.x / blockDim.y
       */
      __global__ void what_is_my_id_2d_A(
          unsigned int * const block_x,
          unsigned int * const block_y,
          unsigned int * const thread,
          unsigned int * const calc_thread,
          unsigned int * const x_thread,
          unsigned int * const y_thread,
          unsigned int * const grid_dimx,
          unsigned int * const block_dimx,
          unsigned int * const grid_dimy,
          unsigned int * const block_dimy)
      {
          /* Position of this thread measured across the whole grid. */
          const unsigned int global_x = threadIdx.x + blockDim.x * blockIdx.x;
          const unsigned int global_y = threadIdx.y + blockDim.y * blockIdx.y;

          /* Number of threads in one grid-wide row, then the row-major slot. */
          const unsigned int row_pitch = blockDim.x * gridDim.x;
          const unsigned int slot = global_y * row_pitch + global_x;

          /* Which block this thread belongs to, and its x index inside it. */
          block_x[slot] = blockIdx.x;
          block_y[slot] = blockIdx.y;
          thread[slot] = threadIdx.x;

          /* The computed grid-wide coordinates. */
          calc_thread[slot] = slot;
          x_thread[slot] = global_x;
          y_thread[slot] = global_y;

          /* The launch configuration as seen from the device. */
          grid_dimx[slot] = gridDim.x;
          block_dimx[slot] = blockDim.x;
          grid_dimy[slot] = gridDim.y;
          block_dimy[slot] = blockDim.y;
      }
     31 
     /* Extent of the host result arrays; they are indexed [y][x]. */
     #define ARRAY_SIZE_X 32
     #define ARRAY_SIZE_Y 16

     /* Size in bytes of one complete result array. */
     #define ARRAY_SIZE_IN_BYTES (sizeof(unsigned int) * (ARRAY_SIZE_Y) * (ARRAY_SIZE_X))

     /* Common shape shared by every host-side result array below. */
     typedef unsigned int host_result_grid[ARRAY_SIZE_Y][ARRAY_SIZE_X];

     /* Block indices and the x thread index inside the block. */
     host_result_grid cpu_block_x;
     host_result_grid cpu_block_y;
     host_result_grid cpu_thread;

     /* Declared alongside the others; nothing in this listing reads or writes it. */
     host_result_grid cpu_warp;

     /* Computed row-major offset and grid-wide x / y thread indices. */
     host_result_grid cpu_calc_thread;
     host_result_grid cpu_xthread;
     host_result_grid cpu_ythread;

     /* Launch dimensions reported by each thread. */
     host_result_grid cpu_grid_dimx;
     host_result_grid cpu_block_dimx;
     host_result_grid cpu_grid_dimy;
     host_result_grid cpu_block_dimy;
     47 
     /*
      * Stops the program with a readable message when a CUDA runtime call did
      * not return cudaSuccess. `what` names the step that was being attempted.
      */
     static void check_cuda(const cudaError_t status, const char * const what)
     {
         if (status != cudaSuccess) {
             fprintf(stderr, "CUDA failure during %s: %s\n", what, cudaGetErrorString(status));
             exit(EXIT_FAILURE);
         }
     }

     /*
      * Launches what_is_my_id_2d_A twice - first with a rectangular layout
      * (32x4 threads per block, 1x4 blocks), then with a square layout
      * (16x8 threads per block, 2x2 blocks) - and prints what every thread
      * recorded. After each kernel's table the program waits for input.
      *
      * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails or
      * a launch layout does not match the result arrays.
      */
     int main(void){
         const dim3 threads_rect(32, 4);
         const dim3 blocks_rect(1, 4);

         const dim3 threads_square(16, 8);
         const dim3 blocks_square(2, 2);

         unsigned int * gpu_block_x = NULL;
         unsigned int * gpu_block_y = NULL;
         unsigned int * gpu_thread = NULL;
         unsigned int * gpu_calc_thread = NULL;
         unsigned int * gpu_xthread = NULL;
         unsigned int * gpu_ythread = NULL;
         unsigned int * gpu_grid_dimx = NULL;
         unsigned int * gpu_block_dimx = NULL;
         unsigned int * gpu_grid_dimy = NULL;
         unsigned int * gpu_block_dimy = NULL;

         /*
          * Each device buffer is paired, by position, with the host array that
          * receives its contents, so allocation, copy-back and release can each
          * be a single loop.
          */
         unsigned int ** const gpu_buffers[] = {
             &gpu_block_x, &gpu_block_y, &gpu_thread, &gpu_calc_thread,
             &gpu_xthread, &gpu_ythread, &gpu_grid_dimx, &gpu_block_dimx,
             &gpu_grid_dimy, &gpu_block_dimy
         };
         void * const cpu_buffers[] = {
             cpu_block_x, cpu_block_y, cpu_thread, cpu_calc_thread,
             cpu_xthread, cpu_ythread, cpu_grid_dimx, cpu_block_dimx,
             cpu_grid_dimy, cpu_block_dimy
         };
         const size_t buffer_count = sizeof(gpu_buffers) / sizeof(gpu_buffers[0]);

         for (size_t i = 0; i < buffer_count; i++){
             check_cuda(cudaMalloc((void **)gpu_buffers[i], ARRAY_SIZE_IN_BYTES), "cudaMalloc");
         }

         for (int kernel = 0; kernel < 2; kernel++){
             /* Kernel 0 uses the rectangular layout, kernel 1 the square one. */
             const dim3 blocks = (kernel == 0) ? blocks_rect : blocks_square;
             const dim3 threads = (kernel == 0) ? threads_rect : threads_square;

             /*
              * The kernel stores one element per thread without a bounds check,
              * so the launch must cover exactly ARRAY_SIZE_X by ARRAY_SIZE_Y
              * threads to stay inside the buffers and to line up with the
              * [y][x] host arrays.
              */
             if (blocks.x * threads.x != ARRAY_SIZE_X || blocks.y * threads.y != ARRAY_SIZE_Y){
                 fprintf(stderr, "Launch layout %d does not match the %d x %d result arrays\n",
                     kernel, ARRAY_SIZE_X, ARRAY_SIZE_Y);
                 exit(EXIT_FAILURE);
             }

             what_is_my_id_2d_A<<<blocks, threads>>>(gpu_block_x, gpu_block_y,
                 gpu_thread, gpu_calc_thread, gpu_xthread, gpu_ythread, gpu_grid_dimx,
                 gpu_block_dimx, gpu_grid_dimy, gpu_block_dimy);

             /* A launch reports configuration errors only through the last-error state. */
             check_cuda(cudaGetLastError(), "kernel launch");
             /* Wait here so a failure while the kernel runs is not blamed on a later copy. */
             check_cuda(cudaDeviceSynchronize(), "kernel execution");

             for (size_t i = 0; i < buffer_count; i++){
                 check_cuda(cudaMemcpy(cpu_buffers[i], *gpu_buffers[i], ARRAY_SIZE_IN_BYTES,
                     cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");
             }

             printf("\nKernel %d\n", kernel);
             for (int y = 0; y < ARRAY_SIZE_Y; y++){
                 for (int x = 0; x < ARRAY_SIZE_X; x++){
                     printf("CT:%2u BKX:%1u BKY:%1u TID:%2u YTID:%2u XTID:%2u GDX:%1u BDX:%1u GDY %1u BDY %1u\n",
                         cpu_calc_thread[y][x], cpu_block_x[y][x], cpu_block_y[y][x], cpu_thread[y][x], cpu_ythread[y][x],
                         cpu_xthread[y][x], cpu_grid_dimx[y][x], cpu_block_dimx[y][x],
                         cpu_grid_dimy[y][x], cpu_block_dimy[y][x]);
                 }
             }

             /* Pause once per kernel, after the prompt, so the table can be read. */
             printf("Press any key to continue\n");
             (void)getchar();
         }

         for (size_t i = 0; i < buffer_count; i++){
             check_cuda(cudaFree(*gpu_buffers[i]), "cudaFree");
         }

         return 0;
     }
  • 相关阅读:
    【结对开发】电梯调度 一(从电梯布局分配考虑)需求分析及设计思路。
    【结对开发】求一个整数数组的所有子数组中和的最大值。
    【测试用例选取及异常处理】 之 求一个数组中的最大整数。
    互测测评报告
    绩效考核
    冲刺5
    写个烂android一天到晚活累死,
    冲刺3
    冲刺2
    冲刺1
  • 原文地址:https://www.cnblogs.com/zhangshuwen/p/7229844.html
Copyright © 2011-2022 走看看