zoukankan      html  css  js  c++  java
  • CUDA Thread Indexing

    1D grid of 1D blocks
    
    // Flat global index of the calling thread for a 1D grid of 1D blocks.
    // Blocks are ordered by blockIdx.x, threads within a block by threadIdx.x.
    __device__ int getGlobalIdx_1D_1D()
    {
        // Index of the first thread belonging to this block.
        const unsigned int blockBase = blockIdx.x * blockDim.x;
        return blockBase + threadIdx.x;
    }
    
    
    
    1D grid of 2D blocks
    
    // Flat global index of the calling thread for a 1D grid of 2D blocks.
    // Within a block, threadIdx.x varies fastest, then threadIdx.y.
    __device__ int getGlobalIdx_1D_2D()
    {
        // Number of threads in one block (x * y).
        const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.y * blockDim.x + threadIdx.x;
        return blockIdx.x * threadsPerBlock + localIdx;
    }
    
    
    
    1D grid of 3D blocks
    
    // Flat global index of the calling thread for a 1D grid of 3D blocks.
    // Within a block, threadIdx.x varies fastest, then threadIdx.y, then
    // threadIdx.z. The value is returned as int, so it is only meaningful
    // while the total number of launched threads fits in an int.
    __device__ int getGlobalIdx_1D_3D()
    {
        // Number of threads in one block (x * y * z).
        const unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.z * blockDim.y * blockDim.x
                                    + threadIdx.y * blockDim.x
                                    + threadIdx.x;
        return blockIdx.x * threadsPerBlock + localIdx;
    }
    
    
    
    2D grid of 1D blocks
    
     // Flat global index of the calling thread for a 2D grid of 1D blocks.
     // Blocks are linearized row by row: blockIdx.x varies fastest, then
     // blockIdx.y. The value is returned as int, so it is only meaningful
     // while the total number of launched threads fits in an int.
     __device__ int getGlobalIdx_2D_1D()
     {
         // Linear index of this block within the grid.
         const unsigned int blockId = blockIdx.y * gridDim.x + blockIdx.x;
         return blockId * blockDim.x + threadIdx.x;
     }
    
    
    
    2D grid of 2D blocks  
    
    // Flat global index of the calling thread for a 2D grid of 2D blocks.
    // Blocks are linearized with blockIdx.x fastest; threads inside a block
    // are linearized with threadIdx.x fastest.
    __device__ int getGlobalIdx_2D_2D()
    {
        // Number of threads in one block (x * y).
        const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.y * gridDim.x + blockIdx.x;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.y * blockDim.x + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    
    2D grid of 3D blocks
    
    // Flat global index of the calling thread for a 2D grid of 3D blocks.
    // Blocks are linearized with blockIdx.x fastest; threads inside a block
    // are linearized with threadIdx.x fastest, then y, then z.
    __device__ int getGlobalIdx_2D_3D()
    {
        // Threads in one z-slice of a block, and in a whole block.
        const unsigned int sliceSize = blockDim.x * blockDim.y;
        const unsigned int threadsPerBlock = sliceSize * blockDim.z;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.y * gridDim.x + blockIdx.x;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.z * sliceSize
                                    + threadIdx.y * blockDim.x
                                    + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    
    
    
    3D grid of 1D blocks
    
    // Flat global index of the calling thread for a 3D grid of 1D blocks.
    // Blocks are linearized with blockIdx.x fastest, then y, then z.
    __device__ int getGlobalIdx_3D_1D()
    {
        // Number of blocks in one z-layer of the grid.
        const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.z * blocksPerLayer
                                       + blockIdx.y * gridDim.x
                                       + blockIdx.x;
        return linearBlock * blockDim.x + threadIdx.x;
    }
    
    
    
    3D grid of 2D blocks
    
    // Flat global index of the calling thread for a 3D grid of 2D blocks.
    // Blocks are linearized with blockIdx.x fastest, then y, then z; threads
    // inside a block are linearized with threadIdx.x fastest, then y.
    __device__ int getGlobalIdx_3D_2D()
    {
        // Number of blocks in one z-layer of the grid.
        const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.z * blocksPerLayer
                                       + blockIdx.y * gridDim.x
                                       + blockIdx.x;
        // Number of threads in one block (x * y).
        const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.y * blockDim.x + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    
    
    
    3D grid of 3D blocks
    
    // Flat global index of the calling thread for a 3D grid of 3D blocks.
    // Blocks are linearized with blockIdx.x fastest, then y, then z; threads
    // inside a block follow the same x-then-y-then-z order.
    __device__ int getGlobalIdx_3D_3D()
    {
        // Number of blocks in one z-layer of the grid.
        const unsigned int blocksPerLayer = gridDim.x * gridDim.y;
        // Linear index of this block within the grid.
        const unsigned int linearBlock = blockIdx.z * blocksPerLayer
                                       + blockIdx.y * gridDim.x
                                       + blockIdx.x;
        // Threads in one z-slice of a block, and in a whole block.
        const unsigned int sliceSize = blockDim.x * blockDim.y;
        const unsigned int threadsPerBlock = sliceSize * blockDim.z;
        // Position of this thread inside its own block.
        const unsigned int localIdx = threadIdx.z * sliceSize
                                    + threadIdx.y * blockDim.x
                                    + threadIdx.x;
        return linearBlock * threadsPerBlock + localIdx;
    }
    

      

  • 相关阅读:
    vue(30)vuex使用子模块
    Python写一个简单的控制台输入输出并打包为exe文件
    使用npm安装的方式使用Arcgisapi
    跨域的理解
    vue中使provide中的数据变为响应式
    ArcObjects或者ArcEngine的C#版本在读取shp文件的时候报错:HRESULT:0x80040228
    C++ 新标准生成随机数
    Arcgisapi for js 4.x使用query/goto/PopupTemplate
    Arcgisapi for js 4.x加载Arcgis serve发布的矢量切片
    abp 指定 AppService中某个方法不是用驼峰命名(CamelCase)返回
  • 原文地址:https://www.cnblogs.com/tibetanmastiff/p/4639194.html
Copyright © 2011-2022 走看看