zoukankan      html  css  js  c++  java
  • 基于共享内存的位图

     基于共享内存的位图,项目打包下载

     1 /*
     2 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
     3 *
     4 * NVIDIA Corporation and its licensors retain all intellectual property and
     5 * proprietary rights in and to this software and related documentation.
     6 * Any use, reproduction, disclosure, or distribution of this software
     7 * and related documentation without an express license agreement from
     8 * NVIDIA Corporation is strictly prohibited.
     9 *
    10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
    11 * associated with this source code for terms and conditions that govern
    12 * your use of this NVIDIA software.
    13 *
    14 */
    #include <GL/glut.h>
    #include "cuda.h"
    #include "cuda_runtime.h"
    #include "device_launch_parameters.h"
    #include "cuda.h"
    #include "../common/book.h"
    #include "../common/cpu_bitmap.h"
    22 
    23 
    24 #define DIM 1024
    25 #define PI 3.1415926535897932f
    26 
    /*
     * Writes one RGBA pixel per thread into ptr: red and blue are 0, alpha is
     * 255, and green is a product-of-sines value. Each thread first stores its
     * own green value in a per-block shared tile, then outputs the value that
     * the diagonally mirrored thread of the same block stored.
     *
     * ptr: output buffer, 4 bytes per pixel, indexed as a row-major image whose
     *      row length is blockDim.x * gridDim.x pixels. The kernel performs no
     *      bounds checking, so the buffer must cover the whole launched grid.
     *
     * Launch requirements: blockDim must be (16, 16). The shared tile is
     * 16 x 16 floats and is indexed directly by threadIdx; any larger block
     * would index outside it.
     */
    __global__ void kernel(unsigned char *ptr) {
        // edge length of the shared tile; must equal blockDim.x and blockDim.y
        const int TILE = 16;

        // map from threadIdx/blockIdx to pixel position
        int x = threadIdx.x + blockIdx.x * blockDim.x;
        int y = threadIdx.y + blockIdx.y * blockDim.y;
        int offset = x + y * blockDim.x * gridDim.x;

        __shared__ float shared[TILE][TILE];

        // spatial period of the sine pattern, in pixels
        const float period = 128.0f;

        // Row = threadIdx.y, column = threadIdx.x: threads with consecutive
        // threadIdx.x touch consecutive 4-byte words, which avoids the
        // shared-memory bank conflicts of a [threadIdx.x][threadIdx.y] layout.
        // The read below mirrors both indices the same way, so every thread
        // still receives the value computed by thread
        // (TILE-1 - threadIdx.x, TILE-1 - threadIdx.y); the image is unchanged.
        shared[threadIdx.y][threadIdx.x] =
            255 * (sinf(x*2.0f*PI / period) + 1.0f) *
            (sinf(y*2.0f*PI / period) + 1.0f) / 4.0f;

        // The value read below was written by a different thread of this
        // block; the barrier guarantees that write has completed. Removing it
        // shows graphically what happens when it doesn't exist.
        __syncthreads();

        unsigned char *pixel = ptr + offset * 4;
        pixel[0] = 0;
        pixel[1] = shared[TILE - 1 - threadIdx.y][TILE - 1 - threadIdx.x];
        pixel[2] = 0;
        pixel[3] = 255;
    }
    51 
    /*
     * State bundle whose address main() passes to the CPUBitmap constructor.
     * It carries a single pointer to the image buffer.
     */
    struct DataBlock
    {
        unsigned char *dev_bitmap;   // set in main() from the cudaMalloc result
    };
    56 
    /*
     * Allocates a device buffer of bitmap.image_size() bytes, fills it by
     * launching kernel() over a DIM x DIM pixel grid, copies the result into
     * the CPUBitmap's host buffer, frees the device buffer and hands control
     * to bitmap.display_and_exit().
     */
    int main(void) {
        DataBlock   data;
        CPUBitmap bitmap(DIM, DIM, &data);
        unsigned char    *dev_bitmap;

        HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap,
            bitmap.image_size()));
        data.dev_bitmap = dev_bitmap;

        // One 16x16 thread block per 16x16 pixel tile. kernel() has no bounds
        // check, so this covers the image exactly only when DIM is a multiple
        // of 16.
        dim3    grids(DIM / 16, DIM / 16);
        dim3    threads(16, 16);
        kernel <<<grids, threads >>>(dev_bitmap);

        // A kernel launch returns no status itself; without this check an
        // invalid launch configuration would go unnoticed and the copy below
        // would just return whatever the buffer happened to contain.
        HANDLE_ERROR(cudaGetLastError());

        HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
            bitmap.image_size(),
            cudaMemcpyDeviceToHost));

        // NOTE(review): data.dev_bitmap still holds this pointer after the
        // free -- confirm nothing reached through CPUBitmap dereferences it.
        HANDLE_ERROR(cudaFree(dev_bitmap));

        bitmap.display_and_exit();
    }

    kernel 函数中的 __syncthreads() 去掉和加上时,生成的效果图是不一样的:每个线程读取的共享内存元素是由同一线程块中的另一个线程写入的,如果没有同步,读取时该元素可能尚未被写入。

    取消时:

    加上时:

    这也说明了加上同步的重要性。

    抛砖引玉

    这个

    int offset = x + y * blockDim.x * gridDim.x;

     以及

    1 ptr[offset * 4 + 0] = 0;
    2 ptr[offset * 4 + 1] = shared[15 - threadIdx.x][15 - threadIdx.y];
    3 ptr[offset * 4 + 2] = 0;
    4 ptr[offset * 4 + 3] = 255;

    如何理解?!

  • 相关阅读:
    廖雪峰的多线程 1
    保持良好的心态 戒骄戒躁
    Break camelCase
    int32 to IPv4 (int32到IPv4地址转换)
    Stop gninnipS My sdroW!
    Find The Parity Outlier 找到奇偶校验异常值
    今日新闻整理 2020-7-31
    改造rabbitmq demo 到 jpa
    Flink实战(110):FLINK-SQL应用场景(11)connector(十九)Flink 与 hive 结合使用(七) Flink Hive Connector 使用
    Hadoop基础(六十):面试题 Hadoop数据切片(二)切片机制源码
  • 原文地址:https://www.cnblogs.com/liangliangdetianxia/p/3986256.html
Copyright © 2011-2022 走看看