zoukankan      html  css  js  c++  java
  • 光线跟踪=全局内存+常量内存

    光线跟踪通过全局内存和常量内存实现,项目打包下载

    常量内存(球体数组使用 __constant__ 声明,通过 cudaMemcpyToSymbol 拷贝)

      1 /*
      2 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
      3 *
      4 * NVIDIA Corporation and its licensors retain all intellectual property and
      5 * proprietary rights in and to this software and related documentation.
      6 * Any use, reproduction, disclosure, or distribution of this software
      7 * and related documentation without an express license agreement from
      8 * NVIDIA Corporation is strictly prohibited.
      9 *
     10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
     11 * associated with this source code for terms and conditions that govern
     12 * your use of this NVIDIA software.
     13 *
     14 */
     15 
     16 #include <GLglut.h>
     17 #include "cuda.h"
     18 #include "../common/book.h"
     19 #include "../common/cpu_bitmap.h"
     20 #include "cuda_runtime.h"
     21 #include "device_launch_parameters.h"
     22 #include <math.h>
     // Width and height of the rendered image, in pixels.
     #define DIM 512

     // Scales x by a pseudo-random factor in [0, 1] taken from rand().
     // The argument is parenthesized so a compound expression such as
     // rnd(a - b) scales the whole expression instead of only its last term.
     // Pass a float (as every call in this file does) so the product is
     // computed in floating point rather than in int.
     #define rnd( x ) ((x) * rand() / RAND_MAX)
     // Large sentinel magnitude; Sphere::hit() returns -INF when a ray misses.
     #define INF     2e10f
     27 
     // One sphere of the scene: a colour (r, g, b), a radius and a centre
     // (x, y, z). The member declaration order (r, b, g) is kept exactly as
     // in the original definition.
     struct Sphere {
         float   r, b, g;
         float   radius;
         float   x, y, z;

         // Tests a ray that travels along the z axis through (ox, oy).
         //
         //   ox, oy  ray position in the image plane
         //   n       out: on a hit, receives dz / |radius|, a value in (0, 1]
         //           that the kernel uses to scale the colour; left untouched
         //           on a miss
         //
         // Returns the z coordinate of the hit point (dz + z), or -INF when
         // the ray passes outside the sphere's silhouette. A ray exactly on
         // the silhouette counts as a miss because the comparison is strict,
         // so a zero radius can never reach the division.
         //
         // The method is const so it can be called on read-only spheres, and
         // __host__ __device__ so the same code can serve as a CPU reference.
         __host__ __device__ float hit(float ox, float oy, float *n) const {
             float dx = ox - x;
             float dy = oy - y;
             if (dx*dx + dy*dy < radius*radius) {
                 float dz = sqrtf(radius*radius - dx*dx - dy*dy);
                 // fabsf(radius) replaces sqrtf(radius * radius): same value
                 // without a second square root.
                 *n = dz / fabsf(radius);
                 return dz + z;
             }
             return -INF;
         }
     };
     // Number of spheres in the scene.
     #define SPHERES 100

     // Scene description, stored in constant memory. The host fills it with
     // cudaMemcpyToSymbol before the kernel is launched.
     __constant__ Sphere s[SPHERES];

     // Renders the scene into an RGBA8 image, one thread per pixel.
     //
     //   ptr  output buffer of DIM * DIM * 4 bytes
     //
     // Launch layout: a 2D grid of 2D blocks covering at least DIM x DIM
     // threads. Threads that fall outside the image return immediately, and
     // rows are addressed with the image width DIM, so the result does not
     // depend on the grid being an exact multiple of the image size.
     __global__ void kernel(unsigned char *ptr) {
         // map from threadIdx/blockIdx to pixel position
         int x = threadIdx.x + blockIdx.x * blockDim.x;
         int y = threadIdx.y + blockIdx.y * blockDim.y;
         if (x >= DIM || y >= DIM) {
             return;
         }
         int offset = x + y * DIM;

         // shift the origin so that the z axis passes through the image centre
         float   ox = (x - DIM / 2);
         float   oy = (y - DIM / 2);

         float   r = 0, g = 0, b = 0;
         float   maxz = -INF;
         for (int i = 0; i<SPHERES; i++) {
             float   n;
             float   t = s[i].hit(ox, oy, &n);
             // A miss returns -INF, which is never greater than maxz, so n is
             // only read after hit() has written it. The largest z wins and
             // its sphere supplies the pixel colour.
             if (t > maxz) {
                 float fscale = n;
                 r = s[i].r * fscale;
                 g = s[i].g * fscale;
                 b = s[i].b * fscale;
                 maxz = t;
             }
         }

         ptr[offset * 4 + 0] = (int)(r * 255);
         ptr[offset * 4 + 1] = (int)(g * 255);
         ptr[offset * 4 + 2] = (int)(b * 255);
         ptr[offset * 4 + 3] = 255;
     }
     74 
     // User-data bundle whose address main() hands to the CPUBitmap
     // constructor. main() in this program never assigns the member.
     struct DataBlock {
         unsigned char* dev_bitmap;   // image buffer pointer (device side, judging by the name)
     };
     79 
     // Builds a random scene of SPHERES spheres, uploads it to the
     // __constant__ array s, renders it on the GPU and displays the bitmap.
     //
     // The timed interval starts before the device allocation and the
     // host-side scene generation and ends after the image has been copied
     // back, so the printed figure covers the whole pipeline and not only
     // the kernel.
     //
     // Returns 1 if the temporary host buffer cannot be allocated; otherwise
     // control ends in bitmap.display_and_exit().
     int main(void) {
         // Value-initialized so CPUBitmap is not handed indeterminate pointers.
         DataBlock   data = {};

         // capture the start time
         cudaEvent_t     start, stop;
         HANDLE_ERROR(cudaEventCreate(&start));
         HANDLE_ERROR(cudaEventCreate(&stop));
         HANDLE_ERROR(cudaEventRecord(start, 0));

         CPUBitmap bitmap(DIM, DIM, &data);
         unsigned char   *dev_bitmap;

         // allocate memory on the GPU for the output bitmap
         HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()));

         // allocate temp memory, initialize it, copy to constant
         // memory on the GPU, then free our temp memory
         Sphere *temp_s = (Sphere*)malloc(sizeof(Sphere) * SPHERES);
         if (!temp_s) {
             printf("host allocation for %d spheres failed\n", SPHERES);
             cudaFree(dev_bitmap);
             return 1;
         }
         for (int i = 0; i < SPHERES; i++) {
             temp_s[i].r = rnd(1.0f);
             temp_s[i].g = rnd(1.0f);
             temp_s[i].b = rnd(1.0f);
             temp_s[i].x = rnd(1000.0f) - 500;
             temp_s[i].y = rnd(1000.0f) - 500;
             temp_s[i].z = rnd(1000.0f) - 500;
             temp_s[i].radius = rnd(100.0f) + 20;
         }
         // Store the SPHERES sphere objects in constant memory; the copy to
         // a __constant__ symbol goes through cudaMemcpyToSymbol.
         HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s, sizeof(Sphere) * SPHERES));
         free(temp_s);

         // generate a bitmap from our sphere data
         // (DIM is a multiple of 16, so the grid covers the image exactly)
         dim3    grids(DIM / 16, DIM / 16);
         dim3    threads(16, 16);
         kernel<<<grids, threads>>>(dev_bitmap);
         // A launch does not return a status; fetch launch errors explicitly.
         HANDLE_ERROR(cudaGetLastError());

         // copy our bitmap back from the GPU for display
         HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
                                 bitmap.image_size(), cudaMemcpyDeviceToHost));

         // get stop time, and display the timing results
         HANDLE_ERROR(cudaEventRecord(stop, 0));
         HANDLE_ERROR(cudaEventSynchronize(stop));
         float   elapsedTime;
         HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, start, stop));
         printf("Time to generate:  %3.1f ms\n", elapsedTime);

         HANDLE_ERROR(cudaEventDestroy(start));
         HANDLE_ERROR(cudaEventDestroy(stop));

         HANDLE_ERROR(cudaFree(dev_bitmap));

         // display
         bitmap.display_and_exit();
     }

    全局内存(球体数组通过 cudaMalloc 分配,通过 cudaMemcpy 拷贝):

      1 /*
      2 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
      3 *
      4 * NVIDIA Corporation and its licensors retain all intellectual property and
      5 * proprietary rights in and to this software and related documentation.
      6 * Any use, reproduction, disclosure, or distribution of this software
      7 * and related documentation without an express license agreement from
      8 * NVIDIA Corporation is strictly prohibited.
      9 *
     10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
     11 * associated with this source code for terms and conditions that govern
     12 * your use of this NVIDIA software.
     13 *
     14 */
     15 #include <GLglut.h>
     16 #include "cuda.h"
     17 #include "cuda_runtime.h"
     18 #include "device_launch_parameters.h"
     19 #include "../common/book.h"
     20 #include "../common/cpu_bitmap.h"
     21 #include "device_functions.h"
     22 
     // Width and height of the rendered image, in pixels.
     #define DIM 512

     // Scales x by a pseudo-random factor in [0, 1] taken from rand().
     // The argument is parenthesized so a compound expression such as
     // rnd(a - b) scales the whole expression instead of only its last term.
     // Pass a float (as every call in this file does) so the product is
     // computed in floating point rather than in int.
     #define rnd( x ) ((x) * rand() / RAND_MAX)
     // Large sentinel magnitude; Sphere::hit() returns -INF when a ray misses.
     #define INF     2e10f
     27 
     // One sphere of the scene: a colour (r, g, b), a radius and a centre
     // (x, y, z). The member declaration order (r, b, g) is kept exactly as
     // in the original definition.
     struct Sphere {
         float   r, b, g;
         float   radius;
         float   x, y, z;

         // Tests a ray that travels along the z axis through (ox, oy).
         //
         //   ox, oy  ray position in the image plane
         //   n       out: on a hit, receives dz / |radius|, a value in (0, 1]
         //           that the kernel uses to scale the colour, which produces
         //           the shading gradient; left untouched on a miss
         //
         // Returns the z coordinate of the hit point (dz + z), or -INF when
         // the ray passes outside the sphere's silhouette. A ray exactly on
         // the silhouette counts as a miss because the comparison is strict,
         // so a zero radius can never reach the division.
         //
         // The method is const so it can be called on read-only spheres, and
         // __host__ __device__ so the same code can serve as a CPU reference.
         __host__ __device__ float hit(float ox, float oy, float *n) const {
             // offset of the ray from the sphere centre in the image plane
             float dx = ox - x;
             float dy = oy - y;
             // only rays inside the projected circle can hit the sphere
             if (dx*dx + dy*dy < radius*radius) {
                 // distance along z from the sphere centre to the hit point
                 float dz = sqrtf(radius*radius - dx*dx - dy*dy);
                 // fabsf(radius) replaces sqrtf(radius * radius): same value
                 // without a second square root.
                 *n = dz / fabsf(radius);
                 // the image plane is the xy plane with z perpendicular to
                 // it, so the hit point's z is the centre's z plus dz
                 return dz + z;
             }
             return -INF;
         }
     };
     // Number of spheres in the scene.
     #define SPHERES 100


     // Renders the scene into an RGBA8 image, one thread per pixel.
     //
     //   s    array of SPHERES spheres; main() passes a buffer obtained from
     //        cudaMalloc
     //   ptr  output buffer of DIM * DIM * 4 bytes
     //
     // Launch layout: a 2D grid of 2D blocks covering at least DIM x DIM
     // threads. Threads that fall outside the image return immediately, and
     // rows are addressed with the image width DIM, so the result does not
     // depend on the grid being an exact multiple of the image size.
     __global__ void kernel(Sphere *s, unsigned char *ptr) {
         // map the thread to its pixel position in the image
         int x = threadIdx.x + blockIdx.x * blockDim.x;
         int y = threadIdx.y + blockIdx.y * blockDim.y;
         if (x >= DIM || y >= DIM) {
             return;
         }
         int offset = x + y * DIM;   // linear pixel index, row pitch = DIM

         // shift the origin so that the z axis passes through the image centre
         float   ox = (x - DIM / 2);
         float   oy = (y - DIM / 2);

         float   r = 0, g = 0, b = 0;
         float   maxz = -INF;
         // every pixel tests all SPHERES spheres in turn
         for (int i = 0; i<SPHERES; i++) {
             float   n;
             float   t = s[i].hit(ox, oy, &n);
             // Larger z wins here: the hit with the greatest z supplies the
             // colour and hits with smaller z are treated as hidden. A miss
             // returns -INF, which is never greater than maxz, so n is only
             // read after hit() has written it.
             if (t > maxz) {
                 float fscale = n;
                 r = s[i].r * fscale;
                 g = s[i].g * fscale;
                 b = s[i].b * fscale;
                 maxz = t;
             }
         }

         ptr[offset * 4 + 0] = (int)(r * 255);
         ptr[offset * 4 + 1] = (int)(g * 255);
         ptr[offset * 4 + 2] = (int)(b * 255);
         ptr[offset * 4 + 3] = 255;
     }
     92 
     93 
     // User-data bundle whose address main() hands to the CPUBitmap
     // constructor. main() in this program never assigns either member.
     struct DataBlock {
         unsigned char* dev_bitmap;   // image buffer pointer (device side, judging by the name)
         Sphere*        s;            // sphere array pointer
     };
     99 
    // Builds a random scene of SPHERES spheres, uploads it to a cudaMalloc'd
    // buffer, renders it on the GPU and displays the bitmap.
    //
    // The timed interval starts before the device allocations and the
    // host-side scene generation and ends after the image has been copied
    // back, so the printed figure covers the whole pipeline and not only
    // the kernel.
    //
    // Returns 1 if the temporary host buffer cannot be allocated; otherwise
    // control ends in bitmap.display_and_exit().
    int main(void) {
        // Value-initialized so CPUBitmap is not handed indeterminate pointers.
        DataBlock   data = {};

        // capture the start time
        cudaEvent_t     start, stop;
        HANDLE_ERROR(cudaEventCreate(&start));
        HANDLE_ERROR(cudaEventCreate(&stop));
        HANDLE_ERROR(cudaEventRecord(start, 0));

        CPUBitmap bitmap(DIM, DIM, &data);
        unsigned char   *dev_bitmap;
        Sphere          *s;

        // allocate memory on the GPU for the output bitmap
        HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()));
        // allocate the sphere array on the device with cudaMalloc
        HANDLE_ERROR(cudaMalloc((void**)&s, sizeof(Sphere) * SPHERES));

        // build the scene in a temporary host buffer
        Sphere *temp_s = (Sphere*)malloc(sizeof(Sphere) * SPHERES);
        if (!temp_s) {
            printf("host allocation for %d spheres failed\n", SPHERES);
            cudaFree(s);
            cudaFree(dev_bitmap);
            return 1;
        }
        for (int i = 0; i < SPHERES; i++) {
            temp_s[i].r = rnd(1.0f);
            temp_s[i].g = rnd(1.0f);
            temp_s[i].b = rnd(1.0f);
            temp_s[i].x = rnd(1000.0f) - 500;
            temp_s[i].y = rnd(1000.0f) - 500;
            temp_s[i].z = rnd(1000.0f) - 500;
            temp_s[i].radius = rnd(100.0f) + 20;
        }
        HANDLE_ERROR(cudaMemcpy(s, temp_s, sizeof(Sphere) * SPHERES,
                                cudaMemcpyHostToDevice));
        free(temp_s);

        // generate a bitmap from our sphere data
        // (DIM is a multiple of 16, so the grid covers the image exactly)
        dim3    grids(DIM / 16, DIM / 16);
        dim3    threads(16, 16);
        kernel<<<grids, threads>>>(s, dev_bitmap);
        // A launch does not return a status; fetch launch errors explicitly.
        HANDLE_ERROR(cudaGetLastError());

        // copy our bitmap back from the GPU for display
        HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
                                bitmap.image_size(), cudaMemcpyDeviceToHost));

        // get stop time, and display the timing results
        HANDLE_ERROR(cudaEventRecord(stop, 0));
        HANDLE_ERROR(cudaEventSynchronize(stop));
        float   elapsedTime;
        HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime, start, stop));
        printf("Time to generate:  %3.1f ms\n", elapsedTime);

        HANDLE_ERROR(cudaEventDestroy(start));
        HANDLE_ERROR(cudaEventDestroy(stop));

        HANDLE_ERROR(cudaFree(dev_bitmap));
        HANDLE_ERROR(cudaFree(s));

        // display
        bitmap.display_and_exit();
    }

    两者的结果为

    全局内存

    常量内存

     但是问题是我的性能怎么就没有提升呢?请大侠看到了,给小弟指导下。E-mail:lianglianghelloworld@gmail.com。期待大牛的回复。

  • 相关阅读:
    各种版本控制器的作用
    mybatis的一些特殊符号标识(大于,小于,等于,不等于)
    struts2的作用是什么
    js中给数组添加元素的方法有哪些
    springmvc中拦截器配置格式
    js中require()的用法----JS如何连接数据库执行sql语句或者建立数据库连接池
    hover()函数的用法
    error和exception的不同与相同
    cookie和session的区别有哪些
    数据库连接池的工作机制是什么
  • 原文地址:https://www.cnblogs.com/liangliangdetianxia/p/3988626.html
Copyright © 2011-2022 走看看