zoukankan      html  css  js  c++  java
  • 使用常量内存来处理光线跟踪

     项目打包下载

      1 /*
      2 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
      3 *
      4 * NVIDIA Corporation and its licensors retain all intellectual property and
      5 * proprietary rights in and to this software and related documentation.
      6 * Any use, reproduction, disclosure, or distribution of this software
      7 * and related documentation without an express license agreement from
      8 * NVIDIA Corporation is strictly prohibited.
      9 *
     10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
     11 * associated with this source code for terms and conditions that govern
     12 * your use of this NVIDIA software.
     13 *
     14 */
     15 
     16 #include <GLglut.h>
     17 #include "cuda.h"
     18 #include "../common/book.h"
     19 #include "../common/cpu_bitmap.h"
     20 #include "cuda_runtime.h"
     21 #include "device_launch_parameters.h"
     22 #include <math.h>
     23 #define DIM 1024
     24 
     25 #define rnd( x ) (x * rand() / RAND_MAX)
     26 #define INF     2e10f
     27 
     /*
      * One sphere of the scene: a colour (r, g, b), a radius and a centre
      * (x, y, z). The member order is left exactly as declared because the
      * whole array of spheres is copied to the device as raw bytes.
      */
     struct Sphere {
         float   r, b, g;
         float   radius;
         float   x, y, z;

         /*
          * Tests whether the point (ox, oy) falls inside this sphere's
          * outline in the xy-plane.
          *
          * ox, oy : coordinates of the point to test.
          * n      : on a hit, receives dz / sqrtf(radius^2), where dz is the
          *          height of the sphere surface above its centre at that
          *          point (1 at the centre of the outline, approaching 0 at
          *          its edge). Left untouched on a miss.
          *
          * Returns the z coordinate of the sphere surface at (ox, oy) on a
          * hit, or -INF when the point lies outside the outline.
          */
         __device__ float hit(float ox, float oy, float *n) {
             const float offX = ox - x;
             const float offY = oy - y;
             const float radiusSq = radius * radius;

             // Guard clause: anything not strictly inside the outline is a miss.
             if (!(offX*offX + offY*offY < radiusSq)) {
                 return -INF;
             }

             const float height = sqrtf(radiusSq - offX*offX - offY*offY);
             *n = height / sqrtf(radiusSq);
             return height + z;
         }
     };
     43 #define SPHERES 20
     44 
     45 __constant__ Sphere s[SPHERES];
     46 
     /*
      * Renders the spheres stored in the constant-memory array `s` into an
      * RGBA image, one thread per pixel.
      *
      * ptr : output image, 4 bytes per pixel in the order r, g, b, alpha,
      *       rows packed DIM pixels wide. Must hold DIM * DIM * 4 bytes.
      *
      * Launch layout: a 2D grid of 2D blocks. The grid may cover more than
      * DIM x DIM threads; threads outside the image exit without writing.
      * No shared memory is used.
      */
     __global__ void kernel(unsigned char *ptr) {
         // map from threadIdx/blockIdx to pixel position
         int x = threadIdx.x + blockIdx.x * blockDim.x;
         int y = threadIdx.y + blockIdx.y * blockDim.y;

         // Threads past the image edge must not write: the buffer is only
         // DIM x DIM pixels, whatever the launch dimensions are.
         if (x >= DIM || y >= DIM) {
             return;
         }

         // Row stride is the image width, not the launch width, so the
         // addressing stays correct when the grid overshoots the image.
         int offset = x + y * DIM;

         // Shift the origin to the middle of the image.
         float   ox = (x - DIM / 2);
         float   oy = (y - DIM / 2);

         // Background is black; keep the colour of the sphere whose surface
         // has the largest z at this pixel.
         float   r = 0.0f, g = 0.0f, b = 0.0f;
         float   maxz = -INF;
         for (int i = 0; i < SPHERES; i++) {
             float   n;
             float   t = s[i].hit(ox, oy, &n);
             // A miss returns -INF, which never exceeds maxz, so `n` is only
             // read after hit() has actually written it.
             if (t > maxz) {
                 float fscale = n;
                 r = s[i].r * fscale;
                 g = s[i].g * fscale;
                 b = s[i].b * fscale;
                 maxz = t;
             }
         }

         ptr[offset * 4 + 0] = (int)(r * 255);
         ptr[offset * 4 + 1] = (int)(g * 255);
         ptr[offset * 4 + 2] = (int)(b * 255);
         ptr[offset * 4 + 3] = 255;
     }
     74 
     /*
      * Globals needed by the update routine, bundled into one struct so they
      * can be handed around through a single pointer.
      */
     struct DataBlock
     {
         // Image buffer pointer (presumably device memory, going by the
         // name; nothing in this file assigns it).
         unsigned char* dev_bitmap;
     };
     79 
     /*
      * Generates SPHERES random spheres, uploads them to the constant-memory
      * array `s`, renders a DIM x DIM image with `kernel`, prints the elapsed
      * GPU time and displays the result.
      *
      * Any failing CUDA call is reported through HANDLE_ERROR.
      */
     int main(void) {
         // Nothing in this function reads the block's contents; zero it so
         // the pointer handed to CPUBitmap never refers to an indeterminate
         // value.
         DataBlock   data = { NULL };

         // capture the start time
         cudaEvent_t     start, stop;
         HANDLE_ERROR(cudaEventCreate(&start));
         HANDLE_ERROR(cudaEventCreate(&stop));
         HANDLE_ERROR(cudaEventRecord(start, 0));

         CPUBitmap bitmap(DIM, DIM, &data);
         unsigned char   *dev_bitmap;

         // allocate memory on the GPU for the output bitmap
         HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap,
             bitmap.image_size()));

         // Build the scene in a small host-side array (SPHERES entries), so
         // there is no heap allocation that could fail or leak.
         Sphere temp_s[SPHERES];
         for (int i = 0; i < SPHERES; i++) {
             temp_s[i].r = rnd(1.0f);
             temp_s[i].g = rnd(1.0f);
             temp_s[i].b = rnd(1.0f);
             temp_s[i].x = rnd(1000.0f) - 500;
             temp_s[i].y = rnd(1000.0f) - 500;
             temp_s[i].z = rnd(1000.0f) - 500;
             temp_s[i].radius = rnd(100.0f) + 20;
         }

         // Store the SPHERES sphere objects in constant memory; constant
         // symbols are written with cudaMemcpyToSymbol.
         HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s, sizeof(temp_s)));

         // generate a bitmap from our sphere data
         // (DIM is a multiple of 16, so 16x16 blocks tile the image exactly)
         dim3    grids(DIM / 16, DIM / 16);
         dim3    threads(16, 16);
         kernel <<<grids, threads >>>(dev_bitmap);
         // A kernel launch returns no status; ask for launch errors
         // explicitly so they are not blamed on the copy below.
         HANDLE_ERROR(cudaGetLastError());

         // copy our bitmap back from the GPU for display
         HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
             bitmap.image_size(),
             cudaMemcpyDeviceToHost));

         // get stop time, and display the timing results
         HANDLE_ERROR(cudaEventRecord(stop, 0));
         HANDLE_ERROR(cudaEventSynchronize(stop));
         float   elapsedTime;
         HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime,
             start, stop));
         printf("Time to generate:  %3.1f ms\n", elapsedTime);

         HANDLE_ERROR(cudaEventDestroy(start));
         HANDLE_ERROR(cudaEventDestroy(stop));

         HANDLE_ERROR(cudaFree(dev_bitmap));

         // display
         bitmap.display_and_exit();
     }

     结果如下所示:

  • 相关阅读:
    springboot Serving Web Content with Spring MVC
    Java的String中的subString()方法
    required string parameter XXX is not present
    NMON监控linux性能
    Linux下Java性能监控
    Linux常用命令
    Loadrunner测试webservice协议总结
    AWR报告分析
    性能测试指标
    如何保证测试的覆盖率
  • 原文地址:https://www.cnblogs.com/liangliangdetianxia/p/3987795.html
Copyright © 2011-2022 走看看