本文主要分析CUDA SDK sample如何同OpenGL相结合.
在CUDA中调用OpenGL主要有以下几个要点:
- Interoperability with OpenGL requires that the CUDA device be specified by cudaGLSetGLDevice() before any other runtime calls.
- Register the resource with CUDA before mapping it. 一个资源只需注册一次.
- After being registered with CUDA, a resource must be mapped with cudaGraphicsMapResources() before CUDA functions access it, and unmapped with cudaGraphicsUnmapResources() once the access is finished.
- A mapped resource can be read from or written to by kernels using the device memory address returned by cudaGraphicsResourceGetMappedPointer() for buffers and cudaGraphicsSubResourceGetMappedArray() for CUDA arrays.
- DO NOT access a resource through OpenGL or Direct3D while it is mapped to CUDA, because doing so produces undefined results.
整体伪代码
1: set_OpenGL_device();2: register_resources();3:4: while( is_running )
5: {6: map_resource();7: resource_pointer *pointer = get_mapped_pointer();8: process_using_cuda( pointer );9: unmap_resource();10: do_normal_rendering();11: }12: unregister_resources();
选择设备
1: // sets device as the current device for the calling host thread.
2: extern __host__ cudaError_t CUDARTAPI cudaGLSetGLDevice(int device);
本例中被封装在chooseCudaDevice()函数中, 自动选择性能最佳的device.
资源创建和注册
这里使用的资源是Pixel Buffer Object : The buffer object storing pixel data is called Pixel Buffer Object (PBO). initPixelBuffer()函数负责创建并注册PBO.
1: // OpenGL pixel buffer object
2: GLuint pbo = 0;3:4: // create pixel buffer object for display
5: glGenBuffersARB(1, &pbo);6: glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);7: glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, width*height*sizeof(GLubyte)*4, 0, GL_STREAM_DRAW_ARB);
8: glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);9:10: // register this buffer object with CUDA
11: cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, pbo, cudaGraphicsMapFlagsWriteDiscard));
使用资源
1: // CUDA Graphics Resource (to transfer PBO)
2: struct cudaGraphicsResource *cuda_pbo_resource;
3:4: // render image using CUDA
5: void render()
6: {7: // Copy inverse view matrix to const device memory
8: copyInvViewMatrix(invViewMatrix, sizeof(float4)*3);9:10: // Map graphics resources for access by CUDA
11: uint *d_output;12: cutilSafeCall(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));13:14: // Get CUDA device pointer
15: size_t num_bytes;16: cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&d_output, &num_bytes,
17: cuda_pbo_resource));18:19: // clear image
20: cutilSafeCall(cudaMemset(d_output, 0, width*height*4));21:22: // call CUDA kernel, writing results to PBO
23: render_kernel(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale);24:25: cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));26: }
这里的cutilSafeCall()是cuda util中的函数, 负责log错误. render_kernel()即前面分析过的d_render()函数, 负责把计算出的颜色写入PBO; d_output是map之后得到的、供CUDA存取的指向PBO内存的指针.
显示
1: // display results using OpenGL
2: void display()
3: {4: // use OpenGL to build view matrix
5: BuildViewMartix();6:7: // prepare pbo pixels
8: render();9:10: // display results
11: glClear(GL_COLOR_BUFFER_BIT);12:13: // draw image from PBO
14: glPixelStorei(GL_UNPACK_ALIGNMENT, 1);15:16: // copy from pbo to texture
17: glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, pbo);18: glBindTexture(GL_TEXTURE_2D, tex);19: glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, 0);20: glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);21:22: // draw textured quad
23: glEnable(GL_TEXTURE_2D);24: glBegin(GL_QUADS);25: glTexCoord2f(0, 0); glVertex2f(0, 0);26: glTexCoord2f(1, 0); glVertex2f(1, 0);27: glTexCoord2f(1, 1); glVertex2f(1, 1);28: glTexCoord2f(0, 1); glVertex2f(0, 1);29: glEnd();30:31: glDisable(GL_TEXTURE_2D);32: glBindTexture(GL_TEXTURE_2D, 0);33:34: glutSwapBuffers();35: glutReportErrors();36:37: cutilCheckError(cutStopTimer(timer));38:39: computeFPS();40: }
其中render()就是上面写入PBO的函数, 这个display()函数是由glutDisplayFunc()注册的显示函数, 也就是渲染的全过程. 为了简化函数, 中间省略了一些统计和非核心的处理.
我们可以看到, 渲染的所有效果都是由CUDA通过volume render产生的, 最后OpenGL只是把结果作为一张图片贴在我们的视口上. 这里面有两个小细节: 第一是glPixelStorei()函数修改了数据对齐的单位, 详细介绍在这里; 第二是如何从PBO拷贝到纹理, Song Ho的OpenGL教程介绍得非常清楚, 我就不再赘述了.
看过以上几期的分析, 希望大家对Volume Render和CUDA能有一些新的理解, 欢迎大家与我讨论学习. 下一次想分析一下这个例子的一些细节技术.
参考:
CUDA C Programming Guide