本文要解决的问题是如何实现CUDA和OpenGL的互操作,使得GPU能够将通用计算的运算结果交给OpenGL进行绘制。
本文的应用程序主要包括两个方面:
1. 使用CUDA核函数生成图像数据
2. 将数据传递给OpenGL驱动程序并进行渲染
实现这个功能需要按如下四个步骤:
Step1: 声明两个全局变量,保存指向同一个缓冲区的不同句柄,指向要在OpenGL和CUDA之间共享的数据;
Step2: 选择运行应用程序的CUDA设备(cudaChooseDevice),告诉cuda运行时使用哪个设备来执行CUDA和OpenGL (cudaGLSetGLDevice);
Step3: 在OpenGL中创建像素缓冲区对象;
Step4: 通知CUDA运行时将像素缓冲区对象bufferObj注册为图形资源,实现缓冲区共享。
然后就可以按照一般的CUDA程序调用核函数进行计算。运行结果如下:
/********************************************************************
 *  SharedBuffer.cu
 *  interact between CUDA and OpenGL
 *
 *  A CUDA kernel fills an OpenGL pixel buffer object (PBO) with an
 *  RGBA image; OpenGL then draws that buffer with glDrawPixels.
 *  The buffer is shared between the two APIs through two handles:
 *  `bufferObj` (OpenGL name) and `resource` (CUDA graphics resource).
 *********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>   /* memset */
/* NOTE(review): the original text read "GLglut.h" / "GLglext.h"; the
 * directory separator appears to have been lost, so it is restored here.
 * glut.h is kept ahead of glext.h, as in the original. */
#include "GL/glut.h"
#include "GL/glext.h"
#include <cuda_runtime.h>
#include <cutil_inline.h>
#include <cuda.h>
#include <cuda_gl_interop.h>

#define GET_PROC_ADDRESS(str) wglGetProcAddress(str)
#define DIM 512   /* image width and height in pixels */

/* Buffer-object entry points, resolved at run time in main(). */
PFNGLBINDBUFFERARBPROC    glBindBuffer    = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC    glGenBuffers    = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData    = NULL;

// step one: two handles that refer to the same shared buffer
GLuint bufferObj;                 // OpenGL-side handle
cudaGraphicsResource *resource;   // CUDA-side handle

/*
 * Fills a DIM x DIM RGBA image with a green pattern.
 *
 * ptr: device pointer to at least DIM*DIM uchar4 pixels, stored row by row
 *      with a row length of DIM.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. Threads that
 * fall outside the DIM x DIM image return without writing, so the grid may
 * be larger than the image.
 */
__global__ void cudaGLKernel(uchar4 *ptr)
{
    const int x = threadIdx.x + blockIdx.x * blockDim.x;
    const int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard against launches whose extent exceeds the image.
    if (x >= DIM || y >= DIM) {
        return;
    }

    // Row stride is the image width, independent of the launch geometry.
    const int offset = x + y * DIM;

    // Pixel position shifted so that the image centre is at (0, 0).
    const float fx = x / (float)DIM - 0.5f;
    const float fy = y / (float)DIM - 0.5f;

    // sinf() is in [-1, 1], so the value stays within [1, 255].
    const unsigned char green =
        128.0f + 127.0f * sinf(fabsf(fx * 100.0f) - fabsf(fy * 100.0f));

    ptr[offset].x = 0;
    ptr[offset].y = green;
    ptr[offset].z = 0;
    ptr[offset].w = 255;
}

/*
 * GLUT display callback: draws DIM x DIM RGBA pixels and swaps buffers.
 * The last argument of glDrawPixels is 0; main() leaves bufferObj bound to
 * GL_PIXEL_UNPACK_BUFFER_ARB, so no client-memory pointer is supplied here.
 */
void drawFunc(void)
{
    glDrawPixels(DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0);
    glutSwapBuffers();
}

/*
 * GLUT keyboard callback. Key code 27 (ESC) unregisters the CUDA resource,
 * unbinds and deletes the pixel buffer object, then exits the process.
 * Other keys are ignored; the x/y arguments are unused.
 */
static void keyFunc(unsigned char key, int x, int y)
{
    switch (key) {
    case 27:
        cutilSafeCall(cudaGraphicsUnregisterResource(resource));
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
        glDeleteBuffers(1, &bufferObj);
        exit(0);
    }
}

/*
 * Resolves the buffer-object entry points through GET_PROC_ADDRESS and
 * terminates with an error message if any of them is unavailable, instead
 * of crashing later on a call through a NULL function pointer.
 */
static void loadBufferEntryPoints(void)
{
    glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

    if (glBindBuffer == NULL || glDeleteBuffers == NULL ||
        glGenBuffers == NULL || glBufferData == NULL) {
        fprintf(stderr, "Required OpenGL buffer-object functions are not available\n");
        exit(EXIT_FAILURE);
    }
}

/*
 * Program entry point: selects a CUDA device, creates the GLUT window,
 * creates the shared pixel buffer, runs the kernel once to fill it, and
 * enters the GLUT main loop.
 */
int main(int argc, char* argv[])
{
    // step 2: choose a device of compute capability >= 1.0 and use it for GL interop
    cudaDeviceProp prop;
    int dev;

    memset(&prop, 0, sizeof(cudaDeviceProp));
    prop.major = 1;
    prop.minor = 0;
    cutilSafeCall(cudaChooseDevice(&dev, &prop));
    cutilSafeCall(cudaGLSetGLDevice(dev));

    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
    glutInitWindowSize(DIM, DIM);
    glutCreateWindow("CUDA interact with OpenGL");

    // step 3: create the pixel buffer object (DIM*DIM RGBA pixels, no initial data)
    loadBufferEntryPoints();

    glGenBuffers(1, &bufferObj);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);
    glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * sizeof(uchar4), NULL,
                 GL_DYNAMIC_DRAW_ARB);

    // step 4: register the buffer object with CUDA as a graphics resource
    cutilSafeCall(cudaGraphicsGLRegisterBuffer(&resource, bufferObj,
                                               cudaGraphicsMapFlagsNone));

    // Map the resource to obtain a device pointer the kernel can write to.
    uchar4* devPtr = NULL;
    size_t size = 0;
    cutilSafeCall(cudaGraphicsMapResources(1, &resource, NULL));
    cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size,
                                                       resource));

    // The kernel writes DIM*DIM pixels; refuse to run on a smaller mapping.
    if (size < (size_t)DIM * DIM * sizeof(uchar4)) {
        fprintf(stderr, "Mapped buffer is too small: %lu bytes\n",
                (unsigned long)size);
        exit(EXIT_FAILURE);
    }

    // One thread per pixel; ceil-division keeps the grid valid for any DIM.
    dim3 threads(16, 16);
    dim3 grids((DIM + threads.x - 1) / threads.x,
               (DIM + threads.y - 1) / threads.y);
    cudaGLKernel<<<grids, threads>>>(devPtr);
    // A kernel launch returns no status itself; fetch launch errors explicitly.
    cutilSafeCall(cudaGetLastError());

    // Unmap before OpenGL reads the buffer in drawFunc().
    cutilSafeCall(cudaGraphicsUnmapResources(1, &resource, NULL));

    glutKeyboardFunc(keyFunc);
    glutDisplayFunc(drawFunc);
    glutMainLoop();
    return 0;
}
程序编译的时候貌似要注意头文件glut.h和glext.h的包含顺序(本例中glut.h在前,glext.h在后),否则会报错~
参考资源:
1、Jason Sanders, Edward Kandrot, CUDA By Example: An Introduction to General-Purpose GPU Programming (2011).该书电子版下载和源码下载。