CUDA 和 OpenCV 混合编程：使用 CUDA 的纹理内存，实现图像的二值化以及滤波功能。
#include <cstdio>
#include <cstdlib>

#include <cuda_runtime.h>
#include <highgui/highgui.hpp>
#include <imgproc/imgproc.hpp>
using namespace cv;
// Working image size in pixels. main() resizes the input image to
// width x height and uses the same values for the CUDA array, the output
// buffer and the kernel launch grid.
int width = 512;
int height = 512;
// File-scope 2D texture reference read by transformKernel. Its element type is
// float and cudaReadModeElementType means fetches return the stored element
// type as-is; addressing and filtering modes are configured in main().
texture<float, 2, cudaReadModeElementType> texRef;
// Kernel: samples the 2D texture bound to texRef and writes one 8-bit value
// per output pixel.
//
// Launch layout: a 2D grid of 2D blocks with one thread per output pixel. The
// grid may overshoot the image (ceil-div sizing); surplus threads exit early.
//
// Parameters:
//   output - device buffer holding at least width * height elements, written
//            row-major (index = y * width + x)
//   width  - output width in pixels
//   height - output height in pixels
__global__ void transformKernel(uchar* output, int width, int height)
{
    // Pixel coordinates handled by this thread.
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the image must neither fetch nor write.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Normalized fetch coordinates in [0, 1).
    const float u = x / (float)width;
    const float v = y / (float)height;

    // NOTE(review): the coordinates are divided by 4, so only the [0, 0.25)
    // range of the texture is sampled in each dimension. Kept as in the
    // original; confirm that this magnification is intended.
    float sample = tex2D(texRef, u / 4.0f, v / 4.0f);

    // Clamp before narrowing: converting an out-of-range float to an 8-bit
    // integer is undefined.
    sample = fminf(fmaxf(sample, 0.0f), 255.0f);

    // Widen the index so y * width cannot wrap for large images.
    output[(size_t)y * width + x] = (uchar)sample;
}
// Terminates the program with a diagnostic when a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        std::fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        std::exit(EXIT_FAILURE);
    }
}

// Loads a grayscale image, uploads it into a CUDA array bound to texRef, runs
// transformKernel over a width x height grid and displays the input and the
// result in OpenCV windows.
//
// Returns 0 on success and EXIT_FAILURE if the image cannot be read; any
// failing CUDA call aborts the program through checkCuda.
int main()
{
    // Read the image with OpenCV (flag 0 = single-channel grayscale). The
    // backslash must be escaped, otherwise "\l" is an invalid escape sequence.
    Mat image = imread("D:\\lena.jpg", 0);
    if (image.empty()) {
        std::fprintf(stderr, "Failed to read image D:\\lena.jpg\n");
        return EXIT_FAILURE;
    }
    resize(image, image, Size(width, height));
    imshow("原始图像", image);

    // texRef fetches 32-bit floats, so the 8-bit pixels have to be converted
    // before upload; copying the raw bytes would reinterpret them as floats
    // and fill only a quarter of the array.
    Mat imageFloat;
    image.convertTo(imageFloat, CV_32FC1);

    // Allocate the CUDA array: one 32-bit float channel per texel.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray* cuArray = NULL;
    checkCuda(cudaMallocArray(&cuArray, &channelDesc, width, height), "cudaMallocArray");

    // Row-wise copy; the source pitch comes from the Mat so padded rows would
    // also be handled correctly.
    checkCuda(cudaMemcpy2DToArray(cuArray, 0, 0, imageFloat.data, imageFloat.step,
                                  width * sizeof(float), height, cudaMemcpyHostToDevice),
              "cudaMemcpy2DToArray");

    // Texture attributes.
    texRef.addressMode[0] = cudaAddressModeWrap;  // wrap-around addressing
    texRef.addressMode[1] = cudaAddressModeWrap;
    texRef.filterMode = cudaFilterModeLinear;     // linear filtering
    texRef.normalized = true;                     // normalized coordinates

    // Bind the array to the texture reference.
    checkCuda(cudaBindTextureToArray(texRef, cuArray, channelDesc), "cudaBindTextureToArray");

    // Host result image and its device counterpart: one byte per pixel.
    Mat imageOutput = Mat(Size(width, height), CV_8UC1);
    const size_t outputBytes = (size_t)width * height * sizeof(uchar);
    uchar* devOutput = NULL;
    checkCuda(cudaMalloc((void**)&devOutput, outputBytes), "cudaMalloc");

    // One thread per pixel; the grid is rounded up to cover the whole image.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y);
    transformKernel<<<dimGrid, dimBlock>>>(devOutput, width, height);
    checkCuda(cudaGetLastError(), "transformKernel launch");

    // Blocking copy: also waits for the kernel and surfaces execution errors.
    checkCuda(cudaMemcpy(imageOutput.data, devOutput, outputBytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");

    imshow("CUDA+OpenCV滤波", imageOutput);
    waitKey();

    // Release the texture binding before the memory that backs it.
    checkCuda(cudaUnbindTexture(texRef), "cudaUnbindTexture");
    checkCuda(cudaFreeArray(cuArray), "cudaFreeArray");
    checkCuda(cudaFree(devOutput), "cudaFree");
    return 0;
}
原始lena图像:
运行效果: