zoukankan html css js c++ java

0_Simple__simpleSurfaceWrite

使用表面写入函数，结合纹理引用实现图片的旋转
▶ 源代码

  // Ask <windows.h> to leave out rarely used APIs and not to define the min/max
  // macros. Both switches are read while <windows.h> is being processed, so
  // they must be defined before the include.
  #define WIN32_LEAN_AND_MEAN
  #define NOMINMAX

  #include <stdio.h>
  #include <windows.h>
  #include <cuda_runtime.h>
  #include "device_launch_parameters.h"
  #include <helper_functions.h>
  #include <helper_cuda.h>

  // Tolerance handed to compareData() when the result is checked against the
  // reference image.
  #define MIN_EPSILON_ERROR 5e-3f

  // Rotation angle, in radians.
  float angle = 0.5f;

  // 2D texture reference sampled by transformKernel.
  // NOTE(review): texture and surface references are a legacy API that recent
  // CUDA toolkits no longer ship; confirm the toolkit version before building.
  texture<float, 2, cudaReadModeElementType> tex;

  // 2D surface reference written by surfaceWriteKernel.
  surface<void, 2> outputSurface;

 // Copies a row-major float image from global memory into the CUDA array that
 // is bound to the surface reference outputSurface, one pixel per thread.
 //
 //   gIData  device pointer to the source image, indexed as [y * width + x]
 //   width   image width in pixels
 //   height  image height in pixels
 //
 // Launch layout: 2D grid of 2D blocks; the thread with global index (x, y)
 // handles pixel (x, y). Threads that fall outside the image do nothing, so
 // the grid may be rounded up past the image size.
 __global__ void surfaceWriteKernel(float *gIData, int width, int height)
 {
     const int x = (int)(blockIdx.x * blockDim.x + threadIdx.x);
     const int y = (int)(blockIdx.y * blockDim.y + threadIdx.y);

     // Without this guard an overshooting thread would read past gIData and
     // hit the trap requested by cudaBoundaryModeTrap below.
     if (x >= width || y >= height)
     {
         return;
     }

     // surf2Dwrite addresses the x coordinate in bytes, hence the scaling by
     // the element size; y stays in rows.
     const int xByteOffset = (int)(x * sizeof(float));
     surf2Dwrite(gIData[(size_t)y * width + x], outputSurface, xByteOffset, y, cudaBoundaryModeTrap);
 }

 // Rotates the image held in the CUDA array bound to the texture reference tex
 // by theta radians about the image centre and stores the result in global
 // memory, one output pixel per thread.
 //
 //   gOData  device pointer to the destination image, indexed as [y * width + x]
 //   width   image width in pixels
 //   height  image height in pixels
 //   theta   rotation angle in radians
 //
 // The coordinates passed to tex2D lie in the normalized [0, 1) convention, so
 // tex must be configured with normalized addressing (main sets
 // tex.normalized = true before binding).
 //
 // Launch layout: 2D grid of 2D blocks; the thread with global index (x, y)
 // produces pixel (x, y). Threads outside the image do nothing.
 __global__ void transformKernel(float *gOData,int width,int height,float theta)
 {
     const int x = (int)(blockIdx.x * blockDim.x + threadIdx.x);
     const int y = (int)(blockIdx.y * blockDim.y + threadIdx.y);

     // Guard against grids that overshoot the image; otherwise the store
     // below would write past the end of gOData.
     if (x >= width || y >= height)
     {
         return;
     }

     // Pixel position relative to the image centre, in normalized units.
     const float u = x / (float)width - 0.5f;
     const float v = y / (float)height - 0.5f;

     // Evaluate the trigonometric terms once instead of twice each.
     const float cosTheta = cosf(theta);
     const float sinTheta = sinf(theta);

     // Rotate (u, v), then shift the origin back to the texture corner.
     const float tu = u * cosTheta - v * sinTheta + 0.5f;
     const float tv = v * cosTheta + u * sinTheta + 0.5f;

     gOData[(size_t)y * width + x] = tex2D(tex, tu, tv);
 }

 // Entry point. Loads a greyscale PGM image, pushes it through a surface write
 // into a CUDA array, rotates it by `angle` with a texture fetch, times the
 // rotation kernel, saves the result and compares it with a reference image.
 //
 // Returns 0 after a completed run (whether or not the comparison passed) and
 // 1 when an input image cannot be loaded or the two images differ in size.
 // CUDA API failures are reported by checkCudaErrors / getLastCudaError from
 // helper_cuda.h.
 int main()
 {
     printf("\n\tStart.\n");
     checkCudaErrors(cudaSetDevice(0));  // device selection was removed; device 0 is always used
     cudaDeviceProp deviceProps;
     checkCudaErrors(cudaGetDeviceProperties(&deviceProps, 0));
     printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);

     // Load the input image and the reference result. The lookup through
     // sdkFindFilePath() was removed, so the paths are absolute. The reference
     // gets its own dimension variables so that it cannot silently overwrite
     // the size of the input image.
     float *h_data = NULL, *h_dataRef = NULL;
     unsigned int width = 0, height = 0;
     unsigned int refWidth = 0, refHeight = 0;
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height))
     {
         fprintf(stderr, "\n\tFailed to load the input image.\n");
         return 1;
     }
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &refWidth, &refHeight))
     {
         fprintf(stderr, "\n\tFailed to load the reference image.\n");
         free(h_data);
         return 1;
     }
     if (refWidth != width || refHeight != height)
     {
         fprintf(stderr, "\n\tReference image is %u x %u but the input is %u x %u.\n", refWidth, refHeight, width, height);
         free(h_data);
         free(h_dataRef);
         return 1;
     }
     const size_t size = (size_t)width * height * sizeof(float);
     printf("\n\tLoad input files, %u x %u pixels\n", width, height);

     // Allocate device memory: a linear buffer plus a CUDA array that can be
     // used for surface load/store.
     float *d_data = NULL;
     checkCudaErrors(cudaMalloc((void **) &d_data, size));
     cudaArray *cuArray;
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
     checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height, cudaArraySurfaceLoadStore));
     checkCudaErrors(cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice));
     // When only texture memory is used, the image can be copied straight into cuArray:
     //cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice);

     // Bind the surface reference to the CUDA array.
     checkCudaErrors(cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc));

     // Fill the CUDA array through the surface. The grid only covers whole
     // blocks, so warn when that leaves edge pixels out.
     dim3 dimBlock(8, 8, 1);
     dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
     if (width % dimBlock.x != 0 || height % dimBlock.y != 0)
     {
         fprintf(stderr, "\n\tWarning: image size is not a multiple of the %u x %u block; edge pixels are not processed.\n", dimBlock.x, dimBlock.y);
     }
     surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
     getLastCudaError("surfaceWriteKernel launch failed");

     // Configure and bind the texture reference to the same CUDA array.
     tex.addressMode[0] = cudaAddressModeWrap;
     tex.addressMode[1] = cudaAddressModeWrap;
     tex.filterMode = cudaFilterModeLinear;
     tex.normalized = true;
     checkCudaErrors(cudaBindTextureToArray(tex, cuArray, channelDesc));

     // Warm-up run.
     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     getLastCudaError("transformKernel warm-up launch failed");
     checkCudaErrors(cudaDeviceSynchronize());

     // Timed run.
     StopWatchInterface *timer = NULL;
     sdkCreateTimer(&timer);
     sdkStartTimer(&timer);

     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     getLastCudaError("transformKernel launch failed");

     checkCudaErrors(cudaDeviceSynchronize());
     sdkStopTimer(&timer);
     // Read the elapsed time while the timer still exists; a timer that has
     // already been deleted cannot report a measurement.
     const float elapsedMs = sdkGetTimerValue(&timer);
     sdkDeleteTimer(&timer);
     printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", elapsedMs, (width * height / (elapsedMs / 1000.0f)) / 1e6);

     // Copy the result back, save it and compare it with the reference.
     checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
     if (sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height))
     {
         printf("\n\tSave output file.\n");
     }
     else
     {
         fprintf(stderr, "\n\tFailed to save the output file.\n");
     }
     printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");

     // Release device and host resources. The host buffers were allocated
     // with malloc inside sdkLoadPGM.
     checkCudaErrors(cudaFree(d_data));
     checkCudaErrors(cudaFreeArray(cuArray));
     free(h_data);
     free(h_dataRef);
     getchar();  // keep the console window open until a key is pressed
     return 0;
 }

▶ 输出结果

 1 Start.
 2 
 3 Device GeForce GTX 1070, Multi-Processors: 16, SM 6.1
 4 
 5 Load input files, 512 x 512 pixels
 6 
 7 Cost time: 0.000000 ms, inf Mpixels/sec
 8 
 9 Save output file.
10 
11 Finish, return Passed

▶ 涨姿势

● 使用函数 sdkLoadPGM() 读取图片数据

  1 // helper_image.h
  2 inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
  3 {
  4     FILE *fp = NULL;
  5     if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
  6     {
  7         std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
  8         return false;
  9     }
 10 
 11     // check header
 12     char header[PGMHeaderSize];
 13     if (fgets(header, PGMHeaderSize, fp) == NULL)
 14     {
 15         std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
 16         return false;
 17     }
 18     if (strncmp(header, "P5", 2) == 0)
 19     {
 20         *channels = 1;
 21     }
 22     else if (strncmp(header, "P6", 2) == 0)
 23     {
 24         *channels = 3;
 25     }
 26     else
 27     {
 28         std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
 29         *channels = 0;
 30         return false;
 31     }
 32 
 33     // parse header, read maxval, width and height
 34     unsigned int width = 0;
 35     unsigned int height = 0;
 36     unsigned int maxval = 0;
 37     unsigned int i = 0;
 38     while (i < 3)
 39     {
 40         if (fgets(header, PGMHeaderSize, fp) == NULL)
 41         {
 42             std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
 43             return false;
 44         }
 45 
 46         if (header[0] == '#')
 47         {
 48             continue;
 49         }
 50 
 51         if (i == 0)
 52         {
 53             i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
 54         }
 55         else if (i == 1)
 56         {
 57             i += SSCANF(header, "%u %u", &height, &maxval);
 58         }
 59         else if (i == 2)
 60         {
 61             i += SSCANF(header, "%u", &maxval);
 62         }
 63     }
 64 
 65     // check if given handle for the data is initialized
 66     if (NULL != *data)
 67     {
 68         if (*w != width || *h != height)
 69         {
 70             std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
 71         }
 72     }
 73     else
 74     {
 75         *data = (unsigned char *)malloc(sizeof(unsigned char) * width * height **channels);
 76         *w = width;
 77         *h = height;
 78     }
 79 
 80     // read and close file
 81     if (fread(*data, sizeof(unsigned char), width * height **channels, fp) == 0)
 82     {
 83         std::cerr << "__LoadPPM() read data returned error." << std::endl;
 84     }
 85 
 86     fclose(fp);
 87     return true;
 88 }
 89 
 90 template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
 91 {
 92     unsigned char *idata = NULL;
 93     unsigned int channels;
 94 
 95     if (!__loadPPM(file, &idata, w, h, &channels))
 96         return false;
 97     unsigned int size = *w **h * channels;
 98 
 99     if (*data == NULL)// 如果 T **data 没有初始化，则按照读取的 size 进行初始化 
100         *data = (T *)malloc(sizeof(T) * size);
101 
102     std::transform(idata, idata + size, *data, ConverterFromUByte<T>());// 拷贝数据到 data 中
103 
104     free(idata);
105     return true;
106 }

● 使用到的表面写入函数原型

// surface_functions.h
//
// Writes `val` to the 2D surface reference `surf` at position (x, y), using
// boundary mode `mode` (cudaBoundaryModeTrap unless the caller says otherwise).
// The write is forwarded to the compiler-provided handler under the name
// "__surf2Dwrite_v2", together with the address of `val` and sizeof(T).
template <class T>
static __device__ __forceinline__ void surf2Dwrite(
    T val,
    surface<void, cudaSurfaceType2D> surf,
    int x,
    int y,
    enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
{
#ifdef __CUDA_ARCH__
    // The call is emitted only when __CUDA_ARCH__ is defined; otherwise the
    // function body is empty.
    typedef typename __nv_surf_trait<T>::cast_type ValuePointer;
    __nv_tex_surf_handler("__surf2Dwrite_v2", (ValuePointer)&val, (int)sizeof(T), surf, x, y, mode);
#endif
}

查看全文

相关阅读:
头插法建立单链表
 顺序表
 栈的顺序存储实现
 折半查找
 myeclipe 快捷键盘
 ztree redio单选按钮
 webuploader上传进度条上传删除
 svn乱码解决办法
 异构SOA系统架构之Asp.net实现(兼容dubbo)
RPC框架

原文地址：https://www.cnblogs.com/cuancuancuanhao/p/7934165.html