zoukankan      html  css  js  c++  java
  • 0_Simple__simpleSurfaceWrite

    使用表面写入函数,结合纹理引用实现图片的旋转
    ▶ 源代码

      1 #include <stdio.h>
      2 #include <windows.h>
      3 #include <cuda_runtime.h>
      4 #include "device_launch_parameters.h"
      5 #include <helper_functions.h>
      6 #include <helper_cuda.h>    
      7 
        // NOTE(review): these two macros are defined after <windows.h> has already been
        // included above, so they presumably have no effect on that header; the
        // conventional spelling is also WIN32_LEAN_AND_MEAN -- confirm and move if intended.
      8 #define WINDOWS_LEAN_AND_MEAN
      9 #define NOMINMAX
        // Tolerance passed to compareData() in main() when checking against the reference image.
     10 #define MIN_EPSILON_ERROR 5e-3f
        // Rotation angle handed to transformKernel as `theta`.
     11 float angle = 0.5f;                             // in radians
        // 2D float texture reference sampled by transformKernel; main() binds it to cuArray.
     12 texture<float, 2, cudaReadModeElementType> tex;
        // 2D surface reference written by surfaceWriteKernel; main() binds it to the same cuArray.
     13 surface<void, 2> outputSurface;
     14 
     15 // 使用表面写入,将全局内存中的数据 d_data 写到绑定了纹理引用的 CUDA 数组 cuArray 中
     16 __global__ void surfaceWriteKernel(float *gIData, int width, int height)
     17 {
     18     unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
     19     unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
     20 
     21     surf2Dwrite(gIData[y * width + x], outputSurface, x * 4, y, cudaBoundaryModeTrap);
     22 }
     23 
     24 // 利用纹理取样,将绑定了纹理引用的 CUDA 数组 cuArray 中的图片进行旋转,写入全局内存 d_data 中
     25 __global__ void transformKernel(float *gOData,int width,int height,float theta) 
     26 {
     27     unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
     28     unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;    
     29     float u = x / (float)width - 0.5f;
     30     float v = y / (float)height - 0.5f;
     31 
     32     gOData[y * width + x] = tex2D(tex, u * cosf(theta) - v * sinf(theta) + 0.5f, v * cosf(theta) + u * sinf(theta) + 0.5f);
     33 }
     34 
     35 int main()
     36 {
     37     printf("
    	Start.
    ");
     38     cudaSetDevice(0);// 删掉了筛选设备的过程
     39     cudaDeviceProp deviceProps;
     40     cudaGetDeviceProperties(&deviceProps, 0);
     41     printf("
    	Device %s, Multi-Processors: %d, SM %d.%d
    ", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);
     42 
     43     // 读取图片数据
     44     float *h_data = NULL, *h_dataRef = NULL;
     45     unsigned int width, height, size;
     46     sdkLoadPGM("D:\Code\CUDA\cudaProjectTemp\data\lena_bw.pgm", &h_data, &width, &height);// 删掉了用函数 sdkFindFilePath() 查找输入文件的过程
     47     size = width * height * sizeof(float);
     48     sdkLoadPGM("D:\Code\CUDA\cudaProjectTemp\data\ref_rotated.pgm", &h_dataRef, &width, &height);
     49     printf("
    	Load input files, %d x %d pixels
    ", width, height);
     50 
     51     // 申请设备内存
     52     float *d_data = NULL;
     53     cudaMalloc((void **) &d_data, size);
     54     cudaArray *cuArray;
     55     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
     56     cudaMallocArray(&cuArray,&channelDesc,width,height,cudaArraySurfaceLoadStore);
     57     cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice);
     58     //cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice); 只使用纹理内存时,可以直接拷贝到cuArray中
     59     
     60     // 绑定表面引用
     61     cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc);
     62 
     63     // 使用表面写入
     64     dim3 dimBlock(8, 8, 1);
     65     dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
     66     surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
     67 
     68     // 绑定纹理引用
     69     tex.addressMode[0] = cudaAddressModeWrap;
     70     tex.addressMode[1] = cudaAddressModeWrap;
     71     tex.filterMode = cudaFilterModeLinear;
     72     tex.normalized = true;
     73     cudaBindTextureToArray(tex, cuArray, channelDesc);
     74 
     75     // 预跑
     76     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     77     cudaDeviceSynchronize();
     78 
     79     StopWatchInterface *timer = NULL;
     80     sdkCreateTimer(&timer);
     81     sdkStartTimer(&timer);
     82     
     83     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     84     
     85     cudaDeviceSynchronize();
     86     sdkStopTimer(&timer); 
     87     sdkDeleteTimer(&timer);
     88     printf("
    	Cost time: %f ms, %.2f Mpixels/sec
    ", sdkGetTimerValue(&timer), (width *height / (sdkGetTimerValue(&timer) / 1000.0f)) / 1e6);
     89 
     90     // 结果回收、输出和检验
     91     cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost);
     92     sdkSavePGM("D:\Code\CUDA\cudaProjectTemp\data\output.pgm", h_data, width, height);
     93     printf("
    	Save output file.
    ");
     94     printf("
    	Finish, return %s.
    ", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");
     95 
     96     cudaFree(d_data);
     97     cudaFreeArray(cuArray);
     98     getchar();
     99     return 0;
    100 }

    ▶ 输出结果

     1 Start.
     2 
     3 Device GeForce GTX 1070, Multi-Processors: 16, SM 6.1
     4 
     5 Load input files, 512 x 512 pixels
     6 
     7 Cost time: 0.000000 ms, inf Mpixels/sec    (注:代码在调用 sdkGetTimerValue() 之前已经调用 sdkDeleteTimer() 销毁了计时器,所以这里显示的 0 ms / inf 并不是真实耗时)
     8 
     9 Save output file.
    10 
    11 Finish, return Passed

    ▶ 涨姿势

    ● 使用函数 sdkLoadPGM() 读取图片数据

      1 // helper_image.h
      2 inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
      3 {
      4     FILE *fp = NULL;
      5     if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
      6     {
      7         std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
      8         return false;
      9     }
     10 
     11     // check header
     12     char header[PGMHeaderSize];
     13     if (fgets(header, PGMHeaderSize, fp) == NULL)
     14     {
     15         std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
     16         return false;
     17     }
     18     if (strncmp(header, "P5", 2) == 0)
     19     {
     20         *channels = 1;
     21     }
     22     else if (strncmp(header, "P6", 2) == 0)
     23     {
     24         *channels = 3;
     25     }
     26     else
     27     {
     28         std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
     29         *channels = 0;
     30         return false;
     31     }
     32 
     33     // parse header, read maxval, width and height
     34     unsigned int width = 0;
     35     unsigned int height = 0;
     36     unsigned int maxval = 0;
     37     unsigned int i = 0;
     38     while (i < 3)
     39     {
     40         if (fgets(header, PGMHeaderSize, fp) == NULL)
     41         {
     42             std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
     43             return false;
     44         }
     45 
     46         if (header[0] == '#')
     47         {
     48             continue;
     49         }
     50 
     51         if (i == 0)
     52         {
     53             i += SSCANF(header, "%u %u %u", &width, &height, &maxval);
     54         }
     55         else if (i == 1)
     56         {
     57             i += SSCANF(header, "%u %u", &height, &maxval);
     58         }
     59         else if (i == 2)
     60         {
     61             i += SSCANF(header, "%u", &maxval);
     62         }
     63     }
     64 
     65     // check if given handle for the data is initialized
     66     if (NULL != *data)
     67     {
     68         if (*w != width || *h != height)
     69         {
     70             std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
     71         }
     72     }
     73     else
     74     {
     75         *data = (unsigned char *)malloc(sizeof(unsigned char) * width * height **channels);
     76         *w = width;
     77         *h = height;
     78     }
     79 
     80     // read and close file
     81     if (fread(*data, sizeof(unsigned char), width * height **channels, fp) == 0)
     82     {
     83         std::cerr << "__LoadPPM() read data returned error." << std::endl;
     84     }
     85 
     86     fclose(fp);
     87     return true;
     88 }
     89 
     90 template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
     91 {
     92     unsigned char *idata = NULL;
     93     unsigned int channels;
     94 
     95     if (!__loadPPM(file, &idata, w, h, &channels))
     96         return false;
     97     unsigned int size = *w **h * channels;
     98 
     99     if (*data == NULL)// 如果 T **data 没有初始化,则按照读取的 size 进行初始化 
    100         *data = (T *)malloc(sizeof(T) * size);
    101 
    102     std::transform(idata, idata + size, *data, ConverterFromUByte<T>());// 拷贝数据到 data 中
    103 
    104     free(idata);
    105     return true;
    106 }

    ● 使用到的表面写入函数原型

    1 // surface_functions.h
      // Device-side write of `val` to the 2D surface reference `surf` at (x, y).
      // `mode` selects the out-of-range behaviour and defaults to cudaBoundaryModeTrap.
      // NOTE(review): the caller in this file passes x * 4 for float data, which is
      // consistent with x being a byte offset rather than an element index -- confirm
      // against the CUDA documentation.
    2 template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)
    3 {
      // The body exists only when __CUDA_ARCH__ is defined; otherwise the function is empty.
    4 #ifdef __CUDA_ARCH__ 
      // Forwards the address of val, sizeof(T), the surface, the coordinates and the
      // boundary mode to the handler under the name "__surf2Dwrite_v2".
    5     __nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);
    6 #endif
    7 }
  • 相关阅读:
    头插法建立单链表
    顺序表
    栈的顺序存储实现
    折半查找
    myeclipe 快捷键盘
    ztree redio单选按钮
    webuploader上传进度条 上传删除
    svn乱码解决办法
    异构SOA系统架构之Asp.net实现(兼容dubbo)
    RPC框架
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/7934165.html
Copyright © 2011-2022 走看看