# cuda(0)

自从进入电气工程，阴差阳错地学习了 CUDA，并在学习的过程中初步了解了并行计算的一些思路。由于我的电脑空间有限，我准备把平时的一些 CUDA 编程练习记录下来，放到博客园上，这样既方便我以后的学习，也可以为博客园的成长尽一份绵薄之力。接下来进入正题：

图像灰度转换cuda实现：

``` 1 #include "cuda_runtime.h"
2 #include "device_launch_parameters.h"
3 #include <stdio.h>
4 #include<iostream>
5
6 #include <iostream>
7 #include <opencv2/core/core.hpp>
8 #include <opencv2/highgui/highgui.hpp>
9
10 #define CHANNELS 3
11
// Converts an interleaved 3-channel, 8-bit image into an 8-bit greyscale image.
//
// Launch layout: a 2D grid of 2D blocks, one thread per output pixel
// (x indexes columns, y indexes rows). Threads that fall outside the image do
// nothing, so the grid may over-cover the image. No shared memory is used.
//
// Parameters:
//   pout   - output buffer, width * height bytes, one grey value per pixel.
//   pin    - input buffer, width * height * CHANNELS bytes. Rows are indexed as
//            tightly packed (no row padding); the three bytes of each pixel
//            are read as red, green, blue in that order.
//   width  - image width in pixels.
//   height - image height in pixels.
//
// pout and pin must not overlap (they are declared __restrict__).
__global__ void colorToGreyScaleConversion(unsigned char *__restrict__ pout,
                                           const unsigned char *__restrict__ pin,
                                           int width, int height) {
    const int Col = blockIdx.x * blockDim.x + threadIdx.x;
    const int Row = blockIdx.y * blockDim.y + threadIdx.y;

    if (Col < width && Row < height) {
        // Offsets are computed in size_t so that Row * width * CHANNELS cannot
        // overflow a 32-bit int on very large images.
        const size_t greyOffset = static_cast<size_t>(Row) * width + Col;
        const size_t rgbOffset = greyOffset * CHANNELS;

        const unsigned char r = pin[rgbOffset];
        const unsigned char g = pin[rgbOffset + 1];
        const unsigned char b = pin[rgbOffset + 2];

        // Luminosity weights 0.21 / 0.72 / 0.07 sum to 1, so pure white stays
        // 255. Adding 0.5f rounds to the nearest integer instead of truncating;
        // the largest possible value is about 255.5, which still converts to 255.
        pout[greyOffset] = static_cast<unsigned char>(
            0.21f * r + 0.72f * g + 0.07f * b + 0.5f);
    }
}
27
// Host driver: displays the colour image `img`, converts it to greyscale on
// the GPU with colorToGreyScaleConversion, and displays the result.
//
// NOTE(review): the statement that declares and loads `img` is missing from
// this copy of the listing (the numbering jumps from 31 to 33). Restore it
// from the original post before building.
// NOTE(review): none of the CUDA runtime calls below have their return codes
// checked, and the kernel launch is not followed by cudaGetLastError(), so
// any failure is silent.
28 using namespace cv;
29 int main(void) {
30
31     // Read in an image (downscaled image)
33     // Create a display window
34     namedWindow("lena");
35     // Show the image in the window
36     imshow("lena", img);
37     // Wait up to 6000 ms (or until a key press) before continuing
38     waitKey(6000);
39
40     const int imgheight = img.rows;
41     const int imgwidth = img.cols;
42     const int imgchannel = img.channels();
43
    // Single-channel 8-bit host image, zero-initialised, that receives the result.
44     Mat grayImage(imgheight, imgwidth, CV_8UC1, Scalar(0));
45
46     unsigned char *dev_pin;
47     unsigned char *dev_pout;
48
    // Device buffers: the interleaved colour input and the one-byte-per-pixel output.
    // NOTE(review): the input is sized with imgchannel, but the kernel indexes
    // it with a fixed stride of CHANNELS (3); the two only agree when img has
    // exactly three channels.
49     cudaMalloc((void**)&dev_pin, imgheight*imgwidth*imgchannel* sizeof(unsigned char));
50     cudaMalloc((void**)&dev_pout, imgheight*imgwidth*sizeof(unsigned char));
51
    // Upload the pixels as one block.
    // NOTE(review): this assumes img.data is stored contiguously with no row
    // padding (img.isContinuous()) - confirm.
    // NOTE(review): the kernel treats byte 0 of each pixel as red and byte 2
    // as blue. If img is loaded with cv::imread, OpenCV presumably stores the
    // channels as B, G, R, which would swap the red and blue weights - verify
    // against the restored load statement.
52     cudaMemcpy(dev_pin, img.data, imgheight*imgwidth*imgchannel * sizeof(unsigned char), cudaMemcpyHostToDevice);
53
54
    // 16x16 threads per block; the grid size is the ceiling of width/16 by
    // height/16, so every pixel is covered by a thread.
55     dim3 BlockDim(16, 16);
56     dim3 GridDim((imgwidth - 1) / BlockDim.x + 1, (imgheight - 1) / BlockDim.y + 1);
57     colorToGreyScaleConversion << <GridDim, BlockDim >> > (dev_pout, dev_pin, imgwidth, imgheight);
58
    // Copy the greyscale result back into grayImage.
59     cudaMemcpy(grayImage.data, dev_pout, imgheight*imgwidth*sizeof(unsigned char), cudaMemcpyDeviceToHost);
60
61     cudaFree(dev_pin);
62     cudaFree(dev_pout);
    // Show the result for up to 3000 ms (or until a key press).
63     imshow("grayImage", grayImage);
64     waitKey(3000);
65     return 0;
66 }```
greyConvert

值得注意的是：我是在 VS2017 平台上实现的，编程时调用了 OpenCV 的库函数，读者需要自行配置该环境。为了不让大家在配置环境时耗费太长时间，我附上 CSDN 上一位大神的配置教程链接：https://blog.csdn.net/qq_41175905/article/details/80560429

转换前如下图：

转换后如下图：

另外，读者也可以用 Matlab、OpenCV 等提供的库函数来实现；比较后发现，CUDA 并行计算的耗时要短一些。

