zoukankan html css js c++ java

我的第一个CUDA程序

最近在学习CUDA框架，折腾了一个多月终于把CUDA安装完毕，现在终于跑通了自己的一个CUDA的Hello world程序，值得欣喜~

首先，关于CUDA的初始化，代码和解释如下，这部分主要参考GXW的文章：

// Initialize CUDA by selecting the first device that reports a compute
// capability major version >= 1.
//
// Returns true when such a device was found and made current via
// cudaSetDevice(). Returns false, after printing a diagnostic to stderr, when
// the device count cannot be queried, no device is present, no device
// qualifies, or cudaSetDevice() fails.
bool initCUDA()
{
	int deviceCount = 0;	// stays 0 if the query below fails
	cudaError_t err = cudaGetDeviceCount(&deviceCount);	// number of CUDA devices
	if (err != cudaSuccess)
	{
		fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
		return false;
	}
	if (deviceCount == 0)
	{
		fprintf(stderr, "There is no device.\n");
		return false;
	}

	// Original author's note (translated): when no CUDA-capable hardware is
	// present, the runtime may still report a count of 1, with device 0 being
	// an emulation device that does not support CUDA 1.0 or above. Each
	// device's reported version is therefore checked before it is accepted.
	// NOTE(review): this describes old toolkit behaviour -- confirm against
	// the CUDA version actually in use.
	int selected = -1;
	for (int dev = 0; dev < deviceCount; ++dev)
	{
		cudaDeviceProp prop;
		// Devices whose properties cannot be read are skipped, not fatal.
		if (cudaGetDeviceProperties(&prop, dev) == cudaSuccess && prop.major >= 1)
		{
			selected = dev;
			break;
		}
	}

	if (selected < 0)
	{
		fprintf(stderr, "There is no device supporting CUDA1.x.\n");
		return false;
	}

	err = cudaSetDevice(selected);
	if (err != cudaSuccess)
	{
		fprintf(stderr, "cudaSetDevice(%d) failed: %s\n", selected, cudaGetErrorString(err));
		return false;
	}

	return true;
}

　　初始化之后，先说一下程序的总体思路：在main函数中声明一个字符串“Hello world!”，然后启动与字符串长度相同数量的GPU线程，每个线程把自己负责的那个字符的值加1。废话不多说，上代码：

// Kernel: every thread adds 1 to a single character of the buffer.
// The element index is threadIdx.x alone, so the kernel expects a
// single-block launch with one thread per character. There is no bounds
// check: the caller must not launch more threads than the buffer has bytes.
__global__ void helloword(char * helloword)
{
	const int tid = threadIdx.x;	// this thread's index within its block
	helloword[tid] += 1;
}

// Entry point: copies "Hello world!" to the GPU, launches one thread per
// character to add 1 to each character, copies the result back and prints it.
//
// Returns 0 on success and 1 on any failure (initialization, allocation,
// copy, launch or synchronization); a diagnostic goes to stderr on failure.
// Host and device buffers are released on every path.
int main(int count, char * args[])
{
	(void)count;	// command-line arguments are not used
	(void)args;

	if (!initCUDA())
	{
		return 1;
	}
	printf("CUDA successfully init");

	const char *cpu_hello = "Hello world!";
	const size_t len = strlen(cpu_hello);
	const size_t bytes = len * sizeof(char);	// terminator is not copied to the device

	char *gpu_hello = NULL;	// device buffer
	char *temp = NULL;	// host buffer for the result, NUL-terminated
	int exitCode = 1;	// pessimistic default; set to 0 only after the final print
	cudaError_t cudaErr;

	// Single-pass block: any failure breaks out to the shared cleanup below.
	do
	{
		// cudaMalloc is the device-side counterpart of malloc.
		cudaErr = cudaMalloc((void**)&gpu_hello, bytes);
		if (cudaErr != cudaSuccess)
		{
			fprintf(stderr, "cudaMalloc failed! %s\n", cudaGetErrorString(cudaErr));
			break;
		}

		// Same first three arguments as memcpy; the fourth gives the direction.
		cudaErr = cudaMemcpy(gpu_hello, cpu_hello, bytes, cudaMemcpyHostToDevice);
		if (cudaErr != cudaSuccess)
		{
			fprintf(stderr, "cudaMemcpy to device failed! %s\n", cudaGetErrorString(cudaErr));
			break;
		}

		// One block, one thread per character (the kernel has no bounds check).
		helloword <<<1, static_cast<unsigned int>(len)>>>(gpu_hello);

		// Launch-configuration errors are reported through cudaGetLastError().
		cudaErr = cudaGetLastError();
		if (cudaErr != cudaSuccess)
		{
			fprintf(stderr, "helloword kernel launch failed! %s\n", cudaGetErrorString(cudaErr));
			break;
		}

		// Errors raised while the kernel runs surface at the next sync point.
		cudaErr = cudaDeviceSynchronize();
		if (cudaErr != cudaSuccess)
		{
			fprintf(stderr, "synchronize is wrong: %s\n", cudaGetErrorString(cudaErr));
			break;
		}

		temp = (char *)malloc(len + 1);
		if (temp == NULL)
		{
			fprintf(stderr, "malloc failed!\n");
			break;
		}
		memset(temp, 0, len + 1);	// guarantees the trailing NUL for printf

		cudaErr = cudaMemcpy(temp, gpu_hello, bytes, cudaMemcpyDeviceToHost);
		if (cudaErr != cudaSuccess)
		{
			fprintf(stderr, "cudaMemcpy failed!%s\n", cudaGetErrorString(cudaErr));
			break;
		}

		printf("\n%s\n", temp);
		exitCode = 0;
	} while (0);

	free(temp);	// free(NULL) is a no-op
	cudaFree(gpu_hello);	// still NULL if cudaMalloc was never reached or failed

	return exitCode;
}

调试了好久，这个程序才调通，对于老手来讲可能不算什么，但是对于我这个新手来讲，喜悦之情……呵呵。

路漫漫其修远兮，吾将上下而求索~

查看全文

相关阅读:
dmesg 时间转换
 [转载]Linux性能测试 ss命令
 [转载]Python高效编程技巧
 [转载]Latency Numbers Every Programmer Should Know
几道有意思的智力面试题
 单网卡多ip配置
 [转]Office visio 2007 打开后死机，提示关闭
 [转]C++的五种内存存储区
 [转]Linux性能测试 pmap命令
 利用wojilu框架仿一个网站的全过程（Step by Step利用wojilu框架开发网站系列序言）

原文地址：https://www.cnblogs.com/havePassed/p/3273005.html