▶ 使用函数 vloadn 和 vstoren 实现全局存储器与私有存储器(核函数内的局部变量)之间的向量拷贝
● 代码
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <cl.h> 4 5 const int nElement = 4096; 6 const char *programSource = " 7 __kernel void prog(__global int *A, __global int *B) 8 { 9 int idx = get_global_id(0); 10 int4 temp = vload4(idx, A); 11 vstore4(temp, idx, B); 12 return; 13 } 14 "; 15 16 int main() 17 { 18 const size_t datasize = sizeof(int) * nElement; 19 int i, *A, *B; 20 cl_int status; 21 22 A = (int*)malloc(datasize); 23 B = (int*)malloc(datasize); 24 for (i = 0; i < nElement; A[i] = i, B[i] = 0, i++); 25 26 cl_platform_id platform; 27 clGetPlatformIDs(1, &platform, NULL); 28 cl_device_id device; 29 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); 30 cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); 31 cl_command_queue cmdQueue = clCreateCommandQueue(context, device, 0, &status); 32 cl_mem bufferA, bufferB; 33 bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status); 34 bufferB = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, &status); 35 clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, A, 0, NULL, NULL); 36 cl_program program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status); 37 status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); 38 cl_kernel kernel = clCreateKernel(program, "prog", &status); 39 clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA); 40 clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB); 41 size_t globalSize[1] = { nElement }, localSize[1] = { 128 }; 42 status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL); 43 clEnqueueReadBuffer(cmdQueue, bufferB, CL_TRUE, 0, datasize, B, 0, NULL, NULL); 44 45 for (i = 0; i < nElement; i++) 46 { 47 if (B[i] != i) 48 break; 49 } 50 printf("Output is %s. ", (i == nElement) ? 
"correct" : "incorrect"); 51 52 free(A); 53 free(B); 54 clReleaseContext(context); 55 clReleaseMemObject(bufferA); 56 clReleaseMemObject(bufferB); 57 clReleaseCommandQueue(cmdQueue); 58 clReleaseProgram(program); 59 clReleaseKernel(kernel); 60 getchar(); 61 return 0; 62 }
● 输出结果
Output is correct.
● 教训
■ 核函数代码中每个行尾的续行符 "\" 后面不要有任何东西,包括空格。因为 "\" 连同紧随其后的换行符会在预处理阶段被删除,下一行的内容会被直接接到本行之后;如果 "\" 后面还有空格等字符,它就不再是续行符,字符串字面量会在行尾中断,导致编译错误
■ 核函数代码中不要有 "//" 型的行注释。因为续行拼接之后整个核函数源码实际上只有一行,"//" 会把它后面的所有代码都注释掉(需要注释时请使用 "/* */")
■ 可以改用字符串连接来写核函数代码,如:
/*
 * OpenCL C source of the copy kernel, written as adjacent string literals
 * that the compiler concatenates into one string. Each piece ends in "\n"
 * so the kernel keeps its line structure, which makes line comments and
 * preprocessor directives safe inside the kernel text.
 */
const char *programSource =
    "__kernel void prog(__global int *A, __global int *B)\n"
    "{\n"
    "    int idx = get_global_id(0);\n"
    "    int4 temp = vload4(idx, A);\n"
    "    vstore4(temp, idx, B);\n"
    "    return;\n"
    "}\n";