▶ 按书上写的设备队列的代码,需要 OpenCL2.0 的平台和设备,先把代码堆上来
● 程序主要功能:用主机上的数组 Ahost 和 Bhost 创建设备缓冲区 Adevice 和 Bdevice,调用核函数 foo 及其子核函数 fooChild 计算 factor * Adevice .* Bdevice,结果写入 Cdevice,最后拷贝回主机数组 Chost 检查结果。
● 代码
1 //deviceQueue.cl 2 __kernel void fooChild(const int nElement, const float factor, 3 __global const float *A, __global const float *B, __global float *C) 4 { 5 uint gid = get_global_id(0); 6 if (gid < nElement) 7 C[gid] = factor * A[gid] * B[gid]; 8 } 9 10 __kernel void foo(const int nElement, const float factor, 11 __global const float *A, __global const float *B, __global float *C) 12 { 13 uint gid = get_global_id(0), gsize = get_global_size(0); 14 uint childGsize = nElement / gsize, childOffset = gid * childGsize; 15 16 __global const float *Achild = &A[childOffset]; 17 __global const float *Bchild = &B[childOffset]; 18 __global const float *Cchild = &C[childOffset]; 19 20 queue_t defQ = get_default_queue(); 21 ndrange_t ndrange = ndrange_1D(childGsize); 22 void(^fooChildWrapper)(void) = ^{ fooChild(childGsize, factor, Achild, Bchild, Cchild); }; 23 enqueue_kernel(defQ, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, saxpyDpChildWrapper); 24 }
1 //main.c 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <cl.h> 5 6 const char *sourceCode = "D:/Code/deviceQueue.cl"; 7 8 char* readSource(const char* kernelPath)// 读取文本文件,存储为 char * 9 { 10 FILE *fp; 11 char *source; 12 long int size; 13 //printf("readSource, Program file: %s ", kernelPath); 14 fopen_s(&fp, kernelPath, "rb"); 15 if (!fp) 16 { 17 printf("Open kernel file failed "); 18 exit(-1); 19 } 20 if (fseek(fp, 0, SEEK_END) != 0) 21 { 22 printf("Seek end of file faildd "); 23 exit(-1); 24 } 25 if ((size = ftell(fp)) < 0) 26 { 27 printf("Get file position failed "); 28 exit(-1); 29 } 30 rewind(fp); 31 if ((source = (char *)malloc(size + 1)) == NULL) 32 { 33 printf("Allocate space failed "); 34 exit(-1); 35 } 36 fread(source, 1, size, fp); 37 fclose(fp); 38 source[size] = '