zoukankan      html  css  js  c++  java
  • OpenCL 管道

    ▶ 按书上写的管道的代码,需要使用 OpenCL2.0 的平台和设备,目前编译不通过,暂时不知道是什么问题,先把代码堆上来,以后换了新的设备再说

    ● 程序主要功能:用主机上的数组 srcHost 创建设备缓冲区 src,调用核函数 pipeProducer 将 src 分装到管道中,再调用核函数 pipeConsumer 将管道中的数据读到设备缓冲区 dst 中,最后拷贝回主机数组 dstHost 中检查结果。

    ● 代码

     //pipe.cl
     // Producer kernel: each work-item reads one float from src and pushes it
     // into outPipe through a single-packet reservation.
     //   src     - input buffer, indexed by the global work-item id
     //   outPipe - pipe that receives one packet per work-item
     __kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
     {
         const int idx = get_global_id(0);
         float packet = src[idx];

         // Ask for one slot; nothing is written when the reservation fails.
         reserve_id_t slot = reserve_write_pipe(outPipe, 1);
         if (!is_valid_reserve_id(slot))
             return;

         // The reservation is committed only after a successful write.
         if (write_pipe(outPipe, slot, 0, &packet) == 0)
             commit_write_pipe(outPipe, slot);
     }
    14 
    // Consumer kernel: each work-item pops one float from inPipe and stores it
    // in dst at its global id.
    //   dst    - output buffer, indexed by the global work-item id
    //   inPipe - pipe previously filled by pipeProducer
    // dst[gid] is written only when a packet was actually read, so a failed
    // reservation never stores an uninitialized value.
    __kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
    {
        int gid = get_global_id(0);
        float dstPipe;
        reserve_id_t resID = reserve_read_pipe(inPipe, 1);

        // Nothing could be reserved: leave dst[gid] untouched.
        if (!is_valid_reserve_id(resID))
            return;

        if (read_pipe(inPipe, resID, 0, &dstPipe) != 0)
            return;
        commit_read_pipe(inPipe, resID);
        dst[gid] = dstPipe;
    }
      1 //main.c
      2 #include <stdio.h>  
      3 #include <stdlib.h>  
      4 #include <cl.h>
      5 
      // Path of the OpenCL kernel source file loaded by main().
      const char *sourceCode = "D:/Code/pipe.cl";

      /*
       * Read a whole file into a freshly allocated, NUL-terminated buffer.
       *
       * kernelPath - path of the file to read (opened in binary mode).
       * Returns a malloc'ed buffer holding the file contents followed by '\0';
       * the caller owns it and must free() it.
       * On any failure a message is printed and the process exits with -1.
       */
      char* readSource(const char* kernelPath)
      {
          FILE *fp = NULL;
          char *source;
          long int size;
          //printf("readSource, Program file: %s\n", kernelPath);
      #if defined(_MSC_VER)
          // fopen_s is the MSVC "secure" variant; fall back to fopen elsewhere.
          if (fopen_s(&fp, kernelPath, "rb") != 0)
              fp = NULL;
      #else
          fp = fopen(kernelPath, "rb");
      #endif
          if (!fp)
          {
              printf("Open kernel file failed\n");
              exit(-1);
          }
          if (fseek(fp, 0, SEEK_END) != 0)
          {
              printf("Seek end of file failed\n");
              fclose(fp);
              exit(-1);
          }
          if ((size = ftell(fp)) < 0)
          {
              printf("Get file position failed\n");
              fclose(fp);
              exit(-1);
          }
          rewind(fp);
          // One extra byte for the terminating '\0'.
          if ((source = (char *)malloc((size_t)size + 1)) == NULL)
          {
              printf("Allocate space failed\n");
              fclose(fp);
              exit(-1);
          }
          // A short read would leave part of the buffer uninitialized.
          if (fread(source, 1, (size_t)size, fp) != (size_t)size)
          {
              printf("Read kernel file failed\n");
              free(source);
              fclose(fp);
              exit(-1);
          }
          fclose(fp);
          source[size] = '\0';
          return source;
      }
     41 
     42 int main()
     43 {
     44     const int nPacket = 1024, dataSize = nPacket * sizeof(float);
     45     char info[1024] = { 0 };
     46     int i;
     47 
     48     // 初始化平台
     49     cl_int status;    
     50     cl_platform_id platform;
     51     clGetPlatformIDs(1, &platform, NULL);    
     52     cl_device_id device;
     53     clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
     54     cl_context_properties contextProp[] = { CL_CONTEXT_PLATFORM,(cl_context_properties)(platform), 0 };
     55     cl_context context = clCreateContext(contextProp, 1, &device, NULL, contextProp, &status);
     56     cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, NULL, &status);    
     57     cl_event eventProducer, eventConsumer; 
     58 
     59     const char* source = readSource(sourceCode);
     60     cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &status);    
     61     status = clBuildProgram(program, 1, &device, "-w -g –cl-std=CL2.0", NULL, NULL);
     62 
     63     clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 1024, info, NULL);
     64     printf("Build log:
    %s
    ", info);
     65 
     66     cl_kernel kernelProducer = clCreateKernel(program, "pipeProducer", &status);
     67     cl_kernel kernelConsumer = clCreateKernel(program, "pipeConsumer", &status);
     68     size_t globalSize = nPacket, localSize = 128;
     69 
     70     float *srcHost = (float *)malloc(dataSize);
     71     float *dstHost = (float *)malloc(dataSize);
     72     for (i = 0; i < nPacket; srcHost[i] = i, dstHost[i] = 0.0f, i++);
     73         
     74     cl_mem src, dst;
     75     src = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, dataSize, srcHost, &status);
     76     dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status);
     77    
     78     cl_mem pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(float), nPacket, NULL, &status);
     79 
     80     clSetKernelArg(kernelProducer, 0, sizeof(cl_mem),src);
     81     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
     82 
     83     clSetKernelArg(kernelProducer, 0, sizeof(cl_mem), dst);
     84     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
     85     
     86     clEnqueueNDRangeKernel(queue, kernelProducer, 1, NULL, &globalSize, &localSize, 0, NULL, &eventProducer);
     87     clEnqueueNDRangeKernel(queue, kernelConsumer, 1, NULL, &globalSize, &localSize, 1, &eventProducer, &eventConsumer);        
     88     clEnqueueReadBuffer(queue, dst, CL_TRUE, dataSize, dataSize, dstHost, 1, &eventConsumer, NULL);
     89     clFinish(queue);
     90 
     91     for (i = 0; i < nPacket; i++)
     92     {
     93         if (dstHost[i] != i)
     94             break;
     95     }
     96     printf("Output is %s.
    ", (i == nPacket) ? "correct" : "incorrect");
     97 
     98     free(srcHost);
     99     free(dstHost);
    100     clReleaseContext(context);    
    101     clReleaseCommandQueue(queue);
    102     clReleaseProgram(program);
    103     clReleaseKernel(kernelProducer);
    104     clReleaseKernel(kernelConsumer);
    105     getchar();
    106     return 0;
    107 }

    ● 输出结果

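    ■ 使用编译参数 "-w -g –cl-std=CL2.0" 时返回 status 为 -43(CL_INVALID_BUILD_OPTIONS),原因很可能是 "–cl-std" 开头的字符是破折号"–"而不是 ASCII 减号"-";不使用参数 "–cl-std=CL2.0" 的情况下返回 -11(CL_BUILD_PROGRAM_FAILURE)。麻烦的是调用函数 clGetProgramBuildInfo 查询编译日志 info 始终都是空的,不知道出了什么问题;一种可能是日志长度超过了固定的 1024 字节缓冲区,此时该函数返回 CL_INVALID_VALUE 且不写入任何内容,应先查询日志长度再按需分配缓冲区。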

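    ■ 转机,去掉了修饰符 __write_only 和 __read_only 之后,返回 status 为 -11,至少报错信息有了:【identifier "pipe" is undefined】和【invalid combination of type specifiers】(指在 float 上)。注意:在 OpenCL 2.0 中这两个访问限定符除了图像类型之外也适用于管道参数,并非只能用于图像;报错说明编译器不认识 pipe 关键字,也就是程序并没有按 OpenCL C 2.0 编译(编译选项无效,或者平台/设备不支持 2.0)。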

    ● 后续代码,但是上述代码都编译不了,下面的也暂时没用。(1)使用局部内存来统一工作组的写入

     //pipe2.cl
     // Producer kernel, work-group variant: the first work-item of each group
     // reserves get_local_size(0) pipe slots at once and shares the reservation
     // through local memory; every work-item then writes its own packet at
     // index lid.
     //   src     - input buffer, indexed by the global work-item id
     //   outPipe - pipe that receives one packet per work-item
     // NOTE(review): sharing a work-item-level reservation across a work-group
     // follows the book's example; confirm the target compiler accepts a
     // __local reserve_id_t.
     __kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
     {
         int gid = get_global_id(0), lid = get_local_id(0);
         __local reserve_id_t resID;
         if (lid == 0)
             resID = reserve_write_pipe(outPipe, get_local_size(0)); // one reservation for the whole group
         barrier(CLK_LOCAL_MEM_FENCE);

         float srcPipe = src[gid];
         if (is_valid_reserve_id(resID))
             write_pipe(outPipe, resID, lid, &srcPipe);              // each work-item fills its own slot

         // No early return above: every work-item must reach this barrier, and
         // all writes must be finished before the single commit below.
         barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
         if (lid == 0 && is_valid_reserve_id(resID))
             commit_write_pipe(outPipe, resID);                      // commit the reservation exactly once
     }
    18 
    // Consumer kernel, work-group variant: the first work-item of each group
    // reserves get_local_size(0) packets for reading and shares the reservation
    // through local memory; every work-item reads the packet at index lid and
    // stores it in dst at its global id.
    //   dst    - output buffer, indexed by the global work-item id
    //   inPipe - pipe previously filled by pipeProducer
    // dst[gid] is written only when a packet was actually read.
    // NOTE(review): sharing a work-item-level reservation across a work-group
    // follows the book's example; confirm the target compiler accepts a
    // __local reserve_id_t.
    __kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
    {
        int gid = get_global_id(0), lid = get_local_id(0);
        __local reserve_id_t resID;
        if (lid == 0)
            resID = reserve_read_pipe(inPipe, get_local_size(0));
        barrier(CLK_LOCAL_MEM_FENCE);

        float dstPipe;
        if (is_valid_reserve_id(resID) && read_pipe(inPipe, resID, lid, &dstPipe) == 0)
            dst[gid] = dstPipe;

        // No early return above: every work-item must reach this barrier, and
        // all reads must be finished before the single commit below.
        barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
        if (lid == 0 && is_valid_reserve_id(resID))
            commit_read_pipe(inPipe, resID);
    }

    ● (2)使用工作组管道操作简化上述代码(只是干掉了一个 if 和一个同步)

     //pipe3.cl
     // Producer kernel using the work-group pipe built-ins: the whole group
     // reserves get_local_size(0) slots in one call, each work-item writes its
     // packet at index lid, and the group commits the reservation together.
     //   src     - input buffer, indexed by the global work-item id
     //   outPipe - pipe that receives one packet per work-item
     __kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
     {
         int gid = get_global_id(0), lid = get_local_id(0);
         // The reservation is kept in a private variable: a __local variable
         // may not have an initializer.
         reserve_id_t resID = work_group_reserve_write_pipe(outPipe, get_local_size(0));

         float srcPipe = src[gid];
         // This branch relies on every work-item of the group getting the same
         // reservation, so that all of them reach the work-group commit; for
         // the same reason there is no early return.
         if (is_valid_reserve_id(resID))
         {
             write_pipe(outPipe, resID, lid, &srcPipe);
             work_group_commit_write_pipe(outPipe, resID);
         }
     }
    15 
    // Consumer kernel using the work-group pipe built-ins: the whole group
    // reserves get_local_size(0) packets in one call, each work-item reads the
    // packet at index lid into dst[gid], and the group commits together.
    //   dst    - output buffer, indexed by the global work-item id
    //   inPipe - pipe previously filled by pipeProducer
    // dst[gid] is written only when a packet was actually read.
    __kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
    {
        int gid = get_global_id(0), lid = get_local_id(0);
        // The reservation is kept in a private variable: a __local variable
        // may not have an initializer.
        reserve_id_t resID = work_group_reserve_read_pipe(inPipe, get_local_size(0));

        // This branch relies on every work-item of the group getting the same
        // reservation, so that all of them reach the work-group commit; for
        // the same reason there is no early return.
        if (is_valid_reserve_id(resID))
        {
            float dstPipe;
            if (read_pipe(inPipe, resID, lid, &dstPipe) == 0)
                dst[gid] = dstPipe;
            work_group_commit_read_pipe(inPipe, resID);
        }
    }

     ● 书上原本的主函数的内容(关于数据缓冲区的部分),是用共享虚拟内存(SVM)写的,由于办公室的电脑不支持,上面的代码中被我换成了普通缓冲区

     1     float *src = (float *)clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, dataSize, 0);
     2     float *dst = (float *)clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, dataSize, 0);
     3     if (src == NULL || dst == NULL)
     4     {
     5         printf("clSVMAlloc failed!
    ");
     6         getchar();
     7         return 0;
     8     }
     9 
    10     clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, src, dataSize, 0, NULL, NULL);
    11     for (i = 0; i < nPacket; i++)
    12         src[i] = i, dst[i] = 0.0f;
    13     clEnqueueSVMUnmap(queue, src, 0, NULL, NULL);
    14 
    15     cl_mem pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(float), nPacket, NULL, &status);
    16 
    17     clSetKernelArgSVMPointer(kernelProducer, 0, src);
    18     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
    19 
    20     clSetKernelArgSVMPointer(kernelProducer, 0, dst);
    21     clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe);
    22     
    23     clEnqueueNDRangeKernel(queue, kernelProducer, 1, NULL, &globalSize, &localSize, 0, NULL, &eventProducer);
    24     clEnqueueNDRangeKernel(queue, kernelConsumer, 1, NULL, &globalSize, &localSize, 1, &eventProducer,NULL);
    25     clFinish(queue);
    26     
    27     clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, dst, dataSize, 0, NULL, NULL);    
    28     for (i = 0; i < nPacket; i++)
    29     {
    30         if (dst[i] != i)
    31             break;
    32     }
    33     printf("Output is %s.
    ", (i == nPacket) ? "correct" : "incorrect");
    34     clEnqueueSVMUnmap(queue, dst, 0, NULL, NULL);
  • 相关阅读:
    Unity3D移动端海水的实时绘制
    NGUI 3.x 深度管理及渲染优化
    【入门】从学生到成熟:游戏模块设计起步之抽象思维 (转)
    正弦波近似 http://blog.csdn.net/ring0hx/article/details/44492415
    Stack 栈 ----Queue 队列
    ORM
    CBV&FBV
    Django路由系统
    CRM
    深浅拷贝
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/9038655.html
Copyright © 2011-2022 走看看