zoukankan      html  css  js  c++  java
  • OpenCL 三种内存对象的使用

    ▶ 包括带有 CL_MEM_READ_ONLY,CL_MEM_WRITE_ONLY,CL_MEM_READ_WRITE 标识的显式拷贝(使用函数 clEnqueueWriteBuffer 和 clEnqueueReadBuffer);带有 CL_MEM_COPY_HOST_PTR 标识的隐式拷贝(创建缓冲区时即用主机数据初始化,不需要调用拷贝函数,在设备上直接使用);以及使用函数 clEnqueueMapBuffer 在设备和主机之间直接映射一段内存,得到可在主机端访问的指针

    ● 代码

     1 #include <cl.h>
     2 #include <stdio.h>
     3 #include <stdlib.h>
     4 #include <time.h>
     5 #include <iostream>
     6 
     7 using namespace std;
     8 const int nElement = 2048;
     9 
    10 const char *programSource = "                                              
    11 __kernel void vectorAdd(__global int *A, __global int *B, __global int *C) 
    12 {                                                                          
    13     int idx = get_global_id(0);                                            
    14     C[idx] = A[idx] + B[idx];                                              
    15     return;                                                                
    16 }                                                                          
    17 ";
    18 
    19 int main(int argc, char* argv[])
    20 {
    21     const size_t dataSize = sizeof(int) * nElement;
    22     int i, *A, *B, *C, *returnC;
    23 
    24     A = (int *)malloc(dataSize);
    25     B = (int *)malloc(dataSize);
    26     C = (int *)malloc(dataSize);
    27     for (srand((unsigned)time(NULL)), i = 0; i < nElement; A[i] = rand() % 65535, B[i] = rand() % 65535, C[i] = A[i] + B[i], i++);
    28 
    29     cl_int status;
    30     cl_platform_id platform;
    31     clGetPlatformIDs(1, &platform, NULL);
    32     cl_device_id device;
    33     clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
    34     cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
    35     cl_command_queue queue = clCreateCommandQueue(context, device, 0, &status);
    36     cl_program program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status);
    37     clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    38     cl_kernel kernel = clCreateKernel(program, "vectorAdd", NULL);
    39     cl_event writeEvent, runEvent, mapEvent;
    40 
    41     //创建三个内存对象,把 A 隐式拷贝到 clA,把 B 显示拷贝到 clB,clC 接收计算结果映射给 returnC
    42     cl_mem clA = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, dataSize, A, NULL);
    43     cl_mem clB = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL, NULL);
    44     cl_mem clC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, NULL);
    45 
    46     clEnqueueWriteBuffer(queue, clB, 1, 0, dataSize, B, 0, 0, &writeEvent);
    47 
    48     clFlush(queue);// 出现需要等待的事件之前,把队列中的任务全部提交掉    
    49     clWaitForEvents(1, &writeEvent);// 等待指定事件完成
    50 
    51                                     // 执行内核
    52     size_t global_work_size = nElement;
    53     clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clA);
    54     clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&clB);
    55     clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&clC);
    56     clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, &runEvent);
    57     clFlush(queue);
    58     clWaitForEvents(1, &runEvent);
    59 
    60     // 结果拷回
    61     returnC = (cl_int *)clEnqueueMapBuffer(queue, clC, CL_TRUE, CL_MAP_READ, 0, dataSize, 0, NULL, &mapEvent, NULL);
    62     clFlush(queue);
    63     clWaitForEvents(1, &mapEvent);
    64 
    65     //结果验证
    66     printf("Verify %s.
    ", !memcmp(C, returnC, dataSize) ? "passed" : "failed");// 定义在 iostream
    67 
    68     free(C);
    69     free(A);
    70     free(B);
    71     clReleaseMemObject(clA);
    72     clReleaseMemObject(clB);
    73     clReleaseMemObject(clC);
    74     clReleaseContext(context);
    75     clReleaseCommandQueue(queue);
    76     clReleaseProgram(program);
    77     clReleaseEvent(writeEvent);
    78     clReleaseEvent(runEvent);
    79     clReleaseEvent(mapEvent);
    80     getchar();
    81     return 0;
    82 }

    ● 输出结果

    Verify passed.

    ● 注意

    ■ 事件在声明以后一定要通过某个函数的  cl_event * /* event */  参数定义它,才能进行 clWaitForEvents 或 clReleaseEvent,否则会报错

    ● 使用 CL_MEM_COPY_HOST_PTR 和 CL_MEM_USE_HOST_PTR 的区别,前者创建一个独立的缓冲区,只是使用了 host_ptr 的值来初始化,后续使用过程中主机端对 host_ptr 的修改不会影响到缓冲区的内容;后者直接使用 host_ptr (转化为设备指针来使用),后续使用过程中主机端对 host_ptr 的修改仍会影响缓冲区内容

    ● 代码

     1 #include <cl.h>
     2 #include <stdio.h>
     3 #include <stdlib.h>
     4 
     5 using namespace std;
     6 const int nElement = 2048;
     7 
     8 const char *programSource = "               
     9 __kernel void vectorAdd(__global int *A)    
    10 {                                           
    11     return;                                 
    12 }                                           
    13 ";
    14 
    15 int main(int argc, char* argv[])
    16 {   
    17     const size_t dataSize = sizeof(int) * nElement;
    18     int i, sum, *A, *returnA, *mapReturnA;
    19             
    20     A = (int *)malloc(dataSize);        
    21     returnA = (int *)malloc(dataSize);
    22     //mapReturnA = (int *)malloc(dataSize);// 注意用作内存映射的指针不需要申请内存,使用这个表达式会造成结尾处的运行时错误
    23     for (i = 0; i < nElement; A[i++] = 1);
    24 
    25     cl_int status;
    26     cl_platform_id platform;
    27     clGetPlatformIDs(1, &platform, NULL);
    28     cl_device_id device;
    29     clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
    30     cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
    31     cl_command_queue queue = clCreateCommandQueue(context, device, 0, &status);
    32     cl_program program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status);
    33     status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    34     cl_kernel kernel = clCreateKernel(program, "vectorAdd", &status);
    35     
    36     cl_mem clA = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, dataSize, A, NULL);
    37     //cl_mem clA = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, dataSize, A, NULL);
    38     
    39     for (i = 0; i < nElement; A[i++]++);// 在创建缓冲区以后再修改 A 的值
    40         
    41     size_t global_work_size = nElement; 
    42     clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clA);
    43     clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);    
    44     
    45     clEnqueueReadBuffer(queue, clA, CL_TRUE, 0, dataSize, returnA, 0, NULL, NULL);
    46     mapReturnA = (cl_int *)clEnqueueMapBuffer(queue, clA, CL_TRUE, CL_MAP_READ, 0, dataSize, 0, NULL, NULL, &status);   
    47 
    48     for (i = sum = 0; i < nElement; sum += A[i++]);
    49     printf("sum A = %d
    ", sum);
    50     for (i = sum = 0; i < nElement; sum += returnA[i++]);
    51     printf("sum returnA = %d
    ", sum);
    52     for (i = sum = 0; i < nElement; sum += mapReturnA[i++]);
    53     printf("sum mapReturnA = %d
    ", sum);
    54 
    55     free(A);    
    56     free(returnA);        
    57     clReleaseMemObject(clA);    
    58     clReleaseContext(context);
    59     clReleaseCommandQueue(queue);
    60     clReleaseProgram(program);
    61     getchar();
    62     return 0;
    63 }

    ● 输出结果

    ■ 使用 CL_MEM_COPY_HOST_PTR,仅有主机端的值被修改

    sum A = 4096
    sum returnA = 2048
    sum mapReturnA = 2048

    ■ 使用 CL_MEM_USE_HOST_PTR,设备端使用的缓冲区也遭到了修改

    sum A = 4096
    sum returnA = 4096
    sum mapReturnA = 4096
  • 相关阅读:
    笛卡尔树学习笔记
    图论基础(自认为很全)
    我的博客在这里
    C++学习资料
    test
    个人介绍
    CF1153F Serval and Bonus Problem
    【ZJOI2020】抽卡
    【LOJ】小 Q 的序列
    我的个人博客:https://xyix.github.io
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/8964534.html
Copyright © 2011-2022 走看看