zoukankan      html  css  js  c++  java
  • OpenCL 矢量存取

    ▶ 使用函数 vloadn 和 vstoren 来实现全局存储器和局部存储器之间的向量拷贝

    ● 代码

     1 #include <stdio.h>  
     2 #include <stdlib.h>
     3 #include <cl.h>
     4 
     5 const int nElement = 4096;  /* number of ints copied from A to B */
     6 /* Kernel: work-item idx copies the 4 ints A[4*idx .. 4*idx+3] into the same slots of B. */
     7 const char *programSource =
     8 "__kernel void prog(__global int *A, __global int *B)\n"
     9 "{\n"
    10 "    int idx = get_global_id(0);\n"
    11 "    int4 temp = vload4(idx, A);\n"
    12 "    vstore4(temp, idx, B);\n"
    13 "    return;\n"
    14 "}\n";
    15 
    16 /*
    17  * Copies nElement ints from A to B through an OpenCL device (the kernel uses
    18  * vload4/vstore4), then checks on the host that B[i] == i for every element.
    19  * Prints the verdict, waits for a key press, and returns 0 only when the copy
    20  * ran and verified; any allocation or OpenCL failure returns 1.
    21  */
    22 int main()
    23 {
    24     const size_t datasize = sizeof(int) * nElement;
    25     /* One work-item moves 4 ints (one int4), so nElement / 4 work-items cover
    26        the buffers; launching nElement of them would access up to index
    27        4 * nElement - 1, past the end of A and B. nElement / 4 must remain a
    28        multiple of the local size. */
    29     size_t globalSize[1] = { nElement / 4 }, localSize[1] = { 128 };
    30     int i, rc = 1;
    31     int *A = NULL, *B = NULL;
    32     cl_int status = CL_SUCCESS;
    33     cl_platform_id platform;
    34     cl_device_id device;
    35     cl_context context = NULL;
    36     cl_command_queue cmdQueue = NULL;
    37     cl_mem bufferA = NULL, bufferB = NULL;
    38     cl_program program = NULL;
    39     cl_kernel kernel = NULL;
    40 
    41     A = (int*)malloc(datasize);
    42     B = (int*)malloc(datasize);
    43     if (A == NULL || B == NULL)
    44     {
    45         fprintf(stderr, "Host allocation failed.\n");
    46         goto cleanup;
    47     }
    48     for (i = 0; i < nElement; i++)
    49     {
    50         A[i] = i;
    51         B[i] = 0;
    52     }
    53 
    54     status = clGetPlatformIDs(1, &platform, NULL);
    55     if (status != CL_SUCCESS) goto clFail;
    56     status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);
    57     if (status != CL_SUCCESS) goto clFail;
    58     context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
    59     if (status != CL_SUCCESS) goto clFail;
    60     cmdQueue = clCreateCommandQueue(context, device, 0, &status);
    61     if (status != CL_SUCCESS) goto clFail;
    62     bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
    63     if (status != CL_SUCCESS) goto clFail;
    64     bufferB = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, &status);
    65     if (status != CL_SUCCESS) goto clFail;
    66     /* Blocking write: an error path below may free A right after this call. */
    67     status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_TRUE, 0, datasize, A, 0, NULL, NULL);
    68     if (status != CL_SUCCESS) goto clFail;
    69     program = clCreateProgramWithSource(context, 1, &programSource, NULL, &status);
    70     if (status != CL_SUCCESS) goto clFail;
    71     status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    72     if (status != CL_SUCCESS) goto clFail;
    73     kernel = clCreateKernel(program, "prog", &status);
    74     if (status != CL_SUCCESS) goto clFail;
    75     status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
    76     if (status != CL_SUCCESS) goto clFail;
    77     status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
    78     if (status != CL_SUCCESS) goto clFail;
    79     status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL);
    80     if (status != CL_SUCCESS) goto clFail;
    81     /* Blocking read: B holds the device result once this call returns. */
    82     status = clEnqueueReadBuffer(cmdQueue, bufferB, CL_TRUE, 0, datasize, B, 0, NULL, NULL);
    83     if (status != CL_SUCCESS) goto clFail;
    84 
    85     for (i = 0; i < nElement; i++)
    86     {
    87         if (B[i] != i)
    88             break;
    89     }
    90     printf("Output is %s.\n", (i == nElement) ? "correct" : "incorrect");
    91     rc = (i == nElement) ? 0 : 1;
    92     goto cleanup;
    93 
    94 clFail:
    95     fprintf(stderr, "OpenCL call failed with status %d.\n", (int)status);
    96 
    97 cleanup:
    98     /* Release in reverse order of creation; handles never created stay NULL. */
    99     if (kernel != NULL) clReleaseKernel(kernel);
   100     if (program != NULL) clReleaseProgram(program);
   101     if (bufferB != NULL) clReleaseMemObject(bufferB);
   102     if (bufferA != NULL) clReleaseMemObject(bufferA);
   103     if (cmdQueue != NULL) clReleaseCommandQueue(cmdQueue);
   104     if (context != NULL) clReleaseContext(context);
   105     free(A);
   106     free(B);
   107     getchar();
   108     return rc;
   109 }

    ● 输出结果

    Output is correct.

    ● 教训

    ■ 核函数代码中每个 "\" 的后面不要有任何东西,包括空格。因为 "\" 在预处理以后会消失,其后的内容会被当成下一行的内容,而空格会在IDE中使 "\" 失效,导致编译错误
    ■ 核函数代码中不要有 "//" 型的行注释,理由类似。会使得 "//" 以后的代码全部失效
    ■ 可以改用字符串连接来写核函数代码,如:

    1 /* Kernel source as adjacent string literals, each ending in an explicit \n. */
    2 const char *programSource =
    3 "__kernel void prog(__global int *A, __global int *B)   \n"
    4 "{                                                      \n"
    5 "    int idx = get_global_id(0);                        \n"
    6 "    int4 temp = vload4(idx, A);                        \n"
    7 "    vstore4(temp, idx, B);                             \n"
    8 "    return;                                            \n"
    9 "}                                                      \n";
  • 相关阅读:
    Java开发中的23种设计模式详解
    Zookeeper基本知识
    Zookeeper命令
    ZooKeeper原理及使用
    几种java通信(rmi,http,hessian,webservice)协议性能比较
    linux 查看系统信息命令
    Zookeeper安装和配置
    Hive 接口介绍(Web UI/JDBC)
    窗口标志-外观
    pyqt5-QWidget-窗口状态(最大化最小化等)
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/8931357.html
Copyright © 2011-2022 走看看