zoukankan      html  css  js  c++  java
  • opencl(八)----clEnqueueNDRangeKernel、clEnqueueTask、工作组和工作项

    clEnqueueNDRangeKernel

    /* Prototype: enqueues a command that executes a kernel over an N-dimensional index range. */
    cl_int clEnqueueNDRangeKernel (    
            cl_command_queue command_queue,     // command queue the kernel-execution command is added to
         cl_kernel kernel,                                   // kernel object to execute
         cl_uint work_dim,                                // number of dimensions of the data (index space)
         const size_t *global_work_offset,         // per-dimension offset applied to the global IDs
         const size_t *global_work_size,     // per-dimension number of work-items
         const size_t *local_work_size,      // per-dimension number of work-items in one work-group
         cl_uint num_events_in_wait_list,    // number of entries in event_wait_list
         const cl_event *event_wait_list,    // events to wait for before this command runs
         cl_event *event                     // output: event object identifying this command
    )

    clEnqueueTask

    /* Prototype: enqueues a command that executes a kernel; takes no index-range arguments. */
    cl_int clEnqueueTask (    
            cl_command_queue command_queue, // command queue the kernel-execution command is added to
         cl_kernel kernel,    // kernel object to execute
         cl_uint num_events_in_wait_list,  // number of entries in event_wait_list
         const cl_event *event_wait_list,   // events to wait for before this command runs
         cl_event *event                    // output: event object identifying this command
    )
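    clEnqueueTask 和 clEnqueueNDRangeKernel 的功能都是将核函数的执行命令加入命令队列。clEnqueueTask 只用单个工作项执行核函数(相当于以 work_dim = 1、global_work_size[0] = 1、local_work_size[0] = 1 调用 clEnqueueNDRangeKernel);而 clEnqueueNDRangeKernel 可以指定工作项和工作组的划分方式,从而更好地划分数据,充分利用设备的资源。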

    工作组和工作项的特点:

    1、工作组中的工作项可以访问局部内存的同一块地址

    2、工作组中的工作项可以进行同步

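    工作项相关函数:get_work_dim()(维度数)、get_global_size(dim)(该维度上的工作项总数)、get_global_id(dim)(工作项的全局ID)、get_global_offset(dim)(该维度上的全局ID偏移量)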

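     工作组相关函数:get_num_groups(dim)(该维度上的工作组数量)、get_group_id(dim)(工作组ID)、get_local_size(dim)(工作组中该维度上的工作项数量)、get_local_id(dim)(工作项在工作组内的局部ID)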

     demo:

    // Kernel function
    /*
     * id_check: each work-item writes one float that encodes its own IDs.
     *
     * The integer part is global_id(0) * 10 + global_id(1); the first decimal
     * digit comes from local_id(0) and the second from local_id(1). The value
     * is stored at a row-major position computed from the offset-corrected
     * global IDs, where one row holds get_global_size(0) elements.
     */
    __kernel void id_check(__global float *output) {

       /* Where this work-item sits in the global range */
       size_t gid_x = get_global_id(0);
       size_t gid_y = get_global_id(1);

       /* Offsets the range was launched with, and the length of one row */
       size_t off_x = get_global_offset(0);
       size_t off_y = get_global_offset(1);
       size_t row_len = get_global_size(0);

       /* Where this work-item sits inside its work-group */
       size_t lid_x = get_local_id(0);
       size_t lid_y = get_local_id(1);

       /* Remove the offsets, then flatten (row, column) into one index */
       int col = gid_x - off_x;
       int row = gid_y - off_y;
       int slot = row * row_len + col;

       /* Global IDs go into the integer digits, local IDs into the decimals */
       float global_part = gid_x * 10.0f + gid_y * 1.0f;
       float local_part = lid_x * 0.1f + lid_y * 0.01f;

       output[slot] = global_part + local_part;
    }
    #define _CRT_SECURE_NO_WARNINGS
    #define PROGRAM_FILE "id_check.cl"
    #define KERNEL_FUNC "id_check"
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #ifdef MAC
    #include <OpenCL/cl.h>
    #else
    #include <CL/cl.h>
    #endif
    
    /* Find a GPU or CPU associated with the first available platform */
    // 获取平台  获取设备
    cl_device_id create_device() {
    
       cl_platform_id platform;
       cl_device_id dev;
       int err;
    
       /* Identify a platform */
       err = clGetPlatformIDs(1, &platform, NULL);
       if(err < 0) {
          perror("Couldn't identify a platform");
          exit(1);
       } 
    
       /* Access a device */
       err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &dev, NULL);
       if(err == CL_DEVICE_NOT_FOUND) {
          err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &dev, NULL);
       }
       if(err < 0) {
          perror("Couldn't access any devices");
          exit(1);   
       }
    
       return dev;
    }
    
    /*
     * Create a program from a source file and compile it.
     *
     * ctx      - context the program is created in
     * dev      - device whose build log is printed when compilation fails
     * filename - path of the OpenCL C source file to load
     *
     * Returns the built cl_program. On any failure (file access, allocation,
     * program creation, compilation) a diagnostic is printed and the process
     * exits with status 1; a compilation failure also prints the build log
     * to standard output.
     */
    cl_program build_program(cl_context ctx, cl_device_id dev, const char* filename) {

       cl_program program;
       FILE *program_handle;
       char *program_buffer, *program_log;
       size_t program_size, log_size;
       long file_length;
       cl_int err;

       /* Read program file and place content into buffer.
          Binary mode keeps the byte count from ftell() equal to what fread()
          delivers (text mode may translate line endings). */
       program_handle = fopen(filename, "rb");
       if(program_handle == NULL) {
          perror("Couldn't find the program file");
          exit(1);
       }
       if(fseek(program_handle, 0, SEEK_END) != 0 ||
             (file_length = ftell(program_handle)) < 0) {
          perror("Couldn't determine the size of the program file");
          exit(1);
       }
       rewind(program_handle);

       program_buffer = (char*)malloc((size_t)file_length + 1);
       if(program_buffer == NULL) {
          perror("Couldn't allocate the program buffer");
          exit(1);
       }

       /* Use the number of bytes actually read as the source length */
       program_size = fread(program_buffer, sizeof(char),
             (size_t)file_length, program_handle);
       if(ferror(program_handle)) {
          perror("Couldn't read the program file");
          exit(1);
       }
       program_buffer[program_size] = '\0';
       fclose(program_handle);

       /* Create program from file */
       program = clCreateProgramWithSource(ctx, 1,
          (const char**)&program_buffer, &program_size, &err);
       if(err != CL_SUCCESS) {
          /* OpenCL does not set errno, so report its own error code */
          fprintf(stderr, "Couldn't create the program (OpenCL error %d)\n",
                (int)err);
          exit(1);
       }
       free(program_buffer);

       /* Build program */
       err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
       if(err != CL_SUCCESS) {

          /* Find size of log and print to std output */
          log_size = 0;   /* stays 0 if the size query itself fails */
          clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
                0, NULL, &log_size);

          /* Zero-filled so the text is terminated even if the fetch fails */
          program_log = (char*) calloc(log_size + 1, sizeof(char));
          if(program_log == NULL) {
             perror("Couldn't allocate the build log buffer");
             exit(1);
          }
          clGetProgramBuildInfo(program, dev, CL_PROGRAM_BUILD_LOG,
                log_size + 1, program_log, NULL);
          printf("%s\n", program_log);
          free(program_log);
          exit(1);
       }

       return program;
    }
    
    int main() {
    
       /* OpenCL data structures */
       cl_device_id device;
       cl_context context;
       cl_command_queue queue;
       cl_program program;
       cl_kernel kernel;
       cl_int i, err;
    
       /* Data and buffers */
       size_t dim = 2;
       size_t global_offset[] = {3, 5};
       size_t global_size[] = {6, 4};
       size_t local_size[] = {3, 2};
       float test[24];      
       cl_mem test_buffer;
    
       /* Create a device and context */
       // 获取设备
       device = create_device();
       // 获取上下文
       context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
       if(err < 0) {
          perror("Couldn't create a context");
          exit(1);   
       }
    
       /* Build the program and create a kernel */
       // 获取编译后的 cl_program
       program = build_program(context, device, PROGRAM_FILE);
       // 创建核
       kernel = clCreateKernel(program, KERNEL_FUNC, &err);
       if(err < 0) {
          perror("Couldn't create a kernel");
          exit(1);   
       };
    
       /* Create a write-only buffer to hold the output data */
       // 创建 cl_mem
       test_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
             sizeof(test), NULL, &err);
       if(err < 0) {
          perror("Couldn't create a buffer");
          exit(1);   
       };
    
       /* Create kernel argument */
       // 设置核参数
       err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buffer);
       if(err < 0) {
          perror("Couldn't set a kernel argument");
          exit(1);   
       };
    
       /* Create a command queue */
       // 创建命令队列
       queue = clCreateCommandQueue(context, device, 0, &err);
       if(err < 0) {
          perror("Couldn't create a command queue");
          exit(1);   
       };
    
       /* Enqueue kernel */
       //Enqueues a command to execute a kernel on a device.
       /**
        *
        cl_int clEnqueueNDRangeKernel ( cl_command_queue command_queue,
          cl_kernel kernel,
          cl_uint work_dim,
          const size_t *global_work_offset,
          const size_t *global_work_size,
          const size_t *local_work_size,
          cl_uint num_events_in_wait_list,
          const cl_event *event_wait_list,
          cl_event *event)
        * **/
        // dim =2
        //size_t global_offset[] = {3, 5};
        //size_t global_size[] = {6, 4};
        //size_t local_size[] = {3, 2};
       err = clEnqueueNDRangeKernel(queue, kernel, dim, global_offset,
             global_size, local_size, 0, NULL, NULL);
       if(err < 0) {
          perror("Couldn't enqueue the kernel");
          exit(1);   
       }
    
    
       /* Read and print the result */
       // 从设备中读取结果
       err = clEnqueueReadBuffer(queue, test_buffer, CL_TRUE, 0, 
          sizeof(test), &test, 0, NULL, NULL);
       if(err < 0) {
          perror("Couldn't read the buffer");
          exit(1);   
       }
    
       for(i=0; i<24; i+=6) {
          printf("%.2f     %.2f     %.2f     %.2f     %.2f     %.2f
    ", 
             test[i], test[i+1], test[i+2], test[i+3], test[i+4], test[i+5]);
       }
    
       /* Deallocate resources */
       clReleaseMemObject(test_buffer);
       clReleaseKernel(kernel);
       clReleaseCommandQueue(queue);
       clReleaseProgram(program);
       clReleaseContext(context);
       return 0;
    }
  • 相关阅读:
    分形与数据结构第一篇(神奇的色子)
    画图小工具第二篇
    画图小工具第一篇
    图形界面第一篇
    回合制对战游戏第二篇
    回合对战制游戏第一篇(初识java)
    技术+态度+人品
    排序的一些方法(稳定性,内外排序,时间空间复杂度)
    暂时性死区
    vue传值(父子传值,非父子传值)
  • 原文地址:https://www.cnblogs.com/feihu-h/p/12084747.html
Copyright © 2011-2022 走看看