zoukankan      html  css  js  c++  java
  • OpenCL 存储器次序的验证

    ▶ 《OpenCL异构并行编程实战》P224 的代码,先放上来,坐等新设备到了再执行

     1 //kernel.cl
     // Program-scope atomic flag in global memory. It is statically initialized
     // to 0 and the kernels in this file only ever store 1 into it.
     __global volatile atomic_int globalAtom = ATOMIC_VAR_INIT(0);

     /*
      * memoryOrderTest01 - visibility experiment using the *_explicit atomic API.
      *
      * Work-item 0 stores 1 into a work-group local atomic and into globalAtom
      * (both memory_order_seq_cst). Work-item 64 then polls each atomic, at most
      * 9999 loads per atomic, and reports what it observed:
      *
      *   dst[0]  1 if the local store was observed, otherwise 0
      *   dst[1]  1 if the global store was observed, otherwise 0
      *   dst[2]  1 + number of loads issued while polling the local atomic
      *
      * dst must therefore hold at least 3 ints. localAtom exists once per
      * work-group, so the local result is only meaningful when work-items 0 and
      * 64 belong to the same work-group.
      */
     __kernel void memoryOrderTest01(__global int *dst)
     {
         __local volatile atomic_int localAtom;    // work-group local atomic flag
         const int gid = get_global_id(0);

         // atomic_init is not an atomic operation, so exactly one work-item per
         // work-group performs it; the barrier publishes the initial value.
         if (get_local_id(0) == 0)
             atomic_init(&localAtom, 0);
         work_group_barrier(CLK_LOCAL_MEM_FENCE);

         if (gid == 0)                             // work-item 0 tries to write 1
         {
             atomic_store_explicit(&localAtom, 1, memory_order_seq_cst, memory_scope_work_group);
             atomic_store_explicit(&globalAtom, 1, memory_order_seq_cst, memory_scope_device);
         }

         // Optional fence, left disabled in the original experiment:
         //atomic_work_item_fence(CLK_LOCAL_MEM_FENCE, memory_order_acq_rel, memory_scope_work_group);

         if (gid == 64)
         {
             int a, count;

             // Poll the local atomic until it reads non-zero or the budget runs out.
             for (a = 0, count = 1; a == 0 && count < 10000; count++)
                 a = atomic_load_explicit(&localAtom, memory_order_seq_cst, memory_scope_work_group);
             dst[0] = !!a;
             dst[2] = count;

             // Reset the observation so the global result is measured on its own
             // instead of inheriting the outcome of the local poll.
             for (a = 0, count = 1; a == 0 && count < 10000; count++)
                 a = atomic_load_explicit(&globalAtom, memory_order_seq_cst, memory_scope_device);
             dst[1] = !!a;
         }

         // Original author's note: this barrier must be present so that the side
         // effects of work-item 0 are exposed to the other work-items.
         work_group_barrier(0);
     }
    28 
    /*
     * memoryOrderTest02 - same experiment as memoryOrderTest01, but written with
     * the non-explicit atomic_store / atomic_load forms (which the OpenCL C 2.0
     * specification defines as memory_order_seq_cst with memory_scope_device).
     *
     * Work-item 0 stores 1 into a work-group local atomic and into the
     * program-scope globalAtom. Work-item 64 polls each atomic, at most 9999
     * loads per atomic, and reports:
     *
     *   dst[0]  1 if the local store was observed, otherwise 0
     *   dst[1]  1 if the global store was observed, otherwise 0
     *   dst[2]  1 + number of loads issued while polling the local atomic
     *
     * dst must hold at least 3 ints. The local result is only meaningful when
     * work-items 0 and 64 belong to the same work-group.
     */
    __kernel void memoryOrderTest02(__global int *dst)
    {
        __local volatile atomic_int localAtom;    // work-group local atomic flag
        const int gid = get_global_id(0);

        // atomic_init is not an atomic operation, so exactly one work-item per
        // work-group performs it; the barrier publishes the initial value.
        if (get_local_id(0) == 0)
            atomic_init(&localAtom, 0);
        work_group_barrier(CLK_LOCAL_MEM_FENCE);

        if (gid == 0)                             // work-item 0 tries to write 1
        {
            atomic_store(&localAtom, 1);
            atomic_store(&globalAtom, 1);
        }

        // Optional fence, left disabled in the original experiment:
        //atomic_work_item_fence(CLK_LOCAL_MEM_FENCE, memory_order_acq_rel, memory_scope_work_group);

        if (gid == 64)
        {
            int a, count;

            // Poll the local atomic until it reads non-zero or the budget runs out.
            for (a = 0, count = 1; a == 0 && count < 10000; count++)
                a = atomic_load(&localAtom);
            dst[0] = !!a;
            dst[2] = count;

            // Reset the observation so the global result is measured on its own
            // instead of inheriting the outcome of the local poll.
            for (a = 0, count = 1; a == 0 && count < 10000; count++)
                a = atomic_load(&globalAtom);
            dst[1] = !!a;
        }

        // Kept from the original listing: final work-group barrier with no
        // memory-fence flags.
        work_group_barrier(0);
    }
     1 //main.c
     2 #include <stdio.h>  
     3 #include <stdlib.h>  
     4 #include <cl.h>
     5 
     6 const char *sourceCode = "D:/Code/kernel.cl";
     7 
     8 int readSource(const char* kernelPath, char *source)// 读取文本文件,存储为 char *,返回代码长度
     9 {
    10     FILE *fp;
    11     long int size;
    12     //printf("readSource, Program file: %s
    ", kernelPath);
    13     fopen_s(&fp, kernelPath, "rb");
    14     if (!fp)
    15     {
    16         printf("Open kernel file failed
    ");
    17         exit(-1);
    18     }
    19     if (fseek(fp, 0, SEEK_END) != 0)
    20     {
    21         printf("Seek end of file faildd
    ");
    22         exit(-1);
    23     }
    24     if ((size = ftell(fp)) < 0)
    25     {
    26         printf("Get file position failed
    ");
    27         exit(-1);
    28     }
    29     rewind(fp);
    30     if ((source = (char *)malloc(size + 1)) == NULL)
    31     {
    32         printf("Allocate space failed
    ");
    33         exit(-1);
    34     }
    35     fread(source, 1, size, fp);
    36     fclose(fp);
    37     source[size] = '';
    38     return size + 1;
    39 }
    40 
    41 int main()
    42 {
    43     const int nElement = 2048, dataSize = nElement * sizeof(float);
    44     int i, host[nElement] = { 0 };
    45     char info[1024];
    46 
    47     // 初始化平台
    48     cl_int status;
    49     cl_platform_id platform;
    50     clGetPlatformIDs(1, &platform, NULL);
    51     cl_device_id device[2];
    52     clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, device, NULL);
    53     cl_context_properties contextProp[] = { CL_CONTEXT_PLATFORM,(cl_context_properties)(platform), 0 };
    54     cl_context context = clCreateContext(contextProp, 1, device, NULL, contextProp, &status);
    55     cl_command_queue_properties queueProp = 0;// useless
    56     cl_command_queue queue = clCreateCommandQueueWithProperties(context, device[0], NULL, &status);    
    57 
    58     cl_mem buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status);
    59 
    60     char *source;
    61     size_t sourceLength = readSource(sourceCode, source);
    62     cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceLength, &status);
    63     status = clBuildProgram(program, 1, device, "-cl-std=CL2.0", NULL, NULL);
    64     if (status)
    65     {
    66         clGetProgramBuildInfo(program, device[0], CL_PROGRAM_BUILD_LOG, 1024, info, NULL);
    67         printf("Build log:
    %s
    ", info);
    68     }
    69     cl_kernel kernel = clCreateKernel(program, "memoryOrderTest", &status);            
    70     clSetKernelArg(kernel, 0, sizeof(cl_mem), buffer);
    71     size_t globalSize = nElement, localSize = 256;
    72     clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL);
    73     clFinish(queue);
    74 
    75     clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, dataSize, host, 0, NULL, NULL);
    76     
    77     printf("Local memory result: %d, global memory result: %d, waiting count: %d
    ", host[0], host[1], host[2]);
    78        
    79     clReleaseContext(context);
    80     clReleaseCommandQueue(queue);
    81     clReleaseProgram(program);    
    82     clReleaseKernel(kernel);
    83     clReleaseMemObject(buffer);
    84     getchar();
    85     return 0;
    86 }
  • 相关阅读:
    element_2对话框
    填报
    润乾报表中进度条的一种实现方式
    列名作为分类值时如何画出统计图
    填报之动态扩展列
    自由格式填报的制作
    复杂报表设计之动态报表
    如何通过动态参数实现周报制作
    如何实现行列互换效果?
    大数据集报表点击表头排序
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/9045657.html
Copyright © 2011-2022 走看看