zoukankan      html  css  js  c++  java
  • Ubuntu下使用AMD APP编写OpenCL程序

    对于Ubuntu或其近亲(Lubuntu、Kubuntu、Mint等)编写OpenCL程序也不会太难。由于本例用的是AMD APP SDK,因此需要AMD的GPU以及相关驱动。首先,去AMD官网下载GPU驱动——AMD Catalyst。如果你用的是APU并且还有一块独立显卡的话,通过AMD Catalyst Control Center可以选择使用哪个GPU。像我现在用的联想Z475笔记本,搭载了AMD APU A6-3420M以及一块AMD Radeon HD 7400M,但是相比较而言,还是APU自带的6620G的GPU性能更强一些,因此我这边设置的是采用AMD Radeon HD 6620G。

    在Linux下,AMD官方的GPU驱动是.run文件,只需使用sudo sh xxx.run即可安装。安装时采用默认安装即可。

    然后去developer.amd.com开发者网站下载AMD APP SDK。下载完成之后,将lib里面的动态库文件(xxx.so)取出来,并且把include里的头文件取出来。在你的OpenCL工程中把头文件的路径以及动态加载库都设置好。在你用-l的时候,如果动态库文件后缀名为.so.1,那么得把文件名后缀.1去掉。因为-l只能链接.a、.lib、.so等这些后缀的库文件。


    下面我们将举一个最简单的例子,首先看主机端代码:

    /*
     ============================================================================
     Name        : OpenCLTest.c
     Author      : Zenny Chen
     Version     :
     Copyright   : Your copyright notice
     Description : Minimal OpenCL host program that runs the "test" kernel from test.cl
     ============================================================================
     */
    
    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <CL/cl.h>
    
    /*
     * Builds the path of a file that lives in the same directory as the
     * running executable.
     *
     * The executable's location is read from the "/proc/self/exe" symbolic
     * link; everything after its last '/' is replaced by `filename`.
     *
     * pDst     - destination buffer of 512 bytes; receives the NUL-terminated
     *            path on success.
     * filename - file name to append to the executable's directory.
     *
     * Returns the length of the resulting path (excluding the terminator), or
     * 0 if an argument is NULL, the link cannot be read, or the result would
     * not fit into the 512-byte buffer. When pDst and filename are both
     * non-NULL and the call fails, pDst is set to the empty string.
     */
    static int GetCurrentLocationFilePath(char pDst[512], const char *filename)
    {
        enum { kDstCapacity = 512 };

        if(pDst == NULL || filename == NULL)
            return 0;

        /* readlink() returns -1 on error and never NUL-terminates, so leave
           room for the terminator and validate the result before using it. */
        ssize_t linkLen = readlink("/proc/self/exe", pDst, kDstCapacity - 1);
        if(linkLen <= 0)
        {
            pDst[0] = '\0';
            return 0;
        }

        /* Keep the directory part, including its trailing '/'. The lower
           bound stops the scan if the link target contains no '/'. */
        size_t dirLen = (size_t)linkLen;
        while(dirLen > 0 && pDst[dirLen - 1] != '/')
            dirLen--;

        /* Reject names that would overflow the buffer instead of writing
           past its end. */
        size_t nameLen = strlen(filename);
        if(nameLen >= (size_t)kDstCapacity - dirLen)
        {
            pDst[0] = '\0';
            return 0;
        }

        /* Copies the name together with its terminating NUL. */
        memcpy(&pDst[dirLen], filename, nameLen + 1);

        return (int)(dirLen + nameLen);
    }
    
    int main(void)
    {
        /*Step1: Getting platforms and choose an available one.*/
        cl_uint numPlatforms;   //the NO. of platforms
        cl_int  status = clGetPlatformIDs(0, NULL, &numPlatforms);
        if (status != CL_SUCCESS)
        {
            puts("Error: Getting platforms!");
            return 0;
        }
    
        cl_platform_id platforms[16];
    
        /*For clarity, choose the first available platform. */
        if(numPlatforms > 0)
        {
            status = clGetPlatformIDs(numPlatforms, platforms, NULL);
            if(status != CL_SUCCESS)
            {
                puts("Failed to get platform IDs");
                return 0;
            }
        }
    
        /*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/
        cl_uint             numDevices = 0;
        cl_device_id        devices[16];
        clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
        if(numDevices == 0)    //no GPU available.
        {
            puts("No devices available!");
            return 0;
        }
        else
        {
            printf("The number of available devices is: %u
    ", numDevices);
            clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
        }
    
        /*Step 3: Create context.*/
        cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL);
    
        /*Step 4: Creating command queue associate with the context.*/
        cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
    
        /*Step 5: Create program object */
        char filePath[512];
        GetCurrentLocationFilePath(filePath, "test.cl");
        FILE *fp = fopen(filePath, "r");
        if(fp == NULL)
        {
            puts("OpenCL kernel source file open failed!");
            return 0;
        }
        fseek(fp, 0, SEEK_END);
        long fileLength = ftell(fp);
        fseek(fp, 0, SEEK_SET);
        char *source = (char*)malloc(fileLength + 1);
        fread(source, 1, fileLength, fp);
        fclose(fp);
        size_t sourceSize[] = {fileLength};
        cl_program program = clCreateProgramWithSource(context, 1, (const char**)&source, sourceSize, NULL);
    
        free(source);
        if(program == NULL)
        {
            puts("Failed to create the program!");
            return 0;
        }
    
        /*Step 6: Build program. */
        status = clBuildProgram(program, 1,devices,NULL,NULL,NULL);
        if(status != CL_SUCCESS)
        {
            puts("Failed to build the program!");
            return 0;
        }
    
        /*Step 7: Initial input,output for the host and create memory objects for the kernel*/
        int input[128];
        for(int i = 0; i < 128; i++)
            input[i] = i + 1;
    
        cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL);
        cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , sizeof(input), NULL, NULL);
    
        /*Step 8: Create kernel object */
        cl_kernel kernel = clCreateKernel(program, "test", NULL);
    
        /*Step 9: Sets Kernel arguments.*/
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
        status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
    
        /*Step 10: Running the kernel.*/
        size_t global_work_size[1] = { 128 };
        status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
    
        /*Step 11: Read the cout put back to host memory.*/
        int output[128];
        status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, sizeof(input), output, 0, NULL, NULL);
    
        /*Step 12: Clean the resources.*/
        status = clReleaseKernel(kernel);               //Release kernel.
        status = clReleaseProgram(program);             //Release the program object.
        status = clReleaseMemObject(inputBuffer);       //Release mem object.
        status = clReleaseMemObject(outputBuffer);
        status = clReleaseCommandQueue(commandQueue);   //Release  Command queue.
        status = clReleaseContext(context);             //Release context.
    
        for(int i = 0; i < 128; i++)
        {
            if(output[i] != i + 2)
            {
                printf("Error occurred @%d!", i);
                return 0;
            }
        }
    
        puts("Pass!");
        return 1;
    }

    在编译选项中,使用-std=gnu99或-std=gnu11。上述代码为纯C语言,因此即便你没有安装g++也完全没关系。

    下面看看内核源代码:

    /*
     ============================================================================
     Name        : test.cl
     Author      : Zenny Chen
     Version     :
     Copyright   : Your copyright notice
     Description : Simple OpenCL kernel source
     ============================================================================
     */
    
    /*
     * Element-wise increment: the work-item whose global ID in dimension 0
     * is i reads in[i] and stores in[i] + 1 into out[i].
     */
    __kernel void test(__global int* in, __global int* out)
    {
        const int gid = get_global_id(0);
        const int incremented = in[gid] + 1;

        out[gid] = incremented;
    }

    将此文件放在可执行文件相同路径下,然后我们就能正常运行了。

  • 相关阅读:
    交换相邻字符(CharBuffer)
    ANSI和UNICODE
    关键路径
    拓扑排序 java
    MySql 中group by使用
    面试题2
    面试题
    K8S如何限制资源使用
    Kubernetes中配置Pod的liveness和readiness探针
    sed入门详解教程
  • 原文地址:https://www.cnblogs.com/zenny-chen/p/3307946.html
Copyright © 2011-2022 走看看