zoukankan      html  css  js  c++  java
  • OpenACC 与 CUDA 的相互调用

    ▶ 按照书上的代码完成了 OpenACC 与CUDA 的相互调用,以及 OpenACC 调用 cuBLAS。遇到了很多问题,如 CUDA 版本,代码版本,计算能力指定等,以后填坑。

    ● 代码,OpenACC 调用 CUDA

     // kernel.cu
     #include <stdio.h>

     /*
      * Device kernel: y[id] += a * x[id] for every id in [0, n).
      *
      * Expects a 1-D launch. Each thread handles the single element whose index
      * is blockIdx.x * blockDim.x + threadIdx.x; threads whose index is >= n do
      * nothing. x and y are dereferenced on the device, so the caller must pass
      * device-accessible addresses.
      */
     __global__ void saxpy_kernel(const int n, const float a, float *x, float *y)
     {
         int id = blockIdx.x * blockDim.x + threadIdx.x;
         if (id < n)
             y[id] += a * x[id];
     }

     /*
      * Host entry point (C linkage, so it can be called from C / OpenACC code).
      *
      * Launches saxpy_kernel with 128 threads per block and enough blocks to
      * cover n elements, then waits for the kernel to finish.
      *
      *   n    number of elements; nothing is launched when n <= 0
      *   a    scale factor applied to x
      *   x,y  device pointers to at least n floats each; y is updated in place
      *
      * Launch or execution errors are reported on stderr; the function returns
      * void, so the caller gets no error code.
      */
     extern "C" void saxpy(const int n, const float a, float *x, float *y)
     {
         const int threads = 128;

         /* A grid with zero blocks is an invalid launch configuration. */
         if (n <= 0)
             return;

         /* Ceil-divide without forming n + threads - 1, which can overflow int. */
         const int blocks = n / threads + (n % threads != 0 ? 1 : 0);

         saxpy_kernel<<<blocks, threads>>>(n, a, x, y);

         /* Launch-configuration errors are only visible through cudaGetLastError. */
         cudaError_t err = cudaGetLastError();

         /*
          * The launch is asynchronous and the caller has no handle on it, so wait
          * here; otherwise the caller may read or copy y before the kernel ran.
          */
         if (err == cudaSuccess)
             err = cudaDeviceSynchronize();

         if (err != cudaSuccess)
             fprintf(stderr, "saxpy: CUDA error: %s\n", cudaGetErrorString(err));
     }
    13 
    // main.c
    #include <stdio.h>
    #include <stdlib.h>

    #define N   1024

    /*
     * Host-side wrapper implemented in kernel.cu (C linkage); it launches the
     * CUDA kernel itself. It is an ordinary host function that is called from
     * host code below, so it must NOT be declared with "#pragma acc routine":
     * that directive would tell the compiler to expect a device version of it.
     */
    extern void saxpy(int n, float a, float *x, float *y);

    /*
     * Initialises x = 1 and y = 4 on the device with OpenACC, calls the CUDA
     * saxpy wrapper on the device copies (y += 2 * x), copies y back and prints
     * y[0].
     */
    int main()
    {
        float *x = (float *)malloc(sizeof(float) * N);
        float *y = (float *)malloc(sizeof(float) * N);
        if (x == NULL || y == NULL)
        {
            fprintf(stderr, "malloc failed\n");
            free(x);
            free(y);
            return 1;
        }

        /* x lives only on the device; y is copied back when the region ends. */
    #pragma acc data create(x[0:N]) copyout(y[0:N])
        {
    #pragma acc kernels
    #pragma acc loop independent
            for (int i = 0; i < N; i++)
            {
                x[i] = 1.0f;
                y[i] = 4.0f;
            }

            /* Inside host_data, x and y evaluate to their device addresses. */
    #pragma acc host_data use_device(x, y)
            {
                saxpy(N, 2.0f, x, y);
            }

            /*
             * Drain outstanding OpenACC async work before the copyout at the end
             * of the region. NOTE(review): this does not cover work the CUDA
             * wrapper queues on its own stream; saxpy() presumably has to
             * synchronise itself before returning - confirm in kernel.cu.
             */
    #pragma acc wait
        }

        printf("\ny[0] = %f\n", y[0]);
        free(x);
        free(y);
        //getchar();
        return 0;
    }

    ● 输出结果,代码在 win10 上不能链接,报错:LINK : fatal error LNK1104: 无法打开文件“libcudapgi.lib”;在 WSL 上输出结果不正确,在 Ubuntu 中报链接错误。参考了 https://blog.csdn.net/wcj0626/article/details/12611689?locationNum=12&fps=1 和 https://stackoverflow.com/questions/31737024/openacc-calling-cuda-device-kernel-from-openacc-parallel-loop,还是没有解决问题。

    WSL:
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ nvcc -c kernel.cu -rdc=true
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -acc -c main.c
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -ta=tesla:rdc,cuda9.1 -Mcuda -o acc.exe main.o kernel.o
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ ./acc.exe
    
    y[0] = 4.000000
    
    Ubuntu:
    @E@nvlink fatal   : elfLink fatbinary error
    pgacclnk: child process exit status 2: /usr/local/pgi/linux86-64/18.4/bin/pgnvd

    ● 代码,CUDA 调用 OpenACC(使用 deviceptr 直接传入设备指针)

     // fun.c

     /*
      * Store the constant c into x[0] .. x[n-1].
      *
      * The deviceptr clause states that x is already a device address, so the
      * kernels region works on it directly.
      */
     void set(const int n, const float c, float *x)
     {
     #pragma acc kernels deviceptr(x)
         for (int k = 0; k < n; ++k)
         {
             x[k] = c;
         }
     }

     /*
      * In-place SAXPY: y[k] += a * x[k] for k in [0, n).
      *
      * Both x and y are declared deviceptr, i.e. the caller passes device
      * addresses; the restrict qualifiers promise they do not alias.
      */
     void saxpy(const int n, const float a, float *restrict x, float *restrict y)
     {
     #pragma acc kernels deviceptr(x, y)
         for (int k = 0; k < n; ++k)
         {
             y[k] += a * x[k];
         }
     }
    15 
    // main.cu
    #include <stdio.h>
    #include <stdlib.h>
    #include <cuda.h>
    #include "cuda_runtime.h"
    #include "device_launch_parameters.h"

    #define N   1024

    /*
     * Abort with file, line and the CUDA error string when a runtime call fails.
     * Without this a failed cudaMalloc leaves the pointer uninitialised and the
     * program crashes later, far from the real cause.
     */
    #define CUDA_CHECK(call)                                                   \
        do {                                                                   \
            cudaError_t err_ = (call);                                         \
            if (err_ != cudaSuccess) {                                         \
                fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                        cudaGetErrorString(err_));                             \
                exit(EXIT_FAILURE);                                            \
            }                                                                  \
        } while (0)

    /* OpenACC routines implemented in fun.c; both expect device pointers. */
    extern "C" void set(int, float, float *);
    extern "C" void saxpy(int, float, float *, float *);

    /*
     * Allocates x and y with cudaMalloc, fills them through the OpenACC set()
     * routine (x = 1, y = 0), runs the OpenACC saxpy() (y += 2 * x), copies
     * y[0] back to the host and prints it.
     */
    int main()
    {
        float *x = NULL, *y = NULL, y0 = 0.0f;
        CUDA_CHECK(cudaMalloc((void **)&x, sizeof(float) * N));
        CUDA_CHECK(cudaMalloc((void **)&y, sizeof(float) * N));

        set(N, 1.0f, x);
        set(N, 0.0f, y);
        saxpy(N, 2.0f, x, y);
        CUDA_CHECK(cudaMemcpy(&y0, y, sizeof(float), cudaMemcpyDeviceToHost));

        printf("\ny[0] = %f\n", y0);
        CUDA_CHECK(cudaFree(x));
        CUDA_CHECK(cudaFree(y));
        //getchar();
        return 0;
    }

    ● 输出结果,代码在 win10 上不能链接,在 WSL 上可以链接但运行时发生段错误(Segmentation fault),在 Ubuntu 中报链接错误

    WSL:
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ nvcc -c main.cu -rdc=true
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -acc -c fun.c
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -ta=tesla:rdc,cuda9.1 -Mcuda -o acc.exe main.o fun.o
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ ./acc.exe
    Segmentation fault (core dumped)
    
    Ubuntu:
    cuan@CUAN:~/Temp$ nvcc -c main.cu -rdc=true
    cuan@CUAN:~/Temp$ pgcc -acc -c fun.c
    cuan@CUAN:~/Temp$ pgcc -ta=tesla:rdc,cuda9.1 -Mcuda -o acc.exe main.o fun.o
    @E@nvlink fatal   : elfLink fatbinary error
    pgacclnk: child process exit status 2: /usr/local/pgi/linux86-64/18.4/bin/pgnvd

    ● 代码,CUDA 调用 OpenACC,捆绑变量地址

     // fun.c
     #include <openacc.h>

     /*
      * Associate the host buffer pHost with the already-allocated device buffer
      * pDevice (sizeByte bytes) via acc_map_data, so that later OpenACC data
      * clauses naming the host pointer resolve to that device memory.
      *
      * Does nothing when sizeByte <= 0: acc_map_data takes a size_t, so a
      * negative int would otherwise be converted to a huge size.
      */
     void map(float *restrict pHost, float *restrict pDevice, int sizeByte)
     {
         if (sizeByte <= 0)
             return;
         acc_map_data(pHost, pDevice, (size_t)sizeByte);
     }

     /*
      * Store the constant c into x[0] .. x[n-1] on the device.
      *
      * x is a HOST pointer whose first n elements must already be present on
      * the device (for example through map()); the present clause makes the
      * region use the associated device copy without any transfer.
      */
     void set(int n, float c, float *x)
     {
     #pragma acc kernels present(x[0:n])
         for (int i = 0; i < n; i++)
             x[i] = c;
     }

     /*
      * In-place SAXPY on the device: y[i] += a * x[i] for i in [0, n).
      *
      * x and y are HOST pointers with present device copies, exactly as in
      * set(). They must therefore be named in a present clause; deviceptr would
      * make the region dereference the host addresses on the device.
      */
     void saxpy(int n, float a, float *restrict x, float *restrict y)
     {
     #pragma acc kernels present(x[0:n], y[0:n])
         for (int i = 0; i < n; i++)
             y[i] += a * x[i];
     }
    22 
    // main.cu
    #include <stdio.h>
    #include <stdlib.h>

    #define N   1024

    /* Abort with file, line and the CUDA error string when a runtime call fails. */
    #define CUDA_CHECK(call)                                                   \
        do {                                                                   \
            cudaError_t err_ = (call);                                         \
            if (err_ != cudaSuccess) {                                         \
                fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                        cudaGetErrorString(err_));                             \
                exit(EXIT_FAILURE);                                            \
            }                                                                  \
        } while (0)

    /* OpenACC routines implemented in fun.c. */
    extern "C" void map(float *, float *, int);
    extern "C" void set(int, float, float *);
    extern "C" void saxpy(int, float, float *, float *);

    /*
     * Allocates host buffers x, y and device buffers dx, dy, ties each host
     * buffer to its device buffer with map(), then calls the OpenACC routines
     * with the HOST pointers (x = 1, y = 4, y += 2 * x). The result is read
     * back from the DEVICE buffer dy and printed.
     */
    int main()
    {

        float *x = (float *)malloc(sizeof(float) * N);
        float *y = (float *)malloc(sizeof(float) * N);
        float *dx = NULL, *dy = NULL, y0 = 0.0f;
        if (x == NULL || y == NULL)
        {
            fprintf(stderr, "malloc failed\n");
            free(x);
            free(y);
            return 1;
        }
        CUDA_CHECK(cudaMalloc((void **)&dx, sizeof(float) * N));
        CUDA_CHECK(cudaMalloc((void **)&dy, sizeof(float) * N));

        map(x, dx, sizeof(float) * N);
        map(y, dy, sizeof(float) * N);
        set(N, 1.0f, x);
        set(N, 4.0f, y);
        saxpy(N, 2.0f, x, y);

        /* The data lives in dy; the host buffer y was never written. */
        CUDA_CHECK(cudaMemcpy(&y0, dy, sizeof(float), cudaMemcpyDeviceToHost));

        printf("\ny[0] = %f\n", y0);

        /*
         * NOTE(review): the associations created by map() are never removed;
         * undoing them before these frees would need an unmap wrapper in fun.c.
         */
        free(x);
        free(y);
        /* cudaFree takes the device pointers, not the host ones freed above. */
        CUDA_CHECK(cudaFree(dx));
        CUDA_CHECK(cudaFree(dy));
        //getchar();
        return 0;
    }

    ● 输出结果,代码在 win10上不能链接,在 WSL 上结果正确,在 Ubuntu 中未尝试

    WSL:
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ nvcc -c main.cu -rdc=true
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -c fun.c -acc
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -acc -Mcuda -o acc.exe main.o fun.o -ta=tesla:rdc,cuda9.1
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ ./acc.exe
    
    y[0] = 6.000000

    ● 代码,OpenACC 调用 cuBLAS

     #include <stdio.h>
     #include <stdlib.h>

     #define N   1024

     /*
      * Hand-written prototype for the cuBLAS routine called below, in the
      * argument order (n, alpha, x, incx, y, incy).
      * NOTE(review): presumably the legacy cublas.h entry point - confirm the
      * signature against the installed header.
      */
     extern void cublasSaxpy(int, float, float *, int, float *, int);

     /*
      * Initialises x = 1 and y = 4 on the device with OpenACC, calls cublasSaxpy
      * on the device copies with alpha = 2 and unit strides, copies y back to the
      * host at the end of the data region and prints y[0].
      */
     int main()
     {
         float *x = (float *)malloc(sizeof(float) * N);
         float *y = (float *)malloc(sizeof(float) * N);
         if (x == NULL || y == NULL)
         {
             fprintf(stderr, "malloc failed\n");
             free(x);
             free(y);
             return 1;
         }

         /* x lives only on the device; y is copied back when the region ends. */
     #pragma acc data create(x[0:N]) copyout(y[0:N])
         {
     #pragma acc kernels
             for (int i = 0; i < N; i++)
             {
                 x[i] = 1.0f;
                 y[i] = 4.0f;
             }

             /* Inside host_data, x and y evaluate to their device addresses. */
     #pragma acc host_data use_device(x,y)
             {
                 cublasSaxpy(N, 2.0f, x, 1, y, 1);
             }
         }

         printf("\ny[0] = %f\n", y[0]);
         free(x);
         free(y);
         //getchar();
         return 0;
     }

    ● 输出结果,代码在 win10上不能链接,在 WSL 中结果错误,在 Ubuntu 中结果正确

    WSL:
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ nvcc -c fun.c -rdc=true
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -acc -c main.c
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ pgcc -acc -Mcuda -lcublas -o acc.exe main.o
    cuan@CUAN:/mnt/d/Code/CUDA/cudaProject/cudaProject$ ./acc.exe
    
    y[0] = 4.000000
    
    Ubuntu:
    cuan@CUAN:~/Temp$ nvcc -c fun.c -rdc=true
    cuan@CUAN:~/Temp$ pgcc -acc -c main.c
    cuan@CUAN:~/Temp$ pgcc -acc -Mcuda -lcublas -o acc.exe main.o
    cuan@CUAN:~/Temp$ ./acc.exe
    
    y[0] = 6.000000
  • 相关阅读:
    NX二次开发-Block UI C++界面Body Collector(体收集器)控件的获取(持续补充)
    NX二次开发-Block UI C++界面(表达式)控件的获取(持续补充)
    NX二次开发-常用lib库文件
    NX二次开发-如何在类外面定义一个结构体
    NX二次开发-bat脚本文件切换NX的环境变量(NX路径和语言)
    NX二次开发-相对路径环境变量和绝对路径环境变量
    NX二次开发-UFUN所有对象类型的宏定义
    NX二次开发-UFUN创建球UF_MODL_create_sphere1
    NX二次开发-UFUN获取球的参数UF_MODL_ask_sphere_parms
    NX二次开发-UFUN获取块的参数UF_MODL_ask_block_parms
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/9446401.html
Copyright © 2011-2022 走看看