zoukankan      html  css  js  c++  java
  • 0_Simple__simpleSeparateCompilation

    ▶ 简单地将纯 C/C++ 函数(这里是 __device__ 函数)放到另一个文件中,利用头文件引用到主体 .cu 中来,编译时共同编译(跨文件调用设备函数需要让 nvcc 生成可重定位设备代码,例如 -rdc=true)。

    ▶ 源代码,把 C++ 的部分去掉了

    // simpleDeviceLibrary.cuh
    //
    // Declarations of the two device-side helpers implemented in
    // simpleDeviceLibrary.cu. Both use C linkage, so their symbol names are not
    // C++-mangled.
    #ifndef SIMPLE_DEVICE_LIBRARY_CUH
    #define SIMPLE_DEVICE_LIBRARY_CUH

    extern "C" {

    // Returns `number` multiplied by two.
    __device__ float multiplyByTwo(float number);

    // Returns `number` multiplied by one half.
    __device__ float divideByTwo(float number);

    }  // extern "C"

    #endif  // SIMPLE_DEVICE_LIBRARY_CUH
     1 // simpleDeviceLibrary.cu
     2 #include <cuda_runtime.h>
     3 
     // Doubles a value on the device.
     //   number  - value to scale.
     //   returns - number * 2.0f.
     extern "C" __device__ float multiplyByTwo(float number)
     {
         const float factor = 2.0f;
         return factor * number;
     }
     8 
     // Halves a value on the device.
     //   number  - value to scale.
     //   returns - number * 0.5f.
     extern "C" __device__ float divideByTwo(float number)
     {
         const float factor = 0.5f;
         return factor * number;
     }
     1 // simpleSeparateCompilation.cu
     2 #include <stdio.h>
     3 #include <stdlib.h>
     4 #include <math.h>
     5 #include <cuda_runtime.h>
     6 #include "device_launch_parameters.h"
     7 #include "simpleDeviceLibrary.cuh"
     8 
     9 #define EPS 1e-5    // absolute tolerance used by test() when comparing the input with the round-trip result
    10 
    11 typedef float(*deviceFunc)(float);    // pointer to a function taking a float and returning a float
    12 __device__ deviceFunc dMultiplyByTwoPtr = multiplyByTwo;    // declared locally as a device symbol; per the original author, using multiplyByTwo / divideByTwo directly in the code leads to a runtime error
    13 __device__ deviceFunc dDivideByTwoPtr = divideByTwo;    // same arrangement for divideByTwo; test() reads both symbols with cudaMemcpyFromSymbol
    14 
    // Applies the device function `f` to every element of `v`, in place.
    //
    // Parameters:
    //   v    - device pointer to `size` floats; element i is overwritten with f(v[i]).
    //   f    - function pointer called from device code; this file obtains its
    //          value by copying a __device__ pointer symbol back to the host.
    //   size - number of elements in `v`.
    //
    // Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
    // index and advances by gridDim.x * blockDim.x, so every element below
    // `size` is visited exactly once for any grid/block size. No shared memory
    // is used.
    __global__ void transformVector(float *v, deviceFunc f, unsigned int size)
    {
        // size_t arithmetic keeps the index from wrapping when size is close to
        // the unsigned int limit.
        const size_t stride = (size_t)gridDim.x * blockDim.x;
        for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < size; i += stride)
            v[i] = f(v[i]);
    }
    21 
    // Reports a failed CUDA runtime call on stderr.
    // Returns 1 when `err` is cudaSuccess and 0 otherwise.
    static int cudaCallSucceeded(cudaError_t err, const char *what)
    {
        if (err == cudaSuccess)
            return 1;
        fprintf(stderr, "\nCUDA failure in %s: %s", what, cudaGetErrorString(err));
        return 0;
    }

    // Round-trip check for the separately compiled device functions.
    //
    // Fills a host vector with random values in [0, 1], copies it to the device,
    // runs transformVector once with the multiply-by-two pointer and once with
    // the divide-by-two pointer, copies the result back, and compares it with
    // the input using the EPS tolerance.
    //
    // Returns 1 when every element matches and every CUDA call succeeded,
    // 0 otherwise.
    int test()
    {
        const unsigned int size = 1000;
        const unsigned int threadsPerBlock = 256;
        // Ceil-division so the grid covers every element even when size is not
        // a multiple of the block size.
        const unsigned int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;
        const size_t bytes = size * sizeof(float);

        float hVector[size], hResultVector[size];
        float *dVector = NULL;          // stays NULL if cudaMalloc fails, so cudaFree below is safe
        deviceFunc hFunctionPtr = NULL; // host-side copy of a device function pointer, used as a kernel argument
        int ok;

        for (unsigned int i = 0; i < size; ++i)
        {
            hVector[i] = rand() / (float)RAND_MAX;
            hResultVector[i] = 0.0f;
        }

        ok = cudaCallSucceeded(cudaSetDevice(0), "cudaSetDevice")
          && cudaCallSucceeded(cudaMalloc((void **)&dVector, bytes), "cudaMalloc")
          && cudaCallSucceeded(cudaMemcpy(dVector, hVector, bytes, cudaMemcpyHostToDevice),
                               "cudaMemcpy (host to device)");

        // First pass: read the multiply pointer from its device symbol and apply it.
        ok = ok && cudaCallSucceeded(
                       cudaMemcpyFromSymbol(&hFunctionPtr, dMultiplyByTwoPtr, sizeof(deviceFunc)),
                       "cudaMemcpyFromSymbol (dMultiplyByTwoPtr)");
        if (ok)
        {
            transformVector<<<blocks, threadsPerBlock>>>(dVector, hFunctionPtr, size);
            // Kernel launches return nothing; launch errors must be fetched explicitly.
            ok = cudaCallSucceeded(cudaGetLastError(), "transformVector launch (multiply)");
        }

        // Second pass: read the divide pointer and apply it, undoing the first pass.
        ok = ok && cudaCallSucceeded(
                       cudaMemcpyFromSymbol(&hFunctionPtr, dDivideByTwoPtr, sizeof(deviceFunc)),
                       "cudaMemcpyFromSymbol (dDivideByTwoPtr)");
        if (ok)
        {
            transformVector<<<blocks, threadsPerBlock>>>(dVector, hFunctionPtr, size);
            ok = cudaCallSucceeded(cudaGetLastError(), "transformVector launch (divide)");
        }

        // Synchronize first so a fault inside either kernel is reported here
        // rather than being attributed to the copy.
        ok = ok && cudaCallSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")
                && cudaCallSucceeded(cudaMemcpy(hResultVector, dVector, bytes, cudaMemcpyDeviceToHost),
                                     "cudaMemcpy (device to host)");

        // Release the buffer on every path; cudaFree on a null pointer does nothing.
        cudaFree(dVector);

        if (!ok)
            return 0;

        for (unsigned int i = 0; i < size; ++i)
        {
            if (fabsf(hVector[i] - hResultVector[i]) > EPS)
            {
                printf("\nError at i == %d, hVector[i] == %f, hResultVector[i] == %f", (int)i, hVector[i], hResultVector[i]);
                return 0;
            }
        }
        return 1;
    }
    55 
    // Program entry point: prints a start banner, runs test(), prints
    // "Pass" or "Fail", then waits for a key press before exiting.
    // Always returns 0.
    int main()
    {
        printf("\n\tStart.\n");
        printf("\n\tFinish: %s\n", test() ? "Pass" : "Fail");
        getchar();  // keeps the console window open until the user presses Enter
        return 0;
    }

    ● 输出结果:

            Start.
    
            Finish: Pass

    ▶ 涨姿势

    // cuda_runtime_api.h
    #define __dv(v) \
            = v
    
    extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol(void *dst, const void *symbol, size_t count, size_t offset __dv(0), enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost));
        // 从指定符号 symbol 处偏移 offset 字节处,拷贝 count 字节到 dst,默认模式为设备拷到主机
  • 相关阅读:
    【一些思路】web和app测试的区别
    【Python】I/O和比赛的其他一些问题
    【Python】迭代器和生成器的个人理解,再讲一讲协程
    【TCP/IP】如果打不开一个网页,需要如何处理?
    DOM事件
    GASP动画的基本使用
    Velocity的使用方法
    Swiper和Swiper Animate使用方法
    DOM操作
    JavaScript函数
  • 原文地址:https://www.cnblogs.com/cuancuancuanhao/p/8013139.html
Copyright © 2011-2022 走看看