zoukankan      html  css  js  c++  java
  • linux利用CMakeLists编译cuda程序

    文件目录:

    cudaTest

        |--utils.cu

        |--utils.h

        |--squaresum.cu

        |--squaresum.h

        |--test.cpp

        |--CMakeLists.txt

    编译命令:

    $cd /root/cudaTest

    $mkdir build

    $cd build

    $cmake ..

    $make

    调用关系:

    utils:提供常用工具,这里提供查询设备信息功能;

    squaresum:计算平方和功能,为cuda运行的核心函数实现

    test:调用平方和函数

    CMakeLists.txt:组织所有文件编译生成可执行文件

    注意:从 .cpp 文件调用 .cu 文件中的函数时,需要在头文件中用 extern "C" 声明该函数

    文件内容:

    CMakeLists.txt

    # CMakeLists.txt: builds the `squaresum` executable from test.cpp,
    # squaresum.cu and utils.cu.
    cmake_minimum_required(VERSION 2.8)

    # A top-level CMakeLists.txt is expected to call project() directly;
    # without it CMake falls back to an implicit, unnamed project.
    project(cudaTest)

    # Locate the CUDA toolkit (provides cuda_add_executable). REQUIRED stops
    # the configure step when no toolkit is found.
    find_package(CUDA QUIET REQUIRED)

    # Specify binary name and the source files to build it from
    #add_library(utils utils.cpp)
    cuda_add_executable(
        squaresum
        test.cpp squaresum.cu utils.cu)
    #target_link_libraries(squaresum utils)

    test.cpp

    #include <iostream>
    #include "squaresum.h"
    
    //extern "C" int squaresum();
    
    /**
     * Program entry point: runs the squares-sum demo.
     *
     * @return the value returned by squaresum(), forwarded as the process
     *         exit code instead of being discarded.
     */
    int main(){
      return squaresum();
    }

    squaresum.h

    #ifndef SQUARESUM_H
    #define SQUARESUM_H

    #include "utils.h"
    #include <cuda_runtime.h>

    // Declared with C linkage so the symbol name is not C++-mangled.
    extern "C" {
      /**
       * Runs the squares-sum demo implemented in squaresum.cu: initializes
       * CUDA through InitCUDA(), sums the squares of a random data set on the
       * GPU and again on the CPU, and prints both results with their timings.
       *
       * @return an int status code (see the definition in squaresum.cu).
       */
      int squaresum();
    }

    #endif // SQUARESUM_H

    squaresum.cu

    #include <stdio.h>
    #include <stdlib.h>
    //#include "utils.h"
    #include <iostream>
    #include "squaresum.h"
    // ======== define area ========
    // Number of int elements in the data set: 1048576 = 2^20 (1M elements).
    #define DATA_SIZE 1048576 // 1M
    
    // ======== global area ========
    // Host-side input buffer: filled by generateData() and read by squaresum()
    // for both the upload to the GPU and the CPU reference sum.
    int data[DATA_SIZE];
    
    /**
     * Kernel: adds up the squares of the first DATA_SIZE ints of `data`.
     *
     * The loop does not use thread or block indices, so every thread that
     * executes the kernel walks the whole array; the call site in this file
     * launches it as <<<1, 1>>>.
     *
     * @param data  input values; DATA_SIZE elements are read
     * @param sum   receives the accumulated total
     * @param time  receives the difference between two clock() readings taken
     *              before and after the loop
     */
    __global__ static void squaresSum(int *data, int *sum, clock_t *time)
    {
        const clock_t begin = clock();

        int total = 0;
        int idx = 0;
        while (idx < DATA_SIZE) {
            const int value = data[idx];
            total += value * value;
            ++idx;
        }

        *sum = total;
        *time = clock() - begin;
    }
    
    /**
     * Fills `data` with pseudo-random digits.
     *
     * Each of the first `size` elements is set to rand() % 10, in index
     * order. Nothing is written when `size` is zero or negative.
     *
     * @param data  destination array with room for at least `size` ints
     * @param size  number of elements to generate
     */
    void generateData(int *data, int size)
    {
        int idx = 0;
        while (idx < size) {
            data[idx] = rand() % 10;
            ++idx;
        }
    }
    
    /**
     * Reports a failed CUDA runtime call on stderr.
     *
     * @param status  value returned by the CUDA runtime call
     * @param what    short label for the call, used in the message
     * @return true when status is cudaSuccess, false otherwise
     */
    static bool cudaCallOk(cudaError_t status, const char *what)
    {
        if (status == cudaSuccess) {
            return true;
        }
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        return false;
    }

    /**
     * Runs the squares-sum demo: initializes CUDA, fills the global `data`
     * buffer with random digits, sums their squares on the GPU with the
     * squaresSum kernel and again on the CPU, and prints both sums together
     * with the measured clock() differences.
     *
     * @return 0 on success; 1 when CUDA initialization or any CUDA runtime
     *         call fails (the failing call is reported on stderr).
     */
    int squaresum()
    {
        // init CUDA device
        if (!InitCUDA()) {
            return 1;
        }
        printf("CUDA initialized.\n");

        // generate random input data
        generateData(data, DATA_SIZE);

        // Device pointers start out NULL so the cleanup below is safe no matter
        // which step failed: cudaFree(NULL) is a no-op.
        int *gpuData = NULL;
        int *sum = NULL;
        clock_t *time = NULL;
        int result = 0;
        clock_t time_used = 0;

        // allocate GPU buffers and upload the input; && stops at the first failure
        bool ok =
            cudaCallOk(cudaMalloc((void**) &gpuData, sizeof(int) * DATA_SIZE),
                       "cudaMalloc(gpuData)") &&
            cudaCallOk(cudaMalloc((void**) &sum, sizeof(int)),
                       "cudaMalloc(sum)") &&
            cudaCallOk(cudaMalloc((void**) &time, sizeof(clock_t)),
                       "cudaMalloc(time)") &&
            cudaCallOk(cudaMemcpy(gpuData, data, sizeof(int) * DATA_SIZE,
                                  cudaMemcpyHostToDevice),
                       "cudaMemcpy(host to device)");

        if (ok) {
            // calculate the sum of squares on the GPU
            squaresSum<<<1, 1, 0>>>(gpuData, sum, time);

            // A kernel launch returns nothing; launch errors are read back
            // with cudaGetLastError(). Errors raised while the kernel runs are
            // reported by the copies below.
            ok = cudaCallOk(cudaGetLastError(), "squaresSum launch") &&
                 cudaCallOk(cudaMemcpy(&result, sum, sizeof(int),
                                       cudaMemcpyDeviceToHost),
                            "cudaMemcpy(sum)") &&
                 cudaCallOk(cudaMemcpy(&time_used, time, sizeof(clock_t),
                                       cudaMemcpyDeviceToHost),
                            "cudaMemcpy(time)");
        }

        // free GPU buffers on every path, successful or not
        cudaFree(gpuData);
        cudaFree(sum);
        cudaFree(time);

        if (!ok) {
            return 1;
        }

        // print GPU result; clock_t is cast because its exact type is
        // implementation-defined while the format expects long
        printf("(GPU) sum:%d time:%ld\n", result, (long) time_used);

        // CPU reference calculation over the same data
        result = 0;
        clock_t start = clock();
        for (int i = 0; i < DATA_SIZE; ++i) {
            result += data[i] * data[i];
        }
        time_used = clock() - start;
        printf("(CPU) sum:%d time:%ld\n", result, (long) time_used);

        return 0;
    }

    utils.h

    #ifndef UTILS_H
    #define UTILS_H

    #include <stdio.h>
    #include <cuda_runtime.h>

    // Declared with C linkage so the symbol name is not C++-mangled.
    extern "C" {
      /**
       * Looks for a usable CUDA device and selects it (implemented in
       * utils.cu). The properties of the chosen device are printed.
       *
       * @return true when a device was found and selected, false otherwise.
       */
      bool InitCUDA();
    }

    #endif // UTILS_H

    utils.cu

    #include "utils.h"
    #include <cuda_runtime.h>
    #include <iostream>
    
    /**
     * Prints selected fields of a CUDA device property record to stdout,
     * one field per line.
     *
     * The size_t fields (totalGlobalMem, sharedMemPerBlock, memPitch,
     * totalConstMem, textureAlignment) are printed with %zu; printing them
     * with %d is undefined behaviour and truncates values above INT_MAX.
     *
     * @param prop  device properties, as filled in by cudaGetDeviceProperties
     */
    void printDeviceProp(const cudaDeviceProp &prop)
    {
        printf("Device Name : %s.\n", prop.name);
        printf("totalGlobalMem : %zu.\n", prop.totalGlobalMem);
        printf("sharedMemPerBlock : %zu.\n", prop.sharedMemPerBlock);
        printf("regsPerBlock : %d.\n", prop.regsPerBlock);
        printf("warpSize : %d.\n", prop.warpSize);
        printf("memPitch : %zu.\n", prop.memPitch);
        printf("maxThreadsPerBlock : %d.\n", prop.maxThreadsPerBlock);
        printf("maxThreadsDim[0 - 2] : %d %d %d.\n",
               prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
        printf("maxGridSize[0 - 2] : %d %d %d.\n",
               prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
        printf("totalConstMem : %zu.\n", prop.totalConstMem);
        printf("major.minor : %d.%d.\n", prop.major, prop.minor);
        printf("clockRate : %d.\n", prop.clockRate);
        printf("textureAlignment : %zu.\n", prop.textureAlignment);
        printf("deviceOverlap : %d.\n", prop.deviceOverlap);
        printf("multiProcessorCount : %d.\n", prop.multiProcessorCount);
    }
    
    /**
     * Finds the first CUDA device whose properties can be queried and whose
     * major compute capability is at least 1, prints its properties and makes
     * it the current device.
     *
     * The device count is written to stdout; every failure is reported on
     * stderr.
     *
     * @return true when a device was selected; false when the device count
     *         cannot be queried, no device exists, no device qualifies, or
     *         selecting the device fails.
     */
    bool InitCUDA()
    {
        // number of CUDA devices; initialized so it is never read as garbage
        int count = 0;

        // get the CUDA device count
        cudaError_t status = cudaGetDeviceCount(&count);
        if (status != cudaSuccess) {
            fprintf(stderr, "cudaGetDeviceCount failed: %s\n",
                    cudaGetErrorString(status));
            return false;
        }
        std::cout << count << std::endl;
        if (count == 0) {
            fprintf(stderr, "There is no device.\n");
            return false;
        }

        // find the first device with compute capability >= 1.x
        int i;
        for (i = 0; i < count; ++i) {
            cudaDeviceProp prop;
            if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
                if (prop.major >= 1) {
                    printDeviceProp(prop);
                    break;
                }
            }
        }

        // the loop ran to completion: no device qualified
        if (i == count) {
            fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
            return false;
        }

        // make the chosen device current
        status = cudaSetDevice(i);
        if (status != cudaSuccess) {
            fprintf(stderr, "cudaSetDevice(%d) failed: %s\n", i,
                    cudaGetErrorString(status));
            return false;
        }

        return true;
    }
    
    //int main(){
    //  InitCUDA();
    //}
  • 相关阅读:
    Oracle SQL语句大全—查看表空间
    Class to disable copy and assign constructor
    在moss上自己总结了点小经验。。高手可以飘过 转贴
    在MOSS中直接嵌入ASP.NET Page zt
    Project Web Access 2007自定义FORM验证登录实现 zt
    SharePoint Portal Server 2003 中的单一登录 zt
    vs2008 开发 MOSS 顺序工作流
    VS2008开发MOSS工作流几个需要注意的地方
    向MOSS页面中添加服务器端代码的另外一种方式 zt
    状态机工作流的 SpecialPermissions
  • 原文地址:https://www.cnblogs.com/haiyang21/p/7788063.html
Copyright © 2011-2022 走看看