zoukankan html css js c++ java

命令行cpp与cu文件混合编译

首先这里有两段代码：

main.cpp：

#include <stdio.h>
#include <iostream>

// func() is defined in test.cu with C linkage; declaring it extern "C" here
// makes the C++ compiler reference the same unmangled symbol at link time.
extern "C" int func();

// Program entry point: prints a greeting, then runs the routine from test.cu.
int main()
{
    // '\n' followed by an explicit flush is what std::endl does, so the
    // greeting is pushed out before func() starts printing through stdio.
    std::cout << "Hello C++" << '\n' << std::flush;

    // The status returned by func() is intentionally not inspected.
    (void)func();

    return 0;
}

test.cu：

#include <cuda_runtime.h>
#include <stdio.h>

// Element-wise difference kernel using 1D thread indexing.
//
// Each thread handles the element selected by its threadIdx.x only
// (blockIdx is not consulted) and stores b[idx] - a[idx] into c[idx].
// There is no length parameter and no bounds check, so the launch must
// not use more threads per block than the arrays have elements.
__global__ void testThread1(int *c, const int *a, const int *b)
{
    const int idx = threadIdx.x;
    const int lhs = b[idx];
    const int rhs = a[idx];
    c[idx] = lhs - rhs;
}

// Computes c[i] = b[i] - a[i] for i in [0, size) on CUDA device 0.
// (Despite the name, the kernel it launches performs a subtraction.)
//
// Parameters:
//   c    - host output buffer with room for `size` ints
//   a, b - host input buffers holding `size` ints each
//   size - number of elements; the kernel is launched as a single block of
//          `size` threads, so `size` must not exceed the device's maximum
//          threads per block. A larger value makes the launch fail, which is
//          reported below instead of being silently ignored.
//
// The function returns void, so failures are reported on stderr and `c` is
// left unmodified. Device buffers are released on every path.
void addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    // A launch with zero threads is an invalid configuration and there is
    // nothing to compute anyway.
    if (size == 0)
        return;

    const size_t bytes = size * sizeof(int);

    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;

    // Each step runs only if everything before it succeeded; `status` keeps
    // the first error encountered.
    cudaError_t status = cudaSetDevice(0);

    if (status == cudaSuccess)
        status = cudaMalloc((void**)&dev_c, bytes);
    if (status == cudaSuccess)
        status = cudaMalloc((void**)&dev_a, bytes);
    if (status == cudaSuccess)
        status = cudaMalloc((void**)&dev_b, bytes);

    if (status == cudaSuccess)
        status = cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice);
    if (status == cudaSuccess)
        status = cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice);

    if (status == cudaSuccess)
    {
        testThread1<<<1, size>>>(dev_c, dev_a, dev_b);
        // Kernel launches return nothing; configuration errors must be
        // fetched explicitly right after the launch.
        status = cudaGetLastError();
    }

    // The blocking device-to-host copy waits for the kernel to finish, so it
    // also surfaces errors raised while the kernel was executing.
    if (status == cudaSuccess)
        status = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);

    if (status != cudaSuccess)
        fprintf(stderr, "addWithCuda failed: %s\n", cudaGetErrorString(status));

    // cudaFree accepts a null pointer, so this is safe even when an
    // allocation above never happened.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
}

// Self-test entry point, exported with C linkage so that a separately
// compiled C++ translation unit can call it without name mangling.
//
// Fills two arrays with pseudo-random values in [0, 100), computes the
// element-wise difference b - a on the host and via addWithCuda, writes both
// results side by side to "out.txt" (one "host device" pair per line), and
// prints "pass" or "no pass" depending on whether the two results agree.
//
// Returns 0 in all cases.
extern "C"
int func()
{
    const int n = 1000;

    int *a = new int[n];
    int *b = new int[n];
    int *c = new int[n];
    // Value-initialised to zero: if addWithCuda does not fill the buffer,
    // the comparison below reads defined values rather than indeterminate
    // memory.
    int *cc = new int[n]();

    for (int i = 0; i < n; i++)
    {
        a[i] = rand() % 100;
        b[i] = rand() % 100;
        c[i] = b[i] - a[i];  // host reference result
    }

    addWithCuda(cc, a, b, n);

    // Dump both results for manual inspection. A failure to open the file
    // must not crash the check itself, so it is reported and skipped.
    FILE *fp = fopen("out.txt", "w");
    if (fp != NULL)
    {
        for (int i = 0; i < n; i++)
            fprintf(fp, "%d %d\n", c[i], cc[i]);
        fclose(fp);
    }
    else
    {
        fprintf(stderr, "func: cannot open out.txt for writing\n");
    }

    bool flag = true;
    for (int i = 0; i < n; i++)
    {
        if (c[i] != cc[i])
        {
            flag = false;
            break;
        }
    }

    if (flag == false)
        printf("no pass");
    else
        printf("pass");

    cudaDeviceReset();

    delete[] a;
    delete[] b;
    delete[] c;
    delete[] cc;

    return 0;
}

Linux下可以这样：

nvcc -c test.cu
g++ -c main.cpp
g++ -o main main.o test.o -L/usr/local/cuda/lib64 -lcudart

Windows下可以这样：

nvcc -c test.cu
cl -c main.cpp
link main.obj test.obj cudart.lib -libpath:"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\lib\x64"

应该都差不多。

查看全文

相关阅读:
信息学奥赛一本通（C++）在线评测系统——基础（一）C++语言——1068：与指定数字相同的数的个数
 信息学奥赛一本通（C++）在线评测系统——基础（一）C++语言——1068：与指定数字相同的数的个数
 信息学奥赛一本通（C++）在线评测系统——基础（一）C++语言——1067：整数的个数
 SSLZYC 2405 巧克力
 SSLZYC 2404 上学
 SSLZYC 2403 蜡烛
 SSLZYC 2402 世界语
 SSLZYC 2124 涂色
 SSLZYC 2391 数列
 SSLZYC 洛谷 P1498 南蛮图腾

原文地址：https://www.cnblogs.com/ybsport/p/12321415.html