  • 0_Simple__simpleMPI

    Simple use of MPI

    ▶ Source code. The root node on the host generates a random array and scatters it across the nodes (this example uses only one node); each node computes the square roots on its GPU and sums them; the root node then collects the per-node results via MPI, reduces them with a sum, and divides by the total array size (i.e., it computes the average of the square roots of all elements of the random array).

    // simpleMPI.h
    // extern "C" gives these functions C linkage, so the definitions compiled
    // by nvcc (simpleMPI.cu) link against the mpicxx-compiled caller without
    // name-mangling mismatches.
    extern "C"
    {
        void initData(float *data, int dataSize);
        void computeGPU(float *hostData, int blockSize, int gridSize);
        float sum(float *data, int size);
        void my_abort(int err);
    }
    // simpleMPI.cu
    #include <iostream>
    #include <cstdlib>            // rand(), RAND_MAX
    #include <mpi.h>
    #include "cuda_runtime.h"
    #include "device_launch_parameters.h"
    #include "simpleMPI.h"

    using std::cout;
    using std::cerr;
    using std::endl;

    #define CUDA_CHECK(call)                                                     \
        if ((call) != cudaSuccess)                                               \
        {                                                                        \
            cudaError_t err = cudaGetLastError();                                \
            cerr << "CUDA error calling \"" #call "\", code is " << err << endl; \
            my_abort(err);                                                       \
        }

    // Device kernel: take the square root of each element
    __global__ void simpleMPIKernel(float *input, float *output)
    {
        int tid = blockIdx.x * blockDim.x + threadIdx.x;
        output[tid] = sqrt(input[tid]);
    }

    // Fill the array with random values in [0, 1]
    void initData(float *data, int dataSize)
    {
        for (int i = 0; i < dataSize; i++)
            data[i] = (float)rand() / RAND_MAX;
    }

    // Run the square-root computation on the GPU
    void computeGPU(float *hostData, int blockSize, int gridSize)
    {
        int dataSize = blockSize * gridSize;

        float *deviceInputData = NULL;
        CUDA_CHECK(cudaMalloc((void **)&deviceInputData, dataSize * sizeof(float)));

        float *deviceOutputData = NULL;
        CUDA_CHECK(cudaMalloc((void **)&deviceOutputData, dataSize * sizeof(float)));

        CUDA_CHECK(cudaMemcpy(deviceInputData, hostData, dataSize * sizeof(float), cudaMemcpyHostToDevice));

        simpleMPIKernel<<<gridSize, blockSize>>>(deviceInputData, deviceOutputData);

        CUDA_CHECK(cudaMemcpy(hostData, deviceOutputData, dataSize * sizeof(float), cudaMemcpyDeviceToHost));

        CUDA_CHECK(cudaFree(deviceInputData));
        CUDA_CHECK(cudaFree(deviceOutputData));
    }

    // Simple host-side sum
    float sum(float *data, int size)
    {
        float accum = 0.f;
        for (int i = 0; i < size; i++)
            accum += data[i];
        return accum;
    }

    // Abort helper: tears down all MPI ranks
    void my_abort(int err)
    {
        cout << "Test FAILED\n";
        MPI_Abort(MPI_COMM_WORLD, err);
    }
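
    A quirk worth noting in the CUDA_CHECK macro above: it discards the checked call's own return value and queries cudaGetLastError() for a code instead, and the kernel launch on the <<<>>> line is never checked at all (launches return no status). A more defensive variant might look like the following sketch (not part of the original sample):

    #define CUDA_CHECK_V2(call)                                             \
        do {                                                                \
            cudaError_t err_ = (call);                                      \
            if (err_ != cudaSuccess)                                        \
            {                                                               \
                cerr << "CUDA error: " << cudaGetErrorString(err_)          \
                     << " at " << __FILE__ << ":" << __LINE__ << endl;      \
                my_abort(err_);                                             \
            }                                                               \
        } while (0)

    // Kernel launches return no error code; query the launch status explicitly:
    simpleMPIKernel<<<gridSize, blockSize>>>(deviceInputData, deviceOutputData);
    CUDA_CHECK_V2(cudaGetLastError());

    The do { ... } while (0) wrapper also makes the macro behave as a single statement, so it composes safely with a following else branch, which the original if-based macro does not.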
    // simpleMPI.cpp
    #include <mpi.h>
    #include <iostream>
    #include <cstdio>             // getchar()
    #include "simpleMPI.h"

    using std::cout;
    using std::cerr;
    using std::endl;

    #define MPI_CHECK(call) if ((call) != MPI_SUCCESS) { cerr << "MPI error calling \"" #call "\"\n"; my_abort(-1); }

    int main(int argc, char *argv[])
    {
        int blockSize = 256;
        int gridSize = 10000;
        int dataSizePerNode = gridSize * blockSize;

        // Initialize MPI
        MPI_CHECK(MPI_Init(&argc, &argv));

        // Get the communicator size and this process's rank
        int commSize, commRank;
        MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &commSize));
        MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &commRank));

        // The root node generates the full random array
        int dataSizeTotal = dataSizePerNode * commSize;
        float *dataRoot = NULL;
        if (commRank == 0)
        {
            cout << "Running on " << commSize << " nodes" << endl;
            dataRoot = new float[dataSizeTotal];
            initData(dataRoot, dataSizeTotal);
        }

        // Every node allocates a buffer to receive its chunk from the root
        float *dataNode = new float[dataSizePerNode];

        MPI_CHECK(MPI_Scatter(dataRoot, dataSizePerNode, MPI_FLOAT, dataNode, dataSizePerNode, MPI_FLOAT, 0, MPI_COMM_WORLD));

        // The root no longer needs its full copy of the data
        if (commRank == 0)
            delete [] dataRoot;

        // Each node computes square roots on its GPU, then reduces its chunk to one value
        computeGPU(dataNode, blockSize, gridSize);
        float sumNode = sum(dataNode, dataSizePerNode);

        // Collect the per-node partial sums on the root and reduce them
        float sumRoot;
        MPI_CHECK(MPI_Reduce(&sumNode, &sumRoot, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD));

        // Cleanup and output
        delete[] dataNode;
        MPI_CHECK(MPI_Finalize());

        if (commRank == 0)
        {
            float average = sumRoot / dataSizeTotal;
            cout << "Average of square roots is: " << average << endl;
            cout << "PASSED\n";
        }

        getchar();
        return 0;
    }
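
    For reference, one plausible way to build and run this on Linux (the exact paths and wrapper names are assumptions; the CUDA SDK sample ships with its own Makefile): compile the CUDA part with nvcc (`nvcc -c simpleMPI.cu -o simpleMPI_cu.o`), compile the MPI part with the MPI compiler wrapper (`mpicxx -c simpleMPI.cpp -o simpleMPI_cpp.o`), link against the CUDA runtime (`mpicxx simpleMPI_cpp.o simpleMPI_cu.o -lcudart -o simpleMPI`), and launch with `mpirun -np 1 ./simpleMPI`. Note that the code never calls cudaSetDevice, so multiple ranks on one machine would all share the default GPU.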

    ▶ Output

    Running on 1 nodes
    Average of square roots is: 0.667507
    PASSED
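
    As a sanity check on that number: the inputs are uniform on [0, 1], and for such X the expected square root is E[√X] = ∫₀¹ √x dx = 2/3 ≈ 0.6667, which the printed 0.667507 matches closely.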

    ▶ Takeaways

    ● The interest here lies in the MPI collective functions (MPI_Scatter, MPI_Reduce); the CUDA part offers nothing new. A standalone sketch isolating just those two collectives follows below.
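
    The sketch below (independent of the sample) shows the same scatter-then-reduce pattern with data whose global sum is easy to predict:

    // scatter_reduce.cpp -- minimal MPI_Scatter + MPI_Reduce demo (sketch)
    #include <mpi.h>
    #include <cstdio>
    #include <vector>

    int main(int argc, char *argv[])
    {
        MPI_Init(&argc, &argv);

        int size, rank;
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        const int perRank = 4;
        std::vector<float> all;                 // only meaningful on the root
        if (rank == 0)
        {
            all.assign(perRank * size, 1.0f);   // global sum is then perRank * size
        }

        // Each rank receives its own contiguous chunk of the root's array
        std::vector<float> chunk(perRank);
        MPI_Scatter(all.data(), perRank, MPI_FLOAT,
                    chunk.data(), perRank, MPI_FLOAT, 0, MPI_COMM_WORLD);

        // Local partial sum on each rank
        float local = 0.f;
        for (int i = 0; i < perRank; i++)
            local += chunk[i];

        // Sum the per-rank partial results onto rank 0
        float total = 0.f;
        MPI_Reduce(&local, &total, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);

        if (rank == 0)
            printf("total = %g (expected %d)\n", total, perRank * size);

        MPI_Finalize();
        return 0;
    }

    Note that the send buffer passed to MPI_Scatter is only significant on the root; the other ranks may pass a null pointer, exactly as the sample does with dataRoot.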

  • Original: https://www.cnblogs.com/cuancuancuanhao/p/7883401.html