  • 0_Simple__simpleMPI

    A simple use of MPI

    ▶ Source code. The root node on the host generates a random array and scatters a chunk to each node (this example uses only one node). Each node uses its GPU to take the square root of its elements and sums them locally; the root node then collects each node's partial sum via MPI, reduces them to a total, and divides by the array size (i.e., it computes the average of the square roots of all elements in the random array).

    // simpleMPI.h
    extern "C"
    {
        void initData(float *data, int dataSize);                       // fill an array with random values
        void computeGPU(float *hostData, int blockSize, int gridSize);  // take square roots on the GPU
        float sum(float *data, int size);                               // host-side sum of an array
        void my_abort(int err);                                         // abort the whole MPI job
    }
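
    (The declarations use extern "C" so that simpleMPI.cpp, built by the host C++ compiler, and simpleMPI.cu, built by nvcc, agree on unmangled symbol names at link time.)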
    // simpleMPI.cu
    #include <cstdlib>
    #include <iostream>
    #include <mpi.h>
    #include "cuda_runtime.h"
    #include "device_launch_parameters.h"
    #include "simpleMPI.h"

    using std::cout;
    using std::cerr;
    using std::endl;

    #define CUDA_CHECK(call)                                                \
        if ((call) != cudaSuccess)                                          \
        {                                                                   \
            cudaError_t err = cudaGetLastError();                           \
            cerr << "CUDA error calling \"" #call "\", code is " << err << endl; \
            my_abort(err);                                                  \
        }

    // GPU kernel: take the square root of each element
    // (the launch size exactly matches the data size, so no bounds check is needed)
    __global__ void simpleMPIKernel(float *input, float *output)
    {
        int tid = blockIdx.x * blockDim.x + threadIdx.x;
        output[tid] = sqrt(input[tid]);
    }

    // Fill the array with random values in [0, 1]
    void initData(float *data, int dataSize)
    {
        for (int i = 0; i < dataSize; i++)
            data[i] = (float)rand() / RAND_MAX;
    }

    // Run the square-root computation on the GPU, in place on hostData
    void computeGPU(float *hostData, int blockSize, int gridSize)
    {
        int dataSize = blockSize * gridSize;

        float *deviceInputData = NULL;
        CUDA_CHECK(cudaMalloc((void **)&deviceInputData, dataSize * sizeof(float)));

        float *deviceOutputData = NULL;
        CUDA_CHECK(cudaMalloc((void **)&deviceOutputData, dataSize * sizeof(float)));

        CUDA_CHECK(cudaMemcpy(deviceInputData, hostData, dataSize * sizeof(float), cudaMemcpyHostToDevice));

        simpleMPIKernel<<<gridSize, blockSize>>>(deviceInputData, deviceOutputData);

        CUDA_CHECK(cudaMemcpy(hostData, deviceOutputData, dataSize * sizeof(float), cudaMemcpyDeviceToHost));

        CUDA_CHECK(cudaFree(deviceInputData));
        CUDA_CHECK(cudaFree(deviceOutputData));
    }

    // Simple host-side sum
    float sum(float *data, int size)
    {
        float accum = 0.f;
        for (int i = 0; i < size; i++)
            accum += data[i];
        return accum;
    }

    // Abort the whole MPI job on error
    void my_abort(int err)
    {
        cout << "Test FAILED\n";
        MPI_Abort(MPI_COMM_WORLD, err);
    }
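
    Aside: CUDA_CHECK above tests the call's result but then re-queries cudaGetLastError() for the code it reports. A variant (hypothetical, not part of the sample) that keeps the call's own return value and prints a readable message:

    #define CUDA_CHECK_V2(call)                                             \
        do {                                                                \
            cudaError_t err_ = (call);                                      \
            if (err_ != cudaSuccess)                                        \
            {                                                               \
                cerr << "CUDA error calling \"" #call "\": "                \
                     << cudaGetErrorString(err_) << endl;                   \
                my_abort(err_);                                             \
            }                                                               \
        } while (0)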
    // simpleMPI.cpp
    #include <mpi.h>
    #include <iostream>
    #include <cstdio>
    #include "simpleMPI.h"

    using std::cout;
    using std::cerr;
    using std::endl;

    #define MPI_CHECK(call)                                                 \
        if ((call) != MPI_SUCCESS)                                          \
        {                                                                   \
            cerr << "MPI error calling \"" #call "\"\n";                    \
            my_abort(-1);                                                   \
        }

    int main(int argc, char *argv[])
    {
        int blockSize = 256;
        int gridSize = 10000;
        int dataSizePerNode = gridSize * blockSize;

        // Initialize MPI
        MPI_CHECK(MPI_Init(&argc, &argv));

        // Get the communicator size and this node's rank
        int commSize, commRank;
        MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &commSize));
        MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &commRank));

        // The root node generates the random array
        int dataSizeTotal = dataSizePerNode * commSize;
        float *dataRoot = NULL;
        if (commRank == 0)
        {
            cout << "Running on " << commSize << " nodes" << endl;
            dataRoot = new float[dataSizeTotal];
            initData(dataRoot, dataSizeTotal);
        }

        // Each node allocates a buffer to receive its chunk from the root
        float *dataNode = new float[dataSizePerNode];

        MPI_CHECK(MPI_Scatter(dataRoot, dataSizePerNode, MPI_FLOAT, dataNode, dataSizePerNode, MPI_FLOAT, 0, MPI_COMM_WORLD));

        // Free the root's copy of the data
        if (commRank == 0)
            delete [] dataRoot;

        // Each node takes square roots on its GPU, then sums locally
        computeGPU(dataNode, blockSize, gridSize);
        float sumNode = sum(dataNode, dataSizePerNode);

        // Reduce each node's partial sum to a total on the root
        float sumRoot;
        MPI_CHECK(MPI_Reduce(&sumNode, &sumRoot, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD));

        // Cleanup and output
        delete[] dataNode;
        MPI_CHECK(MPI_Finalize());

        if (commRank == 0)
        {
            float average = sumRoot / dataSizeTotal;
            cout << "Average of square roots is: " << average << endl;
            cout << "PASSED\n";
        }

        getchar();  // wait for a key press before exiting
        return 0;
    }
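
    To build and run (one possible toolchain; paths and flags are assumptions, adjust for your setup): compile the device code with nvcc -c simpleMPI.cu, link with mpicxx simpleMPI.cpp simpleMPI.o -L/usr/local/cuda/lib64 -lcudart -o simpleMPI, then launch with mpirun -np 1 ./simpleMPI.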

    ▶ Output

    Running on 1 nodes
    Average of square roots is: 0.667507
    PASSED

    ▶ Takeaways

    ● The new material here is the use of MPI's collective functions (MPI_Scatter and MPI_Reduce, as sketched below); nothing new on the CUDA side.
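
    A minimal standalone sketch of those two collectives (everything here is illustrative: file name and values are made up; build with mpicxx, run with mpirun -np <N>):

    // scatter_reduce_min.cpp -- hypothetical mini-example of MPI_Scatter + MPI_Reduce
    #include <mpi.h>
    #include <iostream>

    int main(int argc, char *argv[])
    {
        MPI_Init(&argc, &argv);

        int size, rank;
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        // The root prepares one value per rank; other ranks pass NULL.
        float *sendBuf = NULL;
        if (rank == 0)
        {
            sendBuf = new float[size];
            for (int i = 0; i < size; i++)
                sendBuf[i] = (float)(i + 1);
        }

        // Each rank receives exactly one float from the root.
        float mine = 0.f;
        MPI_Scatter(sendBuf, 1, MPI_FLOAT, &mine, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);

        // Sum every rank's value back onto the root.
        float total = 0.f;
        MPI_Reduce(&mine, &total, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);

        if (rank == 0)
        {
            std::cout << "sum = " << total << std::endl;  // 1 + 2 + ... + size
            delete[] sendBuf;
        }

        MPI_Finalize();
        return 0;
    }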

  • Original post: https://www.cnblogs.com/cuancuancuanhao/p/7883401.html