zoukankan      html  css  js  c++  java
  • 多GPU设备处理点积示例

     多GPU设备处理点积示例,项目打包下载

      1 /*
      2 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
      3 *
      4 * NVIDIA Corporation and its licensors retain all intellectual property and
      5 * proprietary rights in and to this software and related documentation.
      6 * Any use, reproduction, disclosure, or distribution of this software
      7 * and related documentation without an express license agreement from
      8 * NVIDIA Corporation is strictly prohibited.
      9 *
     10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
     11 * associated with this source code for terms and conditions that govern
     12 * your use of this NVIDIA software.
     13 *
     14 */
     15 
     16 
     17 #include "../common/book.h"
     18 #include "cuda.h"
     19 #include "device_launch_parameters.h"
     20 #include "device_functions.h"
     21 #include "cuda_runtime.h"
     22 
     // Minimum of two values. Every argument use and the whole expansion are
     // parenthesized so that operands containing lower-precedence operators
     // (e.g. `x | y`, `c ? p : q`) still expand to the intended comparison.
     // Each argument may be evaluated twice, so avoid arguments with side effects.
     #define imin(a,b) ((a) < (b) ? (a) : (b))

     // Total number of elements in each input vector.
     #define     N    (33*1024*1024)
     // Threads per block; also the length of the shared-memory cache in dot().
     const int threadsPerBlock = 256;
     // Blocks per launch: enough blocks to cover N / 2 elements, capped at 32.
     const int blocksPerGrid =
         imin(32, (N / 2 + threadsPerBlock - 1) / threadsPerBlock);
     29 
     30 
     /*
      * Computes per-block partial sums of the element-wise product a[i] * b[i]
      * for i in [0, size).
      *
      *   size - number of elements readable through a and b
      *   a, b - input vectors (device pointers)
      *   c    - output; c[blockIdx.x] receives the partial sum of that block,
      *          so it needs one float per launched block
      *
      * Launch requirements visible in the code: blockDim.x must not exceed
      * threadsPerBlock (the length of the shared cache), and the halving loop
      * below only folds in every cache slot when blockDim.x is a power of two.
      */
     __global__ void dot(int size, float *a, float *b, float *c) {
         // one shared slot per thread of the block
         __shared__ float cache[threadsPerBlock];

         const int lane = threadIdx.x;
         float partial = 0;

         // each thread sums the products at indices spaced by the total
         // number of launched threads
         for (int idx = threadIdx.x + blockIdx.x * blockDim.x;
              idx < size;
              idx += blockDim.x * gridDim.x) {
             partial += a[idx] * b[idx];
         }

         // publish this thread's sum and wait until the whole block has done so
         cache[lane] = partial;
         __syncthreads();

         // in-block reduction: on every pass the lower part of the cache
         // absorbs the upper part, then the active width is halved
         for (int half = blockDim.x / 2; half != 0; half /= 2) {
             if (lane < half) {
                 cache[lane] += cache[lane + half];
             }
             // barrier sits outside the branch so every thread reaches it
             __syncthreads();
         }

         // thread 0 writes the block's total
         if (lane == 0) {
             c[blockIdx.x] = cache[0];
         }
     }
     60 
     61 
     // Job description handed to routine() through its void* argument:
     // one instance per worker, each describing one slice of the input vectors.
     struct DataStruct {
         int    deviceID;     // device index passed to cudaSetDevice()
         int    size;         // number of elements in each of a and b
         float* a;            // first input slice (host memory)
         float* b;            // second input slice (host memory)
         float  returnValue;  // written by routine(): dot product of the two slices
     };
     69 
     /*
      * Worker entry point: computes the dot product of the two slices described
      * by a DataStruct on the device named by its deviceID field.
      *
      *   pvoidData - pointer to a DataStruct; deviceID, size, a and b are read,
      *               returnValue is written with the result
      *
      * Returns 0. CUDA calls are wrapped in HANDLE_ERROR and the host allocation
      * in HANDLE_NULL (both come from book.h, which is not part of this listing).
      */
     unsigned WINAPI routine(void *pvoidData)
     // non-Windows variant of the signature: void* routine(void *pvoidData)
     {
         DataStruct  *data = (DataStruct*)pvoidData;
         HANDLE_ERROR(cudaSetDevice(data->deviceID));

         int     size = data->size;
         float   *a, *b, c, *partial_c;
         float   *dev_a, *dev_b, *dev_partial_c;

         const size_t inputBytes = size * sizeof(float);
         const size_t partialBytes = blocksPerGrid * sizeof(float);

         // host side: the inputs are borrowed from the caller, only the
         // per-block partial sums need a buffer of their own
         a = data->a;
         b = data->b;
         partial_c = (float*)malloc(partialBytes);
         // checked the same way main() checks its own host allocations
         HANDLE_NULL(partial_c);

         // allocate the memory on the GPU
         HANDLE_ERROR(cudaMalloc((void**)&dev_a, inputBytes));
         HANDLE_ERROR(cudaMalloc((void**)&dev_b, inputBytes));
         HANDLE_ERROR(cudaMalloc((void**)&dev_partial_c, partialBytes));

         // copy the arrays 'a' and 'b' to the GPU
         HANDLE_ERROR(cudaMemcpy(dev_a, a, inputBytes,
             cudaMemcpyHostToDevice));
         HANDLE_ERROR(cudaMemcpy(dev_b, b, inputBytes,
             cudaMemcpyHostToDevice));

         dot <<<blocksPerGrid, threadsPerBlock >>>(size, dev_a, dev_b,
             dev_partial_c);
         // a kernel launch returns no status itself; query it explicitly so
         // an invalid launch is reported here and not by a later call
         HANDLE_ERROR(cudaGetLastError());

         // copy the per-block partial sums back from the GPU to the CPU
         HANDLE_ERROR(cudaMemcpy(partial_c, dev_partial_c, partialBytes,
             cudaMemcpyDeviceToHost));

         // finish up on the CPU side: add the per-block partial sums
         c = 0;
         for (int i = 0; i<blocksPerGrid; i++) {
             c += partial_c[i];
         }

         HANDLE_ERROR(cudaFree(dev_a));
         HANDLE_ERROR(cudaFree(dev_b));
         HANDLE_ERROR(cudaFree(dev_partial_c));

         // free memory on the CPU side
         free(partial_c);

         data->returnValue = c;
         return 0;
     }
    122 
    123 
    /*
     * Computes the dot product of two N-element vectors on two devices: the
     * vectors are split into two slices, device 0 is driven from a helper
     * thread, device 1 from the calling thread, and the two partial results
     * are added and printed.
     *
     * Returns 0, including when fewer than two devices are found (in that
     * case a message is printed and nothing is computed).
     */
    int main(void) {
        int deviceCount;
        HANDLE_ERROR(cudaGetDeviceCount(&deviceCount));
        // two devices are required
        if (deviceCount < 2) {
            printf("We need at least two compute 1.0 or greater "
                "devices, but only found %d\n", deviceCount);
            return 0;
        }

        float   *a = (float*)malloc(sizeof(float)* N);
        HANDLE_NULL(a);
        float   *b = (float*)malloc(sizeof(float)* N);
        HANDLE_NULL(b);

        // fill in the host memory with data
        for (int i = 0; i<N; i++) {
            a[i] = i;
            b[i] = i * 2;
        }

        /*
        Prepare for multithreading: each DataStruct describes one slice of
        the data set. The second slice takes whatever the first leaves over,
        so no element would be dropped if N were odd.
        */
        const int firstHalf = N / 2;
        DataStruct  data[2];
        data[0].deviceID = 0;
        data[0].size = firstHalf;
        data[0].a = a;
        data[0].b = b;

        data[1].deviceID = 1;
        data[1].size = N - firstHalf;
        data[1].a = a + firstHalf;
        data[1].b = b + firstHalf;

        // slice 0 runs on a helper thread while this thread handles slice 1;
        // end_thread is called before the results are read
        CUTThread   thread = start_thread(routine, &(data[0]));
        routine(&(data[1]));
        end_thread(thread);


        // free memory on the CPU side
        free(a);
        free(b);

        printf("Value calculated:  %f\n",
            data[0].returnValue + data[1].returnValue);

        return 0;
    }
  • 相关阅读:
    fast incremental backup failed on standby database
    How to find error message from OMS repository
    Examine 11g automatic block Corruption recovery
    C#继承Control实用自定义控件
    手把手教你写SHELL CODE
    编写C#控件的3种方式
    Android中跨越ACTIVITY的全局线程
    DevExpress 皮肤使用方法
    PHP讨论之什么是HOOK?
    C#制作WinForm控件
  • 原文地址:https://www.cnblogs.com/liangliangdetianxia/p/3998729.html
Copyright © 2011-2022 走看看