zoukankan      html  css  js  c++  java
  • GPU和CPU耗时统计方法

    GPU端耗时统计

     // Measure the GPU-side time of a full round trip (device allocation,
     // host-to-device copy, kernel launch, device-to-host copy) with CUDA
     // events recorded on stream 0.
     cudaEvent_t start, stop;
     checkCudaErrors(cudaEventCreate(&start));
     checkCudaErrors(cudaEventCreate(&stop));
     // Drain previously queued work so it is not attributed to this measurement.
     checkCudaErrors(cudaDeviceSynchronize());

     float gpu_time = 0.0f;
     // Mark the beginning of the timed region; the event completes once all
     // work queued before it on stream 0 has finished.
     checkCudaErrors(cudaEventRecord(start, 0));

     // Allocate device memory for the input.
     float *d_idata;
     checkCudaErrors(cudaMalloc((void **) &d_idata, mem_size));

     // Copy the host input to device memory.
     checkCudaErrors(cudaMemcpy(d_idata, h_idata, mem_size, cudaMemcpyHostToDevice));

     // Allocate device memory for the result.
     float *d_odata;
     checkCudaErrors(cudaMalloc((void **) &d_odata, mem_size));

     // Set up the execution configuration: one block of num_threads threads.
     dim3  grid(1, 1, 1);
     dim3  threads(num_threads, 1, 1);

     // Launch the kernel. grid is the grid dimension, threads is the block
     // dimension, and mem_size is the number of bytes of dynamic shared
     // memory requested per block.
     testKernel<<< grid, threads, mem_size >>>(d_idata, d_odata);

     // Check whether the kernel launch reported an error.
     getLastCudaError("Kernel execution failed");

     // Allocate host memory for the result; stop early if the allocation
     // fails instead of handing a null pointer to cudaMemcpy.
     float *h_odata = (float *) malloc(mem_size);
     if (h_odata == NULL)
     {
         fprintf(stderr, "Failed to allocate %lu bytes of host memory for h_odata\n",
                 (unsigned long) mem_size);
         exit(EXIT_FAILURE);
     }

     // Copy the result from the device back to the host.
     checkCudaErrors(cudaMemcpy(h_odata, d_odata, sizeof(float) * num_threads,
                                cudaMemcpyDeviceToHost));

     // Mark the end of the timed region.
     checkCudaErrors(cudaEventRecord(stop, 0));

     // Poll until the stop event has completed; counter records how many
     // times the host looped while waiting. Any status other than
     // cudaErrorNotReady ends the loop, and a failure is then expected to
     // surface through the checked cudaEventElapsedTime call below.
     unsigned long int counter = 0;
     while (cudaEventQuery(stop) == cudaErrorNotReady)
     {
         counter++;
     }

     // Elapsed time between the two events, in milliseconds.
     checkCudaErrors(cudaEventElapsedTime(&gpu_time, start, stop));
     printf("GPU执行耗时: %.2f (ms)\n", gpu_time);
     printf("CPU executed %lu iterations while waiting for GPU to finish\n", counter);
     // NOTE(review): start/stop, d_idata, d_odata and h_odata are not released
     // in this fragment; release them (cudaEventDestroy / cudaFree / free)
     // once they are no longer needed.

    CPU端耗时统计

     // Measure the host-side (serial) time of the reference computation with
     // the SDK stopwatch helpers.
     StopWatchInterface *timer = 0;
     sdkCreateTimer(&timer);
     sdkResetTimer(&timer);

     sdkStartTimer(&timer);
     // Compute the reference solution on the CPU. The buffer allocation is
     // deliberately kept inside the timed region, as in the original listing.
     float *reference = (float *) malloc(mem_size);
     if (reference == NULL)
     {
         // Do not hand a null buffer to computeGold.
         fprintf(stderr, "Failed to allocate %lu bytes of host memory for reference\n",
                 (unsigned long) mem_size);
         exit(EXIT_FAILURE);
     }
     computeGold(reference, h_idata, num_threads);
     sdkStopTimer(&timer);
     printf("串行耗时:%f (ms)\n", sdkGetTimerValue(&timer));
     // NOTE(review): timer and reference are not released in this fragment;
     // release them (sdkDeleteTimer / free) once they are no longer needed.
  • 相关阅读:
    hitachi2020 C-ThREE
    LOJ#2083. 「NOI2016」优秀的拆分
    BZOJ2754: [SCOI2012]喵星球上的点名
    BZOJ4516: [Sdoi2016]生成魔咒
    AtCoder Beginner Contest 146解题报告
    拉格朗日插值复习笔记
    对于求解单峰函数最值问题的探讨
    BZOJ5509: [Tjoi2019]甲苯先生的滚榜
    面试技巧
    性能案例分析 | MAT分析内存泄露
  • 原文地址:https://www.cnblogs.com/liangliangdetianxia/p/4198607.html
Copyright © 2011-2022 走看看