zoukankan      html  css  js  c++  java
  • darknet编译GPU、CUDNN

    错误:/src/convolutional_layer.c:153:13: error: 'CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT' undeclared (first use in this function);

    修改出错的文件src/convolutional_layer.c的代码,增加针对CUDNN_MAJOR>=8的处理:

    #ifdef GPU

    #ifdef CUDNN

    /* Workspace cap (bytes) used when selecting convolution algorithms.
     * Guarded so the snippet also works if the project defines it elsewhere. */
    #ifndef MEMORY_LIMIT
    #define MEMORY_LIMIT 2000000000
    #endif

    /*
     * Configure the cuDNN tensor/filter/convolution descriptors for layer l
     * and select forward/backward convolution algorithms.
     *
     * cuDNN >= 8 removed cudnnGetConvolution*Algorithm() (and with it
     * CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT), so on those versions we
     * benchmark with cudnnFind*Algorithm() and take the fastest algorithm
     * whose workspace fits under MEMORY_LIMIT. Results are returned sorted by
     * speed, so the first fitting entry is the best choice.
     */
    void cudnn_convolutional_setup(layer *l)
    {
        /* Backward (gradient) tensor descriptors. */
        cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
        cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);

        /* Forward tensor descriptors plus the per-channel (1 x out_c x 1 x 1)
         * descriptor used for batch normalization. */
        cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
        cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
        cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);

        /* Filter descriptors; grouped convolution divides the input channels. */
        cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);
        cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c/l->groups, l->size, l->size);

        #if CUDNN_MAJOR >= 6
        /* cuDNN 6 added an explicit compute-type argument. */
        cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);
        #else
        cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
        #endif

        #if CUDNN_MAJOR >= 7
        cudnnSetConvolutionGroupCount(l->convDesc, l->groups);
        #else
        if(l->groups > 1){
            error("CUDNN < 7 doesn't support groups, please upgrade!");
        }
        #endif

        #if CUDNN_MAJOR >= 8
        int returnedAlgoCount;
        cudnnConvolutionFwdAlgoPerf_t       fw_results[2 * CUDNN_CONVOLUTION_FWD_ALGO_COUNT];
        cudnnConvolutionBwdDataAlgoPerf_t   bd_results[2 * CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT];
        cudnnConvolutionBwdFilterAlgoPerf_t bf_results[2 * CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT];

        /* Forward algorithm. */
        cudnnFindConvolutionForwardAlgorithm(cudnn_handle(),
                l->srcTensorDesc,
                l->weightDesc,
                l->convDesc,
                l->dstTensorDesc,
                CUDNN_CONVOLUTION_FWD_ALGO_COUNT,
                &returnedAlgoCount,
                fw_results);
        /* Fall back to the fastest overall so fw_algo is never left unset
         * when no candidate fits under MEMORY_LIMIT. */
        if(returnedAlgoCount > 0) l->fw_algo = fw_results[0].algo;
        for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
            #if PRINT_CUDNN_ALGO > 0
            printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
                   cudnnGetErrorString(fw_results[algoIndex].status),
                   fw_results[algoIndex].algo, fw_results[algoIndex].time,
                   (unsigned long long)fw_results[algoIndex].memory);
            #endif
            if(fw_results[algoIndex].memory < MEMORY_LIMIT){
                l->fw_algo = fw_results[algoIndex].algo;
                break;
            }
        }

        /* Backward-data algorithm. */
        cudnnFindConvolutionBackwardDataAlgorithm(cudnn_handle(),
                l->weightDesc,
                l->ddstTensorDesc,
                l->convDesc,
                l->dsrcTensorDesc,
                CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT,
                &returnedAlgoCount,
                bd_results);
        if(returnedAlgoCount > 0) l->bd_algo = bd_results[0].algo;
        for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
            #if PRINT_CUDNN_ALGO > 0
            printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
                   cudnnGetErrorString(bd_results[algoIndex].status),
                   bd_results[algoIndex].algo, bd_results[algoIndex].time,
                   (unsigned long long)bd_results[algoIndex].memory);
            #endif
            if(bd_results[algoIndex].memory < MEMORY_LIMIT){
                l->bd_algo = bd_results[algoIndex].algo;
                break;
            }
        }

        /* Backward-filter algorithm. */
        cudnnFindConvolutionBackwardFilterAlgorithm(cudnn_handle(),
                l->srcTensorDesc,
                l->ddstTensorDesc,
                l->convDesc,
                l->dweightDesc,
                CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT,
                &returnedAlgoCount,
                bf_results);
        if(returnedAlgoCount > 0) l->bf_algo = bf_results[0].algo;
        for(int algoIndex = 0; algoIndex < returnedAlgoCount; ++algoIndex){
            #if PRINT_CUDNN_ALGO > 0
            printf("^^^^ %s for Algo %d: %f time requiring %llu memory\n",
                   cudnnGetErrorString(bf_results[algoIndex].status),
                   bf_results[algoIndex].algo, bf_results[algoIndex].time,
                   (unsigned long long)bf_results[algoIndex].memory);
            #endif
            if(bf_results[algoIndex].memory < MEMORY_LIMIT){
                l->bf_algo = bf_results[algoIndex].algo;
                break;
            }
        }

        #else

        /* cuDNN < 8: ask for the fastest algorithm within the workspace
         * limit directly (these Get* entry points were removed in cuDNN 8). */
        cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
                l->srcTensorDesc,
                l->weightDesc,
                l->convDesc,
                l->dstTensorDesc,
                CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
                MEMORY_LIMIT,
                &l->fw_algo);
        cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
                l->weightDesc,
                l->ddstTensorDesc,
                l->convDesc,
                l->dsrcTensorDesc,
                CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
                MEMORY_LIMIT,
                &l->bd_algo);
        cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
                l->srcTensorDesc,
                l->ddstTensorDesc,
                l->convDesc,
                l->dweightDesc,
                CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
                MEMORY_LIMIT,
                &l->bf_algo);
        #endif
    }

    #endif

    #endif
    
    
    增加声明（放在 src/convolutional_layer.c 文件顶部、cudnn_convolutional_setup 函数定义之前，否则 MEMORY_LIMIT 未定义会导致编译错误）：
    #define MEMORY_LIMIT 2000000000

    错误:nvcc fatal   : Unsupported gpu architecture 'compute_30'

    把Makefile里的配置修改一下,去掉ARCH配置中的 -gencode arch=compute_30,code=sm_30 \ 这行,改成下面这样即可:

    # CUDA compute capabilities to build for. compute_30 (Kepler GK10x) was
    # dropped in CUDA 11, so it must NOT appear here when building with a
    # modern toolkit; compute_86 covers Ampere (RTX 30xx) GPUs.
    ARCH= -gencode arch=compute_35,code=sm_35 \
          -gencode arch=compute_50,code=[sm_50,compute_50] \
          -gencode arch=compute_52,code=[sm_52,compute_52] \
          -gencode arch=compute_70,code=[sm_70,compute_70] \
          -gencode arch=compute_75,code=[sm_75,compute_75]\
          -gencode arch=compute_86,code=[sm_86,compute_86]

    天道酬勤 循序渐进 技压群雄
  • 相关阅读:
    js复习(一)
    Webform(文件上传)
    Webform(分页与组合查询配合使用)
    Webform(分页、组合查询)
    webform(内置对象)
    Webform(内置对象-Response与Redirect、QueryString传值、Repeater删改)
    Webform(Repeater控件)
    MDI窗体容器 权限设置
    进程和线程
    WinForm三级联动
  • 原文地址:https://www.cnblogs.com/wuyuan2011woaini/p/15687491.html
Copyright © 2011-2022 走看看