  • softmax_loss.cu and softmax_loss.cpp source code

#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/softmax_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
__global__ void SoftmaxLossForwardGPU(const int nthreads,
          const Dtype* prob_data, const Dtype* label, Dtype* loss,
          const int num, const int dim, const int spatial_dim,
          const bool has_ignore_label_, const int ignore_label_,
          Dtype* counts) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int n = index / spatial_dim;
    const int s = index % spatial_dim;
    const int label_value = static_cast<int>(label[n * spatial_dim + s]);
    if (has_ignore_label_ && label_value == ignore_label_) {
      loss[index] = 0;
      counts[index] = 0;
    } else {
      loss[index] = -log(max(prob_data[n * dim + label_value * spatial_dim + s],
                      Dtype(FLT_MIN)));
      counts[index] = 1;
    }
  }
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.gpu_data();
  const Dtype* label = bottom[1]->gpu_data();
  const int dim = prob_.count() / outer_num_;
  const int nthreads = outer_num_ * inner_num_;
  // Since this memory is not used for anything until it is overwritten
  // on the backward pass, we use it here to avoid having to allocate new GPU
  // memory to accumulate intermediate results in the kernel.
  Dtype* loss_data = bottom[0]->mutable_gpu_diff();
  // Similarly, this memory is never used elsewhere, and thus we can use it
  // to avoid having to allocate additional GPU memory.
  Dtype* counts = prob_.mutable_gpu_diff();
  // NOLINT_NEXT_LINE(whitespace/operators)
  SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
      CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data,
      outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
  Dtype loss;
  caffe_gpu_asum(nthreads, loss_data, &loss);
  Dtype valid_count = -1;
  // Only launch another CUDA kernel if we actually need the count of valid
  // outputs.
  if (normalization_ == LossParameter_NormalizationMode_VALID &&
      has_ignore_label_) {
    caffe_gpu_asum(nthreads, counts, &valid_count);
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_,
                                                        valid_count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}

template <typename Dtype>
__global__ void SoftmaxLossBackwardGPU(const int nthreads, const Dtype* top,
          const Dtype* label, Dtype* bottom_diff, const int num, const int dim,
          const int spatial_dim, const bool has_ignore_label_,
          const int ignore_label_, Dtype* counts) {
  const int channels = dim / spatial_dim;

  CUDA_KERNEL_LOOP(index, nthreads) {
    const int n = index / spatial_dim;
    const int s = index % spatial_dim;
    const int label_value = static_cast<int>(label[n * spatial_dim + s]);

    if (has_ignore_label_ && label_value == ignore_label_) {
      for (int c = 0; c < channels; ++c) {
        bottom_diff[n * dim + c * spatial_dim + s] = 0;
      }
      counts[index] = 0;
    } else {
      bottom_diff[n * dim + label_value * spatial_dim + s] -= 1;
      counts[index] = 1;
    }
  }
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const Dtype* prob_data = prob_.gpu_data();
    const Dtype* top_data = top[0]->gpu_data();
    caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->gpu_data();
    const int dim = prob_.count() / outer_num_;
    const int nthreads = outer_num_ * inner_num_;
    // Since this memory is never used for anything else,
    // we use it to avoid allocating new GPU memory.
    Dtype* counts = prob_.mutable_gpu_diff();
    // NOLINT_NEXT_LINE(whitespace/operators)
    SoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
        CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff,
        outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);

    Dtype valid_count = -1;
    // Only launch another CUDA kernel if we actually need the count of valid
    // outputs.
    if (normalization_ == LossParameter_NormalizationMode_VALID &&
        has_ignore_label_) {
      caffe_gpu_asum(nthreads, counts, &valid_count);
    }
    const Dtype loss_weight = top[0]->cpu_diff()[0] /
        (get_normalizer(normalization_, valid_count) * Caffe::getThreadNum());
    caffe_gpu_scal(prob_.count(), loss_weight, bottom_diff);
  }
}

INSTANTIATE_LAYER_GPU_FUNCS_DISABLE_FP16(SoftmaxWithLossLayer);

}  // namespace caffe
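
    To make the indexing concrete, here is a minimal CPU-side sketch (not part of Caffe; the function names are illustrative) of what the two kernels compute for one (n, s) position: the forward kernel takes the negative log of the probability at the label channel, and the backward kernel turns the copied probabilities into the softmax gradient p_c - 1{c == label} before the final scaling by loss_weight.

    #include <algorithm>
    #include <cfloat>
    #include <cmath>
    #include <vector>

    // Data layout matches the kernels: index = n * dim + c * spatial_dim + s,
    // with dim = channels * spatial_dim.
    float forward_one_position(const std::vector<float>& prob,
                               int n, int s, int label_value,
                               int dim, int spatial_dim) {
      // Same clamp as the kernel: avoid log(0) for a zero probability.
      float p = std::max(prob[n * dim + label_value * spatial_dim + s],
                         static_cast<float>(FLT_MIN));
      return -std::log(p);  // cross-entropy loss for this spatial position
    }

    void backward_one_position(std::vector<float>& bottom_diff,  // already holds prob
                               int n, int s, int label_value,
                               int dim, int spatial_dim) {
      // Gradient of -log(softmax) w.r.t. the logits: p_c - 1{c == label}.
      // bottom_diff was memcpy'd from prob_, so only the label channel needs -1.
      bottom_diff[n * dim + label_value * spatial_dim + s] -= 1.0f;
    }

    Positions whose label equals ignore_label_ contribute zero loss and a zero gradient, which is also why both kernels fill the counts buffer used by the VALID normalization mode.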

    outer_num_: equivalent to batch_size

    dim: c*w*h

    spatial_dim (inner_num_): w*h

    The relevant code in softmax_loss.cpp:

    outer_num_ = bottom[0]->count(0, softmax_axis_);
    inner_num_ = bottom[0]->count(softmax_axis_ + 1);

    Note that Blob::count(start, end) is half-open: it multiplies the axes from start up to but not including end, so count(0, softmax_axis_) covers only axis 0 (the batch axis) when softmax_axis_ == 1, while count(softmax_axis_ + 1) covers everything after the channel axis. A small sketch follows.
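
    As a rough illustration of how these sizes work out for a typical NCHW blob with softmax_axis_ == 1, the sketch below mimics the half-open semantics of Blob::count with plain integers; the shape values are made up for the example.

    #include <cassert>
    #include <vector>

    // Mimic Blob::count(start, end): product of shape[start .. end-1].
    int count(const std::vector<int>& shape, int start, int end) {
      int c = 1;
      for (int i = start; i < end; ++i) c *= shape[i];
      return c;
    }

    int main() {
      // Example NCHW shape; softmax_axis_ == 1 (the channel axis).
      std::vector<int> shape = {32, 10, 7, 7};  // N, C, H, W
      int softmax_axis = 1;

      int outer_num = count(shape, 0, softmax_axis);      // N     = 32
      int inner_num = count(shape, softmax_axis + 1, 4);  // H*W   = 49
      int dim       = count(shape, softmax_axis, 4);      // C*H*W = 490, i.e. prob_.count() / outer_num_

      assert(outer_num == 32 && inner_num == 49 && dim == 490);
      return 0;
    }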

  • Original post: https://www.cnblogs.com/ymjyqsx/p/8479104.html