#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/softmax_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// Computes the per-location softmax loss.
// Launch layout: 1-D grid-stride loop (CUDA_KERNEL_LOOP) over
// nthreads = outer_num_ * inner_num_; one thread per (image, spatial
// position) pair.
//   prob_data: softmax probabilities laid out as [num, channels, spatial_dim]
//   label:     one ground-truth label per spatial location
//   loss:      per-location loss output (set to 0 for ignored labels)
//   counts:    1 where the location contributes to the loss, 0 otherwise
template <typename Dtype>
__global__ void SoftmaxLossForwardGPU(const int nthreads,
          const Dtype* prob_data, const Dtype* label, Dtype* loss,
          const int num, const int dim, const int spatial_dim,
          const bool has_ignore_label_, const int ignore_label_,
          Dtype* counts) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    const int n = index / spatial_dim;
    const int s = index % spatial_dim;
    const int label_value = static_cast<int>(label[n * spatial_dim + s]);
    if (has_ignore_label_ && label_value == ignore_label_) {
      loss[index] = 0;
      counts[index] = 0;
    } else {
      // Clamp to FLT_MIN so log() never sees 0 (which would produce inf).
      loss[index] = -log(max(prob_data[n * dim + label_value * spatial_dim + s],
                      Dtype(FLT_MIN)));
      counts[index] = 1;
    }
  }
}

// Forward pass: run the internal softmax, then reduce the per-location
// losses into a single normalized scalar in top[0].
template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The internal softmax layer fills prob_ with normalized probabilities.
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);
  const Dtype* prob_data = prob_.gpu_data();
  const Dtype* label = bottom[1]->gpu_data();
  const int dim = prob_.count() / outer_num_;
  const int nthreads = outer_num_ * inner_num_;
  // Since this memory is not used for anything until it is overwritten
  // on the backward pass, we use it here to avoid having to allocate new GPU
  // memory to accumulate intermediate results in the kernel.
  Dtype* loss_data = bottom[0]->mutable_gpu_diff();
  // Similarly, this memory is never used elsewhere, and thus we can use it
  // to avoid having to allocate additional GPU memory.
  Dtype* counts = prob_.mutable_gpu_diff();
  // NOLINT_NEXT_LINE(whitespace/operators)
  SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
      CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data,
      outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
  Dtype loss;
  caffe_gpu_asum(nthreads, loss_data, &loss);
  Dtype valid_count = -1;
  // Only launch another CUDA kernel if we actually need the count of valid
  // outputs.
  if (normalization_ == LossParameter_NormalizationMode_VALID &&
      has_ignore_label_) {
    caffe_gpu_asum(nthreads, counts, &valid_count);
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_,
                                                        valid_count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}

// Backward kernel: bottom_diff arrives pre-filled with a copy of the
// softmax probabilities; subtracting 1 at the ground-truth channel turns it
// into the softmax-loss gradient.  Ignored labels get a zero gradient across
// all channels.  counts mirrors the forward pass (1 = contributes, 0 = ignored).
template <typename Dtype>
__global__ void SoftmaxLossBackwardGPU(const int nthreads, const Dtype* top,
          const Dtype* label, Dtype* bottom_diff, const int num, const int dim,
          const int spatial_dim, const bool has_ignore_label_,
          const int ignore_label_, Dtype* counts) {
  const int channels = dim / spatial_dim;

  CUDA_KERNEL_LOOP(index, nthreads) {
    const int n = index / spatial_dim;
    const int s = index % spatial_dim;
    const int label_value = static_cast<int>(label[n * spatial_dim + s]);

    if (has_ignore_label_ && label_value == ignore_label_) {
      for (int c = 0; c < channels; ++c) {
        bottom_diff[n * dim + c * spatial_dim + s] = 0;
      }
      counts[index] = 0;
    } else {
      bottom_diff[n * dim + label_value * spatial_dim + s] -= 1;
      counts[index] = 1;
    }
  }
}

// Backward pass: gradient w.r.t. the data input only; backprop to the
// label input is a fatal error.
template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const Dtype* prob_data = prob_.gpu_data();
    const Dtype* top_data = top[0]->gpu_data();
    // Seed the gradient with the probabilities; the kernel then subtracts 1
    // at each ground-truth channel.
    caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff);
    const Dtype* label = bottom[1]->gpu_data();
    const int dim = prob_.count() / outer_num_;
    const int nthreads = outer_num_ * inner_num_;
    // Since this memory is never used for anything else,
    // we use it to avoid allocating new GPU memory.
    Dtype* counts = prob_.mutable_gpu_diff();
    // NOLINT_NEXT_LINE(whitespace/operators)
    SoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
        CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff,
        outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);

    Dtype valid_count = -1;
    // Only launch another CUDA kernel if we actually need the count of valid
    // outputs.
    if (normalization_ == LossParameter_NormalizationMode_VALID &&
        has_ignore_label_) {
      caffe_gpu_asum(nthreads, counts, &valid_count);
    }
    // NOTE(review): the division by Caffe::getThreadNum() is a fork-specific
    // multi-GPU/thread gradient scaling not present in upstream Caffe —
    // confirm against this fork's solver before changing.
    const Dtype loss_weight = top[0]->cpu_diff()[0] /
        (get_normalizer(normalization_, valid_count) * Caffe::getThreadNum());
    caffe_gpu_scal(prob_.count(), loss_weight, bottom_diff);
  }
}

INSTANTIATE_LAYER_GPU_FUNCS_DISABLE_FP16(SoftmaxWithLossLayer);

}  // namespace caffe
outer_num_：相当于 batch_size，即 count(0, softmax_axis_)（softmax 轴之前所有维度的乘积）
dim：每个样本的元素个数，即 c*w*h（代码中为 prob_.count() / outer_num_）
spatial_dim（即 inner_num_）：w*h（softmax 轴之后所有维度的乘积）
softmax_loss.cpp 中对应的代码：
outer_num_ = bottom[0]->count(0, softmax_axis_); inner_num_ = bottom[0]->count(softmax_axis_ + 1);
由此可以看出 count(start, end) 的区间是左闭右开的：count(0, softmax_axis_) 只包含从第 0 轴到 softmax_axis_ 之前的轴（不含 softmax_axis_ 这一轴本身）。