
Commit e939b70

Merge pull request BVLC#1654 from longjon/softmax-missing-values
Add missing value support to SoftmaxLossLayer
2 parents: e8d9c12 + c7f63da

3 files changed: +55, -10 lines

include/caffe/loss_layers.hpp (+7)

@@ -758,6 +758,13 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
   vector<Blob<Dtype>*> softmax_bottom_vec_;
   /// top vector holder used in call to the underlying SoftmaxLayer::Forward
   vector<Blob<Dtype>*> softmax_top_vec_;
+  /// Whether to ignore instances with a certain label.
+  bool has_ignore_label_;
+  /// The label indicating that an instance should be ignored.
+  int ignore_label_;
+  /// Whether to normalize the loss by the total number of values present
+  /// (otherwise just by the batch size).
+  bool normalize_;
 };
 
 }  // namespace caffe

src/caffe/layers/softmax_loss_layer.cpp (+35, -9)

@@ -17,6 +17,13 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
   softmax_top_vec_.clear();
   softmax_top_vec_.push_back(&prob_);
   softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
+
+  has_ignore_label_ =
+    this->layer_param_.loss_param().has_ignore_label();
+  if (has_ignore_label_) {
+    ignore_label_ = this->layer_param_.loss_param().ignore_label();
+  }
+  normalize_ = this->layer_param_.loss_param().normalize();
 }
 
 template <typename Dtype>
@@ -40,27 +47,34 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
   int num = prob_.num();
   int dim = prob_.count() / num;
   int spatial_dim = prob_.height() * prob_.width();
+  int count = 0;
   Dtype loss = 0;
   for (int i = 0; i < num; ++i) {
     for (int j = 0; j < spatial_dim; j++) {
       const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+      if (has_ignore_label_ && label_value == ignore_label_) {
+        continue;
+      }
       DCHECK_GE(label_value, 0);
-      DCHECK_GT(dim, label_value * spatial_dim);
-      loss -= log(std::max(prob_data[i * dim +
-          label_value * spatial_dim + j],
+      DCHECK_LT(label_value, prob_.channels());
+      loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j],
                            Dtype(FLT_MIN)));
+      ++count;
     }
   }
-  top[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
+  if (normalize_) {
+    top[0]->mutable_cpu_data()[0] = loss / count;
+  } else {
+    top[0]->mutable_cpu_data()[0] = loss / num;
+  }
   if (top.size() == 2) {
     top[1]->ShareData(prob_);
   }
 }
 
 template <typename Dtype>
 void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
-    const vector<bool>& propagate_down,
-    const vector<Blob<Dtype>*>& bottom) {
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
   if (propagate_down[1]) {
     LOG(FATAL) << this->type_name()
                << " Layer cannot backpropagate to label inputs.";
@@ -73,15 +87,27 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     int num = prob_.num();
     int dim = prob_.count() / num;
     int spatial_dim = prob_.height() * prob_.width();
+    int count = 0;
    for (int i = 0; i < num; ++i) {
       for (int j = 0; j < spatial_dim; ++j) {
-        bottom_diff[i * dim + static_cast<int>(label[i * spatial_dim + j])
-            * spatial_dim + j] -= 1;
+        const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+        if (has_ignore_label_ && label_value == ignore_label_) {
+          for (int c = 0; c < bottom[0]->channels(); ++c) {
+            bottom_diff[i * dim + c * spatial_dim + j] = 0;
+          }
+        } else {
+          bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
+          ++count;
+        }
       }
     }
     // Scale gradient
     const Dtype loss_weight = top[0]->cpu_diff()[0];
-    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
+    if (normalize_) {
+      caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
+    } else {
+      caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
+    }
   }
 }
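For readers following the Forward/Backward changes, here is a minimal standalone C++ sketch of the forward-pass rule this diff introduces (illustrative only, not part of the commit; the function name, argument list, and plain-float types are assumptions): positions whose label equals the ignore label contribute neither to the loss nor to the count, and the final division uses either the counted values or the batch size depending on the normalize flag.

// Standalone sketch of the forward loss with an optional ignore label.
// Not Caffe code; all names here are illustrative.
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <vector>

// prob:  flattened (num x channels x spatial_dim) softmax probabilities.
// label: flattened (num x spatial_dim) integer labels.
float SoftmaxLossSketch(const std::vector<float>& prob,
                        const std::vector<int>& label,
                        int num, int channels, int spatial_dim,
                        bool has_ignore_label, int ignore_label,
                        bool normalize) {
  const int dim = channels * spatial_dim;  // values per image
  int count = 0;      // labels actually contributing to the loss
  float loss = 0.0f;
  for (int i = 0; i < num; ++i) {
    for (int j = 0; j < spatial_dim; ++j) {
      const int label_value = label[i * spatial_dim + j];
      if (has_ignore_label && label_value == ignore_label) {
        continue;  // missing value: adds neither loss nor count
      }
      loss -= std::log(std::max(
          prob[i * dim + label_value * spatial_dim + j], FLT_MIN));
      ++count;
    }
  }
  // Assumes at least one non-ignored label; otherwise count is zero
  // and the normalized division is undefined, as in the layer code above.
  return normalize ? loss / count : loss / num;
}

The backward pass mirrors this rule: ignored positions receive a zero gradient across all channels, and the same count (or num when normalization is off) scales the gradient.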

src/caffe/proto/caffe.proto (+13, -1)

@@ -206,7 +206,7 @@ message NetStateRule {
 // NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available ID: 42 (last added: exp_param)
+// LayerParameter next available ID: 43 (last added: loss_param)
 message LayerParameter {
   repeated string bottom = 2; // the name of the bottom blobs
   repeated string top = 3; // the name of the top blobs
@@ -333,6 +333,9 @@ message LayerParameter {
   // Parameters for data pre-processing.
   optional TransformationParameter transform_param = 36;
 
+  // Parameters shared by loss layers.
+  optional LossParameter loss_param = 42;
+
   // Note: certain layers may have more than one computational engine
   // for their implementation. These layers include an Engine type and
   // engine parameter for selecting the implementation.
@@ -363,6 +366,15 @@ message TransformationParameter {
   repeated float mean_value = 5;
 }
 
+// Message that stores parameters shared by loss layers
+message LossParameter {
+  // If specified, ignore instances with the given label.
+  optional int32 ignore_label = 1;
+  // If true, normalize each batch across all instances (including spatial
+  // dimensions, but not ignored instances); else, divide by batch size only.
+  optional bool normalize = 2 [default = true];
+}
+
 // Message that stores parameters used by AccuracyLayer
 message AccuracyParameter {
   // When computing accuracy, count as correct by comparing the true label to
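As a usage note (not part of the commit), the new loss_param message could be filled in through the protobuf-generated C++ accessors implied by the definitions above; the snippet below assumes the generated header caffe/proto/caffe.pb.h and uses 255 only as an example ignore label.

// Illustrative only: configuring the new LossParameter via the
// protobuf-generated C++ accessors for the messages defined above.
#include "caffe/proto/caffe.pb.h"

void ConfigureSoftmaxLoss(caffe::LayerParameter* layer_param) {
  // Treat label 255 as "missing" (a common convention for void pixels
  // in segmentation datasets; the value itself is an example).
  layer_param->mutable_loss_param()->set_ignore_label(255);
  // Average over the labels actually present rather than only the batch
  // size (true is already the declared default).
  layer_param->mutable_loss_param()->set_normalize(true);
}

The equivalent prototxt form would be a loss_param { ignore_label: 255 } block on the loss layer's definition.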
