@@ -17,6 +17,13 @@ void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
   softmax_top_vec_.clear();
   softmax_top_vec_.push_back(&prob_);
   softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);
+
+  has_ignore_label_ =
+    this->layer_param_.loss_param().has_ignore_label();
+  if (has_ignore_label_) {
+    ignore_label_ = this->layer_param_.loss_param().ignore_label();
+  }
+  normalize_ = this->layer_param_.loss_param().normalize();
 }
 
 template <typename Dtype>
@@ -40,27 +47,34 @@ void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
   int num = prob_.num();
   int dim = prob_.count() / num;
   int spatial_dim = prob_.height() * prob_.width();
+  int count = 0;
   Dtype loss = 0;
   for (int i = 0; i < num; ++i) {
     for (int j = 0; j < spatial_dim; j++) {
       const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+      if (has_ignore_label_ && label_value == ignore_label_) {
+        continue;
+      }
       DCHECK_GE(label_value, 0);
-      DCHECK_GT(dim, label_value * spatial_dim);
-      loss -= log(std::max(prob_data[i * dim +
-          label_value * spatial_dim + j],
+      DCHECK_LT(label_value, prob_.channels());
+      loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j],
                            Dtype(FLT_MIN)));
+      ++count;
     }
   }
-  top[0]->mutable_cpu_data()[0] = loss / num / spatial_dim;
+  if (normalize_) {
+    top[0]->mutable_cpu_data()[0] = loss / count;
+  } else {
+    top[0]->mutable_cpu_data()[0] = loss / num;
+  }
   if (top.size() == 2) {
     top[1]->ShareData(prob_);
   }
 }
 
 template <typename Dtype>
 void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
-    const vector<bool>& propagate_down,
-    const vector<Blob<Dtype>*>& bottom) {
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
   if (propagate_down[1]) {
     LOG(FATAL) << this->type_name()
                << " Layer cannot backpropagate to label inputs.";
@@ -73,15 +87,27 @@ void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     int num = prob_.num();
     int dim = prob_.count() / num;
     int spatial_dim = prob_.height() * prob_.width();
+    int count = 0;
     for (int i = 0; i < num; ++i) {
       for (int j = 0; j < spatial_dim; ++j) {
-        bottom_diff[i * dim + static_cast<int>(label[i * spatial_dim + j])
-            * spatial_dim + j] -= 1;
+        const int label_value = static_cast<int>(label[i * spatial_dim + j]);
+        if (has_ignore_label_ && label_value == ignore_label_) {
+          for (int c = 0; c < bottom[0]->channels(); ++c) {
+            bottom_diff[i * dim + c * spatial_dim + j] = 0;
+          }
+        } else {
+          bottom_diff[i * dim + label_value * spatial_dim + j] -= 1;
+          ++count;
+        }
       }
     }
     // Scale gradient
     const Dtype loss_weight = top[0]->cpu_diff()[0];
-    caffe_scal(prob_.count(), loss_weight / num / spatial_dim, bottom_diff);
+    if (normalize_) {
+      caffe_scal(prob_.count(), loss_weight / count, bottom_diff);
+    } else {
+      caffe_scal(prob_.count(), loss_weight / num, bottom_diff);
+    }
   }
 }
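
The new behavior is driven by two fields the patch reads from LossParameter in LayerSetUp: ignore_label (skip matching labels in both loss and gradient) and normalize (divide by the count of non-ignored labels instead of num). A minimal sketch of setting them programmatically, assuming the protobuf-generated setters that correspond to the accessors used above; the helper name ConfigureLoss and the label value 255 are made up for illustration:

#include "caffe/proto/caffe.pb.h"

// Hypothetical helper: fills in the loss options this patch reads in LayerSetUp.
void ConfigureLoss(caffe::LayerParameter* layer_param) {
  caffe::LossParameter* loss_param = layer_param->mutable_loss_param();
  // Labels equal to 255 contribute no loss and receive a zero gradient.
  loss_param->set_ignore_label(255);
  // false: scale by 1/num (batch size), matching the else branches above;
  // true: scale by 1/count (number of non-ignored labels).
  loss_param->set_normalize(false);
}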