This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

backport fixes in master branch (#19356)
Neutron3529 authored Oct 21, 2020
1 parent 0bc01e9 commit 3f64203
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions python/mxnet/gluon/loss.py
@@ -476,7 +476,7 @@ def hybrid_forward(self, F, pred, label, sample_weight=None):
         pred = F.log_softmax(pred, self._axis)
         loss = label * (F.log(label + 1e-12) - pred)
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
-        return F.mean(loss, axis=self._batch_axis, exclude=True)
+        return F.sum(loss, axis=self._batch_axis, exclude=True)


 class CTCLoss(Loss):
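
For context, this hunk changes the KL-divergence reduction from a mean over the non-batch axes to a sum. A minimal NumPy sketch (not part of the commit, with made-up toy distributions) of why the reduction matters: averaging divides the textbook KL divergence by the number of classes, while summing recovers it.

import numpy as np

p = np.array([0.7, 0.2, 0.1])   # target distribution (label)
q = np.array([0.5, 0.3, 0.2])   # predicted distribution, standing in for softmax(pred)

# same elementwise term as in loss.py: label * (log(label + 1e-12) - log_prob)
elementwise = p * (np.log(p + 1e-12) - np.log(q))
print(elementwise.sum())    # ~0.085, the textbook KL(p || q)
print(elementwise.mean())   # ~0.028, the same value divided by the 3 classes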
@@ -1010,8 +1010,7 @@ def _compute_labels(self, F, batch_size):
         confident output distributions." arXiv preprint arXiv:1701.06548 (2017).
         """

-        # TODO: replace with mx.nd.eye(batch_size) with mxnet 1.2
-        gold = F.one_hot(F.arange(batch_size), batch_size)
+        gold = F.eye(batch_size)
         labels = gold * (1 - self.smoothing_parameter) + (1 - gold) * self.smoothing_parameter / (batch_size - 1)
         return labels

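The second hunk only swaps how the identity matrix is built; F.eye(batch_size) yields the same gold matrix as the old one_hot/arange combination. A quick NumPy check (not from the commit; batch size and smoothing parameter are made up) of the equivalence and of the smoothed label matrix computed from it:

import numpy as np

batch_size, smoothing = 4, 0.3

old_gold = np.eye(batch_size)[np.arange(batch_size)]   # one_hot(arange(n), n)
new_gold = np.eye(batch_size)                           # eye(n)
assert (old_gold == new_gold).all()

labels = new_gold * (1 - smoothing) + (1 - new_gold) * smoothing / (batch_size - 1)
print(labels)   # 0.7 on the diagonal, 0.1 elsewhere; each row sums to 1
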
@@ -1038,8 +1037,9 @@ def _loss(self, F, x1, x2):
         labels = self._compute_labels(F, batch_size)
         distances = self._compute_distances(x1, x2)
         log_probabilities = F.log_softmax(-distances, axis=1)
-        # multiply for the number of labels to obtain the correct loss (gluon kl_loss averages instead of sum)
-        return self.kl_loss(log_probabilities, labels.as_in_context(distances.context)) * batch_size
+        # PR #18423: the old "multiply by the number of labels" compensation should have used x1.shape[1] rather than x1.shape[0].
+        # After PR #18423 there is no need to multiply at all.
+        return self.kl_loss(log_probabilities, labels.as_in_context(distances.context))


     def hybrid_forward(self, F, x1, x2):
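
Putting the hunks together: with the Gluon KL loss now summing over the label axis, kl_loss already returns the full per-row KL divergence, so the old * batch_size compensation would over-count. A rough NumPy re-derivation (not from the commit) of what _loss now computes, assuming _compute_distances produces pairwise squared Euclidean distances and kl_loss behaves like KLDivLoss(from_logits=True); the embeddings are made up.

import numpy as np

def log_softmax(z, axis=1):
    z = z - z.max(axis=axis, keepdims=True)
    return z - np.log(np.exp(z).sum(axis=axis, keepdims=True))

batch_size, dim, smoothing = 4, 8, 0.3
rng = np.random.default_rng(0)
x1 = rng.normal(size=(batch_size, dim))
x2 = rng.normal(size=(batch_size, dim))

# pairwise squared Euclidean distances between rows of x1 and rows of x2
distances = ((x1[:, None, :] - x2[None, :, :]) ** 2).sum(axis=2)
log_probabilities = log_softmax(-distances, axis=1)

# smoothed labels, as in _compute_labels above
eye = np.eye(batch_size)
labels = eye * (1 - smoothing) + (1 - eye) * smoothing / (batch_size - 1)

# per-row KL(labels || softmax(-distances)), summed over the label axis;
# this is what self.kl_loss(...) now yields per example, with no extra factor
per_row_kl = (labels * (np.log(labels + 1e-12) - log_probabilities)).sum(axis=1)
print(per_row_kl)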
