From c1ea3c1f95bddcfe25fca08d07df18dbba47051c Mon Sep 17 00:00:00 2001
From: WANG Lei <wlbksy@126.com>
Date: Sat, 29 Dec 2018 12:07:16 +0800
Subject: [PATCH 1/2] fix the order of error term's operands

---
 python/mxnet/gluon/loss.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index 7b5832e1ace6..e166e7c3f41c 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -101,7 +101,7 @@ def hybrid_forward(self, F, x, *args, **kwargs):
 class L2Loss(Loss):
     r"""Calculates the mean squared error between `pred` and `label`.
 
-    .. math:: L = \frac{1}{2} \sum_i \vert {pred}_i - {label}_i \vert^2.
+    .. math:: L = \frac{1}{2} \sum_i \vert {label}_i - {pred}_i \vert^2.
 
     `pred` and `label` can have arbitrary shape as long as they have the same
     number of elements.
@@ -131,7 +131,7 @@ def __init__(self, weight=1., batch_axis=0, **kwargs):
 
     def hybrid_forward(self, F, pred, label, sample_weight=None):
         label = _reshape_like(F, label, pred)
-        loss = F.square(pred - label)
+        loss = F.square(label - pred)
         loss = _apply_weighting(F, loss, self._weight/2, sample_weight)
         return F.mean(loss, axis=self._batch_axis, exclude=True)
 
@@ -139,7 +139,7 @@ def hybrid_forward(self, F, pred, label, sample_weight=None):
 class L1Loss(Loss):
     r"""Calculates the mean absolute error between `pred` and `label`.
 
-    .. math:: L = \sum_i \vert {pred}_i - {label}_i \vert.
+    .. math:: L = \sum_i \vert {label}_i - {pred}_i \vert.
 
     `pred` and `label` can have arbitrary shape as long as they have the same
     number of elements.
@@ -169,7 +169,7 @@ def __init__(self, weight=None, batch_axis=0, **kwargs):
 
     def hybrid_forward(self, F, pred, label, sample_weight=None):
         label = _reshape_like(F, label, pred)
-        loss = F.abs(pred - label)
+        loss = F.abs(label - pred)
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
         return F.mean(loss, axis=self._batch_axis, exclude=True)
 
@@ -481,9 +481,9 @@ class HuberLoss(Loss):
     exceeds rho but is equal to L2 loss otherwise. Also called SmoothedL1 loss.
 
     .. math::
-        L = \sum_i \begin{cases} \frac{1}{2 {rho}} ({pred}_i - {label}_i)^2 &
-                           \text{ if } |{pred}_i - {label}_i| < {rho} \\
-                           |{pred}_i - {label}_i| - \frac{{rho}}{2} &
+        L = \sum_i \begin{cases} \frac{1}{2 {rho}} ({label}_i - {pred}_i)^2 &
+                           \text{ if } |{label}_i - {pred}_i| < {rho} \\
+                           |{label}_i - {pred}_i| - \frac{{rho}}{2} &
                            \text{ otherwise }
             \end{cases}
 
@@ -518,7 +518,7 @@ def __init__(self, rho=1, weight=None, batch_axis=0, **kwargs):
 
     def hybrid_forward(self, F, pred, label, sample_weight=None):
         label = _reshape_like(F, label, pred)
-        loss = F.abs(pred - label)
+        loss = F.abs(label - pred)
         loss = F.where(loss > self._rho, loss - 0.5 * self._rho,
                        (0.5/self._rho) * F.square(loss))
         loss = _apply_weighting(F, loss, self._weight, sample_weight)
@@ -670,8 +670,8 @@ class TripletLoss(Loss):
     example and a negative example:
 
     .. math::
-        L = \sum_i \max(\Vert {pred}_i - {pos_i} \Vert_2^2 -
-                        \Vert {pred}_i - {neg_i} \Vert_2^2 + {margin}, 0)
+        L = \sum_i \max(\Vert {pos_i}_i - {pred} \Vert_2^2 -
+                        \Vert {neg_i}_i - {pred} \Vert_2^2 + {margin}, 0)
 
     `pred`, `positive` and `negative` can have arbitrary shape as long as they
     have the same number of elements.
@@ -703,7 +703,7 @@ def __init__(self, margin=1, weight=None, batch_axis=0, **kwargs):
     def hybrid_forward(self, F, pred, positive, negative):
         positive = _reshape_like(F, positive, pred)
         negative = _reshape_like(F, negative, pred)
-        loss = F.sum(F.square(pred-positive) - F.square(pred-negative),
+        loss = F.sum(F.square(positive-pred) - F.square(negative-pred),
                      axis=self._batch_axis, exclude=True)
         loss = F.relu(loss + self._margin)
         return _apply_weighting(F, loss, self._weight, None)

From b4f2b86a725c5a6c273b940866b23a1ae9b86d10 Mon Sep 17 00:00:00 2001
From: WANG Lei <wlbksy@126.com>
Date: Tue, 8 Jan 2019 10:49:13 +0800
Subject: [PATCH 2/2] address comments

---
 python/mxnet/gluon/loss.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index e166e7c3f41c..29d0105ae8dd 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -99,11 +99,11 @@ def hybrid_forward(self, F, x, *args, **kwargs):
 
 
 class L2Loss(Loss):
-    r"""Calculates the mean squared error between `pred` and `label`.
+    r"""Calculates the mean squared error between `label` and `pred`.
 
     .. math:: L = \frac{1}{2} \sum_i \vert {label}_i - {pred}_i \vert^2.
 
-    `pred` and `label` can have arbitrary shape as long as they have the same
+    `label` and `pred` can have arbitrary shape as long as they have the same
     number of elements.
 
     Parameters
@@ -137,11 +137,11 @@ def hybrid_forward(self, F, pred, label, sample_weight=None):
 
 
 class L1Loss(Loss):
-    r"""Calculates the mean absolute error between `pred` and `label`.
+    r"""Calculates the mean absolute error between `label` and `pred`.
 
     .. math:: L = \sum_i \vert {label}_i - {pred}_i \vert.
 
-    `pred` and `label` can have arbitrary shape as long as they have the same
+    `label` and `pred` can have arbitrary shape as long as they have the same
     number of elements.
 
     Parameters
@@ -195,7 +195,7 @@ class SigmoidBinaryCrossEntropyLoss(Loss):
             (1 - {label}_i) * \log(1 - {pred}_i)
 
 
-    `pred` and `label` can have arbitrary shape as long as they have the same
+    `label` and `pred` can have arbitrary shape as long as they have the same
     number of elements.
 
     Parameters
@@ -344,7 +344,7 @@ class KLDivLoss(Loss):
         L = \sum_i {label}_i * \big[\log({label}_i) - log({pred}_i)\big]
 
 
-    `pred` and `label` can have arbitrary shape as long as they have the same
+    `label` and `pred` can have arbitrary shape as long as they have the same
     number of elements.
 
     Parameters
@@ -487,7 +487,7 @@ class HuberLoss(Loss):
                            \text{ otherwise }
             \end{cases}
 
-    `pred` and `label` can have arbitrary shape as long as they have the same
+    `label` and `pred` can have arbitrary shape as long as they have the same
     number of elements.
 
     Parameters
@@ -532,7 +532,7 @@ class HingeLoss(Loss):
         L = \sum_i max(0, {margin} - {pred}_i \cdot {label}_i)
 
     where `pred` is the classifier prediction and `label` is the target tensor
-    containing values -1 or 1. `pred` and `label` must have the same number of
+    containing values -1 or 1. `label` and `pred` must have the same number of
     elements.
 
     Parameters
@@ -576,7 +576,7 @@ class SquaredHingeLoss(Loss):
         L = \sum_i max(0, {margin} - {pred}_i \cdot {label}_i)^2
 
     where `pred` is the classifier prediction and `label` is the target tensor
-    containing values -1 or 1. `pred` and `label` can have arbitrary shape as
+    containing values -1 or 1. `label` and `pred` can have arbitrary shape as
     long as they have the same number of elements.
 
     Parameters
@@ -621,7 +621,7 @@ class LogisticLoss(Loss):
 
     where `pred` is the classifier prediction and `label` is the target tensor
     containing values -1 or 1 (0 or 1 if `label_format` is binary).
-    `pred` and `label` can have arbitrary shape as long as they have the same number of elements.
+    `label` and `pred` can have arbitrary shape as long as they have the same number of elements.
 
     Parameters
     ----------
@@ -666,14 +666,14 @@ def hybrid_forward(self, F, pred, label, sample_weight=None):
 
 class TripletLoss(Loss):
     r"""Calculates triplet loss given three input tensors and a positive margin.
-    Triplet loss measures the relative similarity between prediction, a positive
-    example and a negative example:
+    Triplet loss measures the relative similarity between a positive
+    example, a negative example, and prediction:
 
     .. math::
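-        L = \sum_i \max(\Vert {pos_i}_i - {pred} \Vert_2^2 -
-                        \Vert {neg_i}_i - {pred} \Vert_2^2 + {margin}, 0)
+        L = \sum_i \max(\Vert {pos}_i - {pred}_i \Vert_2^2 -
+                        \Vert {neg}_i - {pred}_i \Vert_2^2 + {margin}, 0)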
 
-    `pred`, `positive` and `negative` can have arbitrary shape as long as they
+    `positive`, `negative`, and `pred` can have arbitrary shape as long as they
     have the same number of elements.
 
     Parameters
@@ -717,7 +717,7 @@ class PoissonNLLLoss(Loss):
     .. math::
         L = \text{pred} - \text{target} * \log(\text{pred}) +\log(\text{target!})
 
-    `pred`, `target` can have arbitrary shape as long as they have the same number of elements.
+    `target` and `pred` can have arbitrary shape as long as they have the same number of elements.
 
     Parameters
     ----------