Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

fix bug in nag optimizer #13683

Merged
merged 6 commits (source/target branch names lost in extraction)
Jan 16, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions python/mxnet/optimizer/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,11 +973,9 @@ def update(self, index, weight, grad, state):

if state is not None:
mom = state
mom[:] *= self.momentum
grad += wd * weight
mom[:] += grad
mom[:] = self.momentum * mom[:] + grad + wd * weight
grad[:] += self.momentum * mom
weight[:] += -lr * grad
weight[:] -= lr * grad
else:
assert self.momentum == 0.0
weight[:] += -lr * (grad + wd * weight)
Expand Down
12 changes: 4 additions & 8 deletions tests/python/unittest/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,9 @@ def update(self, index, weight, grad, state):
weight[:] += -lr * (grad + wd * weight)
else:
mom = state
mom[:] *= self.momentum
grad += wd * weight
mom[:] += grad
mom[:] = self.momentum * mom[:] + grad + wd * weight
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try doing all these with in-place operators.

grad[:] += self.momentum * mom
weight[:] += -lr * grad
weight[:] -= lr * grad
else:
grad32 = array(grad, ctx=grad.context, dtype=np.float32)
grad32 = grad32 * self.rescale_grad
Expand All @@ -399,11 +397,9 @@ def update(self, index, weight, grad, state):
if self.momentum == 0.0:
weight32[:] += -lr * (grad32 + wd * weight32)
else:
mom[:] *= self.momentum
grad32 += wd * weight32
mom[:] += grad32
mom[:] = self.momentum * mom[:] + grad32 + wd * weight32
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try doing all these with in-place operators.

grad32[:] += self.momentum * mom
weight32[:] += -lr * grad32
weight32[:] -= lr * grad32
tmp = weight32.astype(weight.dtype)
tmp.copyto(weight)

Expand Down