From 0f85f5de9eaa3c1b31bff9337268c071f634073d Mon Sep 17 00:00:00 2001 From: Anirudh Date: Tue, 4 Dec 2018 18:00:58 -0800 Subject: [PATCH] doc fix (#13465) --- python/mxnet/optimizer/optimizer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py index d7b6821ac8cc..a085b6fe2ef6 100644 --- a/python/mxnet/optimizer/optimizer.py +++ b/python/mxnet/optimizer/optimizer.py @@ -688,8 +688,11 @@ class LBSGD(Optimizer): state = momentum * state + lr * rescale_grad * clip(grad, clip_gradient) + wd * weight weight = weight - state - For details of the update algorithm see :class:`~mxnet.ndarray.lbsgd_update` and - :class:`~mxnet.ndarray.lbsgd_mom_update`. + For details of the update algorithm see :class:`~mxnet.ndarray.sgd_update` + and :class:`~mxnet.ndarray.sgd_mom_update`. + In addition to the SGD updates, the LBSGD optimizer uses the LARS, Layer-wise + Adaptive Rate Scaling, algorithm to have a separate learning rate for each + layer of the network, which leads to better stability over large batch sizes. This optimizer accepts the following parameters in addition to those accepted by :class:`.Optimizer`.