From 0f85f5de9eaa3c1b31bff9337268c071f634073d Mon Sep 17 00:00:00 2001 From: Anirudh Date: Tue, 4 Dec 2018 18:00:58 -0800 Subject: [PATCH] doc fix (#13465) --- python/mxnet/optimizer/optimizer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py index d7b6821ac8cc..a085b6fe2ef6 100644 --- a/python/mxnet/optimizer/optimizer.py +++ b/python/mxnet/optimizer/optimizer.py @@ -688,8 +688,11 @@ class LBSGD(Optimizer): state = momentum * state + lr * rescale_grad * clip(grad, clip_gradient) + wd * weight weight = weight - state - For details of the update algorithm see :class:`~mxnet.ndarray.lbsgd_update` and - :class:`~mxnet.ndarray.lbsgd_mom_update`. + For details of the update algorithm see :class:`~mxnet.ndarray.sgd_update` + and :class:`~mxnet.ndarray.sgd_mom_update`. + In addition to the SGD updates, the LBSGD optimizer uses the LARS, Layer-wise + Adaptive Rate Scaling, algorithm to have a separate learning rate for each + layer of the network, which leads to better stability over large batch sizes. This optimizer accepts the following parameters in addition to those accepted by :class:`.Optimizer`.