diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index cb52ac54fdab..a986f271c4b4 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -75,12 +75,11 @@ class Optimizer(object):
         The initial number of updates.
 
     multi_precision : bool, optional, default False
-        Flag to control the internal precision of the optimizer.::
-
-        False: results in using the same precision as the weights (default),
-        True: makes internal 32-bit copy of the weights and applies gradients
-        in 32-bit precision even if actual weights used in the model have lower precision.
-        Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.
+        False: results in using the same precision as the weights (default),
+        True: makes internal 32-bit copy of the weights and applies gradients
+        in 32-bit precision even if actual weights used in the model have lower precision.
+        Turning this on can improve convergence and accuracy when training with float16.
 
     param_dict : dict of int -> gluon.Parameter, default None
         Dictionary of parameter index to gluon.Parameter, used to lookup parameter attributes
@@ -541,12 +540,11 @@ class SGD(Optimizer):
        Default is True. If True, lazy updates are applied \
        if the storage types of weight and grad are both ``row_sparse``.
     multi_precision: bool, optional
-        Flag to control the internal precision of the optimizer.::
-
-        False: results in using the same precision as the weights (default),
-        True: makes internal 32-bit copy of the weights and applies gradients
-        in 32-bit precision even if actual weights used in the model have lower precision.
-        Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.
+        False: results in using the same precision as the weights (default),
+        True: makes internal 32-bit copy of the weights and applies gradients
+        in 32-bit precision even if actual weights used in the model have lower precision.
+        Turning this on can improve convergence and accuracy when training with float16.
     """
     def __init__(self, momentum=0.0, lazy_update=True, **kwargs):
         super(SGD, self).__init__(**kwargs)
@@ -790,12 +788,11 @@ class LBSGD(Optimizer):
     momentum : float, optional
         The momentum value.
     multi_precision: bool, optional
-        Flag to control the internal precision of the optimizer.::
-
-        False: results in using the same precision as the weights (default),
-        True: makes internal 32-bit copy of the weights and applies gradients
-        in 32-bit precision even if actual weights used in the model have lower precision.
-        Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.
+        False: results in using the same precision as the weights (default),
+        True: makes internal 32-bit copy of the weights and applies gradients
+        in 32-bit precision even if actual weights used in the model have lower precision.
+        Turning this on can improve convergence and accuracy when training with float16.
 
     warmup_strategy: string ('linear', 'power2', 'sqrt'. , 'lars' default : 'linear')
     warmup_epochs: unsigned, default: 5
@@ -1031,12 +1028,11 @@ class NAG(Optimizer):
     momentum : float, optional
         The momentum value.
     multi_precision: bool, optional
-        Flag to control the internal precision of the optimizer.::
-
-        False: results in using the same precision as the weights (default),
-        True: makes internal 32-bit copy of the weights and applies gradients
-        in 32-bit precision even if actual weights used in the model have lower precision.
-        Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.
+        False: results in using the same precision as the weights (default),
+        True: makes internal 32-bit copy of the weights and applies gradients
+        in 32-bit precision even if actual weights used in the model have lower precision.
+        Turning this on can improve convergence and accuracy when training with float16.
     """
     def __init__(self, momentum=0.0, **kwargs):
         super(NAG, self).__init__(**kwargs)
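For context, the `multi_precision` flag documented in the hunks above is an ordinary keyword argument accepted by `Optimizer.__init__` (and therefore by `SGD`, `LBSGD`, `NAG`, etc.). The sketch below is not part of this diff; it only illustrates how the flag is typically passed when the model weights are float16. The toy `Dense` layer and the hyperparameter values are arbitrary placeholders.

```python
# Illustrative sketch, not part of this change: enable 32-bit master weights
# for an optimizer while the model itself holds float16 parameters.
import mxnet as mx
from mxnet import gluon

net = gluon.nn.Dense(10, in_units=20)   # hypothetical toy network
net.initialize(mx.init.Xavier())
net.cast('float16')                     # model weights stored in float16

# multi_precision=True: the optimizer keeps an internal float32 copy of the
# weights and applies the gradient update in float32, then writes the result
# back to the float16 parameters.
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': 0.1, 'momentum': 0.9,
                         'multi_precision': True})
```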